This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Preprocessing and postprocessing for File::Find.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
29d1e993
HS
224#define TRYPAREN(paren, n, input) { \
225 if (paren) { \
226 if (n) { \
227 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
228 PL_regendp[paren] = input - PL_bostr; \
229 } \
230 else \
231 PL_regendp[paren] = -1; \
232 } \
233 if (regmatch(next)) \
234 sayYES; \
235 if (paren && n) \
236 PL_regendp[paren] = -1; \
237}
238
239
a687059c 240/*
e50aee73 241 * pregexec and friends
a687059c
LW
242 */
243
244/*
c277df42 245 - pregexec - match a regexp against a string
a687059c 246 */
c277df42 247I32
864dbfa3 248Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 249 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
250/* strend: pointer to null at end of string */
251/* strbeg: real beginning of string */
252/* minend: end of match must be >=minend after stringarg. */
253/* nosave: For optimizations. */
254{
255 return
256 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
257 nosave ? 0 : REXEC_COPY_STR);
258}
0f5d15d6
IZ
259
260STATIC void
cea2e8a9 261S_cache_re(pTHX_ regexp *prog)
0f5d15d6 262{
46124e9e 263 dTHR;
0f5d15d6
IZ
264 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
265#ifdef DEBUGGING
266 PL_regprogram = prog->program;
267#endif
268 PL_regnpar = prog->nparens;
269 PL_regdata = prog->data;
270 PL_reg_re = prog;
271}
22e551b9 272
cad2e5aa
JH
273/*
274 * Need to implement the following flags for reg_anch:
275 *
276 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
277 * USE_INTUIT_ML
278 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
279 * INTUIT_AUTORITATIVE_ML
280 * INTUIT_ONCE_NOML - Intuit can match in one location only.
281 * INTUIT_ONCE_ML
282 *
283 * Another flag for this function: SECOND_TIME (so that float substrs
284 * with giant delta may be not rechecked).
285 */
286
287/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
288
2c2d71f5 289/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
290 Otherwise, only SvCUR(sv) is used to get strbeg. */
291
292/* XXXX We assume that strpos is strbeg unless sv. */
293
6eb5f6b9
JH
294/* XXXX Some places assume that there is a fixed substring.
295 An update may be needed if optimizer marks as "INTUITable"
296 RExen without fixed substrings. Similarly, it is assumed that
297 lengths of all the strings are no more than minlen, thus they
298 cannot come from lookahead.
299 (Or minlen should take into account lookahead.) */
300
2c2d71f5
JH
301/* A failure to find a constant substring means that there is no need to make
302 an expensive call to REx engine, thus we celebrate a failure. Similarly,
303 finding a substring too deep into the string means that less calls to
30944b6d
IZ
304 regtry() should be needed.
305
306 REx compiler's optimizer found 4 possible hints:
307 a) Anchored substring;
308 b) Fixed substring;
309 c) Whether we are anchored (beginning-of-line or \G);
310 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 311 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
312 string which does not contradict any of them.
313 */
2c2d71f5 314
6eb5f6b9
JH
315/* Most of decisions we do here should have been done at compile time.
316 The nodes of the REx which we used for the search should have been
317 deleted from the finite automaton. */
318
cad2e5aa
JH
319char *
320Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
321 char *strend, U32 flags, re_scream_pos_data *data)
322{
2c2d71f5 323 register I32 start_shift;
cad2e5aa 324 /* Should be nonnegative! */
2c2d71f5
JH
325 register I32 end_shift;
326 register char *s;
327 register SV *check;
cad2e5aa
JH
328 char *t;
329 I32 ml_anch;
2c2d71f5 330 char *tmp;
6eb5f6b9
JH
331 register char *other_last = Nullch; /* other substr checked before this */
332 char *check_at; /* check substr found at this pos */
30944b6d
IZ
333#ifdef DEBUGGING
334 char *i_strpos = strpos;
335#endif
cad2e5aa
JH
336
337 DEBUG_r( if (!PL_colorset) reginitcolors() );
338 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 339 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
340 PL_colors[4],PL_colors[5],PL_colors[0],
341 prog->precomp,
342 PL_colors[1],
343 (strlen(prog->precomp) > 60 ? "..." : ""),
344 PL_colors[0],
b900a521 345 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
346 strpos, PL_colors[1],
347 (strend - strpos > 60 ? "..." : ""))
348 );
349
2c2d71f5
JH
350 if (prog->minlen > strend - strpos) {
351 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 352 goto fail;
2c2d71f5 353 }
653099ff 354 check = prog->check_substr;
2c2d71f5 355 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
356 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
357 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 358 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
359
360 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 361 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
362 I32 slen;
363
364 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
05b4157f
GS
365 /* SvCUR is not set on references: SvRV and SvPVX overlap */
366 && sv && !SvROK(sv)
367 && (strpos + SvCUR(sv) != strend)) {
2c2d71f5 368 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 369 goto fail;
2c2d71f5 370 }
adac82c7 371 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 372 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
373 if (SvTAIL(check)) {
374 slen = SvCUR(check); /* >= 1 */
cad2e5aa 375
2c2d71f5
JH
376 if ( strend - s > slen || strend - s < slen - 1
377 || (strend - s == slen && strend[-1] != '\n')) {
378 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
379 goto fail_finish;
cad2e5aa
JH
380 }
381 /* Now should match s[0..slen-2] */
382 slen--;
653099ff 383 if (slen && (*SvPVX(check) != *s
cad2e5aa 384 || (slen > 1
653099ff 385 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
386 report_neq:
387 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
388 goto fail_finish;
389 }
cad2e5aa 390 }
653099ff
GS
391 else if (*SvPVX(check) != *s
392 || ((slen = SvCUR(check)) > 1
393 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
394 goto report_neq;
395 goto success_at_start;
cad2e5aa 396 }
2c2d71f5 397 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 398 s = strpos;
2c2d71f5 399 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 400 end_shift = prog->minlen - start_shift -
653099ff 401 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 402 if (!ml_anch) {
653099ff
GS
403 I32 end = prog->check_offset_max + CHR_SVLEN(check)
404 - (SvTAIL(check) != 0);
2c2d71f5
JH
405 I32 eshift = strend - s - end;
406
407 if (end_shift < eshift)
408 end_shift = eshift;
409 }
cad2e5aa 410 }
2c2d71f5 411 else { /* Can match at random position */
cad2e5aa
JH
412 ml_anch = 0;
413 s = strpos;
2c2d71f5
JH
414 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
415 /* Should be nonnegative! */
416 end_shift = prog->minlen - start_shift -
653099ff 417 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
418 }
419
2c2d71f5 420#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 421 if (end_shift < 0)
6bbae5e6 422 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
423#endif
424
2c2d71f5
JH
425 restart:
426 /* Find a possible match in the region s..strend by looking for
427 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 428 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
429 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
430 I32 p = -1; /* Internal iterator of scream. */
431 I32 *pp = data ? data->scream_pos : &p;
432
2c2d71f5
JH
433 if (PL_screamfirst[BmRARE(check)] >= 0
434 || ( BmRARE(check) == '\n'
435 && (BmPREVIOUS(check) == SvCUR(check) - 1)
436 && SvTAIL(check) ))
437 s = screaminstr(sv, check,
438 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 439 else
2c2d71f5 440 goto fail_finish;
cad2e5aa
JH
441 if (data)
442 *data->scream_olds = s;
443 }
444 else
445 s = fbm_instr((unsigned char*)s + start_shift,
446 (unsigned char*)strend - end_shift,
2c2d71f5 447 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
448
449 /* Update the count-of-usability, remove useless subpatterns,
450 unshift s. */
2c2d71f5
JH
451
452 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
453 (s ? "Found" : "Did not find"),
454 ((check == prog->anchored_substr) ? "anchored" : "floating"),
455 PL_colors[0],
7b0972df
JH
456 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
457 SvPVX(check),
2c2d71f5
JH
458 PL_colors[1], (SvTAIL(check) ? "$" : ""),
459 (s ? " at offset " : "...\n") ) );
460
461 if (!s)
462 goto fail_finish;
463
6eb5f6b9
JH
464 check_at = s;
465
2c2d71f5 466 /* Finish the diagnostic message */
30944b6d 467 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
468
469 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
470 Start with the other substr.
471 XXXX no SCREAM optimization yet - and a very coarse implementation
472 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
473 *always* match. Probably should be marked during compile...
474 Probably it is right to do no SCREAM here...
475 */
476
477 if (prog->float_substr && prog->anchored_substr) {
30944b6d 478 /* Take into account the "other" substring. */
2c2d71f5
JH
479 /* XXXX May be hopelessly wrong for UTF... */
480 if (!other_last)
6eb5f6b9 481 other_last = strpos;
2c2d71f5 482 if (check == prog->float_substr) {
30944b6d
IZ
483 do_other_anchored:
484 {
2c2d71f5
JH
485 char *last = s - start_shift, *last1, *last2;
486 char *s1 = s;
487
488 tmp = PL_bostr;
489 t = s - prog->check_offset_max;
490 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
491 && (!(prog->reganch & ROPT_UTF8)
492 || (PL_bostr = strpos, /* Used in regcopmaybe() */
493 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
494 && t > strpos)))
30944b6d 495 /* EMPTY */;
2c2d71f5
JH
496 else
497 t = strpos;
498 t += prog->anchored_offset;
6eb5f6b9
JH
499 if (t < other_last) /* These positions already checked */
500 t = other_last;
2c2d71f5
JH
501 PL_bostr = tmp;
502 last2 = last1 = strend - prog->minlen;
503 if (last < last1)
504 last1 = last;
505 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
506 /* On end-of-str: see comment below. */
507 s = fbm_instr((unsigned char*)t,
508 (unsigned char*)last1 + prog->anchored_offset
509 + SvCUR(prog->anchored_substr)
510 - (SvTAIL(prog->anchored_substr)!=0),
511 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
512 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
513 (s ? "Found" : "Contradicts"),
514 PL_colors[0],
7b0972df
JH
515 (int)(SvCUR(prog->anchored_substr)
516 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
517 SvPVX(prog->anchored_substr),
518 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
519 if (!s) {
520 if (last1 >= last2) {
521 DEBUG_r(PerlIO_printf(Perl_debug_log,
522 ", giving up...\n"));
523 goto fail_finish;
524 }
525 DEBUG_r(PerlIO_printf(Perl_debug_log,
526 ", trying floating at offset %ld...\n",
30944b6d 527 (long)(s1 + 1 - i_strpos)));
2c2d71f5 528 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 529 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
530 s = HOPc(last, 1);
531 goto restart;
532 }
533 else {
534 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 535 (long)(s - i_strpos)));
2c2d71f5 536 t = s - prog->anchored_offset;
6eb5f6b9 537 other_last = s + 1;
30944b6d 538 s = s1;
2c2d71f5
JH
539 if (t == strpos)
540 goto try_at_start;
2c2d71f5
JH
541 goto try_at_offset;
542 }
30944b6d 543 }
2c2d71f5
JH
544 }
545 else { /* Take into account the floating substring. */
546 char *last, *last1;
547 char *s1 = s;
548
549 t = s - start_shift;
550 last1 = last = strend - prog->minlen + prog->float_min_offset;
551 if (last - t > prog->float_max_offset)
552 last = t + prog->float_max_offset;
553 s = t + prog->float_min_offset;
6eb5f6b9
JH
554 if (s < other_last)
555 s = other_last;
2c2d71f5
JH
556 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
557 /* fbm_instr() takes into account exact value of end-of-str
558 if the check is SvTAIL(ed). Since false positives are OK,
559 and end-of-str is not later than strend we are OK. */
560 s = fbm_instr((unsigned char*)s,
561 (unsigned char*)last + SvCUR(prog->float_substr)
562 - (SvTAIL(prog->float_substr)!=0),
563 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
564 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
565 (s ? "Found" : "Contradicts"),
566 PL_colors[0],
7b0972df
JH
567 (int)(SvCUR(prog->float_substr)
568 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
569 SvPVX(prog->float_substr),
570 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
571 if (!s) {
572 if (last1 == last) {
573 DEBUG_r(PerlIO_printf(Perl_debug_log,
574 ", giving up...\n"));
575 goto fail_finish;
576 }
577 DEBUG_r(PerlIO_printf(Perl_debug_log,
578 ", trying anchored starting at offset %ld...\n",
30944b6d 579 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 580 other_last = last + 1;
2c2d71f5
JH
581 PL_regeol = strend; /* Used in HOP() */
582 s = HOPc(t, 1);
583 goto restart;
584 }
585 else {
586 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 587 (long)(s - i_strpos)));
6eb5f6b9 588 other_last = s + 1;
30944b6d 589 s = s1;
2c2d71f5
JH
590 if (t == strpos)
591 goto try_at_start;
2c2d71f5
JH
592 goto try_at_offset;
593 }
594 }
cad2e5aa 595 }
2c2d71f5
JH
596
597 t = s - prog->check_offset_max;
598 tmp = PL_bostr;
599 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
600 && (!(prog->reganch & ROPT_UTF8)
601 || (PL_bostr = strpos, /* Used in regcopmaybe() */
602 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
603 && t > strpos)))) {
604 PL_bostr = tmp;
605 /* Fixed substring is found far enough so that the match
606 cannot start at strpos. */
607 try_at_offset:
cad2e5aa 608 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
609 /* Eventually fbm_*() should handle this, but often
610 anchored_offset is not 0, so this check will not be wasted. */
611 /* XXXX In the code below we prefer to look for "^" even in
612 presence of anchored substrings. And we search even
613 beyond the found float position. These pessimizations
614 are historical artefacts only. */
615 find_anchor:
2c2d71f5 616 while (t < strend - prog->minlen) {
cad2e5aa 617 if (*t == '\n') {
4ee3650e 618 if (t < check_at - prog->check_offset_min) {
30944b6d 619 if (prog->anchored_substr) {
4ee3650e
GS
620 /* Since we moved from the found position,
621 we definitely contradict the found anchored
30944b6d
IZ
622 substr. Due to the above check we do not
623 contradict "check" substr.
624 Thus we can arrive here only if check substr
625 is float. Redo checking for "other"=="fixed".
626 */
627 strpos = t + 1;
628 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
629 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
630 goto do_other_anchored;
631 }
4ee3650e
GS
632 /* We don't contradict the found floating substring. */
633 /* XXXX Why not check for STCLASS? */
cad2e5aa 634 s = t + 1;
2c2d71f5 635 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 636 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
637 goto set_useful;
638 }
4ee3650e
GS
639 /* Position contradicts check-string */
640 /* XXXX probably better to look for check-string
641 than for "\n", so one should lower the limit for t? */
642 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 643 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 644 other_last = strpos = s = t + 1;
cad2e5aa
JH
645 goto restart;
646 }
647 t++;
648 }
2c2d71f5
JH
649 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
650 PL_colors[0],PL_colors[1]));
651 goto fail_finish;
cad2e5aa 652 }
f5952150
GS
653 else {
654 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
655 PL_colors[0],PL_colors[1]));
656 }
cad2e5aa
JH
657 s = t;
658 set_useful:
2c2d71f5 659 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
660 }
661 else {
2c2d71f5 662 PL_bostr = tmp;
f5952150 663 /* The found string does not prohibit matching at strpos,
2c2d71f5 664 - no optimization of calling REx engine can be performed,
f5952150
GS
665 unless it was an MBOL and we are not after MBOL,
666 or a future STCLASS check will fail this. */
2c2d71f5
JH
667 try_at_start:
668 /* Even in this situation we may use MBOL flag if strpos is offset
669 wrt the start of the string. */
05b4157f 670 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
d506a20d
IZ
671 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n'
672 /* May be due to an implicit anchor of m{.*foo} */
673 && !(prog->reganch & ROPT_IMPLICIT))
674 {
cad2e5aa
JH
675 t = strpos;
676 goto find_anchor;
677 }
30944b6d 678 DEBUG_r( if (ml_anch)
f5952150
GS
679 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
680 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 681 );
2c2d71f5 682 success_at_start:
30944b6d 683 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 684 && prog->check_substr /* Could be deleted already */
cad2e5aa 685 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
686 && prog->check_substr == prog->float_substr)
687 {
cad2e5aa 688 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 689 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
690 SvREFCNT_dec(prog->check_substr);
691 prog->check_substr = Nullsv; /* disable */
692 prog->float_substr = Nullsv; /* clear */
693 s = strpos;
3cf5c195
IZ
694 /* XXXX This is a remnant of the old implementation. It
695 looks wasteful, since now INTUIT can use many
6eb5f6b9 696 other heuristics. */
cad2e5aa
JH
697 prog->reganch &= ~RE_USE_INTUIT;
698 }
699 else
700 s = strpos;
701 }
702
6eb5f6b9
JH
703 /* Last resort... */
704 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
705 if (prog->regstclass) {
706 /* minlen == 0 is possible if regstclass is \b or \B,
707 and the fixed substr is ''$.
708 Since minlen is already taken into account, s+1 is before strend;
709 accidentally, minlen >= 1 guaranties no false positives at s + 1
710 even for \b or \B. But (minlen? 1 : 0) below assumes that
711 regstclass does not come from lookahead... */
712 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
713 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
714 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
715 ? STR_LEN(prog->regstclass)
716 : 1);
6eb5f6b9 717 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
718 ? s + (prog->minlen? cl_l : 0)
719 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 720 : strend) ;
8fdd422b 721 char *startpos = sv && SvPOK(sv) ? strend - SvCUR(sv) : s;
6eb5f6b9
JH
722
723 t = s;
76384e4a
GS
724 if (prog->reganch & ROPT_UTF8) {
725 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
726 PL_bostr = startpos;
727 }
6eb5f6b9
JH
728 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
729 if (!s) {
730#ifdef DEBUGGING
731 char *what;
732#endif
733 if (endpos == strend) {
734 DEBUG_r( PerlIO_printf(Perl_debug_log,
735 "Could not match STCLASS...\n") );
736 goto fail;
737 }
66e933ab
GS
738 DEBUG_r( PerlIO_printf(Perl_debug_log,
739 "This position contradicts STCLASS...\n") );
653099ff
GS
740 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
741 goto fail;
6eb5f6b9
JH
742 /* Contradict one of substrings */
743 if (prog->anchored_substr) {
6eb5f6b9
JH
744 if (prog->anchored_substr == check) {
745 DEBUG_r( what = "anchored" );
746 hop_and_restart:
747 PL_regeol = strend; /* Used in HOP() */
748 s = HOPc(t, 1);
66e933ab
GS
749 if (s + start_shift + end_shift > strend) {
750 /* XXXX Should be taken into account earlier? */
751 DEBUG_r( PerlIO_printf(Perl_debug_log,
752 "Could not match STCLASS...\n") );
753 goto fail;
754 }
6eb5f6b9 755 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 756 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
757 what, (long)(s + start_shift - i_strpos)) );
758 goto restart;
759 }
66e933ab 760 /* Have both, check_string is floating */
6eb5f6b9
JH
761 if (t + start_shift >= check_at) /* Contradicts floating=check */
762 goto retry_floating_check;
763 /* Recheck anchored substring, but not floating... */
764 s = check_at;
765 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 766 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
767 (long)(other_last - i_strpos)) );
768 goto do_other_anchored;
769 }
60e71179
GS
770 /* Another way we could have checked stclass at the
771 current position only: */
772 if (ml_anch) {
773 s = t = t + 1;
774 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
775 "Looking for /%s^%s/m starting at offset %ld...\n",
776 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 777 goto try_at_offset;
66e933ab 778 }
60e71179
GS
779 if (!prog->float_substr) /* Could have been deleted */
780 goto fail;
6eb5f6b9
JH
781 /* Check is floating subtring. */
782 retry_floating_check:
783 t = check_at - start_shift;
784 DEBUG_r( what = "floating" );
785 goto hop_and_restart;
786 }
787 DEBUG_r( if (t != s)
788 PerlIO_printf(Perl_debug_log,
789 "By STCLASS: moving %ld --> %ld\n",
790 (long)(t - i_strpos), (long)(s - i_strpos));
791 else
792 PerlIO_printf(Perl_debug_log,
793 "Does not contradict STCLASS...\n") );
794 }
2c2d71f5 795 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 796 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 797 return s;
2c2d71f5
JH
798
799 fail_finish: /* Substring not found */
66e933ab
GS
800 if (prog->check_substr) /* could be removed already */
801 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 802 fail:
2c2d71f5 803 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
804 PL_colors[4],PL_colors[5]));
805 return Nullch;
806}
9661b544 807
6eb5f6b9 808/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
809STATIC char *
810S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 811{
6eb5f6b9
JH
812 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
813 char *m;
d8093b23
G
814 STRLEN ln;
815 unsigned int c1;
816 unsigned int c2;
6eb5f6b9
JH
817 char *e;
818 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 819
6eb5f6b9
JH
820 /* We know what class it must start with. */
821 switch (OP(c)) {
822 case ANYOFUTF8:
823 while (s < strend) {
824 if (REGINCLASSUTF8(c, (U8*)s)) {
825 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 826 goto got_it;
3cf5c195 827 else
6eb5f6b9 828 tmp = doevery;
a0ed51b3 829 }
6eb5f6b9
JH
830 else
831 tmp = 1;
a0ed51b3
LW
832 s += UTF8SKIP(s);
833 }
6eb5f6b9
JH
834 break;
835 case ANYOF:
a0ed51b3 836 while (s < strend) {
d8093b23 837 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
838 if (tmp && (norun || regtry(prog, s)))
839 goto got_it;
840 else
841 tmp = doevery;
a0ed51b3 842 }
6eb5f6b9
JH
843 else
844 tmp = 1;
a0ed51b3 845 s++;
a0d0e21e 846 }
6eb5f6b9
JH
847 break;
848 case EXACTF:
849 m = STRING(c);
850 ln = STR_LEN(c);
d8093b23 851 c1 = *(U8*)m;
6eb5f6b9
JH
852 c2 = PL_fold[c1];
853 goto do_exactf;
854 case EXACTFL:
855 m = STRING(c);
856 ln = STR_LEN(c);
d8093b23 857 c1 = *(U8*)m;
6eb5f6b9
JH
858 c2 = PL_fold_locale[c1];
859 do_exactf:
860 e = strend - ln;
b3c9acc1 861
6eb5f6b9
JH
862 if (norun && e < s)
863 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
864 /* Here it is NOT UTF! */
865 if (c1 == c2) {
866 while (s <= e) {
d8093b23 867 if ( *(U8*)s == c1
66e933ab
GS
868 && (ln == 1 || !(OP(c) == EXACTF
869 ? ibcmp(s, m, ln)
870 : ibcmp_locale(s, m, ln)))
6eb5f6b9 871 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
872 goto got_it;
873 s++;
874 }
875 } else {
876 while (s <= e) {
d8093b23 877 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
878 && (ln == 1 || !(OP(c) == EXACTF
879 ? ibcmp(s, m, ln)
880 : ibcmp_locale(s, m, ln)))
6eb5f6b9 881 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
882 goto got_it;
883 s++;
884 }
885 }
886 break;
bbce6d69 887 case BOUNDL:
3280af22 888 PL_reg_flags |= RF_tainted;
bbce6d69 889 /* FALL THROUGH */
a0d0e21e 890 case BOUND:
6eb5f6b9 891 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 892 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 893 while (s < strend) {
95bac841 894 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 895 tmp = !tmp;
6eb5f6b9 896 if ((norun || regtry(prog, s)))
a0d0e21e 897 goto got_it;
a687059c 898 }
a0d0e21e
LW
899 s++;
900 }
6eb5f6b9 901 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
902 goto got_it;
903 break;
a0ed51b3
LW
904 case BOUNDLUTF8:
905 PL_reg_flags |= RF_tainted;
906 /* FALL THROUGH */
907 case BOUNDUTF8:
6eb5f6b9 908 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 909 tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 910 while (s < strend) {
76384e4a 911 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
912 swash_fetch(PL_utf8_alnum, (U8*)s) :
913 isALNUM_LC_utf8((U8*)s)))
914 {
a0ed51b3 915 tmp = !tmp;
6eb5f6b9 916 if ((norun || regtry(prog, s)))
a0ed51b3
LW
917 goto got_it;
918 }
919 s += UTF8SKIP(s);
920 }
6eb5f6b9 921 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
922 goto got_it;
923 break;
bbce6d69 924 case NBOUNDL:
3280af22 925 PL_reg_flags |= RF_tainted;
bbce6d69 926 /* FALL THROUGH */
a0d0e21e 927 case NBOUND:
6eb5f6b9 928 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 929 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 930 while (s < strend) {
95bac841 931 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 932 tmp = !tmp;
6eb5f6b9 933 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
934 goto got_it;
935 s++;
936 }
6eb5f6b9 937 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
938 goto got_it;
939 break;
a0ed51b3
LW
940 case NBOUNDLUTF8:
941 PL_reg_flags |= RF_tainted;
942 /* FALL THROUGH */
943 case NBOUNDUTF8:
6eb5f6b9 944 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 945 tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 946 while (s < strend) {
76384e4a 947 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
948 swash_fetch(PL_utf8_alnum, (U8*)s) :
949 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 950 tmp = !tmp;
6eb5f6b9 951 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
952 goto got_it;
953 s += UTF8SKIP(s);
954 }
6eb5f6b9 955 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
956 goto got_it;
957 break;
a0d0e21e
LW
958 case ALNUM:
959 while (s < strend) {
bbce6d69 960 if (isALNUM(*s)) {
6eb5f6b9 961 if (tmp && (norun || regtry(prog, s)))
bbce6d69 962 goto got_it;
963 else
964 tmp = doevery;
965 }
966 else
967 tmp = 1;
968 s++;
969 }
970 break;
a0ed51b3
LW
971 case ALNUMUTF8:
972 while (s < strend) {
dfe13c55 973 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 974 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
975 goto got_it;
976 else
977 tmp = doevery;
978 }
979 else
980 tmp = 1;
981 s += UTF8SKIP(s);
982 }
983 break;
bbce6d69 984 case ALNUML:
3280af22 985 PL_reg_flags |= RF_tainted;
bbce6d69 986 while (s < strend) {
987 if (isALNUM_LC(*s)) {
6eb5f6b9 988 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 989 goto got_it;
a687059c 990 else
a0d0e21e
LW
991 tmp = doevery;
992 }
993 else
994 tmp = 1;
995 s++;
996 }
997 break;
a0ed51b3
LW
998 case ALNUMLUTF8:
999 PL_reg_flags |= RF_tainted;
1000 while (s < strend) {
dfe13c55 1001 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1002 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1003 goto got_it;
1004 else
1005 tmp = doevery;
1006 }
1007 else
1008 tmp = 1;
1009 s += UTF8SKIP(s);
1010 }
1011 break;
a0d0e21e
LW
1012 case NALNUM:
1013 while (s < strend) {
bbce6d69 1014 if (!isALNUM(*s)) {
6eb5f6b9 1015 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1016 goto got_it;
1017 else
1018 tmp = doevery;
1019 }
1020 else
1021 tmp = 1;
1022 s++;
1023 }
1024 break;
a0ed51b3
LW
1025 case NALNUMUTF8:
1026 while (s < strend) {
dfe13c55 1027 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1028 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1029 goto got_it;
1030 else
1031 tmp = doevery;
1032 }
1033 else
1034 tmp = 1;
1035 s += UTF8SKIP(s);
1036 }
1037 break;
bbce6d69 1038 case NALNUML:
3280af22 1039 PL_reg_flags |= RF_tainted;
bbce6d69 1040 while (s < strend) {
1041 if (!isALNUM_LC(*s)) {
6eb5f6b9 1042 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1043 goto got_it;
a687059c 1044 else
a0d0e21e 1045 tmp = doevery;
a687059c 1046 }
a0d0e21e
LW
1047 else
1048 tmp = 1;
1049 s++;
1050 }
1051 break;
a0ed51b3
LW
1052 case NALNUMLUTF8:
1053 PL_reg_flags |= RF_tainted;
1054 while (s < strend) {
dfe13c55 1055 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1056 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1057 goto got_it;
1058 else
1059 tmp = doevery;
1060 }
1061 else
1062 tmp = 1;
1063 s += UTF8SKIP(s);
1064 }
1065 break;
a0d0e21e
LW
1066 case SPACE:
1067 while (s < strend) {
1068 if (isSPACE(*s)) {
6eb5f6b9 1069 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1070 goto got_it;
1071 else
1072 tmp = doevery;
2304df62 1073 }
a0d0e21e
LW
1074 else
1075 tmp = 1;
1076 s++;
1077 }
1078 break;
a0ed51b3
LW
1079 case SPACEUTF8:
1080 while (s < strend) {
dfe13c55 1081 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1082 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1083 goto got_it;
1084 else
1085 tmp = doevery;
1086 }
1087 else
1088 tmp = 1;
1089 s += UTF8SKIP(s);
1090 }
1091 break;
bbce6d69 1092 case SPACEL:
3280af22 1093 PL_reg_flags |= RF_tainted;
bbce6d69 1094 while (s < strend) {
1095 if (isSPACE_LC(*s)) {
6eb5f6b9 1096 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1097 goto got_it;
1098 else
1099 tmp = doevery;
1100 }
1101 else
1102 tmp = 1;
1103 s++;
1104 }
1105 break;
a0ed51b3
LW
1106 case SPACELUTF8:
1107 PL_reg_flags |= RF_tainted;
1108 while (s < strend) {
dfe13c55 1109 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1110 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1111 goto got_it;
1112 else
1113 tmp = doevery;
1114 }
1115 else
1116 tmp = 1;
1117 s += UTF8SKIP(s);
1118 }
1119 break;
a0d0e21e
LW
1120 case NSPACE:
1121 while (s < strend) {
1122 if (!isSPACE(*s)) {
6eb5f6b9 1123 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1124 goto got_it;
1125 else
1126 tmp = doevery;
a687059c 1127 }
a0d0e21e
LW
1128 else
1129 tmp = 1;
1130 s++;
1131 }
1132 break;
a0ed51b3
LW
1133 case NSPACEUTF8:
1134 while (s < strend) {
dfe13c55 1135 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1136 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1137 goto got_it;
1138 else
1139 tmp = doevery;
1140 }
1141 else
1142 tmp = 1;
1143 s += UTF8SKIP(s);
1144 }
1145 break;
bbce6d69 1146 case NSPACEL:
3280af22 1147 PL_reg_flags |= RF_tainted;
bbce6d69 1148 while (s < strend) {
1149 if (!isSPACE_LC(*s)) {
6eb5f6b9 1150 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1151 goto got_it;
1152 else
1153 tmp = doevery;
1154 }
1155 else
1156 tmp = 1;
1157 s++;
1158 }
1159 break;
a0ed51b3
LW
1160 case NSPACELUTF8:
1161 PL_reg_flags |= RF_tainted;
1162 while (s < strend) {
dfe13c55 1163 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1164 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1165 goto got_it;
1166 else
1167 tmp = doevery;
1168 }
1169 else
1170 tmp = 1;
1171 s += UTF8SKIP(s);
1172 }
1173 break;
a0d0e21e
LW
1174 case DIGIT:
1175 while (s < strend) {
1176 if (isDIGIT(*s)) {
6eb5f6b9 1177 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1178 goto got_it;
1179 else
1180 tmp = doevery;
2b69d0c2 1181 }
a0d0e21e
LW
1182 else
1183 tmp = 1;
1184 s++;
1185 }
1186 break;
a0ed51b3
LW
1187 case DIGITUTF8:
1188 while (s < strend) {
dfe13c55 1189 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1190 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
1195 else
1196 tmp = 1;
1197 s += UTF8SKIP(s);
1198 }
1199 break;
b8c5462f
JH
1200 case DIGITL:
1201 PL_reg_flags |= RF_tainted;
1202 while (s < strend) {
1203 if (isDIGIT_LC(*s)) {
6eb5f6b9 1204 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1205 goto got_it;
1206 else
1207 tmp = doevery;
1208 }
1209 else
1210 tmp = 1;
1211 s++;
1212 }
1213 break;
1214 case DIGITLUTF8:
1215 PL_reg_flags |= RF_tainted;
1216 while (s < strend) {
1217 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1218 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1219 goto got_it;
1220 else
1221 tmp = doevery;
1222 }
1223 else
1224 tmp = 1;
1225 s += UTF8SKIP(s);
1226 }
1227 break;
a0d0e21e
LW
1228 case NDIGIT:
1229 while (s < strend) {
1230 if (!isDIGIT(*s)) {
6eb5f6b9 1231 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1232 goto got_it;
1233 else
1234 tmp = doevery;
a687059c 1235 }
a0d0e21e
LW
1236 else
1237 tmp = 1;
1238 s++;
1239 }
1240 break;
a0ed51b3
LW
1241 case NDIGITUTF8:
1242 while (s < strend) {
dfe13c55 1243 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1244 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1245 goto got_it;
1246 else
1247 tmp = doevery;
1248 }
1249 else
1250 tmp = 1;
1251 s += UTF8SKIP(s);
1252 }
1253 break;
b8c5462f
JH
1254 case NDIGITL:
1255 PL_reg_flags |= RF_tainted;
1256 while (s < strend) {
1257 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1258 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1259 goto got_it;
1260 else
1261 tmp = doevery;
1262 }
1263 else
1264 tmp = 1;
1265 s++;
a0ed51b3 1266 }
b8c5462f
JH
1267 break;
1268 case NDIGITLUTF8:
1269 PL_reg_flags |= RF_tainted;
1270 while (s < strend) {
1271 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1272 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1273 goto got_it;
cf93c79d 1274 else
b8c5462f
JH
1275 tmp = doevery;
1276 }
1277 else
1278 tmp = 1;
1279 s += UTF8SKIP(s);
1280 }
1281 break;
b3c9acc1 1282 default:
3c3eec57
GS
1283 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1284 break;
d6a28714 1285 }
6eb5f6b9
JH
1286 return 0;
1287 got_it:
1288 return s;
1289}
1290
1291/*
1292 - regexec_flags - match a regexp against a string
1293 */
1294I32
1295Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1296 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1297/* strend: pointer to null at end of string */
1298/* strbeg: real beginning of string */
1299/* minend: end of match must be >=minend after stringarg. */
1300/* data: May be used for some additional optimizations. */
1301/* nosave: For optimizations. */
1302{
1303 dTHR;
1304 register char *s;
1305 register regnode *c;
1306 register char *startpos = stringarg;
6eb5f6b9
JH
1307 I32 minlen; /* must match at least this many chars */
1308 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1309 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1310 constant substr. */ /* CC */
1311 I32 end_shift = 0; /* Same for the end. */ /* CC */
1312 I32 scream_pos = -1; /* Internal iterator of scream. */
1313 char *scream_olds;
1314 SV* oreplsv = GvSV(PL_replgv);
1315
1316 PL_regcc = 0;
1317
1318 cache_re(prog);
1319#ifdef DEBUGGING
1320 PL_regnarrate = PL_debug & 512;
1321#endif
1322
1323 /* Be paranoid... */
1324 if (prog == NULL || startpos == NULL) {
1325 Perl_croak(aTHX_ "NULL regexp parameter");
1326 return 0;
1327 }
1328
1329 minlen = prog->minlen;
1330 if (strend - startpos < minlen) goto phooey;
1331
1332 if (startpos == strbeg) /* is ^ valid at stringarg? */
1333 PL_regprev = '\n';
1334 else {
1335 PL_regprev = (U32)stringarg[-1];
1336 if (!PL_multiline && PL_regprev == '\n')
1337 PL_regprev = '\0'; /* force ^ to NOT match */
1338 }
1339
1340 /* Check validity of program. */
1341 if (UCHARAT(prog->program) != REG_MAGIC) {
1342 Perl_croak(aTHX_ "corrupted regexp program");
1343 }
1344
1345 PL_reg_flags = 0;
1346 PL_reg_eval_set = 0;
1347 PL_reg_maxiter = 0;
1348
1349 if (prog->reganch & ROPT_UTF8)
1350 PL_reg_flags |= RF_utf8;
1351
1352 /* Mark beginning of line for ^ and lookbehind. */
1353 PL_regbol = startpos;
1354 PL_bostr = strbeg;
1355 PL_reg_sv = sv;
1356
1357 /* Mark end of line for $ (and such) */
1358 PL_regeol = strend;
1359
1360 /* see how far we have to get to not match where we matched before */
1361 PL_regtill = startpos+minend;
1362
1363 /* We start without call_cc context. */
1364 PL_reg_call_cc = 0;
1365
1366 /* If there is a "must appear" string, look for it. */
1367 s = startpos;
1368
1369 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1370 MAGIC *mg;
1371
1372 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1373 PL_reg_ganch = startpos;
1374 else if (sv && SvTYPE(sv) >= SVt_PVMG
1375 && SvMAGIC(sv)
1376 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1377 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1378 if (prog->reganch & ROPT_ANCH_GPOS) {
1379 if (s > PL_reg_ganch)
1380 goto phooey;
1381 s = PL_reg_ganch;
1382 }
1383 }
1384 else /* pos() not defined */
1385 PL_reg_ganch = strbeg;
1386 }
1387
1388 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1389 re_scream_pos_data d;
1390
1391 d.scream_olds = &scream_olds;
1392 d.scream_pos = &scream_pos;
1393 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1394 if (!s)
1395 goto phooey; /* not present */
1396 }
1397
1398 DEBUG_r( if (!PL_colorset) reginitcolors() );
1399 DEBUG_r(PerlIO_printf(Perl_debug_log,
1400 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1401 PL_colors[4],PL_colors[5],PL_colors[0],
1402 prog->precomp,
1403 PL_colors[1],
1404 (strlen(prog->precomp) > 60 ? "..." : ""),
1405 PL_colors[0],
1406 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1407 startpos, PL_colors[1],
1408 (strend - startpos > 60 ? "..." : ""))
1409 );
1410
1411 /* Simplest case: anchored match need be tried only once. */
1412 /* [unless only anchor is BOL and multiline is set] */
1413 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1414 if (s == startpos && regtry(prog, startpos))
1415 goto got_it;
1416 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1417 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1418 {
1419 char *end;
1420
1421 if (minlen)
1422 dontbother = minlen - 1;
1423 end = HOPc(strend, -dontbother) - 1;
1424 /* for multiline we only have to try after newlines */
1425 if (prog->check_substr) {
1426 if (s == startpos)
1427 goto after_try;
1428 while (1) {
1429 if (regtry(prog, s))
1430 goto got_it;
1431 after_try:
1432 if (s >= end)
1433 goto phooey;
1434 if (prog->reganch & RE_USE_INTUIT) {
1435 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1436 if (!s)
1437 goto phooey;
1438 }
1439 else
1440 s++;
1441 }
1442 } else {
1443 if (s > startpos)
1444 s--;
1445 while (s < end) {
1446 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1447 if (regtry(prog, s))
1448 goto got_it;
1449 }
1450 }
1451 }
1452 }
1453 goto phooey;
1454 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1455 if (regtry(prog, PL_reg_ganch))
1456 goto got_it;
1457 goto phooey;
1458 }
1459
1460 /* Messy cases: unanchored match. */
1461 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1462 /* we have /x+whatever/ */
1463 /* it must be a one character string (XXXX Except UTF?) */
1464 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1465#ifdef DEBUGGING
1466 int did_match = 0;
1467#endif
1468
6eb5f6b9
JH
1469 if (UTF) {
1470 while (s < strend) {
1471 if (*s == ch) {
bf93d4cc 1472 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1473 if (regtry(prog, s)) goto got_it;
1474 s += UTF8SKIP(s);
1475 while (s < strend && *s == ch)
1476 s += UTF8SKIP(s);
1477 }
1478 s += UTF8SKIP(s);
1479 }
1480 }
1481 else {
1482 while (s < strend) {
1483 if (*s == ch) {
bf93d4cc 1484 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1485 if (regtry(prog, s)) goto got_it;
1486 s++;
1487 while (s < strend && *s == ch)
1488 s++;
1489 }
1490 s++;
1491 }
1492 }
bf93d4cc
GS
1493 DEBUG_r(did_match ||
1494 PerlIO_printf(Perl_debug_log,
1495 "Did not find anchored character...\n"));
6eb5f6b9
JH
1496 }
1497 /*SUPPRESS 560*/
1498 else if (prog->anchored_substr != Nullsv
1499 || (prog->float_substr != Nullsv
1500 && prog->float_max_offset < strend - s)) {
1501 SV *must = prog->anchored_substr
1502 ? prog->anchored_substr : prog->float_substr;
1503 I32 back_max =
1504 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1505 I32 back_min =
1506 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1507 char *last = HOPc(strend, /* Cannot start after this */
1508 -(I32)(CHR_SVLEN(must)
1509 - (SvTAIL(must) != 0) + back_min));
1510 char *last1; /* Last position checked before */
bf93d4cc
GS
1511#ifdef DEBUGGING
1512 int did_match = 0;
1513#endif
6eb5f6b9
JH
1514
1515 if (s > PL_bostr)
1516 last1 = HOPc(s, -1);
1517 else
1518 last1 = s - 1; /* bogus */
1519
1520 /* XXXX check_substr already used to find `s', can optimize if
1521 check_substr==must. */
1522 scream_pos = -1;
1523 dontbother = end_shift;
1524 strend = HOPc(strend, -dontbother);
1525 while ( (s <= last) &&
1526 ((flags & REXEC_SCREAM)
1527 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1528 end_shift, &scream_pos, 0))
1529 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1530 (unsigned char*)strend, must,
1531 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1532 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1533 if (HOPc(s, -back_max) > last1) {
1534 last1 = HOPc(s, -back_min);
1535 s = HOPc(s, -back_max);
1536 }
1537 else {
1538 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1539
1540 last1 = HOPc(s, -back_min);
1541 s = t;
1542 }
1543 if (UTF) {
1544 while (s <= last1) {
1545 if (regtry(prog, s))
1546 goto got_it;
1547 s += UTF8SKIP(s);
1548 }
1549 }
1550 else {
1551 while (s <= last1) {
1552 if (regtry(prog, s))
1553 goto got_it;
1554 s++;
1555 }
1556 }
1557 }
bf93d4cc
GS
1558 DEBUG_r(did_match ||
1559 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1560 ((must == prog->anchored_substr)
1561 ? "anchored" : "floating"),
1562 PL_colors[0],
1563 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1564 SvPVX(must),
1565 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1566 goto phooey;
1567 }
155aba94 1568 else if ((c = prog->regstclass)) {
66e933ab
GS
1569 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1570 /* don't bother with what can't match */
6eb5f6b9
JH
1571 strend = HOPc(strend, -(minlen - 1));
1572 if (find_byclass(prog, c, s, strend, startpos, 0))
1573 goto got_it;
bf93d4cc 1574 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1575 }
1576 else {
1577 dontbother = 0;
1578 if (prog->float_substr != Nullsv) { /* Trim the end. */
1579 char *last;
d6a28714
JH
1580
1581 if (flags & REXEC_SCREAM) {
1582 last = screaminstr(sv, prog->float_substr, s - strbeg,
1583 end_shift, &scream_pos, 1); /* last one */
1584 if (!last)
1585 last = scream_olds; /* Only one occurence. */
b8c5462f 1586 }
d6a28714
JH
1587 else {
1588 STRLEN len;
1589 char *little = SvPV(prog->float_substr, len);
1590
1591 if (SvTAIL(prog->float_substr)) {
1592 if (memEQ(strend - len + 1, little, len - 1))
1593 last = strend - len + 1;
1594 else if (!PL_multiline)
1595 last = memEQ(strend - len, little, len)
1596 ? strend - len : Nullch;
b8c5462f 1597 else
d6a28714
JH
1598 goto find_last;
1599 } else {
1600 find_last:
1601 if (len)
1602 last = rninstr(s, strend, little, little + len);
b8c5462f 1603 else
d6a28714 1604 last = strend; /* matching `$' */
b8c5462f 1605 }
b8c5462f 1606 }
bf93d4cc
GS
1607 if (last == NULL) {
1608 DEBUG_r(PerlIO_printf(Perl_debug_log,
1609 "%sCan't trim the tail, match fails (should not happen)%s\n",
1610 PL_colors[4],PL_colors[5]));
1611 goto phooey; /* Should not happen! */
1612 }
d6a28714
JH
1613 dontbother = strend - last + prog->float_min_offset;
1614 }
1615 if (minlen && (dontbother < minlen))
1616 dontbother = minlen - 1;
1617 strend -= dontbother; /* this one's always in bytes! */
1618 /* We don't know much -- general case. */
1619 if (UTF) {
1620 for (;;) {
1621 if (regtry(prog, s))
1622 goto got_it;
1623 if (s >= strend)
1624 break;
b8c5462f 1625 s += UTF8SKIP(s);
d6a28714
JH
1626 };
1627 }
1628 else {
1629 do {
1630 if (regtry(prog, s))
1631 goto got_it;
1632 } while (s++ < strend);
1633 }
1634 }
1635
1636 /* Failure. */
1637 goto phooey;
1638
1639got_it:
1640 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1641
1642 if (PL_reg_eval_set) {
1643 /* Preserve the current value of $^R */
1644 if (oreplsv != GvSV(PL_replgv))
1645 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1646 restored, the value remains
1647 the same. */
1648 restore_pos(aTHXo_ 0);
1649 }
1650
1651 /* make sure $`, $&, $', and $digit will work later */
1652 if ( !(flags & REXEC_NOT_FIRST) ) {
1653 if (RX_MATCH_COPIED(prog)) {
1654 Safefree(prog->subbeg);
1655 RX_MATCH_COPIED_off(prog);
1656 }
1657 if (flags & REXEC_COPY_STR) {
1658 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1659
1660 s = savepvn(strbeg, i);
1661 prog->subbeg = s;
1662 prog->sublen = i;
1663 RX_MATCH_COPIED_on(prog);
1664 }
1665 else {
1666 prog->subbeg = strbeg;
1667 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1668 }
1669 }
1670
1671 return 1;
1672
1673phooey:
bf93d4cc
GS
1674 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1675 PL_colors[4],PL_colors[5]));
d6a28714
JH
1676 if (PL_reg_eval_set)
1677 restore_pos(aTHXo_ 0);
1678 return 0;
1679}
1680
1681/*
1682 - regtry - try match at specific point
1683 */
1684STATIC I32 /* 0 failure, 1 success */
1685S_regtry(pTHX_ regexp *prog, char *startpos)
1686{
1687 dTHR;
1688 register I32 i;
1689 register I32 *sp;
1690 register I32 *ep;
1691 CHECKPOINT lastcp;
1692
1693 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1694 MAGIC *mg;
1695
1696 PL_reg_eval_set = RS_init;
1697 DEBUG_r(DEBUG_s(
b900a521
JH
1698 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1699 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1700 ));
e8347627 1701 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1702 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1703 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1704 SAVETMPS;
1705 /* Apparently this is not needed, judging by wantarray. */
e8347627 1706 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1707 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1708
1709 if (PL_reg_sv) {
1710 /* Make $_ available to executed code. */
1711 if (PL_reg_sv != DEFSV) {
1712 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1713 SAVESPTR(DEFSV);
1714 DEFSV = PL_reg_sv;
b8c5462f 1715 }
d6a28714
JH
1716
1717 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1718 && (mg = mg_find(PL_reg_sv, 'g')))) {
1719 /* prepare for quick setting of pos */
1720 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1721 mg = mg_find(PL_reg_sv, 'g');
1722 mg->mg_len = -1;
b8c5462f 1723 }
d6a28714
JH
1724 PL_reg_magic = mg;
1725 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1726 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1727 }
1728 if (!PL_reg_curpm)
0f79a09d 1729 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1730 PL_reg_curpm->op_pmregexp = prog;
1731 PL_reg_oldcurpm = PL_curpm;
1732 PL_curpm = PL_reg_curpm;
1733 if (RX_MATCH_COPIED(prog)) {
1734 /* Here is a serious problem: we cannot rewrite subbeg,
1735 since it may be needed if this match fails. Thus
1736 $` inside (?{}) could fail... */
1737 PL_reg_oldsaved = prog->subbeg;
1738 PL_reg_oldsavedlen = prog->sublen;
1739 RX_MATCH_COPIED_off(prog);
1740 }
1741 else
1742 PL_reg_oldsaved = Nullch;
1743 prog->subbeg = PL_bostr;
1744 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1745 }
1746 prog->startp[0] = startpos - PL_bostr;
1747 PL_reginput = startpos;
1748 PL_regstartp = prog->startp;
1749 PL_regendp = prog->endp;
1750 PL_reglastparen = &prog->lastparen;
1751 prog->lastparen = 0;
1752 PL_regsize = 0;
1753 DEBUG_r(PL_reg_starttry = startpos);
1754 if (PL_reg_start_tmpl <= prog->nparens) {
1755 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1756 if(PL_reg_start_tmp)
1757 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1758 else
1759 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1760 }
1761
1762 /* XXXX What this code is doing here?!!! There should be no need
1763 to do this again and again, PL_reglastparen should take care of
1764 this! */
1765 sp = prog->startp;
1766 ep = prog->endp;
1767 if (prog->nparens) {
1768 for (i = prog->nparens; i >= 1; i--) {
1769 *++sp = -1;
1770 *++ep = -1;
1771 }
1772 }
1773 REGCP_SET;
1774 if (regmatch(prog->program + 1)) {
1775 prog->endp[0] = PL_reginput - PL_bostr;
1776 return 1;
1777 }
1778 REGCP_UNWIND;
1779 return 0;
1780}
1781
1782/*
1783 - regmatch - main matching routine
1784 *
1785 * Conceptually the strategy is simple: check to see whether the current
1786 * node matches, call self recursively to see whether the rest matches,
1787 * and then act accordingly. In practice we make some effort to avoid
1788 * recursion, in particular by going through "ordinary" nodes (that don't
1789 * need to know whether the rest of the match failed) by a loop instead of
1790 * by recursion.
1791 */
1792/* [lwall] I've hoisted the register declarations to the outer block in order to
1793 * maybe save a little bit of pushing and popping on the stack. It also takes
1794 * advantage of machines that use a register save mask on subroutine entry.
1795 */
1796STATIC I32 /* 0 failure, 1 success */
1797S_regmatch(pTHX_ regnode *prog)
1798{
1799 dTHR;
1800 register regnode *scan; /* Current node. */
1801 regnode *next; /* Next node. */
1802 regnode *inner; /* Next node in internal branch. */
1803 register I32 nextchr; /* renamed nextchr - nextchar colides with
1804 function of same name */
1805 register I32 n; /* no or next */
1806 register I32 ln; /* len or last */
1807 register char *s; /* operand or save */
1808 register char *locinput = PL_reginput;
1809 register I32 c1, c2, paren; /* case fold search, parenth */
1810 int minmod = 0, sw = 0, logical = 0;
1811#ifdef DEBUGGING
1812 PL_regindent++;
1813#endif
1814
1815 /* Note that nextchr is a byte even in UTF */
1816 nextchr = UCHARAT(locinput);
1817 scan = prog;
1818 while (scan != NULL) {
1819#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1820#ifdef DEBUGGING
1821# define sayYES goto yes
1822# define sayNO goto no
7821416a
IZ
1823# define sayYES_FINAL goto yes_final
1824# define sayYES_LOUD goto yes_loud
1825# define sayNO_FINAL goto no_final
1826# define sayNO_SILENT goto do_no
d6a28714
JH
1827# define saySAME(x) if (x) goto yes; else goto no
1828# define REPORT_CODE_OFF 24
1829#else
1830# define sayYES return 1
1831# define sayNO return 0
7821416a
IZ
1832# define sayYES_FINAL return 1
1833# define sayYES_LOUD return 1
1834# define sayNO_FINAL return 0
1835# define sayNO_SILENT return 0
d6a28714
JH
1836# define saySAME(x) return x
1837#endif
1838 DEBUG_r( {
1839 SV *prop = sv_newmortal();
1840 int docolor = *PL_colors[0];
1841 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1842 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1843 /* The part of the string before starttry has one color
1844 (pref0_len chars), between starttry and current
1845 position another one (pref_len - pref0_len chars),
1846 after the current position the third one.
1847 We assume that pref0_len <= pref_len, otherwise we
1848 decrease pref0_len. */
1849 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1850 ? (5 + taill) - l : locinput - PL_bostr);
1851 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1852
1853 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1854 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1855 ? (5 + taill) - pref_len : PL_regeol - locinput);
1856 if (pref0_len < 0)
1857 pref0_len = 0;
1858 if (pref0_len > pref_len)
1859 pref0_len = pref_len;
1860 regprop(prop, scan);
1861 PerlIO_printf(Perl_debug_log,
b900a521
JH
1862 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1863 (IV)(locinput - PL_bostr),
d6a28714
JH
1864 PL_colors[4], pref0_len,
1865 locinput - pref_len, PL_colors[5],
1866 PL_colors[2], pref_len - pref0_len,
1867 locinput - pref_len + pref0_len, PL_colors[3],
1868 (docolor ? "" : "> <"),
1869 PL_colors[0], l, locinput, PL_colors[1],
1870 15 - l - pref_len + 1,
1871 "",
b900a521 1872 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1873 SvPVX(prop));
1874 } );
1875
1876 next = scan + NEXT_OFF(scan);
1877 if (next == scan)
1878 next = NULL;
1879
1880 switch (OP(scan)) {
1881 case BOL:
1882 if (locinput == PL_bostr
1883 ? PL_regprev == '\n'
1884 : (PL_multiline &&
1885 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1886 {
1887 /* regtill = regbol; */
b8c5462f
JH
1888 break;
1889 }
d6a28714
JH
1890 sayNO;
1891 case MBOL:
1892 if (locinput == PL_bostr
1893 ? PL_regprev == '\n'
1894 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1895 {
b8c5462f
JH
1896 break;
1897 }
d6a28714
JH
1898 sayNO;
1899 case SBOL:
c2a73568 1900 if (locinput == PL_bostr)
b8c5462f 1901 break;
d6a28714
JH
1902 sayNO;
1903 case GPOS:
1904 if (locinput == PL_reg_ganch)
1905 break;
1906 sayNO;
1907 case EOL:
1908 if (PL_multiline)
1909 goto meol;
1910 else
1911 goto seol;
1912 case MEOL:
1913 meol:
1914 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1915 sayNO;
b8c5462f 1916 break;
d6a28714
JH
1917 case SEOL:
1918 seol:
1919 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1920 sayNO;
d6a28714 1921 if (PL_regeol - locinput > 1)
b8c5462f 1922 sayNO;
b8c5462f 1923 break;
d6a28714
JH
1924 case EOS:
1925 if (PL_regeol != locinput)
b8c5462f 1926 sayNO;
d6a28714
JH
1927 break;
1928 case SANYUTF8:
b8c5462f 1929 if (nextchr & 0x80) {
b8c5462f 1930 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1931 if (locinput > PL_regeol)
1932 sayNO;
b8c5462f
JH
1933 nextchr = UCHARAT(locinput);
1934 break;
1935 }
d6a28714 1936 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1937 sayNO;
b8c5462f 1938 nextchr = UCHARAT(++locinput);
a0d0e21e 1939 break;
d6a28714
JH
1940 case SANY:
1941 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1942 sayNO;
1943 nextchr = UCHARAT(++locinput);
b85d18e9 1944 break;
d6a28714 1945 case ANYUTF8:
a0ed51b3 1946 if (nextchr & 0x80) {
b8c5462f 1947 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1948 if (locinput > PL_regeol)
1949 sayNO;
a0ed51b3
LW
1950 nextchr = UCHARAT(locinput);
1951 break;
1952 }
155aba94 1953 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
1954 sayNO;
1955 nextchr = UCHARAT(++locinput);
1956 break;
d6a28714 1957 case REG_ANY:
155aba94 1958 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 1959 sayNO;
76e3520e 1960 nextchr = UCHARAT(++locinput);
a0d0e21e 1961 break;
d6a28714 1962 case EXACT:
cd439c50
IZ
1963 s = STRING(scan);
1964 ln = STR_LEN(scan);
d6a28714
JH
1965 /* Inline the first character, for speed. */
1966 if (UCHARAT(s) != nextchr)
1967 sayNO;
1968 if (PL_regeol - locinput < ln)
1969 sayNO;
1970 if (ln > 1 && memNE(s, locinput, ln))
1971 sayNO;
1972 locinput += ln;
1973 nextchr = UCHARAT(locinput);
1974 break;
1975 case EXACTFL:
b8c5462f
JH
1976 PL_reg_flags |= RF_tainted;
1977 /* FALL THROUGH */
d6a28714 1978 case EXACTF:
cd439c50
IZ
1979 s = STRING(scan);
1980 ln = STR_LEN(scan);
d6a28714
JH
1981
1982 if (UTF) {
1983 char *l = locinput;
1984 char *e = s + ln;
1985 c1 = OP(scan) == EXACTF;
1986 while (s < e) {
1987 if (l >= PL_regeol)
1988 sayNO;
1989 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1990 toLOWER_utf8((U8*)l) :
1991 toLOWER_LC_utf8((U8*)l)))
1992 {
1993 sayNO;
1994 }
1995 s += UTF8SKIP(s);
1996 l += UTF8SKIP(l);
b8c5462f 1997 }
d6a28714 1998 locinput = l;
a0ed51b3
LW
1999 nextchr = UCHARAT(locinput);
2000 break;
2001 }
d6a28714
JH
2002
2003 /* Inline the first character, for speed. */
2004 if (UCHARAT(s) != nextchr &&
2005 UCHARAT(s) != ((OP(scan) == EXACTF)
2006 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2007 sayNO;
d6a28714 2008 if (PL_regeol - locinput < ln)
b8c5462f 2009 sayNO;
d6a28714
JH
2010 if (ln > 1 && (OP(scan) == EXACTF
2011 ? ibcmp(s, locinput, ln)
2012 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2013 sayNO;
d6a28714
JH
2014 locinput += ln;
2015 nextchr = UCHARAT(locinput);
a0d0e21e 2016 break;
d6a28714 2017 case ANYOFUTF8:
d6a28714 2018 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 2019 sayNO;
d6a28714
JH
2020 if (locinput >= PL_regeol)
2021 sayNO;
2022 locinput += PL_utf8skip[nextchr];
2023 nextchr = UCHARAT(locinput);
2024 break;
2025 case ANYOF:
d6a28714 2026 if (nextchr < 0)
b8c5462f 2027 nextchr = UCHARAT(locinput);
936ed897 2028 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2029 sayNO;
2030 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2031 sayNO;
b8c5462f
JH
2032 nextchr = UCHARAT(++locinput);
2033 break;
d6a28714 2034 case ALNUML:
b8c5462f
JH
2035 PL_reg_flags |= RF_tainted;
2036 /* FALL THROUGH */
d6a28714 2037 case ALNUM:
b8c5462f 2038 if (!nextchr)
4633a7c4 2039 sayNO;
d6a28714
JH
2040 if (!(OP(scan) == ALNUM
2041 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2042 sayNO;
2043 nextchr = UCHARAT(++locinput);
bbce6d69 2044 break;
d6a28714 2045 case ALNUMLUTF8:
3280af22 2046 PL_reg_flags |= RF_tainted;
bbce6d69 2047 /* FALL THROUGH */
d6a28714 2048 case ALNUMUTF8:
b8c5462f
JH
2049 if (!nextchr)
2050 sayNO;
2051 if (nextchr & 0x80) {
d6a28714
JH
2052 if (!(OP(scan) == ALNUMUTF8
2053 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2054 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2055 {
2056 sayNO;
a0ed51b3 2057 }
b8c5462f 2058 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2059 nextchr = UCHARAT(locinput);
2060 break;
2061 }
d6a28714
JH
2062 if (!(OP(scan) == ALNUMUTF8
2063 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2064 sayNO;
b8c5462f 2065 nextchr = UCHARAT(++locinput);
a0d0e21e 2066 break;
d6a28714 2067 case NALNUML:
b8c5462f
JH
2068 PL_reg_flags |= RF_tainted;
2069 /* FALL THROUGH */
d6a28714
JH
2070 case NALNUM:
2071 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2072 sayNO;
d6a28714
JH
2073 if (OP(scan) == NALNUM
2074 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2075 sayNO;
b8c5462f 2076 nextchr = UCHARAT(++locinput);
a0ed51b3 2077 break;
d6a28714 2078 case NALNUMLUTF8:
b8c5462f
JH
2079 PL_reg_flags |= RF_tainted;
2080 /* FALL THROUGH */
d6a28714 2081 case NALNUMUTF8:
3280af22 2082 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2083 sayNO;
b8c5462f 2084 if (nextchr & 0x80) {
d6a28714
JH
2085 if (OP(scan) == NALNUMUTF8
2086 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2087 : isALNUM_LC_utf8((U8*)locinput))
2088 {
b8c5462f 2089 sayNO;
d6a28714 2090 }
b8c5462f
JH
2091 locinput += PL_utf8skip[nextchr];
2092 nextchr = UCHARAT(locinput);
2093 break;
2094 }
d6a28714
JH
2095 if (OP(scan) == NALNUMUTF8
2096 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2097 sayNO;
76e3520e 2098 nextchr = UCHARAT(++locinput);
a0d0e21e 2099 break;
d6a28714
JH
2100 case BOUNDL:
2101 case NBOUNDL:
3280af22 2102 PL_reg_flags |= RF_tainted;
bbce6d69 2103 /* FALL THROUGH */
d6a28714
JH
2104 case BOUND:
2105 case NBOUND:
2106 /* was last char in word? */
2107 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2108 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2109 ln = isALNUM(ln);
2110 n = isALNUM(nextchr);
2111 }
2112 else {
2113 ln = isALNUM_LC(ln);
2114 n = isALNUM_LC(nextchr);
2115 }
2116 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2117 sayNO;
a0d0e21e 2118 break;
d6a28714
JH
2119 case BOUNDLUTF8:
2120 case NBOUNDLUTF8:
a0ed51b3
LW
2121 PL_reg_flags |= RF_tainted;
2122 /* FALL THROUGH */
d6a28714
JH
2123 case BOUNDUTF8:
2124 case NBOUNDUTF8:
2125 /* was last char in word? */
2126 ln = (locinput != PL_regbol)
2127 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2128 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2129 ln = isALNUM_uni(ln);
2130 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2131 }
d6a28714
JH
2132 else {
2133 ln = isALNUM_LC_uni(ln);
2134 n = isALNUM_LC_utf8((U8*)locinput);
2135 }
2136 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2137 sayNO;
a0ed51b3 2138 break;
d6a28714 2139 case SPACEL:
3280af22 2140 PL_reg_flags |= RF_tainted;
bbce6d69 2141 /* FALL THROUGH */
d6a28714 2142 case SPACE:
9442cb0e 2143 if (!nextchr)
4633a7c4 2144 sayNO;
d6a28714
JH
2145 if (!(OP(scan) == SPACE
2146 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2147 sayNO;
76e3520e 2148 nextchr = UCHARAT(++locinput);
a0d0e21e 2149 break;
d6a28714 2150 case SPACELUTF8:
a0ed51b3
LW
2151 PL_reg_flags |= RF_tainted;
2152 /* FALL THROUGH */
d6a28714 2153 case SPACEUTF8:
9442cb0e 2154 if (!nextchr)
a0ed51b3
LW
2155 sayNO;
2156 if (nextchr & 0x80) {
d6a28714 2157 if (!(OP(scan) == SPACEUTF8
9442cb0e 2158 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2159 : isSPACE_LC_utf8((U8*)locinput)))
2160 {
a0ed51b3 2161 sayNO;
d6a28714 2162 }
6f06b55f 2163 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2164 nextchr = UCHARAT(locinput);
2165 break;
2166 }
d6a28714
JH
2167 if (!(OP(scan) == SPACEUTF8
2168 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2169 sayNO;
2170 nextchr = UCHARAT(++locinput);
2171 break;
d6a28714 2172 case NSPACEL:
3280af22 2173 PL_reg_flags |= RF_tainted;
bbce6d69 2174 /* FALL THROUGH */
d6a28714 2175 case NSPACE:
9442cb0e 2176 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2177 sayNO;
9442cb0e 2178 if (OP(scan) == NSPACE
d6a28714 2179 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2180 sayNO;
b8c5462f 2181 nextchr = UCHARAT(++locinput);
a0d0e21e 2182 break;
d6a28714 2183 case NSPACELUTF8:
a0ed51b3
LW
2184 PL_reg_flags |= RF_tainted;
2185 /* FALL THROUGH */
d6a28714 2186 case NSPACEUTF8:
9442cb0e 2187 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2188 sayNO;
2189 if (nextchr & 0x80) {
d6a28714 2190 if (OP(scan) == NSPACEUTF8
9442cb0e 2191 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2192 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2193 {
2194 sayNO;
2195 }
2196 locinput += PL_utf8skip[nextchr];
2197 nextchr = UCHARAT(locinput);
2198 break;
a0ed51b3 2199 }
d6a28714
JH
2200 if (OP(scan) == NSPACEUTF8
2201 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2202 sayNO;
76e3520e 2203 nextchr = UCHARAT(++locinput);
a0d0e21e 2204 break;
d6a28714 2205 case DIGITL:
a0ed51b3
LW
2206 PL_reg_flags |= RF_tainted;
2207 /* FALL THROUGH */
d6a28714 2208 case DIGIT:
9442cb0e 2209 if (!nextchr)
a0ed51b3 2210 sayNO;
d6a28714
JH
2211 if (!(OP(scan) == DIGIT
2212 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2213 sayNO;
76e3520e 2214 nextchr = UCHARAT(++locinput);
a0d0e21e 2215 break;
d6a28714 2216 case DIGITLUTF8:
a0ed51b3
LW
2217 PL_reg_flags |= RF_tainted;
2218 /* FALL THROUGH */
d6a28714 2219 case DIGITUTF8:
a0ed51b3
LW
2220 if (!nextchr)
2221 sayNO;
2222 if (nextchr & 0x80) {
9442cb0e
GS
2223 if (!(OP(scan) == DIGITUTF8
2224 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2225 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2226 {
a0ed51b3 2227 sayNO;
dfe13c55 2228 }
6f06b55f 2229 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2230 nextchr = UCHARAT(locinput);
2231 break;
2232 }
9442cb0e
GS
2233 if (!(OP(scan) == DIGITUTF8
2234 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2235 sayNO;
2236 nextchr = UCHARAT(++locinput);
2237 break;
d6a28714 2238 case NDIGITL:
b8c5462f
JH
2239 PL_reg_flags |= RF_tainted;
2240 /* FALL THROUGH */
d6a28714 2241 case NDIGIT:
9442cb0e 2242 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2243 sayNO;
9442cb0e 2244 if (OP(scan) == NDIGIT
d6a28714 2245 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2246 sayNO;
76e3520e 2247 nextchr = UCHARAT(++locinput);
a0d0e21e 2248 break;
d6a28714 2249 case NDIGITLUTF8:
b8c5462f
JH
2250 PL_reg_flags |= RF_tainted;
2251 /* FALL THROUGH */
d6a28714 2252 case NDIGITUTF8:
b8c5462f
JH
2253 if (!nextchr && locinput >= PL_regeol)
2254 sayNO;
a0ed51b3 2255 if (nextchr & 0x80) {
9442cb0e
GS
2256 if (OP(scan) == NDIGITUTF8
2257 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2258 : isDIGIT_LC_utf8((U8*)locinput))
2259 {
a0ed51b3 2260 sayNO;
9442cb0e 2261 }
6f06b55f 2262 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2263 nextchr = UCHARAT(locinput);
2264 break;
2265 }
9442cb0e
GS
2266 if (OP(scan) == NDIGITUTF8
2267 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2268 sayNO;
2269 nextchr = UCHARAT(++locinput);
2270 break;
2271 case CLUMP:
dfe13c55 2272 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2273 sayNO;
6f06b55f 2274 locinput += PL_utf8skip[nextchr];
dfe13c55 2275 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2276 locinput += UTF8SKIP(locinput);
2277 if (locinput > PL_regeol)
2278 sayNO;
2279 nextchr = UCHARAT(locinput);
2280 break;
c8756f30 2281 case REFFL:
3280af22 2282 PL_reg_flags |= RF_tainted;
c8756f30 2283 /* FALL THROUGH */
c277df42 2284 case REF:
c8756f30 2285 case REFF:
c277df42 2286 n = ARG(scan); /* which paren pair */
cf93c79d 2287 ln = PL_regstartp[n];
2c2d71f5 2288 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2289 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2290 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2291 if (ln == PL_regendp[n])
a0d0e21e 2292 break;
a0ed51b3 2293
cf93c79d 2294 s = PL_bostr + ln;
a0ed51b3
LW
2295 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2296 char *l = locinput;
cf93c79d 2297 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2298 /*
2299 * Note that we can't do the "other character" lookup trick as
2300 * in the 8-bit case (no pun intended) because in Unicode we
2301 * have to map both upper and title case to lower case.
2302 */
2303 if (OP(scan) == REFF) {
2304 while (s < e) {
2305 if (l >= PL_regeol)
2306 sayNO;
dfe13c55 2307 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2308 sayNO;
2309 s += UTF8SKIP(s);
2310 l += UTF8SKIP(l);
2311 }
2312 }
2313 else {
2314 while (s < e) {
2315 if (l >= PL_regeol)
2316 sayNO;
dfe13c55 2317 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2318 sayNO;
2319 s += UTF8SKIP(s);
2320 l += UTF8SKIP(l);
2321 }
2322 }
2323 locinput = l;
2324 nextchr = UCHARAT(locinput);
2325 break;
2326 }
2327
a0d0e21e 2328 /* Inline the first character, for speed. */
76e3520e 2329 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2330 (OP(scan) == REF ||
2331 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2332 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2333 sayNO;
cf93c79d 2334 ln = PL_regendp[n] - ln;
3280af22 2335 if (locinput + ln > PL_regeol)
4633a7c4 2336 sayNO;
c8756f30
AK
2337 if (ln > 1 && (OP(scan) == REF
2338 ? memNE(s, locinput, ln)
2339 : (OP(scan) == REFF
2340 ? ibcmp(s, locinput, ln)
2341 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2342 sayNO;
a0d0e21e 2343 locinput += ln;
76e3520e 2344 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2345 break;
2346
2347 case NOTHING:
c277df42 2348 case TAIL:
a0d0e21e
LW
2349 break;
2350 case BACK:
2351 break;
c277df42
IZ
2352 case EVAL:
2353 {
2354 dSP;
533c011a 2355 OP_4tree *oop = PL_op;
3280af22
NIS
2356 COP *ocurcop = PL_curcop;
2357 SV **ocurpad = PL_curpad;
c277df42
IZ
2358 SV *ret;
2359
2360 n = ARG(scan);
533c011a 2361 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2362 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2363 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2364 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2365
cea2e8a9 2366 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2367 SPAGAIN;
2368 ret = POPs;
2369 PUTBACK;
2370
0f5d15d6
IZ
2371 PL_op = oop;
2372 PL_curpad = ocurpad;
2373 PL_curcop = ocurcop;
c277df42 2374 if (logical) {
0f5d15d6
IZ
2375 if (logical == 2) { /* Postponed subexpression. */
2376 regexp *re;
22c35a8c 2377 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2378 re_cc_state state;
0f5d15d6
IZ
2379 CHECKPOINT cp, lastcp;
2380
2381 if(SvROK(ret) || SvRMAGICAL(ret)) {
2382 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2383
2384 if(SvMAGICAL(sv))
2385 mg = mg_find(sv, 'r');
2386 }
2387 if (mg) {
2388 re = (regexp *)mg->mg_obj;
df0003d4 2389 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2390 }
2391 else {
2392 STRLEN len;
2393 char *t = SvPV(ret, len);
2394 PMOP pm;
2395 char *oprecomp = PL_regprecomp;
2396 I32 osize = PL_regsize;
2397 I32 onpar = PL_regnpar;
2398
2399 pm.op_pmflags = 0;
393fec97 2400 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2401 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2402 if (!(SvFLAGS(ret)
2403 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2404 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2405 PL_regprecomp = oprecomp;
2406 PL_regsize = osize;
2407 PL_regnpar = onpar;
2408 }
2409 DEBUG_r(
2410 PerlIO_printf(Perl_debug_log,
2411 "Entering embedded `%s%.60s%s%s'\n",
2412 PL_colors[0],
2413 re->precomp,
2414 PL_colors[1],
2415 (strlen(re->precomp) > 60 ? "..." : ""))
2416 );
2417 state.node = next;
2418 state.prev = PL_reg_call_cc;
2419 state.cc = PL_regcc;
2420 state.re = PL_reg_re;
2421
2ab05381 2422 PL_regcc = 0;
0f5d15d6
IZ
2423
2424 cp = regcppush(0); /* Save *all* the positions. */
2425 REGCP_SET;
2426 cache_re(re);
2427 state.ss = PL_savestack_ix;
2428 *PL_reglastparen = 0;
2429 PL_reg_call_cc = &state;
2430 PL_reginput = locinput;
2c2d71f5
JH
2431
2432 /* XXXX This is too dramatic a measure... */
2433 PL_reg_maxiter = 0;
2434
0f5d15d6 2435 if (regmatch(re->program + 1)) {
2c914db6
IZ
2436 /* Even though we succeeded, we need to restore
2437 global variables, since we may be wrapped inside
2438 SUSPEND, thus the match may be not finished yet. */
2439
2440 /* XXXX Do this only if SUSPENDed? */
2441 PL_reg_call_cc = state.prev;
2442 PL_regcc = state.cc;
2443 PL_reg_re = state.re;
2444 cache_re(PL_reg_re);
2445
2446 /* XXXX This is too dramatic a measure... */
2447 PL_reg_maxiter = 0;
2448
2449 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2450 ReREFCNT_dec(re);
2451 regcpblow(cp);
2452 sayYES;
2453 }
0f5d15d6
IZ
2454 ReREFCNT_dec(re);
2455 REGCP_UNWIND;
2456 regcppop();
2457 PL_reg_call_cc = state.prev;
2458 PL_regcc = state.cc;
2459 PL_reg_re = state.re;
d3790889 2460 cache_re(PL_reg_re);
2c2d71f5
JH
2461
2462 /* XXXX This is too dramatic a measure... */
2463 PL_reg_maxiter = 0;
2464
0f5d15d6
IZ
2465 sayNO;
2466 }
c277df42 2467 sw = SvTRUE(ret);
0f5d15d6 2468 logical = 0;
a0ed51b3
LW
2469 }
2470 else
3280af22 2471 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2472 break;
2473 }
a0d0e21e 2474 case OPEN:
c277df42 2475 n = ARG(scan); /* which paren pair */
3280af22
NIS
2476 PL_reg_start_tmp[n] = locinput;
2477 if (n > PL_regsize)
2478 PL_regsize = n;
a0d0e21e
LW
2479 break;
2480 case CLOSE:
c277df42 2481 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2482 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2483 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2484 if (n > *PL_reglastparen)
2485 *PL_reglastparen = n;
a0d0e21e 2486 break;
c277df42
IZ
2487 case GROUPP:
2488 n = ARG(scan); /* which paren pair */
cf93c79d 2489 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2490 break;
2491 case IFTHEN:
2c2d71f5 2492 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2493 if (sw)
2494 next = NEXTOPER(NEXTOPER(scan));
2495 else {
2496 next = scan + ARG(scan);
2497 if (OP(next) == IFTHEN) /* Fake one. */
2498 next = NEXTOPER(NEXTOPER(next));
2499 }
2500 break;
2501 case LOGICAL:
0f5d15d6 2502 logical = scan->flags;
c277df42 2503 break;
2ab05381
IZ
2504/*******************************************************************
2505 PL_regcc contains infoblock about the innermost (...)* loop, and
2506 a pointer to the next outer infoblock.
2507
2508 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2509
2510 1) After matching X, regnode for CURLYX is processed;
2511
2512 2) This regnode creates infoblock on the stack, and calls
2513 regmatch() recursively with the starting point at WHILEM node;
2514
2515 3) Each hit of WHILEM node tries to match A and Z (in the order
2516 depending on the current iteration, min/max of {min,max} and
2517 greediness). The information about where are nodes for "A"
2518 and "Z" is read from the infoblock, as is info on how many times "A"
2519 was already matched, and greediness.
2520
2521 4) After A matches, the same WHILEM node is hit again.
2522
2523 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2524 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2525 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2526 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2527 of the external loop.
2528
2529 Currently present infoblocks form a tree with a stem formed by PL_curcc
2530 and whatever it mentions via ->next, and additional attached trees
2531 corresponding to temporarily unset infoblocks as in "5" above.
2532
2533 In the following picture infoblocks for outer loop of
2534 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2535 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2536 infoblocks are drawn below the "reset" infoblock.
2537
2538 In fact in the picture below we do not show failed matches for Z and T
2539 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2540 more obvious *why* one needs to *temporary* unset infoblocks.]
2541
2542 Matched REx position InfoBlocks Comment
2543 (Y(A)*?Z)*?T x
2544 Y(A)*?Z)*?T x <- O
2545 Y (A)*?Z)*?T x <- O
2546 Y A)*?Z)*?T x <- O <- I
2547 YA )*?Z)*?T x <- O <- I
2548 YA A)*?Z)*?T x <- O <- I
2549 YAA )*?Z)*?T x <- O <- I
2550 YAA Z)*?T x <- O # Temporary unset I
2551 I
2552
2553 YAAZ Y(A)*?Z)*?T x <- O
2554 I
2555
2556 YAAZY (A)*?Z)*?T x <- O
2557 I
2558
2559 YAAZY A)*?Z)*?T x <- O <- I
2560 I
2561
2562 YAAZYA )*?Z)*?T x <- O <- I
2563 I
2564
2565 YAAZYA Z)*?T x <- O # Temporary unset I
2566 I,I
2567
2568 YAAZYAZ )*?T x <- O
2569 I,I
2570
2571 YAAZYAZ T x # Temporary unset O
2572 O
2573 I,I
2574
2575 YAAZYAZT x
2576 O
2577 I,I
2578 *******************************************************************/
a0d0e21e
LW
2579 case CURLYX: {
2580 CURCUR cc;
3280af22 2581 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2582
2583 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2584 next += ARG(next);
3280af22
NIS
2585 cc.oldcc = PL_regcc;
2586 PL_regcc = &cc;
2587 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2588 cc.cur = -1;
2589 cc.min = ARG1(scan);
2590 cc.max = ARG2(scan);
c277df42 2591 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2592 cc.next = next;
2593 cc.minmod = minmod;
2594 cc.lastloc = 0;
3280af22 2595 PL_reginput = locinput;
a0d0e21e
LW
2596 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2597 regcpblow(cp);
3280af22 2598 PL_regcc = cc.oldcc;
4633a7c4 2599 saySAME(n);
a0d0e21e
LW
2600 }
2601 /* NOT REACHED */
2602 case WHILEM: {
2603 /*
2604 * This is really hard to understand, because after we match
2605 * what we're trying to match, we must make sure the rest of
2c2d71f5 2606 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2607 * to go back UP the parse tree by recursing ever deeper. And
2608 * if it fails, we have to reset our parent's current state
2609 * that we can try again after backing off.
2610 */
2611
c277df42 2612 CHECKPOINT cp, lastcp;
3280af22 2613 CURCUR* cc = PL_regcc;
c277df42
IZ
2614 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2615
4633a7c4 2616 n = cc->cur + 1; /* how many we know we matched */
3280af22 2617 PL_reginput = locinput;
a0d0e21e 2618
c277df42
IZ
2619 DEBUG_r(
2620 PerlIO_printf(Perl_debug_log,
2621 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2622 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2623 (long)n, (long)cc->min,
2624 (long)cc->max, (long)cc)
2625 );
4633a7c4 2626
a0d0e21e
LW
2627 /* If degenerate scan matches "", assume scan done. */
2628
579cf2c3 2629 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2630 PL_regcc = cc->oldcc;
2ab05381
IZ
2631 if (PL_regcc)
2632 ln = PL_regcc->cur;
c277df42 2633 DEBUG_r(
c3464db5
DD
2634 PerlIO_printf(Perl_debug_log,
2635 "%*s empty match detected, try continuation...\n",
3280af22 2636 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2637 );
a0d0e21e 2638 if (regmatch(cc->next))
4633a7c4 2639 sayYES;
2ab05381
IZ
2640 if (PL_regcc)
2641 PL_regcc->cur = ln;
3280af22 2642 PL_regcc = cc;
4633a7c4 2643 sayNO;
a0d0e21e
LW
2644 }
2645
2646 /* First just match a string of min scans. */
2647
2648 if (n < cc->min) {
2649 cc->cur = n;
2650 cc->lastloc = locinput;
4633a7c4
LW
2651 if (regmatch(cc->scan))
2652 sayYES;
2653 cc->cur = n - 1;
c277df42 2654 cc->lastloc = lastloc;
4633a7c4 2655 sayNO;
a0d0e21e
LW
2656 }
2657
2c2d71f5
JH
2658 if (scan->flags) {
2659 /* Check whether we already were at this position.
2660 Postpone detection until we know the match is not
2661 *that* much linear. */
2662 if (!PL_reg_maxiter) {
2663 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2664 PL_reg_leftiter = PL_reg_maxiter;
2665 }
2666 if (PL_reg_leftiter-- == 0) {
2667 I32 size = (PL_reg_maxiter + 7)/8;
2668 if (PL_reg_poscache) {
2669 if (PL_reg_poscache_size < size) {
2670 Renew(PL_reg_poscache, size, char);
2671 PL_reg_poscache_size = size;
2672 }
2673 Zero(PL_reg_poscache, size, char);
2674 }
2675 else {
2676 PL_reg_poscache_size = size;
2677 Newz(29, PL_reg_poscache, size, char);
2678 }
2679 DEBUG_r(
2680 PerlIO_printf(Perl_debug_log,
2681 "%sDetected a super-linear match, switching on caching%s...\n",
2682 PL_colors[4], PL_colors[5])
2683 );
2684 }
2685 if (PL_reg_leftiter < 0) {
2686 I32 o = locinput - PL_bostr, b;
2687
2688 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2689 b = o % 8;
2690 o /= 8;
2691 if (PL_reg_poscache[o] & (1<<b)) {
2692 DEBUG_r(
2693 PerlIO_printf(Perl_debug_log,
2694 "%*s already tried at this position...\n",
2695 REPORT_CODE_OFF+PL_regindent*2, "")
2696 );
7821416a 2697 sayNO_SILENT;
2c2d71f5
JH
2698 }
2699 PL_reg_poscache[o] |= (1<<b);
2700 }
2701 }
2702
a0d0e21e
LW
2703 /* Prefer next over scan for minimal matching. */
2704
2705 if (cc->minmod) {
3280af22 2706 PL_regcc = cc->oldcc;
2ab05381
IZ
2707 if (PL_regcc)
2708 ln = PL_regcc->cur;
5f05dabc 2709 cp = regcppush(cc->parenfloor);
c277df42 2710 REGCP_SET;
5f05dabc 2711 if (regmatch(cc->next)) {
c277df42 2712 regcpblow(cp);
4633a7c4 2713 sayYES; /* All done. */
5f05dabc 2714 }
c277df42 2715 REGCP_UNWIND;
5f05dabc 2716 regcppop();
2ab05381
IZ
2717 if (PL_regcc)
2718 PL_regcc->cur = ln;
3280af22 2719 PL_regcc = cc;
a0d0e21e 2720
c277df42 2721 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2722 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2723 && !(PL_reg_flags & RF_warned)) {
2724 PL_reg_flags |= RF_warned;
e476b1b5 2725 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2726 "Complex regular subexpression recursion",
2727 REG_INFTY - 1);
c277df42 2728 }
4633a7c4 2729 sayNO;
c277df42 2730 }
a687059c 2731
c277df42 2732 DEBUG_r(
c3464db5
DD
2733 PerlIO_printf(Perl_debug_log,
2734 "%*s trying longer...\n",
3280af22 2735 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2736 );
a0d0e21e 2737 /* Try scanning more and see if it helps. */
3280af22 2738 PL_reginput = locinput;
a0d0e21e
LW
2739 cc->cur = n;
2740 cc->lastloc = locinput;
5f05dabc 2741 cp = regcppush(cc->parenfloor);
c277df42 2742 REGCP_SET;
5f05dabc 2743 if (regmatch(cc->scan)) {
c277df42 2744 regcpblow(cp);
4633a7c4 2745 sayYES;
5f05dabc 2746 }
c277df42 2747 REGCP_UNWIND;
5f05dabc 2748 regcppop();
4633a7c4 2749 cc->cur = n - 1;
c277df42 2750 cc->lastloc = lastloc;
4633a7c4 2751 sayNO;
a0d0e21e
LW
2752 }
2753
2754 /* Prefer scan over next for maximal matching. */
2755
2756 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2757 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2758 cc->cur = n;
2759 cc->lastloc = locinput;
c277df42 2760 REGCP_SET;
5f05dabc 2761 if (regmatch(cc->scan)) {
c277df42 2762 regcpblow(cp);
4633a7c4 2763 sayYES;
5f05dabc 2764 }
c277df42 2765 REGCP_UNWIND;
a0d0e21e 2766 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2767 PL_reginput = locinput;
c277df42 2768 DEBUG_r(
c3464db5
DD
2769 PerlIO_printf(Perl_debug_log,
2770 "%*s failed, try continuation...\n",
3280af22 2771 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2772 );
2773 }
e476b1b5 2774 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2775 && !(PL_reg_flags & RF_warned)) {
3280af22 2776 PL_reg_flags |= RF_warned;
e476b1b5 2777 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2778 "Complex regular subexpression recursion",
2779 REG_INFTY - 1);
a0d0e21e
LW
2780 }
2781
2782 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2783 PL_regcc = cc->oldcc;
2ab05381
IZ
2784 if (PL_regcc)
2785 ln = PL_regcc->cur;
a0d0e21e 2786 if (regmatch(cc->next))
4633a7c4 2787 sayYES;
2ab05381
IZ
2788 if (PL_regcc)
2789 PL_regcc->cur = ln;
3280af22 2790 PL_regcc = cc;
4633a7c4 2791 cc->cur = n - 1;
c277df42 2792 cc->lastloc = lastloc;
4633a7c4 2793 sayNO;
a0d0e21e
LW
2794 }
2795 /* NOT REACHED */
c277df42
IZ
2796 case BRANCHJ:
2797 next = scan + ARG(scan);
2798 if (next == scan)
2799 next = NULL;
2800 inner = NEXTOPER(NEXTOPER(scan));
2801 goto do_branch;
2802 case BRANCH:
2803 inner = NEXTOPER(scan);
2804 do_branch:
2805 {
2806 CHECKPOINT lastcp;
2807 c1 = OP(scan);
2808 if (OP(next) != c1) /* No choice. */
2809 next = inner; /* Avoid recursion. */
a0d0e21e 2810 else {
3280af22 2811 int lastparen = *PL_reglastparen;
c277df42
IZ
2812
2813 REGCP_SET;
a0d0e21e 2814 do {
3280af22 2815 PL_reginput = locinput;
c277df42 2816 if (regmatch(inner))
4633a7c4 2817 sayYES;
c277df42 2818 REGCP_UNWIND;
3280af22 2819 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2820 PL_regendp[n] = -1;
3280af22 2821 *PL_reglastparen = n;
c277df42 2822 scan = next;
a0d0e21e 2823 /*SUPPRESS 560*/
155aba94 2824 if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))))
c277df42 2825 next += n;
a0d0e21e 2826 else
c277df42 2827 next = NULL;
c277df42
IZ
2828 inner = NEXTOPER(scan);
2829 if (c1 == BRANCHJ) {
2830 inner = NEXTOPER(inner);
2831 }
2832 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2833 sayNO;
a0d0e21e 2834 /* NOTREACHED */
a687059c 2835 }
a0d0e21e
LW
2836 }
2837 break;
2838 case MINMOD:
2839 minmod = 1;
2840 break;
c277df42
IZ
2841 case CURLYM:
2842 {
00db4c45 2843 I32 l = 0;
c277df42
IZ
2844 CHECKPOINT lastcp;
2845
2846 /* We suppose that the next guy does not need
2847 backtracking: in particular, it is of constant length,
2848 and has no parenths to influence future backrefs. */
2849 ln = ARG1(scan); /* min to match */
2850 n = ARG2(scan); /* max to match */
c277df42
IZ
2851 paren = scan->flags;
2852 if (paren) {
3280af22
NIS
2853 if (paren > PL_regsize)
2854 PL_regsize = paren;
2855 if (paren > *PL_reglastparen)
2856 *PL_reglastparen = paren;
c277df42 2857 }
dc45a647 2858 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2859 if (paren)
2860 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2861 PL_reginput = locinput;
c277df42
IZ
2862 if (minmod) {
2863 minmod = 0;
2864 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2865 sayNO;
5f4b28b2 2866 if (ln && l == 0 && n >= ln
c277df42
IZ
2867 /* In fact, this is tricky. If paren, then the
2868 fact that we did/didnot match may influence
2869 future execution. */
2870 && !(paren && ln == 0))
2871 ln = n;
3280af22 2872 locinput = PL_reginput;
22c35a8c 2873 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2874 c1 = (U8)*STRING(next);
c277df42 2875 if (OP(next) == EXACTF)
22c35a8c 2876 c2 = PL_fold[c1];
c277df42 2877 else if (OP(next) == EXACTFL)
22c35a8c 2878 c2 = PL_fold_locale[c1];
c277df42
IZ
2879 else
2880 c2 = c1;
a0ed51b3
LW
2881 }
2882 else
c277df42
IZ
2883 c1 = c2 = -1000;
2884 REGCP_SET;
5f4b28b2 2885 /* This may be improved if l == 0. */
c277df42
IZ
2886 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2887 /* If it could work, try it. */
2888 if (c1 == -1000 ||
3280af22
NIS
2889 UCHARAT(PL_reginput) == c1 ||
2890 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2891 {
2892 if (paren) {
2893 if (n) {
cf93c79d
IZ
2894 PL_regstartp[paren] =
2895 HOPc(PL_reginput, -l) - PL_bostr;
2896 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2897 }
2898 else
cf93c79d 2899 PL_regendp[paren] = -1;
c277df42
IZ
2900 }
2901 if (regmatch(next))
2902 sayYES;
2903 REGCP_UNWIND;
2904 }
2905 /* Couldn't or didn't -- move forward. */
3280af22 2906 PL_reginput = locinput;
c277df42
IZ
2907 if (regrepeat_hard(scan, 1, &l)) {
2908 ln++;
3280af22 2909 locinput = PL_reginput;
c277df42
IZ
2910 }
2911 else
2912 sayNO;
2913 }
a0ed51b3
LW
2914 }
2915 else {
c277df42
IZ
2916 n = regrepeat_hard(scan, n, &l);
2917 if (n != 0 && l == 0
2918 /* In fact, this is tricky. If paren, then the
2919 fact that we did/didnot match may influence
2920 future execution. */
2921 && !(paren && ln == 0))
2922 ln = n;
3280af22 2923 locinput = PL_reginput;
c277df42 2924 DEBUG_r(
5c0ca799 2925 PerlIO_printf(Perl_debug_log,
faccc32b 2926 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2927 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2928 (IV) n, (IV)l)
c277df42
IZ
2929 );
2930 if (n >= ln) {
22c35a8c 2931 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2932 c1 = (U8)*STRING(next);
c277df42 2933 if (OP(next) == EXACTF)
22c35a8c 2934 c2 = PL_fold[c1];
c277df42 2935 else if (OP(next) == EXACTFL)
22c35a8c 2936 c2 = PL_fold_locale[c1];
c277df42
IZ
2937 else
2938 c2 = c1;
a0ed51b3
LW
2939 }
2940 else
c277df42
IZ
2941 c1 = c2 = -1000;
2942 }
2943 REGCP_SET;
2944 while (n >= ln) {
2945 /* If it could work, try it. */
2946 if (c1 == -1000 ||
3280af22
NIS
2947 UCHARAT(PL_reginput) == c1 ||
2948 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2949 {
2950 DEBUG_r(
c3464db5 2951 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2952 "%*s trying tail with n=%"IVdf"...\n",
2953 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2954 );
2955 if (paren) {
2956 if (n) {
cf93c79d
IZ
2957 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2958 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2959 }
a0ed51b3 2960 else
cf93c79d 2961 PL_regendp[paren] = -1;
c277df42 2962 }
a0ed51b3
LW
2963 if (regmatch(next))
2964 sayYES;
2965 REGCP_UNWIND;
2966 }
c277df42
IZ
2967 /* Couldn't or didn't -- back up. */
2968 n--;
dfe13c55 2969 locinput = HOPc(locinput, -l);
3280af22 2970 PL_reginput = locinput;
c277df42
IZ
2971 }
2972 }
2973 sayNO;
2974 break;
2975 }
2976 case CURLYN:
2977 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2978 if (paren > PL_regsize)
2979 PL_regsize = paren;
2980 if (paren > *PL_reglastparen)
2981 *PL_reglastparen = paren;
c277df42
IZ
2982 ln = ARG1(scan); /* min to match */
2983 n = ARG2(scan); /* max to match */
dc45a647 2984 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2985 goto repeat;
a0d0e21e 2986 case CURLY:
c277df42 2987 paren = 0;
a0d0e21e
LW
2988 ln = ARG1(scan); /* min to match */
2989 n = ARG2(scan); /* max to match */
dc45a647 2990 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2991 goto repeat;
2992 case STAR:
2993 ln = 0;
c277df42 2994 n = REG_INFTY;
a0d0e21e 2995 scan = NEXTOPER(scan);
c277df42 2996 paren = 0;
a0d0e21e
LW
2997 goto repeat;
2998 case PLUS:
c277df42
IZ
2999 ln = 1;
3000 n = REG_INFTY;
3001 scan = NEXTOPER(scan);
3002 paren = 0;
3003 repeat:
a0d0e21e
LW
3004 /*
3005 * Lookahead to avoid useless match attempts
3006 * when we know what character comes next.
3007 */
22c35a8c 3008 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3009 c1 = (U8)*STRING(next);
bbce6d69 3010 if (OP(next) == EXACTF)
22c35a8c 3011 c2 = PL_fold[c1];
bbce6d69 3012 else if (OP(next) == EXACTFL)
22c35a8c 3013 c2 = PL_fold_locale[c1];
bbce6d69 3014 else
3015 c2 = c1;
3016 }
a0d0e21e 3017 else
bbce6d69 3018 c1 = c2 = -1000;
3280af22 3019 PL_reginput = locinput;
a0d0e21e 3020 if (minmod) {
c277df42 3021 CHECKPOINT lastcp;
a0d0e21e
LW
3022 minmod = 0;
3023 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3024 sayNO;
a0ed51b3 3025 locinput = PL_reginput;
c277df42 3026 REGCP_SET;
0fe9bf95
IZ
3027 if (c1 != -1000) {
3028 char *e = locinput + n - ln; /* Should not check after this */
3029 char *old = locinput;
3030
3031 if (e >= PL_regeol || (n == REG_INFTY))
3032 e = PL_regeol - 1;
3033 while (1) {
3034 /* Find place 'next' could work */
3035 if (c1 == c2) {
3036 while (locinput <= e && *locinput != c1)
3037 locinput++;
3038 } else {
3039 while (locinput <= e
3040 && *locinput != c1
3041 && *locinput != c2)
3042 locinput++;
3043 }
3044 if (locinput > e)
3045 sayNO;
3046 /* PL_reginput == old now */
3047 if (locinput != old) {
3048 ln = 1; /* Did some */
3049 if (regrepeat(scan, locinput - old) <
3050 locinput - old)
3051 sayNO;
3052 }
3053 /* PL_reginput == locinput now */
29d1e993 3054 TRYPAREN(paren, ln, locinput);
0fe9bf95
IZ
3055 PL_reginput = locinput; /* Could be reset... */
3056 REGCP_UNWIND;
3057 /* Couldn't or didn't -- move forward. */
3058 old = locinput++;
3059 }
3060 }
3061 else
c277df42 3062 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3063 /* If it could work, try it. */
bbce6d69 3064 if (c1 == -1000 ||
3280af22
NIS
3065 UCHARAT(PL_reginput) == c1 ||
3066 UCHARAT(PL_reginput) == c2)
bbce6d69 3067 {
29d1e993 3068 TRYPAREN(paren, n, PL_reginput);
c277df42 3069 REGCP_UNWIND;
bbce6d69 3070 }
c277df42 3071 /* Couldn't or didn't -- move forward. */
a0ed51b3 3072 PL_reginput = locinput;
a0d0e21e
LW
3073 if (regrepeat(scan, 1)) {
3074 ln++;
a0ed51b3
LW
3075 locinput = PL_reginput;
3076 }
3077 else
4633a7c4 3078 sayNO;
a0d0e21e
LW
3079 }
3080 }
3081 else {
c277df42 3082 CHECKPOINT lastcp;
a0d0e21e 3083 n = regrepeat(scan, n);
a0ed51b3 3084 locinput = PL_reginput;
22c35a8c 3085 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3086 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3087 ln = n; /* why back off? */
1aeab75a
GS
3088 /* ...because $ and \Z can match before *and* after
3089 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3090 We should back off by one in this case. */
3091 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3092 ln--;
3093 }
c277df42
IZ
3094 REGCP_SET;
3095 if (paren) {
3096 while (n >= ln) {
3097 /* If it could work, try it. */
3098 if (c1 == -1000 ||
3280af22
NIS
3099 UCHARAT(PL_reginput) == c1 ||
3100 UCHARAT(PL_reginput) == c2)
c277df42 3101 {
29d1e993 3102 TRYPAREN(paren, n, PL_reginput);
c277df42
IZ
3103 REGCP_UNWIND;
3104 }
3105 /* Couldn't or didn't -- back up. */
3106 n--;
dfe13c55 3107 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3108 }
a0ed51b3
LW
3109 }
3110 else {
c277df42
IZ
3111 while (n >= ln) {
3112 /* If it could work, try it. */
3113 if (c1 == -1000 ||
3280af22
NIS
3114 UCHARAT(PL_reginput) == c1 ||
3115 UCHARAT(PL_reginput) == c2)
c277df42 3116 {
29d1e993 3117 TRYPAREN(paren, n, PL_reginput);
c277df42
IZ
3118 REGCP_UNWIND;
3119 }
3120 /* Couldn't or didn't -- back up. */
3121 n--;
dfe13c55 3122 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3123 }
a0d0e21e
LW
3124 }
3125 }
4633a7c4 3126 sayNO;
c277df42 3127 break;
a0d0e21e 3128 case END:
0f5d15d6
IZ
3129 if (PL_reg_call_cc) {
3130 re_cc_state *cur_call_cc = PL_reg_call_cc;
3131 CURCUR *cctmp = PL_regcc;
3132 regexp *re = PL_reg_re;
3133 CHECKPOINT cp, lastcp;
3134
3135 cp = regcppush(0); /* Save *all* the positions. */
3136 REGCP_SET;
3137 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3138 the caller. */
3139 PL_reginput = locinput; /* Make position available to
3140 the callcc. */
3141 cache_re(PL_reg_call_cc->re);
3142 PL_regcc = PL_reg_call_cc->cc;
3143 PL_reg_call_cc = PL_reg_call_cc->prev;
3144 if (regmatch(cur_call_cc->node)) {
3145 PL_reg_call_cc = cur_call_cc;
3146 regcpblow(cp);
3147 sayYES;
3148 }
3149 REGCP_UNWIND;
3150 regcppop();
3151 PL_reg_call_cc = cur_call_cc;
3152 PL_regcc = cctmp;
3153 PL_reg_re = re;
3154 cache_re(re);
3155
3156 DEBUG_r(
3157 PerlIO_printf(Perl_debug_log,
3158 "%*s continuation failed...\n",
3159 REPORT_CODE_OFF+PL_regindent*2, "")
3160 );
7821416a 3161 sayNO_SILENT;
0f5d15d6 3162 }
7821416a
IZ
3163 if (locinput < PL_regtill) {
3164 DEBUG_r(PerlIO_printf(Perl_debug_log,
3165 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3166 PL_colors[4],
3167 (long)(locinput - PL_reg_starttry),
3168 (long)(PL_regtill - PL_reg_starttry),
3169 PL_colors[5]));
3170 sayNO_FINAL; /* Cannot match: too short. */
3171 }
3172 PL_reginput = locinput; /* put where regtry can find it */
3173 sayYES_FINAL; /* Success! */
7e5428c5 3174 case SUCCEED:
3280af22 3175 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3176 sayYES_LOUD; /* Success! */
c277df42
IZ
3177 case SUSPEND:
3178 n = 1;
9fe1d20c 3179 PL_reginput = locinput;
c277df42 3180 goto do_ifmatch;
a0d0e21e 3181 case UNLESSM:
c277df42 3182 n = 0;
a0ed51b3 3183 if (scan->flags) {
0fe9bf95
IZ
3184 if (UTF) { /* XXXX This is absolutely
3185 broken, we read before
3186 start of string. */
3187 s = HOPMAYBEc(locinput, -scan->flags);
3188 if (!s)
3189 goto say_yes;
3190 PL_reginput = s;
3191 }
3192 else {
3193 if (locinput < PL_bostr + scan->flags)
3194 goto say_yes;
3195 PL_reginput = locinput - scan->flags;
3196 goto do_ifmatch;
3197 }
a0ed51b3
LW
3198 }
3199 else
3200 PL_reginput = locinput;
c277df42
IZ
3201 goto do_ifmatch;
3202 case IFMATCH:
3203 n = 1;
a0ed51b3 3204 if (scan->flags) {
0fe9bf95
IZ
3205 if (UTF) { /* XXXX This is absolutely
3206 broken, we read before
3207 start of string. */
3208 s = HOPMAYBEc(locinput, -scan->flags);
3209 if (!s || s < PL_bostr)
3210 goto say_no;
3211 PL_reginput = s;
3212 }
3213 else {
3214 if (locinput < PL_bostr + scan->flags)
3215 goto say_no;
3216 PL_reginput = locinput - scan->flags;
3217 goto do_ifmatch;
3218 }
a0ed51b3
LW
3219 }
3220 else
3221 PL_reginput = locinput;
3222
c277df42 3223 do_ifmatch:
c277df42
IZ
3224 inner = NEXTOPER(NEXTOPER(scan));
3225 if (regmatch(inner) != n) {
3226 say_no:
3227 if (logical) {
3228 logical = 0;
3229 sw = 0;
3230 goto do_longjump;
a0ed51b3
LW
3231 }
3232 else
c277df42
IZ
3233 sayNO;
3234 }
3235 say_yes:
3236 if (logical) {
3237 logical = 0;
3238 sw = 1;
3239 }
fe44a5e8 3240 if (OP(scan) == SUSPEND) {
3280af22 3241 locinput = PL_reginput;
565764a8 3242 nextchr = UCHARAT(locinput);
fe44a5e8 3243 }
c277df42
IZ
3244 /* FALL THROUGH. */
3245 case LONGJMP:
3246 do_longjump:
3247 next = scan + ARG(scan);
3248 if (next == scan)
3249 next = NULL;
a0d0e21e
LW
3250 break;
3251 default:
b900a521 3252 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3253 PTR2UV(scan), OP(scan));
cea2e8a9 3254 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3255 }
a0d0e21e
LW
3256 scan = next;
3257 }
a687059c 3258
a0d0e21e
LW
3259 /*
3260 * We get here only if there's trouble -- normally "case END" is
3261 * the terminating point.
3262 */
cea2e8a9 3263 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3264 /*NOTREACHED*/
4633a7c4
LW
3265 sayNO;
3266
7821416a
IZ
3267yes_loud:
3268 DEBUG_r(
3269 PerlIO_printf(Perl_debug_log,
3270 "%*s %scould match...%s\n",
3271 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3272 );
3273 goto yes;
3274yes_final:
3275 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3276 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3277yes:
3278#ifdef DEBUGGING
3280af22 3279 PL_regindent--;
4633a7c4
LW
3280#endif
3281 return 1;
3282
3283no:
7821416a
IZ
3284 DEBUG_r(
3285 PerlIO_printf(Perl_debug_log,
3286 "%*s %sfailed...%s\n",
3287 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3288 );
3289 goto do_no;
3290no_final:
3291do_no:
4633a7c4 3292#ifdef DEBUGGING
3280af22 3293 PL_regindent--;
4633a7c4 3294#endif
a0d0e21e 3295 return 0;
a687059c
LW
3296}
3297
3298/*
3299 - regrepeat - repeatedly match something simple, report how many
3300 */
3301/*
3302 * [This routine now assumes that it will only match on things of length 1.
3303 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3304 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3305 */
76e3520e 3306STATIC I32
cea2e8a9 3307S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3308{
5c0ca799 3309 dTHR;
a0d0e21e 3310 register char *scan;
a0d0e21e 3311 register I32 c;
3280af22 3312 register char *loceol = PL_regeol;
a0ed51b3 3313 register I32 hardcount = 0;
a0d0e21e 3314
3280af22 3315 scan = PL_reginput;
c277df42 3316 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3317 loceol = scan + max;
a0d0e21e 3318 switch (OP(p)) {
22c35a8c 3319 case REG_ANY:
a0d0e21e
LW
3320 while (scan < loceol && *scan != '\n')
3321 scan++;
3322 break;
3323 case SANY:
3324 scan = loceol;
3325 break;
a0ed51b3
LW
3326 case ANYUTF8:
3327 loceol = PL_regeol;
3328 while (scan < loceol && *scan != '\n') {
3329 scan += UTF8SKIP(scan);
3330 hardcount++;
3331 }
3332 break;
3333 case SANYUTF8:
3334 loceol = PL_regeol;
3335 while (scan < loceol) {
3336 scan += UTF8SKIP(scan);
3337 hardcount++;
3338 }
3339 break;
bbce6d69 3340 case EXACT: /* length of string is 1 */
cd439c50 3341 c = (U8)*STRING(p);
bbce6d69 3342 while (scan < loceol && UCHARAT(scan) == c)
3343 scan++;
3344 break;
3345 case EXACTF: /* length of string is 1 */
cd439c50 3346 c = (U8)*STRING(p);
bbce6d69 3347 while (scan < loceol &&
22c35a8c 3348 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3349 scan++;
3350 break;
3351 case EXACTFL: /* length of string is 1 */
3280af22 3352 PL_reg_flags |= RF_tainted;
cd439c50 3353 c = (U8)*STRING(p);
bbce6d69 3354 while (scan < loceol &&
22c35a8c 3355 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3356 scan++;
3357 break;
a0ed51b3
LW
3358 case ANYOFUTF8:
3359 loceol = PL_regeol;
3360 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3361 scan += UTF8SKIP(scan);
3362 hardcount++;
3363 }
3364 break;
a0d0e21e 3365 case ANYOF:
936ed897 3366 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3367 scan++;
a0d0e21e
LW
3368 break;
3369 case ALNUM:
3370 while (scan < loceol && isALNUM(*scan))
3371 scan++;
3372 break;
a0ed51b3
LW
3373 case ALNUMUTF8:
3374 loceol = PL_regeol;
dfe13c55 3375 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3376 scan += UTF8SKIP(scan);
3377 hardcount++;
3378 }
3379 break;
bbce6d69 3380 case ALNUML:
3280af22 3381 PL_reg_flags |= RF_tainted;
bbce6d69 3382 while (scan < loceol && isALNUM_LC(*scan))
3383 scan++;
3384 break;
a0ed51b3
LW
3385 case ALNUMLUTF8:
3386 PL_reg_flags |= RF_tainted;
3387 loceol = PL_regeol;
dfe13c55 3388 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3389 scan += UTF8SKIP(scan);
3390 hardcount++;
3391 }
3392 break;
3393 break;
a0d0e21e
LW
3394 case NALNUM:
3395 while (scan < loceol && !isALNUM(*scan))
3396 scan++;
3397 break;
a0ed51b3
LW
3398 case NALNUMUTF8:
3399 loceol = PL_regeol;
dfe13c55 3400 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3401 scan += UTF8SKIP(scan);
3402 hardcount++;
3403 }
3404 break;
bbce6d69 3405 case NALNUML:
3280af22 3406 PL_reg_flags |= RF_tainted;
bbce6d69 3407 while (scan < loceol && !isALNUM_LC(*scan))
3408 scan++;
3409 break;
a0ed51b3
LW
3410 case NALNUMLUTF8:
3411 PL_reg_flags |= RF_tainted;
3412 loceol = PL_regeol;
dfe13c55 3413 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3414 scan += UTF8SKIP(scan);
3415 hardcount++;
3416 }
3417 break;
a0d0e21e
LW
3418 case SPACE:
3419 while (scan < loceol && isSPACE(*scan))
3420 scan++;
3421 break;
a0ed51b3
LW
3422 case SPACEUTF8:
3423 loceol = PL_regeol;
dfe13c55 3424 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3425 scan += UTF8SKIP(scan);
3426 hardcount++;
3427 }
3428 break;
bbce6d69 3429 case SPACEL:
3280af22 3430 PL_reg_flags |= RF_tainted;
bbce6d69 3431 while (scan < loceol && isSPACE_LC(*scan))
3432 scan++;
3433 break;
a0ed51b3
LW
3434 case SPACELUTF8:
3435 PL_reg_flags |= RF_tainted;
3436 loceol = PL_regeol;
dfe13c55 3437 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3438 scan += UTF8SKIP(scan);
3439 hardcount++;
3440 }
3441 break;
a0d0e21e
LW
3442 case NSPACE:
3443 while (scan < loceol && !isSPACE(*scan))
3444 scan++;
3445 break;
a0ed51b3
LW
3446 case NSPACEUTF8:
3447 loceol = PL_regeol;
dfe13c55 3448 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3449 scan += UTF8SKIP(scan);
3450 hardcount++;
3451 }
3452 break;
bbce6d69 3453 case NSPACEL:
3280af22 3454 PL_reg_flags |= RF_tainted;
bbce6d69 3455 while (scan < loceol && !isSPACE_LC(*scan))
3456 scan++;
3457 break;
a0ed51b3
LW
3458 case NSPACELUTF8:
3459 PL_reg_flags |= RF_tainted;
3460 loceol = PL_regeol;
dfe13c55 3461 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3462 scan += UTF8SKIP(scan);
3463 hardcount++;
3464 }
3465 break;
a0d0e21e
LW
3466 case DIGIT:
3467 while (scan < loceol && isDIGIT(*scan))
3468 scan++;
3469 break;
a0ed51b3
LW
3470 case DIGITUTF8:
3471 loceol = PL_regeol;
dfe13c55 3472 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3473 scan += UTF8SKIP(scan);
3474 hardcount++;
3475 }
3476 break;
3477 break;
a0d0e21e
LW
3478 case NDIGIT:
3479 while (scan < loceol && !isDIGIT(*scan))
3480 scan++;
3481 break;
a0ed51b3
LW
3482 case NDIGITUTF8:
3483 loceol = PL_regeol;
dfe13c55 3484 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3485 scan += UTF8SKIP(scan);
3486 hardcount++;
3487 }
3488 break;
a0d0e21e
LW
3489 default: /* Called on something of 0 width. */
3490 break; /* So match right here or not at all. */
3491 }
a687059c 3492
a0ed51b3
LW
3493 if (hardcount)
3494 c = hardcount;
3495 else
3496 c = scan - PL_reginput;
3280af22 3497 PL_reginput = scan;
a687059c 3498
c277df42
IZ
3499 DEBUG_r(
3500 {
3501 SV *prop = sv_newmortal();
3502
3503 regprop(prop, p);
3504 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3505 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3506 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3507 });
3508
a0d0e21e 3509 return(c);
a687059c
LW
3510}
3511
3512/*
c277df42
IZ
3513 - regrepeat_hard - repeatedly match something, report total lenth and length
3514 *
3515 * The repeater is supposed to have constant length.
3516 */
3517
76e3520e 3518STATIC I32
cea2e8a9 3519S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3520{
5c0ca799 3521 dTHR;
c277df42
IZ
3522 register char *scan;
3523 register char *start;
3280af22 3524 register char *loceol = PL_regeol;
a0ed51b3 3525 I32 l = 0;
708e3b05 3526 I32 count = 0, res = 1;
a0ed51b3
LW
3527
3528 if (!max)
3529 return 0;
c277df42 3530
3280af22 3531 start = PL_reginput;
a0ed51b3 3532 if (UTF) {
708e3b05 3533 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3534 if (!count++) {
3535 l = 0;
3536 while (start < PL_reginput) {
3537 l++;
3538 start += UTF8SKIP(start);
3539 }
3540 *lp = l;
3541 if (l == 0)
3542 return max;
3543 }
3544 if (count == max)
3545 return count;
3546 }
3547 }
3548 else {
708e3b05 3549 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3550 if (!count++) {
3551 *lp = l = PL_reginput - start;
3552 if (max != REG_INFTY && l*max < loceol - scan)
3553 loceol = scan + l*max;
3554 if (l == 0)
3555 return max;
c277df42
IZ
3556 }
3557 }
3558 }
708e3b05 3559 if (!res)
3280af22 3560 PL_reginput = scan;
c277df42 3561
a0ed51b3 3562 return count;
c277df42
IZ
3563}
3564
3565/*
cb8d8820 3566 - reginclass - determine if a character falls into a character class
bbce6d69 3567 */
3568
76e3520e 3569STATIC bool
936ed897 3570S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3571{
5c0ca799 3572 dTHR;
b8c5462f 3573 char flags = ANYOF_FLAGS(p);
bbce6d69 3574 bool match = FALSE;
3575
3576 c &= 0xFF;
b8c5462f 3577 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3578 match = TRUE;
3579 else if (flags & ANYOF_FOLD) {
3580 I32 cf;
3581 if (flags & ANYOF_LOCALE) {
3280af22 3582 PL_reg_flags |= RF_tainted;
22c35a8c 3583 cf = PL_fold_locale[c];
bbce6d69 3584 }
3585 else
22c35a8c 3586 cf = PL_fold[c];
b8c5462f 3587 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3588 match = TRUE;
3589 }
3590
b8c5462f 3591 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3592 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3593 if (
3594 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3595 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3596 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3597 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3598 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3599 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3600 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3601 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3602 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3603 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3604 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3605 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3606 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3607 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3608 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3609 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3610 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3611 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3612 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3613 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3614 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3615 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3616 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3617 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3618 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3619 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3620 ) /* How's that for a conditional? */
bbce6d69 3621 {
3622 match = TRUE;
3623 }
3624 }
3625
ae5c130c 3626 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3627}
3628
a0ed51b3 3629STATIC bool
cea2e8a9 3630S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3631{
3632 dTHR;
a0ed51b3
LW
3633 char flags = ARG1(f);
3634 bool match = FALSE;
3635 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3636
3637 if (swash_fetch(sv, p))
3638 match = TRUE;
3639 else if (flags & ANYOF_FOLD) {
806e7201 3640 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3641 if (flags & ANYOF_LOCALE) {
3642 PL_reg_flags |= RF_tainted;
3643 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3644 }
3645 else
3646 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3647 if (swash_fetch(sv, tmpbuf))
3648 match = TRUE;
3649 }
3650
b8c5462f 3651 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3652
3653 return (flags & ANYOF_INVERT) ? !match : match;
3654}
161b471a 3655
dfe13c55 3656STATIC U8 *
cea2e8a9 3657S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3658{
3659 dTHR;
a0ed51b3
LW
3660 if (off >= 0) {
3661 while (off-- && s < (U8*)PL_regeol)
3662 s += UTF8SKIP(s);
3663 }
3664 else {
3665 while (off++) {
3666 if (s > (U8*)PL_bostr) {
3667 s--;
3668 if (*s & 0x80) {
3669 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3670 s--;
3671 } /* XXX could check well-formedness here */
3672 }
3673 }
3674 }
3675 return s;
3676}
161b471a 3677
dfe13c55 3678STATIC U8 *
cea2e8a9 3679S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3680{
c485e607 3681 dTHR;
a0ed51b3
LW
3682 if (off >= 0) {
3683 while (off-- && s < (U8*)PL_regeol)
3684 s += UTF8SKIP(s);
3685 if (off >= 0)
3686 return 0;
3687 }
3688 else {
3689 while (off++) {
3690 if (s > (U8*)PL_bostr) {
3691 s--;
3692 if (*s & 0x80) {
3693 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3694 s--;
3695 } /* XXX could check well-formedness here */
3696 }
3697 else
3698 break;
3699 }
3700 if (off <= 0)
3701 return 0;
3702 }
3703 return s;
3704}
51371543
GS
3705
3706#ifdef PERL_OBJECT
51371543
GS
3707#include "XSUB.h"
3708#endif
3709
3710static void
3711restore_pos(pTHXo_ void *arg)
3712{
3713 dTHR;
3714 if (PL_reg_eval_set) {
3715 if (PL_reg_oldsaved) {
3716 PL_reg_re->subbeg = PL_reg_oldsaved;
3717 PL_reg_re->sublen = PL_reg_oldsavedlen;
3718 RX_MATCH_COPIED_on(PL_reg_re);
3719 }
3720 PL_reg_magic->mg_len = PL_reg_oldpos;
3721 PL_reg_eval_set = 0;
3722 PL_curpm = PL_reg_oldcurpm;
3723 }
3724}
3725