This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
tweaks to enable PERL_OBJECT to build & test on win32
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
28# ifndef DEBUGGING
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
d06ea78c
GS
38/* *These* symbols are masked to allow static link. */
39# define Perl_pregexec my_pregexec
56953603
IZ
40#endif
41
f0fcb552 42/*SUPPRESS 112*/
a687059c 43/*
e50aee73 44 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
45 *
46 * Copyright (c) 1986 by University of Toronto.
47 * Written by Henry Spencer. Not derived from licensed software.
48 *
49 * Permission is granted to anyone to use this software for any
50 * purpose on any computer system, and to redistribute it freely,
51 * subject to the following restrictions:
52 *
53 * 1. The author is not responsible for the consequences of use of
54 * this software, no matter how awful, even if they arise
55 * from defects in it.
56 *
57 * 2. The origin of this software must not be misrepresented, either
58 * by explicit claim or by omission.
59 *
60 * 3. Altered versions must be plainly marked as such, and must not
61 * be misrepresented as being the original software.
62 *
63 **** Alterations to Henry's code are...
64 ****
a0ed51b3 65 **** Copyright (c) 1991-1998, Larry Wall
a687059c 66 ****
9ef589d8
LW
67 **** You may distribute under the terms of either the GNU General Public
68 **** License or the Artistic License, as specified in the README file.
a687059c
LW
69 *
70 * Beware that some of this code is subtly aware of the way operator
71 * precedence is structured in regular expressions. Serious changes in
72 * regular-expression syntax might require a total rethink.
73 */
74#include "EXTERN.h"
75#include "perl.h"
0f5d15d6
IZ
76typedef MAGIC *my_magic;
77
a687059c
LW
78#include "regcomp.h"
79
c277df42
IZ
80#define RF_tainted 1 /* tainted information used? */
81#define RF_warned 2 /* warned about big count? */
ce862d02 82#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
83#define RF_utf8 8 /* String contains multibyte chars? */
84
85#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
86
87#define RS_init 1 /* eval environment created */
88#define RS_set 2 /* replsv value is set */
c277df42 89
a687059c
LW
90#ifndef STATIC
91#define STATIC static
92#endif
93
76e3520e 94#ifndef PERL_OBJECT
a0d0e21e
LW
95typedef I32 CHECKPOINT;
96
c277df42
IZ
97/*
98 * Forwards.
99 */
100
101static I32 regmatch _((regnode *prog));
102static I32 regrepeat _((regnode *p, I32 max));
103static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp));
104static I32 regtry _((regexp *prog, char *startpos));
ae5c130c 105
c277df42 106static bool reginclass _((char *p, I32 c));
a0ed51b3 107static bool reginclassutf8 _((regnode *f, U8* p));
55497cff 108static CHECKPOINT regcppush _((I32 parenfloor));
109static char * regcppop _((void));
942e002e
GS
110static char * regcp_set_to _((I32 ss));
111static void cache_re _((regexp *prog));
76e3520e 112#endif
a0ed51b3 113
ae5c130c 114#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c))
a0ed51b3
LW
115#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
116
117#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
118#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
119
dfe13c55
GS
120#ifndef PERL_OBJECT
121static U8 * reghop _((U8 *pos, I32 off));
122static U8 * reghopmaybe _((U8 *pos, I32 off));
123#endif
124#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
125#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
126#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
127#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
128#define HOPc(pos,off) ((char*)HOP(pos,off))
129#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 130
76e3520e 131STATIC CHECKPOINT
8ac85365 132regcppush(I32 parenfloor)
a0d0e21e 133{
11343788 134 dTHR;
3280af22
NIS
135 int retval = PL_savestack_ix;
136 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
137 int p;
138
139 SSCHECK(i + 5);
3280af22
NIS
140 for (p = PL_regsize; p > parenfloor; p--) {
141 SSPUSHPTR(PL_regendp[p]);
142 SSPUSHPTR(PL_regstartp[p]);
143 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
144 SSPUSHINT(p);
145 }
3280af22
NIS
146 SSPUSHINT(PL_regsize);
147 SSPUSHINT(*PL_reglastparen);
148 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
149 SSPUSHINT(i + 3);
150 SSPUSHINT(SAVEt_REGCONTEXT);
151 return retval;
152}
153
c277df42 154/* These are needed since we do not localize EVAL nodes: */
c3464db5
DD
155# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
156 " Setting an EVAL scope, savestack=%i\n", \
3280af22 157 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 158
3280af22 159# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5
DD
160 PerlIO_printf(Perl_debug_log, \
161 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 162 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 163
76e3520e 164STATIC char *
8ac85365 165regcppop(void)
a0d0e21e 166{
11343788 167 dTHR;
a0d0e21e
LW
168 I32 i = SSPOPINT;
169 U32 paren = 0;
170 char *input;
171 char *tmps;
172 assert(i == SAVEt_REGCONTEXT);
173 i = SSPOPINT;
174 input = (char *) SSPOPPTR;
3280af22
NIS
175 *PL_reglastparen = SSPOPINT;
176 PL_regsize = SSPOPINT;
c277df42 177 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 178 paren = (U32)SSPOPINT;
3280af22
NIS
179 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
180 PL_regstartp[paren] = (char *) SSPOPPTR;
a0d0e21e 181 tmps = (char*)SSPOPPTR;
3280af22
NIS
182 if (paren <= *PL_reglastparen)
183 PL_regendp[paren] = tmps;
c277df42 184 DEBUG_r(
c3464db5
DD
185 PerlIO_printf(Perl_debug_log,
186 " restoring \\%d to %d(%d)..%d%s\n",
3280af22
NIS
187 paren, PL_regstartp[paren] - PL_regbol,
188 PL_reg_start_tmp[paren] - PL_regbol,
189 PL_regendp[paren] - PL_regbol,
190 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 191 );
a0d0e21e 192 }
c277df42 193 DEBUG_r(
3280af22 194 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5
DD
195 PerlIO_printf(Perl_debug_log,
196 " restoring \\%d..\\%d to undef\n",
3280af22 197 *PL_reglastparen + 1, PL_regnpar);
c277df42
IZ
198 }
199 );
3280af22
NIS
200 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
201 if (paren > PL_regsize)
202 PL_regstartp[paren] = Nullch;
203 PL_regendp[paren] = Nullch;
a0d0e21e
LW
204 }
205 return input;
206}
207
0f5d15d6
IZ
208STATIC char *
209regcp_set_to(I32 ss)
210{
211 I32 tmp = PL_savestack_ix;
212
213 PL_savestack_ix = ss;
214 regcppop();
215 PL_savestack_ix = tmp;
942e002e 216 return Nullch;
0f5d15d6
IZ
217}
218
219typedef struct re_cc_state
220{
221 I32 ss;
222 regnode *node;
223 struct re_cc_state *prev;
224 CURCUR *cc;
225 regexp *re;
226} re_cc_state;
227
c277df42 228#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 229
a687059c 230/*
e50aee73 231 * pregexec and friends
a687059c
LW
232 */
233
234/*
c277df42 235 - pregexec - match a regexp against a string
a687059c 236 */
c277df42 237I32
c3464db5
DD
238pregexec(register regexp *prog, char *stringarg, register char *strend,
239 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
240/* strend: pointer to null at end of string */
241/* strbeg: real beginning of string */
242/* minend: end of match must be >=minend after stringarg. */
243/* nosave: For optimizations. */
244{
245 return
246 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
247 nosave ? 0 : REXEC_COPY_STR);
248}
0f5d15d6
IZ
249
250STATIC void
251cache_re(regexp *prog)
252{
253 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
254#ifdef DEBUGGING
255 PL_regprogram = prog->program;
256#endif
257 PL_regnpar = prog->nparens;
258 PL_regdata = prog->data;
259 PL_reg_re = prog;
260}
c277df42 261
a687059c 262/*
c277df42 263 - regexec_flags - match a regexp against a string
a687059c 264 */
79072805 265I32
c3464db5
DD
266regexec_flags(register regexp *prog, char *stringarg, register char *strend,
267 char *strbeg, I32 minend, SV *screamer, void *data, U32 flags)
c277df42
IZ
268/* strend: pointer to null at end of string */
269/* strbeg: real beginning of string */
270/* minend: end of match must be >=minend after stringarg. */
271/* data: May be used for some additional optimizations. */
272/* nosave: For optimizations. */
a687059c 273{
5c0ca799 274 dTHR;
a0d0e21e 275 register char *s;
c277df42 276 register regnode *c;
a0d0e21e
LW
277 register char *startpos = stringarg;
278 register I32 tmp;
c277df42 279 I32 minlen; /* must match at least this many chars */
a0d0e21e
LW
280 I32 dontbother = 0; /* how many characters not to try at end */
281 CURCUR cc;
c277df42 282 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
283 constant substr. */ /* CC */
284 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
285 I32 scream_pos = -1; /* Internal iterator of scream. */
286 char *scream_olds;
3280af22 287 SV* oreplsv = GvSV(PL_replgv);
a687059c 288
a0d0e21e 289 cc.cur = 0;
4633a7c4 290 cc.oldcc = 0;
3280af22 291 PL_regcc = &cc;
a0d0e21e 292
0f5d15d6 293 cache_re(prog);
a0d0e21e 294#ifdef DEBUGGING
3280af22 295 PL_regnarrate = PL_debug & 512;
a0d0e21e
LW
296#endif
297
298 /* Be paranoid... */
299 if (prog == NULL || startpos == NULL) {
300 croak("NULL regexp parameter");
301 return 0;
302 }
303
c277df42
IZ
304 minlen = prog->minlen;
305 if (strend - startpos < minlen) goto phooey;
306
a0d0e21e 307 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 308 PL_regprev = '\n';
a0d0e21e 309 else {
a0ed51b3 310 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
311 if (!PL_multiline && PL_regprev == '\n')
312 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 313 }
bbce6d69 314
a0d0e21e
LW
315 /* Check validity of program. */
316 if (UCHARAT(prog->program) != MAGIC) {
317 FAIL("corrupted regexp program");
318 }
319
3280af22
NIS
320 PL_reg_flags = 0;
321 PL_reg_eval_set = 0;
a0d0e21e 322
a0ed51b3
LW
323 if (prog->reganch & ROPT_UTF8)
324 PL_reg_flags |= RF_utf8;
325
326 /* Mark beginning of line for ^ and lookbehind. */
327 PL_regbol = startpos;
328 PL_bostr = strbeg;
329
330 /* Mark end of line for $ (and such) */
331 PL_regeol = strend;
332
333 /* see how far we have to get to not match where we matched before */
334 PL_regtill = startpos+minend;
335
0f5d15d6
IZ
336 /* We start without call_cc context. */
337 PL_reg_call_cc = 0;
338
a0d0e21e
LW
339 /* If there is a "must appear" string, look for it. */
340 s = startpos;
c277df42
IZ
341 if (!(flags & REXEC_CHECKED)
342 && prog->check_substr != Nullsv &&
774d564b 343 !(prog->reganch & ROPT_ANCH_GPOS) &&
c277df42 344 (!(prog->reganch & (ROPT_ANCH_BOL | ROPT_ANCH_MBOL))
3280af22 345 || (PL_multiline && prog->check_substr == prog->anchored_substr)) )
a0d0e21e 346 {
a0ed51b3
LW
347 char *t;
348 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
c277df42 349 /* Should be nonnegative! */
a0ed51b3 350 end_shift = minlen - start_shift - CHR_SVLEN(prog->check_substr);
c277df42 351 if (screamer) {
3280af22 352 if (PL_screamfirst[BmRARE(prog->check_substr)] >= 0)
c277df42
IZ
353 s = screaminstr(screamer, prog->check_substr,
354 start_shift + (stringarg - strbeg),
355 end_shift, &scream_pos, 0);
a0d0e21e
LW
356 else
357 s = Nullch;
c277df42 358 scream_olds = s;
0a12ae7d 359 }
a0d0e21e 360 else
c277df42
IZ
361 s = fbm_instr((unsigned char*)s + start_shift,
362 (unsigned char*)strend - end_shift,
411d5715 363 prog->check_substr, 0);
a0d0e21e 364 if (!s) {
c277df42 365 ++BmUSEFUL(prog->check_substr); /* hooray */
a0d0e21e 366 goto phooey; /* not present */
a0ed51b3
LW
367 }
368 else if (s - stringarg > prog->check_offset_max &&
369 (UTF
dfe13c55 370 ? ((t = reghopmaybe_c(s, -(prog->check_offset_max))) && t >= stringarg)
a0ed51b3
LW
371 : (t = s - prog->check_offset_max) != 0
372 )
373 )
374 {
c277df42 375 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
a0ed51b3
LW
376 s = t;
377 }
378 else if (!(prog->reganch & ROPT_NAUGHTY)
c277df42
IZ
379 && --BmUSEFUL(prog->check_substr) < 0
380 && prog->check_substr == prog->float_substr) { /* boo */
381 SvREFCNT_dec(prog->check_substr);
382 prog->check_substr = Nullsv; /* disable */
383 prog->float_substr = Nullsv; /* clear */
a0d0e21e 384 s = startpos;
a0ed51b3
LW
385 }
386 else
387 s = startpos;
a0d0e21e 388 }
a687059c 389
c277df42
IZ
390 DEBUG_r(
391 PerlIO_printf(Perl_debug_log,
8d300b32
GS
392 "%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
393 PL_colors[4],PL_colors[5],PL_colors[0],
394 prog->precomp,
395 PL_colors[1],
c277df42 396 (strlen(prog->precomp) > 60 ? "..." : ""),
8d300b32 397 PL_colors[0],
c277df42 398 (strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 399 startpos, PL_colors[1],
c277df42
IZ
400 (strend - startpos > 60 ? "..." : ""))
401 );
402
a0d0e21e 403 /* Simplest case: anchored match need be tried only once. */
774d564b 404 /* [unless only anchor is BOL and multiline is set] */
a0d0e21e
LW
405 if (prog->reganch & ROPT_ANCH) {
406 if (regtry(prog, startpos))
407 goto got_it;
774d564b 408 else if (!(prog->reganch & ROPT_ANCH_GPOS) &&
3280af22 409 (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
c277df42 410 || (prog->reganch & ROPT_ANCH_MBOL)))
774d564b 411 {
a0d0e21e
LW
412 if (minlen)
413 dontbother = minlen - 1;
dfe13c55 414 strend = HOPc(strend, -dontbother);
a0d0e21e
LW
415 /* for multiline we only have to try after newlines */
416 if (s > startpos)
417 s--;
418 while (s < strend) {
a0ed51b3 419 if (*s++ == '\n') { /* don't need utf8skip here */
a0d0e21e
LW
420 if (s < strend && regtry(prog, s))
421 goto got_it;
422 }
35c8bce7 423 }
35c8bce7 424 }
a0d0e21e
LW
425 goto phooey;
426 }
35c8bce7 427
a0d0e21e 428 /* Messy cases: unanchored match. */
c277df42
IZ
429 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
430 /* we have /x+whatever/ */
431 /* it must be a one character string */
432 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
433 if (UTF) {
434 while (s < strend) {
435 if (*s == ch) {
436 if (regtry(prog, s)) goto got_it;
437 s += UTF8SKIP(s);
438 while (s < strend && *s == ch)
439 s += UTF8SKIP(s);
440 }
441 s += UTF8SKIP(s);
442 }
443 }
444 else {
445 while (s < strend) {
446 if (*s == ch) {
447 if (regtry(prog, s)) goto got_it;
c277df42 448 s++;
a0ed51b3
LW
449 while (s < strend && *s == ch)
450 s++;
451 }
452 s++;
a0d0e21e 453 }
a687059c 454 }
c277df42
IZ
455 }
456 /*SUPPRESS 560*/
457 else if (prog->anchored_substr != Nullsv
458 || (prog->float_substr != Nullsv
459 && prog->float_max_offset < strend - s)) {
460 SV *must = prog->anchored_substr
461 ? prog->anchored_substr : prog->float_substr;
462 I32 back_max =
463 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
464 I32 back_min =
465 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
466 I32 delta = back_max - back_min;
dfe13c55 467 char *last = HOPc(strend, 0-(CHR_SVLEN(must) + back_min)); /* Cannot start after this */
a0ed51b3
LW
468 char *last1; /* Last position checked before */
469
470 if (s > PL_bostr)
dfe13c55 471 last1 = HOPc(s, -1);
a0ed51b3
LW
472 else
473 last1 = s - 1; /* bogus */
c277df42
IZ
474
475 /* XXXX check_substr already used to find `s', can optimize if
476 check_substr==must. */
477 scream_pos = -1;
478 dontbother = end_shift;
dfe13c55 479 strend = HOPc(strend, -dontbother);
c277df42
IZ
480 while ( (s <= last) &&
481 (screamer
dfe13c55 482 ? (s = screaminstr(screamer, must, HOPc(s, back_min) - strbeg,
c277df42 483 end_shift, &scream_pos, 0))
a0ed51b3 484 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
411d5715 485 (unsigned char*)strend, must, 0))) ) {
dfe13c55
GS
486 if (HOPc(s, -back_max) > last1) {
487 last1 = HOPc(s, -back_min);
488 s = HOPc(s, -back_max);
a0ed51b3
LW
489 }
490 else {
dfe13c55 491 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 492
dfe13c55 493 last1 = HOPc(s, -back_min);
c277df42 494 s = t;
a0d0e21e 495 }
a0ed51b3
LW
496 if (UTF) {
497 while (s <= last1) {
498 if (regtry(prog, s))
499 goto got_it;
500 s += UTF8SKIP(s);
501 }
502 }
503 else {
504 while (s <= last1) {
505 if (regtry(prog, s))
506 goto got_it;
507 s++;
508 }
a0d0e21e
LW
509 }
510 }
511 goto phooey;
a0ed51b3
LW
512 }
513 else if (c = prog->regstclass) {
a0d0e21e 514 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 515 char *cc;
a687059c 516
a0d0e21e
LW
517 if (minlen)
518 dontbother = minlen - 1;
dfe13c55 519 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
520 tmp = 1;
521 /* We know what class it must start with. */
522 switch (OP(c)) {
a0ed51b3
LW
523 case ANYOFUTF8:
524 cc = (char *) OPERAND(c);
525 while (s < strend) {
526 if (REGINCLASSUTF8(c, (U8*)s)) {
527 if (tmp && regtry(prog, s))
528 goto got_it;
529 else
530 tmp = doevery;
531 }
532 else
533 tmp = 1;
534 s += UTF8SKIP(s);
535 }
536 break;
a0d0e21e 537 case ANYOF:
a0ed51b3 538 cc = (char *) OPERAND(c);
a0d0e21e 539 while (s < strend) {
a0ed51b3 540 if (REGINCLASS(cc, *s)) {
a0d0e21e
LW
541 if (tmp && regtry(prog, s))
542 goto got_it;
543 else
544 tmp = doevery;
a687059c 545 }
a0d0e21e
LW
546 else
547 tmp = 1;
548 s++;
549 }
550 break;
bbce6d69 551 case BOUNDL:
3280af22 552 PL_reg_flags |= RF_tainted;
bbce6d69 553 /* FALL THROUGH */
a0d0e21e 554 case BOUND:
a0ed51b3
LW
555 if (minlen) {
556 dontbother++;
557 strend -= 1;
558 }
3280af22 559 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 560 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 561 while (s < strend) {
95bac841 562 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
563 tmp = !tmp;
564 if (regtry(prog, s))
565 goto got_it;
a687059c 566 }
a0d0e21e
LW
567 s++;
568 }
569 if ((minlen || tmp) && regtry(prog,s))
570 goto got_it;
571 break;
a0ed51b3
LW
572 case BOUNDLUTF8:
573 PL_reg_flags |= RF_tainted;
574 /* FALL THROUGH */
575 case BOUNDUTF8:
576 if (minlen) {
577 dontbother++;
dfe13c55 578 strend = reghop_c(strend, -1);
a0ed51b3 579 }
dfe13c55 580 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
581 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
582 while (s < strend) {
dfe13c55
GS
583 if (tmp == !(OP(c) == BOUND ?
584 swash_fetch(PL_utf8_alnum, (U8*)s) :
585 isALNUM_LC_utf8((U8*)s)))
586 {
a0ed51b3
LW
587 tmp = !tmp;
588 if (regtry(prog, s))
589 goto got_it;
590 }
591 s += UTF8SKIP(s);
592 }
593 if ((minlen || tmp) && regtry(prog,s))
594 goto got_it;
595 break;
bbce6d69 596 case NBOUNDL:
3280af22 597 PL_reg_flags |= RF_tainted;
bbce6d69 598 /* FALL THROUGH */
a0d0e21e 599 case NBOUND:
a0ed51b3
LW
600 if (minlen) {
601 dontbother++;
602 strend -= 1;
603 }
3280af22 604 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 605 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 606 while (s < strend) {
95bac841 607 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
608 tmp = !tmp;
609 else if (regtry(prog, s))
610 goto got_it;
611 s++;
612 }
613 if ((minlen || !tmp) && regtry(prog,s))
614 goto got_it;
615 break;
a0ed51b3
LW
616 case NBOUNDLUTF8:
617 PL_reg_flags |= RF_tainted;
618 /* FALL THROUGH */
619 case NBOUNDUTF8:
620 if (minlen) {
621 dontbother++;
dfe13c55 622 strend = reghop_c(strend, -1);
a0ed51b3 623 }
dfe13c55 624 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
625 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
626 while (s < strend) {
dfe13c55
GS
627 if (tmp == !(OP(c) == NBOUND ?
628 swash_fetch(PL_utf8_alnum, (U8*)s) :
629 isALNUM_LC_utf8((U8*)s)))
a0ed51b3
LW
630 tmp = !tmp;
631 else if (regtry(prog, s))
632 goto got_it;
633 s += UTF8SKIP(s);
634 }
635 if ((minlen || !tmp) && regtry(prog,s))
636 goto got_it;
637 break;
a0d0e21e
LW
638 case ALNUM:
639 while (s < strend) {
bbce6d69 640 if (isALNUM(*s)) {
641 if (tmp && regtry(prog, s))
642 goto got_it;
643 else
644 tmp = doevery;
645 }
646 else
647 tmp = 1;
648 s++;
649 }
650 break;
a0ed51b3
LW
651 case ALNUMUTF8:
652 while (s < strend) {
dfe13c55 653 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
654 if (tmp && regtry(prog, s))
655 goto got_it;
656 else
657 tmp = doevery;
658 }
659 else
660 tmp = 1;
661 s += UTF8SKIP(s);
662 }
663 break;
bbce6d69 664 case ALNUML:
3280af22 665 PL_reg_flags |= RF_tainted;
bbce6d69 666 while (s < strend) {
667 if (isALNUM_LC(*s)) {
a0d0e21e
LW
668 if (tmp && regtry(prog, s))
669 goto got_it;
a687059c 670 else
a0d0e21e
LW
671 tmp = doevery;
672 }
673 else
674 tmp = 1;
675 s++;
676 }
677 break;
a0ed51b3
LW
678 case ALNUMLUTF8:
679 PL_reg_flags |= RF_tainted;
680 while (s < strend) {
dfe13c55 681 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
682 if (tmp && regtry(prog, s))
683 goto got_it;
684 else
685 tmp = doevery;
686 }
687 else
688 tmp = 1;
689 s += UTF8SKIP(s);
690 }
691 break;
a0d0e21e
LW
692 case NALNUM:
693 while (s < strend) {
bbce6d69 694 if (!isALNUM(*s)) {
695 if (tmp && regtry(prog, s))
696 goto got_it;
697 else
698 tmp = doevery;
699 }
700 else
701 tmp = 1;
702 s++;
703 }
704 break;
a0ed51b3
LW
705 case NALNUMUTF8:
706 while (s < strend) {
dfe13c55 707 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
708 if (tmp && regtry(prog, s))
709 goto got_it;
710 else
711 tmp = doevery;
712 }
713 else
714 tmp = 1;
715 s += UTF8SKIP(s);
716 }
717 break;
bbce6d69 718 case NALNUML:
3280af22 719 PL_reg_flags |= RF_tainted;
bbce6d69 720 while (s < strend) {
721 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
722 if (tmp && regtry(prog, s))
723 goto got_it;
a687059c 724 else
a0d0e21e 725 tmp = doevery;
a687059c 726 }
a0d0e21e
LW
727 else
728 tmp = 1;
729 s++;
730 }
731 break;
a0ed51b3
LW
732 case NALNUMLUTF8:
733 PL_reg_flags |= RF_tainted;
734 while (s < strend) {
dfe13c55 735 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
736 if (tmp && regtry(prog, s))
737 goto got_it;
738 else
739 tmp = doevery;
740 }
741 else
742 tmp = 1;
743 s += UTF8SKIP(s);
744 }
745 break;
a0d0e21e
LW
746 case SPACE:
747 while (s < strend) {
748 if (isSPACE(*s)) {
749 if (tmp && regtry(prog, s))
750 goto got_it;
751 else
752 tmp = doevery;
2304df62 753 }
a0d0e21e
LW
754 else
755 tmp = 1;
756 s++;
757 }
758 break;
a0ed51b3
LW
759 case SPACEUTF8:
760 while (s < strend) {
dfe13c55 761 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3
LW
762 if (tmp && regtry(prog, s))
763 goto got_it;
764 else
765 tmp = doevery;
766 }
767 else
768 tmp = 1;
769 s += UTF8SKIP(s);
770 }
771 break;
bbce6d69 772 case SPACEL:
3280af22 773 PL_reg_flags |= RF_tainted;
bbce6d69 774 while (s < strend) {
775 if (isSPACE_LC(*s)) {
776 if (tmp && regtry(prog, s))
777 goto got_it;
778 else
779 tmp = doevery;
780 }
781 else
782 tmp = 1;
783 s++;
784 }
785 break;
a0ed51b3
LW
786 case SPACELUTF8:
787 PL_reg_flags |= RF_tainted;
788 while (s < strend) {
dfe13c55 789 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3
LW
790 if (tmp && regtry(prog, s))
791 goto got_it;
792 else
793 tmp = doevery;
794 }
795 else
796 tmp = 1;
797 s += UTF8SKIP(s);
798 }
799 break;
a0d0e21e
LW
800 case NSPACE:
801 while (s < strend) {
802 if (!isSPACE(*s)) {
803 if (tmp && regtry(prog, s))
804 goto got_it;
805 else
806 tmp = doevery;
a687059c 807 }
a0d0e21e
LW
808 else
809 tmp = 1;
810 s++;
811 }
812 break;
a0ed51b3
LW
813 case NSPACEUTF8:
814 while (s < strend) {
dfe13c55 815 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3
LW
816 if (tmp && regtry(prog, s))
817 goto got_it;
818 else
819 tmp = doevery;
820 }
821 else
822 tmp = 1;
823 s += UTF8SKIP(s);
824 }
825 break;
bbce6d69 826 case NSPACEL:
3280af22 827 PL_reg_flags |= RF_tainted;
bbce6d69 828 while (s < strend) {
829 if (!isSPACE_LC(*s)) {
830 if (tmp && regtry(prog, s))
831 goto got_it;
832 else
833 tmp = doevery;
834 }
835 else
836 tmp = 1;
837 s++;
838 }
839 break;
a0ed51b3
LW
840 case NSPACELUTF8:
841 PL_reg_flags |= RF_tainted;
842 while (s < strend) {
dfe13c55 843 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3
LW
844 if (tmp && regtry(prog, s))
845 goto got_it;
846 else
847 tmp = doevery;
848 }
849 else
850 tmp = 1;
851 s += UTF8SKIP(s);
852 }
853 break;
a0d0e21e
LW
854 case DIGIT:
855 while (s < strend) {
856 if (isDIGIT(*s)) {
857 if (tmp && regtry(prog, s))
858 goto got_it;
859 else
860 tmp = doevery;
2b69d0c2 861 }
a0d0e21e
LW
862 else
863 tmp = 1;
864 s++;
865 }
866 break;
a0ed51b3
LW
867 case DIGITUTF8:
868 while (s < strend) {
dfe13c55 869 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
870 if (tmp && regtry(prog, s))
871 goto got_it;
872 else
873 tmp = doevery;
874 }
875 else
876 tmp = 1;
877 s += UTF8SKIP(s);
878 }
879 break;
a0d0e21e
LW
880 case NDIGIT:
881 while (s < strend) {
882 if (!isDIGIT(*s)) {
883 if (tmp && regtry(prog, s))
884 goto got_it;
885 else
886 tmp = doevery;
a687059c 887 }
a0d0e21e
LW
888 else
889 tmp = 1;
890 s++;
891 }
892 break;
a0ed51b3
LW
893 case NDIGITUTF8:
894 while (s < strend) {
dfe13c55 895 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
896 if (tmp && regtry(prog, s))
897 goto got_it;
898 else
899 tmp = doevery;
900 }
901 else
902 tmp = 1;
903 s += UTF8SKIP(s);
904 }
905 break;
a687059c 906 }
a0d0e21e
LW
907 }
908 else {
c277df42
IZ
909 dontbother = 0;
910 if (prog->float_substr != Nullsv) { /* Trim the end. */
911 char *last;
912 I32 oldpos = scream_pos;
913
914 if (screamer) {
915 last = screaminstr(screamer, prog->float_substr, s - strbeg,
916 end_shift, &scream_pos, 1); /* last one */
917 if (!last) {
918 last = scream_olds; /* Only one occurence. */
919 }
a0ed51b3
LW
920 }
921 else {
c277df42
IZ
922 STRLEN len;
923 char *little = SvPV(prog->float_substr, len);
19b4f81a
JPC
924 if (len)
925 last = rninstr(s, strend, little, little + len);
926 else
927 last = strend; /* matching `$' */
c277df42
IZ
928 }
929 if (last == NULL) goto phooey; /* Should not happen! */
19b4f81a 930 dontbother = strend - last + prog->float_min_offset;
c277df42
IZ
931 }
932 if (minlen && (dontbother < minlen))
a0d0e21e 933 dontbother = minlen - 1;
a0ed51b3 934 strend -= dontbother; /* this one's always in bytes! */
a0d0e21e 935 /* We don't know much -- general case. */
a0ed51b3
LW
936 if (UTF) {
937 for (;;) {
84df6dba 938 if (regtry(prog, s))
a0ed51b3 939 goto got_it;
a0ed51b3
LW
940 if (s >= strend)
941 break;
942 s += UTF8SKIP(s);
943 };
944 }
945 else {
946 do {
947 if (regtry(prog, s))
948 goto got_it;
949 } while (s++ < strend);
950 }
a0d0e21e
LW
951 }
952
953 /* Failure. */
954 goto phooey;
a687059c 955
a0d0e21e
LW
956got_it:
957 prog->subbeg = strbeg;
19b4f81a 958 prog->subend = PL_regeol; /* strend may have been modified */
3280af22 959 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
5f05dabc 960
961 /* make sure $`, $&, $', and $digit will work later */
c277df42
IZ
962 if (strbeg != prog->subbase) { /* second+ //g match. */
963 if (!(flags & REXEC_COPY_STR)) {
137443ea 964 if (prog->subbase) {
965 Safefree(prog->subbase);
966 prog->subbase = Nullch;
967 }
968 }
969 else {
19b4f81a 970 I32 i = PL_regeol - startpos + (stringarg - strbeg);
137443ea 971 s = savepvn(strbeg, i);
972 Safefree(prog->subbase);
973 prog->subbase = s;
974 prog->subbeg = prog->subbase;
975 prog->subend = prog->subbase + i;
976 s = prog->subbase + (stringarg - strbeg);
977 for (i = 0; i <= prog->nparens; i++) {
978 if (prog->endp[i]) {
979 prog->startp[i] = s + (prog->startp[i] - startpos);
980 prog->endp[i] = s + (prog->endp[i] - startpos);
981 }
a0d0e21e
LW
982 }
983 }
a0d0e21e 984 }
ce862d02 985 /* Preserve the current value of $^R */
3280af22
NIS
986 if (oreplsv != GvSV(PL_replgv)) {
987 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
ce862d02
IZ
988 restored, the value remains
989 the same. */
990 }
a0d0e21e
LW
991 return 1;
992
993phooey:
a0d0e21e 994 return 0;
a687059c
LW
995}
996
997/*
998 - regtry - try match at specific point
999 */
76e3520e 1000STATIC I32 /* 0 failure, 1 success */
8ac85365 1001regtry(regexp *prog, char *startpos)
a687059c 1002{
c277df42 1003 dTHR;
a0d0e21e
LW
1004 register I32 i;
1005 register char **sp;
1006 register char **ep;
c277df42 1007 CHECKPOINT lastcp;
a0d0e21e 1008
3280af22
NIS
1009 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1010 PL_reg_eval_set = RS_init;
ce862d02 1011 DEBUG_r(DEBUG_s(
c3464db5 1012 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
3280af22 1013 PL_stack_sp - PL_stack_base);
ce862d02
IZ
1014 ));
1015 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
3280af22 1016 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
ce862d02
IZ
1017 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1018 SAVETMPS;
1019 /* Apparently this is not needed, judging by wantarray. */
1020 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1021 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1022 }
3280af22
NIS
1023 PL_reginput = startpos;
1024 PL_regstartp = prog->startp;
1025 PL_regendp = prog->endp;
1026 PL_reglastparen = &prog->lastparen;
a0d0e21e 1027 prog->lastparen = 0;
3280af22
NIS
1028 PL_regsize = 0;
1029 if (PL_reg_start_tmpl <= prog->nparens) {
1030 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1031 if(PL_reg_start_tmp)
1032 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 1033 else
3280af22 1034 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 1035 }
a0d0e21e
LW
1036
1037 sp = prog->startp;
1038 ep = prog->endp;
1039 if (prog->nparens) {
1040 for (i = prog->nparens; i >= 0; i--) {
1041 *sp++ = NULL;
1042 *ep++ = NULL;
a687059c 1043 }
a0d0e21e 1044 }
c277df42 1045 REGCP_SET;
7e5428c5 1046 if (regmatch(prog->program + 1)) {
a0d0e21e 1047 prog->startp[0] = startpos;
3280af22 1048 prog->endp[0] = PL_reginput;
a0d0e21e
LW
1049 return 1;
1050 }
c277df42
IZ
1051 REGCP_UNWIND;
1052 return 0;
a687059c
LW
1053}
1054
1055/*
1056 - regmatch - main matching routine
1057 *
1058 * Conceptually the strategy is simple: check to see whether the current
1059 * node matches, call self recursively to see whether the rest matches,
1060 * and then act accordingly. In practice we make some effort to avoid
1061 * recursion, in particular by going through "ordinary" nodes (that don't
1062 * need to know whether the rest of the match failed) by a loop instead of
1063 * by recursion.
1064 */
1065/* [lwall] I've hoisted the register declarations to the outer block in order to
1066 * maybe save a little bit of pushing and popping on the stack. It also takes
1067 * advantage of machines that use a register save mask on subroutine entry.
1068 */
76e3520e 1069STATIC I32 /* 0 failure, 1 success */
c277df42 1070regmatch(regnode *prog)
a687059c 1071{
c277df42
IZ
1072 dTHR;
1073 register regnode *scan; /* Current node. */
1074 regnode *next; /* Next node. */
1075 regnode *inner; /* Next node in internal branch. */
c3464db5
DD
1076 register I32 nextchr; /* renamed nextchr - nextchar colides with
1077 function of same name */
a0d0e21e
LW
1078 register I32 n; /* no or next */
1079 register I32 ln; /* len or last */
1080 register char *s; /* operand or save */
3280af22 1081 register char *locinput = PL_reginput;
c277df42
IZ
1082 register I32 c1, c2, paren; /* case fold search, parenth */
1083 int minmod = 0, sw = 0, logical = 0;
4633a7c4 1084#ifdef DEBUGGING
3280af22 1085 PL_regindent++;
4633a7c4 1086#endif
a0d0e21e 1087
a0ed51b3 1088 /* Note that nextchr is a byte even in UTF */
76e3520e 1089 nextchr = UCHARAT(locinput);
a0d0e21e
LW
1090 scan = prog;
1091 while (scan != NULL) {
c277df42 1092#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
a687059c 1093#ifdef DEBUGGING
c277df42
IZ
1094# define sayYES goto yes
1095# define sayNO goto no
1096# define saySAME(x) if (x) goto yes; else goto no
1097# define REPORT_CODE_OFF 24
4633a7c4 1098#else
c277df42
IZ
1099# define sayYES return 1
1100# define sayNO return 0
1101# define saySAME(x) return x
a687059c 1102#endif
c277df42
IZ
1103 DEBUG_r( {
1104 SV *prop = sv_newmortal();
3280af22 1105 int docolor = *PL_colors[0];
c277df42 1106 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
3280af22
NIS
1107 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1108 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1109 ? (5 + taill) - l : locinput - PL_bostr);
8d300b32 1110 int pref0_len = pref_len - (locinput - PL_reginput);
c277df42 1111
3280af22
NIS
1112 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1113 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1114 ? (5 + taill) - pref_len : PL_regeol - locinput);
8d300b32
GS
1115 if (pref0_len < 0)
1116 pref0_len = 0;
c277df42
IZ
1117 regprop(prop, scan);
1118 PerlIO_printf(Perl_debug_log,
8d300b32 1119 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
3280af22 1120 locinput - PL_bostr,
8d300b32
GS
1121 PL_colors[4], pref0_len,
1122 locinput - pref_len, PL_colors[5],
1123 PL_colors[2], pref_len - pref0_len,
1124 locinput - pref_len + pref0_len, PL_colors[3],
c277df42 1125 (docolor ? "" : "> <"),
3280af22 1126 PL_colors[0], l, locinput, PL_colors[1],
c277df42
IZ
1127 15 - l - pref_len + 1,
1128 "",
3280af22 1129 scan - PL_regprogram, PL_regindent*2, "",
c277df42
IZ
1130 SvPVX(prop));
1131 } );
a687059c 1132
c277df42 1133 next = scan + NEXT_OFF(scan);
a0d0e21e
LW
1134 if (next == scan)
1135 next = NULL;
a687059c 1136
a0d0e21e
LW
1137 switch (OP(scan)) {
1138 case BOL:
3280af22
NIS
1139 if (locinput == PL_bostr
1140 ? PL_regprev == '\n'
1141 : (PL_multiline &&
1142 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e 1143 {
a0ed51b3 1144 /* regtill = regbol; */
a0d0e21e
LW
1145 break;
1146 }
4633a7c4 1147 sayNO;
a0d0e21e 1148 case MBOL:
3280af22
NIS
1149 if (locinput == PL_bostr
1150 ? PL_regprev == '\n'
1151 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e
LW
1152 {
1153 break;
1154 }
4633a7c4 1155 sayNO;
a0d0e21e 1156 case SBOL:
3280af22 1157 if (locinput == PL_regbol && PL_regprev == '\n')
a0d0e21e 1158 break;
4633a7c4 1159 sayNO;
774d564b 1160 case GPOS:
3280af22 1161 if (locinput == PL_regbol)
a0d0e21e 1162 break;
4633a7c4 1163 sayNO;
a0d0e21e 1164 case EOL:
3280af22 1165 if (PL_multiline)
a0d0e21e
LW
1166 goto meol;
1167 else
1168 goto seol;
1169 case MEOL:
1170 meol:
3280af22 1171 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1172 sayNO;
a0d0e21e
LW
1173 break;
1174 case SEOL:
1175 seol:
3280af22 1176 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1177 sayNO;
3280af22 1178 if (PL_regeol - locinput > 1)
4633a7c4 1179 sayNO;
a0d0e21e 1180 break;
b85d18e9 1181 case EOS:
3280af22 1182 if (PL_regeol != locinput)
b85d18e9
IZ
1183 sayNO;
1184 break;
a0ed51b3
LW
1185 case SANYUTF8:
1186 if (nextchr & 0x80) {
a176fa2a 1187 locinput += utf8skip[nextchr];
a0ed51b3
LW
1188 if (locinput > PL_regeol)
1189 sayNO;
1190 nextchr = UCHARAT(locinput);
1191 break;
1192 }
1193 if (!nextchr && locinput >= PL_regeol)
1194 sayNO;
1195 nextchr = UCHARAT(++locinput);
1196 break;
a0d0e21e 1197 case SANY:
3280af22 1198 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1199 sayNO;
76e3520e 1200 nextchr = UCHARAT(++locinput);
a0d0e21e 1201 break;
a0ed51b3
LW
1202 case ANYUTF8:
1203 if (nextchr & 0x80) {
a176fa2a 1204 locinput += utf8skip[nextchr];
a0ed51b3
LW
1205 if (locinput > PL_regeol)
1206 sayNO;
1207 nextchr = UCHARAT(locinput);
1208 break;
1209 }
1210 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
1211 sayNO;
1212 nextchr = UCHARAT(++locinput);
1213 break;
a0d0e21e 1214 case ANY:
3280af22 1215 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1216 sayNO;
76e3520e 1217 nextchr = UCHARAT(++locinput);
a0d0e21e 1218 break;
bbce6d69 1219 case EXACT:
161b471a 1220 s = (char *) OPERAND(scan);
c277df42 1221 ln = UCHARAT(s++);
a0d0e21e 1222 /* Inline the first character, for speed. */
76e3520e 1223 if (UCHARAT(s) != nextchr)
4633a7c4 1224 sayNO;
3280af22 1225 if (PL_regeol - locinput < ln)
4633a7c4 1226 sayNO;
36477c24 1227 if (ln > 1 && memNE(s, locinput, ln))
4633a7c4 1228 sayNO;
a0d0e21e 1229 locinput += ln;
76e3520e 1230 nextchr = UCHARAT(locinput);
bbce6d69 1231 break;
1232 case EXACTFL:
3280af22 1233 PL_reg_flags |= RF_tainted;
bbce6d69 1234 /* FALL THROUGH */
1235 case EXACTF:
161b471a 1236 s = (char *) OPERAND(scan);
c277df42 1237 ln = UCHARAT(s++);
a0ed51b3
LW
1238
1239 if (UTF) {
1240 char *l = locinput;
1241 char *e = s + ln;
1242 c1 = OP(scan) == EXACTF;
1243 while (s < e) {
1244 if (l >= PL_regeol)
1245 sayNO;
dfe13c55
GS
1246 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1247 toLOWER_utf8((U8*)l) :
1248 toLOWER_LC_utf8((U8*)l)))
1249 {
a0ed51b3 1250 sayNO;
dfe13c55 1251 }
a0ed51b3
LW
1252 s += UTF8SKIP(s);
1253 l += UTF8SKIP(l);
1254 }
1255 locinput = l;
1256 nextchr = UCHARAT(locinput);
1257 break;
1258 }
1259
bbce6d69 1260 /* Inline the first character, for speed. */
76e3520e 1261 if (UCHARAT(s) != nextchr &&
bbce6d69 1262 UCHARAT(s) != ((OP(scan) == EXACTF)
76e3520e 1263 ? fold : fold_locale)[nextchr])
bbce6d69 1264 sayNO;
3280af22 1265 if (PL_regeol - locinput < ln)
bbce6d69 1266 sayNO;
5f05dabc 1267 if (ln > 1 && (OP(scan) == EXACTF
1268 ? ibcmp(s, locinput, ln)
1269 : ibcmp_locale(s, locinput, ln)))
bbce6d69 1270 sayNO;
1271 locinput += ln;
76e3520e 1272 nextchr = UCHARAT(locinput);
a0d0e21e 1273 break;
a0ed51b3
LW
1274 case ANYOFUTF8:
1275 s = (char *) OPERAND(scan);
1276 if (!REGINCLASSUTF8(scan, (U8*)locinput))
1277 sayNO;
1278 if (locinput >= PL_regeol)
1279 sayNO;
a176fa2a 1280 locinput += utf8skip[nextchr];
a0ed51b3
LW
1281 nextchr = UCHARAT(locinput);
1282 break;
a0d0e21e 1283 case ANYOF:
161b471a 1284 s = (char *) OPERAND(scan);
76e3520e
GS
1285 if (nextchr < 0)
1286 nextchr = UCHARAT(locinput);
873ef191 1287 if (!REGINCLASS(s, nextchr))
4633a7c4 1288 sayNO;
3280af22 1289 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1290 sayNO;
76e3520e 1291 nextchr = UCHARAT(++locinput);
a0d0e21e 1292 break;
bbce6d69 1293 case ALNUML:
3280af22 1294 PL_reg_flags |= RF_tainted;
bbce6d69 1295 /* FALL THROUGH */
a0d0e21e 1296 case ALNUM:
76e3520e 1297 if (!nextchr)
4633a7c4 1298 sayNO;
bbce6d69 1299 if (!(OP(scan) == ALNUM
76e3520e 1300 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
4633a7c4 1301 sayNO;
76e3520e 1302 nextchr = UCHARAT(++locinput);
a0d0e21e 1303 break;
a0ed51b3
LW
1304 case ALNUMLUTF8:
1305 PL_reg_flags |= RF_tainted;
1306 /* FALL THROUGH */
1307 case ALNUMUTF8:
1308 if (!nextchr)
1309 sayNO;
1310 if (nextchr & 0x80) {
1311 if (!(OP(scan) == ALNUMUTF8
dfe13c55
GS
1312 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1313 : isALNUM_LC_utf8((U8*)locinput)))
1314 {
a0ed51b3 1315 sayNO;
dfe13c55 1316 }
a176fa2a 1317 locinput += utf8skip[nextchr];
a0ed51b3
LW
1318 nextchr = UCHARAT(locinput);
1319 break;
1320 }
1321 if (!(OP(scan) == ALNUMUTF8
1322 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
1323 sayNO;
1324 nextchr = UCHARAT(++locinput);
1325 break;
bbce6d69 1326 case NALNUML:
3280af22 1327 PL_reg_flags |= RF_tainted;
bbce6d69 1328 /* FALL THROUGH */
a0d0e21e 1329 case NALNUM:
3280af22 1330 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1331 sayNO;
bbce6d69 1332 if (OP(scan) == NALNUM
76e3520e 1333 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
4633a7c4 1334 sayNO;
76e3520e 1335 nextchr = UCHARAT(++locinput);
a0d0e21e 1336 break;
a0ed51b3
LW
1337 case NALNUMLUTF8:
1338 PL_reg_flags |= RF_tainted;
1339 /* FALL THROUGH */
1340 case NALNUMUTF8:
1341 if (!nextchr && locinput >= PL_regeol)
1342 sayNO;
1343 if (nextchr & 0x80) {
1344 if (OP(scan) == NALNUMUTF8
dfe13c55
GS
1345 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1346 : isALNUM_LC_utf8((U8*)locinput))
1347 {
a0ed51b3 1348 sayNO;
dfe13c55 1349 }
a176fa2a 1350 locinput += utf8skip[nextchr];
a0ed51b3
LW
1351 nextchr = UCHARAT(locinput);
1352 break;
1353 }
1354 if (OP(scan) == NALNUMUTF8
1355 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
1356 sayNO;
1357 nextchr = UCHARAT(++locinput);
1358 break;
bbce6d69 1359 case BOUNDL:
1360 case NBOUNDL:
3280af22 1361 PL_reg_flags |= RF_tainted;
bbce6d69 1362 /* FALL THROUGH */
a0d0e21e 1363 case BOUND:
bbce6d69 1364 case NBOUND:
1365 /* was last char in word? */
3280af22 1366 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
bbce6d69 1367 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1368 ln = isALNUM(ln);
76e3520e 1369 n = isALNUM(nextchr);
bbce6d69 1370 }
1371 else {
1372 ln = isALNUM_LC(ln);
76e3520e 1373 n = isALNUM_LC(nextchr);
bbce6d69 1374 }
95bac841 1375 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1376 sayNO;
a0d0e21e 1377 break;
a0ed51b3
LW
1378 case BOUNDLUTF8:
1379 case NBOUNDLUTF8:
1380 PL_reg_flags |= RF_tainted;
1381 /* FALL THROUGH */
1382 case BOUNDUTF8:
1383 case NBOUNDUTF8:
1384 /* was last char in word? */
dfe13c55
GS
1385 ln = (locinput != PL_regbol)
1386 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
a0ed51b3
LW
1387 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1388 ln = isALNUM_uni(ln);
dfe13c55 1389 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3
LW
1390 }
1391 else {
1392 ln = isALNUM_LC_uni(ln);
dfe13c55 1393 n = isALNUM_LC_utf8((U8*)locinput);
a0ed51b3
LW
1394 }
1395 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
1396 sayNO;
1397 break;
bbce6d69 1398 case SPACEL:
3280af22 1399 PL_reg_flags |= RF_tainted;
bbce6d69 1400 /* FALL THROUGH */
a0d0e21e 1401 case SPACE:
3280af22 1402 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1403 sayNO;
bbce6d69 1404 if (!(OP(scan) == SPACE
76e3520e 1405 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1406 sayNO;
76e3520e 1407 nextchr = UCHARAT(++locinput);
a0d0e21e 1408 break;
a0ed51b3
LW
1409 case SPACELUTF8:
1410 PL_reg_flags |= RF_tainted;
1411 /* FALL THROUGH */
1412 case SPACEUTF8:
1413 if (!nextchr && locinput >= PL_regeol)
1414 sayNO;
1415 if (nextchr & 0x80) {
1416 if (!(OP(scan) == SPACEUTF8
dfe13c55
GS
1417 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1418 : isSPACE_LC_utf8((U8*)locinput)))
1419 {
a0ed51b3 1420 sayNO;
dfe13c55 1421 }
a176fa2a 1422 locinput += utf8skip[nextchr];
a0ed51b3
LW
1423 nextchr = UCHARAT(locinput);
1424 break;
1425 }
1426 if (!(OP(scan) == SPACEUTF8
1427 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
1428 sayNO;
1429 nextchr = UCHARAT(++locinput);
1430 break;
bbce6d69 1431 case NSPACEL:
3280af22 1432 PL_reg_flags |= RF_tainted;
bbce6d69 1433 /* FALL THROUGH */
a0d0e21e 1434 case NSPACE:
76e3520e 1435 if (!nextchr)
4633a7c4 1436 sayNO;
bbce6d69 1437 if (OP(scan) == SPACE
76e3520e 1438 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1439 sayNO;
76e3520e 1440 nextchr = UCHARAT(++locinput);
a0d0e21e 1441 break;
a0ed51b3
LW
1442 case NSPACELUTF8:
1443 PL_reg_flags |= RF_tainted;
1444 /* FALL THROUGH */
1445 case NSPACEUTF8:
1446 if (!nextchr)
1447 sayNO;
1448 if (nextchr & 0x80) {
1449 if (OP(scan) == NSPACEUTF8
dfe13c55
GS
1450 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1451 : isSPACE_LC_utf8((U8*)locinput))
1452 {
a0ed51b3 1453 sayNO;
dfe13c55 1454 }
a176fa2a 1455 locinput += utf8skip[nextchr];
a0ed51b3
LW
1456 nextchr = UCHARAT(locinput);
1457 break;
1458 }
1459 if (OP(scan) == NSPACEUTF8
1460 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
1461 sayNO;
1462 nextchr = UCHARAT(++locinput);
1463 break;
a0d0e21e 1464 case DIGIT:
76e3520e 1465 if (!isDIGIT(nextchr))
4633a7c4 1466 sayNO;
76e3520e 1467 nextchr = UCHARAT(++locinput);
a0d0e21e 1468 break;
a0ed51b3
LW
1469 case DIGITUTF8:
1470 if (nextchr & 0x80) {
dfe13c55 1471 if (!(swash_fetch(PL_utf8_digit,(U8*)locinput)))
a0ed51b3 1472 sayNO;
a176fa2a 1473 locinput += utf8skip[nextchr];
a0ed51b3
LW
1474 nextchr = UCHARAT(locinput);
1475 break;
1476 }
1477 if (!isDIGIT(nextchr))
1478 sayNO;
1479 nextchr = UCHARAT(++locinput);
1480 break;
a0d0e21e 1481 case NDIGIT:
3280af22 1482 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1483 sayNO;
76e3520e 1484 if (isDIGIT(nextchr))
4633a7c4 1485 sayNO;
76e3520e 1486 nextchr = UCHARAT(++locinput);
a0d0e21e 1487 break;
a0ed51b3
LW
1488 case NDIGITUTF8:
1489 if (!nextchr && locinput >= PL_regeol)
1490 sayNO;
1491 if (nextchr & 0x80) {
dfe13c55 1492 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 1493 sayNO;
a176fa2a 1494 locinput += utf8skip[nextchr];
a0ed51b3
LW
1495 nextchr = UCHARAT(locinput);
1496 break;
1497 }
1498 if (isDIGIT(nextchr))
1499 sayNO;
1500 nextchr = UCHARAT(++locinput);
1501 break;
1502 case CLUMP:
dfe13c55 1503 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 1504 sayNO;
a176fa2a 1505 locinput += utf8skip[nextchr];
dfe13c55 1506 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
1507 locinput += UTF8SKIP(locinput);
1508 if (locinput > PL_regeol)
1509 sayNO;
1510 nextchr = UCHARAT(locinput);
1511 break;
c8756f30 1512 case REFFL:
3280af22 1513 PL_reg_flags |= RF_tainted;
c8756f30 1514 /* FALL THROUGH */
c277df42 1515 case REF:
c8756f30 1516 case REFF:
c277df42 1517 n = ARG(scan); /* which paren pair */
3280af22
NIS
1518 s = PL_regstartp[n];
1519 if (*PL_reglastparen < n || !s)
af3f8c16 1520 sayNO; /* Do not match unless seen CLOSEn. */
3280af22 1521 if (s == PL_regendp[n])
a0d0e21e 1522 break;
a0ed51b3
LW
1523
1524 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
1525 char *l = locinput;
1526 char *e = PL_regendp[n];
1527 /*
1528 * Note that we can't do the "other character" lookup trick as
1529 * in the 8-bit case (no pun intended) because in Unicode we
1530 * have to map both upper and title case to lower case.
1531 */
1532 if (OP(scan) == REFF) {
1533 while (s < e) {
1534 if (l >= PL_regeol)
1535 sayNO;
dfe13c55 1536 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
1537 sayNO;
1538 s += UTF8SKIP(s);
1539 l += UTF8SKIP(l);
1540 }
1541 }
1542 else {
1543 while (s < e) {
1544 if (l >= PL_regeol)
1545 sayNO;
dfe13c55 1546 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
1547 sayNO;
1548 s += UTF8SKIP(s);
1549 l += UTF8SKIP(l);
1550 }
1551 }
1552 locinput = l;
1553 nextchr = UCHARAT(locinput);
1554 break;
1555 }
1556
a0d0e21e 1557 /* Inline the first character, for speed. */
76e3520e 1558 if (UCHARAT(s) != nextchr &&
c8756f30
AK
1559 (OP(scan) == REF ||
1560 (UCHARAT(s) != ((OP(scan) == REFF
76e3520e 1561 ? fold : fold_locale)[nextchr]))))
4633a7c4 1562 sayNO;
3280af22
NIS
1563 ln = PL_regendp[n] - s;
1564 if (locinput + ln > PL_regeol)
4633a7c4 1565 sayNO;
c8756f30
AK
1566 if (ln > 1 && (OP(scan) == REF
1567 ? memNE(s, locinput, ln)
1568 : (OP(scan) == REFF
1569 ? ibcmp(s, locinput, ln)
1570 : ibcmp_locale(s, locinput, ln))))
4633a7c4 1571 sayNO;
a0d0e21e 1572 locinput += ln;
76e3520e 1573 nextchr = UCHARAT(locinput);
a0d0e21e
LW
1574 break;
1575
1576 case NOTHING:
c277df42 1577 case TAIL:
a0d0e21e
LW
1578 break;
1579 case BACK:
1580 break;
c277df42
IZ
1581 case EVAL:
1582 {
1583 dSP;
533c011a 1584 OP_4tree *oop = PL_op;
3280af22
NIS
1585 COP *ocurcop = PL_curcop;
1586 SV **ocurpad = PL_curpad;
c277df42
IZ
1587 SV *ret;
1588
1589 n = ARG(scan);
533c011a
NIS
1590 PL_op = (OP_4tree*)PL_regdata->data[n];
1591 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
3280af22 1592 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 1]);
c277df42 1593
76e3520e 1594 CALLRUNOPS(); /* Scalar context. */
c277df42
IZ
1595 SPAGAIN;
1596 ret = POPs;
1597 PUTBACK;
1598
0f5d15d6
IZ
1599 PL_op = oop;
1600 PL_curpad = ocurpad;
1601 PL_curcop = ocurcop;
c277df42 1602 if (logical) {
0f5d15d6
IZ
1603 if (logical == 2) { /* Postponed subexpression. */
1604 regexp *re;
1605 my_magic mg = Null(my_magic);
1606 re_cc_state state;
1607 CURCUR cctmp;
1608 CHECKPOINT cp, lastcp;
1609
1610 if(SvROK(ret) || SvRMAGICAL(ret)) {
1611 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
1612
1613 if(SvMAGICAL(sv))
1614 mg = mg_find(sv, 'r');
1615 }
1616 if (mg) {
1617 re = (regexp *)mg->mg_obj;
1618 ReREFCNT_inc(re);
1619 }
1620 else {
1621 STRLEN len;
1622 char *t = SvPV(ret, len);
1623 PMOP pm;
1624 char *oprecomp = PL_regprecomp;
1625 I32 osize = PL_regsize;
1626 I32 onpar = PL_regnpar;
1627
1628 pm.op_pmflags = 0;
1629 re = CALLREGCOMP(t, t + len, &pm);
1630 if (!(SvFLAGS(ret)
1631 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
1632 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
1633 PL_regprecomp = oprecomp;
1634 PL_regsize = osize;
1635 PL_regnpar = onpar;
1636 }
1637 DEBUG_r(
1638 PerlIO_printf(Perl_debug_log,
1639 "Entering embedded `%s%.60s%s%s'\n",
1640 PL_colors[0],
1641 re->precomp,
1642 PL_colors[1],
1643 (strlen(re->precomp) > 60 ? "..." : ""))
1644 );
1645 state.node = next;
1646 state.prev = PL_reg_call_cc;
1647 state.cc = PL_regcc;
1648 state.re = PL_reg_re;
1649
1650 cctmp.cur = 0;
1651 cctmp.oldcc = 0;
1652 PL_regcc = &cctmp;
1653
1654 cp = regcppush(0); /* Save *all* the positions. */
1655 REGCP_SET;
1656 cache_re(re);
1657 state.ss = PL_savestack_ix;
1658 *PL_reglastparen = 0;
1659 PL_reg_call_cc = &state;
1660 PL_reginput = locinput;
1661 if (regmatch(re->program + 1)) {
1662 ReREFCNT_dec(re);
1663 regcpblow(cp);
1664 sayYES;
1665 }
1666 DEBUG_r(
1667 PerlIO_printf(Perl_debug_log,
1668 "%*s failed...\n",
1669 REPORT_CODE_OFF+PL_regindent*2, "")
1670 );
1671 ReREFCNT_dec(re);
1672 REGCP_UNWIND;
1673 regcppop();
1674 PL_reg_call_cc = state.prev;
1675 PL_regcc = state.cc;
1676 PL_reg_re = state.re;
d3790889 1677 cache_re(PL_reg_re);
0f5d15d6
IZ
1678 sayNO;
1679 }
c277df42 1680 sw = SvTRUE(ret);
0f5d15d6 1681 logical = 0;
a0ed51b3
LW
1682 }
1683 else
3280af22 1684 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
1685 break;
1686 }
a0d0e21e 1687 case OPEN:
c277df42 1688 n = ARG(scan); /* which paren pair */
3280af22
NIS
1689 PL_reg_start_tmp[n] = locinput;
1690 if (n > PL_regsize)
1691 PL_regsize = n;
a0d0e21e
LW
1692 break;
1693 case CLOSE:
c277df42 1694 n = ARG(scan); /* which paren pair */
3280af22
NIS
1695 PL_regstartp[n] = PL_reg_start_tmp[n];
1696 PL_regendp[n] = locinput;
1697 if (n > *PL_reglastparen)
1698 *PL_reglastparen = n;
a0d0e21e 1699 break;
c277df42
IZ
1700 case GROUPP:
1701 n = ARG(scan); /* which paren pair */
3280af22 1702 sw = (*PL_reglastparen >= n && PL_regendp[n] != NULL);
c277df42
IZ
1703 break;
1704 case IFTHEN:
1705 if (sw)
1706 next = NEXTOPER(NEXTOPER(scan));
1707 else {
1708 next = scan + ARG(scan);
1709 if (OP(next) == IFTHEN) /* Fake one. */
1710 next = NEXTOPER(NEXTOPER(next));
1711 }
1712 break;
1713 case LOGICAL:
0f5d15d6 1714 logical = scan->flags;
c277df42 1715 break;
a0d0e21e
LW
1716 case CURLYX: {
1717 CURCUR cc;
3280af22 1718 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
1719
1720 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
1721 next += ARG(next);
3280af22
NIS
1722 cc.oldcc = PL_regcc;
1723 PL_regcc = &cc;
1724 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
1725 cc.cur = -1;
1726 cc.min = ARG1(scan);
1727 cc.max = ARG2(scan);
c277df42 1728 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
1729 cc.next = next;
1730 cc.minmod = minmod;
1731 cc.lastloc = 0;
3280af22 1732 PL_reginput = locinput;
a0d0e21e
LW
1733 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
1734 regcpblow(cp);
3280af22 1735 PL_regcc = cc.oldcc;
4633a7c4 1736 saySAME(n);
a0d0e21e
LW
1737 }
1738 /* NOT REACHED */
1739 case WHILEM: {
1740 /*
1741 * This is really hard to understand, because after we match
1742 * what we're trying to match, we must make sure the rest of
1743 * the RE is going to match for sure, and to do that we have
1744 * to go back UP the parse tree by recursing ever deeper. And
1745 * if it fails, we have to reset our parent's current state
1746 * that we can try again after backing off.
1747 */
1748
c277df42 1749 CHECKPOINT cp, lastcp;
3280af22 1750 CURCUR* cc = PL_regcc;
c277df42
IZ
1751 char *lastloc = cc->lastloc; /* Detection of 0-len. */
1752
4633a7c4 1753 n = cc->cur + 1; /* how many we know we matched */
3280af22 1754 PL_reginput = locinput;
a0d0e21e 1755
c277df42
IZ
1756 DEBUG_r(
1757 PerlIO_printf(Perl_debug_log,
1758 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 1759 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
1760 (long)n, (long)cc->min,
1761 (long)cc->max, (long)cc)
1762 );
4633a7c4 1763
a0d0e21e
LW
1764 /* If degenerate scan matches "", assume scan done. */
1765
579cf2c3 1766 if (locinput == cc->lastloc && n >= cc->min) {
3280af22
NIS
1767 PL_regcc = cc->oldcc;
1768 ln = PL_regcc->cur;
c277df42 1769 DEBUG_r(
c3464db5
DD
1770 PerlIO_printf(Perl_debug_log,
1771 "%*s empty match detected, try continuation...\n",
3280af22 1772 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1773 );
a0d0e21e 1774 if (regmatch(cc->next))
4633a7c4 1775 sayYES;
c277df42 1776 DEBUG_r(
c3464db5
DD
1777 PerlIO_printf(Perl_debug_log,
1778 "%*s failed...\n",
3280af22 1779 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1780 );
3280af22
NIS
1781 PL_regcc->cur = ln;
1782 PL_regcc = cc;
4633a7c4 1783 sayNO;
a0d0e21e
LW
1784 }
1785
1786 /* First just match a string of min scans. */
1787
1788 if (n < cc->min) {
1789 cc->cur = n;
1790 cc->lastloc = locinput;
4633a7c4
LW
1791 if (regmatch(cc->scan))
1792 sayYES;
1793 cc->cur = n - 1;
c277df42
IZ
1794 cc->lastloc = lastloc;
1795 DEBUG_r(
c3464db5
DD
1796 PerlIO_printf(Perl_debug_log,
1797 "%*s failed...\n",
3280af22 1798 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1799 );
4633a7c4 1800 sayNO;
a0d0e21e
LW
1801 }
1802
1803 /* Prefer next over scan for minimal matching. */
1804
1805 if (cc->minmod) {
3280af22
NIS
1806 PL_regcc = cc->oldcc;
1807 ln = PL_regcc->cur;
5f05dabc 1808 cp = regcppush(cc->parenfloor);
c277df42 1809 REGCP_SET;
5f05dabc 1810 if (regmatch(cc->next)) {
c277df42 1811 regcpblow(cp);
4633a7c4 1812 sayYES; /* All done. */
5f05dabc 1813 }
c277df42 1814 REGCP_UNWIND;
5f05dabc 1815 regcppop();
3280af22
NIS
1816 PL_regcc->cur = ln;
1817 PL_regcc = cc;
a0d0e21e 1818
c277df42 1819 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 1820 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
1821 && !(PL_reg_flags & RF_warned)) {
1822 PL_reg_flags |= RF_warned;
599cee73 1823 warner(WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
1824 "Complex regular subexpression recursion",
1825 REG_INFTY - 1);
c277df42 1826 }
4633a7c4 1827 sayNO;
c277df42 1828 }
a687059c 1829
c277df42 1830 DEBUG_r(
c3464db5
DD
1831 PerlIO_printf(Perl_debug_log,
1832 "%*s trying longer...\n",
3280af22 1833 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1834 );
a0d0e21e 1835 /* Try scanning more and see if it helps. */
3280af22 1836 PL_reginput = locinput;
a0d0e21e
LW
1837 cc->cur = n;
1838 cc->lastloc = locinput;
5f05dabc 1839 cp = regcppush(cc->parenfloor);
c277df42 1840 REGCP_SET;
5f05dabc 1841 if (regmatch(cc->scan)) {
c277df42 1842 regcpblow(cp);
4633a7c4 1843 sayYES;
5f05dabc 1844 }
c277df42 1845 DEBUG_r(
c3464db5
DD
1846 PerlIO_printf(Perl_debug_log,
1847 "%*s failed...\n",
3280af22 1848 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
1849 );
1850 REGCP_UNWIND;
5f05dabc 1851 regcppop();
4633a7c4 1852 cc->cur = n - 1;
c277df42 1853 cc->lastloc = lastloc;
4633a7c4 1854 sayNO;
a0d0e21e
LW
1855 }
1856
1857 /* Prefer scan over next for maximal matching. */
1858
1859 if (n < cc->max) { /* More greed allowed? */
5f05dabc 1860 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
1861 cc->cur = n;
1862 cc->lastloc = locinput;
c277df42 1863 REGCP_SET;
5f05dabc 1864 if (regmatch(cc->scan)) {
c277df42 1865 regcpblow(cp);
4633a7c4 1866 sayYES;
5f05dabc 1867 }
c277df42 1868 REGCP_UNWIND;
a0d0e21e 1869 regcppop(); /* Restore some previous $<digit>s? */
3280af22 1870 PL_reginput = locinput;
c277df42 1871 DEBUG_r(
c3464db5
DD
1872 PerlIO_printf(Perl_debug_log,
1873 "%*s failed, try continuation...\n",
3280af22 1874 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
1875 );
1876 }
599cee73
PM
1877 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
1878 && !(PL_reg_flags & RF_warned)) {
3280af22 1879 PL_reg_flags |= RF_warned;
599cee73 1880 warner(WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
1881 "Complex regular subexpression recursion",
1882 REG_INFTY - 1);
a0d0e21e
LW
1883 }
1884
1885 /* Failed deeper matches of scan, so see if this one works. */
3280af22
NIS
1886 PL_regcc = cc->oldcc;
1887 ln = PL_regcc->cur;
a0d0e21e 1888 if (regmatch(cc->next))
4633a7c4 1889 sayYES;
c277df42 1890 DEBUG_r(
c3464db5 1891 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3280af22 1892 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1893 );
3280af22
NIS
1894 PL_regcc->cur = ln;
1895 PL_regcc = cc;
4633a7c4 1896 cc->cur = n - 1;
c277df42 1897 cc->lastloc = lastloc;
4633a7c4 1898 sayNO;
a0d0e21e
LW
1899 }
1900 /* NOT REACHED */
c277df42
IZ
1901 case BRANCHJ:
1902 next = scan + ARG(scan);
1903 if (next == scan)
1904 next = NULL;
1905 inner = NEXTOPER(NEXTOPER(scan));
1906 goto do_branch;
1907 case BRANCH:
1908 inner = NEXTOPER(scan);
1909 do_branch:
1910 {
1911 CHECKPOINT lastcp;
1912 c1 = OP(scan);
1913 if (OP(next) != c1) /* No choice. */
1914 next = inner; /* Avoid recursion. */
a0d0e21e 1915 else {
3280af22 1916 int lastparen = *PL_reglastparen;
c277df42
IZ
1917
1918 REGCP_SET;
a0d0e21e 1919 do {
3280af22 1920 PL_reginput = locinput;
c277df42 1921 if (regmatch(inner))
4633a7c4 1922 sayYES;
c277df42 1923 REGCP_UNWIND;
3280af22
NIS
1924 for (n = *PL_reglastparen; n > lastparen; n--)
1925 PL_regendp[n] = 0;
1926 *PL_reglastparen = n;
c277df42 1927 scan = next;
a0d0e21e 1928 /*SUPPRESS 560*/
c277df42
IZ
1929 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
1930 next += n;
a0d0e21e 1931 else
c277df42 1932 next = NULL;
c277df42
IZ
1933 inner = NEXTOPER(scan);
1934 if (c1 == BRANCHJ) {
1935 inner = NEXTOPER(inner);
1936 }
1937 } while (scan != NULL && OP(scan) == c1);
4633a7c4 1938 sayNO;
a0d0e21e 1939 /* NOTREACHED */
a687059c 1940 }
a0d0e21e
LW
1941 }
1942 break;
1943 case MINMOD:
1944 minmod = 1;
1945 break;
c277df42
IZ
1946 case CURLYM:
1947 {
00db4c45 1948 I32 l = 0;
c277df42
IZ
1949 CHECKPOINT lastcp;
1950
1951 /* We suppose that the next guy does not need
1952 backtracking: in particular, it is of constant length,
1953 and has no parenths to influence future backrefs. */
1954 ln = ARG1(scan); /* min to match */
1955 n = ARG2(scan); /* max to match */
c277df42
IZ
1956 paren = scan->flags;
1957 if (paren) {
3280af22
NIS
1958 if (paren > PL_regsize)
1959 PL_regsize = paren;
1960 if (paren > *PL_reglastparen)
1961 *PL_reglastparen = paren;
c277df42 1962 }
dc45a647 1963 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
1964 if (paren)
1965 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 1966 PL_reginput = locinput;
c277df42
IZ
1967 if (minmod) {
1968 minmod = 0;
1969 if (ln && regrepeat_hard(scan, ln, &l) < ln)
1970 sayNO;
5f4b28b2 1971 if (ln && l == 0 && n >= ln
c277df42
IZ
1972 /* In fact, this is tricky. If paren, then the
1973 fact that we did/didnot match may influence
1974 future execution. */
1975 && !(paren && ln == 0))
1976 ln = n;
3280af22 1977 locinput = PL_reginput;
c277df42
IZ
1978 if (regkind[(U8)OP(next)] == EXACT) {
1979 c1 = UCHARAT(OPERAND(next) + 1);
1980 if (OP(next) == EXACTF)
1981 c2 = fold[c1];
1982 else if (OP(next) == EXACTFL)
1983 c2 = fold_locale[c1];
1984 else
1985 c2 = c1;
a0ed51b3
LW
1986 }
1987 else
c277df42
IZ
1988 c1 = c2 = -1000;
1989 REGCP_SET;
5f4b28b2 1990 /* This may be improved if l == 0. */
c277df42
IZ
1991 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
1992 /* If it could work, try it. */
1993 if (c1 == -1000 ||
3280af22
NIS
1994 UCHARAT(PL_reginput) == c1 ||
1995 UCHARAT(PL_reginput) == c2)
c277df42
IZ
1996 {
1997 if (paren) {
1998 if (n) {
dfe13c55 1999 PL_regstartp[paren] = HOPc(PL_reginput, -l);
3280af22 2000 PL_regendp[paren] = PL_reginput;
a0ed51b3
LW
2001 }
2002 else
3280af22 2003 PL_regendp[paren] = NULL;
c277df42
IZ
2004 }
2005 if (regmatch(next))
2006 sayYES;
2007 REGCP_UNWIND;
2008 }
2009 /* Couldn't or didn't -- move forward. */
3280af22 2010 PL_reginput = locinput;
c277df42
IZ
2011 if (regrepeat_hard(scan, 1, &l)) {
2012 ln++;
3280af22 2013 locinput = PL_reginput;
c277df42
IZ
2014 }
2015 else
2016 sayNO;
2017 }
a0ed51b3
LW
2018 }
2019 else {
c277df42
IZ
2020 n = regrepeat_hard(scan, n, &l);
2021 if (n != 0 && l == 0
2022 /* In fact, this is tricky. If paren, then the
2023 fact that we did/didnot match may influence
2024 future execution. */
2025 && !(paren && ln == 0))
2026 ln = n;
3280af22 2027 locinput = PL_reginput;
c277df42 2028 DEBUG_r(
5c0ca799
GS
2029 PerlIO_printf(Perl_debug_log,
2030 "%*s matched %ld times, len=%ld...\n",
3280af22 2031 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42
IZ
2032 );
2033 if (n >= ln) {
2034 if (regkind[(U8)OP(next)] == EXACT) {
2035 c1 = UCHARAT(OPERAND(next) + 1);
2036 if (OP(next) == EXACTF)
2037 c2 = fold[c1];
2038 else if (OP(next) == EXACTFL)
2039 c2 = fold_locale[c1];
2040 else
2041 c2 = c1;
a0ed51b3
LW
2042 }
2043 else
c277df42
IZ
2044 c1 = c2 = -1000;
2045 }
2046 REGCP_SET;
2047 while (n >= ln) {
2048 /* If it could work, try it. */
2049 if (c1 == -1000 ||
3280af22
NIS
2050 UCHARAT(PL_reginput) == c1 ||
2051 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2052 {
2053 DEBUG_r(
c3464db5
DD
2054 PerlIO_printf(Perl_debug_log,
2055 "%*s trying tail with n=%ld...\n",
3280af22 2056 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3
LW
2057 );
2058 if (paren) {
2059 if (n) {
dfe13c55 2060 PL_regstartp[paren] = HOPc(PL_reginput, -l);
a0ed51b3 2061 PL_regendp[paren] = PL_reginput;
c277df42 2062 }
a0ed51b3
LW
2063 else
2064 PL_regendp[paren] = NULL;
c277df42 2065 }
a0ed51b3
LW
2066 if (regmatch(next))
2067 sayYES;
2068 REGCP_UNWIND;
2069 }
c277df42
IZ
2070 /* Couldn't or didn't -- back up. */
2071 n--;
dfe13c55 2072 locinput = HOPc(locinput, -l);
3280af22 2073 PL_reginput = locinput;
c277df42
IZ
2074 }
2075 }
2076 sayNO;
2077 break;
2078 }
2079 case CURLYN:
2080 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2081 if (paren > PL_regsize)
2082 PL_regsize = paren;
2083 if (paren > *PL_reglastparen)
2084 *PL_reglastparen = paren;
c277df42
IZ
2085 ln = ARG1(scan); /* min to match */
2086 n = ARG2(scan); /* max to match */
dc45a647 2087 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2088 goto repeat;
a0d0e21e 2089 case CURLY:
c277df42 2090 paren = 0;
a0d0e21e
LW
2091 ln = ARG1(scan); /* min to match */
2092 n = ARG2(scan); /* max to match */
dc45a647 2093 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2094 goto repeat;
2095 case STAR:
2096 ln = 0;
c277df42 2097 n = REG_INFTY;
a0d0e21e 2098 scan = NEXTOPER(scan);
c277df42 2099 paren = 0;
a0d0e21e
LW
2100 goto repeat;
2101 case PLUS:
c277df42
IZ
2102 ln = 1;
2103 n = REG_INFTY;
2104 scan = NEXTOPER(scan);
2105 paren = 0;
2106 repeat:
a0d0e21e
LW
2107 /*
2108 * Lookahead to avoid useless match attempts
2109 * when we know what character comes next.
2110 */
bbce6d69 2111 if (regkind[(U8)OP(next)] == EXACT) {
2112 c1 = UCHARAT(OPERAND(next) + 1);
2113 if (OP(next) == EXACTF)
2114 c2 = fold[c1];
2115 else if (OP(next) == EXACTFL)
2116 c2 = fold_locale[c1];
2117 else
2118 c2 = c1;
2119 }
a0d0e21e 2120 else
bbce6d69 2121 c1 = c2 = -1000;
3280af22 2122 PL_reginput = locinput;
a0d0e21e 2123 if (minmod) {
c277df42 2124 CHECKPOINT lastcp;
a0d0e21e
LW
2125 minmod = 0;
2126 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2127 sayNO;
a0ed51b3 2128 locinput = PL_reginput;
c277df42
IZ
2129 REGCP_SET;
2130 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2131 /* If it could work, try it. */
bbce6d69 2132 if (c1 == -1000 ||
3280af22
NIS
2133 UCHARAT(PL_reginput) == c1 ||
2134 UCHARAT(PL_reginput) == c2)
bbce6d69 2135 {
c277df42
IZ
2136 if (paren) {
2137 if (n) {
dfe13c55 2138 PL_regstartp[paren] = HOPc(PL_reginput, -1);
3280af22 2139 PL_regendp[paren] = PL_reginput;
a0ed51b3
LW
2140 }
2141 else
3280af22 2142 PL_regendp[paren] = NULL;
c277df42 2143 }
a0d0e21e 2144 if (regmatch(next))
4633a7c4 2145 sayYES;
c277df42 2146 REGCP_UNWIND;
bbce6d69 2147 }
c277df42 2148 /* Couldn't or didn't -- move forward. */
a0ed51b3 2149 PL_reginput = locinput;
a0d0e21e
LW
2150 if (regrepeat(scan, 1)) {
2151 ln++;
a0ed51b3
LW
2152 locinput = PL_reginput;
2153 }
2154 else
4633a7c4 2155 sayNO;
a0d0e21e
LW
2156 }
2157 }
2158 else {
c277df42 2159 CHECKPOINT lastcp;
a0d0e21e 2160 n = regrepeat(scan, n);
a0ed51b3 2161 locinput = PL_reginput;
a0d0e21e 2162 if (ln < n && regkind[(U8)OP(next)] == EOL &&
3280af22 2163 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2164 ln = n; /* why back off? */
c277df42
IZ
2165 REGCP_SET;
2166 if (paren) {
2167 while (n >= ln) {
2168 /* If it could work, try it. */
2169 if (c1 == -1000 ||
3280af22
NIS
2170 UCHARAT(PL_reginput) == c1 ||
2171 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2172 {
2173 if (paren && n) {
2174 if (n) {
dfe13c55 2175 PL_regstartp[paren] = HOPc(PL_reginput, -1);
3280af22 2176 PL_regendp[paren] = PL_reginput;
a0ed51b3
LW
2177 }
2178 else
3280af22 2179 PL_regendp[paren] = NULL;
c277df42
IZ
2180 }
2181 if (regmatch(next))
2182 sayYES;
2183 REGCP_UNWIND;
2184 }
2185 /* Couldn't or didn't -- back up. */
2186 n--;
dfe13c55 2187 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2188 }
a0ed51b3
LW
2189 }
2190 else {
c277df42
IZ
2191 while (n >= ln) {
2192 /* If it could work, try it. */
2193 if (c1 == -1000 ||
3280af22
NIS
2194 UCHARAT(PL_reginput) == c1 ||
2195 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2196 {
2197 if (regmatch(next))
2198 sayYES;
2199 REGCP_UNWIND;
2200 }
2201 /* Couldn't or didn't -- back up. */
2202 n--;
dfe13c55 2203 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2204 }
a0d0e21e
LW
2205 }
2206 }
4633a7c4 2207 sayNO;
c277df42 2208 break;
a0d0e21e 2209 case END:
0f5d15d6
IZ
2210 if (PL_reg_call_cc) {
2211 re_cc_state *cur_call_cc = PL_reg_call_cc;
2212 CURCUR *cctmp = PL_regcc;
2213 regexp *re = PL_reg_re;
2214 CHECKPOINT cp, lastcp;
2215
2216 cp = regcppush(0); /* Save *all* the positions. */
2217 REGCP_SET;
2218 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2219 the caller. */
2220 PL_reginput = locinput; /* Make position available to
2221 the callcc. */
2222 cache_re(PL_reg_call_cc->re);
2223 PL_regcc = PL_reg_call_cc->cc;
2224 PL_reg_call_cc = PL_reg_call_cc->prev;
2225 if (regmatch(cur_call_cc->node)) {
2226 PL_reg_call_cc = cur_call_cc;
2227 regcpblow(cp);
2228 sayYES;
2229 }
2230 REGCP_UNWIND;
2231 regcppop();
2232 PL_reg_call_cc = cur_call_cc;
2233 PL_regcc = cctmp;
2234 PL_reg_re = re;
2235 cache_re(re);
2236
2237 DEBUG_r(
2238 PerlIO_printf(Perl_debug_log,
2239 "%*s continuation failed...\n",
2240 REPORT_CODE_OFF+PL_regindent*2, "")
2241 );
2242 sayNO;
2243 }
3280af22 2244 if (locinput < PL_regtill)
7e5428c5
IZ
2245 sayNO; /* Cannot match: too short. */
2246 /* Fall through */
2247 case SUCCEED:
3280af22 2248 PL_reginput = locinput; /* put where regtry can find it */
4633a7c4 2249 sayYES; /* Success! */
c277df42
IZ
2250 case SUSPEND:
2251 n = 1;
9fe1d20c 2252 PL_reginput = locinput;
c277df42 2253 goto do_ifmatch;
a0d0e21e 2254 case UNLESSM:
c277df42 2255 n = 0;
a0ed51b3 2256 if (scan->flags) {
dfe13c55 2257 s = HOPMAYBEc(locinput, -scan->flags);
a0ed51b3
LW
2258 if (!s)
2259 goto say_yes;
2260 PL_reginput = s;
2261 }
2262 else
2263 PL_reginput = locinput;
c277df42
IZ
2264 goto do_ifmatch;
2265 case IFMATCH:
2266 n = 1;
a0ed51b3 2267 if (scan->flags) {
dfe13c55 2268 s = HOPMAYBEc(locinput, -scan->flags);
a0ed51b3
LW
2269 if (!s)
2270 goto say_no;
2271 PL_reginput = s;
2272 }
2273 else
2274 PL_reginput = locinput;
2275
c277df42 2276 do_ifmatch:
c277df42
IZ
2277 inner = NEXTOPER(NEXTOPER(scan));
2278 if (regmatch(inner) != n) {
2279 say_no:
2280 if (logical) {
2281 logical = 0;
2282 sw = 0;
2283 goto do_longjump;
a0ed51b3
LW
2284 }
2285 else
c277df42
IZ
2286 sayNO;
2287 }
2288 say_yes:
2289 if (logical) {
2290 logical = 0;
2291 sw = 1;
2292 }
fe44a5e8 2293 if (OP(scan) == SUSPEND) {
3280af22 2294 locinput = PL_reginput;
565764a8 2295 nextchr = UCHARAT(locinput);
fe44a5e8 2296 }
c277df42
IZ
2297 /* FALL THROUGH. */
2298 case LONGJMP:
2299 do_longjump:
2300 next = scan + ARG(scan);
2301 if (next == scan)
2302 next = NULL;
a0d0e21e
LW
2303 break;
2304 default:
c030ccd9 2305 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 2306 (unsigned long)scan, OP(scan));
a0d0e21e 2307 FAIL("regexp memory corruption");
a687059c 2308 }
a0d0e21e
LW
2309 scan = next;
2310 }
a687059c 2311
a0d0e21e
LW
2312 /*
2313 * We get here only if there's trouble -- normally "case END" is
2314 * the terminating point.
2315 */
2316 FAIL("corrupted regexp pointers");
2317 /*NOTREACHED*/
4633a7c4
LW
2318 sayNO;
2319
2320yes:
2321#ifdef DEBUGGING
3280af22 2322 PL_regindent--;
4633a7c4
LW
2323#endif
2324 return 1;
2325
2326no:
2327#ifdef DEBUGGING
3280af22 2328 PL_regindent--;
4633a7c4 2329#endif
a0d0e21e 2330 return 0;
a687059c
LW
2331}
2332
2333/*
2334 - regrepeat - repeatedly match something simple, report how many
2335 */
2336/*
2337 * [This routine now assumes that it will only match on things of length 1.
2338 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 2339 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 2340 */
76e3520e 2341STATIC I32
c277df42 2342regrepeat(regnode *p, I32 max)
a687059c 2343{
5c0ca799 2344 dTHR;
a0d0e21e
LW
2345 register char *scan;
2346 register char *opnd;
2347 register I32 c;
3280af22 2348 register char *loceol = PL_regeol;
a0ed51b3 2349 register I32 hardcount = 0;
a0d0e21e 2350
3280af22 2351 scan = PL_reginput;
c277df42 2352 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 2353 loceol = scan + max;
161b471a 2354 opnd = (char *) OPERAND(p);
a0d0e21e
LW
2355 switch (OP(p)) {
2356 case ANY:
2357 while (scan < loceol && *scan != '\n')
2358 scan++;
2359 break;
2360 case SANY:
2361 scan = loceol;
2362 break;
a0ed51b3
LW
2363 case ANYUTF8:
2364 loceol = PL_regeol;
2365 while (scan < loceol && *scan != '\n') {
2366 scan += UTF8SKIP(scan);
2367 hardcount++;
2368 }
2369 break;
2370 case SANYUTF8:
2371 loceol = PL_regeol;
2372 while (scan < loceol) {
2373 scan += UTF8SKIP(scan);
2374 hardcount++;
2375 }
2376 break;
bbce6d69 2377 case EXACT: /* length of string is 1 */
2378 c = UCHARAT(++opnd);
2379 while (scan < loceol && UCHARAT(scan) == c)
2380 scan++;
2381 break;
2382 case EXACTF: /* length of string is 1 */
2383 c = UCHARAT(++opnd);
2384 while (scan < loceol &&
2385 (UCHARAT(scan) == c || UCHARAT(scan) == fold[c]))
2386 scan++;
2387 break;
2388 case EXACTFL: /* length of string is 1 */
3280af22 2389 PL_reg_flags |= RF_tainted;
bbce6d69 2390 c = UCHARAT(++opnd);
2391 while (scan < loceol &&
2392 (UCHARAT(scan) == c || UCHARAT(scan) == fold_locale[c]))
a0d0e21e
LW
2393 scan++;
2394 break;
a0ed51b3
LW
2395 case ANYOFUTF8:
2396 loceol = PL_regeol;
2397 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
2398 scan += UTF8SKIP(scan);
2399 hardcount++;
2400 }
2401 break;
a0d0e21e 2402 case ANYOF:
ae5c130c 2403 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 2404 scan++;
a0d0e21e
LW
2405 break;
2406 case ALNUM:
2407 while (scan < loceol && isALNUM(*scan))
2408 scan++;
2409 break;
a0ed51b3
LW
2410 case ALNUMUTF8:
2411 loceol = PL_regeol;
dfe13c55 2412 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
2413 scan += UTF8SKIP(scan);
2414 hardcount++;
2415 }
2416 break;
bbce6d69 2417 case ALNUML:
3280af22 2418 PL_reg_flags |= RF_tainted;
bbce6d69 2419 while (scan < loceol && isALNUM_LC(*scan))
2420 scan++;
2421 break;
a0ed51b3
LW
2422 case ALNUMLUTF8:
2423 PL_reg_flags |= RF_tainted;
2424 loceol = PL_regeol;
dfe13c55 2425 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
2426 scan += UTF8SKIP(scan);
2427 hardcount++;
2428 }
2429 break;
2430 break;
a0d0e21e
LW
2431 case NALNUM:
2432 while (scan < loceol && !isALNUM(*scan))
2433 scan++;
2434 break;
a0ed51b3
LW
2435 case NALNUMUTF8:
2436 loceol = PL_regeol;
dfe13c55 2437 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
2438 scan += UTF8SKIP(scan);
2439 hardcount++;
2440 }
2441 break;
bbce6d69 2442 case NALNUML:
3280af22 2443 PL_reg_flags |= RF_tainted;
bbce6d69 2444 while (scan < loceol && !isALNUM_LC(*scan))
2445 scan++;
2446 break;
a0ed51b3
LW
2447 case NALNUMLUTF8:
2448 PL_reg_flags |= RF_tainted;
2449 loceol = PL_regeol;
dfe13c55 2450 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
2451 scan += UTF8SKIP(scan);
2452 hardcount++;
2453 }
2454 break;
a0d0e21e
LW
2455 case SPACE:
2456 while (scan < loceol && isSPACE(*scan))
2457 scan++;
2458 break;
a0ed51b3
LW
2459 case SPACEUTF8:
2460 loceol = PL_regeol;
dfe13c55 2461 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
2462 scan += UTF8SKIP(scan);
2463 hardcount++;
2464 }
2465 break;
bbce6d69 2466 case SPACEL:
3280af22 2467 PL_reg_flags |= RF_tainted;
bbce6d69 2468 while (scan < loceol && isSPACE_LC(*scan))
2469 scan++;
2470 break;
a0ed51b3
LW
2471 case SPACELUTF8:
2472 PL_reg_flags |= RF_tainted;
2473 loceol = PL_regeol;
dfe13c55 2474 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
2475 scan += UTF8SKIP(scan);
2476 hardcount++;
2477 }
2478 break;
a0d0e21e
LW
2479 case NSPACE:
2480 while (scan < loceol && !isSPACE(*scan))
2481 scan++;
2482 break;
a0ed51b3
LW
2483 case NSPACEUTF8:
2484 loceol = PL_regeol;
dfe13c55 2485 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
2486 scan += UTF8SKIP(scan);
2487 hardcount++;
2488 }
2489 break;
bbce6d69 2490 case NSPACEL:
3280af22 2491 PL_reg_flags |= RF_tainted;
bbce6d69 2492 while (scan < loceol && !isSPACE_LC(*scan))
2493 scan++;
2494 break;
a0ed51b3
LW
2495 case NSPACELUTF8:
2496 PL_reg_flags |= RF_tainted;
2497 loceol = PL_regeol;
dfe13c55 2498 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
2499 scan += UTF8SKIP(scan);
2500 hardcount++;
2501 }
2502 break;
a0d0e21e
LW
2503 case DIGIT:
2504 while (scan < loceol && isDIGIT(*scan))
2505 scan++;
2506 break;
a0ed51b3
LW
2507 case DIGITUTF8:
2508 loceol = PL_regeol;
dfe13c55 2509 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
2510 scan += UTF8SKIP(scan);
2511 hardcount++;
2512 }
2513 break;
2514 break;
a0d0e21e
LW
2515 case NDIGIT:
2516 while (scan < loceol && !isDIGIT(*scan))
2517 scan++;
2518 break;
a0ed51b3
LW
2519 case NDIGITUTF8:
2520 loceol = PL_regeol;
dfe13c55 2521 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
2522 scan += UTF8SKIP(scan);
2523 hardcount++;
2524 }
2525 break;
a0d0e21e
LW
2526 default: /* Called on something of 0 width. */
2527 break; /* So match right here or not at all. */
2528 }
a687059c 2529
a0ed51b3
LW
2530 if (hardcount)
2531 c = hardcount;
2532 else
2533 c = scan - PL_reginput;
3280af22 2534 PL_reginput = scan;
a687059c 2535
c277df42
IZ
2536 DEBUG_r(
2537 {
2538 SV *prop = sv_newmortal();
2539
2540 regprop(prop, p);
2541 PerlIO_printf(Perl_debug_log,
2542 "%*s %s can match %ld times out of %ld...\n",
2543 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
2544 });
2545
a0d0e21e 2546 return(c);
a687059c
LW
2547}
2548
2549/*
c277df42
IZ
2550 - regrepeat_hard - repeatedly match something, report total lenth and length
2551 *
2552 * The repeater is supposed to have constant length.
2553 */
2554
76e3520e 2555STATIC I32
c277df42
IZ
2556regrepeat_hard(regnode *p, I32 max, I32 *lp)
2557{
5c0ca799 2558 dTHR;
c277df42
IZ
2559 register char *scan;
2560 register char *start;
3280af22 2561 register char *loceol = PL_regeol;
a0ed51b3 2562 I32 l = 0;
708e3b05 2563 I32 count = 0, res = 1;
a0ed51b3
LW
2564
2565 if (!max)
2566 return 0;
c277df42 2567
3280af22 2568 start = PL_reginput;
a0ed51b3 2569 if (UTF) {
708e3b05 2570 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
2571 if (!count++) {
2572 l = 0;
2573 while (start < PL_reginput) {
2574 l++;
2575 start += UTF8SKIP(start);
2576 }
2577 *lp = l;
2578 if (l == 0)
2579 return max;
2580 }
2581 if (count == max)
2582 return count;
2583 }
2584 }
2585 else {
708e3b05 2586 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
2587 if (!count++) {
2588 *lp = l = PL_reginput - start;
2589 if (max != REG_INFTY && l*max < loceol - scan)
2590 loceol = scan + l*max;
2591 if (l == 0)
2592 return max;
c277df42
IZ
2593 }
2594 }
2595 }
708e3b05 2596 if (!res)
3280af22 2597 PL_reginput = scan;
c277df42 2598
a0ed51b3 2599 return count;
c277df42
IZ
2600}
2601
2602/*
bbce6d69 2603 - regclass - determine if a character falls into a character class
2604 */
2605
76e3520e 2606STATIC bool
8ac85365 2607reginclass(register char *p, register I32 c)
bbce6d69 2608{
5c0ca799 2609 dTHR;
bbce6d69 2610 char flags = *p;
2611 bool match = FALSE;
2612
2613 c &= 0xFF;
ae5c130c 2614 if (ANYOF_TEST(p, c))
bbce6d69 2615 match = TRUE;
2616 else if (flags & ANYOF_FOLD) {
2617 I32 cf;
2618 if (flags & ANYOF_LOCALE) {
3280af22 2619 PL_reg_flags |= RF_tainted;
bbce6d69 2620 cf = fold_locale[c];
2621 }
2622 else
2623 cf = fold[c];
ae5c130c 2624 if (ANYOF_TEST(p, cf))
bbce6d69 2625 match = TRUE;
2626 }
2627
2628 if (!match && (flags & ANYOF_ISA)) {
3280af22 2629 PL_reg_flags |= RF_tainted;
bbce6d69 2630
2631 if (((flags & ANYOF_ALNUML) && isALNUM_LC(c)) ||
2632 ((flags & ANYOF_NALNUML) && !isALNUM_LC(c)) ||
2633 ((flags & ANYOF_SPACEL) && isSPACE_LC(c)) ||
2634 ((flags & ANYOF_NSPACEL) && !isSPACE_LC(c)))
2635 {
2636 match = TRUE;
2637 }
2638 }
2639
ae5c130c 2640 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 2641}
2642
a0ed51b3
LW
2643STATIC bool
2644reginclassutf8(regnode *f, U8 *p)
c485e607
NIS
2645{
2646 dTHR;
a0ed51b3
LW
2647 char flags = ARG1(f);
2648 bool match = FALSE;
2649 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
2650
2651 if (swash_fetch(sv, p))
2652 match = TRUE;
2653 else if (flags & ANYOF_FOLD) {
2654 I32 cf;
dfe13c55 2655 U8 tmpbuf[10];
a0ed51b3
LW
2656 if (flags & ANYOF_LOCALE) {
2657 PL_reg_flags |= RF_tainted;
2658 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
2659 }
2660 else
2661 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
2662 if (swash_fetch(sv, tmpbuf))
2663 match = TRUE;
2664 }
2665
2666 if (!match && (flags & ANYOF_ISA)) {
2667 PL_reg_flags |= RF_tainted;
2668
2669 if (((flags & ANYOF_ALNUML) && isALNUM_LC_utf8(p)) ||
2670 ((flags & ANYOF_NALNUML) && !isALNUM_LC_utf8(p)) ||
2671 ((flags & ANYOF_SPACEL) && isSPACE_LC_utf8(p)) ||
2672 ((flags & ANYOF_NSPACEL) && !isSPACE_LC_utf8(p)))
2673 {
2674 match = TRUE;
2675 }
2676 }
2677
2678 return (flags & ANYOF_INVERT) ? !match : match;
2679}
161b471a 2680
dfe13c55
GS
2681STATIC U8 *
2682reghop(U8 *s, I32 off)
c485e607
NIS
2683{
2684 dTHR;
a0ed51b3
LW
2685 if (off >= 0) {
2686 while (off-- && s < (U8*)PL_regeol)
2687 s += UTF8SKIP(s);
2688 }
2689 else {
2690 while (off++) {
2691 if (s > (U8*)PL_bostr) {
2692 s--;
2693 if (*s & 0x80) {
2694 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
2695 s--;
2696 } /* XXX could check well-formedness here */
2697 }
2698 }
2699 }
2700 return s;
2701}
161b471a 2702
dfe13c55
GS
2703STATIC U8 *
2704reghopmaybe(U8* s, I32 off)
a0ed51b3 2705{
c485e607 2706 dTHR;
a0ed51b3
LW
2707 if (off >= 0) {
2708 while (off-- && s < (U8*)PL_regeol)
2709 s += UTF8SKIP(s);
2710 if (off >= 0)
2711 return 0;
2712 }
2713 else {
2714 while (off++) {
2715 if (s > (U8*)PL_bostr) {
2716 s--;
2717 if (*s & 0x80) {
2718 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
2719 s--;
2720 } /* XXX could check well-formedness here */
2721 }
2722 else
2723 break;
2724 }
2725 if (off <= 0)
2726 return 0;
2727 }
2728 return s;
2729}