This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
added patch to generate PPDEF(pp_foo)
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
f0fcb552 22/*SUPPRESS 112*/
a687059c 23/*
e50aee73 24 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
25 *
26 * Copyright (c) 1986 by University of Toronto.
27 * Written by Henry Spencer. Not derived from licensed software.
28 *
29 * Permission is granted to anyone to use this software for any
30 * purpose on any computer system, and to redistribute it freely,
31 * subject to the following restrictions:
32 *
33 * 1. The author is not responsible for the consequences of use of
34 * this software, no matter how awful, even if they arise
35 * from defects in it.
36 *
37 * 2. The origin of this software must not be misrepresented, either
38 * by explicit claim or by omission.
39 *
40 * 3. Altered versions must be plainly marked as such, and must not
41 * be misrepresented as being the original software.
42 *
43 **** Alterations to Henry's code are...
44 ****
9607fc9c 45 **** Copyright (c) 1991-1997, Larry Wall
a687059c 46 ****
9ef589d8
LW
47 **** You may distribute under the terms of either the GNU General Public
48 **** License or the Artistic License, as specified in the README file.
a687059c
LW
49 *
50 * Beware that some of this code is subtly aware of the way operator
51 * precedence is structured in regular expressions. Serious changes in
52 * regular-expression syntax might require a total rethink.
53 */
54#include "EXTERN.h"
55#include "perl.h"
56#include "regcomp.h"
57
c277df42
IZ
58#define RF_tainted 1 /* tainted information used? */
59#define RF_warned 2 /* warned about big count? */
ce862d02
IZ
60#define RF_evaled 4 /* Did an EVAL with setting? */
61
62#define RS_init 1 /* eval environment created */
63#define RS_set 2 /* replsv value is set */
c277df42 64
a687059c
LW
65#ifndef STATIC
66#define STATIC static
67#endif
68
76e3520e 69#ifndef PERL_OBJECT
a0d0e21e
LW
70typedef I32 CHECKPOINT;
71
c277df42
IZ
72/*
73 * Forwards.
74 */
75
76static I32 regmatch _((regnode *prog));
77static I32 regrepeat _((regnode *p, I32 max));
78static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp));
79static I32 regtry _((regexp *prog, char *startpos));
ae5c130c 80
c277df42 81static bool reginclass _((char *p, I32 c));
55497cff 82static CHECKPOINT regcppush _((I32 parenfloor));
83static char * regcppop _((void));
76e3520e 84#endif
ae5c130c 85#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c))
a0d0e21e 86
76e3520e 87STATIC CHECKPOINT
8ac85365 88regcppush(I32 parenfloor)
a0d0e21e 89{
11343788 90 dTHR;
a0d0e21e 91 int retval = savestack_ix;
c277df42 92 int i = (regsize - parenfloor) * 4;
a0d0e21e
LW
93 int p;
94
95 SSCHECK(i + 5);
96 for (p = regsize; p > parenfloor; p--) {
97 SSPUSHPTR(regendp[p]);
98 SSPUSHPTR(regstartp[p]);
c5254dd6 99 SSPUSHPTR(reg_start_tmp[p]);
a0d0e21e
LW
100 SSPUSHINT(p);
101 }
102 SSPUSHINT(regsize);
103 SSPUSHINT(*reglastparen);
104 SSPUSHPTR(reginput);
105 SSPUSHINT(i + 3);
106 SSPUSHINT(SAVEt_REGCONTEXT);
107 return retval;
108}
109
c277df42
IZ
110/* These are needed since we do not localize EVAL nodes: */
111# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, " Setting an EVAL scope, savestack=%i\n", savestack_ix)); lastcp = savestack_ix
112# define REGCP_UNWIND DEBUG_r(lastcp != savestack_ix ? PerlIO_printf(Perl_debug_log," Clearing an EVAL scope, savestack=%i..%i\n", lastcp, savestack_ix) : 0); regcpblow(lastcp)
113
76e3520e 114STATIC char *
8ac85365 115regcppop(void)
a0d0e21e 116{
11343788 117 dTHR;
a0d0e21e
LW
118 I32 i = SSPOPINT;
119 U32 paren = 0;
120 char *input;
121 char *tmps;
122 assert(i == SAVEt_REGCONTEXT);
123 i = SSPOPINT;
124 input = (char *) SSPOPPTR;
125 *reglastparen = SSPOPINT;
126 regsize = SSPOPINT;
c277df42 127 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 128 paren = (U32)SSPOPINT;
c277df42 129 reg_start_tmp[paren] = (char *) SSPOPPTR;
a0d0e21e
LW
130 regstartp[paren] = (char *) SSPOPPTR;
131 tmps = (char*)SSPOPPTR;
132 if (paren <= *reglastparen)
133 regendp[paren] = tmps;
c277df42
IZ
134 DEBUG_r(
135 PerlIO_printf(Perl_debug_log, " restoring \\%d to %d(%d)..%d%s\n",
136 paren, regstartp[paren] - regbol,
137 reg_start_tmp[paren] - regbol,
138 regendp[paren] - regbol,
139 (paren > *reglastparen ? "(no)" : ""));
140 );
a0d0e21e 141 }
c277df42
IZ
142 DEBUG_r(
143 if (*reglastparen + 1 <= regnpar) {
144 PerlIO_printf(Perl_debug_log, " restoring \\%d..\\%d to undef\n",
145 *reglastparen + 1, regnpar);
146 }
147 );
a0d0e21e
LW
148 for (paren = *reglastparen + 1; paren <= regnpar; paren++) {
149 if (paren > regsize)
150 regstartp[paren] = Nullch;
151 regendp[paren] = Nullch;
152 }
153 return input;
154}
155
c277df42 156#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 157
a687059c 158/*
e50aee73 159 * pregexec and friends
a687059c
LW
160 */
161
162/*
c277df42 163 - pregexec - match a regexp against a string
a687059c 164 */
c277df42
IZ
165I32
166pregexec(register regexp *prog, char *stringarg, register char *strend, char *strbeg, I32 minend, SV *screamer, U32 nosave)
167/* strend: pointer to null at end of string */
168/* strbeg: real beginning of string */
169/* minend: end of match must be >=minend after stringarg. */
170/* nosave: For optimizations. */
171{
172 return
173 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
174 nosave ? 0 : REXEC_COPY_STR);
175}
176
a687059c 177/*
c277df42 178 - regexec_flags - match a regexp against a string
a687059c 179 */
79072805 180I32
c277df42
IZ
181regexec_flags(register regexp *prog, char *stringarg, register char *strend, char *strbeg, I32 minend, SV *screamer, void *data, U32 flags)
182/* strend: pointer to null at end of string */
183/* strbeg: real beginning of string */
184/* minend: end of match must be >=minend after stringarg. */
185/* data: May be used for some additional optimizations. */
186/* nosave: For optimizations. */
a687059c 187{
a0d0e21e 188 register char *s;
c277df42 189 register regnode *c;
a0d0e21e
LW
190 register char *startpos = stringarg;
191 register I32 tmp;
c277df42 192 I32 minlen; /* must match at least this many chars */
a0d0e21e
LW
193 I32 dontbother = 0; /* how many characters not to try at end */
194 CURCUR cc;
c277df42
IZ
195 I32 start_shift = 0; /* Offset of the start to find
196 constant substr. */
197 I32 end_shift = 0; /* Same for the end. */
198 I32 scream_pos = -1; /* Internal iterator of scream. */
199 char *scream_olds;
ce862d02 200 SV* oreplsv = GvSV(replgv);
a687059c 201
a0d0e21e 202 cc.cur = 0;
4633a7c4 203 cc.oldcc = 0;
a0d0e21e
LW
204 regcc = &cc;
205
c277df42 206 regprecomp = prog->precomp; /* Needed for error messages. */
a0d0e21e
LW
207#ifdef DEBUGGING
208 regnarrate = debug & 512;
209 regprogram = prog->program;
210#endif
211
212 /* Be paranoid... */
213 if (prog == NULL || startpos == NULL) {
214 croak("NULL regexp parameter");
215 return 0;
216 }
217
c277df42
IZ
218 minlen = prog->minlen;
219 if (strend - startpos < minlen) goto phooey;
220
a0d0e21e
LW
221 if (startpos == strbeg) /* is ^ valid at stringarg? */
222 regprev = '\n';
223 else {
224 regprev = stringarg[-1];
225 if (!multiline && regprev == '\n')
226 regprev = '\0'; /* force ^ to NOT match */
227 }
bbce6d69 228
a0d0e21e
LW
229 /* Check validity of program. */
230 if (UCHARAT(prog->program) != MAGIC) {
231 FAIL("corrupted regexp program");
232 }
233
bbce6d69 234 regnpar = prog->nparens;
c277df42
IZ
235 reg_flags = 0;
236 reg_eval_set = 0;
a0d0e21e
LW
237
238 /* If there is a "must appear" string, look for it. */
239 s = startpos;
c277df42
IZ
240 if (!(flags & REXEC_CHECKED)
241 && prog->check_substr != Nullsv &&
774d564b 242 !(prog->reganch & ROPT_ANCH_GPOS) &&
c277df42
IZ
243 (!(prog->reganch & (ROPT_ANCH_BOL | ROPT_ANCH_MBOL))
244 || (multiline && prog->check_substr == prog->anchored_substr)) )
a0d0e21e 245 {
c277df42
IZ
246 start_shift = prog->check_offset_min;
247 /* Should be nonnegative! */
248 end_shift = minlen - start_shift - SvCUR(prog->check_substr);
249 if (screamer) {
250 if (screamfirst[BmRARE(prog->check_substr)] >= 0)
251 s = screaminstr(screamer, prog->check_substr,
252 start_shift + (stringarg - strbeg),
253 end_shift, &scream_pos, 0);
a0d0e21e
LW
254 else
255 s = Nullch;
c277df42 256 scream_olds = s;
0a12ae7d 257 }
a0d0e21e 258 else
c277df42
IZ
259 s = fbm_instr((unsigned char*)s + start_shift,
260 (unsigned char*)strend - end_shift,
411d5715 261 prog->check_substr, 0);
a0d0e21e 262 if (!s) {
c277df42 263 ++BmUSEFUL(prog->check_substr); /* hooray */
a0d0e21e 264 goto phooey; /* not present */
c277df42
IZ
265 } else if ((s - stringarg) > prog->check_offset_max) {
266 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
267 s -= prog->check_offset_max;
268 } else if (!prog->naughty
269 && --BmUSEFUL(prog->check_substr) < 0
270 && prog->check_substr == prog->float_substr) { /* boo */
271 SvREFCNT_dec(prog->check_substr);
272 prog->check_substr = Nullsv; /* disable */
273 prog->float_substr = Nullsv; /* clear */
a0d0e21e 274 s = startpos;
c277df42 275 } else s = startpos;
a0d0e21e 276 }
a687059c 277
c277df42 278 /* Mark beginning of line for ^ and lookbehind. */
a0d0e21e 279 regbol = startpos;
c277df42 280 bostr = strbeg;
a687059c 281
a0d0e21e
LW
282 /* Mark end of line for $ (and such) */
283 regeol = strend;
a687059c 284
a0d0e21e
LW
285 /* see how far we have to get to not match where we matched before */
286 regtill = startpos+minend;
a687059c 287
c277df42
IZ
288 DEBUG_r(
289 PerlIO_printf(Perl_debug_log,
290 "Matching `%.60s%s' against `%.*s%s'\n",
291 prog->precomp,
292 (strlen(prog->precomp) > 60 ? "..." : ""),
293 (strend - startpos > 60 ? 60 : strend - startpos),
294 startpos,
295 (strend - startpos > 60 ? "..." : ""))
296 );
297
a0d0e21e 298 /* Simplest case: anchored match need be tried only once. */
774d564b 299 /* [unless only anchor is BOL and multiline is set] */
a0d0e21e
LW
300 if (prog->reganch & ROPT_ANCH) {
301 if (regtry(prog, startpos))
302 goto got_it;
774d564b 303 else if (!(prog->reganch & ROPT_ANCH_GPOS) &&
c277df42
IZ
304 (multiline || (prog->reganch & ROPT_IMPLICIT)
305 || (prog->reganch & ROPT_ANCH_MBOL)))
774d564b 306 {
a0d0e21e
LW
307 if (minlen)
308 dontbother = minlen - 1;
309 strend -= dontbother;
310 /* for multiline we only have to try after newlines */
311 if (s > startpos)
312 s--;
313 while (s < strend) {
314 if (*s++ == '\n') {
315 if (s < strend && regtry(prog, s))
316 goto got_it;
317 }
35c8bce7 318 }
35c8bce7 319 }
a0d0e21e
LW
320 goto phooey;
321 }
35c8bce7 322
a0d0e21e 323 /* Messy cases: unanchored match. */
c277df42
IZ
324 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
325 /* we have /x+whatever/ */
326 /* it must be a one character string */
327 char ch = SvPVX(prog->anchored_substr)[0];
328 while (s < strend) {
329 if (*s == ch) {
330 if (regtry(prog, s)) goto got_it;
a0d0e21e 331 s++;
c277df42
IZ
332 while (s < strend && *s == ch)
333 s++;
a0d0e21e 334 }
c277df42 335 s++;
a687059c 336 }
c277df42
IZ
337 }
338 /*SUPPRESS 560*/
339 else if (prog->anchored_substr != Nullsv
340 || (prog->float_substr != Nullsv
341 && prog->float_max_offset < strend - s)) {
342 SV *must = prog->anchored_substr
343 ? prog->anchored_substr : prog->float_substr;
344 I32 back_max =
345 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
346 I32 back_min =
347 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
348 I32 delta = back_max - back_min;
349 char *last = strend - SvCUR(must) - back_min; /* Cannot start after this */
350 char *last1 = s - 1; /* Last position checked before */
351
352 /* XXXX check_substr already used to find `s', can optimize if
353 check_substr==must. */
354 scream_pos = -1;
355 dontbother = end_shift;
356 strend -= dontbother;
357 while ( (s <= last) &&
358 (screamer
359 ? (s = screaminstr(screamer, must, s + back_min - strbeg,
360 end_shift, &scream_pos, 0))
361 : (s = fbm_instr((unsigned char*)s + back_min,
411d5715 362 (unsigned char*)strend, must, 0))) ) {
c277df42
IZ
363 if (s - back_max > last1) {
364 last1 = s - back_min;
365 s = s - back_max;
366 } else {
367 char *t = last1 + 1;
368
369 last1 = s - back_min;
370 s = t;
a0d0e21e 371 }
c277df42 372 while (s <= last1) {
a0d0e21e
LW
373 if (regtry(prog, s))
374 goto got_it;
375 s++;
376 }
377 }
378 goto phooey;
c277df42 379 } else if (c = prog->regstclass) {
a0d0e21e 380 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
161b471a 381 char *Class;
a687059c 382
a0d0e21e
LW
383 if (minlen)
384 dontbother = minlen - 1;
385 strend -= dontbother; /* don't bother with what can't match */
386 tmp = 1;
387 /* We know what class it must start with. */
388 switch (OP(c)) {
389 case ANYOF:
161b471a 390 Class = (char *) OPERAND(c);
a0d0e21e 391 while (s < strend) {
ae5c130c 392 if (REGINCLASS(Class, *s)) {
a0d0e21e
LW
393 if (tmp && regtry(prog, s))
394 goto got_it;
395 else
396 tmp = doevery;
a687059c 397 }
a0d0e21e
LW
398 else
399 tmp = 1;
400 s++;
401 }
402 break;
bbce6d69 403 case BOUNDL:
c277df42 404 reg_flags |= RF_tainted;
bbce6d69 405 /* FALL THROUGH */
a0d0e21e
LW
406 case BOUND:
407 if (minlen)
408 dontbother++,strend--;
bbce6d69 409 tmp = (s != startpos) ? UCHARAT(s - 1) : regprev;
95bac841 410 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 411 while (s < strend) {
95bac841 412 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
413 tmp = !tmp;
414 if (regtry(prog, s))
415 goto got_it;
a687059c 416 }
a0d0e21e
LW
417 s++;
418 }
419 if ((minlen || tmp) && regtry(prog,s))
420 goto got_it;
421 break;
bbce6d69 422 case NBOUNDL:
c277df42 423 reg_flags |= RF_tainted;
bbce6d69 424 /* FALL THROUGH */
a0d0e21e
LW
425 case NBOUND:
426 if (minlen)
427 dontbother++,strend--;
bbce6d69 428 tmp = (s != startpos) ? UCHARAT(s - 1) : regprev;
95bac841 429 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 430 while (s < strend) {
95bac841 431 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
432 tmp = !tmp;
433 else if (regtry(prog, s))
434 goto got_it;
435 s++;
436 }
437 if ((minlen || !tmp) && regtry(prog,s))
438 goto got_it;
439 break;
440 case ALNUM:
441 while (s < strend) {
bbce6d69 442 if (isALNUM(*s)) {
443 if (tmp && regtry(prog, s))
444 goto got_it;
445 else
446 tmp = doevery;
447 }
448 else
449 tmp = 1;
450 s++;
451 }
452 break;
453 case ALNUML:
c277df42 454 reg_flags |= RF_tainted;
bbce6d69 455 while (s < strend) {
456 if (isALNUM_LC(*s)) {
a0d0e21e
LW
457 if (tmp && regtry(prog, s))
458 goto got_it;
a687059c 459 else
a0d0e21e
LW
460 tmp = doevery;
461 }
462 else
463 tmp = 1;
464 s++;
465 }
466 break;
467 case NALNUM:
468 while (s < strend) {
bbce6d69 469 if (!isALNUM(*s)) {
470 if (tmp && regtry(prog, s))
471 goto got_it;
472 else
473 tmp = doevery;
474 }
475 else
476 tmp = 1;
477 s++;
478 }
479 break;
480 case NALNUML:
c277df42 481 reg_flags |= RF_tainted;
bbce6d69 482 while (s < strend) {
483 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
484 if (tmp && regtry(prog, s))
485 goto got_it;
a687059c 486 else
a0d0e21e 487 tmp = doevery;
a687059c 488 }
a0d0e21e
LW
489 else
490 tmp = 1;
491 s++;
492 }
493 break;
494 case SPACE:
495 while (s < strend) {
496 if (isSPACE(*s)) {
497 if (tmp && regtry(prog, s))
498 goto got_it;
499 else
500 tmp = doevery;
2304df62 501 }
a0d0e21e
LW
502 else
503 tmp = 1;
504 s++;
505 }
506 break;
bbce6d69 507 case SPACEL:
c277df42 508 reg_flags |= RF_tainted;
bbce6d69 509 while (s < strend) {
510 if (isSPACE_LC(*s)) {
511 if (tmp && regtry(prog, s))
512 goto got_it;
513 else
514 tmp = doevery;
515 }
516 else
517 tmp = 1;
518 s++;
519 }
520 break;
a0d0e21e
LW
521 case NSPACE:
522 while (s < strend) {
523 if (!isSPACE(*s)) {
524 if (tmp && regtry(prog, s))
525 goto got_it;
526 else
527 tmp = doevery;
a687059c 528 }
a0d0e21e
LW
529 else
530 tmp = 1;
531 s++;
532 }
533 break;
bbce6d69 534 case NSPACEL:
c277df42 535 reg_flags |= RF_tainted;
bbce6d69 536 while (s < strend) {
537 if (!isSPACE_LC(*s)) {
538 if (tmp && regtry(prog, s))
539 goto got_it;
540 else
541 tmp = doevery;
542 }
543 else
544 tmp = 1;
545 s++;
546 }
547 break;
a0d0e21e
LW
548 case DIGIT:
549 while (s < strend) {
550 if (isDIGIT(*s)) {
551 if (tmp && regtry(prog, s))
552 goto got_it;
553 else
554 tmp = doevery;
2b69d0c2 555 }
a0d0e21e
LW
556 else
557 tmp = 1;
558 s++;
559 }
560 break;
561 case NDIGIT:
562 while (s < strend) {
563 if (!isDIGIT(*s)) {
564 if (tmp && regtry(prog, s))
565 goto got_it;
566 else
567 tmp = doevery;
a687059c 568 }
a0d0e21e
LW
569 else
570 tmp = 1;
571 s++;
572 }
573 break;
a687059c 574 }
a0d0e21e
LW
575 }
576 else {
c277df42
IZ
577 dontbother = 0;
578 if (prog->float_substr != Nullsv) { /* Trim the end. */
579 char *last;
580 I32 oldpos = scream_pos;
581
582 if (screamer) {
583 last = screaminstr(screamer, prog->float_substr, s - strbeg,
584 end_shift, &scream_pos, 1); /* last one */
585 if (!last) {
586 last = scream_olds; /* Only one occurence. */
587 }
588 } else {
589 STRLEN len;
590 char *little = SvPV(prog->float_substr, len);
591 last = rninstr(s, strend, little, little + len);
592 }
593 if (last == NULL) goto phooey; /* Should not happen! */
594 dontbother = strend - last - 1;
595 }
596 if (minlen && (dontbother < minlen))
a0d0e21e
LW
597 dontbother = minlen - 1;
598 strend -= dontbother;
599 /* We don't know much -- general case. */
600 do {
601 if (regtry(prog, s))
602 goto got_it;
603 } while (s++ < strend);
604 }
605
606 /* Failure. */
607 goto phooey;
a687059c 608
a0d0e21e 609got_it:
420218e7 610 strend += dontbother; /* uncheat */
a0d0e21e
LW
611 prog->subbeg = strbeg;
612 prog->subend = strend;
72311751 613 RX_MATCH_TAINTED_set(prog, reg_flags & RF_tainted);
5f05dabc 614
615 /* make sure $`, $&, $', and $digit will work later */
c277df42
IZ
616 if (strbeg != prog->subbase) { /* second+ //g match. */
617 if (!(flags & REXEC_COPY_STR)) {
137443ea 618 if (prog->subbase) {
619 Safefree(prog->subbase);
620 prog->subbase = Nullch;
621 }
622 }
623 else {
624 I32 i = strend - startpos + (stringarg - strbeg);
625 s = savepvn(strbeg, i);
626 Safefree(prog->subbase);
627 prog->subbase = s;
628 prog->subbeg = prog->subbase;
629 prog->subend = prog->subbase + i;
630 s = prog->subbase + (stringarg - strbeg);
631 for (i = 0; i <= prog->nparens; i++) {
632 if (prog->endp[i]) {
633 prog->startp[i] = s + (prog->startp[i] - startpos);
634 prog->endp[i] = s + (prog->endp[i] - startpos);
635 }
a0d0e21e
LW
636 }
637 }
a0d0e21e 638 }
ce862d02
IZ
639 /* Preserve the current value of $^R */
640 if (oreplsv != GvSV(replgv)) {
641 sv_setsv(oreplsv, GvSV(replgv));/* So that when GvSV(replgv) is
642 restored, the value remains
643 the same. */
644 }
a0d0e21e
LW
645 return 1;
646
647phooey:
a0d0e21e 648 return 0;
a687059c
LW
649}
650
651/*
652 - regtry - try match at specific point
653 */
76e3520e 654STATIC I32 /* 0 failure, 1 success */
8ac85365 655regtry(regexp *prog, char *startpos)
a687059c 656{
c277df42 657 dTHR;
a0d0e21e
LW
658 register I32 i;
659 register char **sp;
660 register char **ep;
c277df42 661 CHECKPOINT lastcp;
a0d0e21e 662
ce862d02
IZ
663 if ((prog->reganch & ROPT_EVAL_SEEN) && !reg_eval_set) {
664 reg_eval_set = RS_init;
665 DEBUG_r(DEBUG_s(
666 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n", stack_sp - stack_base);
667 ));
668 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
669 cxstack[cxstack_ix].blk_oldsp = stack_sp - stack_base;
670 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
671 SAVETMPS;
672 /* Apparently this is not needed, judging by wantarray. */
673 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
674 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
675 }
a0d0e21e
LW
676 reginput = startpos;
677 regstartp = prog->startp;
678 regendp = prog->endp;
679 reglastparen = &prog->lastparen;
680 prog->lastparen = 0;
681 regsize = 0;
c277df42
IZ
682 if (reg_start_tmpl <= prog->nparens) {
683 reg_start_tmpl = prog->nparens*3/2 + 3;
684 if(reg_start_tmp)
685 Renew(reg_start_tmp, reg_start_tmpl, char*);
686 else
687 New(22,reg_start_tmp, reg_start_tmpl, char*);
688 }
a0d0e21e
LW
689
690 sp = prog->startp;
691 ep = prog->endp;
7fae4e64 692 regdata = prog->data;
a0d0e21e
LW
693 if (prog->nparens) {
694 for (i = prog->nparens; i >= 0; i--) {
695 *sp++ = NULL;
696 *ep++ = NULL;
a687059c 697 }
a0d0e21e 698 }
c277df42 699 REGCP_SET;
7e5428c5 700 if (regmatch(prog->program + 1)) {
a0d0e21e
LW
701 prog->startp[0] = startpos;
702 prog->endp[0] = reginput;
703 return 1;
704 }
c277df42
IZ
705 REGCP_UNWIND;
706 return 0;
a687059c
LW
707}
708
709/*
710 - regmatch - main matching routine
711 *
712 * Conceptually the strategy is simple: check to see whether the current
713 * node matches, call self recursively to see whether the rest matches,
714 * and then act accordingly. In practice we make some effort to avoid
715 * recursion, in particular by going through "ordinary" nodes (that don't
716 * need to know whether the rest of the match failed) by a loop instead of
717 * by recursion.
718 */
719/* [lwall] I've hoisted the register declarations to the outer block in order to
720 * maybe save a little bit of pushing and popping on the stack. It also takes
721 * advantage of machines that use a register save mask on subroutine entry.
722 */
76e3520e 723STATIC I32 /* 0 failure, 1 success */
c277df42 724regmatch(regnode *prog)
a687059c 725{
c277df42
IZ
726 dTHR;
727 register regnode *scan; /* Current node. */
728 regnode *next; /* Next node. */
729 regnode *inner; /* Next node in internal branch. */
76e3520e 730 register I32 nextchr; /* renamed nextchr - nextchar colides with function of same name */
a0d0e21e
LW
731 register I32 n; /* no or next */
732 register I32 ln; /* len or last */
733 register char *s; /* operand or save */
734 register char *locinput = reginput;
c277df42
IZ
735 register I32 c1, c2, paren; /* case fold search, parenth */
736 int minmod = 0, sw = 0, logical = 0;
4633a7c4 737#ifdef DEBUGGING
4633a7c4
LW
738 regindent++;
739#endif
a0d0e21e 740
76e3520e 741 nextchr = UCHARAT(locinput);
a0d0e21e
LW
742 scan = prog;
743 while (scan != NULL) {
c277df42 744#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
a687059c 745#ifdef DEBUGGING
c277df42
IZ
746# define sayYES goto yes
747# define sayNO goto no
748# define saySAME(x) if (x) goto yes; else goto no
749# define REPORT_CODE_OFF 24
4633a7c4 750#else
c277df42
IZ
751# define sayYES return 1
752# define sayNO return 0
753# define saySAME(x) return x
a687059c 754#endif
c277df42
IZ
755 DEBUG_r( {
756 SV *prop = sv_newmortal();
757 int docolor = *colors[0];
758 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
759 int l = (regeol - locinput > taill ? taill : regeol - locinput);
760 int pref_len = (locinput - bostr > (5 + taill) - l
761 ? (5 + taill) - l : locinput - bostr);
762
763 if (l + pref_len < (5 + taill) && l < regeol - locinput)
764 l = ( regeol - locinput > (5 + taill) - pref_len
765 ? (5 + taill) - pref_len : regeol - locinput);
766 regprop(prop, scan);
767 PerlIO_printf(Perl_debug_log,
768 "%4i <%s%.*s%s%s%s%.*s%s>%*s|%*s%2d%s\n",
769 locinput - bostr,
770 colors[2], pref_len, locinput - pref_len, colors[3],
771 (docolor ? "" : "> <"),
772 colors[0], l, locinput, colors[1],
773 15 - l - pref_len + 1,
774 "",
775 regindent*2, "", scan - regprogram,
776 SvPVX(prop));
777 } );
a687059c 778
c277df42 779 next = scan + NEXT_OFF(scan);
a0d0e21e
LW
780 if (next == scan)
781 next = NULL;
a687059c 782
a0d0e21e
LW
783 switch (OP(scan)) {
784 case BOL:
ee027f1d 785 if (locinput == bostr
a0d0e21e 786 ? regprev == '\n'
c277df42 787 : (multiline &&
76e3520e 788 (nextchr || locinput < regeol) && locinput[-1] == '\n') )
a0d0e21e
LW
789 {
790 /* regtill = regbol; */
791 break;
792 }
4633a7c4 793 sayNO;
a0d0e21e 794 case MBOL:
ee027f1d 795 if (locinput == bostr
a0d0e21e 796 ? regprev == '\n'
76e3520e 797 : ((nextchr || locinput < regeol) && locinput[-1] == '\n') )
a0d0e21e
LW
798 {
799 break;
800 }
4633a7c4 801 sayNO;
a0d0e21e
LW
802 case SBOL:
803 if (locinput == regbol && regprev == '\n')
804 break;
4633a7c4 805 sayNO;
774d564b 806 case GPOS:
a0d0e21e
LW
807 if (locinput == regbol)
808 break;
4633a7c4 809 sayNO;
a0d0e21e
LW
810 case EOL:
811 if (multiline)
812 goto meol;
813 else
814 goto seol;
815 case MEOL:
816 meol:
76e3520e 817 if ((nextchr || locinput < regeol) && nextchr != '\n')
4633a7c4 818 sayNO;
a0d0e21e
LW
819 break;
820 case SEOL:
821 seol:
76e3520e 822 if ((nextchr || locinput < regeol) && nextchr != '\n')
4633a7c4 823 sayNO;
a0d0e21e 824 if (regeol - locinput > 1)
4633a7c4 825 sayNO;
a0d0e21e 826 break;
b85d18e9
IZ
827 case EOS:
828 if (regeol != locinput)
829 sayNO;
830 break;
a0d0e21e 831 case SANY:
76e3520e 832 if (!nextchr && locinput >= regeol)
4633a7c4 833 sayNO;
76e3520e 834 nextchr = UCHARAT(++locinput);
a0d0e21e
LW
835 break;
836 case ANY:
76e3520e 837 if (!nextchr && locinput >= regeol || nextchr == '\n')
4633a7c4 838 sayNO;
76e3520e 839 nextchr = UCHARAT(++locinput);
a0d0e21e 840 break;
bbce6d69 841 case EXACT:
161b471a 842 s = (char *) OPERAND(scan);
c277df42 843 ln = UCHARAT(s++);
a0d0e21e 844 /* Inline the first character, for speed. */
76e3520e 845 if (UCHARAT(s) != nextchr)
4633a7c4 846 sayNO;
a0d0e21e 847 if (regeol - locinput < ln)
4633a7c4 848 sayNO;
36477c24 849 if (ln > 1 && memNE(s, locinput, ln))
4633a7c4 850 sayNO;
a0d0e21e 851 locinput += ln;
76e3520e 852 nextchr = UCHARAT(locinput);
bbce6d69 853 break;
854 case EXACTFL:
c277df42 855 reg_flags |= RF_tainted;
bbce6d69 856 /* FALL THROUGH */
857 case EXACTF:
161b471a 858 s = (char *) OPERAND(scan);
c277df42 859 ln = UCHARAT(s++);
bbce6d69 860 /* Inline the first character, for speed. */
76e3520e 861 if (UCHARAT(s) != nextchr &&
bbce6d69 862 UCHARAT(s) != ((OP(scan) == EXACTF)
76e3520e 863 ? fold : fold_locale)[nextchr])
bbce6d69 864 sayNO;
865 if (regeol - locinput < ln)
866 sayNO;
5f05dabc 867 if (ln > 1 && (OP(scan) == EXACTF
868 ? ibcmp(s, locinput, ln)
869 : ibcmp_locale(s, locinput, ln)))
bbce6d69 870 sayNO;
871 locinput += ln;
76e3520e 872 nextchr = UCHARAT(locinput);
a0d0e21e
LW
873 break;
874 case ANYOF:
161b471a 875 s = (char *) OPERAND(scan);
76e3520e
GS
876 if (nextchr < 0)
877 nextchr = UCHARAT(locinput);
873ef191 878 if (!REGINCLASS(s, nextchr))
4633a7c4 879 sayNO;
76e3520e 880 if (!nextchr && locinput >= regeol)
4633a7c4 881 sayNO;
76e3520e 882 nextchr = UCHARAT(++locinput);
a0d0e21e 883 break;
bbce6d69 884 case ALNUML:
c277df42 885 reg_flags |= RF_tainted;
bbce6d69 886 /* FALL THROUGH */
a0d0e21e 887 case ALNUM:
76e3520e 888 if (!nextchr)
4633a7c4 889 sayNO;
bbce6d69 890 if (!(OP(scan) == ALNUM
76e3520e 891 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
4633a7c4 892 sayNO;
76e3520e 893 nextchr = UCHARAT(++locinput);
a0d0e21e 894 break;
bbce6d69 895 case NALNUML:
c277df42 896 reg_flags |= RF_tainted;
bbce6d69 897 /* FALL THROUGH */
a0d0e21e 898 case NALNUM:
76e3520e 899 if (!nextchr && locinput >= regeol)
4633a7c4 900 sayNO;
bbce6d69 901 if (OP(scan) == NALNUM
76e3520e 902 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
4633a7c4 903 sayNO;
76e3520e 904 nextchr = UCHARAT(++locinput);
a0d0e21e 905 break;
bbce6d69 906 case BOUNDL:
907 case NBOUNDL:
c277df42 908 reg_flags |= RF_tainted;
bbce6d69 909 /* FALL THROUGH */
a0d0e21e 910 case BOUND:
bbce6d69 911 case NBOUND:
912 /* was last char in word? */
913 ln = (locinput != regbol) ? UCHARAT(locinput - 1) : regprev;
914 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
915 ln = isALNUM(ln);
76e3520e 916 n = isALNUM(nextchr);
bbce6d69 917 }
918 else {
919 ln = isALNUM_LC(ln);
76e3520e 920 n = isALNUM_LC(nextchr);
bbce6d69 921 }
95bac841 922 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 923 sayNO;
a0d0e21e 924 break;
bbce6d69 925 case SPACEL:
c277df42 926 reg_flags |= RF_tainted;
bbce6d69 927 /* FALL THROUGH */
a0d0e21e 928 case SPACE:
76e3520e 929 if (!nextchr && locinput >= regeol)
4633a7c4 930 sayNO;
bbce6d69 931 if (!(OP(scan) == SPACE
76e3520e 932 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 933 sayNO;
76e3520e 934 nextchr = UCHARAT(++locinput);
a0d0e21e 935 break;
bbce6d69 936 case NSPACEL:
c277df42 937 reg_flags |= RF_tainted;
bbce6d69 938 /* FALL THROUGH */
a0d0e21e 939 case NSPACE:
76e3520e 940 if (!nextchr)
4633a7c4 941 sayNO;
bbce6d69 942 if (OP(scan) == SPACE
76e3520e 943 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 944 sayNO;
76e3520e 945 nextchr = UCHARAT(++locinput);
a0d0e21e
LW
946 break;
947 case DIGIT:
76e3520e 948 if (!isDIGIT(nextchr))
4633a7c4 949 sayNO;
76e3520e 950 nextchr = UCHARAT(++locinput);
a0d0e21e
LW
951 break;
952 case NDIGIT:
76e3520e 953 if (!nextchr && locinput >= regeol)
4633a7c4 954 sayNO;
76e3520e 955 if (isDIGIT(nextchr))
4633a7c4 956 sayNO;
76e3520e 957 nextchr = UCHARAT(++locinput);
a0d0e21e 958 break;
c8756f30 959 case REFFL:
c277df42 960 reg_flags |= RF_tainted;
c8756f30 961 /* FALL THROUGH */
c277df42 962 case REF:
c8756f30 963 case REFF:
c277df42 964 n = ARG(scan); /* which paren pair */
a0d0e21e 965 s = regstartp[n];
c277df42 966 if (*reglastparen < n || !s)
af3f8c16 967 sayNO; /* Do not match unless seen CLOSEn. */
a0d0e21e
LW
968 if (s == regendp[n])
969 break;
970 /* Inline the first character, for speed. */
76e3520e 971 if (UCHARAT(s) != nextchr &&
c8756f30
AK
972 (OP(scan) == REF ||
973 (UCHARAT(s) != ((OP(scan) == REFF
76e3520e 974 ? fold : fold_locale)[nextchr]))))
4633a7c4 975 sayNO;
a0d0e21e
LW
976 ln = regendp[n] - s;
977 if (locinput + ln > regeol)
4633a7c4 978 sayNO;
c8756f30
AK
979 if (ln > 1 && (OP(scan) == REF
980 ? memNE(s, locinput, ln)
981 : (OP(scan) == REFF
982 ? ibcmp(s, locinput, ln)
983 : ibcmp_locale(s, locinput, ln))))
4633a7c4 984 sayNO;
a0d0e21e 985 locinput += ln;
76e3520e 986 nextchr = UCHARAT(locinput);
a0d0e21e
LW
987 break;
988
989 case NOTHING:
c277df42 990 case TAIL:
a0d0e21e
LW
991 break;
992 case BACK:
993 break;
c277df42
IZ
994 case EVAL:
995 {
996 dSP;
997 OP_4tree *oop = op;
998 COP *ocurcop = curcop;
999 SV **ocurpad = curpad;
1000 SV *ret;
1001
1002 n = ARG(scan);
7fae4e64 1003 op = (OP_4tree*)regdata->data[n];
c277df42 1004 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", op) );
7fae4e64 1005 curpad = AvARRAY((AV*)regdata->data[n + 1]);
c277df42 1006
76e3520e 1007 CALLRUNOPS(); /* Scalar context. */
c277df42
IZ
1008 SPAGAIN;
1009 ret = POPs;
1010 PUTBACK;
1011
1012 if (logical) {
1013 logical = 0;
1014 sw = SvTRUE(ret);
ce862d02
IZ
1015 } else
1016 sv_setsv(save_scalar(replgv), ret);
c277df42
IZ
1017 op = oop;
1018 curpad = ocurpad;
1019 curcop = ocurcop;
1020 break;
1021 }
a0d0e21e 1022 case OPEN:
c277df42
IZ
1023 n = ARG(scan); /* which paren pair */
1024 reg_start_tmp[n] = locinput;
a0d0e21e
LW
1025 if (n > regsize)
1026 regsize = n;
1027 break;
1028 case CLOSE:
c277df42
IZ
1029 n = ARG(scan); /* which paren pair */
1030 regstartp[n] = reg_start_tmp[n];
a0d0e21e
LW
1031 regendp[n] = locinput;
1032 if (n > *reglastparen)
1033 *reglastparen = n;
1034 break;
c277df42
IZ
1035 case GROUPP:
1036 n = ARG(scan); /* which paren pair */
1037 sw = (*reglastparen >= n && regendp[n] != NULL);
1038 break;
1039 case IFTHEN:
1040 if (sw)
1041 next = NEXTOPER(NEXTOPER(scan));
1042 else {
1043 next = scan + ARG(scan);
1044 if (OP(next) == IFTHEN) /* Fake one. */
1045 next = NEXTOPER(NEXTOPER(next));
1046 }
1047 break;
1048 case LOGICAL:
1049 logical = 1;
1050 break;
a0d0e21e
LW
1051 case CURLYX: {
1052 CURCUR cc;
1053 CHECKPOINT cp = savestack_ix;
c277df42
IZ
1054
1055 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
1056 next += ARG(next);
a0d0e21e
LW
1057 cc.oldcc = regcc;
1058 regcc = &cc;
1059 cc.parenfloor = *reglastparen;
1060 cc.cur = -1;
1061 cc.min = ARG1(scan);
1062 cc.max = ARG2(scan);
c277df42 1063 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
1064 cc.next = next;
1065 cc.minmod = minmod;
1066 cc.lastloc = 0;
1067 reginput = locinput;
1068 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
1069 regcpblow(cp);
1070 regcc = cc.oldcc;
4633a7c4 1071 saySAME(n);
a0d0e21e
LW
1072 }
1073 /* NOT REACHED */
1074 case WHILEM: {
1075 /*
1076 * This is really hard to understand, because after we match
1077 * what we're trying to match, we must make sure the rest of
1078 * the RE is going to match for sure, and to do that we have
1079 * to go back UP the parse tree by recursing ever deeper. And
1080 * if it fails, we have to reset our parent's current state
1081 * that we can try again after backing off.
1082 */
1083
c277df42 1084 CHECKPOINT cp, lastcp;
a0d0e21e 1085 CURCUR* cc = regcc;
c277df42
IZ
1086 char *lastloc = cc->lastloc; /* Detection of 0-len. */
1087
4633a7c4 1088 n = cc->cur + 1; /* how many we know we matched */
a0d0e21e
LW
1089 reginput = locinput;
1090
c277df42
IZ
1091 DEBUG_r(
1092 PerlIO_printf(Perl_debug_log,
1093 "%*s %ld out of %ld..%ld cc=%lx\n",
1094 REPORT_CODE_OFF+regindent*2, "",
1095 (long)n, (long)cc->min,
1096 (long)cc->max, (long)cc)
1097 );
4633a7c4 1098
a0d0e21e
LW
1099 /* If degenerate scan matches "", assume scan done. */
1100
579cf2c3 1101 if (locinput == cc->lastloc && n >= cc->min) {
a0d0e21e
LW
1102 regcc = cc->oldcc;
1103 ln = regcc->cur;
c277df42
IZ
1104 DEBUG_r(
1105 PerlIO_printf(Perl_debug_log, "%*s empty match detected, try continuation...\n", REPORT_CODE_OFF+regindent*2, "")
1106 );
a0d0e21e 1107 if (regmatch(cc->next))
4633a7c4 1108 sayYES;
c277df42
IZ
1109 DEBUG_r(
1110 PerlIO_printf(Perl_debug_log, "%*s failed...\n", REPORT_CODE_OFF+regindent*2, "")
1111 );
a0d0e21e
LW
1112 regcc->cur = ln;
1113 regcc = cc;
4633a7c4 1114 sayNO;
a0d0e21e
LW
1115 }
1116
1117 /* First just match a string of min scans. */
1118
1119 if (n < cc->min) {
1120 cc->cur = n;
1121 cc->lastloc = locinput;
4633a7c4
LW
1122 if (regmatch(cc->scan))
1123 sayYES;
1124 cc->cur = n - 1;
c277df42
IZ
1125 cc->lastloc = lastloc;
1126 DEBUG_r(
1127 PerlIO_printf(Perl_debug_log, "%*s failed...\n", REPORT_CODE_OFF+regindent*2, "")
1128 );
4633a7c4 1129 sayNO;
a0d0e21e
LW
1130 }
1131
1132 /* Prefer next over scan for minimal matching. */
1133
1134 if (cc->minmod) {
1135 regcc = cc->oldcc;
1136 ln = regcc->cur;
5f05dabc 1137 cp = regcppush(cc->parenfloor);
c277df42 1138 REGCP_SET;
5f05dabc 1139 if (regmatch(cc->next)) {
c277df42 1140 regcpblow(cp);
4633a7c4 1141 sayYES; /* All done. */
5f05dabc 1142 }
c277df42 1143 REGCP_UNWIND;
5f05dabc 1144 regcppop();
a0d0e21e
LW
1145 regcc->cur = ln;
1146 regcc = cc;
1147
c277df42
IZ
1148 if (n >= cc->max) { /* Maximum greed exceeded? */
1149 if (dowarn && n >= REG_INFTY
1150 && !(reg_flags & RF_warned)) {
1151 reg_flags |= RF_warned;
1152 warn("count exceeded %d", REG_INFTY - 1);
1153 }
4633a7c4 1154 sayNO;
c277df42 1155 }
a687059c 1156
c277df42
IZ
1157 DEBUG_r(
1158 PerlIO_printf(Perl_debug_log, "%*s trying longer...\n", REPORT_CODE_OFF+regindent*2, "")
1159 );
a0d0e21e
LW
1160 /* Try scanning more and see if it helps. */
1161 reginput = locinput;
1162 cc->cur = n;
1163 cc->lastloc = locinput;
5f05dabc 1164 cp = regcppush(cc->parenfloor);
c277df42 1165 REGCP_SET;
5f05dabc 1166 if (regmatch(cc->scan)) {
c277df42 1167 regcpblow(cp);
4633a7c4 1168 sayYES;
5f05dabc 1169 }
c277df42
IZ
1170 DEBUG_r(
1171 PerlIO_printf(Perl_debug_log, "%*s failed...\n", REPORT_CODE_OFF+regindent*2, "")
1172 );
1173 REGCP_UNWIND;
5f05dabc 1174 regcppop();
4633a7c4 1175 cc->cur = n - 1;
c277df42 1176 cc->lastloc = lastloc;
4633a7c4 1177 sayNO;
a0d0e21e
LW
1178 }
1179
1180 /* Prefer scan over next for maximal matching. */
1181
1182 if (n < cc->max) { /* More greed allowed? */
5f05dabc 1183 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
1184 cc->cur = n;
1185 cc->lastloc = locinput;
c277df42 1186 REGCP_SET;
5f05dabc 1187 if (regmatch(cc->scan)) {
c277df42 1188 regcpblow(cp);
4633a7c4 1189 sayYES;
5f05dabc 1190 }
c277df42 1191 REGCP_UNWIND;
a0d0e21e
LW
1192 regcppop(); /* Restore some previous $<digit>s? */
1193 reginput = locinput;
c277df42
IZ
1194 DEBUG_r(
1195 PerlIO_printf(Perl_debug_log, "%*s failed, try continuation...\n", REPORT_CODE_OFF+regindent*2, "")
1196 );
1197 }
1198 if (dowarn && n >= REG_INFTY && !(reg_flags & RF_warned)) {
1199 reg_flags |= RF_warned;
1200 warn("count exceeded %d", REG_INFTY - 1);
a0d0e21e
LW
1201 }
1202
1203 /* Failed deeper matches of scan, so see if this one works. */
1204 regcc = cc->oldcc;
1205 ln = regcc->cur;
1206 if (regmatch(cc->next))
4633a7c4 1207 sayYES;
c277df42
IZ
1208 DEBUG_r(
1209 PerlIO_printf(Perl_debug_log, "%*s failed...\n", REPORT_CODE_OFF+regindent*2, "")
1210 );
a0d0e21e
LW
1211 regcc->cur = ln;
1212 regcc = cc;
4633a7c4 1213 cc->cur = n - 1;
c277df42 1214 cc->lastloc = lastloc;
4633a7c4 1215 sayNO;
a0d0e21e
LW
1216 }
1217 /* NOT REACHED */
c277df42
IZ
1218 case BRANCHJ:
1219 next = scan + ARG(scan);
1220 if (next == scan)
1221 next = NULL;
1222 inner = NEXTOPER(NEXTOPER(scan));
1223 goto do_branch;
1224 case BRANCH:
1225 inner = NEXTOPER(scan);
1226 do_branch:
1227 {
1228 CHECKPOINT lastcp;
1229 c1 = OP(scan);
1230 if (OP(next) != c1) /* No choice. */
1231 next = inner; /* Avoid recursion. */
a0d0e21e 1232 else {
748a9306 1233 int lastparen = *reglastparen;
c277df42
IZ
1234
1235 REGCP_SET;
a0d0e21e
LW
1236 do {
1237 reginput = locinput;
c277df42 1238 if (regmatch(inner))
4633a7c4 1239 sayYES;
c277df42 1240 REGCP_UNWIND;
748a9306
LW
1241 for (n = *reglastparen; n > lastparen; n--)
1242 regendp[n] = 0;
1243 *reglastparen = n;
c277df42 1244 scan = next;
a0d0e21e 1245 /*SUPPRESS 560*/
c277df42
IZ
1246 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
1247 next += n;
a0d0e21e 1248 else
c277df42 1249 next = NULL;
c277df42
IZ
1250 inner = NEXTOPER(scan);
1251 if (c1 == BRANCHJ) {
1252 inner = NEXTOPER(inner);
1253 }
1254 } while (scan != NULL && OP(scan) == c1);
4633a7c4 1255 sayNO;
a0d0e21e 1256 /* NOTREACHED */
a687059c 1257 }
a0d0e21e
LW
1258 }
1259 break;
1260 case MINMOD:
1261 minmod = 1;
1262 break;
c277df42
IZ
1263 case CURLYM:
1264 {
00db4c45 1265 I32 l = 0;
c277df42
IZ
1266 CHECKPOINT lastcp;
1267
1268 /* We suppose that the next guy does not need
1269 backtracking: in particular, it is of constant length,
1270 and has no parenths to influence future backrefs. */
1271 ln = ARG1(scan); /* min to match */
1272 n = ARG2(scan); /* max to match */
c277df42
IZ
1273 paren = scan->flags;
1274 if (paren) {
1275 if (paren > regsize)
1276 regsize = paren;
1277 if (paren > *reglastparen)
1278 *reglastparen = paren;
1279 }
dc45a647 1280 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
1281 if (paren)
1282 scan += NEXT_OFF(scan); /* Skip former OPEN. */
1283 reginput = locinput;
1284 if (minmod) {
1285 minmod = 0;
1286 if (ln && regrepeat_hard(scan, ln, &l) < ln)
1287 sayNO;
5f4b28b2 1288 if (ln && l == 0 && n >= ln
c277df42
IZ
1289 /* In fact, this is tricky. If paren, then the
1290 fact that we did/didnot match may influence
1291 future execution. */
1292 && !(paren && ln == 0))
1293 ln = n;
1294 locinput = reginput;
1295 if (regkind[(U8)OP(next)] == EXACT) {
1296 c1 = UCHARAT(OPERAND(next) + 1);
1297 if (OP(next) == EXACTF)
1298 c2 = fold[c1];
1299 else if (OP(next) == EXACTFL)
1300 c2 = fold_locale[c1];
1301 else
1302 c2 = c1;
1303 } else
1304 c1 = c2 = -1000;
1305 REGCP_SET;
5f4b28b2 1306 /* This may be improved if l == 0. */
c277df42
IZ
1307 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
1308 /* If it could work, try it. */
1309 if (c1 == -1000 ||
1310 UCHARAT(reginput) == c1 ||
1311 UCHARAT(reginput) == c2)
1312 {
1313 if (paren) {
1314 if (n) {
1315 regstartp[paren] = reginput - l;
1316 regendp[paren] = reginput;
1317 } else
1318 regendp[paren] = NULL;
1319 }
1320 if (regmatch(next))
1321 sayYES;
1322 REGCP_UNWIND;
1323 }
1324 /* Couldn't or didn't -- move forward. */
1325 reginput = locinput;
1326 if (regrepeat_hard(scan, 1, &l)) {
1327 ln++;
1328 locinput = reginput;
1329 }
1330 else
1331 sayNO;
1332 }
1333 } else {
1334 n = regrepeat_hard(scan, n, &l);
1335 if (n != 0 && l == 0
1336 /* In fact, this is tricky. If paren, then the
1337 fact that we did/didnot match may influence
1338 future execution. */
1339 && !(paren && ln == 0))
1340 ln = n;
1341 locinput = reginput;
1342 DEBUG_r(
1343 PerlIO_printf(Perl_debug_log, "%*s matched %ld times, len=%ld...\n", REPORT_CODE_OFF+regindent*2, "", n, l)
1344 );
1345 if (n >= ln) {
1346 if (regkind[(U8)OP(next)] == EXACT) {
1347 c1 = UCHARAT(OPERAND(next) + 1);
1348 if (OP(next) == EXACTF)
1349 c2 = fold[c1];
1350 else if (OP(next) == EXACTFL)
1351 c2 = fold_locale[c1];
1352 else
1353 c2 = c1;
1354 } else
1355 c1 = c2 = -1000;
1356 }
1357 REGCP_SET;
1358 while (n >= ln) {
1359 /* If it could work, try it. */
1360 if (c1 == -1000 ||
1361 UCHARAT(reginput) == c1 ||
1362 UCHARAT(reginput) == c2)
1363 {
1364 DEBUG_r(
1365 PerlIO_printf(Perl_debug_log, "%*s trying tail with n=%ld...\n", REPORT_CODE_OFF+regindent*2, "", n)
1366 );
1367 if (paren) {
1368 if (n) {
1369 regstartp[paren] = reginput - l;
1370 regendp[paren] = reginput;
1371 } else
1372 regendp[paren] = NULL;
1373 }
1374 if (regmatch(next))
1375 sayYES;
1376 REGCP_UNWIND;
1377 }
1378 /* Couldn't or didn't -- back up. */
1379 n--;
1380 locinput -= l;
1381 reginput = locinput;
1382 }
1383 }
1384 sayNO;
1385 break;
1386 }
1387 case CURLYN:
1388 paren = scan->flags; /* Which paren to set */
1389 if (paren > regsize)
1390 regsize = paren;
1391 if (paren > *reglastparen)
1392 *reglastparen = paren;
1393 ln = ARG1(scan); /* min to match */
1394 n = ARG2(scan); /* max to match */
dc45a647 1395 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 1396 goto repeat;
a0d0e21e 1397 case CURLY:
c277df42 1398 paren = 0;
a0d0e21e
LW
1399 ln = ARG1(scan); /* min to match */
1400 n = ARG2(scan); /* max to match */
dc45a647 1401 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
1402 goto repeat;
1403 case STAR:
1404 ln = 0;
c277df42 1405 n = REG_INFTY;
a0d0e21e 1406 scan = NEXTOPER(scan);
c277df42 1407 paren = 0;
a0d0e21e
LW
1408 goto repeat;
1409 case PLUS:
c277df42
IZ
1410 ln = 1;
1411 n = REG_INFTY;
1412 scan = NEXTOPER(scan);
1413 paren = 0;
1414 repeat:
a0d0e21e
LW
1415 /*
1416 * Lookahead to avoid useless match attempts
1417 * when we know what character comes next.
1418 */
bbce6d69 1419 if (regkind[(U8)OP(next)] == EXACT) {
1420 c1 = UCHARAT(OPERAND(next) + 1);
1421 if (OP(next) == EXACTF)
1422 c2 = fold[c1];
1423 else if (OP(next) == EXACTFL)
1424 c2 = fold_locale[c1];
1425 else
1426 c2 = c1;
1427 }
a0d0e21e 1428 else
bbce6d69 1429 c1 = c2 = -1000;
a0d0e21e
LW
1430 reginput = locinput;
1431 if (minmod) {
c277df42 1432 CHECKPOINT lastcp;
a0d0e21e
LW
1433 minmod = 0;
1434 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 1435 sayNO;
c277df42
IZ
1436 REGCP_SET;
1437 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 1438 /* If it could work, try it. */
bbce6d69 1439 if (c1 == -1000 ||
1440 UCHARAT(reginput) == c1 ||
1441 UCHARAT(reginput) == c2)
1442 {
c277df42
IZ
1443 if (paren) {
1444 if (n) {
1445 regstartp[paren] = reginput - 1;
1446 regendp[paren] = reginput;
1447 } else
1448 regendp[paren] = NULL;
1449 }
a0d0e21e 1450 if (regmatch(next))
4633a7c4 1451 sayYES;
c277df42 1452 REGCP_UNWIND;
bbce6d69 1453 }
c277df42 1454 /* Couldn't or didn't -- move forward. */
748a9306 1455 reginput = locinput + ln;
a0d0e21e
LW
1456 if (regrepeat(scan, 1)) {
1457 ln++;
1458 reginput = locinput + ln;
c277df42 1459 } else
4633a7c4 1460 sayNO;
a0d0e21e
LW
1461 }
1462 }
1463 else {
c277df42 1464 CHECKPOINT lastcp;
a0d0e21e
LW
1465 n = regrepeat(scan, n);
1466 if (ln < n && regkind[(U8)OP(next)] == EOL &&
c277df42 1467 (!multiline || OP(next) == SEOL))
a0d0e21e 1468 ln = n; /* why back off? */
c277df42
IZ
1469 REGCP_SET;
1470 if (paren) {
1471 while (n >= ln) {
1472 /* If it could work, try it. */
1473 if (c1 == -1000 ||
1474 UCHARAT(reginput) == c1 ||
1475 UCHARAT(reginput) == c2)
1476 {
1477 if (paren && n) {
1478 if (n) {
1479 regstartp[paren] = reginput - 1;
1480 regendp[paren] = reginput;
1481 } else
1482 regendp[paren] = NULL;
1483 }
1484 if (regmatch(next))
1485 sayYES;
1486 REGCP_UNWIND;
1487 }
1488 /* Couldn't or didn't -- back up. */
1489 n--;
1490 reginput = locinput + n;
1491 }
1492 } else {
1493 while (n >= ln) {
1494 /* If it could work, try it. */
1495 if (c1 == -1000 ||
1496 UCHARAT(reginput) == c1 ||
1497 UCHARAT(reginput) == c2)
1498 {
1499 if (regmatch(next))
1500 sayYES;
1501 REGCP_UNWIND;
1502 }
1503 /* Couldn't or didn't -- back up. */
1504 n--;
1505 reginput = locinput + n;
bbce6d69 1506 }
a0d0e21e
LW
1507 }
1508 }
4633a7c4 1509 sayNO;
c277df42 1510 break;
a0d0e21e 1511 case END:
7e5428c5
IZ
1512 if (locinput < regtill)
1513 sayNO; /* Cannot match: too short. */
1514 /* Fall through */
1515 case SUCCEED:
a0d0e21e 1516 reginput = locinput; /* put where regtry can find it */
4633a7c4 1517 sayYES; /* Success! */
c277df42
IZ
1518 case SUSPEND:
1519 n = 1;
1520 goto do_ifmatch;
a0d0e21e 1521 case UNLESSM:
c277df42
IZ
1522 n = 0;
1523 if (locinput < bostr + scan->flags)
1524 goto say_yes;
1525 goto do_ifmatch;
1526 case IFMATCH:
1527 n = 1;
1528 if (locinput < bostr + scan->flags)
1529 goto say_no;
1530 do_ifmatch:
1531 reginput = locinput - scan->flags;
1532 inner = NEXTOPER(NEXTOPER(scan));
1533 if (regmatch(inner) != n) {
1534 say_no:
1535 if (logical) {
1536 logical = 0;
1537 sw = 0;
1538 goto do_longjump;
1539 } else
1540 sayNO;
1541 }
1542 say_yes:
1543 if (logical) {
1544 logical = 0;
1545 sw = 1;
1546 }
fe44a5e8 1547 if (OP(scan) == SUSPEND) {
c277df42 1548 locinput = reginput;
565764a8 1549 nextchr = UCHARAT(locinput);
fe44a5e8 1550 }
c277df42
IZ
1551 /* FALL THROUGH. */
1552 case LONGJMP:
1553 do_longjump:
1554 next = scan + ARG(scan);
1555 if (next == scan)
1556 next = NULL;
a0d0e21e
LW
1557 break;
1558 default:
c030ccd9 1559 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 1560 (unsigned long)scan, OP(scan));
a0d0e21e 1561 FAIL("regexp memory corruption");
a687059c 1562 }
a0d0e21e
LW
1563 scan = next;
1564 }
a687059c 1565
a0d0e21e
LW
1566 /*
1567 * We get here only if there's trouble -- normally "case END" is
1568 * the terminating point.
1569 */
1570 FAIL("corrupted regexp pointers");
1571 /*NOTREACHED*/
4633a7c4
LW
1572 sayNO;
1573
1574yes:
1575#ifdef DEBUGGING
1576 regindent--;
1577#endif
1578 return 1;
1579
1580no:
1581#ifdef DEBUGGING
1582 regindent--;
1583#endif
a0d0e21e 1584 return 0;
a687059c
LW
1585}
1586
1587/*
1588 - regrepeat - repeatedly match something simple, report how many
1589 */
1590/*
1591 * [This routine now assumes that it will only match on things of length 1.
1592 * That was true before, but now we assume scan - reginput is the count,
1593 * rather than incrementing count on every character.]
1594 */
76e3520e 1595STATIC I32
c277df42 1596regrepeat(regnode *p, I32 max)
a687059c 1597{
a0d0e21e
LW
1598 register char *scan;
1599 register char *opnd;
1600 register I32 c;
1601 register char *loceol = regeol;
1602
1603 scan = reginput;
c277df42 1604 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 1605 loceol = scan + max;
161b471a 1606 opnd = (char *) OPERAND(p);
a0d0e21e
LW
1607 switch (OP(p)) {
1608 case ANY:
1609 while (scan < loceol && *scan != '\n')
1610 scan++;
1611 break;
1612 case SANY:
1613 scan = loceol;
1614 break;
bbce6d69 1615 case EXACT: /* length of string is 1 */
1616 c = UCHARAT(++opnd);
1617 while (scan < loceol && UCHARAT(scan) == c)
1618 scan++;
1619 break;
1620 case EXACTF: /* length of string is 1 */
1621 c = UCHARAT(++opnd);
1622 while (scan < loceol &&
1623 (UCHARAT(scan) == c || UCHARAT(scan) == fold[c]))
1624 scan++;
1625 break;
1626 case EXACTFL: /* length of string is 1 */
c277df42 1627 reg_flags |= RF_tainted;
bbce6d69 1628 c = UCHARAT(++opnd);
1629 while (scan < loceol &&
1630 (UCHARAT(scan) == c || UCHARAT(scan) == fold_locale[c]))
a0d0e21e
LW
1631 scan++;
1632 break;
1633 case ANYOF:
ae5c130c 1634 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 1635 scan++;
a0d0e21e
LW
1636 break;
1637 case ALNUM:
1638 while (scan < loceol && isALNUM(*scan))
1639 scan++;
1640 break;
bbce6d69 1641 case ALNUML:
c277df42 1642 reg_flags |= RF_tainted;
bbce6d69 1643 while (scan < loceol && isALNUM_LC(*scan))
1644 scan++;
1645 break;
a0d0e21e
LW
1646 case NALNUM:
1647 while (scan < loceol && !isALNUM(*scan))
1648 scan++;
1649 break;
bbce6d69 1650 case NALNUML:
c277df42 1651 reg_flags |= RF_tainted;
bbce6d69 1652 while (scan < loceol && !isALNUM_LC(*scan))
1653 scan++;
1654 break;
a0d0e21e
LW
1655 case SPACE:
1656 while (scan < loceol && isSPACE(*scan))
1657 scan++;
1658 break;
bbce6d69 1659 case SPACEL:
c277df42 1660 reg_flags |= RF_tainted;
bbce6d69 1661 while (scan < loceol && isSPACE_LC(*scan))
1662 scan++;
1663 break;
a0d0e21e
LW
1664 case NSPACE:
1665 while (scan < loceol && !isSPACE(*scan))
1666 scan++;
1667 break;
bbce6d69 1668 case NSPACEL:
c277df42 1669 reg_flags |= RF_tainted;
bbce6d69 1670 while (scan < loceol && !isSPACE_LC(*scan))
1671 scan++;
1672 break;
a0d0e21e
LW
1673 case DIGIT:
1674 while (scan < loceol && isDIGIT(*scan))
1675 scan++;
1676 break;
1677 case NDIGIT:
1678 while (scan < loceol && !isDIGIT(*scan))
1679 scan++;
1680 break;
1681 default: /* Called on something of 0 width. */
1682 break; /* So match right here or not at all. */
1683 }
a687059c 1684
a0d0e21e
LW
1685 c = scan - reginput;
1686 reginput = scan;
a687059c 1687
c277df42
IZ
1688 DEBUG_r(
1689 {
1690 SV *prop = sv_newmortal();
1691
1692 regprop(prop, p);
1693 PerlIO_printf(Perl_debug_log,
1694 "%*s %s can match %ld times out of %ld...\n",
1695 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
1696 });
1697
a0d0e21e 1698 return(c);
a687059c
LW
1699}
1700
1701/*
c277df42
IZ
1702 - regrepeat_hard - repeatedly match something, report total lenth and length
1703 *
1704 * The repeater is supposed to have constant length.
1705 */
1706
76e3520e 1707STATIC I32
c277df42
IZ
1708regrepeat_hard(regnode *p, I32 max, I32 *lp)
1709{
1710 register char *scan;
1711 register char *start;
1712 register char *loceol = regeol;
1713 I32 l = -1;
1714
1715 start = reginput;
1716 while (reginput < loceol && (scan = reginput, regmatch(p))) {
1717 if (l == -1) {
1718 *lp = l = reginput - start;
1719 if (max != REG_INFTY && l*max < loceol - scan)
1720 loceol = scan + l*max;
1721 if (l == 0) {
1722 return max;
1723 }
1724 }
1725 }
1726 if (reginput < loceol)
1727 reginput = scan;
1728 else
1729 scan = reginput;
1730
1731 return (scan - start)/l;
1732}
1733
1734/*
bbce6d69 1735 - regclass - determine if a character falls into a character class
1736 */
1737
76e3520e 1738STATIC bool
8ac85365 1739reginclass(register char *p, register I32 c)
bbce6d69 1740{
1741 char flags = *p;
1742 bool match = FALSE;
1743
1744 c &= 0xFF;
ae5c130c 1745 if (ANYOF_TEST(p, c))
bbce6d69 1746 match = TRUE;
1747 else if (flags & ANYOF_FOLD) {
1748 I32 cf;
1749 if (flags & ANYOF_LOCALE) {
c277df42 1750 reg_flags |= RF_tainted;
bbce6d69 1751 cf = fold_locale[c];
1752 }
1753 else
1754 cf = fold[c];
ae5c130c 1755 if (ANYOF_TEST(p, cf))
bbce6d69 1756 match = TRUE;
1757 }
1758
1759 if (!match && (flags & ANYOF_ISA)) {
c277df42 1760 reg_flags |= RF_tainted;
bbce6d69 1761
1762 if (((flags & ANYOF_ALNUML) && isALNUM_LC(c)) ||
1763 ((flags & ANYOF_NALNUML) && !isALNUM_LC(c)) ||
1764 ((flags & ANYOF_SPACEL) && isSPACE_LC(c)) ||
1765 ((flags & ANYOF_NSPACEL) && !isSPACE_LC(c)))
1766 {
1767 match = TRUE;
1768 }
1769 }
1770
ae5c130c 1771 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 1772}
1773
161b471a
NIS
1774
1775