This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[asperl] Created new branch from win32@396, added AS patch#1
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
f0fcb552 22/*SUPPRESS 112*/
a687059c 23/*
e50aee73 24 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
25 *
26 * Copyright (c) 1986 by University of Toronto.
27 * Written by Henry Spencer. Not derived from licensed software.
28 *
29 * Permission is granted to anyone to use this software for any
30 * purpose on any computer system, and to redistribute it freely,
31 * subject to the following restrictions:
32 *
33 * 1. The author is not responsible for the consequences of use of
34 * this software, no matter how awful, even if they arise
35 * from defects in it.
36 *
37 * 2. The origin of this software must not be misrepresented, either
38 * by explicit claim or by omission.
39 *
40 * 3. Altered versions must be plainly marked as such, and must not
41 * be misrepresented as being the original software.
42 *
43 **** Alterations to Henry's code are...
44 ****
9607fc9c 45 **** Copyright (c) 1991-1997, Larry Wall
a687059c 46 ****
9ef589d8
LW
47 **** You may distribute under the terms of either the GNU General Public
48 **** License or the Artistic License, as specified in the README file.
a687059c
LW
49 *
50 * Beware that some of this code is subtly aware of the way operator
51 * precedence is structured in regular expressions. Serious changes in
52 * regular-expression syntax might require a total rethink.
53 */
54#include "EXTERN.h"
55#include "perl.h"
56#include "regcomp.h"
57
c277df42
IZ
58static char * reginput; /* String-input pointer. */
59static char * regbol; /* Beginning of input, for ^ check. */
60static char * regeol; /* End of input, for $ check. */
61static char ** regstartp; /* Pointer to startp array. */
62static char ** regendp; /* Ditto for endp. */
63static U32 * reglastparen; /* Similarly for lastparen. */
64static char * regtill; /* How far we are required to go. */
65static char regprev; /* char before regbol, \n if none */
66
67static char * regprecomp; /* uncompiled string. */
68static I32 regnpar; /* () count. */
69static I32 regsize; /* Largest OPEN seens. */
70static char ** reg_start_tmp;
71static U32 reg_start_tmpl;
72static struct reg_data *data;
73static char *bostr;
74
75static U32 reg_flags; /* tainted/warned */
76static I32 reg_eval_set;
77
78#define RF_tainted 1 /* tainted information used? */
79#define RF_warned 2 /* warned about big count? */
80#define RF_evaled 4 /* Did an EVAL? */
81
a687059c
LW
82#ifndef STATIC
83#define STATIC static
84#endif
85
86#ifdef DEBUGGING
c277df42
IZ
87static I32 regnarrate = 0;
88static regnode* regprogram = 0;
a687059c
LW
89#endif
90
a0d0e21e
LW
91/* Current curly descriptor */
92typedef struct curcur CURCUR;
93struct curcur {
94 int parenfloor; /* how far back to strip paren data */
95 int cur; /* how many instances of scan we've matched */
96 int min; /* the minimal number of scans to match */
97 int max; /* the maximal number of scans to match */
98 int minmod; /* whether to work our way up or down */
c277df42
IZ
99 regnode * scan; /* the thing to match */
100 regnode * next; /* what has to match after it */
a0d0e21e
LW
101 char * lastloc; /* where we started matching this scan */
102 CURCUR * oldcc; /* current curly before we started this one */
103};
104
105static CURCUR* regcc;
106
107typedef I32 CHECKPOINT;
108
c277df42
IZ
109/*
110 * Forwards.
111 */
112
113static I32 regmatch _((regnode *prog));
114static I32 regrepeat _((regnode *p, I32 max));
115static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp));
116static I32 regtry _((regexp *prog, char *startpos));
117static bool reginclass _((char *p, I32 c));
55497cff 118static CHECKPOINT regcppush _((I32 parenfloor));
119static char * regcppop _((void));
a0d0e21e 120
55497cff 121static CHECKPOINT
8ac85365 122regcppush(I32 parenfloor)
a0d0e21e 123{
11343788 124 dTHR;
a0d0e21e 125 int retval = savestack_ix;
c277df42 126 int i = (regsize - parenfloor) * 4;
a0d0e21e
LW
127 int p;
128
129 SSCHECK(i + 5);
130 for (p = regsize; p > parenfloor; p--) {
131 SSPUSHPTR(regendp[p]);
132 SSPUSHPTR(regstartp[p]);
c5254dd6 133 SSPUSHPTR(reg_start_tmp[p]);
a0d0e21e
LW
134 SSPUSHINT(p);
135 }
136 SSPUSHINT(regsize);
137 SSPUSHINT(*reglastparen);
138 SSPUSHPTR(reginput);
139 SSPUSHINT(i + 3);
140 SSPUSHINT(SAVEt_REGCONTEXT);
141 return retval;
142}
143
c277df42
IZ
144/* These are needed since we do not localize EVAL nodes: */
145# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, " Setting an EVAL scope, savestack=%i\n", savestack_ix)); lastcp = savestack_ix
146# define REGCP_UNWIND DEBUG_r(lastcp != savestack_ix ? PerlIO_printf(Perl_debug_log," Clearing an EVAL scope, savestack=%i..%i\n", lastcp, savestack_ix) : 0); regcpblow(lastcp)
147
55497cff 148static char *
8ac85365 149regcppop(void)
a0d0e21e 150{
11343788 151 dTHR;
a0d0e21e
LW
152 I32 i = SSPOPINT;
153 U32 paren = 0;
154 char *input;
155 char *tmps;
156 assert(i == SAVEt_REGCONTEXT);
157 i = SSPOPINT;
158 input = (char *) SSPOPPTR;
159 *reglastparen = SSPOPINT;
160 regsize = SSPOPINT;
c277df42 161 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 162 paren = (U32)SSPOPINT;
c277df42 163 reg_start_tmp[paren] = (char *) SSPOPPTR;
a0d0e21e
LW
164 regstartp[paren] = (char *) SSPOPPTR;
165 tmps = (char*)SSPOPPTR;
166 if (paren <= *reglastparen)
167 regendp[paren] = tmps;
c277df42
IZ
168 DEBUG_r(
169 PerlIO_printf(Perl_debug_log, " restoring \\%d to %d(%d)..%d%s\n",
170 paren, regstartp[paren] - regbol,
171 reg_start_tmp[paren] - regbol,
172 regendp[paren] - regbol,
173 (paren > *reglastparen ? "(no)" : ""));
174 );
a0d0e21e 175 }
c277df42
IZ
176 DEBUG_r(
177 if (*reglastparen + 1 <= regnpar) {
178 PerlIO_printf(Perl_debug_log, " restoring \\%d..\\%d to undef\n",
179 *reglastparen + 1, regnpar);
180 }
181 );
a0d0e21e
LW
182 for (paren = *reglastparen + 1; paren <= regnpar; paren++) {
183 if (paren > regsize)
184 regstartp[paren] = Nullch;
185 regendp[paren] = Nullch;
186 }
187 return input;
188}
189
c277df42 190#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 191
a687059c 192/*
e50aee73 193 * pregexec and friends
a687059c
LW
194 */
195
196/*
c277df42 197 - pregexec - match a regexp against a string
a687059c 198 */
c277df42
IZ
199I32
200pregexec(register regexp *prog, char *stringarg, register char *strend, char *strbeg, I32 minend, SV *screamer, U32 nosave)
201/* strend: pointer to null at end of string */
202/* strbeg: real beginning of string */
203/* minend: end of match must be >=minend after stringarg. */
204/* nosave: For optimizations. */
205{
206 return
207 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
208 nosave ? 0 : REXEC_COPY_STR);
209}
210
a687059c 211/*
c277df42 212 - regexec_flags - match a regexp against a string
a687059c 213 */
79072805 214I32
c277df42
IZ
215regexec_flags(register regexp *prog, char *stringarg, register char *strend, char *strbeg, I32 minend, SV *screamer, void *data, U32 flags)
216/* strend: pointer to null at end of string */
217/* strbeg: real beginning of string */
218/* minend: end of match must be >=minend after stringarg. */
219/* data: May be used for some additional optimizations. */
220/* nosave: For optimizations. */
a687059c 221{
a0d0e21e 222 register char *s;
c277df42 223 register regnode *c;
a0d0e21e
LW
224 register char *startpos = stringarg;
225 register I32 tmp;
c277df42 226 I32 minlen; /* must match at least this many chars */
a0d0e21e
LW
227 I32 dontbother = 0; /* how many characters not to try at end */
228 CURCUR cc;
c277df42
IZ
229 I32 start_shift = 0; /* Offset of the start to find
230 constant substr. */
231 I32 end_shift = 0; /* Same for the end. */
232 I32 scream_pos = -1; /* Internal iterator of scream. */
233 char *scream_olds;
a687059c 234
a0d0e21e 235 cc.cur = 0;
4633a7c4 236 cc.oldcc = 0;
a0d0e21e
LW
237 regcc = &cc;
238
c277df42 239 regprecomp = prog->precomp; /* Needed for error messages. */
a0d0e21e
LW
240#ifdef DEBUGGING
241 regnarrate = debug & 512;
242 regprogram = prog->program;
243#endif
244
245 /* Be paranoid... */
246 if (prog == NULL || startpos == NULL) {
247 croak("NULL regexp parameter");
248 return 0;
249 }
250
c277df42
IZ
251 minlen = prog->minlen;
252 if (strend - startpos < minlen) goto phooey;
253
a0d0e21e
LW
254 if (startpos == strbeg) /* is ^ valid at stringarg? */
255 regprev = '\n';
256 else {
257 regprev = stringarg[-1];
258 if (!multiline && regprev == '\n')
259 regprev = '\0'; /* force ^ to NOT match */
260 }
bbce6d69 261
a0d0e21e
LW
262 /* Check validity of program. */
263 if (UCHARAT(prog->program) != MAGIC) {
264 FAIL("corrupted regexp program");
265 }
266
bbce6d69 267 regnpar = prog->nparens;
c277df42
IZ
268 reg_flags = 0;
269 reg_eval_set = 0;
a0d0e21e
LW
270
271 /* If there is a "must appear" string, look for it. */
272 s = startpos;
c277df42
IZ
273 if (!(flags & REXEC_CHECKED)
274 && prog->check_substr != Nullsv &&
774d564b 275 !(prog->reganch & ROPT_ANCH_GPOS) &&
c277df42
IZ
276 (!(prog->reganch & (ROPT_ANCH_BOL | ROPT_ANCH_MBOL))
277 || (multiline && prog->check_substr == prog->anchored_substr)) )
a0d0e21e 278 {
c277df42
IZ
279 start_shift = prog->check_offset_min;
280 /* Should be nonnegative! */
281 end_shift = minlen - start_shift - SvCUR(prog->check_substr);
282 if (screamer) {
283 if (screamfirst[BmRARE(prog->check_substr)] >= 0)
284 s = screaminstr(screamer, prog->check_substr,
285 start_shift + (stringarg - strbeg),
286 end_shift, &scream_pos, 0);
a0d0e21e
LW
287 else
288 s = Nullch;
c277df42 289 scream_olds = s;
0a12ae7d 290 }
a0d0e21e 291 else
c277df42
IZ
292 s = fbm_instr((unsigned char*)s + start_shift,
293 (unsigned char*)strend - end_shift,
294 prog->check_substr);
a0d0e21e 295 if (!s) {
c277df42 296 ++BmUSEFUL(prog->check_substr); /* hooray */
a0d0e21e 297 goto phooey; /* not present */
c277df42
IZ
298 } else if ((s - stringarg) > prog->check_offset_max) {
299 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
300 s -= prog->check_offset_max;
301 } else if (!prog->naughty
302 && --BmUSEFUL(prog->check_substr) < 0
303 && prog->check_substr == prog->float_substr) { /* boo */
304 SvREFCNT_dec(prog->check_substr);
305 prog->check_substr = Nullsv; /* disable */
306 prog->float_substr = Nullsv; /* clear */
a0d0e21e 307 s = startpos;
c277df42 308 } else s = startpos;
a0d0e21e 309 }
a687059c 310
c277df42 311 /* Mark beginning of line for ^ and lookbehind. */
a0d0e21e 312 regbol = startpos;
c277df42 313 bostr = strbeg;
a687059c 314
a0d0e21e
LW
315 /* Mark end of line for $ (and such) */
316 regeol = strend;
a687059c 317
a0d0e21e
LW
318 /* see how far we have to get to not match where we matched before */
319 regtill = startpos+minend;
a687059c 320
c277df42
IZ
321 DEBUG_r(
322 PerlIO_printf(Perl_debug_log,
323 "Matching `%.60s%s' against `%.*s%s'\n",
324 prog->precomp,
325 (strlen(prog->precomp) > 60 ? "..." : ""),
326 (strend - startpos > 60 ? 60 : strend - startpos),
327 startpos,
328 (strend - startpos > 60 ? "..." : ""))
329 );
330
a0d0e21e 331 /* Simplest case: anchored match need be tried only once. */
774d564b 332 /* [unless only anchor is BOL and multiline is set] */
a0d0e21e
LW
333 if (prog->reganch & ROPT_ANCH) {
334 if (regtry(prog, startpos))
335 goto got_it;
774d564b 336 else if (!(prog->reganch & ROPT_ANCH_GPOS) &&
c277df42
IZ
337 (multiline || (prog->reganch & ROPT_IMPLICIT)
338 || (prog->reganch & ROPT_ANCH_MBOL)))
774d564b 339 {
a0d0e21e
LW
340 if (minlen)
341 dontbother = minlen - 1;
342 strend -= dontbother;
343 /* for multiline we only have to try after newlines */
344 if (s > startpos)
345 s--;
346 while (s < strend) {
347 if (*s++ == '\n') {
348 if (s < strend && regtry(prog, s))
349 goto got_it;
350 }
35c8bce7 351 }
35c8bce7 352 }
a0d0e21e
LW
353 goto phooey;
354 }
35c8bce7 355
a0d0e21e 356 /* Messy cases: unanchored match. */
c277df42
IZ
357 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
358 /* we have /x+whatever/ */
359 /* it must be a one character string */
360 char ch = SvPVX(prog->anchored_substr)[0];
361 while (s < strend) {
362 if (*s == ch) {
363 if (regtry(prog, s)) goto got_it;
a0d0e21e 364 s++;
c277df42
IZ
365 while (s < strend && *s == ch)
366 s++;
a0d0e21e 367 }
c277df42 368 s++;
a687059c 369 }
c277df42
IZ
370 }
371 /*SUPPRESS 560*/
372 else if (prog->anchored_substr != Nullsv
373 || (prog->float_substr != Nullsv
374 && prog->float_max_offset < strend - s)) {
375 SV *must = prog->anchored_substr
376 ? prog->anchored_substr : prog->float_substr;
377 I32 back_max =
378 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
379 I32 back_min =
380 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
381 I32 delta = back_max - back_min;
382 char *last = strend - SvCUR(must) - back_min; /* Cannot start after this */
383 char *last1 = s - 1; /* Last position checked before */
384
385 /* XXXX check_substr already used to find `s', can optimize if
386 check_substr==must. */
387 scream_pos = -1;
388 dontbother = end_shift;
389 strend -= dontbother;
390 while ( (s <= last) &&
391 (screamer
392 ? (s = screaminstr(screamer, must, s + back_min - strbeg,
393 end_shift, &scream_pos, 0))
394 : (s = fbm_instr((unsigned char*)s + back_min,
395 (unsigned char*)strend, must))) ) {
396 if (s - back_max > last1) {
397 last1 = s - back_min;
398 s = s - back_max;
399 } else {
400 char *t = last1 + 1;
401
402 last1 = s - back_min;
403 s = t;
a0d0e21e 404 }
c277df42 405 while (s <= last1) {
a0d0e21e
LW
406 if (regtry(prog, s))
407 goto got_it;
408 s++;
409 }
410 }
411 goto phooey;
c277df42 412 } else if (c = prog->regstclass) {
a0d0e21e 413 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
161b471a 414 char *Class;
a687059c 415
a0d0e21e
LW
416 if (minlen)
417 dontbother = minlen - 1;
418 strend -= dontbother; /* don't bother with what can't match */
419 tmp = 1;
420 /* We know what class it must start with. */
421 switch (OP(c)) {
422 case ANYOF:
161b471a 423 Class = (char *) OPERAND(c);
a0d0e21e 424 while (s < strend) {
161b471a 425 if (reginclass(Class, *s)) {
a0d0e21e
LW
426 if (tmp && regtry(prog, s))
427 goto got_it;
428 else
429 tmp = doevery;
a687059c 430 }
a0d0e21e
LW
431 else
432 tmp = 1;
433 s++;
434 }
435 break;
bbce6d69 436 case BOUNDL:
c277df42 437 reg_flags |= RF_tainted;
bbce6d69 438 /* FALL THROUGH */
a0d0e21e
LW
439 case BOUND:
440 if (minlen)
441 dontbother++,strend--;
bbce6d69 442 tmp = (s != startpos) ? UCHARAT(s - 1) : regprev;
95bac841 443 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 444 while (s < strend) {
95bac841 445 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
446 tmp = !tmp;
447 if (regtry(prog, s))
448 goto got_it;
a687059c 449 }
a0d0e21e
LW
450 s++;
451 }
452 if ((minlen || tmp) && regtry(prog,s))
453 goto got_it;
454 break;
bbce6d69 455 case NBOUNDL:
c277df42 456 reg_flags |= RF_tainted;
bbce6d69 457 /* FALL THROUGH */
a0d0e21e
LW
458 case NBOUND:
459 if (minlen)
460 dontbother++,strend--;
bbce6d69 461 tmp = (s != startpos) ? UCHARAT(s - 1) : regprev;
95bac841 462 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 463 while (s < strend) {
95bac841 464 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
465 tmp = !tmp;
466 else if (regtry(prog, s))
467 goto got_it;
468 s++;
469 }
470 if ((minlen || !tmp) && regtry(prog,s))
471 goto got_it;
472 break;
473 case ALNUM:
474 while (s < strend) {
bbce6d69 475 if (isALNUM(*s)) {
476 if (tmp && regtry(prog, s))
477 goto got_it;
478 else
479 tmp = doevery;
480 }
481 else
482 tmp = 1;
483 s++;
484 }
485 break;
486 case ALNUML:
c277df42 487 reg_flags |= RF_tainted;
bbce6d69 488 while (s < strend) {
489 if (isALNUM_LC(*s)) {
a0d0e21e
LW
490 if (tmp && regtry(prog, s))
491 goto got_it;
a687059c 492 else
a0d0e21e
LW
493 tmp = doevery;
494 }
495 else
496 tmp = 1;
497 s++;
498 }
499 break;
500 case NALNUM:
501 while (s < strend) {
bbce6d69 502 if (!isALNUM(*s)) {
503 if (tmp && regtry(prog, s))
504 goto got_it;
505 else
506 tmp = doevery;
507 }
508 else
509 tmp = 1;
510 s++;
511 }
512 break;
513 case NALNUML:
c277df42 514 reg_flags |= RF_tainted;
bbce6d69 515 while (s < strend) {
516 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
517 if (tmp && regtry(prog, s))
518 goto got_it;
a687059c 519 else
a0d0e21e 520 tmp = doevery;
a687059c 521 }
a0d0e21e
LW
522 else
523 tmp = 1;
524 s++;
525 }
526 break;
527 case SPACE:
528 while (s < strend) {
529 if (isSPACE(*s)) {
530 if (tmp && regtry(prog, s))
531 goto got_it;
532 else
533 tmp = doevery;
2304df62 534 }
a0d0e21e
LW
535 else
536 tmp = 1;
537 s++;
538 }
539 break;
bbce6d69 540 case SPACEL:
c277df42 541 reg_flags |= RF_tainted;
bbce6d69 542 while (s < strend) {
543 if (isSPACE_LC(*s)) {
544 if (tmp && regtry(prog, s))
545 goto got_it;
546 else
547 tmp = doevery;
548 }
549 else
550 tmp = 1;
551 s++;
552 }
553 break;
a0d0e21e
LW
554 case NSPACE:
555 while (s < strend) {
556 if (!isSPACE(*s)) {
557 if (tmp && regtry(prog, s))
558 goto got_it;
559 else
560 tmp = doevery;
a687059c 561 }
a0d0e21e
LW
562 else
563 tmp = 1;
564 s++;
565 }
566 break;
bbce6d69 567 case NSPACEL:
c277df42 568 reg_flags |= RF_tainted;
bbce6d69 569 while (s < strend) {
570 if (!isSPACE_LC(*s)) {
571 if (tmp && regtry(prog, s))
572 goto got_it;
573 else
574 tmp = doevery;
575 }
576 else
577 tmp = 1;
578 s++;
579 }
580 break;
a0d0e21e
LW
581 case DIGIT:
582 while (s < strend) {
583 if (isDIGIT(*s)) {
584 if (tmp && regtry(prog, s))
585 goto got_it;
586 else
587 tmp = doevery;
2b69d0c2 588 }
a0d0e21e
LW
589 else
590 tmp = 1;
591 s++;
592 }
593 break;
594 case NDIGIT:
595 while (s < strend) {
596 if (!isDIGIT(*s)) {
597 if (tmp && regtry(prog, s))
598 goto got_it;
599 else
600 tmp = doevery;
a687059c 601 }
a0d0e21e
LW
602 else
603 tmp = 1;
604 s++;
605 }
606 break;
a687059c 607 }
a0d0e21e
LW
608 }
609 else {
c277df42
IZ
610 dontbother = 0;
611 if (prog->float_substr != Nullsv) { /* Trim the end. */
612 char *last;
613 I32 oldpos = scream_pos;
614
615 if (screamer) {
616 last = screaminstr(screamer, prog->float_substr, s - strbeg,
617 end_shift, &scream_pos, 1); /* last one */
618 if (!last) {
619 last = scream_olds; /* Only one occurence. */
620 }
621 } else {
622 STRLEN len;
623 char *little = SvPV(prog->float_substr, len);
624 last = rninstr(s, strend, little, little + len);
625 }
626 if (last == NULL) goto phooey; /* Should not happen! */
627 dontbother = strend - last - 1;
628 }
629 if (minlen && (dontbother < minlen))
a0d0e21e
LW
630 dontbother = minlen - 1;
631 strend -= dontbother;
632 /* We don't know much -- general case. */
633 do {
634 if (regtry(prog, s))
635 goto got_it;
636 } while (s++ < strend);
637 }
638
639 /* Failure. */
640 goto phooey;
a687059c 641
a0d0e21e 642got_it:
420218e7 643 strend += dontbother; /* uncheat */
a0d0e21e
LW
644 prog->subbeg = strbeg;
645 prog->subend = strend;
c277df42 646 RX_MATCH_TAINTED_SET(prog, reg_flags & RF_tainted);
5f05dabc 647
648 /* make sure $`, $&, $', and $digit will work later */
c277df42
IZ
649 if (strbeg != prog->subbase) { /* second+ //g match. */
650 if (!(flags & REXEC_COPY_STR)) {
137443ea 651 if (prog->subbase) {
652 Safefree(prog->subbase);
653 prog->subbase = Nullch;
654 }
655 }
656 else {
657 I32 i = strend - startpos + (stringarg - strbeg);
658 s = savepvn(strbeg, i);
659 Safefree(prog->subbase);
660 prog->subbase = s;
661 prog->subbeg = prog->subbase;
662 prog->subend = prog->subbase + i;
663 s = prog->subbase + (stringarg - strbeg);
664 for (i = 0; i <= prog->nparens; i++) {
665 if (prog->endp[i]) {
666 prog->startp[i] = s + (prog->startp[i] - startpos);
667 prog->endp[i] = s + (prog->endp[i] - startpos);
668 }
a0d0e21e
LW
669 }
670 }
a0d0e21e
LW
671 }
672 return 1;
673
674phooey:
a0d0e21e 675 return 0;
a687059c
LW
676}
677
678/*
679 - regtry - try match at specific point
680 */
79072805 681static I32 /* 0 failure, 1 success */
8ac85365 682regtry(regexp *prog, char *startpos)
a687059c 683{
c277df42 684 dTHR;
a0d0e21e
LW
685 register I32 i;
686 register char **sp;
687 register char **ep;
c277df42 688 CHECKPOINT lastcp;
a0d0e21e
LW
689
690 reginput = startpos;
691 regstartp = prog->startp;
692 regendp = prog->endp;
693 reglastparen = &prog->lastparen;
694 prog->lastparen = 0;
695 regsize = 0;
c277df42
IZ
696 if (reg_start_tmpl <= prog->nparens) {
697 reg_start_tmpl = prog->nparens*3/2 + 3;
698 if(reg_start_tmp)
699 Renew(reg_start_tmp, reg_start_tmpl, char*);
700 else
701 New(22,reg_start_tmp, reg_start_tmpl, char*);
702 }
a0d0e21e
LW
703
704 sp = prog->startp;
705 ep = prog->endp;
c277df42 706 data = prog->data;
a0d0e21e
LW
707 if (prog->nparens) {
708 for (i = prog->nparens; i >= 0; i--) {
709 *sp++ = NULL;
710 *ep++ = NULL;
a687059c 711 }
a0d0e21e 712 }
c277df42 713 REGCP_SET;
a0d0e21e
LW
714 if (regmatch(prog->program + 1) && reginput >= regtill) {
715 prog->startp[0] = startpos;
716 prog->endp[0] = reginput;
717 return 1;
718 }
c277df42
IZ
719 REGCP_UNWIND;
720 return 0;
a687059c
LW
721}
722
723/*
724 - regmatch - main matching routine
725 *
726 * Conceptually the strategy is simple: check to see whether the current
727 * node matches, call self recursively to see whether the rest matches,
728 * and then act accordingly. In practice we make some effort to avoid
729 * recursion, in particular by going through "ordinary" nodes (that don't
730 * need to know whether the rest of the match failed) by a loop instead of
731 * by recursion.
732 */
733/* [lwall] I've hoisted the register declarations to the outer block in order to
734 * maybe save a little bit of pushing and popping on the stack. It also takes
735 * advantage of machines that use a register save mask on subroutine entry.
736 */
79072805 737static I32 /* 0 failure, 1 success */
c277df42 738regmatch(regnode *prog)
a687059c 739{
c277df42
IZ
740 dTHR;
741 register regnode *scan; /* Current node. */
742 regnode *next; /* Next node. */
743 regnode *inner; /* Next node in internal branch. */
a0d0e21e
LW
744 register I32 nextchar;
745 register I32 n; /* no or next */
746 register I32 ln; /* len or last */
747 register char *s; /* operand or save */
748 register char *locinput = reginput;
c277df42
IZ
749 register I32 c1, c2, paren; /* case fold search, parenth */
750 int minmod = 0, sw = 0, logical = 0;
4633a7c4
LW
751#ifdef DEBUGGING
752 static int regindent = 0;
753 regindent++;
754#endif
a0d0e21e 755
bbce6d69 756 nextchar = UCHARAT(locinput);
a0d0e21e
LW
757 scan = prog;
758 while (scan != NULL) {
c277df42 759#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
a687059c 760#ifdef DEBUGGING
c277df42
IZ
761# define sayYES goto yes
762# define sayNO goto no
763# define saySAME(x) if (x) goto yes; else goto no
764# define REPORT_CODE_OFF 24
4633a7c4 765#else
c277df42
IZ
766# define sayYES return 1
767# define sayNO return 0
768# define saySAME(x) return x
a687059c 769#endif
c277df42
IZ
770 DEBUG_r( {
771 SV *prop = sv_newmortal();
772 int docolor = *colors[0];
773 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
774 int l = (regeol - locinput > taill ? taill : regeol - locinput);
775 int pref_len = (locinput - bostr > (5 + taill) - l
776 ? (5 + taill) - l : locinput - bostr);
777
778 if (l + pref_len < (5 + taill) && l < regeol - locinput)
779 l = ( regeol - locinput > (5 + taill) - pref_len
780 ? (5 + taill) - pref_len : regeol - locinput);
781 regprop(prop, scan);
782 PerlIO_printf(Perl_debug_log,
783 "%4i <%s%.*s%s%s%s%.*s%s>%*s|%*s%2d%s\n",
784 locinput - bostr,
785 colors[2], pref_len, locinput - pref_len, colors[3],
786 (docolor ? "" : "> <"),
787 colors[0], l, locinput, colors[1],
788 15 - l - pref_len + 1,
789 "",
790 regindent*2, "", scan - regprogram,
791 SvPVX(prop));
792 } );
a687059c
LW
793
794#ifdef REGALIGN
c277df42 795 next = scan + NEXT_OFF(scan);
a0d0e21e
LW
796 if (next == scan)
797 next = NULL;
a687059c 798#else
a0d0e21e 799 next = regnext(scan);
a687059c
LW
800#endif
801
a0d0e21e
LW
802 switch (OP(scan)) {
803 case BOL:
804 if (locinput == regbol
805 ? regprev == '\n'
c277df42
IZ
806 : (multiline &&
807 (nextchar || locinput < regeol) && locinput[-1] == '\n') )
a0d0e21e
LW
808 {
809 /* regtill = regbol; */
810 break;
811 }
4633a7c4 812 sayNO;
a0d0e21e
LW
813 case MBOL:
814 if (locinput == regbol
815 ? regprev == '\n'
816 : ((nextchar || locinput < regeol) && locinput[-1] == '\n') )
817 {
818 break;
819 }
4633a7c4 820 sayNO;
a0d0e21e
LW
821 case SBOL:
822 if (locinput == regbol && regprev == '\n')
823 break;
4633a7c4 824 sayNO;
774d564b 825 case GPOS:
a0d0e21e
LW
826 if (locinput == regbol)
827 break;
4633a7c4 828 sayNO;
a0d0e21e
LW
829 case EOL:
830 if (multiline)
831 goto meol;
832 else
833 goto seol;
834 case MEOL:
835 meol:
836 if ((nextchar || locinput < regeol) && nextchar != '\n')
4633a7c4 837 sayNO;
a0d0e21e
LW
838 break;
839 case SEOL:
840 seol:
841 if ((nextchar || locinput < regeol) && nextchar != '\n')
4633a7c4 842 sayNO;
a0d0e21e 843 if (regeol - locinput > 1)
4633a7c4 844 sayNO;
a0d0e21e
LW
845 break;
846 case SANY:
847 if (!nextchar && locinput >= regeol)
4633a7c4 848 sayNO;
bbce6d69 849 nextchar = UCHARAT(++locinput);
a0d0e21e
LW
850 break;
851 case ANY:
852 if (!nextchar && locinput >= regeol || nextchar == '\n')
4633a7c4 853 sayNO;
bbce6d69 854 nextchar = UCHARAT(++locinput);
a0d0e21e 855 break;
bbce6d69 856 case EXACT:
161b471a 857 s = (char *) OPERAND(scan);
c277df42 858 ln = UCHARAT(s++);
a0d0e21e 859 /* Inline the first character, for speed. */
95bac841 860 if (UCHARAT(s) != nextchar)
4633a7c4 861 sayNO;
a0d0e21e 862 if (regeol - locinput < ln)
4633a7c4 863 sayNO;
36477c24 864 if (ln > 1 && memNE(s, locinput, ln))
4633a7c4 865 sayNO;
a0d0e21e 866 locinput += ln;
bbce6d69 867 nextchar = UCHARAT(locinput);
868 break;
869 case EXACTFL:
c277df42 870 reg_flags |= RF_tainted;
bbce6d69 871 /* FALL THROUGH */
872 case EXACTF:
161b471a 873 s = (char *) OPERAND(scan);
c277df42 874 ln = UCHARAT(s++);
bbce6d69 875 /* Inline the first character, for speed. */
876 if (UCHARAT(s) != nextchar &&
877 UCHARAT(s) != ((OP(scan) == EXACTF)
878 ? fold : fold_locale)[nextchar])
879 sayNO;
880 if (regeol - locinput < ln)
881 sayNO;
5f05dabc 882 if (ln > 1 && (OP(scan) == EXACTF
883 ? ibcmp(s, locinput, ln)
884 : ibcmp_locale(s, locinput, ln)))
bbce6d69 885 sayNO;
886 locinput += ln;
887 nextchar = UCHARAT(locinput);
a0d0e21e
LW
888 break;
889 case ANYOF:
161b471a 890 s = (char *) OPERAND(scan);
a0d0e21e
LW
891 if (nextchar < 0)
892 nextchar = UCHARAT(locinput);
bbce6d69 893 if (!reginclass(s, nextchar))
4633a7c4 894 sayNO;
a0d0e21e 895 if (!nextchar && locinput >= regeol)
4633a7c4 896 sayNO;
bbce6d69 897 nextchar = UCHARAT(++locinput);
a0d0e21e 898 break;
bbce6d69 899 case ALNUML:
c277df42 900 reg_flags |= RF_tainted;
bbce6d69 901 /* FALL THROUGH */
a0d0e21e
LW
902 case ALNUM:
903 if (!nextchar)
4633a7c4 904 sayNO;
bbce6d69 905 if (!(OP(scan) == ALNUM
906 ? isALNUM(nextchar) : isALNUM_LC(nextchar)))
4633a7c4 907 sayNO;
bbce6d69 908 nextchar = UCHARAT(++locinput);
a0d0e21e 909 break;
bbce6d69 910 case NALNUML:
c277df42 911 reg_flags |= RF_tainted;
bbce6d69 912 /* FALL THROUGH */
a0d0e21e
LW
913 case NALNUM:
914 if (!nextchar && locinput >= regeol)
4633a7c4 915 sayNO;
bbce6d69 916 if (OP(scan) == NALNUM
917 ? isALNUM(nextchar) : isALNUM_LC(nextchar))
4633a7c4 918 sayNO;
bbce6d69 919 nextchar = UCHARAT(++locinput);
a0d0e21e 920 break;
bbce6d69 921 case BOUNDL:
922 case NBOUNDL:
c277df42 923 reg_flags |= RF_tainted;
bbce6d69 924 /* FALL THROUGH */
a0d0e21e 925 case BOUND:
bbce6d69 926 case NBOUND:
927 /* was last char in word? */
928 ln = (locinput != regbol) ? UCHARAT(locinput - 1) : regprev;
929 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
930 ln = isALNUM(ln);
931 n = isALNUM(nextchar);
932 }
933 else {
934 ln = isALNUM_LC(ln);
935 n = isALNUM_LC(nextchar);
936 }
95bac841 937 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 938 sayNO;
a0d0e21e 939 break;
bbce6d69 940 case SPACEL:
c277df42 941 reg_flags |= RF_tainted;
bbce6d69 942 /* FALL THROUGH */
a0d0e21e
LW
943 case SPACE:
944 if (!nextchar && locinput >= regeol)
4633a7c4 945 sayNO;
bbce6d69 946 if (!(OP(scan) == SPACE
947 ? isSPACE(nextchar) : isSPACE_LC(nextchar)))
4633a7c4 948 sayNO;
bbce6d69 949 nextchar = UCHARAT(++locinput);
a0d0e21e 950 break;
bbce6d69 951 case NSPACEL:
c277df42 952 reg_flags |= RF_tainted;
bbce6d69 953 /* FALL THROUGH */
a0d0e21e
LW
954 case NSPACE:
955 if (!nextchar)
4633a7c4 956 sayNO;
bbce6d69 957 if (OP(scan) == SPACE
958 ? isSPACE(nextchar) : isSPACE_LC(nextchar))
4633a7c4 959 sayNO;
bbce6d69 960 nextchar = UCHARAT(++locinput);
a0d0e21e
LW
961 break;
962 case DIGIT:
963 if (!isDIGIT(nextchar))
4633a7c4 964 sayNO;
bbce6d69 965 nextchar = UCHARAT(++locinput);
a0d0e21e
LW
966 break;
967 case NDIGIT:
968 if (!nextchar && locinput >= regeol)
4633a7c4 969 sayNO;
a0d0e21e 970 if (isDIGIT(nextchar))
4633a7c4 971 sayNO;
bbce6d69 972 nextchar = UCHARAT(++locinput);
a0d0e21e 973 break;
c8756f30 974 case REFFL:
c277df42 975 reg_flags |= RF_tainted;
c8756f30 976 /* FALL THROUGH */
c277df42 977 case REF:
c8756f30 978 case REFF:
c277df42 979 n = ARG(scan); /* which paren pair */
a0d0e21e 980 s = regstartp[n];
c277df42
IZ
981 if (*reglastparen < n || !s)
982 break; /* Zero length always matches */
a0d0e21e
LW
983 if (s == regendp[n])
984 break;
985 /* Inline the first character, for speed. */
c8756f30
AK
986 if (UCHARAT(s) != nextchar &&
987 (OP(scan) == REF ||
988 (UCHARAT(s) != ((OP(scan) == REFF
c277df42 989 ? fold : fold_locale)[nextchar]))))
4633a7c4 990 sayNO;
a0d0e21e
LW
991 ln = regendp[n] - s;
992 if (locinput + ln > regeol)
4633a7c4 993 sayNO;
c8756f30
AK
994 if (ln > 1 && (OP(scan) == REF
995 ? memNE(s, locinput, ln)
996 : (OP(scan) == REFF
997 ? ibcmp(s, locinput, ln)
998 : ibcmp_locale(s, locinput, ln))))
4633a7c4 999 sayNO;
a0d0e21e 1000 locinput += ln;
bbce6d69 1001 nextchar = UCHARAT(locinput);
a0d0e21e
LW
1002 break;
1003
1004 case NOTHING:
c277df42 1005 case TAIL:
a0d0e21e
LW
1006 break;
1007 case BACK:
1008 break;
c277df42
IZ
1009 case EVAL:
1010 {
1011 dSP;
1012 OP_4tree *oop = op;
1013 COP *ocurcop = curcop;
1014 SV **ocurpad = curpad;
1015 SV *ret;
1016
1017 n = ARG(scan);
1018 op = (OP_4tree*)data->data[n];
1019 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", op) );
1020 curpad = AvARRAY((AV*)data->data[n + 1]);
1021 if (!reg_eval_set) {
1022 /* Preserve whatever is on stack now, otherwise
1023 OP_NEXTSTATE will overwrite it. */
1024 SAVEINT(reg_eval_set); /* Protect against unwinding. */
1025 reg_eval_set = 1;
1026 DEBUG_r(DEBUG_s(
1027 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n", stack_sp - stack_base);
1028 ));
1029 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1030 cxstack[cxstack_ix].blk_oldsp = stack_sp - stack_base;
1031 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1032 SAVETMPS;
1033 /* Apparently this is not needed, judging by wantarray. */
1034 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1035 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1036 }
1037
1038 runops(); /* Scalar context. */
1039 SPAGAIN;
1040 ret = POPs;
1041 PUTBACK;
1042
1043 if (logical) {
1044 logical = 0;
1045 sw = SvTRUE(ret);
1046 }
1047 op = oop;
1048 curpad = ocurpad;
1049 curcop = ocurcop;
1050 break;
1051 }
a0d0e21e 1052 case OPEN:
c277df42
IZ
1053 n = ARG(scan); /* which paren pair */
1054 reg_start_tmp[n] = locinput;
a0d0e21e
LW
1055 if (n > regsize)
1056 regsize = n;
1057 break;
1058 case CLOSE:
c277df42
IZ
1059 n = ARG(scan); /* which paren pair */
1060 regstartp[n] = reg_start_tmp[n];
a0d0e21e
LW
1061 regendp[n] = locinput;
1062 if (n > *reglastparen)
1063 *reglastparen = n;
1064 break;
c277df42
IZ
1065 case GROUPP:
1066 n = ARG(scan); /* which paren pair */
1067 sw = (*reglastparen >= n && regendp[n] != NULL);
1068 break;
1069 case IFTHEN:
1070 if (sw)
1071 next = NEXTOPER(NEXTOPER(scan));
1072 else {
1073 next = scan + ARG(scan);
1074 if (OP(next) == IFTHEN) /* Fake one. */
1075 next = NEXTOPER(NEXTOPER(next));
1076 }
1077 break;
1078 case LOGICAL:
1079 logical = 1;
1080 break;
a0d0e21e
LW
1081 case CURLYX: {
1082 CURCUR cc;
1083 CHECKPOINT cp = savestack_ix;
c277df42
IZ
1084
1085 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
1086 next += ARG(next);
a0d0e21e
LW
1087 cc.oldcc = regcc;
1088 regcc = &cc;
1089 cc.parenfloor = *reglastparen;
1090 cc.cur = -1;
1091 cc.min = ARG1(scan);
1092 cc.max = ARG2(scan);
c277df42 1093 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
1094 cc.next = next;
1095 cc.minmod = minmod;
1096 cc.lastloc = 0;
1097 reginput = locinput;
1098 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
1099 regcpblow(cp);
1100 regcc = cc.oldcc;
4633a7c4 1101 saySAME(n);
a0d0e21e
LW
1102 }
1103 /* NOT REACHED */
1104 case WHILEM: {
1105 /*
1106 * This is really hard to understand, because after we match
1107 * what we're trying to match, we must make sure the rest of
1108 * the RE is going to match for sure, and to do that we have
1109 * to go back UP the parse tree by recursing ever deeper. And
1110 * if it fails, we have to reset our parent's current state
1111 * that we can try again after backing off.
1112 */
1113
c277df42 1114 CHECKPOINT cp, lastcp;
a0d0e21e 1115 CURCUR* cc = regcc;
c277df42
IZ
1116 char *lastloc = cc->lastloc; /* Detection of 0-len. */
1117
4633a7c4 1118 n = cc->cur + 1; /* how many we know we matched */
a0d0e21e
LW
1119 reginput = locinput;
1120
c277df42
IZ
1121 DEBUG_r(
1122 PerlIO_printf(Perl_debug_log,
1123 "%*s %ld out of %ld..%ld cc=%lx\n",
1124 REPORT_CODE_OFF+regindent*2, "",
1125 (long)n, (long)cc->min,
1126 (long)cc->max, (long)cc)
1127 );
4633a7c4 1128
a0d0e21e
LW
1129 /* If degenerate scan matches "", assume scan done. */
1130
579cf2c3 1131 if (locinput == cc->lastloc && n >= cc->min) {
a0d0e21e
LW
1132 regcc = cc->oldcc;
1133 ln = regcc->cur;
c277df42
IZ
1134 DEBUG_r(
1135 PerlIO_printf(Perl_debug_log, "%*s empty match detected, try continuation...\n", REPORT_CODE_OFF+regindent*2, "")
1136 );
a0d0e21e 1137 if (regmatch(cc->next))
4633a7c4 1138 sayYES;
c277df42
IZ
1139 DEBUG_r(
1140 PerlIO_printf(Perl_debug_log, "%*s failed...\n", REPORT_CODE_OFF+regindent*2, "")
1141 );
a0d0e21e
LW
1142 regcc->cur = ln;
1143 regcc = cc;
4633a7c4 1144 sayNO;
a0d0e21e
LW
1145 }
1146
1147 /* First just match a string of min scans. */
1148
1149 if (n < cc->min) {
1150 cc->cur = n;
1151 cc->lastloc = locinput;
4633a7c4
LW
1152 if (regmatch(cc->scan))
1153 sayYES;
1154 cc->cur = n - 1;
c277df42
IZ
1155 cc->lastloc = lastloc;
1156 DEBUG_r(
1157 PerlIO_printf(Perl_debug_log, "%*s failed...\n", REPORT_CODE_OFF+regindent*2, "")
1158 );
4633a7c4 1159 sayNO;
a0d0e21e
LW
1160 }
1161
1162 /* Prefer next over scan for minimal matching. */
1163
1164 if (cc->minmod) {
1165 regcc = cc->oldcc;
1166 ln = regcc->cur;
5f05dabc 1167 cp = regcppush(cc->parenfloor);
c277df42 1168 REGCP_SET;
5f05dabc 1169 if (regmatch(cc->next)) {
c277df42 1170 regcpblow(cp);
4633a7c4 1171 sayYES; /* All done. */
5f05dabc 1172 }
c277df42 1173 REGCP_UNWIND;
5f05dabc 1174 regcppop();
a0d0e21e
LW
1175 regcc->cur = ln;
1176 regcc = cc;
1177
c277df42
IZ
1178 if (n >= cc->max) { /* Maximum greed exceeded? */
1179 if (dowarn && n >= REG_INFTY
1180 && !(reg_flags & RF_warned)) {
1181 reg_flags |= RF_warned;
1182 warn("count exceeded %d", REG_INFTY - 1);
1183 }
4633a7c4 1184 sayNO;
c277df42 1185 }
a687059c 1186
c277df42
IZ
1187 DEBUG_r(
1188 PerlIO_printf(Perl_debug_log, "%*s trying longer...\n", REPORT_CODE_OFF+regindent*2, "")
1189 );
a0d0e21e
LW
1190 /* Try scanning more and see if it helps. */
1191 reginput = locinput;
1192 cc->cur = n;
1193 cc->lastloc = locinput;
5f05dabc 1194 cp = regcppush(cc->parenfloor);
c277df42 1195 REGCP_SET;
5f05dabc 1196 if (regmatch(cc->scan)) {
c277df42 1197 regcpblow(cp);
4633a7c4 1198 sayYES;
5f05dabc 1199 }
c277df42
IZ
1200 DEBUG_r(
1201 PerlIO_printf(Perl_debug_log, "%*s failed...\n", REPORT_CODE_OFF+regindent*2, "")
1202 );
1203 REGCP_UNWIND;
5f05dabc 1204 regcppop();
4633a7c4 1205 cc->cur = n - 1;
c277df42 1206 cc->lastloc = lastloc;
4633a7c4 1207 sayNO;
a0d0e21e
LW
1208 }
1209
1210 /* Prefer scan over next for maximal matching. */
1211
1212 if (n < cc->max) { /* More greed allowed? */
5f05dabc 1213 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
1214 cc->cur = n;
1215 cc->lastloc = locinput;
c277df42 1216 REGCP_SET;
5f05dabc 1217 if (regmatch(cc->scan)) {
c277df42 1218 regcpblow(cp);
4633a7c4 1219 sayYES;
5f05dabc 1220 }
c277df42 1221 REGCP_UNWIND;
a0d0e21e
LW
1222 regcppop(); /* Restore some previous $<digit>s? */
1223 reginput = locinput;
c277df42
IZ
1224 DEBUG_r(
1225 PerlIO_printf(Perl_debug_log, "%*s failed, try continuation...\n", REPORT_CODE_OFF+regindent*2, "")
1226 );
1227 }
1228 if (dowarn && n >= REG_INFTY && !(reg_flags & RF_warned)) {
1229 reg_flags |= RF_warned;
1230 warn("count exceeded %d", REG_INFTY - 1);
a0d0e21e
LW
1231 }
1232
1233 /* Failed deeper matches of scan, so see if this one works. */
1234 regcc = cc->oldcc;
1235 ln = regcc->cur;
1236 if (regmatch(cc->next))
4633a7c4 1237 sayYES;
c277df42
IZ
1238 DEBUG_r(
1239 PerlIO_printf(Perl_debug_log, "%*s failed...\n", REPORT_CODE_OFF+regindent*2, "")
1240 );
a0d0e21e
LW
1241 regcc->cur = ln;
1242 regcc = cc;
4633a7c4 1243 cc->cur = n - 1;
c277df42 1244 cc->lastloc = lastloc;
4633a7c4 1245 sayNO;
a0d0e21e
LW
1246 }
1247 /* NOT REACHED */
c277df42
IZ
1248 case BRANCHJ:
1249 next = scan + ARG(scan);
1250 if (next == scan)
1251 next = NULL;
1252 inner = NEXTOPER(NEXTOPER(scan));
1253 goto do_branch;
1254 case BRANCH:
1255 inner = NEXTOPER(scan);
1256 do_branch:
1257 {
1258 CHECKPOINT lastcp;
1259 c1 = OP(scan);
1260 if (OP(next) != c1) /* No choice. */
1261 next = inner; /* Avoid recursion. */
a0d0e21e 1262 else {
748a9306 1263 int lastparen = *reglastparen;
c277df42
IZ
1264
1265 REGCP_SET;
a0d0e21e
LW
1266 do {
1267 reginput = locinput;
c277df42 1268 if (regmatch(inner))
4633a7c4 1269 sayYES;
c277df42 1270 REGCP_UNWIND;
748a9306
LW
1271 for (n = *reglastparen; n > lastparen; n--)
1272 regendp[n] = 0;
1273 *reglastparen = n;
c277df42 1274 scan = next;
a687059c 1275#ifdef REGALIGN
a0d0e21e 1276 /*SUPPRESS 560*/
c277df42
IZ
1277 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
1278 next += n;
a0d0e21e 1279 else
c277df42 1280 next = NULL;
a687059c 1281#else
c277df42 1282 next = regnext(next);
79072805 1283#endif
c277df42
IZ
1284 inner = NEXTOPER(scan);
1285 if (c1 == BRANCHJ) {
1286 inner = NEXTOPER(inner);
1287 }
1288 } while (scan != NULL && OP(scan) == c1);
4633a7c4 1289 sayNO;
a0d0e21e 1290 /* NOTREACHED */
a687059c 1291 }
a0d0e21e
LW
1292 }
1293 break;
1294 case MINMOD:
1295 minmod = 1;
1296 break;
c277df42
IZ
1297 case CURLYM:
1298 {
1299 I32 l;
1300 CHECKPOINT lastcp;
1301
1302 /* We suppose that the next guy does not need
1303 backtracking: in particular, it is of constant length,
1304 and has no parenths to influence future backrefs. */
1305 ln = ARG1(scan); /* min to match */
1306 n = ARG2(scan); /* max to match */
1307#ifdef REGALIGN_STRUCT
1308 paren = scan->flags;
1309 if (paren) {
1310 if (paren > regsize)
1311 regsize = paren;
1312 if (paren > *reglastparen)
1313 *reglastparen = paren;
1314 }
1315#endif
1316 scan = NEXTOPER(scan) + 4/sizeof(regnode);
1317 if (paren)
1318 scan += NEXT_OFF(scan); /* Skip former OPEN. */
1319 reginput = locinput;
1320 if (minmod) {
1321 minmod = 0;
1322 if (ln && regrepeat_hard(scan, ln, &l) < ln)
1323 sayNO;
1324 if (l == 0 && n >= ln
1325 /* In fact, this is tricky. If paren, then the
1326 fact that we did/didnot match may influence
1327 future execution. */
1328 && !(paren && ln == 0))
1329 ln = n;
1330 locinput = reginput;
1331 if (regkind[(U8)OP(next)] == EXACT) {
1332 c1 = UCHARAT(OPERAND(next) + 1);
1333 if (OP(next) == EXACTF)
1334 c2 = fold[c1];
1335 else if (OP(next) == EXACTFL)
1336 c2 = fold_locale[c1];
1337 else
1338 c2 = c1;
1339 } else
1340 c1 = c2 = -1000;
1341 REGCP_SET;
1342 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
1343 /* If it could work, try it. */
1344 if (c1 == -1000 ||
1345 UCHARAT(reginput) == c1 ||
1346 UCHARAT(reginput) == c2)
1347 {
1348 if (paren) {
1349 if (n) {
1350 regstartp[paren] = reginput - l;
1351 regendp[paren] = reginput;
1352 } else
1353 regendp[paren] = NULL;
1354 }
1355 if (regmatch(next))
1356 sayYES;
1357 REGCP_UNWIND;
1358 }
1359 /* Couldn't or didn't -- move forward. */
1360 reginput = locinput;
1361 if (regrepeat_hard(scan, 1, &l)) {
1362 ln++;
1363 locinput = reginput;
1364 }
1365 else
1366 sayNO;
1367 }
1368 } else {
1369 n = regrepeat_hard(scan, n, &l);
1370 if (n != 0 && l == 0
1371 /* In fact, this is tricky. If paren, then the
1372 fact that we did/didnot match may influence
1373 future execution. */
1374 && !(paren && ln == 0))
1375 ln = n;
1376 locinput = reginput;
1377 DEBUG_r(
1378 PerlIO_printf(Perl_debug_log, "%*s matched %ld times, len=%ld...\n", REPORT_CODE_OFF+regindent*2, "", n, l)
1379 );
1380 if (n >= ln) {
1381 if (regkind[(U8)OP(next)] == EXACT) {
1382 c1 = UCHARAT(OPERAND(next) + 1);
1383 if (OP(next) == EXACTF)
1384 c2 = fold[c1];
1385 else if (OP(next) == EXACTFL)
1386 c2 = fold_locale[c1];
1387 else
1388 c2 = c1;
1389 } else
1390 c1 = c2 = -1000;
1391 }
1392 REGCP_SET;
1393 while (n >= ln) {
1394 /* If it could work, try it. */
1395 if (c1 == -1000 ||
1396 UCHARAT(reginput) == c1 ||
1397 UCHARAT(reginput) == c2)
1398 {
1399 DEBUG_r(
1400 PerlIO_printf(Perl_debug_log, "%*s trying tail with n=%ld...\n", REPORT_CODE_OFF+regindent*2, "", n)
1401 );
1402 if (paren) {
1403 if (n) {
1404 regstartp[paren] = reginput - l;
1405 regendp[paren] = reginput;
1406 } else
1407 regendp[paren] = NULL;
1408 }
1409 if (regmatch(next))
1410 sayYES;
1411 REGCP_UNWIND;
1412 }
1413 /* Couldn't or didn't -- back up. */
1414 n--;
1415 locinput -= l;
1416 reginput = locinput;
1417 }
1418 }
1419 sayNO;
1420 break;
1421 }
1422 case CURLYN:
1423 paren = scan->flags; /* Which paren to set */
1424 if (paren > regsize)
1425 regsize = paren;
1426 if (paren > *reglastparen)
1427 *reglastparen = paren;
1428 ln = ARG1(scan); /* min to match */
1429 n = ARG2(scan); /* max to match */
1430 scan = regnext(NEXTOPER(scan) + 4/sizeof(regnode));
1431 goto repeat;
a0d0e21e 1432 case CURLY:
c277df42 1433 paren = 0;
a0d0e21e
LW
1434 ln = ARG1(scan); /* min to match */
1435 n = ARG2(scan); /* max to match */
c277df42 1436 scan = NEXTOPER(scan) + 4/sizeof(regnode);
a0d0e21e
LW
1437 goto repeat;
1438 case STAR:
1439 ln = 0;
c277df42 1440 n = REG_INFTY;
a0d0e21e 1441 scan = NEXTOPER(scan);
c277df42 1442 paren = 0;
a0d0e21e
LW
1443 goto repeat;
1444 case PLUS:
c277df42
IZ
1445 ln = 1;
1446 n = REG_INFTY;
1447 scan = NEXTOPER(scan);
1448 paren = 0;
1449 repeat:
a0d0e21e
LW
1450 /*
1451 * Lookahead to avoid useless match attempts
1452 * when we know what character comes next.
1453 */
bbce6d69 1454 if (regkind[(U8)OP(next)] == EXACT) {
1455 c1 = UCHARAT(OPERAND(next) + 1);
1456 if (OP(next) == EXACTF)
1457 c2 = fold[c1];
1458 else if (OP(next) == EXACTFL)
1459 c2 = fold_locale[c1];
1460 else
1461 c2 = c1;
1462 }
a0d0e21e 1463 else
bbce6d69 1464 c1 = c2 = -1000;
a0d0e21e
LW
1465 reginput = locinput;
1466 if (minmod) {
c277df42 1467 CHECKPOINT lastcp;
a0d0e21e
LW
1468 minmod = 0;
1469 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 1470 sayNO;
c277df42
IZ
1471 REGCP_SET;
1472 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 1473 /* If it could work, try it. */
bbce6d69 1474 if (c1 == -1000 ||
1475 UCHARAT(reginput) == c1 ||
1476 UCHARAT(reginput) == c2)
1477 {
c277df42
IZ
1478 if (paren) {
1479 if (n) {
1480 regstartp[paren] = reginput - 1;
1481 regendp[paren] = reginput;
1482 } else
1483 regendp[paren] = NULL;
1484 }
a0d0e21e 1485 if (regmatch(next))
4633a7c4 1486 sayYES;
c277df42 1487 REGCP_UNWIND;
bbce6d69 1488 }
c277df42 1489 /* Couldn't or didn't -- move forward. */
748a9306 1490 reginput = locinput + ln;
a0d0e21e
LW
1491 if (regrepeat(scan, 1)) {
1492 ln++;
1493 reginput = locinput + ln;
c277df42 1494 } else
4633a7c4 1495 sayNO;
a0d0e21e
LW
1496 }
1497 }
1498 else {
c277df42 1499 CHECKPOINT lastcp;
a0d0e21e
LW
1500 n = regrepeat(scan, n);
1501 if (ln < n && regkind[(U8)OP(next)] == EOL &&
c277df42 1502 (!multiline || OP(next) == SEOL))
a0d0e21e 1503 ln = n; /* why back off? */
c277df42
IZ
1504 REGCP_SET;
1505 if (paren) {
1506 while (n >= ln) {
1507 /* If it could work, try it. */
1508 if (c1 == -1000 ||
1509 UCHARAT(reginput) == c1 ||
1510 UCHARAT(reginput) == c2)
1511 {
1512 if (paren && n) {
1513 if (n) {
1514 regstartp[paren] = reginput - 1;
1515 regendp[paren] = reginput;
1516 } else
1517 regendp[paren] = NULL;
1518 }
1519 if (regmatch(next))
1520 sayYES;
1521 REGCP_UNWIND;
1522 }
1523 /* Couldn't or didn't -- back up. */
1524 n--;
1525 reginput = locinput + n;
1526 }
1527 } else {
1528 while (n >= ln) {
1529 /* If it could work, try it. */
1530 if (c1 == -1000 ||
1531 UCHARAT(reginput) == c1 ||
1532 UCHARAT(reginput) == c2)
1533 {
1534 if (regmatch(next))
1535 sayYES;
1536 REGCP_UNWIND;
1537 }
1538 /* Couldn't or didn't -- back up. */
1539 n--;
1540 reginput = locinput + n;
bbce6d69 1541 }
a0d0e21e
LW
1542 }
1543 }
4633a7c4 1544 sayNO;
c277df42 1545 break;
a0d0e21e
LW
1546 case SUCCEED:
1547 case END:
1548 reginput = locinput; /* put where regtry can find it */
4633a7c4 1549 sayYES; /* Success! */
c277df42
IZ
1550 case SUSPEND:
1551 n = 1;
1552 goto do_ifmatch;
a0d0e21e 1553 case UNLESSM:
c277df42
IZ
1554 n = 0;
1555 if (locinput < bostr + scan->flags)
1556 goto say_yes;
1557 goto do_ifmatch;
1558 case IFMATCH:
1559 n = 1;
1560 if (locinput < bostr + scan->flags)
1561 goto say_no;
1562 do_ifmatch:
1563 reginput = locinput - scan->flags;
1564 inner = NEXTOPER(NEXTOPER(scan));
1565 if (regmatch(inner) != n) {
1566 say_no:
1567 if (logical) {
1568 logical = 0;
1569 sw = 0;
1570 goto do_longjump;
1571 } else
1572 sayNO;
1573 }
1574 say_yes:
1575 if (logical) {
1576 logical = 0;
1577 sw = 1;
1578 }
1579 if (OP(scan) == SUSPEND)
1580 locinput = reginput;
1581 /* FALL THROUGH. */
1582 case LONGJMP:
1583 do_longjump:
1584 next = scan + ARG(scan);
1585 if (next == scan)
1586 next = NULL;
a0d0e21e
LW
1587 break;
1588 default:
c030ccd9 1589 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 1590 (unsigned long)scan, OP(scan));
a0d0e21e 1591 FAIL("regexp memory corruption");
a687059c 1592 }
a0d0e21e
LW
1593 scan = next;
1594 }
a687059c 1595
a0d0e21e
LW
1596 /*
1597 * We get here only if there's trouble -- normally "case END" is
1598 * the terminating point.
1599 */
1600 FAIL("corrupted regexp pointers");
1601 /*NOTREACHED*/
4633a7c4
LW
1602 sayNO;
1603
1604yes:
1605#ifdef DEBUGGING
1606 regindent--;
1607#endif
1608 return 1;
1609
1610no:
1611#ifdef DEBUGGING
1612 regindent--;
1613#endif
a0d0e21e 1614 return 0;
a687059c
LW
1615}
1616
1617/*
1618 - regrepeat - repeatedly match something simple, report how many
1619 */
1620/*
1621 * [This routine now assumes that it will only match on things of length 1.
1622 * That was true before, but now we assume scan - reginput is the count,
1623 * rather than incrementing count on every character.]
1624 */
79072805 1625static I32
c277df42 1626regrepeat(regnode *p, I32 max)
a687059c 1627{
a0d0e21e
LW
1628 register char *scan;
1629 register char *opnd;
1630 register I32 c;
1631 register char *loceol = regeol;
1632
1633 scan = reginput;
c277df42 1634 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 1635 loceol = scan + max;
161b471a 1636 opnd = (char *) OPERAND(p);
a0d0e21e
LW
1637 switch (OP(p)) {
1638 case ANY:
1639 while (scan < loceol && *scan != '\n')
1640 scan++;
1641 break;
1642 case SANY:
1643 scan = loceol;
1644 break;
bbce6d69 1645 case EXACT: /* length of string is 1 */
1646 c = UCHARAT(++opnd);
1647 while (scan < loceol && UCHARAT(scan) == c)
1648 scan++;
1649 break;
1650 case EXACTF: /* length of string is 1 */
1651 c = UCHARAT(++opnd);
1652 while (scan < loceol &&
1653 (UCHARAT(scan) == c || UCHARAT(scan) == fold[c]))
1654 scan++;
1655 break;
1656 case EXACTFL: /* length of string is 1 */
c277df42 1657 reg_flags |= RF_tainted;
bbce6d69 1658 c = UCHARAT(++opnd);
1659 while (scan < loceol &&
1660 (UCHARAT(scan) == c || UCHARAT(scan) == fold_locale[c]))
a0d0e21e
LW
1661 scan++;
1662 break;
1663 case ANYOF:
bbce6d69 1664 while (scan < loceol && reginclass(opnd, *scan))
a0d0e21e 1665 scan++;
a0d0e21e
LW
1666 break;
1667 case ALNUM:
1668 while (scan < loceol && isALNUM(*scan))
1669 scan++;
1670 break;
bbce6d69 1671 case ALNUML:
c277df42 1672 reg_flags |= RF_tainted;
bbce6d69 1673 while (scan < loceol && isALNUM_LC(*scan))
1674 scan++;
1675 break;
a0d0e21e
LW
1676 case NALNUM:
1677 while (scan < loceol && !isALNUM(*scan))
1678 scan++;
1679 break;
bbce6d69 1680 case NALNUML:
c277df42 1681 reg_flags |= RF_tainted;
bbce6d69 1682 while (scan < loceol && !isALNUM_LC(*scan))
1683 scan++;
1684 break;
a0d0e21e
LW
1685 case SPACE:
1686 while (scan < loceol && isSPACE(*scan))
1687 scan++;
1688 break;
bbce6d69 1689 case SPACEL:
c277df42 1690 reg_flags |= RF_tainted;
bbce6d69 1691 while (scan < loceol && isSPACE_LC(*scan))
1692 scan++;
1693 break;
a0d0e21e
LW
1694 case NSPACE:
1695 while (scan < loceol && !isSPACE(*scan))
1696 scan++;
1697 break;
bbce6d69 1698 case NSPACEL:
c277df42 1699 reg_flags |= RF_tainted;
bbce6d69 1700 while (scan < loceol && !isSPACE_LC(*scan))
1701 scan++;
1702 break;
a0d0e21e
LW
1703 case DIGIT:
1704 while (scan < loceol && isDIGIT(*scan))
1705 scan++;
1706 break;
1707 case NDIGIT:
1708 while (scan < loceol && !isDIGIT(*scan))
1709 scan++;
1710 break;
1711 default: /* Called on something of 0 width. */
1712 break; /* So match right here or not at all. */
1713 }
a687059c 1714
a0d0e21e
LW
1715 c = scan - reginput;
1716 reginput = scan;
a687059c 1717
c277df42
IZ
1718 DEBUG_r(
1719 {
1720 SV *prop = sv_newmortal();
1721
1722 regprop(prop, p);
1723 PerlIO_printf(Perl_debug_log,
1724 "%*s %s can match %ld times out of %ld...\n",
1725 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
1726 });
1727
a0d0e21e 1728 return(c);
a687059c
LW
1729}
1730
1731/*
c277df42
IZ
1732 - regrepeat_hard - repeatedly match something, report total lenth and length
1733 *
1734 * The repeater is supposed to have constant length.
1735 */
1736
1737static I32
1738regrepeat_hard(regnode *p, I32 max, I32 *lp)
1739{
1740 register char *scan;
1741 register char *start;
1742 register char *loceol = regeol;
1743 I32 l = -1;
1744
1745 start = reginput;
1746 while (reginput < loceol && (scan = reginput, regmatch(p))) {
1747 if (l == -1) {
1748 *lp = l = reginput - start;
1749 if (max != REG_INFTY && l*max < loceol - scan)
1750 loceol = scan + l*max;
1751 if (l == 0) {
1752 return max;
1753 }
1754 }
1755 }
1756 if (reginput < loceol)
1757 reginput = scan;
1758 else
1759 scan = reginput;
1760
1761 return (scan - start)/l;
1762}
1763
1764/*
bbce6d69 1765 - regclass - determine if a character falls into a character class
1766 */
1767
1768static bool
8ac85365 1769reginclass(register char *p, register I32 c)
bbce6d69 1770{
1771 char flags = *p;
1772 bool match = FALSE;
1773
1774 c &= 0xFF;
1775 if (p[1 + (c >> 3)] & (1 << (c & 7)))
1776 match = TRUE;
1777 else if (flags & ANYOF_FOLD) {
1778 I32 cf;
1779 if (flags & ANYOF_LOCALE) {
c277df42 1780 reg_flags |= RF_tainted;
bbce6d69 1781 cf = fold_locale[c];
1782 }
1783 else
1784 cf = fold[c];
1785 if (p[1 + (cf >> 3)] & (1 << (cf & 7)))
1786 match = TRUE;
1787 }
1788
1789 if (!match && (flags & ANYOF_ISA)) {
c277df42 1790 reg_flags |= RF_tainted;
bbce6d69 1791
1792 if (((flags & ANYOF_ALNUML) && isALNUM_LC(c)) ||
1793 ((flags & ANYOF_NALNUML) && !isALNUM_LC(c)) ||
1794 ((flags & ANYOF_SPACEL) && isSPACE_LC(c)) ||
1795 ((flags & ANYOF_NSPACEL) && !isSPACE_LC(c)))
1796 {
1797 match = TRUE;
1798 }
1799 }
1800
1801 return match ^ ((flags & ANYOF_INVERT) != 0);
1802}
1803
161b471a
NIS
1804
1805