This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Okay, here's your official unofficial closure leak patch
[perl5.git] / regcomp.c
CommitLineData
a0d0e21e
LW
1/* regcomp.c
2 */
3
4/*
5 * "A fair jaw-cracker dwarf-language must be." --Samwise Gamgee
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
f0fcb552 17/*SUPPRESS 112*/
a687059c
LW
18/*
19 * regcomp and regexec -- regsub and regerror are not used in perl
20 *
21 * Copyright (c) 1986 by University of Toronto.
22 * Written by Henry Spencer. Not derived from licensed software.
23 *
24 * Permission is granted to anyone to use this software for any
25 * purpose on any computer system, and to redistribute it freely,
26 * subject to the following restrictions:
27 *
28 * 1. The author is not responsible for the consequences of use of
29 * this software, no matter how awful, even if they arise
30 * from defects in it.
31 *
32 * 2. The origin of this software must not be misrepresented, either
33 * by explicit claim or by omission.
34 *
35 * 3. Altered versions must be plainly marked as such, and must not
36 * be misrepresented as being the original software.
37 *
38 *
39 **** Alterations to Henry's code are...
40 ****
a0d0e21e 41 **** Copyright (c) 1991-1994, Larry Wall
a687059c 42 ****
9ef589d8
LW
43 **** You may distribute under the terms of either the GNU General Public
44 **** License or the Artistic License, as specified in the README file.
45
a687059c
LW
46 *
47 * Beware that some of this code is subtly aware of the way operator
48 * precedence is structured in regular expressions. Serious changes in
49 * regular-expression syntax might require a total rethink.
50 */
51#include "EXTERN.h"
52#include "perl.h"
53#include "INTERN.h"
54#include "regcomp.h"
55
fe14fcc3
LW
56#ifdef MSDOS
57# if defined(BUGGY_MSC6)
58 /* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */
59 # pragma optimize("a",off)
60 /* But MSC 6.00A is happy with 'w', for aliases only across function calls*/
61 # pragma optimize("w",on )
62# endif /* BUGGY_MSC6 */
63#endif /* MSDOS */
64
a687059c
LW
65#ifndef STATIC
66#define STATIC static
67#endif
68
69#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
70#define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
71 ((*s) == '{' && regcurly(s)))
2b69d0c2
LW
72#ifdef atarist
73#define PERL_META "^$.[()|?+*\\"
74#else
a687059c 75#define META "^$.[()|?+*\\"
2b69d0c2 76#endif
a687059c 77
35c8bce7
LW
78#ifdef SPSTART
79#undef SPSTART /* dratted cpp namespace... */
80#endif
a687059c
LW
81/*
82 * Flags to be passed up and down.
83 */
a687059c 84#define WORST 0 /* Worst case. */
a0d0e21e
LW
85#define HASWIDTH 0x1 /* Known never to match null string. */
86#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
87#define SPSTART 0x4 /* Starts with * or +. */
88#define TRYAGAIN 0x8 /* Weeded out a declaration. */
a687059c
LW
89
90/*
a687059c
LW
91 * Forward declarations for regcomp()'s friends.
92 */
a0d0e21e
LW
93
94static char *reg _((I32, I32 *));
95static char *reganode _((char, unsigned short));
96static char *regatom _((I32 *));
97static char *regbranch _((I32 *));
98static void regc _((char));
99static char *regclass _((void));
100STATIC I32 regcurly _((char *));
101static char *regnode _((char));
102static char *regpiece _((I32 *));
103static void reginsert _((char, char *));
104static void regoptail _((char *, char *));
105static void regset _((char *, I32, I32));
106static void regtail _((char *, char *));
107static char* nextchar _((void));
a687059c
LW
108
109/*
110 - regcomp - compile a regular expression into internal code
111 *
112 * We can't allocate space until we know how big the compiled form will be,
113 * but we can't compile it (and thus know how big it is) until we've got a
114 * place to put the code. So we cheat: we compile it twice, once with code
115 * generation turned off and size counting turned on, and once "for real".
116 * This also means that we don't allocate space until we are sure that the
117 * thing really will compile successfully, and we never have to move the
118 * code and thus invalidate pointers into it. (Note that it has to be in
119 * one piece because free() must be able to free it all.) [NB: not true in perl]
120 *
121 * Beware that the optimization-preparation code in here knows about some
122 * of the structure of the compiled regexp. [I'll say.]
123 */
124regexp *
a0d0e21e
LW
125regcomp(exp,xend,pm)
126char* exp;
127char* xend;
128PMOP* pm;
a687059c 129{
a0d0e21e
LW
130 I32 fold = pm->op_pmflags & PMf_FOLD;
131 register regexp *r;
132 register char *scan;
133 register SV *longish;
134 SV *longest;
135 register I32 len;
136 register char *first;
137 I32 flags;
138 I32 backish;
139 I32 backest;
140 I32 curback;
141 I32 minlen = 0;
142 I32 sawplus = 0;
143 I32 sawopen = 0;
144
145 if (exp == NULL)
146 croak("NULL regexp argument");
147
148 /* First pass: determine size, legality. */
149 regflags = pm->op_pmflags;
150 regparse = exp;
151 regxend = xend;
152 regprecomp = savepvn(exp,xend-exp);
153 regnaughty = 0;
154 regsawback = 0;
155 regnpar = 1;
156 regsize = 0L;
157 regcode = &regdummy;
158 regc((char)MAGIC);
159 if (reg(0, &flags) == NULL) {
160 Safefree(regprecomp);
161 regprecomp = Nullch;
162 return(NULL);
163 }
164
165 /* Small enough for pointer-storage convention? */
166 if (regsize >= 32767L) /* Probably could be 65535L. */
167 FAIL("regexp too big");
168
169 /* Allocate space. */
170 Newc(1001, r, sizeof(regexp) + (unsigned)regsize, char, regexp);
171 if (r == NULL)
172 FAIL("regexp out of space");
173
174 /* Second pass: emit code. */
175 r->prelen = xend-exp;
176 r->precomp = regprecomp;
177 r->subbeg = r->subbase = NULL;
178 regnaughty = 0;
179 regparse = exp;
180 regnpar = 1;
181 regcode = r->program;
182 regc((char)MAGIC);
183 if (reg(0, &flags) == NULL)
184 return(NULL);
185
186 /* Dig out information for optimizations. */
187 pm->op_pmflags = regflags;
188 fold = pm->op_pmflags & PMf_FOLD;
189 r->regstart = Nullsv; /* Worst-case defaults. */
190 r->reganch = 0;
191 r->regmust = Nullsv;
192 r->regback = -1;
193 r->regstclass = Nullch;
194 r->naughty = regnaughty >= 10; /* Probably an expensive pattern. */
195 scan = r->program+1; /* First BRANCH. */
196 if (OP(regnext(scan)) == END) {/* Only one top-level choice. */
197 scan = NEXTOPER(scan);
198
199 first = scan;
200 while ((OP(first) == OPEN && (sawopen = 1)) ||
201 (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) ||
202 (OP(first) == PLUS) ||
203 (OP(first) == MINMOD) ||
204 (regkind[(U8)OP(first)] == CURLY && ARG1(first) > 0) ) {
205 if (OP(first) == PLUS)
206 sawplus = 1;
207 else
208 first += regarglen[(U8)OP(first)];
209 first = NEXTOPER(first);
a687059c
LW
210 }
211
a0d0e21e
LW
212 /* Starting-point info. */
213 again:
214 if (OP(first) == EXACTLY) {
215 r->regstart = newSVpv(OPERAND(first)+1,*OPERAND(first));
216 if (SvCUR(r->regstart) > !(sawstudy|fold))
217 fbm_compile(r->regstart,fold);
218 else
219 sv_upgrade(r->regstart, SVt_PVBM);
220 }
221 else if (strchr(simple+2,OP(first)))
222 r->regstclass = first;
223 else if (OP(first) == BOUND || OP(first) == NBOUND)
224 r->regstclass = first;
225 else if (regkind[(U8)OP(first)] == BOL) {
226 r->reganch = ROPT_ANCH;
227 first = NEXTOPER(first);
228 goto again;
229 }
230 else if ((OP(first) == STAR &&
231 regkind[(U8)OP(NEXTOPER(first))] == ANY) &&
232 !(r->reganch & ROPT_ANCH) )
233 {
234 /* turn .* into ^.* with an implied $*=1 */
235 r->reganch = ROPT_ANCH | ROPT_IMPLICIT;
236 first = NEXTOPER(first);
237 goto again;
238 }
239 if (sawplus && (!sawopen || !regsawback))
240 r->reganch |= ROPT_SKIP; /* x+ must match 1st of run */
241
242 DEBUG_r(fprintf(stderr,"first %d next %d offset %d\n",
243 OP(first), OP(NEXTOPER(first)), first - scan));
244 /*
245 * If there's something expensive in the r.e., find the
246 * longest literal string that must appear and make it the
247 * regmust. Resolve ties in favor of later strings, since
248 * the regstart check works with the beginning of the r.e.
249 * and avoiding duplication strengthens checking. Not a
250 * strong reason, but sufficient in the absence of others.
251 * [Now we resolve ties in favor of the earlier string if
252 * it happens that curback has been invalidated, since the
253 * earlier string may buy us something the later one won't.]
254 */
255 longish = newSVpv("",0);
256 longest = newSVpv("",0);
257 len = 0;
258 minlen = 0;
259 curback = 0;
260 backish = 0;
261 backest = 0;
262 while (OP(scan) != END) {
263 if (OP(scan) == BRANCH) {
264 if (OP(regnext(scan)) == BRANCH) {
265 curback = -30000;
266 while (OP(scan) == BRANCH)
267 scan = regnext(scan);
00bf170e 268 }
a0d0e21e
LW
269 else /* single branch is ok */
270 scan = NEXTOPER(scan);
748a9306 271 continue;
a0d0e21e
LW
272 }
273 if (OP(scan) == UNLESSM) {
274 curback = -30000;
275 scan = regnext(scan);
748a9306 276 continue;
a0d0e21e
LW
277 }
278 if (OP(scan) == EXACTLY) {
279 char *t;
a687059c 280
a0d0e21e
LW
281 first = scan;
282 while (OP(t = regnext(scan)) == CLOSE)
283 scan = t;
284 minlen += *OPERAND(first);
285 if (curback - backish == len) {
286 sv_catpvn(longish, OPERAND(first)+1,
287 *OPERAND(first));
288 len += *OPERAND(first);
289 curback += *OPERAND(first);
290 first = regnext(scan);
2b69d0c2 291 }
a0d0e21e
LW
292 else if (*OPERAND(first) >= len + (curback >= 0)) {
293 len = *OPERAND(first);
294 sv_setpvn(longish, OPERAND(first)+1,len);
295 backish = curback;
296 curback += len;
297 first = regnext(scan);
9ef589d8 298 }
a0d0e21e
LW
299 else
300 curback += *OPERAND(first);
301 }
302 else if (strchr(varies,OP(scan))) {
303 curback = -30000;
a687059c 304 len = 0;
a0d0e21e 305 if (SvCUR(longish) > SvCUR(longest)) {
79072805 306 sv_setsv(longest,longish);
34de22dd
LW
307 backest = backish;
308 }
a0d0e21e
LW
309 sv_setpvn(longish,"",0);
310 if (OP(scan) == PLUS && strchr(simple,OP(NEXTOPER(scan))))
311 minlen++;
312 else if (regkind[(U8)OP(scan)] == CURLY &&
313 strchr(simple,OP(NEXTOPER(scan)+4)))
314 minlen += ARG1(scan);
315 }
316 else if (strchr(simple,OP(scan))) {
317 curback++;
318 minlen++;
319 len = 0;
320 if (SvCUR(longish) > SvCUR(longest)) {
321 sv_setsv(longest,longish);
322 backest = backish;
fe14fcc3 323 }
a0d0e21e
LW
324 sv_setpvn(longish,"",0);
325 }
326 scan = regnext(scan);
a687059c
LW
327 }
328
a0d0e21e
LW
329 /* Prefer earlier on tie, unless we can tail match latter */
330
331 if (SvCUR(longish) + (regkind[(U8)OP(first)] == EOL) >
332 SvCUR(longest))
333 {
334 sv_setsv(longest,longish);
335 backest = backish;
336 }
337 else
338 sv_setpvn(longish,"",0);
339 if (SvCUR(longest)
340 &&
341 (!r->regstart
342 ||
343 !fbm_instr((unsigned char*) SvPVX(r->regstart),
344 (unsigned char *) SvPVX(r->regstart)
345 + SvCUR(r->regstart),
346 longest)
347 )
348 )
349 {
350 r->regmust = longest;
351 if (backest < 0)
352 backest = -1;
353 r->regback = backest;
354 if (SvCUR(longest) > !(sawstudy || fold ||
355 regkind[(U8)OP(first)]==EOL))
356 fbm_compile(r->regmust,fold);
357 (void)SvUPGRADE(r->regmust, SVt_PVBM);
358 BmUSEFUL(r->regmust) = 100;
359 if (regkind[(U8)OP(first)] == EOL && SvCUR(longish))
360 SvTAIL_on(r->regmust);
361 }
362 else {
363 SvREFCNT_dec(longest);
364 longest = Nullsv;
365 }
366 SvREFCNT_dec(longish);
367 }
368
369 r->do_folding = fold;
370 r->nparens = regnpar - 1;
371 r->minlen = minlen;
372 Newz(1002, r->startp, regnpar, char*);
373 Newz(1002, r->endp, regnpar, char*);
374 DEBUG_r(regdump(r));
375 return(r);
a687059c
LW
376}
377
378/*
379 - reg - regular expression, i.e. main body or parenthesized thing
380 *
381 * Caller must absorb opening parenthesis.
382 *
383 * Combining parenthesis handling with the base level of regular expression
384 * is a trifle forced, but the need to tie the tails of the branches to what
385 * follows makes it hard to avoid.
386 */
387static char *
388reg(paren, flagp)
79072805
LW
389I32 paren; /* Parenthesized? */
390I32 *flagp;
a687059c 391{
a0d0e21e
LW
392 register char *ret;
393 register char *br;
394 register char *ender = 0;
395 register I32 parno = 0;
396 I32 flags;
397
398 *flagp = HASWIDTH; /* Tentatively. */
399
400 /* Make an OPEN node, if parenthesized. */
401 if (paren) {
402 if (*regparse == '?') {
403 regparse++;
748a9306 404 paren = *regparse++;
a0d0e21e
LW
405 ret = NULL;
406 switch (paren) {
407 case ':':
408 case '=':
409 case '!':
410 break;
411 case '$':
412 case '@':
413 croak("Sequence (?%c...) not implemented", paren);
414 break;
415 case '#':
416 while (*regparse && *regparse != ')')
417 regparse++;
418 if (*regparse != ')')
748a9306 419 croak("Sequence (?#... not terminated");
a0d0e21e
LW
420 nextchar();
421 *flagp = TRYAGAIN;
422 return NULL;
423 default:
424 --regparse;
425 while (*regparse && strchr("iogmsx", *regparse))
426 pmflag(&regflags, *regparse++);
427 if (*regparse != ')')
428 croak("Sequence (?%c...) not recognized", *regparse);
429 nextchar();
430 *flagp = TRYAGAIN;
431 return NULL;
432 }
433 }
434 else {
435 parno = regnpar;
436 regnpar++;
437 ret = reganode(OPEN, parno);
438 }
439 } else
440 ret = NULL;
441
442 /* Pick up the branches, linking them together. */
443 br = regbranch(&flags);
444 if (br == NULL)
445 return(NULL);
446 if (ret != NULL)
447 regtail(ret, br); /* OPEN -> first. */
448 else
449 ret = br;
450 if (!(flags&HASWIDTH))
451 *flagp &= ~HASWIDTH;
452 *flagp |= flags&SPSTART;
453 while (*regparse == '|') {
454 nextchar();
a687059c
LW
455 br = regbranch(&flags);
456 if (br == NULL)
a0d0e21e
LW
457 return(NULL);
458 regtail(ret, br); /* BRANCH -> BRANCH. */
a687059c 459 if (!(flags&HASWIDTH))
a0d0e21e 460 *flagp &= ~HASWIDTH;
a687059c 461 *flagp |= flags&SPSTART;
a0d0e21e
LW
462 }
463
464 /* Make a closing node, and hook it on the end. */
465 switch (paren) {
466 case ':':
467 ender = regnode(NOTHING);
468 break;
469 case 1:
470 ender = reganode(CLOSE, parno);
471 break;
472 case '=':
473 case '!':
474 ender = regnode(SUCCEED);
475 *flagp &= ~HASWIDTH;
476 break;
477 case 0:
478 ender = regnode(END);
479 break;
480 }
481 regtail(ret, ender);
482
483 /* Hook the tails of the branches to the closing node. */
484 for (br = ret; br != NULL; br = regnext(br))
485 regoptail(br, ender);
486
487 if (paren == '=') {
488 reginsert(IFMATCH,ret);
489 regtail(ret, regnode(NOTHING));
490 }
491 else if (paren == '!') {
492 reginsert(UNLESSM,ret);
493 regtail(ret, regnode(NOTHING));
494 }
495
496 /* Check for proper termination. */
497 if (paren && *nextchar() != ')') {
498 FAIL("unmatched () in regexp");
499 } else if (!paren && regparse < regxend) {
500 if (*regparse == ')') {
501 FAIL("unmatched () in regexp");
502 } else
503 FAIL("junk on end of regexp"); /* "Can't happen". */
504 /* NOTREACHED */
505 }
a687059c 506
a0d0e21e 507 return(ret);
a687059c
LW
508}
509
510/*
511 - regbranch - one alternative of an | operator
512 *
513 * Implements the concatenation operator.
514 */
515static char *
516regbranch(flagp)
79072805 517I32 *flagp;
a687059c 518{
a0d0e21e
LW
519 register char *ret;
520 register char *chain;
521 register char *latest;
522 I32 flags = 0;
523
524 *flagp = WORST; /* Tentatively. */
525
526 ret = regnode(BRANCH);
527 chain = NULL;
528 regparse--;
529 nextchar();
530 while (regparse < regxend && *regparse != '|' && *regparse != ')') {
531 flags &= ~TRYAGAIN;
532 latest = regpiece(&flags);
533 if (latest == NULL) {
534 if (flags & TRYAGAIN)
535 continue;
536 return(NULL);
537 }
538 *flagp |= flags&HASWIDTH;
539 if (chain == NULL) /* First piece. */
540 *flagp |= flags&SPSTART;
541 else {
542 regnaughty++;
543 regtail(chain, latest);
a687059c 544 }
a0d0e21e
LW
545 chain = latest;
546 }
547 if (chain == NULL) /* Loop ran zero times. */
548 (void) regnode(NOTHING);
a687059c 549
a0d0e21e 550 return(ret);
a687059c
LW
551}
552
553/*
554 - regpiece - something followed by possible [*+?]
555 *
556 * Note that the branching code sequences used for ? and the general cases
557 * of * and + are somewhat optimized: they use the same NOTHING node as
558 * both the endmarker for their branch list and the body of the last branch.
559 * It might seem that this node could be dispensed with entirely, but the
560 * endmarker role is not redundant.
561 */
562static char *
563regpiece(flagp)
79072805 564I32 *flagp;
a687059c 565{
a0d0e21e
LW
566 register char *ret;
567 register char op;
568 register char *next;
569 I32 flags;
570 char *origparse = regparse;
571 char *maxpos;
572 I32 min;
573 I32 max = 32767;
574
575 ret = regatom(&flags);
576 if (ret == NULL) {
577 if (flags & TRYAGAIN)
578 *flagp |= TRYAGAIN;
579 return(NULL);
580 }
581
582 op = *regparse;
583 if (op == '(' && regparse[1] == '?' && regparse[2] == '#') {
584 while (op && op != ')')
585 op = *++regparse;
586 if (op) {
587 nextchar();
588 op = *regparse;
589 }
590 }
591
592 if (op == '{' && regcurly(regparse)) {
593 next = regparse + 1;
594 maxpos = Nullch;
595 while (isDIGIT(*next) || *next == ',') {
596 if (*next == ',') {
597 if (maxpos)
598 break;
599 else
600 maxpos = next;
a687059c 601 }
a0d0e21e
LW
602 next++;
603 }
604 if (*next == '}') { /* got one */
605 if (!maxpos)
606 maxpos = next;
607 regparse++;
608 min = atoi(regparse);
609 if (*maxpos == ',')
610 maxpos++;
611 else
612 maxpos = regparse;
613 max = atoi(maxpos);
614 if (!max && *maxpos != '0')
615 max = 32767; /* meaning "infinity" */
616 regparse = next;
617 nextchar();
618
619 do_curly:
620 if ((flags&SIMPLE)) {
621 regnaughty += 2 + regnaughty / 2;
622 reginsert(CURLY, ret);
623 }
624 else {
625 regnaughty += 4 + regnaughty; /* compound interest */
626 regtail(ret, regnode(WHILEM));
627 reginsert(CURLYX,ret);
628 regtail(ret, regnode(NOTHING));
629 }
630
631 if (min > 0)
632 *flagp = (WORST|HASWIDTH);
633 if (max && max < min)
634 croak("Can't do {n,m} with n > m");
635 if (regcode != &regdummy) {
00bf170e 636#ifdef REGALIGN
a0d0e21e
LW
637 *(unsigned short *)(ret+3) = min;
638 *(unsigned short *)(ret+5) = max;
00bf170e 639#else
a0d0e21e
LW
640 ret[3] = min >> 8; ret[4] = min & 0377;
641 ret[5] = max >> 8; ret[6] = max & 0377;
00bf170e 642#endif
a687059c 643 }
a687059c 644
a0d0e21e 645 goto nest_check;
a687059c 646 }
a0d0e21e 647 }
a687059c 648
a0d0e21e
LW
649 if (!ISMULT1(op)) {
650 *flagp = flags;
a687059c 651 return(ret);
a0d0e21e
LW
652 }
653 nextchar();
654
655 *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
656
657 if (op == '*' && (flags&SIMPLE)) {
658 reginsert(STAR, ret);
659 regnaughty += 4;
660 }
661 else if (op == '*') {
662 min = 0;
663 goto do_curly;
664 } else if (op == '+' && (flags&SIMPLE)) {
665 reginsert(PLUS, ret);
666 regnaughty += 3;
667 }
668 else if (op == '+') {
669 min = 1;
670 goto do_curly;
671 } else if (op == '?') {
672 min = 0; max = 1;
673 goto do_curly;
674 }
675 nest_check:
676 if (dowarn && regcode != &regdummy && !(flags&HASWIDTH) && max > 10000) {
677 warn("%.*s matches null string many times",
678 regparse - origparse, origparse);
679 }
680
681 if (*regparse == '?') {
682 nextchar();
683 reginsert(MINMOD, ret);
684#ifdef REGALIGN
685 regtail(ret, ret + 4);
686#else
687 regtail(ret, ret + 3);
688#endif
689 }
690 if (ISMULT2(regparse))
691 FAIL("nested *?+ in regexp");
692
693 return(ret);
a687059c
LW
694}
695
696/*
697 - regatom - the lowest level
698 *
699 * Optimization: gobbles an entire sequence of ordinary characters so that
700 * it can turn them into a single node, which is smaller to store and
701 * faster to run. Backslashed characters are exceptions, each becoming a
702 * separate node; the code is simpler that way and it's not worth fixing.
703 *
704 * [Yes, it is worth fixing, some scripts can run twice the speed.]
705 */
706static char *
707regatom(flagp)
79072805 708I32 *flagp;
a687059c 709{
a0d0e21e
LW
710 register char *ret = 0;
711 I32 flags;
712
713 *flagp = WORST; /* Tentatively. */
714
715tryagain:
716 switch (*regparse) {
717 case '^':
718 nextchar();
719 if (regflags & PMf_MULTILINE)
720 ret = regnode(MBOL);
721 else if (regflags & PMf_SINGLELINE)
722 ret = regnode(SBOL);
723 else
724 ret = regnode(BOL);
725 break;
726 case '$':
727 nextchar();
728 if (regflags & PMf_MULTILINE)
729 ret = regnode(MEOL);
730 else if (regflags & PMf_SINGLELINE)
731 ret = regnode(SEOL);
732 else
733 ret = regnode(EOL);
734 break;
735 case '.':
736 nextchar();
737 if (regflags & PMf_SINGLELINE)
738 ret = regnode(SANY);
739 else
740 ret = regnode(ANY);
741 regnaughty++;
742 *flagp |= HASWIDTH|SIMPLE;
743 break;
744 case '[':
745 regparse++;
746 ret = regclass();
747 *flagp |= HASWIDTH|SIMPLE;
748 break;
749 case '(':
750 nextchar();
751 ret = reg(1, &flags);
752 if (ret == NULL) {
753 if (flags & TRYAGAIN)
754 goto tryagain;
755 return(NULL);
756 }
757 *flagp |= flags&(HASWIDTH|SPSTART);
758 break;
759 case '|':
760 case ')':
761 if (flags & TRYAGAIN) {
762 *flagp |= TRYAGAIN;
763 return NULL;
764 }
765 croak("internal urp in regexp at /%s/", regparse);
766 /* Supposed to be caught earlier. */
767 break;
768 case '?':
769 case '+':
770 case '*':
771 FAIL("?+* follows nothing in regexp");
772 break;
773 case '\\':
774 switch (*++regparse) {
775 case 'A':
776 ret = regnode(SBOL);
777 *flagp |= SIMPLE;
778 nextchar();
779 break;
780 case 'G':
781 ret = regnode(GBOL);
782 *flagp |= SIMPLE;
783 nextchar();
784 break;
785 case 'Z':
786 ret = regnode(SEOL);
787 *flagp |= SIMPLE;
788 nextchar();
789 break;
790 case 'w':
791 ret = regnode(ALNUM);
792 *flagp |= HASWIDTH|SIMPLE;
793 nextchar();
794 break;
795 case 'W':
796 ret = regnode(NALNUM);
797 *flagp |= HASWIDTH|SIMPLE;
798 nextchar();
799 break;
800 case 'b':
801 ret = regnode(BOUND);
802 *flagp |= SIMPLE;
803 nextchar();
804 break;
805 case 'B':
806 ret = regnode(NBOUND);
807 *flagp |= SIMPLE;
808 nextchar();
809 break;
810 case 's':
811 ret = regnode(SPACE);
812 *flagp |= HASWIDTH|SIMPLE;
813 nextchar();
814 break;
815 case 'S':
816 ret = regnode(NSPACE);
817 *flagp |= HASWIDTH|SIMPLE;
818 nextchar();
819 break;
820 case 'd':
821 ret = regnode(DIGIT);
822 *flagp |= HASWIDTH|SIMPLE;
823 nextchar();
824 break;
825 case 'D':
826 ret = regnode(NDIGIT);
827 *flagp |= HASWIDTH|SIMPLE;
828 nextchar();
829 break;
830 case 'n':
831 case 'r':
832 case 't':
833 case 'f':
834 case 'e':
835 case 'a':
836 case 'x':
837 case 'c':
838 case '0':
839 goto defchar;
840 case '1': case '2': case '3': case '4':
841 case '5': case '6': case '7': case '8': case '9':
842 {
843 I32 num = atoi(regparse);
844
845 if (num > 9 && num >= regnpar)
846 goto defchar;
847 else {
848 regsawback = 1;
849 ret = reganode(REF, num);
850 *flagp |= HASWIDTH;
851 while (isDIGIT(*regparse))
a687059c 852 regparse++;
a0d0e21e
LW
853 regparse--;
854 nextchar();
855 }
856 }
857 break;
858 case '\0':
859 if (regparse >= regxend)
860 FAIL("trailing \\ in regexp");
861 /* FALL THROUGH */
862 default:
863 goto defchar;
864 }
865 break;
866 default: {
867 register I32 len;
868 register char ender;
869 register char *p;
870 char *oldp;
871 I32 numlen;
872
873 regparse++;
874
875 defchar:
876 ret = regnode(EXACTLY);
877 regc(0); /* save spot for len */
878 for (len = 0, p = regparse - 1;
879 len < 127 && p < regxend;
880 len++)
881 {
882 oldp = p;
883 switch (*p) {
884 case '^':
885 case '$':
886 case '.':
887 case '[':
888 case '(':
889 case ')':
890 case '|':
891 goto loopdone;
892 case '\\':
893 switch (*++p) {
894 case 'A':
895 case 'G':
896 case 'Z':
897 case 'w':
898 case 'W':
899 case 'b':
900 case 'B':
901 case 's':
902 case 'S':
903 case 'd':
904 case 'D':
905 --p;
906 goto loopdone;
907 case 'n':
908 ender = '\n';
909 p++;
a687059c 910 break;
a0d0e21e
LW
911 case 'r':
912 ender = '\r';
913 p++;
a687059c 914 break;
a0d0e21e
LW
915 case 't':
916 ender = '\t';
917 p++;
a687059c 918 break;
a0d0e21e
LW
919 case 'f':
920 ender = '\f';
921 p++;
a687059c 922 break;
a0d0e21e
LW
923 case 'e':
924 ender = '\033';
925 p++;
a687059c 926 break;
a0d0e21e
LW
927 case 'a':
928 ender = '\007';
929 p++;
a687059c 930 break;
a0d0e21e
LW
931 case 'x':
932 ender = scan_hex(++p, 2, &numlen);
933 p += numlen;
a687059c 934 break;
a0d0e21e
LW
935 case 'c':
936 p++;
937 ender = *p++;
938 if (isLOWER(ender))
939 ender = toUPPER(ender);
940 ender ^= 64;
a687059c 941 break;
a0d0e21e
LW
942 case '0': case '1': case '2': case '3':case '4':
943 case '5': case '6': case '7': case '8':case '9':
944 if (*p == '0' ||
945 (isDIGIT(p[1]) && atoi(p) >= regnpar) ) {
946 ender = scan_oct(p, 3, &numlen);
947 p += numlen;
948 }
949 else {
950 --p;
951 goto loopdone;
a687059c
LW
952 }
953 break;
a0d0e21e
LW
954 case '\0':
955 if (p >= regxend)
a687059c
LW
956 FAIL("trailing \\ in regexp");
957 /* FALL THROUGH */
a0d0e21e
LW
958 default:
959 ender = *p++;
960 break;
961 }
962 break;
963 case ' ': case '\t': case '\n': case '\r': case '\f': case '\v':
964 if (regflags & PMf_EXTENDED) {
965 p++;
966 len--;
967 continue;
968 }
969 /* FALL THROUGH */
a687059c 970 default:
a0d0e21e
LW
971 ender = *p++;
972 break;
a687059c 973 }
a0d0e21e
LW
974 if (regflags & PMf_FOLD && isUPPER(ender))
975 ender = toLOWER(ender);
976 if (ISMULT2(p)) { /* Back off on ?+*. */
977 if (len)
978 p = oldp;
979 else {
980 len++;
981 regc(ender);
982 }
983 break;
a687059c 984 }
a0d0e21e
LW
985 regc(ender);
986 }
987 loopdone:
988 regparse = p - 1;
989 nextchar();
990 if (len < 0)
991 FAIL("internal disaster in regexp");
992 if (len > 0)
993 *flagp |= HASWIDTH;
994 if (len == 1)
995 *flagp |= SIMPLE;
996 if (regcode != &regdummy)
997 *OPERAND(ret) = len;
998 regc('\0');
a687059c 999 }
a0d0e21e
LW
1000 break;
1001 }
a687059c 1002
a0d0e21e 1003 return(ret);
a687059c
LW
1004}
1005
1006static void
1007regset(bits,def,c)
1008char *bits;
79072805
LW
1009I32 def;
1010register I32 c;
a687059c 1011{
a0d0e21e
LW
1012 if (regcode == &regdummy)
1013 return;
1014 c &= 255;
1015 if (def)
1016 bits[c >> 3] &= ~(1 << (c & 7));
1017 else
1018 bits[c >> 3] |= (1 << (c & 7));
a687059c
LW
1019}
1020
1021static char *
1022regclass()
1023{
a0d0e21e
LW
1024 register char *bits;
1025 register I32 class;
1026 register I32 lastclass = 1234;
1027 register I32 range = 0;
1028 register char *ret;
1029 register I32 def;
1030 I32 numlen;
1031
1032 ret = regnode(ANYOF);
1033 if (*regparse == '^') { /* Complement of range. */
1034 regnaughty++;
1035 regparse++;
1036 def = 0;
1037 } else {
1038 def = 255;
1039 }
1040 bits = regcode;
1041 for (class = 0; class < 32; class++)
1042 regc(def);
1043 if (*regparse == ']' || *regparse == '-')
1044 goto skipcond; /* allow 1st char to be ] or - */
1045 while (regparse < regxend && *regparse != ']') {
1046 skipcond:
1047 class = UCHARAT(regparse++);
1048 if (class == '\\') {
1049 class = UCHARAT(regparse++);
1050 switch (class) {
1051 case 'w':
1052 for (class = 0; class < 256; class++)
1053 if (isALNUM(class))
1054 regset(bits,def,class);
1055 lastclass = 1234;
1056 continue;
1057 case 'W':
1058 for (class = 0; class < 256; class++)
1059 if (!isALNUM(class))
1060 regset(bits,def,class);
1061 lastclass = 1234;
1062 continue;
1063 case 's':
1064 for (class = 0; class < 256; class++)
1065 if (isSPACE(class))
1066 regset(bits,def,class);
1067 lastclass = 1234;
1068 continue;
1069 case 'S':
1070 for (class = 0; class < 256; class++)
1071 if (!isSPACE(class))
1072 regset(bits,def,class);
1073 lastclass = 1234;
1074 continue;
1075 case 'd':
1076 for (class = '0'; class <= '9'; class++)
1077 regset(bits,def,class);
1078 lastclass = 1234;
1079 continue;
1080 case 'D':
1081 for (class = 0; class < '0'; class++)
1082 regset(bits,def,class);
1083 for (class = '9' + 1; class < 256; class++)
1084 regset(bits,def,class);
1085 lastclass = 1234;
1086 continue;
1087 case 'n':
1088 class = '\n';
1089 break;
1090 case 'r':
1091 class = '\r';
1092 break;
1093 case 't':
1094 class = '\t';
1095 break;
1096 case 'f':
1097 class = '\f';
1098 break;
1099 case 'b':
1100 class = '\b';
1101 break;
1102 case 'e':
1103 class = '\033';
1104 break;
1105 case 'a':
1106 class = '\007';
1107 break;
1108 case 'x':
1109 class = scan_hex(regparse, 2, &numlen);
1110 regparse += numlen;
1111 break;
1112 case 'c':
1113 class = *regparse++;
1114 if (isLOWER(class))
1115 class = toUPPER(class);
1116 class ^= 64;
1117 break;
1118 case '0': case '1': case '2': case '3': case '4':
1119 case '5': case '6': case '7': case '8': case '9':
1120 class = scan_oct(--regparse, 3, &numlen);
1121 regparse += numlen;
1122 break;
1123 }
1124 }
1125 if (range) {
1126 if (lastclass > class)
1127 FAIL("invalid [] range in regexp");
1128 range = 0;
1129 }
1130 else {
1131 lastclass = class;
1132 if (*regparse == '-' && regparse+1 < regxend &&
1133 regparse[1] != ']') {
a687059c 1134 regparse++;
a0d0e21e
LW
1135 range = 1;
1136 continue; /* do it next time */
1137 }
a687059c 1138 }
a0d0e21e
LW
1139 for ( ; lastclass <= class; lastclass++) {
1140 regset(bits,def,lastclass);
1141 if (regflags & PMf_FOLD && isUPPER(lastclass))
1142 regset(bits,def,toLOWER(lastclass));
a687059c 1143 }
a0d0e21e
LW
1144 lastclass = class;
1145 }
1146 if (*regparse != ']')
1147 FAIL("unmatched [] in regexp");
1148 nextchar();
1149 return ret;
1150}
1151
1152static char*
1153nextchar()
1154{
1155 char* retval = regparse++;
1156
1157 if (regflags & PMf_EXTENDED) {
748a9306
LW
1158 for (;;) {
1159 if (isSPACE(*regparse)) {
1160 regparse++;
1161 continue;
1162 }
1163 else if (*regparse == '(' && regparse[1] == '?' &&
1164 regparse[2] == '#') {
1165 while (*regparse && *regparse != ')')
1166 regparse++;
1167 regparse++;
1168 continue;
1169 }
1170 else if (*regparse == '#') {
1171 while (*regparse && *regparse != '\n')
1172 regparse++;
1173 regparse++;
1174 continue;
1175 }
1176 break;
1177 }
a0d0e21e
LW
1178 }
1179 return retval;
a687059c
LW
1180}
1181
1182/*
a0d0e21e
LW
1183- regnode - emit a node
1184*/
1185#ifdef CAN_PROTOTYPE
1186static char * /* Location. */
1187regnode(char op)
1188#else
a687059c
LW
1189static char * /* Location. */
1190regnode(op)
1191char op;
a0d0e21e 1192#endif
a687059c 1193{
a0d0e21e
LW
1194 register char *ret;
1195 register char *ptr;
a687059c 1196
a0d0e21e
LW
1197 ret = regcode;
1198 if (ret == &regdummy) {
a687059c 1199#ifdef REGALIGN
a0d0e21e
LW
1200 if (!(regsize & 1))
1201 regsize++;
a687059c 1202#endif
a0d0e21e
LW
1203 regsize += 3;
1204 return(ret);
1205 }
a687059c
LW
1206
1207#ifdef REGALIGN
1208#ifndef lint
a0d0e21e
LW
1209 if (!((long)ret & 1))
1210 *ret++ = 127;
a687059c
LW
1211#endif
1212#endif
a0d0e21e
LW
1213 ptr = ret;
1214 *ptr++ = op;
1215 *ptr++ = '\0'; /* Null "next" pointer. */
1216 *ptr++ = '\0';
1217 regcode = ptr;
a687059c 1218
a0d0e21e 1219 return(ret);
a687059c
LW
1220}
1221
1222/*
a0d0e21e
LW
1223- reganode - emit a node with an argument
1224*/
1225#ifdef CAN_PROTOTYPE
1226static char * /* Location. */
1227reganode(char op, unsigned short arg)
1228#else
fe14fcc3
LW
1229static char * /* Location. */
1230reganode(op, arg)
1231char op;
1232unsigned short arg;
a0d0e21e 1233#endif
fe14fcc3 1234{
a0d0e21e
LW
1235 register char *ret;
1236 register char *ptr;
fe14fcc3 1237
a0d0e21e
LW
1238 ret = regcode;
1239 if (ret == &regdummy) {
fe14fcc3 1240#ifdef REGALIGN
a0d0e21e
LW
1241 if (!(regsize & 1))
1242 regsize++;
fe14fcc3 1243#endif
a0d0e21e
LW
1244 regsize += 5;
1245 return(ret);
1246 }
fe14fcc3
LW
1247
1248#ifdef REGALIGN
1249#ifndef lint
a0d0e21e
LW
1250 if (!((long)ret & 1))
1251 *ret++ = 127;
fe14fcc3
LW
1252#endif
1253#endif
a0d0e21e
LW
1254 ptr = ret;
1255 *ptr++ = op;
1256 *ptr++ = '\0'; /* Null "next" pointer. */
1257 *ptr++ = '\0';
fe14fcc3 1258#ifdef REGALIGN
a0d0e21e 1259 *(unsigned short *)(ret+3) = arg;
fe14fcc3 1260#else
a0d0e21e 1261 ret[3] = arg >> 8; ret[4] = arg & 0377;
fe14fcc3 1262#endif
a0d0e21e
LW
1263 ptr += 2;
1264 regcode = ptr;
fe14fcc3 1265
a0d0e21e 1266 return(ret);
fe14fcc3
LW
1267}
1268
1269/*
a0d0e21e
LW
1270- regc - emit (if appropriate) a byte of code
1271*/
1272#ifdef CAN_PROTOTYPE
1273static void
1274regc(char b)
1275#else
a687059c
LW
1276static void
1277regc(b)
1278char b;
a0d0e21e 1279#endif
a687059c 1280{
a0d0e21e
LW
1281 if (regcode != &regdummy)
1282 *regcode++ = b;
1283 else
1284 regsize++;
a687059c
LW
1285}
1286
1287/*
a0d0e21e
LW
1288- reginsert - insert an operator in front of already-emitted operand
1289*
1290* Means relocating the operand.
1291*/
1292#ifdef CAN_PROTOTYPE
1293static void
1294reginsert(char op, char *opnd)
1295#else
a687059c
LW
1296static void
1297reginsert(op, opnd)
1298char op;
1299char *opnd;
a0d0e21e 1300#endif
a687059c 1301{
a0d0e21e
LW
1302 register char *src;
1303 register char *dst;
1304 register char *place;
1305 register int offset = (regkind[(U8)op] == CURLY ? 4 : 0);
a687059c 1306
a0d0e21e 1307 if (regcode == &regdummy) {
a687059c 1308#ifdef REGALIGN
a0d0e21e 1309 regsize += 4 + offset;
a687059c 1310#else
a0d0e21e 1311 regsize += 3 + offset;
a687059c 1312#endif
a0d0e21e
LW
1313 return;
1314 }
a687059c 1315
a0d0e21e 1316 src = regcode;
a687059c 1317#ifdef REGALIGN
a0d0e21e 1318 regcode += 4 + offset;
a687059c 1319#else
a0d0e21e 1320 regcode += 3 + offset;
a687059c 1321#endif
a0d0e21e
LW
1322 dst = regcode;
1323 while (src > opnd)
1324 *--dst = *--src;
1325
1326 place = opnd; /* Op node, where operand used to be. */
1327 *place++ = op;
1328 *place++ = '\0';
1329 *place++ = '\0';
1330 while (offset-- > 0)
a687059c 1331 *place++ = '\0';
2b69d0c2 1332#ifdef REGALIGN
a0d0e21e 1333 *place++ = '\177';
2b69d0c2 1334#endif
a687059c
LW
1335}
1336
1337/*
a0d0e21e
LW
1338- regtail - set the next-pointer at the end of a node chain
1339*/
a687059c
LW
1340static void
1341regtail(p, val)
1342char *p;
1343char *val;
1344{
a0d0e21e
LW
1345 register char *scan;
1346 register char *temp;
1347 register I32 offset;
1348
1349 if (p == &regdummy)
1350 return;
1351
1352 /* Find last node. */
1353 scan = p;
1354 for (;;) {
1355 temp = regnext(scan);
1356 if (temp == NULL)
1357 break;
1358 scan = temp;
1359 }
a687059c
LW
1360
1361#ifdef REGALIGN
a0d0e21e 1362 offset = val - scan;
a687059c 1363#ifndef lint
a0d0e21e 1364 *(short*)(scan+1) = offset;
a687059c 1365#else
a0d0e21e 1366 offset = offset;
a687059c
LW
1367#endif
1368#else
a0d0e21e
LW
1369 if (OP(scan) == BACK)
1370 offset = scan - val;
1371 else
1372 offset = val - scan;
1373 *(scan+1) = (offset>>8)&0377;
1374 *(scan+2) = offset&0377;
a687059c
LW
1375#endif
1376}
1377
1378/*
a0d0e21e
LW
1379- regoptail - regtail on operand of first argument; nop if operandless
1380*/
a687059c
LW
1381static void
1382regoptail(p, val)
1383char *p;
1384char *val;
1385{
a0d0e21e
LW
1386 /* "Operandless" and "op != BRANCH" are synonymous in practice. */
1387 if (p == NULL || p == &regdummy || regkind[(U8)OP(p)] != BRANCH)
1388 return;
1389 regtail(NEXTOPER(p), val);
a687059c
LW
1390}
1391
1392/*
1393 - regcurly - a little FSA that accepts {\d+,?\d*}
1394 */
79072805 1395STATIC I32
a687059c
LW
1396regcurly(s)
1397register char *s;
1398{
1399 if (*s++ != '{')
1400 return FALSE;
f0fcb552 1401 if (!isDIGIT(*s))
a687059c 1402 return FALSE;
f0fcb552 1403 while (isDIGIT(*s))
a687059c
LW
1404 s++;
1405 if (*s == ',')
1406 s++;
f0fcb552 1407 while (isDIGIT(*s))
a687059c
LW
1408 s++;
1409 if (*s != '}')
1410 return FALSE;
1411 return TRUE;
1412}
1413
1414#ifdef DEBUGGING
1415
1416/*
1417 - regdump - dump a regexp onto stderr in vaguely comprehensible form
1418 */
1419void
1420regdump(r)
1421regexp *r;
1422{
a0d0e21e
LW
1423 register char *s;
1424 register char op = EXACTLY; /* Arbitrary non-END op. */
1425 register char *next;
a687059c
LW
1426
1427
a0d0e21e
LW
1428 s = r->program + 1;
1429 while (op != END) { /* While that wasn't END last time... */
a687059c 1430#ifdef REGALIGN
a0d0e21e
LW
1431 if (!((long)s & 1))
1432 s++;
a687059c 1433#endif
a0d0e21e
LW
1434 op = OP(s);
1435 fprintf(stderr,"%2d%s", s-r->program, regprop(s)); /* Where, what. */
1436 next = regnext(s);
1437 s += regarglen[(U8)op];
1438 if (next == NULL) /* Next ptr. */
1439 fprintf(stderr,"(0)");
1440 else
1441 fprintf(stderr,"(%d)", (s-r->program)+(next-s));
1442 s += 3;
1443 if (op == ANYOF) {
1444 s += 32;
a687059c 1445 }
a0d0e21e
LW
1446 if (op == EXACTLY) {
1447 /* Literal string, where present. */
1448 s++;
1449 (void)putc(' ', stderr);
1450 (void)putc('<', stderr);
1451 while (*s != '\0') {
1452 (void)putc(*s, stderr);
1453 s++;
1454 }
1455 (void)putc('>', stderr);
1456 s++;
1457 }
1458 (void)putc('\n', stderr);
1459 }
1460
1461 /* Header fields of interest. */
1462 if (r->regstart)
1463 fprintf(stderr,"start `%s' ", SvPVX(r->regstart));
1464 if (r->regstclass)
1465 fprintf(stderr,"stclass `%s' ", regprop(r->regstclass));
1466 if (r->reganch & ROPT_ANCH)
1467 fprintf(stderr,"anchored ");
1468 if (r->reganch & ROPT_SKIP)
1469 fprintf(stderr,"plus ");
1470 if (r->reganch & ROPT_IMPLICIT)
1471 fprintf(stderr,"implicit ");
1472 if (r->regmust != NULL)
1473 fprintf(stderr,"must have \"%s\" back %ld ", SvPVX(r->regmust),
1474 (long) r->regback);
1475 fprintf(stderr, "minlen %ld ", (long) r->minlen);
1476 fprintf(stderr,"\n");
a687059c
LW
1477}
1478
1479/*
a0d0e21e
LW
1480- regprop - printable representation of opcode
1481*/
a687059c
LW
1482char *
1483regprop(op)
1484char *op;
1485{
a0d0e21e
LW
1486 register char *p = 0;
1487
1488 (void) strcpy(buf, ":");
1489
1490 switch (OP(op)) {
1491 case BOL:
1492 p = "BOL";
1493 break;
1494 case MBOL:
1495 p = "MBOL";
1496 break;
1497 case SBOL:
1498 p = "SBOL";
1499 break;
1500 case EOL:
1501 p = "EOL";
1502 break;
1503 case MEOL:
1504 p = "MEOL";
1505 break;
1506 case SEOL:
1507 p = "SEOL";
1508 break;
1509 case ANY:
1510 p = "ANY";
1511 break;
1512 case SANY:
1513 p = "SANY";
1514 break;
1515 case ANYOF:
1516 p = "ANYOF";
1517 break;
1518 case BRANCH:
1519 p = "BRANCH";
1520 break;
1521 case EXACTLY:
1522 p = "EXACTLY";
1523 break;
1524 case NOTHING:
1525 p = "NOTHING";
1526 break;
1527 case BACK:
1528 p = "BACK";
1529 break;
1530 case END:
1531 p = "END";
1532 break;
1533 case ALNUM:
1534 p = "ALNUM";
1535 break;
1536 case NALNUM:
1537 p = "NALNUM";
1538 break;
1539 case BOUND:
1540 p = "BOUND";
1541 break;
1542 case NBOUND:
1543 p = "NBOUND";
1544 break;
1545 case SPACE:
1546 p = "SPACE";
1547 break;
1548 case NSPACE:
1549 p = "NSPACE";
1550 break;
1551 case DIGIT:
1552 p = "DIGIT";
1553 break;
1554 case NDIGIT:
1555 p = "NDIGIT";
1556 break;
1557 case CURLY:
1558 (void)sprintf(buf+strlen(buf), "CURLY {%d,%d}", ARG1(op),ARG2(op));
1559 p = NULL;
1560 break;
1561 case CURLYX:
1562 (void)sprintf(buf+strlen(buf), "CURLYX {%d,%d}", ARG1(op),ARG2(op));
1563 p = NULL;
1564 break;
1565 case REF:
1566 (void)sprintf(buf+strlen(buf), "REF%d", ARG1(op));
1567 p = NULL;
1568 break;
1569 case OPEN:
1570 (void)sprintf(buf+strlen(buf), "OPEN%d", ARG1(op));
1571 p = NULL;
1572 break;
1573 case CLOSE:
1574 (void)sprintf(buf+strlen(buf), "CLOSE%d", ARG1(op));
1575 p = NULL;
1576 break;
1577 case STAR:
1578 p = "STAR";
1579 break;
1580 case PLUS:
1581 p = "PLUS";
1582 break;
1583 case MINMOD:
1584 p = "MINMOD";
1585 break;
1586 case GBOL:
1587 p = "GBOL";
1588 break;
1589 case UNLESSM:
1590 p = "UNLESSM";
1591 break;
1592 case IFMATCH:
1593 p = "IFMATCH";
1594 break;
1595 case SUCCEED:
1596 p = "SUCCEED";
1597 break;
1598 case WHILEM:
1599 p = "WHILEM";
1600 break;
1601 default:
1602 FAIL("corrupted regexp opcode");
1603 }
1604 if (p != NULL)
1605 (void) strcat(buf, p);
1606 return(buf);
a687059c
LW
1607}
1608#endif /* DEBUGGING */
1609
2b69d0c2 1610void
a687059c
LW
1611regfree(r)
1612struct regexp *r;
1613{
a0d0e21e
LW
1614 if (!r)
1615 return;
1616 if (r->precomp) {
1617 Safefree(r->precomp);
1618 r->precomp = Nullch;
1619 }
1620 if (r->subbase) {
1621 Safefree(r->subbase);
1622 r->subbase = Nullch;
1623 }
1624 if (r->regmust) {
1625 SvREFCNT_dec(r->regmust);
1626 r->regmust = Nullsv;
1627 }
1628 if (r->regstart) {
1629 SvREFCNT_dec(r->regstart);
1630 r->regstart = Nullsv;
1631 }
1632 Safefree(r->startp);
1633 Safefree(r->endp);
1634 Safefree(r);
a687059c 1635}