This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perl5.000 patch.0o: [address] a few more Configure and build nits.
[perl5.git] / regcomp.c
CommitLineData
a0d0e21e
LW
1/* regcomp.c
2 */
3
4/*
5 * "A fair jaw-cracker dwarf-language must be." --Samwise Gamgee
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
f0fcb552 17/*SUPPRESS 112*/
a687059c
LW
18/*
19 * regcomp and regexec -- regsub and regerror are not used in perl
20 *
21 * Copyright (c) 1986 by University of Toronto.
22 * Written by Henry Spencer. Not derived from licensed software.
23 *
24 * Permission is granted to anyone to use this software for any
25 * purpose on any computer system, and to redistribute it freely,
26 * subject to the following restrictions:
27 *
28 * 1. The author is not responsible for the consequences of use of
29 * this software, no matter how awful, even if they arise
30 * from defects in it.
31 *
32 * 2. The origin of this software must not be misrepresented, either
33 * by explicit claim or by omission.
34 *
35 * 3. Altered versions must be plainly marked as such, and must not
36 * be misrepresented as being the original software.
37 *
38 *
39 **** Alterations to Henry's code are...
40 ****
a0d0e21e 41 **** Copyright (c) 1991-1994, Larry Wall
a687059c 42 ****
9ef589d8
LW
43 **** You may distribute under the terms of either the GNU General Public
44 **** License or the Artistic License, as specified in the README file.
45
a687059c
LW
46 *
47 * Beware that some of this code is subtly aware of the way operator
48 * precedence is structured in regular expressions. Serious changes in
49 * regular-expression syntax might require a total rethink.
50 */
51#include "EXTERN.h"
52#include "perl.h"
53#include "INTERN.h"
54#include "regcomp.h"
55
fe14fcc3
LW
56#ifdef MSDOS
57# if defined(BUGGY_MSC6)
58 /* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */
59 # pragma optimize("a",off)
60 /* But MSC 6.00A is happy with 'w', for aliases only across function calls*/
61 # pragma optimize("w",on )
62# endif /* BUGGY_MSC6 */
63#endif /* MSDOS */
64
a687059c
LW
65#ifndef STATIC
66#define STATIC static
67#endif
68
69#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
70#define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
71 ((*s) == '{' && regcurly(s)))
2b69d0c2
LW
72#ifdef atarist
73#define PERL_META "^$.[()|?+*\\"
74#else
a687059c 75#define META "^$.[()|?+*\\"
2b69d0c2 76#endif
a687059c 77
35c8bce7
LW
78#ifdef SPSTART
79#undef SPSTART /* dratted cpp namespace... */
80#endif
a687059c
LW
81/*
82 * Flags to be passed up and down.
83 */
a687059c 84#define WORST 0 /* Worst case. */
a0d0e21e
LW
85#define HASWIDTH 0x1 /* Known never to match null string. */
86#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
87#define SPSTART 0x4 /* Starts with * or +. */
88#define TRYAGAIN 0x8 /* Weeded out a declaration. */
a687059c
LW
89
90/*
a687059c
LW
91 * Forward declarations for regcomp()'s friends.
92 */
a0d0e21e
LW
93
94static char *reg _((I32, I32 *));
95static char *reganode _((char, unsigned short));
96static char *regatom _((I32 *));
97static char *regbranch _((I32 *));
98static void regc _((char));
99static char *regclass _((void));
100STATIC I32 regcurly _((char *));
101static char *regnode _((char));
102static char *regpiece _((I32 *));
103static void reginsert _((char, char *));
104static void regoptail _((char *, char *));
105static void regset _((char *, I32, I32));
106static void regtail _((char *, char *));
107static char* nextchar _((void));
a687059c
LW
108
109/*
110 - regcomp - compile a regular expression into internal code
111 *
112 * We can't allocate space until we know how big the compiled form will be,
113 * but we can't compile it (and thus know how big it is) until we've got a
114 * place to put the code. So we cheat: we compile it twice, once with code
115 * generation turned off and size counting turned on, and once "for real".
116 * This also means that we don't allocate space until we are sure that the
117 * thing really will compile successfully, and we never have to move the
118 * code and thus invalidate pointers into it. (Note that it has to be in
119 * one piece because free() must be able to free it all.) [NB: not true in perl]
120 *
121 * Beware that the optimization-preparation code in here knows about some
122 * of the structure of the compiled regexp. [I'll say.]
123 */
124regexp *
a0d0e21e
LW
125regcomp(exp,xend,pm)
126char* exp;
127char* xend;
128PMOP* pm;
a687059c 129{
a0d0e21e
LW
130 I32 fold = pm->op_pmflags & PMf_FOLD;
131 register regexp *r;
132 register char *scan;
133 register SV *longish;
134 SV *longest;
135 register I32 len;
136 register char *first;
137 I32 flags;
138 I32 backish;
139 I32 backest;
140 I32 curback;
141 I32 minlen = 0;
142 I32 sawplus = 0;
143 I32 sawopen = 0;
144
145 if (exp == NULL)
146 croak("NULL regexp argument");
147
148 /* First pass: determine size, legality. */
149 regflags = pm->op_pmflags;
150 regparse = exp;
151 regxend = xend;
152 regprecomp = savepvn(exp,xend-exp);
153 regnaughty = 0;
154 regsawback = 0;
155 regnpar = 1;
156 regsize = 0L;
157 regcode = &regdummy;
158 regc((char)MAGIC);
159 if (reg(0, &flags) == NULL) {
160 Safefree(regprecomp);
161 regprecomp = Nullch;
162 return(NULL);
163 }
164
165 /* Small enough for pointer-storage convention? */
166 if (regsize >= 32767L) /* Probably could be 65535L. */
167 FAIL("regexp too big");
168
169 /* Allocate space. */
170 Newc(1001, r, sizeof(regexp) + (unsigned)regsize, char, regexp);
171 if (r == NULL)
172 FAIL("regexp out of space");
173
174 /* Second pass: emit code. */
175 r->prelen = xend-exp;
176 r->precomp = regprecomp;
177 r->subbeg = r->subbase = NULL;
178 regnaughty = 0;
179 regparse = exp;
180 regnpar = 1;
181 regcode = r->program;
182 regc((char)MAGIC);
183 if (reg(0, &flags) == NULL)
184 return(NULL);
185
186 /* Dig out information for optimizations. */
187 pm->op_pmflags = regflags;
188 fold = pm->op_pmflags & PMf_FOLD;
189 r->regstart = Nullsv; /* Worst-case defaults. */
190 r->reganch = 0;
191 r->regmust = Nullsv;
192 r->regback = -1;
193 r->regstclass = Nullch;
194 r->naughty = regnaughty >= 10; /* Probably an expensive pattern. */
195 scan = r->program+1; /* First BRANCH. */
196 if (OP(regnext(scan)) == END) {/* Only one top-level choice. */
197 scan = NEXTOPER(scan);
198
199 first = scan;
200 while ((OP(first) == OPEN && (sawopen = 1)) ||
201 (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) ||
202 (OP(first) == PLUS) ||
203 (OP(first) == MINMOD) ||
204 (regkind[(U8)OP(first)] == CURLY && ARG1(first) > 0) ) {
205 if (OP(first) == PLUS)
206 sawplus = 1;
207 else
208 first += regarglen[(U8)OP(first)];
209 first = NEXTOPER(first);
a687059c
LW
210 }
211
a0d0e21e
LW
212 /* Starting-point info. */
213 again:
214 if (OP(first) == EXACTLY) {
215 r->regstart = newSVpv(OPERAND(first)+1,*OPERAND(first));
216 if (SvCUR(r->regstart) > !(sawstudy|fold))
217 fbm_compile(r->regstart,fold);
218 else
219 sv_upgrade(r->regstart, SVt_PVBM);
220 }
221 else if (strchr(simple+2,OP(first)))
222 r->regstclass = first;
223 else if (OP(first) == BOUND || OP(first) == NBOUND)
224 r->regstclass = first;
225 else if (regkind[(U8)OP(first)] == BOL) {
226 r->reganch = ROPT_ANCH;
227 first = NEXTOPER(first);
228 goto again;
229 }
230 else if ((OP(first) == STAR &&
231 regkind[(U8)OP(NEXTOPER(first))] == ANY) &&
232 !(r->reganch & ROPT_ANCH) )
233 {
234 /* turn .* into ^.* with an implied $*=1 */
235 r->reganch = ROPT_ANCH | ROPT_IMPLICIT;
236 first = NEXTOPER(first);
237 goto again;
238 }
239 if (sawplus && (!sawopen || !regsawback))
240 r->reganch |= ROPT_SKIP; /* x+ must match 1st of run */
241
242 DEBUG_r(fprintf(stderr,"first %d next %d offset %d\n",
243 OP(first), OP(NEXTOPER(first)), first - scan));
244 /*
245 * If there's something expensive in the r.e., find the
246 * longest literal string that must appear and make it the
247 * regmust. Resolve ties in favor of later strings, since
248 * the regstart check works with the beginning of the r.e.
249 * and avoiding duplication strengthens checking. Not a
250 * strong reason, but sufficient in the absence of others.
251 * [Now we resolve ties in favor of the earlier string if
252 * it happens that curback has been invalidated, since the
253 * earlier string may buy us something the later one won't.]
254 */
255 longish = newSVpv("",0);
256 longest = newSVpv("",0);
257 len = 0;
258 minlen = 0;
259 curback = 0;
260 backish = 0;
261 backest = 0;
262 while (OP(scan) != END) {
263 if (OP(scan) == BRANCH) {
264 if (OP(regnext(scan)) == BRANCH) {
265 curback = -30000;
266 while (OP(scan) == BRANCH)
267 scan = regnext(scan);
00bf170e 268 }
a0d0e21e
LW
269 else /* single branch is ok */
270 scan = NEXTOPER(scan);
271 }
272 if (OP(scan) == UNLESSM) {
273 curback = -30000;
274 scan = regnext(scan);
275 }
276 if (OP(scan) == EXACTLY) {
277 char *t;
a687059c 278
a0d0e21e
LW
279 first = scan;
280 while (OP(t = regnext(scan)) == CLOSE)
281 scan = t;
282 minlen += *OPERAND(first);
283 if (curback - backish == len) {
284 sv_catpvn(longish, OPERAND(first)+1,
285 *OPERAND(first));
286 len += *OPERAND(first);
287 curback += *OPERAND(first);
288 first = regnext(scan);
2b69d0c2 289 }
a0d0e21e
LW
290 else if (*OPERAND(first) >= len + (curback >= 0)) {
291 len = *OPERAND(first);
292 sv_setpvn(longish, OPERAND(first)+1,len);
293 backish = curback;
294 curback += len;
295 first = regnext(scan);
9ef589d8 296 }
a0d0e21e
LW
297 else
298 curback += *OPERAND(first);
299 }
300 else if (strchr(varies,OP(scan))) {
301 curback = -30000;
a687059c 302 len = 0;
a0d0e21e 303 if (SvCUR(longish) > SvCUR(longest)) {
79072805 304 sv_setsv(longest,longish);
34de22dd
LW
305 backest = backish;
306 }
a0d0e21e
LW
307 sv_setpvn(longish,"",0);
308 if (OP(scan) == PLUS && strchr(simple,OP(NEXTOPER(scan))))
309 minlen++;
310 else if (regkind[(U8)OP(scan)] == CURLY &&
311 strchr(simple,OP(NEXTOPER(scan)+4)))
312 minlen += ARG1(scan);
313 }
314 else if (strchr(simple,OP(scan))) {
315 curback++;
316 minlen++;
317 len = 0;
318 if (SvCUR(longish) > SvCUR(longest)) {
319 sv_setsv(longest,longish);
320 backest = backish;
fe14fcc3 321 }
a0d0e21e
LW
322 sv_setpvn(longish,"",0);
323 }
324 scan = regnext(scan);
a687059c
LW
325 }
326
a0d0e21e
LW
327 /* Prefer earlier on tie, unless we can tail match latter */
328
329 if (SvCUR(longish) + (regkind[(U8)OP(first)] == EOL) >
330 SvCUR(longest))
331 {
332 sv_setsv(longest,longish);
333 backest = backish;
334 }
335 else
336 sv_setpvn(longish,"",0);
337 if (SvCUR(longest)
338 &&
339 (!r->regstart
340 ||
341 !fbm_instr((unsigned char*) SvPVX(r->regstart),
342 (unsigned char *) SvPVX(r->regstart)
343 + SvCUR(r->regstart),
344 longest)
345 )
346 )
347 {
348 r->regmust = longest;
349 if (backest < 0)
350 backest = -1;
351 r->regback = backest;
352 if (SvCUR(longest) > !(sawstudy || fold ||
353 regkind[(U8)OP(first)]==EOL))
354 fbm_compile(r->regmust,fold);
355 (void)SvUPGRADE(r->regmust, SVt_PVBM);
356 BmUSEFUL(r->regmust) = 100;
357 if (regkind[(U8)OP(first)] == EOL && SvCUR(longish))
358 SvTAIL_on(r->regmust);
359 }
360 else {
361 SvREFCNT_dec(longest);
362 longest = Nullsv;
363 }
364 SvREFCNT_dec(longish);
365 }
366
367 r->do_folding = fold;
368 r->nparens = regnpar - 1;
369 r->minlen = minlen;
370 Newz(1002, r->startp, regnpar, char*);
371 Newz(1002, r->endp, regnpar, char*);
372 DEBUG_r(regdump(r));
373 return(r);
a687059c
LW
374}
375
376/*
377 - reg - regular expression, i.e. main body or parenthesized thing
378 *
379 * Caller must absorb opening parenthesis.
380 *
381 * Combining parenthesis handling with the base level of regular expression
382 * is a trifle forced, but the need to tie the tails of the branches to what
383 * follows makes it hard to avoid.
384 */
385static char *
386reg(paren, flagp)
79072805
LW
387I32 paren; /* Parenthesized? */
388I32 *flagp;
a687059c 389{
a0d0e21e
LW
390 register char *ret;
391 register char *br;
392 register char *ender = 0;
393 register I32 parno = 0;
394 I32 flags;
395
396 *flagp = HASWIDTH; /* Tentatively. */
397
398 /* Make an OPEN node, if parenthesized. */
399 if (paren) {
400 if (*regparse == '?') {
401 regparse++;
402 paren = *nextchar();
403 ret = NULL;
404 switch (paren) {
405 case ':':
406 case '=':
407 case '!':
408 break;
409 case '$':
410 case '@':
411 croak("Sequence (?%c...) not implemented", paren);
412 break;
413 case '#':
414 while (*regparse && *regparse != ')')
415 regparse++;
416 if (*regparse != ')')
417 croak("Sequence (?#... not terminated", *regparse);
418 nextchar();
419 *flagp = TRYAGAIN;
420 return NULL;
421 default:
422 --regparse;
423 while (*regparse && strchr("iogmsx", *regparse))
424 pmflag(&regflags, *regparse++);
425 if (*regparse != ')')
426 croak("Sequence (?%c...) not recognized", *regparse);
427 nextchar();
428 *flagp = TRYAGAIN;
429 return NULL;
430 }
431 }
432 else {
433 parno = regnpar;
434 regnpar++;
435 ret = reganode(OPEN, parno);
436 }
437 } else
438 ret = NULL;
439
440 /* Pick up the branches, linking them together. */
441 br = regbranch(&flags);
442 if (br == NULL)
443 return(NULL);
444 if (ret != NULL)
445 regtail(ret, br); /* OPEN -> first. */
446 else
447 ret = br;
448 if (!(flags&HASWIDTH))
449 *flagp &= ~HASWIDTH;
450 *flagp |= flags&SPSTART;
451 while (*regparse == '|') {
452 nextchar();
a687059c
LW
453 br = regbranch(&flags);
454 if (br == NULL)
a0d0e21e
LW
455 return(NULL);
456 regtail(ret, br); /* BRANCH -> BRANCH. */
a687059c 457 if (!(flags&HASWIDTH))
a0d0e21e 458 *flagp &= ~HASWIDTH;
a687059c 459 *flagp |= flags&SPSTART;
a0d0e21e
LW
460 }
461
462 /* Make a closing node, and hook it on the end. */
463 switch (paren) {
464 case ':':
465 ender = regnode(NOTHING);
466 break;
467 case 1:
468 ender = reganode(CLOSE, parno);
469 break;
470 case '=':
471 case '!':
472 ender = regnode(SUCCEED);
473 *flagp &= ~HASWIDTH;
474 break;
475 case 0:
476 ender = regnode(END);
477 break;
478 }
479 regtail(ret, ender);
480
481 /* Hook the tails of the branches to the closing node. */
482 for (br = ret; br != NULL; br = regnext(br))
483 regoptail(br, ender);
484
485 if (paren == '=') {
486 reginsert(IFMATCH,ret);
487 regtail(ret, regnode(NOTHING));
488 }
489 else if (paren == '!') {
490 reginsert(UNLESSM,ret);
491 regtail(ret, regnode(NOTHING));
492 }
493
494 /* Check for proper termination. */
495 if (paren && *nextchar() != ')') {
496 FAIL("unmatched () in regexp");
497 } else if (!paren && regparse < regxend) {
498 if (*regparse == ')') {
499 FAIL("unmatched () in regexp");
500 } else
501 FAIL("junk on end of regexp"); /* "Can't happen". */
502 /* NOTREACHED */
503 }
a687059c 504
a0d0e21e 505 return(ret);
a687059c
LW
506}
507
508/*
509 - regbranch - one alternative of an | operator
510 *
511 * Implements the concatenation operator.
512 */
513static char *
514regbranch(flagp)
79072805 515I32 *flagp;
a687059c 516{
a0d0e21e
LW
517 register char *ret;
518 register char *chain;
519 register char *latest;
520 I32 flags = 0;
521
522 *flagp = WORST; /* Tentatively. */
523
524 ret = regnode(BRANCH);
525 chain = NULL;
526 regparse--;
527 nextchar();
528 while (regparse < regxend && *regparse != '|' && *regparse != ')') {
529 flags &= ~TRYAGAIN;
530 latest = regpiece(&flags);
531 if (latest == NULL) {
532 if (flags & TRYAGAIN)
533 continue;
534 return(NULL);
535 }
536 *flagp |= flags&HASWIDTH;
537 if (chain == NULL) /* First piece. */
538 *flagp |= flags&SPSTART;
539 else {
540 regnaughty++;
541 regtail(chain, latest);
a687059c 542 }
a0d0e21e
LW
543 chain = latest;
544 }
545 if (chain == NULL) /* Loop ran zero times. */
546 (void) regnode(NOTHING);
a687059c 547
a0d0e21e 548 return(ret);
a687059c
LW
549}
550
551/*
552 - regpiece - something followed by possible [*+?]
553 *
554 * Note that the branching code sequences used for ? and the general cases
555 * of * and + are somewhat optimized: they use the same NOTHING node as
556 * both the endmarker for their branch list and the body of the last branch.
557 * It might seem that this node could be dispensed with entirely, but the
558 * endmarker role is not redundant.
559 */
560static char *
561regpiece(flagp)
79072805 562I32 *flagp;
a687059c 563{
a0d0e21e
LW
564 register char *ret;
565 register char op;
566 register char *next;
567 I32 flags;
568 char *origparse = regparse;
569 char *maxpos;
570 I32 min;
571 I32 max = 32767;
572
573 ret = regatom(&flags);
574 if (ret == NULL) {
575 if (flags & TRYAGAIN)
576 *flagp |= TRYAGAIN;
577 return(NULL);
578 }
579
580 op = *regparse;
581 if (op == '(' && regparse[1] == '?' && regparse[2] == '#') {
582 while (op && op != ')')
583 op = *++regparse;
584 if (op) {
585 nextchar();
586 op = *regparse;
587 }
588 }
589
590 if (op == '{' && regcurly(regparse)) {
591 next = regparse + 1;
592 maxpos = Nullch;
593 while (isDIGIT(*next) || *next == ',') {
594 if (*next == ',') {
595 if (maxpos)
596 break;
597 else
598 maxpos = next;
a687059c 599 }
a0d0e21e
LW
600 next++;
601 }
602 if (*next == '}') { /* got one */
603 if (!maxpos)
604 maxpos = next;
605 regparse++;
606 min = atoi(regparse);
607 if (*maxpos == ',')
608 maxpos++;
609 else
610 maxpos = regparse;
611 max = atoi(maxpos);
612 if (!max && *maxpos != '0')
613 max = 32767; /* meaning "infinity" */
614 regparse = next;
615 nextchar();
616
617 do_curly:
618 if ((flags&SIMPLE)) {
619 regnaughty += 2 + regnaughty / 2;
620 reginsert(CURLY, ret);
621 }
622 else {
623 regnaughty += 4 + regnaughty; /* compound interest */
624 regtail(ret, regnode(WHILEM));
625 reginsert(CURLYX,ret);
626 regtail(ret, regnode(NOTHING));
627 }
628
629 if (min > 0)
630 *flagp = (WORST|HASWIDTH);
631 if (max && max < min)
632 croak("Can't do {n,m} with n > m");
633 if (regcode != &regdummy) {
00bf170e 634#ifdef REGALIGN
a0d0e21e
LW
635 *(unsigned short *)(ret+3) = min;
636 *(unsigned short *)(ret+5) = max;
00bf170e 637#else
a0d0e21e
LW
638 ret[3] = min >> 8; ret[4] = min & 0377;
639 ret[5] = max >> 8; ret[6] = max & 0377;
00bf170e 640#endif
a687059c 641 }
a687059c 642
a0d0e21e 643 goto nest_check;
a687059c 644 }
a0d0e21e 645 }
a687059c 646
a0d0e21e
LW
647 if (!ISMULT1(op)) {
648 *flagp = flags;
a687059c 649 return(ret);
a0d0e21e
LW
650 }
651 nextchar();
652
653 *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
654
655 if (op == '*' && (flags&SIMPLE)) {
656 reginsert(STAR, ret);
657 regnaughty += 4;
658 }
659 else if (op == '*') {
660 min = 0;
661 goto do_curly;
662 } else if (op == '+' && (flags&SIMPLE)) {
663 reginsert(PLUS, ret);
664 regnaughty += 3;
665 }
666 else if (op == '+') {
667 min = 1;
668 goto do_curly;
669 } else if (op == '?') {
670 min = 0; max = 1;
671 goto do_curly;
672 }
673 nest_check:
674 if (dowarn && regcode != &regdummy && !(flags&HASWIDTH) && max > 10000) {
675 warn("%.*s matches null string many times",
676 regparse - origparse, origparse);
677 }
678
679 if (*regparse == '?') {
680 nextchar();
681 reginsert(MINMOD, ret);
682#ifdef REGALIGN
683 regtail(ret, ret + 4);
684#else
685 regtail(ret, ret + 3);
686#endif
687 }
688 if (ISMULT2(regparse))
689 FAIL("nested *?+ in regexp");
690
691 return(ret);
a687059c
LW
692}
693
694/*
695 - regatom - the lowest level
696 *
697 * Optimization: gobbles an entire sequence of ordinary characters so that
698 * it can turn them into a single node, which is smaller to store and
699 * faster to run. Backslashed characters are exceptions, each becoming a
700 * separate node; the code is simpler that way and it's not worth fixing.
701 *
702 * [Yes, it is worth fixing, some scripts can run twice the speed.]
703 */
704static char *
705regatom(flagp)
79072805 706I32 *flagp;
a687059c 707{
a0d0e21e
LW
708 register char *ret = 0;
709 I32 flags;
710
711 *flagp = WORST; /* Tentatively. */
712
713tryagain:
714 switch (*regparse) {
715 case '^':
716 nextchar();
717 if (regflags & PMf_MULTILINE)
718 ret = regnode(MBOL);
719 else if (regflags & PMf_SINGLELINE)
720 ret = regnode(SBOL);
721 else
722 ret = regnode(BOL);
723 break;
724 case '$':
725 nextchar();
726 if (regflags & PMf_MULTILINE)
727 ret = regnode(MEOL);
728 else if (regflags & PMf_SINGLELINE)
729 ret = regnode(SEOL);
730 else
731 ret = regnode(EOL);
732 break;
733 case '.':
734 nextchar();
735 if (regflags & PMf_SINGLELINE)
736 ret = regnode(SANY);
737 else
738 ret = regnode(ANY);
739 regnaughty++;
740 *flagp |= HASWIDTH|SIMPLE;
741 break;
742 case '[':
743 regparse++;
744 ret = regclass();
745 *flagp |= HASWIDTH|SIMPLE;
746 break;
747 case '(':
748 nextchar();
749 ret = reg(1, &flags);
750 if (ret == NULL) {
751 if (flags & TRYAGAIN)
752 goto tryagain;
753 return(NULL);
754 }
755 *flagp |= flags&(HASWIDTH|SPSTART);
756 break;
757 case '|':
758 case ')':
759 if (flags & TRYAGAIN) {
760 *flagp |= TRYAGAIN;
761 return NULL;
762 }
763 croak("internal urp in regexp at /%s/", regparse);
764 /* Supposed to be caught earlier. */
765 break;
766 case '?':
767 case '+':
768 case '*':
769 FAIL("?+* follows nothing in regexp");
770 break;
771 case '\\':
772 switch (*++regparse) {
773 case 'A':
774 ret = regnode(SBOL);
775 *flagp |= SIMPLE;
776 nextchar();
777 break;
778 case 'G':
779 ret = regnode(GBOL);
780 *flagp |= SIMPLE;
781 nextchar();
782 break;
783 case 'Z':
784 ret = regnode(SEOL);
785 *flagp |= SIMPLE;
786 nextchar();
787 break;
788 case 'w':
789 ret = regnode(ALNUM);
790 *flagp |= HASWIDTH|SIMPLE;
791 nextchar();
792 break;
793 case 'W':
794 ret = regnode(NALNUM);
795 *flagp |= HASWIDTH|SIMPLE;
796 nextchar();
797 break;
798 case 'b':
799 ret = regnode(BOUND);
800 *flagp |= SIMPLE;
801 nextchar();
802 break;
803 case 'B':
804 ret = regnode(NBOUND);
805 *flagp |= SIMPLE;
806 nextchar();
807 break;
808 case 's':
809 ret = regnode(SPACE);
810 *flagp |= HASWIDTH|SIMPLE;
811 nextchar();
812 break;
813 case 'S':
814 ret = regnode(NSPACE);
815 *flagp |= HASWIDTH|SIMPLE;
816 nextchar();
817 break;
818 case 'd':
819 ret = regnode(DIGIT);
820 *flagp |= HASWIDTH|SIMPLE;
821 nextchar();
822 break;
823 case 'D':
824 ret = regnode(NDIGIT);
825 *flagp |= HASWIDTH|SIMPLE;
826 nextchar();
827 break;
828 case 'n':
829 case 'r':
830 case 't':
831 case 'f':
832 case 'e':
833 case 'a':
834 case 'x':
835 case 'c':
836 case '0':
837 goto defchar;
838 case '1': case '2': case '3': case '4':
839 case '5': case '6': case '7': case '8': case '9':
840 {
841 I32 num = atoi(regparse);
842
843 if (num > 9 && num >= regnpar)
844 goto defchar;
845 else {
846 regsawback = 1;
847 ret = reganode(REF, num);
848 *flagp |= HASWIDTH;
849 while (isDIGIT(*regparse))
a687059c 850 regparse++;
a0d0e21e
LW
851 regparse--;
852 nextchar();
853 }
854 }
855 break;
856 case '\0':
857 if (regparse >= regxend)
858 FAIL("trailing \\ in regexp");
859 /* FALL THROUGH */
860 default:
861 goto defchar;
862 }
863 break;
864 default: {
865 register I32 len;
866 register char ender;
867 register char *p;
868 char *oldp;
869 I32 numlen;
870
871 regparse++;
872
873 defchar:
874 ret = regnode(EXACTLY);
875 regc(0); /* save spot for len */
876 for (len = 0, p = regparse - 1;
877 len < 127 && p < regxend;
878 len++)
879 {
880 oldp = p;
881 switch (*p) {
882 case '^':
883 case '$':
884 case '.':
885 case '[':
886 case '(':
887 case ')':
888 case '|':
889 goto loopdone;
890 case '\\':
891 switch (*++p) {
892 case 'A':
893 case 'G':
894 case 'Z':
895 case 'w':
896 case 'W':
897 case 'b':
898 case 'B':
899 case 's':
900 case 'S':
901 case 'd':
902 case 'D':
903 --p;
904 goto loopdone;
905 case 'n':
906 ender = '\n';
907 p++;
a687059c 908 break;
a0d0e21e
LW
909 case 'r':
910 ender = '\r';
911 p++;
a687059c 912 break;
a0d0e21e
LW
913 case 't':
914 ender = '\t';
915 p++;
a687059c 916 break;
a0d0e21e
LW
917 case 'f':
918 ender = '\f';
919 p++;
a687059c 920 break;
a0d0e21e
LW
921 case 'e':
922 ender = '\033';
923 p++;
a687059c 924 break;
a0d0e21e
LW
925 case 'a':
926 ender = '\007';
927 p++;
a687059c 928 break;
a0d0e21e
LW
929 case 'x':
930 ender = scan_hex(++p, 2, &numlen);
931 p += numlen;
a687059c 932 break;
a0d0e21e
LW
933 case 'c':
934 p++;
935 ender = *p++;
936 if (isLOWER(ender))
937 ender = toUPPER(ender);
938 ender ^= 64;
a687059c 939 break;
a0d0e21e
LW
940 case '0': case '1': case '2': case '3':case '4':
941 case '5': case '6': case '7': case '8':case '9':
942 if (*p == '0' ||
943 (isDIGIT(p[1]) && atoi(p) >= regnpar) ) {
944 ender = scan_oct(p, 3, &numlen);
945 p += numlen;
946 }
947 else {
948 --p;
949 goto loopdone;
a687059c
LW
950 }
951 break;
a0d0e21e
LW
952 case '\0':
953 if (p >= regxend)
a687059c
LW
954 FAIL("trailing \\ in regexp");
955 /* FALL THROUGH */
a0d0e21e
LW
956 default:
957 ender = *p++;
958 break;
959 }
960 break;
961 case ' ': case '\t': case '\n': case '\r': case '\f': case '\v':
962 if (regflags & PMf_EXTENDED) {
963 p++;
964 len--;
965 continue;
966 }
967 /* FALL THROUGH */
a687059c 968 default:
a0d0e21e
LW
969 ender = *p++;
970 break;
a687059c 971 }
a0d0e21e
LW
972 if (regflags & PMf_FOLD && isUPPER(ender))
973 ender = toLOWER(ender);
974 if (ISMULT2(p)) { /* Back off on ?+*. */
975 if (len)
976 p = oldp;
977 else {
978 len++;
979 regc(ender);
980 }
981 break;
a687059c 982 }
a0d0e21e
LW
983 regc(ender);
984 }
985 loopdone:
986 regparse = p - 1;
987 nextchar();
988 if (len < 0)
989 FAIL("internal disaster in regexp");
990 if (len > 0)
991 *flagp |= HASWIDTH;
992 if (len == 1)
993 *flagp |= SIMPLE;
994 if (regcode != &regdummy)
995 *OPERAND(ret) = len;
996 regc('\0');
a687059c 997 }
a0d0e21e
LW
998 break;
999 }
a687059c 1000
a0d0e21e 1001 return(ret);
a687059c
LW
1002}
1003
1004static void
1005regset(bits,def,c)
1006char *bits;
79072805
LW
1007I32 def;
1008register I32 c;
a687059c 1009{
a0d0e21e
LW
1010 if (regcode == &regdummy)
1011 return;
1012 c &= 255;
1013 if (def)
1014 bits[c >> 3] &= ~(1 << (c & 7));
1015 else
1016 bits[c >> 3] |= (1 << (c & 7));
a687059c
LW
1017}
1018
1019static char *
1020regclass()
1021{
a0d0e21e
LW
1022 register char *bits;
1023 register I32 class;
1024 register I32 lastclass = 1234;
1025 register I32 range = 0;
1026 register char *ret;
1027 register I32 def;
1028 I32 numlen;
1029
1030 ret = regnode(ANYOF);
1031 if (*regparse == '^') { /* Complement of range. */
1032 regnaughty++;
1033 regparse++;
1034 def = 0;
1035 } else {
1036 def = 255;
1037 }
1038 bits = regcode;
1039 for (class = 0; class < 32; class++)
1040 regc(def);
1041 if (*regparse == ']' || *regparse == '-')
1042 goto skipcond; /* allow 1st char to be ] or - */
1043 while (regparse < regxend && *regparse != ']') {
1044 skipcond:
1045 class = UCHARAT(regparse++);
1046 if (class == '\\') {
1047 class = UCHARAT(regparse++);
1048 switch (class) {
1049 case 'w':
1050 for (class = 0; class < 256; class++)
1051 if (isALNUM(class))
1052 regset(bits,def,class);
1053 lastclass = 1234;
1054 continue;
1055 case 'W':
1056 for (class = 0; class < 256; class++)
1057 if (!isALNUM(class))
1058 regset(bits,def,class);
1059 lastclass = 1234;
1060 continue;
1061 case 's':
1062 for (class = 0; class < 256; class++)
1063 if (isSPACE(class))
1064 regset(bits,def,class);
1065 lastclass = 1234;
1066 continue;
1067 case 'S':
1068 for (class = 0; class < 256; class++)
1069 if (!isSPACE(class))
1070 regset(bits,def,class);
1071 lastclass = 1234;
1072 continue;
1073 case 'd':
1074 for (class = '0'; class <= '9'; class++)
1075 regset(bits,def,class);
1076 lastclass = 1234;
1077 continue;
1078 case 'D':
1079 for (class = 0; class < '0'; class++)
1080 regset(bits,def,class);
1081 for (class = '9' + 1; class < 256; class++)
1082 regset(bits,def,class);
1083 lastclass = 1234;
1084 continue;
1085 case 'n':
1086 class = '\n';
1087 break;
1088 case 'r':
1089 class = '\r';
1090 break;
1091 case 't':
1092 class = '\t';
1093 break;
1094 case 'f':
1095 class = '\f';
1096 break;
1097 case 'b':
1098 class = '\b';
1099 break;
1100 case 'e':
1101 class = '\033';
1102 break;
1103 case 'a':
1104 class = '\007';
1105 break;
1106 case 'x':
1107 class = scan_hex(regparse, 2, &numlen);
1108 regparse += numlen;
1109 break;
1110 case 'c':
1111 class = *regparse++;
1112 if (isLOWER(class))
1113 class = toUPPER(class);
1114 class ^= 64;
1115 break;
1116 case '0': case '1': case '2': case '3': case '4':
1117 case '5': case '6': case '7': case '8': case '9':
1118 class = scan_oct(--regparse, 3, &numlen);
1119 regparse += numlen;
1120 break;
1121 }
1122 }
1123 if (range) {
1124 if (lastclass > class)
1125 FAIL("invalid [] range in regexp");
1126 range = 0;
1127 }
1128 else {
1129 lastclass = class;
1130 if (*regparse == '-' && regparse+1 < regxend &&
1131 regparse[1] != ']') {
a687059c 1132 regparse++;
a0d0e21e
LW
1133 range = 1;
1134 continue; /* do it next time */
1135 }
a687059c 1136 }
a0d0e21e
LW
1137 for ( ; lastclass <= class; lastclass++) {
1138 regset(bits,def,lastclass);
1139 if (regflags & PMf_FOLD && isUPPER(lastclass))
1140 regset(bits,def,toLOWER(lastclass));
a687059c 1141 }
a0d0e21e
LW
1142 lastclass = class;
1143 }
1144 if (*regparse != ']')
1145 FAIL("unmatched [] in regexp");
1146 nextchar();
1147 return ret;
1148}
1149
1150static char*
1151nextchar()
1152{
1153 char* retval = regparse++;
1154
1155 if (regflags & PMf_EXTENDED) {
1156 while (isSPACE(*regparse))
1157 regparse++;
1158 }
1159 return retval;
a687059c
LW
1160}
1161
1162/*
a0d0e21e
LW
1163- regnode - emit a node
1164*/
1165#ifdef CAN_PROTOTYPE
1166static char * /* Location. */
1167regnode(char op)
1168#else
a687059c
LW
1169static char * /* Location. */
1170regnode(op)
1171char op;
a0d0e21e 1172#endif
a687059c 1173{
a0d0e21e
LW
1174 register char *ret;
1175 register char *ptr;
a687059c 1176
a0d0e21e
LW
1177 ret = regcode;
1178 if (ret == &regdummy) {
a687059c 1179#ifdef REGALIGN
a0d0e21e
LW
1180 if (!(regsize & 1))
1181 regsize++;
a687059c 1182#endif
a0d0e21e
LW
1183 regsize += 3;
1184 return(ret);
1185 }
a687059c
LW
1186
1187#ifdef REGALIGN
1188#ifndef lint
a0d0e21e
LW
1189 if (!((long)ret & 1))
1190 *ret++ = 127;
a687059c
LW
1191#endif
1192#endif
a0d0e21e
LW
1193 ptr = ret;
1194 *ptr++ = op;
1195 *ptr++ = '\0'; /* Null "next" pointer. */
1196 *ptr++ = '\0';
1197 regcode = ptr;
a687059c 1198
a0d0e21e 1199 return(ret);
a687059c
LW
1200}
1201
1202/*
a0d0e21e
LW
1203- reganode - emit a node with an argument
1204*/
1205#ifdef CAN_PROTOTYPE
1206static char * /* Location. */
1207reganode(char op, unsigned short arg)
1208#else
fe14fcc3
LW
1209static char * /* Location. */
1210reganode(op, arg)
1211char op;
1212unsigned short arg;
a0d0e21e 1213#endif
fe14fcc3 1214{
a0d0e21e
LW
1215 register char *ret;
1216 register char *ptr;
fe14fcc3 1217
a0d0e21e
LW
1218 ret = regcode;
1219 if (ret == &regdummy) {
fe14fcc3 1220#ifdef REGALIGN
a0d0e21e
LW
1221 if (!(regsize & 1))
1222 regsize++;
fe14fcc3 1223#endif
a0d0e21e
LW
1224 regsize += 5;
1225 return(ret);
1226 }
fe14fcc3
LW
1227
1228#ifdef REGALIGN
1229#ifndef lint
a0d0e21e
LW
1230 if (!((long)ret & 1))
1231 *ret++ = 127;
fe14fcc3
LW
1232#endif
1233#endif
a0d0e21e
LW
1234 ptr = ret;
1235 *ptr++ = op;
1236 *ptr++ = '\0'; /* Null "next" pointer. */
1237 *ptr++ = '\0';
fe14fcc3 1238#ifdef REGALIGN
a0d0e21e 1239 *(unsigned short *)(ret+3) = arg;
fe14fcc3 1240#else
a0d0e21e 1241 ret[3] = arg >> 8; ret[4] = arg & 0377;
fe14fcc3 1242#endif
a0d0e21e
LW
1243 ptr += 2;
1244 regcode = ptr;
fe14fcc3 1245
a0d0e21e 1246 return(ret);
fe14fcc3
LW
1247}
1248
1249/*
a0d0e21e
LW
1250- regc - emit (if appropriate) a byte of code
1251*/
1252#ifdef CAN_PROTOTYPE
1253static void
1254regc(char b)
1255#else
a687059c
LW
1256static void
1257regc(b)
1258char b;
a0d0e21e 1259#endif
a687059c 1260{
a0d0e21e
LW
1261 if (regcode != &regdummy)
1262 *regcode++ = b;
1263 else
1264 regsize++;
a687059c
LW
1265}
1266
1267/*
a0d0e21e
LW
1268- reginsert - insert an operator in front of already-emitted operand
1269*
1270* Means relocating the operand.
1271*/
1272#ifdef CAN_PROTOTYPE
1273static void
1274reginsert(char op, char *opnd)
1275#else
a687059c
LW
1276static void
1277reginsert(op, opnd)
1278char op;
1279char *opnd;
a0d0e21e 1280#endif
a687059c 1281{
a0d0e21e
LW
1282 register char *src;
1283 register char *dst;
1284 register char *place;
1285 register int offset = (regkind[(U8)op] == CURLY ? 4 : 0);
a687059c 1286
a0d0e21e 1287 if (regcode == &regdummy) {
a687059c 1288#ifdef REGALIGN
a0d0e21e 1289 regsize += 4 + offset;
a687059c 1290#else
a0d0e21e 1291 regsize += 3 + offset;
a687059c 1292#endif
a0d0e21e
LW
1293 return;
1294 }
a687059c 1295
a0d0e21e 1296 src = regcode;
a687059c 1297#ifdef REGALIGN
a0d0e21e 1298 regcode += 4 + offset;
a687059c 1299#else
a0d0e21e 1300 regcode += 3 + offset;
a687059c 1301#endif
a0d0e21e
LW
1302 dst = regcode;
1303 while (src > opnd)
1304 *--dst = *--src;
1305
1306 place = opnd; /* Op node, where operand used to be. */
1307 *place++ = op;
1308 *place++ = '\0';
1309 *place++ = '\0';
1310 while (offset-- > 0)
a687059c 1311 *place++ = '\0';
2b69d0c2 1312#ifdef REGALIGN
a0d0e21e 1313 *place++ = '\177';
2b69d0c2 1314#endif
a687059c
LW
1315}
1316
1317/*
a0d0e21e
LW
1318- regtail - set the next-pointer at the end of a node chain
1319*/
a687059c
LW
1320static void
1321regtail(p, val)
1322char *p;
1323char *val;
1324{
a0d0e21e
LW
1325 register char *scan;
1326 register char *temp;
1327 register I32 offset;
1328
1329 if (p == &regdummy)
1330 return;
1331
1332 /* Find last node. */
1333 scan = p;
1334 for (;;) {
1335 temp = regnext(scan);
1336 if (temp == NULL)
1337 break;
1338 scan = temp;
1339 }
a687059c
LW
1340
1341#ifdef REGALIGN
a0d0e21e 1342 offset = val - scan;
a687059c 1343#ifndef lint
a0d0e21e 1344 *(short*)(scan+1) = offset;
a687059c 1345#else
a0d0e21e 1346 offset = offset;
a687059c
LW
1347#endif
1348#else
a0d0e21e
LW
1349 if (OP(scan) == BACK)
1350 offset = scan - val;
1351 else
1352 offset = val - scan;
1353 *(scan+1) = (offset>>8)&0377;
1354 *(scan+2) = offset&0377;
a687059c
LW
1355#endif
1356}
1357
1358/*
a0d0e21e
LW
1359- regoptail - regtail on operand of first argument; nop if operandless
1360*/
a687059c
LW
1361static void
1362regoptail(p, val)
1363char *p;
1364char *val;
1365{
a0d0e21e
LW
1366 /* "Operandless" and "op != BRANCH" are synonymous in practice. */
1367 if (p == NULL || p == &regdummy || regkind[(U8)OP(p)] != BRANCH)
1368 return;
1369 regtail(NEXTOPER(p), val);
a687059c
LW
1370}
1371
1372/*
1373 - regcurly - a little FSA that accepts {\d+,?\d*}
1374 */
79072805 1375STATIC I32
a687059c
LW
1376regcurly(s)
1377register char *s;
1378{
1379 if (*s++ != '{')
1380 return FALSE;
f0fcb552 1381 if (!isDIGIT(*s))
a687059c 1382 return FALSE;
f0fcb552 1383 while (isDIGIT(*s))
a687059c
LW
1384 s++;
1385 if (*s == ',')
1386 s++;
f0fcb552 1387 while (isDIGIT(*s))
a687059c
LW
1388 s++;
1389 if (*s != '}')
1390 return FALSE;
1391 return TRUE;
1392}
1393
1394#ifdef DEBUGGING
1395
1396/*
1397 - regdump - dump a regexp onto stderr in vaguely comprehensible form
1398 */
1399void
1400regdump(r)
1401regexp *r;
1402{
a0d0e21e
LW
1403 register char *s;
1404 register char op = EXACTLY; /* Arbitrary non-END op. */
1405 register char *next;
a687059c
LW
1406
1407
a0d0e21e
LW
1408 s = r->program + 1;
1409 while (op != END) { /* While that wasn't END last time... */
a687059c 1410#ifdef REGALIGN
a0d0e21e
LW
1411 if (!((long)s & 1))
1412 s++;
a687059c 1413#endif
a0d0e21e
LW
1414 op = OP(s);
1415 fprintf(stderr,"%2d%s", s-r->program, regprop(s)); /* Where, what. */
1416 next = regnext(s);
1417 s += regarglen[(U8)op];
1418 if (next == NULL) /* Next ptr. */
1419 fprintf(stderr,"(0)");
1420 else
1421 fprintf(stderr,"(%d)", (s-r->program)+(next-s));
1422 s += 3;
1423 if (op == ANYOF) {
1424 s += 32;
a687059c 1425 }
a0d0e21e
LW
1426 if (op == EXACTLY) {
1427 /* Literal string, where present. */
1428 s++;
1429 (void)putc(' ', stderr);
1430 (void)putc('<', stderr);
1431 while (*s != '\0') {
1432 (void)putc(*s, stderr);
1433 s++;
1434 }
1435 (void)putc('>', stderr);
1436 s++;
1437 }
1438 (void)putc('\n', stderr);
1439 }
1440
1441 /* Header fields of interest. */
1442 if (r->regstart)
1443 fprintf(stderr,"start `%s' ", SvPVX(r->regstart));
1444 if (r->regstclass)
1445 fprintf(stderr,"stclass `%s' ", regprop(r->regstclass));
1446 if (r->reganch & ROPT_ANCH)
1447 fprintf(stderr,"anchored ");
1448 if (r->reganch & ROPT_SKIP)
1449 fprintf(stderr,"plus ");
1450 if (r->reganch & ROPT_IMPLICIT)
1451 fprintf(stderr,"implicit ");
1452 if (r->regmust != NULL)
1453 fprintf(stderr,"must have \"%s\" back %ld ", SvPVX(r->regmust),
1454 (long) r->regback);
1455 fprintf(stderr, "minlen %ld ", (long) r->minlen);
1456 fprintf(stderr,"\n");
a687059c
LW
1457}
1458
1459/*
a0d0e21e
LW
1460- regprop - printable representation of opcode
1461*/
a687059c
LW
1462char *
1463regprop(op)
1464char *op;
1465{
a0d0e21e
LW
1466 register char *p = 0;
1467
1468 (void) strcpy(buf, ":");
1469
1470 switch (OP(op)) {
1471 case BOL:
1472 p = "BOL";
1473 break;
1474 case MBOL:
1475 p = "MBOL";
1476 break;
1477 case SBOL:
1478 p = "SBOL";
1479 break;
1480 case EOL:
1481 p = "EOL";
1482 break;
1483 case MEOL:
1484 p = "MEOL";
1485 break;
1486 case SEOL:
1487 p = "SEOL";
1488 break;
1489 case ANY:
1490 p = "ANY";
1491 break;
1492 case SANY:
1493 p = "SANY";
1494 break;
1495 case ANYOF:
1496 p = "ANYOF";
1497 break;
1498 case BRANCH:
1499 p = "BRANCH";
1500 break;
1501 case EXACTLY:
1502 p = "EXACTLY";
1503 break;
1504 case NOTHING:
1505 p = "NOTHING";
1506 break;
1507 case BACK:
1508 p = "BACK";
1509 break;
1510 case END:
1511 p = "END";
1512 break;
1513 case ALNUM:
1514 p = "ALNUM";
1515 break;
1516 case NALNUM:
1517 p = "NALNUM";
1518 break;
1519 case BOUND:
1520 p = "BOUND";
1521 break;
1522 case NBOUND:
1523 p = "NBOUND";
1524 break;
1525 case SPACE:
1526 p = "SPACE";
1527 break;
1528 case NSPACE:
1529 p = "NSPACE";
1530 break;
1531 case DIGIT:
1532 p = "DIGIT";
1533 break;
1534 case NDIGIT:
1535 p = "NDIGIT";
1536 break;
1537 case CURLY:
1538 (void)sprintf(buf+strlen(buf), "CURLY {%d,%d}", ARG1(op),ARG2(op));
1539 p = NULL;
1540 break;
1541 case CURLYX:
1542 (void)sprintf(buf+strlen(buf), "CURLYX {%d,%d}", ARG1(op),ARG2(op));
1543 p = NULL;
1544 break;
1545 case REF:
1546 (void)sprintf(buf+strlen(buf), "REF%d", ARG1(op));
1547 p = NULL;
1548 break;
1549 case OPEN:
1550 (void)sprintf(buf+strlen(buf), "OPEN%d", ARG1(op));
1551 p = NULL;
1552 break;
1553 case CLOSE:
1554 (void)sprintf(buf+strlen(buf), "CLOSE%d", ARG1(op));
1555 p = NULL;
1556 break;
1557 case STAR:
1558 p = "STAR";
1559 break;
1560 case PLUS:
1561 p = "PLUS";
1562 break;
1563 case MINMOD:
1564 p = "MINMOD";
1565 break;
1566 case GBOL:
1567 p = "GBOL";
1568 break;
1569 case UNLESSM:
1570 p = "UNLESSM";
1571 break;
1572 case IFMATCH:
1573 p = "IFMATCH";
1574 break;
1575 case SUCCEED:
1576 p = "SUCCEED";
1577 break;
1578 case WHILEM:
1579 p = "WHILEM";
1580 break;
1581 default:
1582 FAIL("corrupted regexp opcode");
1583 }
1584 if (p != NULL)
1585 (void) strcat(buf, p);
1586 return(buf);
a687059c
LW
1587}
1588#endif /* DEBUGGING */
1589
2b69d0c2 1590void
a687059c
LW
1591regfree(r)
1592struct regexp *r;
1593{
a0d0e21e
LW
1594 if (!r)
1595 return;
1596 if (r->precomp) {
1597 Safefree(r->precomp);
1598 r->precomp = Nullch;
1599 }
1600 if (r->subbase) {
1601 Safefree(r->subbase);
1602 r->subbase = Nullch;
1603 }
1604 if (r->regmust) {
1605 SvREFCNT_dec(r->regmust);
1606 r->regmust = Nullsv;
1607 }
1608 if (r->regstart) {
1609 SvREFCNT_dec(r->regstart);
1610 r->regstart = Nullsv;
1611 }
1612 Safefree(r->startp);
1613 Safefree(r->endp);
1614 Safefree(r);
a687059c 1615}