*
**** Alterations to Henry's code are...
****
- **** Copyright (c) 1991-1994, Larry Wall
+ **** Copyright (c) 1991-1997, Larry Wall
****
**** You may distribute under the terms of either the GNU General Public
**** License or the Artistic License, as specified in the README file.
static void regoptail _((char *, char *));
static void regset _((char *, I32));
static void regtail _((char *, char *));
+static char* regwhite _((char *, char *));
static char* nextchar _((void));
/*
I32 minlen = 0;
I32 sawplus = 0;
I32 sawopen = 0;
+#define MAX_REPEAT_DEPTH 12
+ struct {
+ char *opcode;
+ I32 count;
+ } repeat_stack[MAX_REPEAT_DEPTH];
+ I32 repeat_depth = 0;
+ I32 repeat_count = 1; /* We start unmultiplied. */
if (exp == NULL)
croak("NULL regexp argument");
regkind[(U8)OP(first)] == NBOUND)
r->regstclass = first;
else if (regkind[(U8)OP(first)] == BOL) {
- r->reganch = ROPT_ANCH;
+ r->reganch |= ROPT_ANCH_BOL;
first = NEXTOPER(first);
- goto again;
+ goto again;
+ }
+ else if (OP(first) == GPOS) {
+ r->reganch |= ROPT_ANCH_GPOS;
+ first = NEXTOPER(first);
+ goto again;
}
else if ((OP(first) == STAR &&
regkind[(U8)OP(NEXTOPER(first))] == ANY) &&
!(r->reganch & ROPT_ANCH) )
{
/* turn .* into ^.* with an implied $*=1 */
- r->reganch = ROPT_ANCH | ROPT_IMPLICIT;
+ r->reganch |= ROPT_ANCH_BOL | ROPT_IMPLICIT;
first = NEXTOPER(first);
- goto again;
+ goto again;
}
if (sawplus && (!sawopen || !regsawback))
r->reganch |= ROPT_SKIP; /* x+ must match 1st of run */
char *t;
first = scan;
- while (OP(t = regnext(scan)) == CLOSE)
+ while ((t = regnext(scan)) && OP(t) == CLOSE)
scan = t;
- minlen += *OPERAND(first);
+ minlen += *OPERAND(first) * repeat_count;
if (curback - backish == len) {
sv_catpvn(longish, OPERAND(first)+1,
*OPERAND(first));
curback += *OPERAND(first);
}
else if (strchr(varies,OP(scan))) {
- curback = -30000;
+ int tcount;
+ char *next;
+
+ if (repeat_depth < MAX_REPEAT_DEPTH
+ && ((OP(scan) == PLUS
+ && (tcount = 1)
+ && (next = NEXTOPER(scan)))
+ || (regkind[(U8)OP(scan)] == CURLY
+ && (tcount = ARG1(scan))
+ && (next = NEXTOPER(scan)+4))))
+ {
+ /* We treat (abc)+ as (abc)(abc)*. */
+
+ /* Mark the place to return back. */
+ repeat_stack[repeat_depth].opcode = regnext(scan);
+ repeat_stack[repeat_depth].count = repeat_count;
+ repeat_depth++;
+ repeat_count *= tcount;
+
+ /* Go deeper: */
+ scan = next;
+ continue;
+ }
+ else {
+ curback = -30000;
+ len = 0;
+ if (SvCUR(longish) > SvCUR(longest)) {
+ sv_setsv(longest,longish);
+ backest = backish;
+ }
+ sv_setpvn(longish,"",0);
+ }
+ }
+ else if (strchr(simple,OP(scan))) {
+ curback++;
+ minlen += repeat_count;
len = 0;
if (SvCUR(longish) > SvCUR(longest)) {
sv_setsv(longest,longish);
backest = backish;
}
sv_setpvn(longish,"",0);
- if (OP(scan) == PLUS && strchr(simple,OP(NEXTOPER(scan))))
- minlen++;
- else if (regkind[(U8)OP(scan)] == CURLY &&
- strchr(simple,OP(NEXTOPER(scan)+4)))
- minlen += ARG1(scan);
}
- else if (strchr(simple,OP(scan))) {
- curback++;
- minlen++;
+ scan = regnext(scan);
+ if (!scan) { /* Go up PLUS or CURLY. */
+ if (!repeat_depth--)
+ croak("panic: re scan");
+ scan = repeat_stack[repeat_depth].opcode;
+ repeat_count = repeat_stack[repeat_depth].count;
+ /* Need to submit the longest string found: */
+ curback = -30000;
len = 0;
if (SvCUR(longish) > SvCUR(longest)) {
sv_setsv(longest,longish);
}
sv_setpvn(longish,"",0);
}
- scan = regnext(scan);
}
/* Prefer earlier on tie, unless we can tail match latter */
- if (SvCUR(longish) + (regkind[(U8)OP(first)] == EOL)
+ if (SvCUR(longish) + (first && regkind[(U8)OP(first)] == EOL)
> SvCUR(longest))
{
sv_setsv(longest,longish);
if (backest < 0)
backest = -1;
r->regback = backest;
- if (SvCUR(longest) > !(sawstudy || regkind[(U8)OP(first)] == EOL))
+ if (SvCUR(longest) > !(sawstudy ||
+ (first && regkind[(U8)OP(first)] == EOL)))
fbm_compile(r->regmust);
(void)SvUPGRADE(r->regmust, SVt_PVBM);
BmUSEFUL(r->regmust) = 100;
- if (regkind[(U8)OP(first)] == EOL && SvCUR(longish))
+ if (first && regkind[(U8)OP(first)] == EOL && SvCUR(longish))
SvTAIL_on(r->regmust);
}
else {
break;
case '$':
case '@':
- croak("Sequence (?%c...) not implemented", paren);
+ croak("Sequence (?%c...) not implemented", (int)paren);
break;
case '#':
while (*regparse && *regparse != ')')
nextchar();
*flagp = TRYAGAIN;
return NULL;
+ case 0:
+ croak("Sequence (? incomplete");
+ break;
default:
--regparse;
- while (*regparse && strchr("iogmsx", *regparse))
+ while (*regparse && strchr("iogcmsx", *regparse))
pmflag(®flags, *regparse++);
if (*regparse != ')')
croak("Sequence (?%c...) not recognized", *regparse);
*flagp = flags;
return(ret);
}
+
+ if (!(flags&HASWIDTH) && op != '?')
+ FAIL("regexp *+ operand could be empty");
+
nextchar();
*flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
nextchar();
break;
case 'G':
- ret = regnode(GBOL);
+ ret = regnode(GPOS);
*flagp |= SIMPLE;
nextchar();
break;
goto defchar;
else {
regsawback = 1;
- ret = reganode(REF, num);
+ ret = reganode((regflags & PMf_FOLD)
+ ? ((regflags & PMf_LOCALE) ? REFFL : REFF)
+ : REF, num);
*flagp |= HASWIDTH;
while (isDIGIT(*regparse))
regparse++;
len++)
{
oldp = p;
+
+ if (regflags & PMf_EXTENDED)
+ p = regwhite(p, regxend);
switch (*p) {
case '^':
case '$':
break;
}
break;
- case '#':
- if (regflags & PMf_EXTENDED) {
- while (p < regxend && *p != '\n') p++;
- }
- /* FALL THROUGH */
- case ' ': case '\t': case '\n': case '\r': case '\f': case '\v':
- if (regflags & PMf_EXTENDED) {
- p++;
- len--;
- continue;
- }
- /* FALL THROUGH */
default:
ender = *p++;
break;
}
+ if (regflags & PMf_EXTENDED)
+ p = regwhite(p, regxend);
if (ISMULT2(p)) { /* Back off on ?+*. */
if (len)
p = oldp;
return(ret);
}
+static char *
+regwhite(p, e)
+char *p;
+char *e;
+{
+ while (p < e) {
+ if (isSPACE(*p))
+ ++p;
+ else if (*p == '#') {
+ do {
+ p++;
+ } while (p < e && *p != '\n');
+ }
+ else
+ break;
+ }
+ return p;
+}
+
static void
regset(opnd, c)
char *opnd;
register char *s;
register char op = EXACT; /* Arbitrary non-END op. */
register char *next;
-
+ SV *sv = sv_newmortal();
s = r->program + 1;
while (op != END) { /* While that wasn't END last time... */
s++;
#endif
op = OP(s);
- PerlIO_printf(Perl_debug_log, "%2d%s", s-r->program, regprop(s)); /* Where, what. */
+ /* where, what */
+ regprop(sv, s);
+ PerlIO_printf(Perl_debug_log, "%2d%s", s - r->program, SvPVX(sv));
next = regnext(s);
s += regarglen[(U8)op];
if (next == NULL) /* Next ptr. */
/* Header fields of interest. */
if (r->regstart)
PerlIO_printf(Perl_debug_log, "start `%s' ", SvPVX(r->regstart));
- if (r->regstclass)
- PerlIO_printf(Perl_debug_log, "stclass `%s' ", regprop(r->regstclass));
- if (r->reganch & ROPT_ANCH)
- PerlIO_printf(Perl_debug_log, "anchored ");
+ if (r->regstclass) {
+ regprop(sv, r->regstclass);
+ PerlIO_printf(Perl_debug_log, "stclass `%s' ", SvPVX(sv));
+ }
+ if (r->reganch & ROPT_ANCH) {
+ PerlIO_printf(Perl_debug_log, "anchored");
+ if (r->reganch & ROPT_ANCH_BOL)
+ PerlIO_printf(Perl_debug_log, "(BOL)");
+ if (r->reganch & ROPT_ANCH_GPOS)
+ PerlIO_printf(Perl_debug_log, "(GPOS)");
+ PerlIO_putc(Perl_debug_log, ' ');
+ }
if (r->reganch & ROPT_SKIP)
PerlIO_printf(Perl_debug_log, "plus ");
if (r->reganch & ROPT_IMPLICIT)
/*
- regprop - printable representation of opcode
*/
-char *
-regprop(op)
+void
+regprop(sv, op)
+SV *sv;
char *op;
{
register char *p = 0;
- (void) strcpy(buf, ":");
-
+ sv_setpv(sv, ":");
switch (OP(op)) {
case BOL:
p = "BOL";
p = "NBOUNDL";
break;
case CURLY:
- (void)sprintf(buf+strlen(buf), "CURLY {%d,%d}", ARG1(op),ARG2(op));
- p = NULL;
+ sv_catpvf(sv, "CURLY {%d,%d}", ARG1(op), ARG2(op));
break;
case CURLYX:
- (void)sprintf(buf+strlen(buf), "CURLYX {%d,%d}", ARG1(op),ARG2(op));
- p = NULL;
+ sv_catpvf(sv, "CURLYX {%d,%d}", ARG1(op), ARG2(op));
break;
case REF:
- (void)sprintf(buf+strlen(buf), "REF%d", ARG1(op));
- p = NULL;
+ sv_catpvf(sv, "REF%d", ARG1(op));
+ break;
+ case REFF:
+ sv_catpvf(sv, "REFF%d", ARG1(op));
+ break;
+ case REFFL:
+ sv_catpvf(sv, "REFFL%d", ARG1(op));
break;
case OPEN:
- (void)sprintf(buf+strlen(buf), "OPEN%d", ARG1(op));
- p = NULL;
+ sv_catpvf(sv, "OPEN%d", ARG1(op));
break;
case CLOSE:
- (void)sprintf(buf+strlen(buf), "CLOSE%d", ARG1(op));
+ sv_catpvf(sv, "CLOSE%d", ARG1(op));
p = NULL;
break;
case STAR:
case MINMOD:
p = "MINMOD";
break;
- case GBOL:
- p = "GBOL";
+ case GPOS:
+ p = "GPOS";
break;
case UNLESSM:
p = "UNLESSM";
default:
FAIL("corrupted regexp opcode");
}
- if (p != NULL)
- (void) strcat(buf, p);
- return(buf);
+ if (p)
+ sv_catpv(sv, p);
}
#endif /* DEBUGGING */