Commit | Line | Data |
---|---|---|
a559c259 | 1 | /* $Header: search.c,v 1.0.1.2 88/01/28 10:30:46 root Exp $ |
8d063cd8 LW |
2 | * |
3 | * $Log: search.c,v $ | |
a559c259 LW |
4 | * Revision 1.0.1.2 88/01/28 10:30:46 root |
5 | * patch8: uncommented free_compex for use with eval operator. | |
6 | * | |
135863df AB |
7 | * Revision 1.0.1.1 88/01/24 03:55:05 root |
8 | * patch 2: made depend on perl.h. | |
9 | * | |
8d063cd8 LW |
10 | * Revision 1.0 87/12/18 13:05:59 root |
11 | * Initial revision | |
12 | * | |
13 | */ | |
14 | ||
15 | /* string search routines */ | |
16 | ||
8d063cd8 LW |
17 | #include "EXTERN.h" |
18 | #include "handy.h" | |
19 | #include "util.h" | |
20 | #include "INTERN.h" | |
21 | #include "search.h" | |
135863df AB |
22 | #include "EXTERN.h" |
23 | #include "perl.h" | |
8d063cd8 LW |
24 | |
25 | #define VERBOSE | |
26 | #define FLUSH | |
27 | #define MEM_SIZE int | |
28 | ||
29 | #ifndef BITSPERBYTE | |
30 | #define BITSPERBYTE 8 | |
31 | #endif | |
32 | ||
33 | #define BMAPSIZ (127 / BITSPERBYTE + 1) | |
34 | ||
35 | #define CHAR 0 /* a normal character */ | |
36 | #define ANY 1 /* . matches anything except newline */ | |
37 | #define CCL 2 /* [..] character class */ | |
38 | #define NCCL 3 /* [^..]negated character class */ | |
39 | #define BEG 4 /* ^ beginning of a line */ | |
40 | #define END 5 /* $ end of a line */ | |
41 | #define LPAR 6 /* ( begin sub-match */ | |
42 | #define RPAR 7 /* ) end sub-match */ | |
43 | #define REF 8 /* \N backreference to the Nth submatch */ | |
44 | #define WORD 9 /* \w matches alphanumeric character */ | |
45 | #define NWORD 10 /* \W matches non-alphanumeric character */ | |
46 | #define WBOUND 11 /* \b matches word boundary */ | |
47 | #define NWBOUND 12 /* \B matches non-boundary */ | |
48 | #define FINIS 13 /* the end of the pattern */ | |
49 | ||
50 | #define CODEMASK 15 | |
51 | ||
52 | /* Quantifiers: */ | |
53 | ||
54 | #define MINZERO 16 /* minimum is 0, not 1 */ | |
55 | #define MAXINF 32 /* maximum is infinity, not 1 */ | |
56 | ||
57 | #define ASCSIZ 0200 | |
58 | typedef char TRANSTABLE[ASCSIZ]; | |
59 | ||
60 | static TRANSTABLE trans = { | |
61 | 0000,0001,0002,0003,0004,0005,0006,0007, | |
62 | 0010,0011,0012,0013,0014,0015,0016,0017, | |
63 | 0020,0021,0022,0023,0024,0025,0026,0027, | |
64 | 0030,0031,0032,0033,0034,0035,0036,0037, | |
65 | 0040,0041,0042,0043,0044,0045,0046,0047, | |
66 | 0050,0051,0052,0053,0054,0055,0056,0057, | |
67 | 0060,0061,0062,0063,0064,0065,0066,0067, | |
68 | 0070,0071,0072,0073,0074,0075,0076,0077, | |
69 | 0100,0101,0102,0103,0104,0105,0106,0107, | |
70 | 0110,0111,0112,0113,0114,0115,0116,0117, | |
71 | 0120,0121,0122,0123,0124,0125,0126,0127, | |
72 | 0130,0131,0132,0133,0134,0135,0136,0137, | |
73 | 0140,0141,0142,0143,0144,0145,0146,0147, | |
74 | 0150,0151,0152,0153,0154,0155,0156,0157, | |
75 | 0160,0161,0162,0163,0164,0165,0166,0167, | |
76 | 0170,0171,0172,0173,0174,0175,0176,0177, | |
77 | }; | |
78 | static bool folding = FALSE; | |
79 | ||
80 | static int err; | |
81 | #define NOERR 0 | |
82 | #define BEGFAIL 1 | |
83 | #define FATAL 2 | |
84 | ||
85 | static char *FirstCharacter; | |
86 | static char *matchend; | |
87 | static char *matchtill; | |
88 | ||
89 | void | |
90 | search_init() | |
91 | { | |
92 | #ifdef UNDEF | |
93 | register int i; | |
94 | ||
95 | for (i = 0; i < ASCSIZ; i++) | |
96 | trans[i] = i; | |
97 | #else | |
98 | ; | |
99 | #endif | |
100 | } | |
101 | ||
102 | void | |
103 | init_compex(compex) | |
104 | register COMPEX *compex; | |
105 | { | |
106 | /* the following must start off zeroed */ | |
107 | ||
108 | compex->precomp = Nullch; | |
109 | compex->complen = 0; | |
110 | compex->subbase = Nullch; | |
111 | } | |
112 | ||
8d063cd8 LW |
113 | void |
114 | free_compex(compex) | |
115 | register COMPEX *compex; | |
116 | { | |
117 | if (compex->complen) { | |
118 | safefree(compex->compbuf); | |
119 | compex->complen = 0; | |
120 | } | |
121 | if (compex->subbase) { | |
122 | safefree(compex->subbase); | |
123 | compex->subbase = Nullch; | |
124 | } | |
125 | } | |
8d063cd8 LW |
126 | |
127 | static char *gbr_str = Nullch; | |
128 | static int gbr_siz = 0; | |
129 | ||
130 | char * | |
131 | getparen(compex,n) | |
132 | register COMPEX *compex; | |
133 | int n; | |
134 | { | |
135 | int length = compex->subend[n] - compex->subbeg[n]; | |
136 | ||
137 | if (!n && | |
138 | (!compex->numsubs || n > compex->numsubs || !compex->subend[n] || length<0)) | |
139 | return ""; | |
140 | growstr(&gbr_str, &gbr_siz, length+1); | |
141 | safecpy(gbr_str, compex->subbeg[n], length+1); | |
142 | return gbr_str; | |
143 | } | |
144 | ||
145 | void | |
146 | case_fold(which) | |
147 | int which; | |
148 | { | |
149 | register int i; | |
150 | ||
151 | if (which != folding) { | |
152 | if (which) { | |
153 | for (i = 'A'; i <= 'Z'; i++) | |
154 | trans[i] = tolower(i); | |
155 | } | |
156 | else { | |
157 | for (i = 'A'; i <= 'Z'; i++) | |
158 | trans[i] = i; | |
159 | } | |
160 | folding = which; | |
161 | } | |
162 | } | |
163 | ||
164 | /* Compile the regular expression into internal form */ | |
165 | ||
166 | char * | |
167 | compile(compex, sp, regex, fold) | |
168 | register COMPEX *compex; | |
169 | register char *sp; | |
170 | int regex; | |
171 | int fold; | |
172 | { | |
173 | register int c; | |
174 | register char *cp; | |
175 | char *lastcp; | |
176 | char paren[MAXSUB], | |
177 | *parenp; | |
178 | char **alt = compex->alternatives; | |
179 | char *retmes = "Badly formed search string"; | |
180 | ||
181 | case_fold(compex->do_folding = fold); | |
182 | if (compex->precomp) | |
183 | safefree(compex->precomp); | |
184 | compex->precomp = savestr(sp); | |
185 | if (!compex->complen) { | |
186 | compex->compbuf = safemalloc(84); | |
187 | compex->complen = 80; | |
188 | } | |
189 | cp = compex->compbuf; /* point at compiled buffer */ | |
190 | *alt++ = cp; /* first alternative starts here */ | |
191 | parenp = paren; /* first paren goes here */ | |
192 | if (*sp == 0) { /* nothing to compile? */ | |
193 | #ifdef NOTDEF | |
194 | if (*cp == 0) /* nothing there yet? */ | |
195 | return "Null search string"; | |
196 | #endif | |
197 | if (*cp) | |
198 | return Nullch; /* just keep old expression */ | |
199 | } | |
200 | compex->numsubs = 0; /* no parens yet */ | |
201 | lastcp = 0; | |
202 | for (;;) { | |
203 | if (cp - compex->compbuf >= compex->complen) { | |
204 | char *ocompbuf = compex->compbuf; | |
205 | ||
206 | grow_comp(compex); | |
207 | if (ocompbuf != compex->compbuf) { /* adjust pointers? */ | |
208 | char **tmpalt; | |
209 | ||
210 | cp = compex->compbuf + (cp - ocompbuf); | |
211 | if (lastcp) | |
212 | lastcp = compex->compbuf + (lastcp - ocompbuf); | |
213 | for (tmpalt = compex->alternatives; tmpalt < alt; tmpalt++) | |
214 | if (*tmpalt) | |
215 | *tmpalt = compex->compbuf + (*tmpalt - ocompbuf); | |
216 | } | |
217 | } | |
218 | c = *sp++; /* get next char of pattern */ | |
219 | if (c == 0) { /* end of pattern? */ | |
220 | if (parenp != paren) { /* balanced parentheses? */ | |
221 | #ifdef VERBOSE | |
222 | retmes = "Missing right parenthesis"; | |
223 | #endif | |
224 | goto badcomp; | |
225 | } | |
226 | *cp++ = FINIS; /* append a stopper */ | |
227 | *alt++ = 0; /* terminate alternative list */ | |
228 | /* | |
229 | compex->complen = cp - compex->compbuf + 1; | |
230 | compex->compbuf = saferealloc(compex->compbuf,compex->complen+4); */ | |
231 | return Nullch; /* return success */ | |
232 | } | |
233 | if (c != '*' && c != '?' && c != '+') | |
234 | lastcp = cp; | |
235 | if (!regex) { /* just a normal search string? */ | |
236 | *cp++ = CHAR; /* everything is a normal char */ | |
237 | *cp++ = trans[c]; | |
238 | } | |
239 | else /* it is a regular expression */ | |
240 | switch (c) { | |
241 | ||
242 | default: | |
243 | normal_char: | |
244 | *cp++ = CHAR; | |
245 | *cp++ = trans[c]; | |
246 | continue; | |
247 | ||
248 | case '.': | |
249 | *cp++ = ANY; | |
250 | continue; | |
251 | ||
252 | case '[': { /* character class */ | |
253 | register int i; | |
254 | ||
255 | if (cp - compex->compbuf >= compex->complen - BMAPSIZ) { | |
256 | char *ocompbuf = compex->compbuf; | |
257 | ||
258 | grow_comp(compex); /* reserve bitmap */ | |
259 | if (ocompbuf != compex->compbuf) {/* adjust pointers? */ | |
260 | char **tmpalt; | |
261 | ||
262 | cp = compex->compbuf + (cp - ocompbuf); | |
263 | if (lastcp) | |
264 | lastcp = compex->compbuf + (lastcp - ocompbuf); | |
265 | for (tmpalt = compex->alternatives; tmpalt < alt; | |
266 | tmpalt++) | |
267 | if (*tmpalt) | |
268 | *tmpalt = | |
269 | compex->compbuf + (*tmpalt - ocompbuf); | |
270 | } | |
271 | } | |
272 | for (i = BMAPSIZ; i; --i) | |
273 | cp[i] = 0; | |
274 | ||
275 | if ((c = *sp++) == '^') { | |
276 | c = *sp++; | |
277 | *cp++ = NCCL; /* negated */ | |
278 | } | |
279 | else | |
280 | *cp++ = CCL; /* normal */ | |
281 | ||
282 | i = 0; /* remember oldchar */ | |
283 | do { | |
284 | if (c == '\0') { | |
285 | #ifdef VERBOSE | |
286 | retmes = "Missing ]"; | |
287 | #endif | |
288 | goto badcomp; | |
289 | } | |
290 | if (c == '\\' && *sp) { | |
291 | switch (*sp) { | |
292 | default: | |
293 | c = *sp++; | |
294 | break; | |
295 | case '0': case '1': case '2': case '3': | |
296 | case '4': case '5': case '6': case '7': | |
297 | c = *sp++ - '0'; | |
298 | if (index("01234567",*sp)) { | |
299 | c <<= 3; | |
300 | c += *sp++ - '0'; | |
301 | } | |
302 | if (index("01234567",*sp)) { | |
303 | c <<= 3; | |
304 | c += *sp++ - '0'; | |
305 | } | |
306 | break; | |
307 | case 'b': | |
308 | c = '\b'; | |
309 | sp++; | |
310 | break; | |
311 | case 'n': | |
312 | c = '\n'; | |
313 | sp++; | |
314 | break; | |
315 | case 'r': | |
316 | c = '\r'; | |
317 | sp++; | |
318 | break; | |
319 | case 'f': | |
320 | c = '\f'; | |
321 | sp++; | |
322 | break; | |
323 | case 't': | |
324 | c = '\t'; | |
325 | sp++; | |
326 | break; | |
327 | } | |
328 | } | |
329 | if (*sp == '-' && *(++sp)) | |
330 | i = *sp++; | |
331 | else | |
332 | i = c; | |
333 | while (c <= i) { | |
334 | cp[c / BITSPERBYTE] |= 1 << (c % BITSPERBYTE); | |
335 | if (fold && isalpha(c)) | |
336 | cp[(c ^ 32) / BITSPERBYTE] |= | |
337 | 1 << ((c ^ 32) % BITSPERBYTE); | |
338 | /* set the other bit too */ | |
339 | c++; | |
340 | } | |
341 | } while ((c = *sp++) != ']'); | |
342 | if (cp[-1] == NCCL) | |
343 | cp[0] |= 1; | |
344 | cp += BMAPSIZ; | |
345 | continue; | |
346 | } | |
347 | ||
348 | case '^': | |
349 | if (cp != compex->compbuf && cp[-1] != FINIS) | |
350 | goto normal_char; | |
351 | *cp++ = BEG; | |
352 | continue; | |
353 | ||
354 | case '$': | |
355 | if (isdigit(*sp)) { | |
356 | *cp++ = REF; | |
357 | *cp++ = *sp - '0'; | |
358 | break; | |
359 | } | |
360 | if (*sp && *sp != '|') | |
361 | goto normal_char; | |
362 | *cp++ = END; | |
363 | continue; | |
364 | ||
365 | case '*': case '?': case '+': | |
366 | if (lastcp == 0 || | |
367 | (*lastcp & (MINZERO|MAXINF)) || | |
368 | *lastcp == LPAR || | |
369 | *lastcp == RPAR || | |
370 | *lastcp == BEG || | |
371 | *lastcp == END || | |
372 | *lastcp == WBOUND || | |
373 | *lastcp == NWBOUND ) | |
374 | goto normal_char; | |
375 | if (c != '+') | |
376 | *lastcp |= MINZERO; | |
377 | if (c != '?') | |
378 | *lastcp |= MAXINF; | |
379 | continue; | |
380 | ||
381 | case '(': | |
382 | if (compex->numsubs >= MAXSUB) { | |
383 | #ifdef VERBOSE | |
384 | retmes = "Too many parens"; | |
385 | #endif | |
386 | goto badcomp; | |
387 | } | |
388 | *parenp++ = ++compex->numsubs; | |
389 | *cp++ = LPAR; | |
390 | *cp++ = compex->numsubs; | |
391 | break; | |
392 | case ')': | |
393 | if (parenp <= paren) { | |
394 | #ifdef VERBOSE | |
395 | retmes = "Unmatched right paren"; | |
396 | #endif | |
397 | goto badcomp; | |
398 | } | |
399 | *cp++ = RPAR; | |
400 | *cp++ = *--parenp; | |
401 | break; | |
402 | case '|': | |
403 | if (parenp>paren) { | |
404 | #ifdef VERBOSE | |
405 | retmes = "No | in subpattern"; /* Sigh! */ | |
406 | #endif | |
407 | goto badcomp; | |
408 | } | |
409 | *cp++ = FINIS; | |
410 | if (alt - compex->alternatives >= MAXALT) { | |
411 | #ifdef VERBOSE | |
412 | retmes = "Too many alternatives"; | |
413 | #endif | |
414 | goto badcomp; | |
415 | } | |
416 | *alt++ = cp; | |
417 | break; | |
418 | case '\\': /* backslashed thingie */ | |
419 | switch (c = *sp++) { | |
420 | case '0': case '1': case '2': case '3': case '4': | |
421 | case '5': case '6': case '7': case '8': case '9': | |
422 | *cp++ = REF; | |
423 | *cp++ = c - '0'; | |
424 | break; | |
425 | case 'w': | |
426 | *cp++ = WORD; | |
427 | break; | |
428 | case 'W': | |
429 | *cp++ = NWORD; | |
430 | break; | |
431 | case 'b': | |
432 | *cp++ = WBOUND; | |
433 | break; | |
434 | case 'B': | |
435 | *cp++ = NWBOUND; | |
436 | break; | |
437 | default: | |
438 | *cp++ = CHAR; | |
439 | if (c == '\0') | |
440 | goto badcomp; | |
441 | switch (c) { | |
442 | case 'n': | |
443 | c = '\n'; | |
444 | break; | |
445 | case 'r': | |
446 | c = '\r'; | |
447 | break; | |
448 | case 'f': | |
449 | c = '\f'; | |
450 | break; | |
451 | case 't': | |
452 | c = '\t'; | |
453 | break; | |
454 | } | |
455 | *cp++ = c; | |
456 | break; | |
457 | } | |
458 | break; | |
459 | } | |
460 | } | |
461 | badcomp: | |
462 | compex->compbuf[0] = 0; | |
463 | compex->numsubs = 0; | |
464 | return retmes; | |
465 | } | |
466 | ||
467 | void | |
468 | grow_comp(compex) | |
469 | register COMPEX *compex; | |
470 | { | |
471 | compex->complen += 80; | |
472 | compex->compbuf = saferealloc(compex->compbuf, (MEM_SIZE)compex->complen + 4); | |
473 | } | |
474 | ||
475 | char * | |
476 | execute(compex, addr, beginning, minend) | |
477 | register COMPEX *compex; | |
478 | char *addr; | |
479 | bool beginning; | |
480 | int minend; | |
481 | { | |
482 | register char *p1 = addr; | |
483 | register char *trt = trans; | |
484 | register int c; | |
485 | register int scr; | |
486 | register int c2; | |
487 | ||
488 | if (addr == Nullch) | |
489 | return Nullch; | |
490 | if (compex->numsubs) { /* any submatches? */ | |
491 | for (c = 0; c <= compex->numsubs; c++) | |
492 | compex->subbeg[c] = compex->subend[c] = Nullch; | |
493 | } | |
494 | case_fold(compex->do_folding); /* make sure table is correct */ | |
495 | if (beginning) | |
496 | FirstCharacter = p1; /* for ^ tests */ | |
497 | else { | |
498 | if (multiline || compex->alternatives[1] || compex->compbuf[0] != BEG) | |
499 | FirstCharacter = Nullch; | |
500 | else | |
501 | return Nullch; /* can't match */ | |
502 | } | |
503 | matchend = Nullch; | |
504 | matchtill = addr + minend; | |
505 | err = 0; | |
506 | if (compex->compbuf[0] == CHAR && !compex->alternatives[1]) { | |
507 | if (compex->do_folding) { | |
508 | c = compex->compbuf[1]; /* fast check for first character */ | |
509 | do { | |
510 | if (trt[*p1] == c && try(compex, p1, compex->compbuf)) | |
511 | goto got_it; | |
512 | } while (*p1++ && !err); | |
513 | } | |
514 | else { | |
515 | c = compex->compbuf[1]; /* faster check for first character */ | |
516 | if (compex->compbuf[2] == CHAR) | |
517 | c2 = compex->compbuf[3]; | |
518 | else | |
519 | c2 = 0; | |
520 | do { | |
521 | false_alarm: | |
522 | while (scr = *p1++, scr && scr != c) ; | |
523 | if (!scr) | |
524 | break; | |
525 | if (c2 && *p1 != c2) /* and maybe even second character */ | |
526 | goto false_alarm; | |
527 | if (try(compex, p1, compex->compbuf+2)) { | |
528 | p1--; | |
529 | goto got_it; | |
530 | } | |
531 | } while (!err); | |
532 | } | |
533 | return Nullch; | |
534 | } | |
535 | else { /* normal algorithm */ | |
536 | do { | |
537 | register char **alt = compex->alternatives; | |
538 | while (*alt) { | |
539 | if (try(compex, p1, *alt++)) | |
540 | goto got_it; | |
541 | } | |
542 | } while (*p1++ && err < FATAL); | |
543 | return Nullch; | |
544 | } | |
545 | ||
546 | got_it: | |
547 | if (compex->numsubs) { /* any parens? */ | |
548 | trt = savestr(addr); /* in case addr is not static */ | |
549 | if (compex->subbase) | |
550 | safefree(compex->subbase); /* (may be freeing addr!) */ | |
551 | compex->subbase = trt; | |
552 | scr = compex->subbase - addr; | |
553 | p1 += scr; | |
554 | matchend += scr; | |
555 | for (c = 0; c <= compex->numsubs; c++) { | |
556 | if (compex->subend[c]) { | |
557 | compex->subbeg[c] += scr; | |
558 | compex->subend[c] += scr; | |
559 | } | |
560 | } | |
561 | } | |
562 | compex->subend[0] = matchend; | |
563 | compex->subbeg[0] = p1; | |
564 | return p1; | |
565 | } | |
566 | ||
567 | bool | |
568 | try(compex, sp, cp) | |
569 | COMPEX *compex; | |
570 | register char *cp; | |
571 | register char *sp; | |
572 | { | |
573 | register char *basesp; | |
574 | register char *trt = trans; | |
575 | register int i; | |
576 | register int backlen; | |
577 | register int code; | |
578 | ||
579 | while (*sp || (*cp & MAXINF) || *cp == BEG || *cp == RPAR || | |
580 | *cp == WBOUND || *cp == NWBOUND) { | |
581 | switch ((code = *cp++) & CODEMASK) { | |
582 | ||
583 | case CHAR: | |
584 | basesp = sp; | |
585 | i = *cp++; | |
586 | if (code & MAXINF) | |
587 | while (*sp && trt[*sp] == i) sp++; | |
588 | else | |
589 | if (*sp && trt[*sp] == i) sp++; | |
590 | backlen = 1; | |
591 | goto backoff; | |
592 | ||
593 | backoff: | |
594 | while (sp > basesp) { | |
595 | if (try(compex, sp, cp)) | |
596 | goto right; | |
597 | sp -= backlen; | |
598 | } | |
599 | if (code & MINZERO) | |
600 | continue; | |
601 | goto wrong; | |
602 | ||
603 | case ANY: | |
604 | basesp = sp; | |
605 | if (code & MAXINF) | |
606 | while (*sp && *sp != '\n') sp++; | |
607 | else | |
608 | if (*sp && *sp != '\n') sp++; | |
609 | backlen = 1; | |
610 | goto backoff; | |
611 | ||
612 | case CCL: | |
613 | basesp = sp; | |
614 | if (code & MAXINF) | |
615 | while (*sp && cclass(cp, *sp, 1)) sp++; | |
616 | else | |
617 | if (*sp && cclass(cp, *sp, 1)) sp++; | |
618 | cp += BMAPSIZ; | |
619 | backlen = 1; | |
620 | goto backoff; | |
621 | ||
622 | case NCCL: | |
623 | basesp = sp; | |
624 | if (code & MAXINF) | |
625 | while (*sp && cclass(cp, *sp, 0)) sp++; | |
626 | else | |
627 | if (*sp && cclass(cp, *sp, 0)) sp++; | |
628 | cp += BMAPSIZ; | |
629 | backlen = 1; | |
630 | goto backoff; | |
631 | ||
632 | case END: | |
633 | if (!*sp || *sp == '\n') { | |
634 | matchtill--; | |
635 | continue; | |
636 | } | |
637 | goto wrong; | |
638 | ||
639 | case BEG: | |
640 | if (sp == FirstCharacter || ( | |
641 | *sp && sp[-1] == '\n') ) { | |
642 | matchtill--; | |
643 | continue; | |
644 | } | |
645 | if (!multiline) /* no point in advancing more */ | |
646 | err = BEGFAIL; | |
647 | goto wrong; | |
648 | ||
649 | case WORD: | |
650 | basesp = sp; | |
651 | if (code & MAXINF) | |
652 | while (*sp && isalnum(*sp)) sp++; | |
653 | else | |
654 | if (*sp && isalnum(*sp)) sp++; | |
655 | backlen = 1; | |
656 | goto backoff; | |
657 | ||
658 | case NWORD: | |
659 | basesp = sp; | |
660 | if (code & MAXINF) | |
661 | while (*sp && !isalnum(*sp)) sp++; | |
662 | else | |
663 | if (*sp && !isalnum(*sp)) sp++; | |
664 | backlen = 1; | |
665 | goto backoff; | |
666 | ||
667 | case WBOUND: | |
668 | if ((sp == FirstCharacter || !isalnum(sp[-1])) != | |
669 | (!*sp || !isalnum(*sp)) ) | |
670 | continue; | |
671 | goto wrong; | |
672 | ||
673 | case NWBOUND: | |
674 | if ((sp == FirstCharacter || !isalnum(sp[-1])) == | |
675 | (!*sp || !isalnum(*sp))) | |
676 | continue; | |
677 | goto wrong; | |
678 | ||
679 | case FINIS: | |
680 | goto right; | |
681 | ||
682 | case LPAR: | |
683 | compex->subbeg[*cp++] = sp; | |
684 | continue; | |
685 | ||
686 | case RPAR: | |
687 | i = *cp++; | |
688 | compex->subend[i] = sp; | |
689 | compex->lastparen = i; | |
690 | continue; | |
691 | ||
692 | case REF: | |
693 | if (compex->subend[i = *cp++] == 0) { | |
694 | fputs("Bad subpattern reference\n",stdout) FLUSH; | |
695 | err = FATAL; | |
696 | goto wrong; | |
697 | } | |
698 | basesp = sp; | |
699 | backlen = compex->subend[i] - compex->subbeg[i]; | |
700 | if (code & MAXINF) | |
701 | while (*sp && subpat(compex, i, sp)) sp += backlen; | |
702 | else | |
703 | if (*sp && subpat(compex, i, sp)) sp += backlen; | |
704 | goto backoff; | |
705 | ||
706 | default: | |
707 | fputs("Botched pattern compilation\n",stdout) FLUSH; | |
708 | err = FATAL; | |
709 | return -1; | |
710 | } | |
711 | } | |
712 | if (*cp == FINIS || *cp == END) { | |
713 | right: | |
714 | if (matchend == Nullch || sp > matchend) | |
715 | matchend = sp; | |
716 | return matchend >= matchtill; | |
717 | } | |
718 | wrong: | |
719 | matchend = Nullch; | |
720 | return FALSE; | |
721 | } | |
722 | ||
723 | bool | |
724 | subpat(compex, i, sp) | |
725 | register COMPEX *compex; | |
726 | register int i; | |
727 | register char *sp; | |
728 | { | |
729 | register char *bp; | |
730 | ||
731 | bp = compex->subbeg[i]; | |
732 | while (*sp && *bp == *sp) { | |
733 | bp++; | |
734 | sp++; | |
735 | if (bp >= compex->subend[i]) | |
736 | return TRUE; | |
737 | } | |
738 | return FALSE; | |
739 | } | |
740 | ||
741 | bool | |
742 | cclass(set, c, af) | |
743 | register char *set; | |
744 | register int c; | |
745 | { | |
746 | c &= 0177; | |
747 | #if BITSPERBYTE == 8 | |
748 | if (set[c >> 3] & 1 << (c & 7)) | |
749 | #else | |
750 | if (set[c / BITSPERBYTE] & 1 << (c % BITSPERBYTE)) | |
751 | #endif | |
752 | return af; | |
753 | return !af; | |
754 | } |