This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
change#3612 was buggy and failed to build Tk; applied Ilya's
[perl5.git] / regexec.c
... / ...
CommitLineData
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
34/* We *really* need to overwrite these symbols: */
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
38# define Perl_re_intuit_start my_re_intuit_start
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
41# define Perl_reginitcolors my_reginitcolors
42#endif
43
44/*SUPPRESS 112*/
45/*
46 * pregcomp and pregexec -- regsub and regerror are not used in perl
47 *
48 * Copyright (c) 1986 by University of Toronto.
49 * Written by Henry Spencer. Not derived from licensed software.
50 *
51 * Permission is granted to anyone to use this software for any
52 * purpose on any computer system, and to redistribute it freely,
53 * subject to the following restrictions:
54 *
55 * 1. The author is not responsible for the consequences of use of
56 * this software, no matter how awful, even if they arise
57 * from defects in it.
58 *
59 * 2. The origin of this software must not be misrepresented, either
60 * by explicit claim or by omission.
61 *
62 * 3. Altered versions must be plainly marked as such, and must not
63 * be misrepresented as being the original software.
64 *
65 **** Alterations to Henry's code are...
66 ****
67 **** Copyright (c) 1991-1999, Larry Wall
68 ****
69 **** You may distribute under the terms of either the GNU General Public
70 **** License or the Artistic License, as specified in the README file.
71 *
72 * Beware that some of this code is subtly aware of the way operator
73 * precedence is structured in regular expressions. Serious changes in
74 * regular-expression syntax might require a total rethink.
75 */
76#include "EXTERN.h"
77#define PERL_IN_REGEXEC_C
78#include "perl.h"
79
80#include "regcomp.h"
81
82#define RF_tainted 1 /* tainted information used? */
83#define RF_warned 2 /* warned about big count? */
84#define RF_evaled 4 /* Did an EVAL with setting? */
85#define RF_utf8 8 /* String contains multibyte chars? */
86
87#define UTF (PL_reg_flags & RF_utf8)
88
89#define RS_init 1 /* eval environment created */
90#define RS_set 2 /* replsv value is set */
91
92#ifndef STATIC
93#define STATIC static
94#endif
95
96/*
97 * Forwards.
98 */
99
100#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
101#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
102
103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
104#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
105
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
108#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
112
113static void restore_pos(pTHXo_ void *arg);
114
115
116STATIC CHECKPOINT
117S_regcppush(pTHX_ I32 parenfloor)
118{
119 dTHR;
120 int retval = PL_savestack_ix;
121 int i = (PL_regsize - parenfloor) * 4;
122 int p;
123
124 SSCHECK(i + 5);
125 for (p = PL_regsize; p > parenfloor; p--) {
126 SSPUSHINT(PL_regendp[p]);
127 SSPUSHINT(PL_regstartp[p]);
128 SSPUSHPTR(PL_reg_start_tmp[p]);
129 SSPUSHINT(p);
130 }
131 SSPUSHINT(PL_regsize);
132 SSPUSHINT(*PL_reglastparen);
133 SSPUSHPTR(PL_reginput);
134 SSPUSHINT(i + 3);
135 SSPUSHINT(SAVEt_REGCONTEXT);
136 return retval;
137}
138
139/* These are needed since we do not localize EVAL nodes: */
140# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
141 " Setting an EVAL scope, savestack=%i\n", \
142 PL_savestack_ix)); lastcp = PL_savestack_ix
143
144# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
145 PerlIO_printf(Perl_debug_log, \
146 " Clearing an EVAL scope, savestack=%i..%i\n", \
147 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
148
149STATIC char *
150S_regcppop(pTHX)
151{
152 dTHR;
153 I32 i = SSPOPINT;
154 U32 paren = 0;
155 char *input;
156 I32 tmps;
157 assert(i == SAVEt_REGCONTEXT);
158 i = SSPOPINT;
159 input = (char *) SSPOPPTR;
160 *PL_reglastparen = SSPOPINT;
161 PL_regsize = SSPOPINT;
162 for (i -= 3; i > 0; i -= 4) {
163 paren = (U32)SSPOPINT;
164 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
165 PL_regstartp[paren] = SSPOPINT;
166 tmps = SSPOPINT;
167 if (paren <= *PL_reglastparen)
168 PL_regendp[paren] = tmps;
169 DEBUG_r(
170 PerlIO_printf(Perl_debug_log,
171 " restoring \\%d to %d(%d)..%d%s\n",
172 paren, PL_regstartp[paren],
173 PL_reg_start_tmp[paren] - PL_bostr,
174 PL_regendp[paren],
175 (paren > *PL_reglastparen ? "(no)" : ""));
176 );
177 }
178 DEBUG_r(
179 if (*PL_reglastparen + 1 <= PL_regnpar) {
180 PerlIO_printf(Perl_debug_log,
181 " restoring \\%d..\\%d to undef\n",
182 *PL_reglastparen + 1, PL_regnpar);
183 }
184 );
185 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
186 if (paren > PL_regsize)
187 PL_regstartp[paren] = -1;
188 PL_regendp[paren] = -1;
189 }
190 return input;
191}
192
193STATIC char *
194S_regcp_set_to(pTHX_ I32 ss)
195{
196 dTHR;
197 I32 tmp = PL_savestack_ix;
198
199 PL_savestack_ix = ss;
200 regcppop();
201 PL_savestack_ix = tmp;
202 return Nullch;
203}
204
205typedef struct re_cc_state
206{
207 I32 ss;
208 regnode *node;
209 struct re_cc_state *prev;
210 CURCUR *cc;
211 regexp *re;
212} re_cc_state;
213
214#define regcpblow(cp) LEAVE_SCOPE(cp)
215
216/*
217 * pregexec and friends
218 */
219
220/*
221 - pregexec - match a regexp against a string
222 */
223I32
224Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
225 char *strbeg, I32 minend, SV *screamer, U32 nosave)
226/* strend: pointer to null at end of string */
227/* strbeg: real beginning of string */
228/* minend: end of match must be >=minend after stringarg. */
229/* nosave: For optimizations. */
230{
231 return
232 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
233 nosave ? 0 : REXEC_COPY_STR);
234}
235
236STATIC void
237S_cache_re(pTHX_ regexp *prog)
238{
239 dTHR;
240 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
241#ifdef DEBUGGING
242 PL_regprogram = prog->program;
243#endif
244 PL_regnpar = prog->nparens;
245 PL_regdata = prog->data;
246 PL_reg_re = prog;
247}
248
249/*
250 * Need to implement the following flags for reg_anch:
251 *
252 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
253 * USE_INTUIT_ML
254 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
255 * INTUIT_AUTORITATIVE_ML
256 * INTUIT_ONCE_NOML - Intuit can match in one location only.
257 * INTUIT_ONCE_ML
258 *
259 * Another flag for this function: SECOND_TIME (so that float substrs
260 * with giant delta may be not rechecked).
261 */
262
263/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
264
265/* If SCREAM, then sv should be compatible with strpos and strend.
266 Otherwise, only SvCUR(sv) is used to get strbeg. */
267
268/* XXXX We assume that strpos is strbeg unless sv. */
269
270char *
271Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
272 char *strend, U32 flags, re_scream_pos_data *data)
273{
274 I32 start_shift;
275 /* Should be nonnegative! */
276 I32 end_shift;
277 char *s;
278 char *t;
279 I32 ml_anch;
280
281 DEBUG_r( if (!PL_colorset) reginitcolors() );
282 DEBUG_r(PerlIO_printf(Perl_debug_log,
283 "%sGuessing start of match:%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
284 PL_colors[4],PL_colors[5],PL_colors[0],
285 prog->precomp,
286 PL_colors[1],
287 (strlen(prog->precomp) > 60 ? "..." : ""),
288 PL_colors[0],
289 (strend - strpos > 60 ? 60 : strend - strpos),
290 strpos, PL_colors[1],
291 (strend - strpos > 60 ? "..." : ""))
292 );
293
294 if (prog->minlen > strend - strpos)
295 goto fail;
296
297 /* XXXX Move further down? */
298 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
299 /* Should be nonnegative! */
300 end_shift = prog->minlen - start_shift -
301 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
302
303 if (prog->reganch & ROPT_ANCH) {
304 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
305 || ( (prog->reganch & ROPT_ANCH_BOL)
306 && !PL_multiline ) );
307
308 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
309 /* Anchored... */
310 I32 slen;
311
312 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
313 && (sv && (strpos + SvCUR(sv) != strend)) )
314 goto fail;
315
316 s = (char*)HOP((U8*)strpos, prog->check_offset_min);
317 if (SvTAIL(prog->check_substr)) {
318 slen = SvCUR(prog->check_substr); /* >= 1 */
319
320 if ( strend - s > slen || strend - s < slen - 1 ) {
321 s = Nullch;
322 goto finish;
323 }
324 if ( strend - s == slen && strend[-1] != '\n') {
325 s = Nullch;
326 goto finish;
327 }
328 /* Now should match s[0..slen-2] */
329 slen--;
330 if (slen && (*SvPVX(prog->check_substr) != *s
331 || (slen > 1
332 && memNE(SvPVX(prog->check_substr), s, slen))))
333 s = Nullch;
334 }
335 else if (*SvPVX(prog->check_substr) != *s
336 || ((slen = SvCUR(prog->check_substr)) > 1
337 && memNE(SvPVX(prog->check_substr), s, slen)))
338 s = Nullch;
339 else
340 s = strpos;
341 goto finish;
342 }
343 s = strpos;
344 if (!ml_anch && (s + prog->check_offset_max < strend - prog->minlen))
345 end_shift += strend - s - prog->minlen - prog->check_offset_max;
346 }
347 else {
348 ml_anch = 0;
349 s = strpos;
350 }
351
352 restart:
353 if (flags & REXEC_SCREAM) {
354 SV *c = prog->check_substr;
355 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
356 I32 p = -1; /* Internal iterator of scream. */
357 I32 *pp = data ? data->scream_pos : &p;
358
359 if (PL_screamfirst[BmRARE(c)] >= 0
360 || ( BmRARE(c) == '\n'
361 && (BmPREVIOUS(c) == SvCUR(c) - 1)
362 && SvTAIL(c) ))
363 s = screaminstr(sv, prog->check_substr,
364 start_shift + (strpos - strbeg), end_shift, pp, 0);
365 else
366 s = Nullch;
367 if (data)
368 *data->scream_olds = s;
369 }
370 else
371 s = fbm_instr((unsigned char*)s + start_shift,
372 (unsigned char*)strend - end_shift,
373 prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0);
374
375 /* Update the count-of-usability, remove useless subpatterns,
376 unshift s. */
377 finish:
378 if (!s) {
379 ++BmUSEFUL(prog->check_substr); /* hooray */
380 goto fail; /* not present */
381 }
382 else if (s - strpos > prog->check_offset_max &&
383 ((prog->reganch & ROPT_UTF8)
384 ? ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
385 && t >= strpos)
386 : (t = s - prog->check_offset_max) != 0) ) {
387 if (ml_anch && t[-1] != '\n') {
388 find_anchor:
389 while (t < strend - end_shift - prog->minlen) {
390 if (*t == '\n') {
391 if (t < s - prog->check_offset_min) {
392 s = t + 1;
393 goto set_useful;
394 }
395 s = t + 1;
396 goto restart;
397 }
398 t++;
399 }
400 s = Nullch;
401 goto finish;
402 }
403 s = t;
404 set_useful:
405 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
406 }
407 else {
408 if (ml_anch && sv
409 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
410 t = strpos;
411 goto find_anchor;
412 }
413 if (!(prog->reganch & ROPT_NAUGHTY)
414 && --BmUSEFUL(prog->check_substr) < 0
415 && prog->check_substr == prog->float_substr) { /* boo */
416 /* If flags & SOMETHING - do not do it many times on the same match */
417 SvREFCNT_dec(prog->check_substr);
418 prog->check_substr = Nullsv; /* disable */
419 prog->float_substr = Nullsv; /* clear */
420 s = strpos;
421 prog->reganch &= ~RE_USE_INTUIT;
422 }
423 else
424 s = strpos;
425 }
426
427 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sFound%s at offset %ld\n",
428 PL_colors[4],PL_colors[5], (long)(s - strpos)) );
429 return s;
430 fail:
431 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sNot found...%s\n",
432 PL_colors[4],PL_colors[5]));
433 return Nullch;
434}
435
436/*
437 - regexec_flags - match a regexp against a string
438 */
439I32
440Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
441 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
442/* strend: pointer to null at end of string */
443/* strbeg: real beginning of string */
444/* minend: end of match must be >=minend after stringarg. */
445/* data: May be used for some additional optimizations. */
446/* nosave: For optimizations. */
447{
448 dTHR;
449 register char *s;
450 register regnode *c;
451 register char *startpos = stringarg;
452 register I32 tmp;
453 I32 minlen; /* must match at least this many chars */
454 I32 dontbother = 0; /* how many characters not to try at end */
455 CURCUR cc;
456 I32 start_shift = 0; /* Offset of the start to find
457 constant substr. */ /* CC */
458 I32 end_shift = 0; /* Same for the end. */ /* CC */
459 I32 scream_pos = -1; /* Internal iterator of scream. */
460 char *scream_olds;
461 SV* oreplsv = GvSV(PL_replgv);
462
463 cc.cur = 0;
464 cc.oldcc = 0;
465 PL_regcc = &cc;
466
467 cache_re(prog);
468#ifdef DEBUGGING
469 PL_regnarrate = PL_debug & 512;
470#endif
471
472 /* Be paranoid... */
473 if (prog == NULL || startpos == NULL) {
474 Perl_croak(aTHX_ "NULL regexp parameter");
475 return 0;
476 }
477
478 minlen = prog->minlen;
479 if (strend - startpos < minlen) goto phooey;
480
481 if (startpos == strbeg) /* is ^ valid at stringarg? */
482 PL_regprev = '\n';
483 else {
484 PL_regprev = (U32)stringarg[-1];
485 if (!PL_multiline && PL_regprev == '\n')
486 PL_regprev = '\0'; /* force ^ to NOT match */
487 }
488
489 /* Check validity of program. */
490 if (UCHARAT(prog->program) != REG_MAGIC) {
491 Perl_croak(aTHX_ "corrupted regexp program");
492 }
493
494 PL_reg_flags = 0;
495 PL_reg_eval_set = 0;
496
497 if (prog->reganch & ROPT_UTF8)
498 PL_reg_flags |= RF_utf8;
499
500 /* Mark beginning of line for ^ and lookbehind. */
501 PL_regbol = startpos;
502 PL_bostr = strbeg;
503 PL_reg_sv = sv;
504
505 /* Mark end of line for $ (and such) */
506 PL_regeol = strend;
507
508 /* see how far we have to get to not match where we matched before */
509 PL_regtill = startpos+minend;
510
511 /* We start without call_cc context. */
512 PL_reg_call_cc = 0;
513
514 /* If there is a "must appear" string, look for it. */
515 s = startpos;
516
517 if (prog->reganch & ROPT_GPOS_SEEN) {
518 MAGIC *mg;
519
520 if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
521 && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
522 PL_reg_ganch = strbeg + mg->mg_len;
523 else
524 PL_reg_ganch = startpos;
525 if (prog->reganch & ROPT_ANCH_GPOS) {
526 if (s > PL_reg_ganch)
527 goto phooey;
528 s = PL_reg_ganch;
529 }
530 }
531
532 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
533 re_scream_pos_data d;
534
535 d.scream_olds = &scream_olds;
536 d.scream_pos = &scream_pos;
537 s = re_intuit_start(prog, sv, s, strend, flags, &d);
538 if (!s)
539 goto phooey; /* not present */
540 }
541
542 DEBUG_r( if (!PL_colorset) reginitcolors() );
543 DEBUG_r(PerlIO_printf(Perl_debug_log,
544 "%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
545 PL_colors[4],PL_colors[5],PL_colors[0],
546 prog->precomp,
547 PL_colors[1],
548 (strlen(prog->precomp) > 60 ? "..." : ""),
549 PL_colors[0],
550 (strend - startpos > 60 ? 60 : strend - startpos),
551 startpos, PL_colors[1],
552 (strend - startpos > 60 ? "..." : ""))
553 );
554
555 /* Simplest case: anchored match need be tried only once. */
556 /* [unless only anchor is BOL and multiline is set] */
557 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
558 if (s == startpos && regtry(prog, startpos))
559 goto got_it;
560 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
561 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
562 {
563 char *end;
564
565 if (minlen)
566 dontbother = minlen - 1;
567 end = HOPc(strend, -dontbother) - 1;
568 /* for multiline we only have to try after newlines */
569 if (prog->check_substr) {
570 while (1) {
571 if (regtry(prog, s))
572 goto got_it;
573 if (s >= end)
574 goto phooey;
575 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
576 if (!s)
577 goto phooey;
578 }
579 } else {
580 if (s > startpos)
581 s--;
582 while (s < end) {
583 if (*s++ == '\n') { /* don't need PL_utf8skip here */
584 if (regtry(prog, s))
585 goto got_it;
586 }
587 }
588 }
589 }
590 goto phooey;
591 } else if (prog->reganch & ROPT_ANCH_GPOS) {
592 if (regtry(prog, PL_reg_ganch))
593 goto got_it;
594 goto phooey;
595 }
596
597 /* Messy cases: unanchored match. */
598 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
599 /* we have /x+whatever/ */
600 /* it must be a one character string (XXXX Except UTF?) */
601 char ch = SvPVX(prog->anchored_substr)[0];
602 if (UTF) {
603 while (s < strend) {
604 if (*s == ch) {
605 if (regtry(prog, s)) goto got_it;
606 s += UTF8SKIP(s);
607 while (s < strend && *s == ch)
608 s += UTF8SKIP(s);
609 }
610 s += UTF8SKIP(s);
611 }
612 }
613 else {
614 while (s < strend) {
615 if (*s == ch) {
616 if (regtry(prog, s)) goto got_it;
617 s++;
618 while (s < strend && *s == ch)
619 s++;
620 }
621 s++;
622 }
623 }
624 }
625 /*SUPPRESS 560*/
626 else if (prog->anchored_substr != Nullsv
627 || (prog->float_substr != Nullsv
628 && prog->float_max_offset < strend - s)) {
629 SV *must = prog->anchored_substr
630 ? prog->anchored_substr : prog->float_substr;
631 I32 back_max =
632 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
633 I32 back_min =
634 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
635 I32 delta = back_max - back_min;
636 char *last = HOPc(strend, /* Cannot start after this */
637 -(I32)(CHR_SVLEN(must)
638 - (SvTAIL(must) != 0) + back_min));
639 char *last1; /* Last position checked before */
640
641 if (s > PL_bostr)
642 last1 = HOPc(s, -1);
643 else
644 last1 = s - 1; /* bogus */
645
646 /* XXXX check_substr already used to find `s', can optimize if
647 check_substr==must. */
648 scream_pos = -1;
649 dontbother = end_shift;
650 strend = HOPc(strend, -dontbother);
651 while ( (s <= last) &&
652 ((flags & REXEC_SCREAM)
653 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
654 end_shift, &scream_pos, 0))
655 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
656 (unsigned char*)strend, must,
657 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
658 if (HOPc(s, -back_max) > last1) {
659 last1 = HOPc(s, -back_min);
660 s = HOPc(s, -back_max);
661 }
662 else {
663 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
664
665 last1 = HOPc(s, -back_min);
666 s = t;
667 }
668 if (UTF) {
669 while (s <= last1) {
670 if (regtry(prog, s))
671 goto got_it;
672 s += UTF8SKIP(s);
673 }
674 }
675 else {
676 while (s <= last1) {
677 if (regtry(prog, s))
678 goto got_it;
679 s++;
680 }
681 }
682 }
683 goto phooey;
684 }
685 else if (c = prog->regstclass) {
686 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
687 char *cc;
688
689 if (minlen)
690 dontbother = minlen - 1;
691 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
692 tmp = 1;
693 /* We know what class it must start with. */
694 switch (OP(c)) {
695 case ANYOFUTF8:
696 cc = (char *) OPERAND(c);
697 while (s < strend) {
698 if (REGINCLASSUTF8(c, (U8*)s)) {
699 if (tmp && regtry(prog, s))
700 goto got_it;
701 else
702 tmp = doevery;
703 }
704 else
705 tmp = 1;
706 s += UTF8SKIP(s);
707 }
708 break;
709 case ANYOF:
710 cc = (char *) OPERAND(c);
711 while (s < strend) {
712 if (REGINCLASS(cc, *s)) {
713 if (tmp && regtry(prog, s))
714 goto got_it;
715 else
716 tmp = doevery;
717 }
718 else
719 tmp = 1;
720 s++;
721 }
722 break;
723 case BOUNDL:
724 PL_reg_flags |= RF_tainted;
725 /* FALL THROUGH */
726 case BOUND:
727 if (minlen) {
728 dontbother++;
729 strend -= 1;
730 }
731 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
732 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
733 while (s < strend) {
734 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
735 tmp = !tmp;
736 if (regtry(prog, s))
737 goto got_it;
738 }
739 s++;
740 }
741 if ((minlen || tmp) && regtry(prog,s))
742 goto got_it;
743 break;
744 case BOUNDLUTF8:
745 PL_reg_flags |= RF_tainted;
746 /* FALL THROUGH */
747 case BOUNDUTF8:
748 if (minlen) {
749 dontbother++;
750 strend = reghop_c(strend, -1);
751 }
752 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
753 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
754 while (s < strend) {
755 if (tmp == !(OP(c) == BOUND ?
756 swash_fetch(PL_utf8_alnum, (U8*)s) :
757 isALNUM_LC_utf8((U8*)s)))
758 {
759 tmp = !tmp;
760 if (regtry(prog, s))
761 goto got_it;
762 }
763 s += UTF8SKIP(s);
764 }
765 if ((minlen || tmp) && regtry(prog,s))
766 goto got_it;
767 break;
768 case NBOUNDL:
769 PL_reg_flags |= RF_tainted;
770 /* FALL THROUGH */
771 case NBOUND:
772 if (minlen) {
773 dontbother++;
774 strend -= 1;
775 }
776 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
777 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
778 while (s < strend) {
779 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
780 tmp = !tmp;
781 else if (regtry(prog, s))
782 goto got_it;
783 s++;
784 }
785 if ((minlen || !tmp) && regtry(prog,s))
786 goto got_it;
787 break;
788 case NBOUNDLUTF8:
789 PL_reg_flags |= RF_tainted;
790 /* FALL THROUGH */
791 case NBOUNDUTF8:
792 if (minlen) {
793 dontbother++;
794 strend = reghop_c(strend, -1);
795 }
796 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
797 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
798 while (s < strend) {
799 if (tmp == !(OP(c) == NBOUND ?
800 swash_fetch(PL_utf8_alnum, (U8*)s) :
801 isALNUM_LC_utf8((U8*)s)))
802 tmp = !tmp;
803 else if (regtry(prog, s))
804 goto got_it;
805 s += UTF8SKIP(s);
806 }
807 if ((minlen || !tmp) && regtry(prog,s))
808 goto got_it;
809 break;
810 case ALNUM:
811 while (s < strend) {
812 if (isALNUM(*s)) {
813 if (tmp && regtry(prog, s))
814 goto got_it;
815 else
816 tmp = doevery;
817 }
818 else
819 tmp = 1;
820 s++;
821 }
822 break;
823 case ALNUMUTF8:
824 while (s < strend) {
825 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
826 if (tmp && regtry(prog, s))
827 goto got_it;
828 else
829 tmp = doevery;
830 }
831 else
832 tmp = 1;
833 s += UTF8SKIP(s);
834 }
835 break;
836 case ALNUML:
837 PL_reg_flags |= RF_tainted;
838 while (s < strend) {
839 if (isALNUM_LC(*s)) {
840 if (tmp && regtry(prog, s))
841 goto got_it;
842 else
843 tmp = doevery;
844 }
845 else
846 tmp = 1;
847 s++;
848 }
849 break;
850 case ALNUMLUTF8:
851 PL_reg_flags |= RF_tainted;
852 while (s < strend) {
853 if (isALNUM_LC_utf8((U8*)s)) {
854 if (tmp && regtry(prog, s))
855 goto got_it;
856 else
857 tmp = doevery;
858 }
859 else
860 tmp = 1;
861 s += UTF8SKIP(s);
862 }
863 break;
864 case NALNUM:
865 while (s < strend) {
866 if (!isALNUM(*s)) {
867 if (tmp && regtry(prog, s))
868 goto got_it;
869 else
870 tmp = doevery;
871 }
872 else
873 tmp = 1;
874 s++;
875 }
876 break;
877 case NALNUMUTF8:
878 while (s < strend) {
879 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
880 if (tmp && regtry(prog, s))
881 goto got_it;
882 else
883 tmp = doevery;
884 }
885 else
886 tmp = 1;
887 s += UTF8SKIP(s);
888 }
889 break;
890 case NALNUML:
891 PL_reg_flags |= RF_tainted;
892 while (s < strend) {
893 if (!isALNUM_LC(*s)) {
894 if (tmp && regtry(prog, s))
895 goto got_it;
896 else
897 tmp = doevery;
898 }
899 else
900 tmp = 1;
901 s++;
902 }
903 break;
904 case NALNUMLUTF8:
905 PL_reg_flags |= RF_tainted;
906 while (s < strend) {
907 if (!isALNUM_LC_utf8((U8*)s)) {
908 if (tmp && regtry(prog, s))
909 goto got_it;
910 else
911 tmp = doevery;
912 }
913 else
914 tmp = 1;
915 s += UTF8SKIP(s);
916 }
917 break;
918 case SPACE:
919 while (s < strend) {
920 if (isSPACE(*s)) {
921 if (tmp && regtry(prog, s))
922 goto got_it;
923 else
924 tmp = doevery;
925 }
926 else
927 tmp = 1;
928 s++;
929 }
930 break;
931 case SPACEUTF8:
932 while (s < strend) {
933 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
934 if (tmp && regtry(prog, s))
935 goto got_it;
936 else
937 tmp = doevery;
938 }
939 else
940 tmp = 1;
941 s += UTF8SKIP(s);
942 }
943 break;
944 case SPACEL:
945 PL_reg_flags |= RF_tainted;
946 while (s < strend) {
947 if (isSPACE_LC(*s)) {
948 if (tmp && regtry(prog, s))
949 goto got_it;
950 else
951 tmp = doevery;
952 }
953 else
954 tmp = 1;
955 s++;
956 }
957 break;
958 case SPACELUTF8:
959 PL_reg_flags |= RF_tainted;
960 while (s < strend) {
961 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
962 if (tmp && regtry(prog, s))
963 goto got_it;
964 else
965 tmp = doevery;
966 }
967 else
968 tmp = 1;
969 s += UTF8SKIP(s);
970 }
971 break;
972 case NSPACE:
973 while (s < strend) {
974 if (!isSPACE(*s)) {
975 if (tmp && regtry(prog, s))
976 goto got_it;
977 else
978 tmp = doevery;
979 }
980 else
981 tmp = 1;
982 s++;
983 }
984 break;
985 case NSPACEUTF8:
986 while (s < strend) {
987 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
988 if (tmp && regtry(prog, s))
989 goto got_it;
990 else
991 tmp = doevery;
992 }
993 else
994 tmp = 1;
995 s += UTF8SKIP(s);
996 }
997 break;
998 case NSPACEL:
999 PL_reg_flags |= RF_tainted;
1000 while (s < strend) {
1001 if (!isSPACE_LC(*s)) {
1002 if (tmp && regtry(prog, s))
1003 goto got_it;
1004 else
1005 tmp = doevery;
1006 }
1007 else
1008 tmp = 1;
1009 s++;
1010 }
1011 break;
1012 case NSPACELUTF8:
1013 PL_reg_flags |= RF_tainted;
1014 while (s < strend) {
1015 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1016 if (tmp && regtry(prog, s))
1017 goto got_it;
1018 else
1019 tmp = doevery;
1020 }
1021 else
1022 tmp = 1;
1023 s += UTF8SKIP(s);
1024 }
1025 break;
1026 case DIGIT:
1027 while (s < strend) {
1028 if (isDIGIT(*s)) {
1029 if (tmp && regtry(prog, s))
1030 goto got_it;
1031 else
1032 tmp = doevery;
1033 }
1034 else
1035 tmp = 1;
1036 s++;
1037 }
1038 break;
1039 case DIGITUTF8:
1040 while (s < strend) {
1041 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1042 if (tmp && regtry(prog, s))
1043 goto got_it;
1044 else
1045 tmp = doevery;
1046 }
1047 else
1048 tmp = 1;
1049 s += UTF8SKIP(s);
1050 }
1051 break;
1052 case DIGITL:
1053 PL_reg_flags |= RF_tainted;
1054 while (s < strend) {
1055 if (isDIGIT_LC(*s)) {
1056 if (tmp && regtry(prog, s))
1057 goto got_it;
1058 else
1059 tmp = doevery;
1060 }
1061 else
1062 tmp = 1;
1063 s++;
1064 }
1065 break;
1066 case DIGITLUTF8:
1067 PL_reg_flags |= RF_tainted;
1068 while (s < strend) {
1069 if (isDIGIT_LC_utf8((U8*)s)) {
1070 if (tmp && regtry(prog, s))
1071 goto got_it;
1072 else
1073 tmp = doevery;
1074 }
1075 else
1076 tmp = 1;
1077 s += UTF8SKIP(s);
1078 }
1079 break;
1080 case NDIGIT:
1081 while (s < strend) {
1082 if (!isDIGIT(*s)) {
1083 if (tmp && regtry(prog, s))
1084 goto got_it;
1085 else
1086 tmp = doevery;
1087 }
1088 else
1089 tmp = 1;
1090 s++;
1091 }
1092 break;
1093 case NDIGITUTF8:
1094 while (s < strend) {
1095 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1096 if (tmp && regtry(prog, s))
1097 goto got_it;
1098 else
1099 tmp = doevery;
1100 }
1101 else
1102 tmp = 1;
1103 s += UTF8SKIP(s);
1104 }
1105 break;
1106 case NDIGITL:
1107 PL_reg_flags |= RF_tainted;
1108 while (s < strend) {
1109 if (!isDIGIT_LC(*s)) {
1110 if (tmp && regtry(prog, s))
1111 goto got_it;
1112 else
1113 tmp = doevery;
1114 }
1115 else
1116 tmp = 1;
1117 s++;
1118 }
1119 break;
1120 case NDIGITLUTF8:
1121 PL_reg_flags |= RF_tainted;
1122 while (s < strend) {
1123 if (!isDIGIT_LC_utf8((U8*)s)) {
1124 if (tmp && regtry(prog, s))
1125 goto got_it;
1126 else
1127 tmp = doevery;
1128 }
1129 else
1130 tmp = 1;
1131 s += UTF8SKIP(s);
1132 }
1133 break;
1134 case ALNUMC:
1135 while (s < strend) {
1136 if (isALNUMC(*s)) {
1137 if (tmp && regtry(prog, s))
1138 goto got_it;
1139 else
1140 tmp = doevery;
1141 }
1142 else
1143 tmp = 1;
1144 s++;
1145 }
1146 break;
1147 case ALNUMCUTF8:
1148 while (s < strend) {
1149 if (swash_fetch(PL_utf8_alnumc, (U8*)s)) {
1150 if (tmp && regtry(prog, s))
1151 goto got_it;
1152 else
1153 tmp = doevery;
1154 }
1155 else
1156 tmp = 1;
1157 s += UTF8SKIP(s);
1158 }
1159 break;
1160 case ALNUMCL:
1161 PL_reg_flags |= RF_tainted;
1162 while (s < strend) {
1163 if (isALNUMC_LC(*s)) {
1164 if (tmp && regtry(prog, s))
1165 goto got_it;
1166 else
1167 tmp = doevery;
1168 }
1169 else
1170 tmp = 1;
1171 s++;
1172 }
1173 break;
1174 case ALNUMCLUTF8:
1175 PL_reg_flags |= RF_tainted;
1176 while (s < strend) {
1177 if (isALNUMC_LC_utf8((U8*)s)) {
1178 if (tmp && regtry(prog, s))
1179 goto got_it;
1180 else
1181 tmp = doevery;
1182 }
1183 else
1184 tmp = 1;
1185 s += UTF8SKIP(s);
1186 }
1187 break;
1188 case NALNUMC:
1189 while (s < strend) {
1190 if (!isALNUMC(*s)) {
1191 if (tmp && regtry(prog, s))
1192 goto got_it;
1193 else
1194 tmp = doevery;
1195 }
1196 else
1197 tmp = 1;
1198 s++;
1199 }
1200 break;
1201 case NALNUMCUTF8:
1202 while (s < strend) {
1203 if (!swash_fetch(PL_utf8_alnumc, (U8*)s)) {
1204 if (tmp && regtry(prog, s))
1205 goto got_it;
1206 else
1207 tmp = doevery;
1208 }
1209 else
1210 tmp = 1;
1211 s += UTF8SKIP(s);
1212 }
1213 break;
1214 case NALNUMCL:
1215 PL_reg_flags |= RF_tainted;
1216 while (s < strend) {
1217 if (!isALNUMC_LC(*s)) {
1218 if (tmp && regtry(prog, s))
1219 goto got_it;
1220 else
1221 tmp = doevery;
1222 }
1223 else
1224 tmp = 1;
1225 s++;
1226 }
1227 break;
1228 case NALNUMCLUTF8:
1229 PL_reg_flags |= RF_tainted;
1230 while (s < strend) {
1231 if (!isALNUMC_LC_utf8((U8*)s)) {
1232 if (tmp && regtry(prog, s))
1233 goto got_it;
1234 else
1235 tmp = doevery;
1236 }
1237 else
1238 tmp = 1;
1239 s += UTF8SKIP(s);
1240 }
1241 break;
1242 case ASCII:
1243 while (s < strend) {
1244 if (isASCII(*(U8*)s)) {
1245 if (tmp && regtry(prog, s))
1246 goto got_it;
1247 else
1248 tmp = doevery;
1249 }
1250 else
1251 tmp = 1;
1252 s++;
1253 }
1254 break;
1255 case NASCII:
1256 while (s < strend) {
1257 if (!isASCII(*(U8*)s)) {
1258 if (tmp && regtry(prog, s))
1259 goto got_it;
1260 else
1261 tmp = doevery;
1262 }
1263 else
1264 tmp = 1;
1265 s++;
1266 }
1267 break;
1268 case CNTRL:
1269 while (s < strend) {
1270 if (isCNTRL(*s)) {
1271 if (tmp && regtry(prog, s))
1272 goto got_it;
1273 else
1274 tmp = doevery;
1275 }
1276 else
1277 tmp = 1;
1278 s++;
1279 }
1280 break;
1281 case CNTRLUTF8:
1282 while (s < strend) {
1283 if (swash_fetch(PL_utf8_cntrl,(U8*)s)) {
1284 if (tmp && regtry(prog, s))
1285 goto got_it;
1286 else
1287 tmp = doevery;
1288 }
1289 else
1290 tmp = 1;
1291 s += UTF8SKIP(s);
1292 }
1293 break;
1294 case CNTRLL:
1295 PL_reg_flags |= RF_tainted;
1296 while (s < strend) {
1297 if (isCNTRL_LC(*s)) {
1298 if (tmp && regtry(prog, s))
1299 goto got_it;
1300 else
1301 tmp = doevery;
1302 }
1303 else
1304 tmp = 1;
1305 s++;
1306 }
1307 break;
1308 case CNTRLLUTF8:
1309 PL_reg_flags |= RF_tainted;
1310 while (s < strend) {
1311 if (*s == ' ' || isCNTRL_LC_utf8((U8*)s)) {
1312 if (tmp && regtry(prog, s))
1313 goto got_it;
1314 else
1315 tmp = doevery;
1316 }
1317 else
1318 tmp = 1;
1319 s += UTF8SKIP(s);
1320 }
1321 break;
1322 case NCNTRL:
1323 while (s < strend) {
1324 if (!isCNTRL(*s)) {
1325 if (tmp && regtry(prog, s))
1326 goto got_it;
1327 else
1328 tmp = doevery;
1329 }
1330 else
1331 tmp = 1;
1332 s++;
1333 }
1334 break;
1335 case NCNTRLUTF8:
1336 while (s < strend) {
1337 if (!swash_fetch(PL_utf8_cntrl,(U8*)s)) {
1338 if (tmp && regtry(prog, s))
1339 goto got_it;
1340 else
1341 tmp = doevery;
1342 }
1343 else
1344 tmp = 1;
1345 s += UTF8SKIP(s);
1346 }
1347 break;
1348 case NCNTRLL:
1349 PL_reg_flags |= RF_tainted;
1350 while (s < strend) {
1351 if (!isCNTRL_LC(*s)) {
1352 if (tmp && regtry(prog, s))
1353 goto got_it;
1354 else
1355 tmp = doevery;
1356 }
1357 else
1358 tmp = 1;
1359 s++;
1360 }
1361 break;
1362 case NCNTRLLUTF8:
1363 PL_reg_flags |= RF_tainted;
1364 while (s < strend) {
1365 if (!isCNTRL_LC_utf8((U8*)s)) {
1366 if (tmp && regtry(prog, s))
1367 goto got_it;
1368 else
1369 tmp = doevery;
1370 }
1371 else
1372 tmp = 1;
1373 s += UTF8SKIP(s);
1374 }
1375 break;
1376 case GRAPH:
1377 while (s < strend) {
1378 if (isGRAPH(*s)) {
1379 if (tmp && regtry(prog, s))
1380 goto got_it;
1381 else
1382 tmp = doevery;
1383 }
1384 else
1385 tmp = 1;
1386 s++;
1387 }
1388 break;
1389 case GRAPHUTF8:
1390 while (s < strend) {
1391 if (swash_fetch(PL_utf8_graph,(U8*)s)) {
1392 if (tmp && regtry(prog, s))
1393 goto got_it;
1394 else
1395 tmp = doevery;
1396 }
1397 else
1398 tmp = 1;
1399 s += UTF8SKIP(s);
1400 }
1401 break;
1402 case GRAPHL:
1403 PL_reg_flags |= RF_tainted;
1404 while (s < strend) {
1405 if (isGRAPH_LC(*s)) {
1406 if (tmp && regtry(prog, s))
1407 goto got_it;
1408 else
1409 tmp = doevery;
1410 }
1411 else
1412 tmp = 1;
1413 s++;
1414 }
1415 break;
1416 case GRAPHLUTF8:
1417 PL_reg_flags |= RF_tainted;
1418 while (s < strend) {
1419 if (*s == ' ' || isGRAPH_LC_utf8((U8*)s)) {
1420 if (tmp && regtry(prog, s))
1421 goto got_it;
1422 else
1423 tmp = doevery;
1424 }
1425 else
1426 tmp = 1;
1427 s += UTF8SKIP(s);
1428 }
1429 break;
1430 case NGRAPH:
1431 while (s < strend) {
1432 if (!isGRAPH(*s)) {
1433 if (tmp && regtry(prog, s))
1434 goto got_it;
1435 else
1436 tmp = doevery;
1437 }
1438 else
1439 tmp = 1;
1440 s++;
1441 }
1442 break;
1443 case NGRAPHUTF8:
1444 while (s < strend) {
1445 if (!swash_fetch(PL_utf8_graph,(U8*)s)) {
1446 if (tmp && regtry(prog, s))
1447 goto got_it;
1448 else
1449 tmp = doevery;
1450 }
1451 else
1452 tmp = 1;
1453 s += UTF8SKIP(s);
1454 }
1455 break;
1456 case NGRAPHL:
1457 PL_reg_flags |= RF_tainted;
1458 while (s < strend) {
1459 if (!isGRAPH_LC(*s)) {
1460 if (tmp && regtry(prog, s))
1461 goto got_it;
1462 else
1463 tmp = doevery;
1464 }
1465 else
1466 tmp = 1;
1467 s++;
1468 }
1469 break;
1470 case NGRAPHLUTF8:
1471 PL_reg_flags |= RF_tainted;
1472 while (s < strend) {
1473 if (!isGRAPH_LC_utf8((U8*)s)) {
1474 if (tmp && regtry(prog, s))
1475 goto got_it;
1476 else
1477 tmp = doevery;
1478 }
1479 else
1480 tmp = 1;
1481 s += UTF8SKIP(s);
1482 }
1483 break;
1484 case LOWER:
1485 while (s < strend) {
1486 if (isLOWER(*s)) {
1487 if (tmp && regtry(prog, s))
1488 goto got_it;
1489 else
1490 tmp = doevery;
1491 }
1492 else
1493 tmp = 1;
1494 s++;
1495 }
1496 break;
1497 case LOWERUTF8:
1498 while (s < strend) {
1499 if (swash_fetch(PL_utf8_lower,(U8*)s)) {
1500 if (tmp && regtry(prog, s))
1501 goto got_it;
1502 else
1503 tmp = doevery;
1504 }
1505 else
1506 tmp = 1;
1507 s += UTF8SKIP(s);
1508 }
1509 break;
1510 case LOWERL:
1511 PL_reg_flags |= RF_tainted;
1512 while (s < strend) {
1513 if (isLOWER_LC(*s)) {
1514 if (tmp && regtry(prog, s))
1515 goto got_it;
1516 else
1517 tmp = doevery;
1518 }
1519 else
1520 tmp = 1;
1521 s++;
1522 }
1523 break;
1524 case LOWERLUTF8:
1525 PL_reg_flags |= RF_tainted;
1526 while (s < strend) {
1527 if (*s == ' ' || isLOWER_LC_utf8((U8*)s)) {
1528 if (tmp && regtry(prog, s))
1529 goto got_it;
1530 else
1531 tmp = doevery;
1532 }
1533 else
1534 tmp = 1;
1535 s += UTF8SKIP(s);
1536 }
1537 break;
1538 case NLOWER:
1539 while (s < strend) {
1540 if (!isLOWER(*s)) {
1541 if (tmp && regtry(prog, s))
1542 goto got_it;
1543 else
1544 tmp = doevery;
1545 }
1546 else
1547 tmp = 1;
1548 s++;
1549 }
1550 break;
1551 case NLOWERUTF8:
1552 while (s < strend) {
1553 if (!swash_fetch(PL_utf8_lower,(U8*)s)) {
1554 if (tmp && regtry(prog, s))
1555 goto got_it;
1556 else
1557 tmp = doevery;
1558 }
1559 else
1560 tmp = 1;
1561 s += UTF8SKIP(s);
1562 }
1563 break;
1564 case NLOWERL:
1565 PL_reg_flags |= RF_tainted;
1566 while (s < strend) {
1567 if (!isLOWER_LC(*s)) {
1568 if (tmp && regtry(prog, s))
1569 goto got_it;
1570 else
1571 tmp = doevery;
1572 }
1573 else
1574 tmp = 1;
1575 s++;
1576 }
1577 break;
1578 case NLOWERLUTF8:
1579 PL_reg_flags |= RF_tainted;
1580 while (s < strend) {
1581 if (!isLOWER_LC_utf8((U8*)s)) {
1582 if (tmp && regtry(prog, s))
1583 goto got_it;
1584 else
1585 tmp = doevery;
1586 }
1587 else
1588 tmp = 1;
1589 s += UTF8SKIP(s);
1590 }
1591 break;
1592 case PRINT:
1593 while (s < strend) {
1594 if (isPRINT(*s)) {
1595 if (tmp && regtry(prog, s))
1596 goto got_it;
1597 else
1598 tmp = doevery;
1599 }
1600 else
1601 tmp = 1;
1602 s++;
1603 }
1604 break;
1605 case PRINTUTF8:
1606 while (s < strend) {
1607 if (swash_fetch(PL_utf8_print,(U8*)s)) {
1608 if (tmp && regtry(prog, s))
1609 goto got_it;
1610 else
1611 tmp = doevery;
1612 }
1613 else
1614 tmp = 1;
1615 s += UTF8SKIP(s);
1616 }
1617 break;
1618 case PRINTL:
1619 PL_reg_flags |= RF_tainted;
1620 while (s < strend) {
1621 if (isPRINT_LC(*s)) {
1622 if (tmp && regtry(prog, s))
1623 goto got_it;
1624 else
1625 tmp = doevery;
1626 }
1627 else
1628 tmp = 1;
1629 s++;
1630 }
1631 break;
1632 case PRINTLUTF8:
1633 PL_reg_flags |= RF_tainted;
1634 while (s < strend) {
1635 if (*s == ' ' || isPRINT_LC_utf8((U8*)s)) {
1636 if (tmp && regtry(prog, s))
1637 goto got_it;
1638 else
1639 tmp = doevery;
1640 }
1641 else
1642 tmp = 1;
1643 s += UTF8SKIP(s);
1644 }
1645 break;
1646 case NPRINT:
1647 while (s < strend) {
1648 if (!isPRINT(*s)) {
1649 if (tmp && regtry(prog, s))
1650 goto got_it;
1651 else
1652 tmp = doevery;
1653 }
1654 else
1655 tmp = 1;
1656 s++;
1657 }
1658 break;
1659 case NPRINTUTF8:
1660 while (s < strend) {
1661 if (!swash_fetch(PL_utf8_print,(U8*)s)) {
1662 if (tmp && regtry(prog, s))
1663 goto got_it;
1664 else
1665 tmp = doevery;
1666 }
1667 else
1668 tmp = 1;
1669 s += UTF8SKIP(s);
1670 }
1671 break;
1672 case NPRINTL:
1673 PL_reg_flags |= RF_tainted;
1674 while (s < strend) {
1675 if (!isPRINT_LC(*s)) {
1676 if (tmp && regtry(prog, s))
1677 goto got_it;
1678 else
1679 tmp = doevery;
1680 }
1681 else
1682 tmp = 1;
1683 s++;
1684 }
1685 break;
1686 case NPRINTLUTF8:
1687 PL_reg_flags |= RF_tainted;
1688 while (s < strend) {
1689 if (!isPRINT_LC_utf8((U8*)s)) {
1690 if (tmp && regtry(prog, s))
1691 goto got_it;
1692 else
1693 tmp = doevery;
1694 }
1695 else
1696 tmp = 1;
1697 s += UTF8SKIP(s);
1698 }
1699 break;
1700 case PUNCT:
1701 while (s < strend) {
1702 if (isPUNCT(*s)) {
1703 if (tmp && regtry(prog, s))
1704 goto got_it;
1705 else
1706 tmp = doevery;
1707 }
1708 else
1709 tmp = 1;
1710 s++;
1711 }
1712 break;
1713 case PUNCTUTF8:
1714 while (s < strend) {
1715 if (swash_fetch(PL_utf8_punct,(U8*)s)) {
1716 if (tmp && regtry(prog, s))
1717 goto got_it;
1718 else
1719 tmp = doevery;
1720 }
1721 else
1722 tmp = 1;
1723 s += UTF8SKIP(s);
1724 }
1725 break;
1726 case PUNCTL:
1727 PL_reg_flags |= RF_tainted;
1728 while (s < strend) {
1729 if (isPUNCT_LC(*s)) {
1730 if (tmp && regtry(prog, s))
1731 goto got_it;
1732 else
1733 tmp = doevery;
1734 }
1735 else
1736 tmp = 1;
1737 s++;
1738 }
1739 break;
1740 case PUNCTLUTF8:
1741 PL_reg_flags |= RF_tainted;
1742 while (s < strend) {
1743 if (*s == ' ' || isPUNCT_LC_utf8((U8*)s)) {
1744 if (tmp && regtry(prog, s))
1745 goto got_it;
1746 else
1747 tmp = doevery;
1748 }
1749 else
1750 tmp = 1;
1751 s += UTF8SKIP(s);
1752 }
1753 break;
1754 case NPUNCT:
1755 while (s < strend) {
1756 if (!isPUNCT(*s)) {
1757 if (tmp && regtry(prog, s))
1758 goto got_it;
1759 else
1760 tmp = doevery;
1761 }
1762 else
1763 tmp = 1;
1764 s++;
1765 }
1766 break;
1767 case NPUNCTUTF8:
1768 while (s < strend) {
1769 if (!swash_fetch(PL_utf8_punct,(U8*)s)) {
1770 if (tmp && regtry(prog, s))
1771 goto got_it;
1772 else
1773 tmp = doevery;
1774 }
1775 else
1776 tmp = 1;
1777 s += UTF8SKIP(s);
1778 }
1779 break;
1780 case NPUNCTL:
1781 PL_reg_flags |= RF_tainted;
1782 while (s < strend) {
1783 if (!isPUNCT_LC(*s)) {
1784 if (tmp && regtry(prog, s))
1785 goto got_it;
1786 else
1787 tmp = doevery;
1788 }
1789 else
1790 tmp = 1;
1791 s++;
1792 }
1793 break;
1794 case NPUNCTLUTF8:
1795 PL_reg_flags |= RF_tainted;
1796 while (s < strend) {
1797 if (!isPUNCT_LC_utf8((U8*)s)) {
1798 if (tmp && regtry(prog, s))
1799 goto got_it;
1800 else
1801 tmp = doevery;
1802 }
1803 else
1804 tmp = 1;
1805 s += UTF8SKIP(s);
1806 }
1807 break;
1808 case UPPER:
1809 while (s < strend) {
1810 if (isUPPER(*s)) {
1811 if (tmp && regtry(prog, s))
1812 goto got_it;
1813 else
1814 tmp = doevery;
1815 }
1816 else
1817 tmp = 1;
1818 s++;
1819 }
1820 break;
1821 case UPPERUTF8:
1822 while (s < strend) {
1823 if (swash_fetch(PL_utf8_upper,(U8*)s)) {
1824 if (tmp && regtry(prog, s))
1825 goto got_it;
1826 else
1827 tmp = doevery;
1828 }
1829 else
1830 tmp = 1;
1831 s += UTF8SKIP(s);
1832 }
1833 break;
1834 case UPPERL:
1835 PL_reg_flags |= RF_tainted;
1836 while (s < strend) {
1837 if (isUPPER_LC(*s)) {
1838 if (tmp && regtry(prog, s))
1839 goto got_it;
1840 else
1841 tmp = doevery;
1842 }
1843 else
1844 tmp = 1;
1845 s++;
1846 }
1847 break;
1848 case UPPERLUTF8:
1849 PL_reg_flags |= RF_tainted;
1850 while (s < strend) {
1851 if (*s == ' ' || isUPPER_LC_utf8((U8*)s)) {
1852 if (tmp && regtry(prog, s))
1853 goto got_it;
1854 else
1855 tmp = doevery;
1856 }
1857 else
1858 tmp = 1;
1859 s += UTF8SKIP(s);
1860 }
1861 break;
1862 case NUPPER:
1863 while (s < strend) {
1864 if (!isUPPER(*s)) {
1865 if (tmp && regtry(prog, s))
1866 goto got_it;
1867 else
1868 tmp = doevery;
1869 }
1870 else
1871 tmp = 1;
1872 s++;
1873 }
1874 break;
1875 case NUPPERUTF8:
1876 while (s < strend) {
1877 if (!swash_fetch(PL_utf8_upper,(U8*)s)) {
1878 if (tmp && regtry(prog, s))
1879 goto got_it;
1880 else
1881 tmp = doevery;
1882 }
1883 else
1884 tmp = 1;
1885 s += UTF8SKIP(s);
1886 }
1887 break;
1888 case NUPPERL:
1889 PL_reg_flags |= RF_tainted;
1890 while (s < strend) {
1891 if (!isUPPER_LC(*s)) {
1892 if (tmp && regtry(prog, s))
1893 goto got_it;
1894 else
1895 tmp = doevery;
1896 }
1897 else
1898 tmp = 1;
1899 s++;
1900 }
1901 break;
1902 case NUPPERLUTF8:
1903 PL_reg_flags |= RF_tainted;
1904 while (s < strend) {
1905 if (!isUPPER_LC_utf8((U8*)s)) {
1906 if (tmp && regtry(prog, s))
1907 goto got_it;
1908 else
1909 tmp = doevery;
1910 }
1911 else
1912 tmp = 1;
1913 s += UTF8SKIP(s);
1914 }
1915 break;
1916 case XDIGIT:
1917 while (s < strend) {
1918 if (isXDIGIT(*s)) {
1919 if (tmp && regtry(prog, s))
1920 goto got_it;
1921 else
1922 tmp = doevery;
1923 }
1924 else
1925 tmp = 1;
1926 s++;
1927 }
1928 break;
1929 case NXDIGIT:
1930 while (s < strend) {
1931 if (!isXDIGIT(*s)) {
1932 if (tmp && regtry(prog, s))
1933 goto got_it;
1934 else
1935 tmp = doevery;
1936 }
1937 else
1938 tmp = 1;
1939 s++;
1940 }
1941 break;
1942 }
1943 }
1944 else {
1945 dontbother = 0;
1946 if (prog->float_substr != Nullsv) { /* Trim the end. */
1947 char *last;
1948 I32 oldpos = scream_pos;
1949
1950 if (flags & REXEC_SCREAM) {
1951 last = screaminstr(sv, prog->float_substr, s - strbeg,
1952 end_shift, &scream_pos, 1); /* last one */
1953 if (!last)
1954 last = scream_olds; /* Only one occurence. */
1955 }
1956 else {
1957 STRLEN len;
1958 char *little = SvPV(prog->float_substr, len);
1959
1960 if (SvTAIL(prog->float_substr)) {
1961 if (memEQ(strend - len + 1, little, len - 1))
1962 last = strend - len + 1;
1963 else if (!PL_multiline)
1964 last = memEQ(strend - len, little, len)
1965 ? strend - len : Nullch;
1966 else
1967 goto find_last;
1968 } else {
1969 find_last:
1970 if (len)
1971 last = rninstr(s, strend, little, little + len);
1972 else
1973 last = strend; /* matching `$' */
1974 }
1975 }
1976 if (last == NULL) goto phooey; /* Should not happen! */
1977 dontbother = strend - last + prog->float_min_offset;
1978 }
1979 if (minlen && (dontbother < minlen))
1980 dontbother = minlen - 1;
1981 strend -= dontbother; /* this one's always in bytes! */
1982 /* We don't know much -- general case. */
1983 if (UTF) {
1984 for (;;) {
1985 if (regtry(prog, s))
1986 goto got_it;
1987 if (s >= strend)
1988 break;
1989 s += UTF8SKIP(s);
1990 };
1991 }
1992 else {
1993 do {
1994 if (regtry(prog, s))
1995 goto got_it;
1996 } while (s++ < strend);
1997 }
1998 }
1999
2000 /* Failure. */
2001 goto phooey;
2002
2003got_it:
2004 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2005
2006 if (PL_reg_eval_set) {
2007 /* Preserve the current value of $^R */
2008 if (oreplsv != GvSV(PL_replgv))
2009 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2010 restored, the value remains
2011 the same. */
2012 restore_pos(aTHXo_ 0);
2013 }
2014
2015 /* make sure $`, $&, $', and $digit will work later */
2016 if ( !(flags & REXEC_NOT_FIRST) ) {
2017 if (RX_MATCH_COPIED(prog)) {
2018 Safefree(prog->subbeg);
2019 RX_MATCH_COPIED_off(prog);
2020 }
2021 if (flags & REXEC_COPY_STR) {
2022 I32 i = PL_regeol - startpos + (stringarg - strbeg);
2023
2024 s = savepvn(strbeg, i);
2025 prog->subbeg = s;
2026 prog->sublen = i;
2027 RX_MATCH_COPIED_on(prog);
2028 }
2029 else {
2030 prog->subbeg = strbeg;
2031 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2032 }
2033 }
2034
2035 return 1;
2036
2037phooey:
2038 if (PL_reg_eval_set)
2039 restore_pos(aTHXo_ 0);
2040 return 0;
2041}
2042
2043/*
2044 - regtry - try match at specific point
2045 */
2046STATIC I32 /* 0 failure, 1 success */
2047S_regtry(pTHX_ regexp *prog, char *startpos)
2048{
2049 dTHR;
2050 register I32 i;
2051 register I32 *sp;
2052 register I32 *ep;
2053 CHECKPOINT lastcp;
2054
2055 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2056 MAGIC *mg;
2057
2058 PL_reg_eval_set = RS_init;
2059 DEBUG_r(DEBUG_s(
2060 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
2061 PL_stack_sp - PL_stack_base);
2062 ));
2063 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
2064 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2065 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2066 SAVETMPS;
2067 /* Apparently this is not needed, judging by wantarray. */
2068 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
2069 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2070
2071 if (PL_reg_sv) {
2072 /* Make $_ available to executed code. */
2073 if (PL_reg_sv != DEFSV) {
2074 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
2075 SAVESPTR(DEFSV);
2076 DEFSV = PL_reg_sv;
2077 }
2078
2079 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
2080 && (mg = mg_find(PL_reg_sv, 'g')))) {
2081 /* prepare for quick setting of pos */
2082 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
2083 mg = mg_find(PL_reg_sv, 'g');
2084 mg->mg_len = -1;
2085 }
2086 PL_reg_magic = mg;
2087 PL_reg_oldpos = mg->mg_len;
2088 SAVEDESTRUCTOR(restore_pos, 0);
2089 }
2090 if (!PL_reg_curpm)
2091 New(22,PL_reg_curpm, 1, PMOP);
2092 PL_reg_curpm->op_pmregexp = prog;
2093 PL_reg_oldcurpm = PL_curpm;
2094 PL_curpm = PL_reg_curpm;
2095 if (RX_MATCH_COPIED(prog)) {
2096 /* Here is a serious problem: we cannot rewrite subbeg,
2097 since it may be needed if this match fails. Thus
2098 $` inside (?{}) could fail... */
2099 PL_reg_oldsaved = prog->subbeg;
2100 PL_reg_oldsavedlen = prog->sublen;
2101 RX_MATCH_COPIED_off(prog);
2102 }
2103 else
2104 PL_reg_oldsaved = Nullch;
2105 prog->subbeg = PL_bostr;
2106 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2107 }
2108 prog->startp[0] = startpos - PL_bostr;
2109 PL_reginput = startpos;
2110 PL_regstartp = prog->startp;
2111 PL_regendp = prog->endp;
2112 PL_reglastparen = &prog->lastparen;
2113 prog->lastparen = 0;
2114 PL_regsize = 0;
2115 DEBUG_r(PL_reg_starttry = startpos);
2116 if (PL_reg_start_tmpl <= prog->nparens) {
2117 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2118 if(PL_reg_start_tmp)
2119 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2120 else
2121 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2122 }
2123
2124 /* XXXX What this code is doing here?!!! There should be no need
2125 to do this again and again, PL_reglastparen should take care of
2126 this! */
2127 sp = prog->startp;
2128 ep = prog->endp;
2129 if (prog->nparens) {
2130 for (i = prog->nparens; i >= 1; i--) {
2131 *++sp = -1;
2132 *++ep = -1;
2133 }
2134 }
2135 REGCP_SET;
2136 if (regmatch(prog->program + 1)) {
2137 prog->endp[0] = PL_reginput - PL_bostr;
2138 return 1;
2139 }
2140 REGCP_UNWIND;
2141 return 0;
2142}
2143
2144/*
2145 - regmatch - main matching routine
2146 *
2147 * Conceptually the strategy is simple: check to see whether the current
2148 * node matches, call self recursively to see whether the rest matches,
2149 * and then act accordingly. In practice we make some effort to avoid
2150 * recursion, in particular by going through "ordinary" nodes (that don't
2151 * need to know whether the rest of the match failed) by a loop instead of
2152 * by recursion.
2153 */
2154/* [lwall] I've hoisted the register declarations to the outer block in order to
2155 * maybe save a little bit of pushing and popping on the stack. It also takes
2156 * advantage of machines that use a register save mask on subroutine entry.
2157 */
2158STATIC I32 /* 0 failure, 1 success */
2159S_regmatch(pTHX_ regnode *prog)
2160{
2161 dTHR;
2162 register regnode *scan; /* Current node. */
2163 regnode *next; /* Next node. */
2164 regnode *inner; /* Next node in internal branch. */
2165 register I32 nextchr; /* renamed nextchr - nextchar colides with
2166 function of same name */
2167 register I32 n; /* no or next */
2168 register I32 ln; /* len or last */
2169 register char *s; /* operand or save */
2170 register char *locinput = PL_reginput;
2171 register I32 c1, c2, paren; /* case fold search, parenth */
2172 int minmod = 0, sw = 0, logical = 0;
2173#ifdef DEBUGGING
2174 PL_regindent++;
2175#endif
2176
2177 /* Note that nextchr is a byte even in UTF */
2178 nextchr = UCHARAT(locinput);
2179 scan = prog;
2180 while (scan != NULL) {
2181#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
2182#ifdef DEBUGGING
2183# define sayYES goto yes
2184# define sayNO goto no
2185# define saySAME(x) if (x) goto yes; else goto no
2186# define REPORT_CODE_OFF 24
2187#else
2188# define sayYES return 1
2189# define sayNO return 0
2190# define saySAME(x) return x
2191#endif
2192 DEBUG_r( {
2193 SV *prop = sv_newmortal();
2194 int docolor = *PL_colors[0];
2195 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2196 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
2197 /* The part of the string before starttry has one color
2198 (pref0_len chars), between starttry and current
2199 position another one (pref_len - pref0_len chars),
2200 after the current position the third one.
2201 We assume that pref0_len <= pref_len, otherwise we
2202 decrease pref0_len. */
2203 int pref_len = (locinput - PL_bostr > (5 + taill) - l
2204 ? (5 + taill) - l : locinput - PL_bostr);
2205 int pref0_len = pref_len - (locinput - PL_reg_starttry);
2206
2207 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
2208 l = ( PL_regeol - locinput > (5 + taill) - pref_len
2209 ? (5 + taill) - pref_len : PL_regeol - locinput);
2210 if (pref0_len < 0)
2211 pref0_len = 0;
2212 if (pref0_len > pref_len)
2213 pref0_len = pref_len;
2214 regprop(prop, scan);
2215 PerlIO_printf(Perl_debug_log,
2216 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
2217 locinput - PL_bostr,
2218 PL_colors[4], pref0_len,
2219 locinput - pref_len, PL_colors[5],
2220 PL_colors[2], pref_len - pref0_len,
2221 locinput - pref_len + pref0_len, PL_colors[3],
2222 (docolor ? "" : "> <"),
2223 PL_colors[0], l, locinput, PL_colors[1],
2224 15 - l - pref_len + 1,
2225 "",
2226 scan - PL_regprogram, PL_regindent*2, "",
2227 SvPVX(prop));
2228 } );
2229
2230 next = scan + NEXT_OFF(scan);
2231 if (next == scan)
2232 next = NULL;
2233
2234 switch (OP(scan)) {
2235 case BOL:
2236 if (locinput == PL_bostr
2237 ? PL_regprev == '\n'
2238 : (PL_multiline &&
2239 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2240 {
2241 /* regtill = regbol; */
2242 break;
2243 }
2244 sayNO;
2245 case MBOL:
2246 if (locinput == PL_bostr
2247 ? PL_regprev == '\n'
2248 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2249 {
2250 break;
2251 }
2252 sayNO;
2253 case SBOL:
2254 if (locinput == PL_regbol && PL_regprev == '\n')
2255 break;
2256 sayNO;
2257 case GPOS:
2258 if (locinput == PL_reg_ganch)
2259 break;
2260 sayNO;
2261 case EOL:
2262 if (PL_multiline)
2263 goto meol;
2264 else
2265 goto seol;
2266 case MEOL:
2267 meol:
2268 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2269 sayNO;
2270 break;
2271 case SEOL:
2272 seol:
2273 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2274 sayNO;
2275 if (PL_regeol - locinput > 1)
2276 sayNO;
2277 break;
2278 case EOS:
2279 if (PL_regeol != locinput)
2280 sayNO;
2281 break;
2282 case SANYUTF8:
2283 if (nextchr & 0x80) {
2284 locinput += PL_utf8skip[nextchr];
2285 if (locinput > PL_regeol)
2286 sayNO;
2287 nextchr = UCHARAT(locinput);
2288 break;
2289 }
2290 if (!nextchr && locinput >= PL_regeol)
2291 sayNO;
2292 nextchr = UCHARAT(++locinput);
2293 break;
2294 case SANY:
2295 if (!nextchr && locinput >= PL_regeol)
2296 sayNO;
2297 nextchr = UCHARAT(++locinput);
2298 break;
2299 case ANYUTF8:
2300 if (nextchr & 0x80) {
2301 locinput += PL_utf8skip[nextchr];
2302 if (locinput > PL_regeol)
2303 sayNO;
2304 nextchr = UCHARAT(locinput);
2305 break;
2306 }
2307 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
2308 sayNO;
2309 nextchr = UCHARAT(++locinput);
2310 break;
2311 case REG_ANY:
2312 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
2313 sayNO;
2314 nextchr = UCHARAT(++locinput);
2315 break;
2316 case EXACT:
2317 s = (char *) OPERAND(scan);
2318 ln = UCHARAT(s++);
2319 /* Inline the first character, for speed. */
2320 if (UCHARAT(s) != nextchr)
2321 sayNO;
2322 if (PL_regeol - locinput < ln)
2323 sayNO;
2324 if (ln > 1 && memNE(s, locinput, ln))
2325 sayNO;
2326 locinput += ln;
2327 nextchr = UCHARAT(locinput);
2328 break;
2329 case EXACTFL:
2330 PL_reg_flags |= RF_tainted;
2331 /* FALL THROUGH */
2332 case EXACTF:
2333 s = (char *) OPERAND(scan);
2334 ln = UCHARAT(s++);
2335
2336 if (UTF) {
2337 char *l = locinput;
2338 char *e = s + ln;
2339 c1 = OP(scan) == EXACTF;
2340 while (s < e) {
2341 if (l >= PL_regeol)
2342 sayNO;
2343 if (utf8_to_uv((U8*)s, 0) != (c1 ?
2344 toLOWER_utf8((U8*)l) :
2345 toLOWER_LC_utf8((U8*)l)))
2346 {
2347 sayNO;
2348 }
2349 s += UTF8SKIP(s);
2350 l += UTF8SKIP(l);
2351 }
2352 locinput = l;
2353 nextchr = UCHARAT(locinput);
2354 break;
2355 }
2356
2357 /* Inline the first character, for speed. */
2358 if (UCHARAT(s) != nextchr &&
2359 UCHARAT(s) != ((OP(scan) == EXACTF)
2360 ? PL_fold : PL_fold_locale)[nextchr])
2361 sayNO;
2362 if (PL_regeol - locinput < ln)
2363 sayNO;
2364 if (ln > 1 && (OP(scan) == EXACTF
2365 ? ibcmp(s, locinput, ln)
2366 : ibcmp_locale(s, locinput, ln)))
2367 sayNO;
2368 locinput += ln;
2369 nextchr = UCHARAT(locinput);
2370 break;
2371 case ANYOFUTF8:
2372 s = (char *) OPERAND(scan);
2373 if (!REGINCLASSUTF8(scan, (U8*)locinput))
2374 sayNO;
2375 if (locinput >= PL_regeol)
2376 sayNO;
2377 locinput += PL_utf8skip[nextchr];
2378 nextchr = UCHARAT(locinput);
2379 break;
2380 case ANYOF:
2381 s = (char *) OPERAND(scan);
2382 if (nextchr < 0)
2383 nextchr = UCHARAT(locinput);
2384 if (!REGINCLASS(s, nextchr))
2385 sayNO;
2386 if (!nextchr && locinput >= PL_regeol)
2387 sayNO;
2388 nextchr = UCHARAT(++locinput);
2389 break;
2390 case ALNUML:
2391 PL_reg_flags |= RF_tainted;
2392 /* FALL THROUGH */
2393 case ALNUM:
2394 if (!nextchr)
2395 sayNO;
2396 if (!(OP(scan) == ALNUM
2397 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2398 sayNO;
2399 nextchr = UCHARAT(++locinput);
2400 break;
2401 case ALNUMLUTF8:
2402 PL_reg_flags |= RF_tainted;
2403 /* FALL THROUGH */
2404 case ALNUMUTF8:
2405 if (!nextchr)
2406 sayNO;
2407 if (nextchr & 0x80) {
2408 if (!(OP(scan) == ALNUMUTF8
2409 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2410 : isALNUM_LC_utf8((U8*)locinput)))
2411 {
2412 sayNO;
2413 }
2414 locinput += PL_utf8skip[nextchr];
2415 nextchr = UCHARAT(locinput);
2416 break;
2417 }
2418 if (!(OP(scan) == ALNUMUTF8
2419 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2420 sayNO;
2421 nextchr = UCHARAT(++locinput);
2422 break;
2423 case NALNUML:
2424 PL_reg_flags |= RF_tainted;
2425 /* FALL THROUGH */
2426 case NALNUM:
2427 if (!nextchr && locinput >= PL_regeol)
2428 sayNO;
2429 if (OP(scan) == NALNUM
2430 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2431 sayNO;
2432 nextchr = UCHARAT(++locinput);
2433 break;
2434 case NALNUMLUTF8:
2435 PL_reg_flags |= RF_tainted;
2436 /* FALL THROUGH */
2437 case NALNUMUTF8:
2438 if (!nextchr && locinput >= PL_regeol)
2439 sayNO;
2440 if (nextchr & 0x80) {
2441 if (OP(scan) == NALNUMUTF8
2442 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2443 : isALNUM_LC_utf8((U8*)locinput))
2444 {
2445 sayNO;
2446 }
2447 locinput += PL_utf8skip[nextchr];
2448 nextchr = UCHARAT(locinput);
2449 break;
2450 }
2451 if (OP(scan) == NALNUMUTF8
2452 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2453 sayNO;
2454 nextchr = UCHARAT(++locinput);
2455 break;
2456 case BOUNDL:
2457 case NBOUNDL:
2458 PL_reg_flags |= RF_tainted;
2459 /* FALL THROUGH */
2460 case BOUND:
2461 case NBOUND:
2462 /* was last char in word? */
2463 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2464 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2465 ln = isALNUM(ln);
2466 n = isALNUM(nextchr);
2467 }
2468 else {
2469 ln = isALNUM_LC(ln);
2470 n = isALNUM_LC(nextchr);
2471 }
2472 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
2473 sayNO;
2474 break;
2475 case BOUNDLUTF8:
2476 case NBOUNDLUTF8:
2477 PL_reg_flags |= RF_tainted;
2478 /* FALL THROUGH */
2479 case BOUNDUTF8:
2480 case NBOUNDUTF8:
2481 /* was last char in word? */
2482 ln = (locinput != PL_regbol)
2483 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2484 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2485 ln = isALNUM_uni(ln);
2486 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2487 }
2488 else {
2489 ln = isALNUM_LC_uni(ln);
2490 n = isALNUM_LC_utf8((U8*)locinput);
2491 }
2492 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
2493 sayNO;
2494 break;
2495 case SPACEL:
2496 PL_reg_flags |= RF_tainted;
2497 /* FALL THROUGH */
2498 case SPACE:
2499 if (!nextchr && locinput >= PL_regeol)
2500 sayNO;
2501 if (!(OP(scan) == SPACE
2502 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2503 sayNO;
2504 nextchr = UCHARAT(++locinput);
2505 break;
2506 case SPACELUTF8:
2507 PL_reg_flags |= RF_tainted;
2508 /* FALL THROUGH */
2509 case SPACEUTF8:
2510 if (!nextchr && locinput >= PL_regeol)
2511 sayNO;
2512 if (nextchr & 0x80) {
2513 if (!(OP(scan) == SPACEUTF8
2514 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2515 : isSPACE_LC_utf8((U8*)locinput)))
2516 {
2517 sayNO;
2518 }
2519 locinput += PL_utf8skip[nextchr];
2520 nextchr = UCHARAT(locinput);
2521 break;
2522 }
2523 if (!(OP(scan) == SPACEUTF8
2524 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2525 sayNO;
2526 nextchr = UCHARAT(++locinput);
2527 break;
2528 case NSPACEL:
2529 PL_reg_flags |= RF_tainted;
2530 /* FALL THROUGH */
2531 case NSPACE:
2532 if (!nextchr)
2533 sayNO;
2534 if (OP(scan) == SPACE
2535 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2536 sayNO;
2537 nextchr = UCHARAT(++locinput);
2538 break;
2539 case NSPACELUTF8:
2540 PL_reg_flags |= RF_tainted;
2541 /* FALL THROUGH */
2542 case NSPACEUTF8:
2543 if (!nextchr)
2544 sayNO;
2545 if (nextchr & 0x80) {
2546 if (OP(scan) == NSPACEUTF8
2547 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2548 : isSPACE_LC_utf8((U8*)locinput))
2549 {
2550 sayNO;
2551 }
2552 locinput += PL_utf8skip[nextchr];
2553 nextchr = UCHARAT(locinput);
2554 break;
2555 }
2556 if (OP(scan) == NSPACEUTF8
2557 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2558 sayNO;
2559 nextchr = UCHARAT(++locinput);
2560 break;
2561 case DIGITL:
2562 PL_reg_flags |= RF_tainted;
2563 /* FALL THROUGH */
2564 case DIGIT:
2565 if (!nextchr && locinput >= PL_regeol)
2566 sayNO;
2567 if (!(OP(scan) == DIGIT
2568 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2569 sayNO;
2570 nextchr = UCHARAT(++locinput);
2571 break;
2572 case DIGITLUTF8:
2573 PL_reg_flags |= RF_tainted;
2574 /* FALL THROUGH */
2575 case DIGITUTF8:
2576 if (!nextchr)
2577 sayNO;
2578 if (nextchr & 0x80) {
2579 if (OP(scan) == NDIGITUTF8
2580 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2581 : isDIGIT_LC_utf8((U8*)locinput))
2582 {
2583 sayNO;
2584 }
2585 locinput += PL_utf8skip[nextchr];
2586 nextchr = UCHARAT(locinput);
2587 break;
2588 }
2589 if (!isDIGIT(nextchr))
2590 sayNO;
2591 nextchr = UCHARAT(++locinput);
2592 break;
2593 case NDIGITL:
2594 PL_reg_flags |= RF_tainted;
2595 /* FALL THROUGH */
2596 case NDIGIT:
2597 if (!nextchr)
2598 sayNO;
2599 if (OP(scan) == DIGIT
2600 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2601 sayNO;
2602 nextchr = UCHARAT(++locinput);
2603 break;
2604 case NDIGITLUTF8:
2605 PL_reg_flags |= RF_tainted;
2606 /* FALL THROUGH */
2607 case NDIGITUTF8:
2608 if (!nextchr && locinput >= PL_regeol)
2609 sayNO;
2610 if (nextchr & 0x80) {
2611 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
2612 sayNO;
2613 locinput += PL_utf8skip[nextchr];
2614 nextchr = UCHARAT(locinput);
2615 break;
2616 }
2617 if (isDIGIT(nextchr))
2618 sayNO;
2619 nextchr = UCHARAT(++locinput);
2620 break;
2621 case ALNUMCL:
2622 PL_reg_flags |= RF_tainted;
2623 /* FALL THROUGH */
2624 case ALNUMC:
2625 if (!nextchr)
2626 sayNO;
2627 if (!(OP(scan) == ALNUMC
2628 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
2629 sayNO;
2630 nextchr = UCHARAT(++locinput);
2631 break;
2632 case ALNUMCLUTF8:
2633 PL_reg_flags |= RF_tainted;
2634 /* FALL THROUGH */
2635 case ALNUMCUTF8:
2636 if (!nextchr)
2637 sayNO;
2638 if (nextchr & 0x80) {
2639 if (!(OP(scan) == ALNUMCUTF8
2640 ? swash_fetch(PL_utf8_alnumc, (U8*)locinput)
2641 : isALNUMC_LC_utf8((U8*)locinput)))
2642 {
2643 sayNO;
2644 }
2645 locinput += PL_utf8skip[nextchr];
2646 nextchr = UCHARAT(locinput);
2647 break;
2648 }
2649 if (!(OP(scan) == ALNUMCUTF8
2650 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr)))
2651 sayNO;
2652 nextchr = UCHARAT(++locinput);
2653 break;
2654 case NALNUMCL:
2655 PL_reg_flags |= RF_tainted;
2656 /* FALL THROUGH */
2657 case NALNUMC:
2658 if (!nextchr)
2659 sayNO;
2660 if (OP(scan) == ALNUMC
2661 ? isALNUMC(nextchr) : isALNUMC_LC(nextchr))
2662 sayNO;
2663 nextchr = UCHARAT(++locinput);
2664 break;
2665 case NALNUMCLUTF8:
2666 PL_reg_flags |= RF_tainted;
2667 /* FALL THROUGH */
2668 case NALNUMCUTF8:
2669 if (!nextchr && locinput >= PL_regeol)
2670 sayNO;
2671 if (nextchr & 0x80) {
2672 if (swash_fetch(PL_utf8_alnumc,(U8*)locinput))
2673 sayNO;
2674 locinput += PL_utf8skip[nextchr];
2675 nextchr = UCHARAT(locinput);
2676 break;
2677 }
2678 if (isALNUMC(nextchr))
2679 sayNO;
2680 nextchr = UCHARAT(++locinput);
2681 break;
2682 case ALPHAL:
2683 PL_reg_flags |= RF_tainted;
2684 /* FALL THROUGH */
2685 case ALPHA:
2686 if (!nextchr)
2687 sayNO;
2688 if (!(OP(scan) == ALPHA
2689 ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
2690 sayNO;
2691 nextchr = UCHARAT(++locinput);
2692 break;
2693 case ALPHALUTF8:
2694 PL_reg_flags |= RF_tainted;
2695 /* FALL THROUGH */
2696 case ALPHAUTF8:
2697 if (!nextchr)
2698 sayNO;
2699 if (nextchr & 0x80) {
2700 if (!(OP(scan) == ALPHAUTF8
2701 ? swash_fetch(PL_utf8_alpha, (U8*)locinput)
2702 : isALPHA_LC_utf8((U8*)locinput)))
2703 {
2704 sayNO;
2705 }
2706 locinput += PL_utf8skip[nextchr];
2707 nextchr = UCHARAT(locinput);
2708 break;
2709 }
2710 if (!(OP(scan) == ALPHAUTF8
2711 ? isALPHA(nextchr) : isALPHA_LC(nextchr)))
2712 sayNO;
2713 nextchr = UCHARAT(++locinput);
2714 break;
2715 case NALPHAL:
2716 PL_reg_flags |= RF_tainted;
2717 /* FALL THROUGH */
2718 case NALPHA:
2719 if (!nextchr)
2720 sayNO;
2721 if (OP(scan) == ALPHA
2722 ? isALPHA(nextchr) : isALPHA_LC(nextchr))
2723 sayNO;
2724 nextchr = UCHARAT(++locinput);
2725 break;
2726 case NALPHALUTF8:
2727 PL_reg_flags |= RF_tainted;
2728 /* FALL THROUGH */
2729 case NALPHAUTF8:
2730 if (!nextchr && locinput >= PL_regeol)
2731 sayNO;
2732 if (nextchr & 0x80) {
2733 if (swash_fetch(PL_utf8_alpha,(U8*)locinput))
2734 sayNO;
2735 locinput += PL_utf8skip[nextchr];
2736 nextchr = UCHARAT(locinput);
2737 break;
2738 }
2739 if (isALPHA(nextchr))
2740 sayNO;
2741 nextchr = UCHARAT(++locinput);
2742 break;
2743 case ASCII:
2744 if (!nextchr && locinput >= PL_regeol)
2745 sayNO;
2746 if (!isASCII(nextchr))
2747 sayNO;
2748 nextchr = UCHARAT(++locinput);
2749 break;
2750 case NASCII:
2751 if (!nextchr && locinput >= PL_regeol)
2752 sayNO;
2753 if (isASCII(nextchr))
2754 sayNO;
2755 nextchr = UCHARAT(++locinput);
2756 break;
2757 case CNTRLL:
2758 PL_reg_flags |= RF_tainted;
2759 /* FALL THROUGH */
2760 case CNTRL:
2761 if (!nextchr)
2762 sayNO;
2763 if (!(OP(scan) == CNTRL
2764 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
2765 sayNO;
2766 nextchr = UCHARAT(++locinput);
2767 break;
2768 case CNTRLLUTF8:
2769 PL_reg_flags |= RF_tainted;
2770 /* FALL THROUGH */
2771 case CNTRLUTF8:
2772 if (!nextchr)
2773 sayNO;
2774 if (nextchr & 0x80) {
2775 if (!(OP(scan) == CNTRLUTF8
2776 ? swash_fetch(PL_utf8_cntrl, (U8*)locinput)
2777 : isCNTRL_LC_utf8((U8*)locinput)))
2778 {
2779 sayNO;
2780 }
2781 locinput += PL_utf8skip[nextchr];
2782 nextchr = UCHARAT(locinput);
2783 break;
2784 }
2785 if (!(OP(scan) == CNTRLUTF8
2786 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr)))
2787 sayNO;
2788 nextchr = UCHARAT(++locinput);
2789 break;
2790 case NCNTRLL:
2791 PL_reg_flags |= RF_tainted;
2792 /* FALL THROUGH */
2793 case NCNTRL:
2794 if (!nextchr)
2795 sayNO;
2796 if (OP(scan) == CNTRL
2797 ? isCNTRL(nextchr) : isCNTRL_LC(nextchr))
2798 sayNO;
2799 nextchr = UCHARAT(++locinput);
2800 break;
2801 case NCNTRLLUTF8:
2802 PL_reg_flags |= RF_tainted;
2803 /* FALL THROUGH */
2804 case NCNTRLUTF8:
2805 if (!nextchr && locinput >= PL_regeol)
2806 sayNO;
2807 if (nextchr & 0x80) {
2808 if (swash_fetch(PL_utf8_cntrl,(U8*)locinput))
2809 sayNO;
2810 locinput += PL_utf8skip[nextchr];
2811 nextchr = UCHARAT(locinput);
2812 break;
2813 }
2814 if (isCNTRL(nextchr))
2815 sayNO;
2816 nextchr = UCHARAT(++locinput);
2817 break;
2818 case GRAPHL:
2819 PL_reg_flags |= RF_tainted;
2820 /* FALL THROUGH */
2821 case GRAPH:
2822 if (!nextchr)
2823 sayNO;
2824 if (!(OP(scan) == GRAPH
2825 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
2826 sayNO;
2827 nextchr = UCHARAT(++locinput);
2828 break;
2829 case GRAPHLUTF8:
2830 PL_reg_flags |= RF_tainted;
2831 /* FALL THROUGH */
2832 case GRAPHUTF8:
2833 if (!nextchr)
2834 sayNO;
2835 if (nextchr & 0x80) {
2836 if (!(OP(scan) == GRAPHUTF8
2837 ? swash_fetch(PL_utf8_graph, (U8*)locinput)
2838 : isGRAPH_LC_utf8((U8*)locinput)))
2839 {
2840 sayNO;
2841 }
2842 locinput += PL_utf8skip[nextchr];
2843 nextchr = UCHARAT(locinput);
2844 break;
2845 }
2846 if (!(OP(scan) == GRAPHUTF8
2847 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr)))
2848 sayNO;
2849 nextchr = UCHARAT(++locinput);
2850 break;
2851 case NGRAPHL:
2852 PL_reg_flags |= RF_tainted;
2853 /* FALL THROUGH */
2854 case NGRAPH:
2855 if (!nextchr)
2856 sayNO;
2857 if (OP(scan) == GRAPH
2858 ? isGRAPH(nextchr) : isGRAPH_LC(nextchr))
2859 sayNO;
2860 nextchr = UCHARAT(++locinput);
2861 break;
2862 case NGRAPHLUTF8:
2863 PL_reg_flags |= RF_tainted;
2864 /* FALL THROUGH */
2865 case NGRAPHUTF8:
2866 if (!nextchr && locinput >= PL_regeol)
2867 sayNO;
2868 if (nextchr & 0x80) {
2869 if (swash_fetch(PL_utf8_graph,(U8*)locinput))
2870 sayNO;
2871 locinput += PL_utf8skip[nextchr];
2872 nextchr = UCHARAT(locinput);
2873 break;
2874 }
2875 if (isGRAPH(nextchr))
2876 sayNO;
2877 nextchr = UCHARAT(++locinput);
2878 break;
2879 case LOWERL:
2880 PL_reg_flags |= RF_tainted;
2881 /* FALL THROUGH */
2882 case LOWER:
2883 if (!nextchr)
2884 sayNO;
2885 if (!(OP(scan) == LOWER
2886 ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
2887 sayNO;
2888 nextchr = UCHARAT(++locinput);
2889 break;
2890 case LOWERLUTF8:
2891 PL_reg_flags |= RF_tainted;
2892 /* FALL THROUGH */
2893 case LOWERUTF8:
2894 if (!nextchr)
2895 sayNO;
2896 if (nextchr & 0x80) {
2897 if (!(OP(scan) == LOWERUTF8
2898 ? swash_fetch(PL_utf8_lower, (U8*)locinput)
2899 : isLOWER_LC_utf8((U8*)locinput)))
2900 {
2901 sayNO;
2902 }
2903 locinput += PL_utf8skip[nextchr];
2904 nextchr = UCHARAT(locinput);
2905 break;
2906 }
2907 if (!(OP(scan) == LOWERUTF8
2908 ? isLOWER(nextchr) : isLOWER_LC(nextchr)))
2909 sayNO;
2910 nextchr = UCHARAT(++locinput);
2911 break;
2912 case NLOWERL:
2913 PL_reg_flags |= RF_tainted;
2914 /* FALL THROUGH */
2915 case NLOWER:
2916 if (!nextchr)
2917 sayNO;
2918 if (OP(scan) == LOWER
2919 ? isLOWER(nextchr) : isLOWER_LC(nextchr))
2920 sayNO;
2921 nextchr = UCHARAT(++locinput);
2922 break;
2923 case NLOWERLUTF8:
2924 PL_reg_flags |= RF_tainted;
2925 /* FALL THROUGH */
2926 case NLOWERUTF8:
2927 if (!nextchr && locinput >= PL_regeol)
2928 sayNO;
2929 if (nextchr & 0x80) {
2930 if (swash_fetch(PL_utf8_lower,(U8*)locinput))
2931 sayNO;
2932 locinput += PL_utf8skip[nextchr];
2933 nextchr = UCHARAT(locinput);
2934 break;
2935 }
2936 if (isLOWER(nextchr))
2937 sayNO;
2938 nextchr = UCHARAT(++locinput);
2939 break;
2940 case PRINTL:
2941 PL_reg_flags |= RF_tainted;
2942 /* FALL THROUGH */
2943 case PRINT:
2944 if (!nextchr)
2945 sayNO;
2946 if (!(OP(scan) == PRINT
2947 ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
2948 sayNO;
2949 nextchr = UCHARAT(++locinput);
2950 break;
2951 case PRINTLUTF8:
2952 PL_reg_flags |= RF_tainted;
2953 /* FALL THROUGH */
2954 case PRINTUTF8:
2955 if (!nextchr)
2956 sayNO;
2957 if (nextchr & 0x80) {
2958 if (!(OP(scan) == PRINTUTF8
2959 ? swash_fetch(PL_utf8_print, (U8*)locinput)
2960 : isPRINT_LC_utf8((U8*)locinput)))
2961 {
2962 sayNO;
2963 }
2964 locinput += PL_utf8skip[nextchr];
2965 nextchr = UCHARAT(locinput);
2966 break;
2967 }
2968 if (!(OP(scan) == PRINTUTF8
2969 ? isPRINT(nextchr) : isPRINT_LC(nextchr)))
2970 sayNO;
2971 nextchr = UCHARAT(++locinput);
2972 break;
2973 case NPRINTL:
2974 PL_reg_flags |= RF_tainted;
2975 /* FALL THROUGH */
2976 case NPRINT:
2977 if (!nextchr)
2978 sayNO;
2979 if (OP(scan) == PRINT
2980 ? isPRINT(nextchr) : isPRINT_LC(nextchr))
2981 sayNO;
2982 nextchr = UCHARAT(++locinput);
2983 break;
2984 case NPRINTLUTF8:
2985 PL_reg_flags |= RF_tainted;
2986 /* FALL THROUGH */
2987 case NPRINTUTF8:
2988 if (!nextchr && locinput >= PL_regeol)
2989 sayNO;
2990 if (nextchr & 0x80) {
2991 if (swash_fetch(PL_utf8_print,(U8*)locinput))
2992 sayNO;
2993 locinput += PL_utf8skip[nextchr];
2994 nextchr = UCHARAT(locinput);
2995 break;
2996 }
2997 if (isPRINT(nextchr))
2998 sayNO;
2999 nextchr = UCHARAT(++locinput);
3000 break;
3001 case PUNCTL:
3002 PL_reg_flags |= RF_tainted;
3003 /* FALL THROUGH */
3004 case PUNCT:
3005 if (!nextchr)
3006 sayNO;
3007 if (!(OP(scan) == PUNCT
3008 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
3009 sayNO;
3010 nextchr = UCHARAT(++locinput);
3011 break;
3012 case PUNCTLUTF8:
3013 PL_reg_flags |= RF_tainted;
3014 /* FALL THROUGH */
3015 case PUNCTUTF8:
3016 if (!nextchr)
3017 sayNO;
3018 if (nextchr & 0x80) {
3019 if (!(OP(scan) == PUNCTUTF8
3020 ? swash_fetch(PL_utf8_punct, (U8*)locinput)
3021 : isPUNCT_LC_utf8((U8*)locinput)))
3022 {
3023 sayNO;
3024 }
3025 locinput += PL_utf8skip[nextchr];
3026 nextchr = UCHARAT(locinput);
3027 break;
3028 }
3029 if (!(OP(scan) == PUNCTUTF8
3030 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr)))
3031 sayNO;
3032 nextchr = UCHARAT(++locinput);
3033 break;
3034 case NPUNCTL:
3035 PL_reg_flags |= RF_tainted;
3036 /* FALL THROUGH */
3037 case NPUNCT:
3038 if (!nextchr)
3039 sayNO;
3040 if (OP(scan) == PUNCT
3041 ? isPUNCT(nextchr) : isPUNCT_LC(nextchr))
3042 sayNO;
3043 nextchr = UCHARAT(++locinput);
3044 break;
3045 case NPUNCTLUTF8:
3046 PL_reg_flags |= RF_tainted;
3047 /* FALL THROUGH */
3048 case NPUNCTUTF8:
3049 if (!nextchr && locinput >= PL_regeol)
3050 sayNO;
3051 if (nextchr & 0x80) {
3052 if (swash_fetch(PL_utf8_punct,(U8*)locinput))
3053 sayNO;
3054 locinput += PL_utf8skip[nextchr];
3055 nextchr = UCHARAT(locinput);
3056 break;
3057 }
3058 if (isPUNCT(nextchr))
3059 sayNO;
3060 nextchr = UCHARAT(++locinput);
3061 break;
3062 case UPPERL:
3063 PL_reg_flags |= RF_tainted;
3064 /* FALL THROUGH */
3065 case UPPER:
3066 if (!nextchr)
3067 sayNO;
3068 if (!(OP(scan) == UPPER
3069 ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
3070 sayNO;
3071 nextchr = UCHARAT(++locinput);
3072 break;
3073 case UPPERLUTF8:
3074 PL_reg_flags |= RF_tainted;
3075 /* FALL THROUGH */
3076 case UPPERUTF8:
3077 if (!nextchr)
3078 sayNO;
3079 if (nextchr & 0x80) {
3080 if (!(OP(scan) == UPPERUTF8
3081 ? swash_fetch(PL_utf8_upper, (U8*)locinput)
3082 : isUPPER_LC_utf8((U8*)locinput)))
3083 {
3084 sayNO;
3085 }
3086 locinput += PL_utf8skip[nextchr];
3087 nextchr = UCHARAT(locinput);
3088 break;
3089 }
3090 if (!(OP(scan) == UPPERUTF8
3091 ? isUPPER(nextchr) : isUPPER_LC(nextchr)))
3092 sayNO;
3093 nextchr = UCHARAT(++locinput);
3094 break;
3095 case NUPPERL:
3096 PL_reg_flags |= RF_tainted;
3097 /* FALL THROUGH */
3098 case NUPPER:
3099 if (!nextchr)
3100 sayNO;
3101 if (OP(scan) == UPPER
3102 ? isUPPER(nextchr) : isUPPER_LC(nextchr))
3103 sayNO;
3104 nextchr = UCHARAT(++locinput);
3105 break;
3106 case NUPPERLUTF8:
3107 PL_reg_flags |= RF_tainted;
3108 /* FALL THROUGH */
3109 case NUPPERUTF8:
3110 if (!nextchr && locinput >= PL_regeol)
3111 sayNO;
3112 if (nextchr & 0x80) {
3113 if (swash_fetch(PL_utf8_upper,(U8*)locinput))
3114 sayNO;
3115 locinput += PL_utf8skip[nextchr];
3116 nextchr = UCHARAT(locinput);
3117 break;
3118 }
3119 if (isUPPER(nextchr))
3120 sayNO;
3121 nextchr = UCHARAT(++locinput);
3122 break;
3123 case XDIGIT:
3124 if (!nextchr && locinput >= PL_regeol)
3125 sayNO;
3126 if (!isXDIGIT(nextchr))
3127 sayNO;
3128 nextchr = UCHARAT(++locinput);
3129 break;
3130 case NXDIGIT:
3131 if (!nextchr && locinput >= PL_regeol)
3132 sayNO;
3133 if (isXDIGIT(nextchr))
3134 sayNO;
3135 nextchr = UCHARAT(++locinput);
3136 break;
3137 case CLUMP:
3138 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
3139 sayNO;
3140 locinput += PL_utf8skip[nextchr];
3141 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
3142 locinput += UTF8SKIP(locinput);
3143 if (locinput > PL_regeol)
3144 sayNO;
3145 nextchr = UCHARAT(locinput);
3146 break;
3147 case REFFL:
3148 PL_reg_flags |= RF_tainted;
3149 /* FALL THROUGH */
3150 case REF:
3151 case REFF:
3152 n = ARG(scan); /* which paren pair */
3153 ln = PL_regstartp[n];
3154 if (*PL_reglastparen < n || ln == -1)
3155 sayNO; /* Do not match unless seen CLOSEn. */
3156 if (ln == PL_regendp[n])
3157 break;
3158
3159 s = PL_bostr + ln;
3160 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
3161 char *l = locinput;
3162 char *e = PL_bostr + PL_regendp[n];
3163 /*
3164 * Note that we can't do the "other character" lookup trick as
3165 * in the 8-bit case (no pun intended) because in Unicode we
3166 * have to map both upper and title case to lower case.
3167 */
3168 if (OP(scan) == REFF) {
3169 while (s < e) {
3170 if (l >= PL_regeol)
3171 sayNO;
3172 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
3173 sayNO;
3174 s += UTF8SKIP(s);
3175 l += UTF8SKIP(l);
3176 }
3177 }
3178 else {
3179 while (s < e) {
3180 if (l >= PL_regeol)
3181 sayNO;
3182 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
3183 sayNO;
3184 s += UTF8SKIP(s);
3185 l += UTF8SKIP(l);
3186 }
3187 }
3188 locinput = l;
3189 nextchr = UCHARAT(locinput);
3190 break;
3191 }
3192
3193 /* Inline the first character, for speed. */
3194 if (UCHARAT(s) != nextchr &&
3195 (OP(scan) == REF ||
3196 (UCHARAT(s) != ((OP(scan) == REFF
3197 ? PL_fold : PL_fold_locale)[nextchr]))))
3198 sayNO;
3199 ln = PL_regendp[n] - ln;
3200 if (locinput + ln > PL_regeol)
3201 sayNO;
3202 if (ln > 1 && (OP(scan) == REF
3203 ? memNE(s, locinput, ln)
3204 : (OP(scan) == REFF
3205 ? ibcmp(s, locinput, ln)
3206 : ibcmp_locale(s, locinput, ln))))
3207 sayNO;
3208 locinput += ln;
3209 nextchr = UCHARAT(locinput);
3210 break;
3211
3212 case NOTHING:
3213 case TAIL:
3214 break;
3215 case BACK:
3216 break;
3217 case EVAL:
3218 {
3219 dSP;
3220 OP_4tree *oop = PL_op;
3221 COP *ocurcop = PL_curcop;
3222 SV **ocurpad = PL_curpad;
3223 SV *ret;
3224
3225 n = ARG(scan);
3226 PL_op = (OP_4tree*)PL_regdata->data[n];
3227 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
3228 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
3229 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3230
3231 CALLRUNOPS(aTHX); /* Scalar context. */
3232 SPAGAIN;
3233 ret = POPs;
3234 PUTBACK;
3235
3236 PL_op = oop;
3237 PL_curpad = ocurpad;
3238 PL_curcop = ocurcop;
3239 if (logical) {
3240 if (logical == 2) { /* Postponed subexpression. */
3241 regexp *re;
3242 MAGIC *mg = Null(MAGIC*);
3243 re_cc_state state;
3244 CURCUR cctmp;
3245 CHECKPOINT cp, lastcp;
3246
3247 if(SvROK(ret) || SvRMAGICAL(ret)) {
3248 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
3249
3250 if(SvMAGICAL(sv))
3251 mg = mg_find(sv, 'r');
3252 }
3253 if (mg) {
3254 re = (regexp *)mg->mg_obj;
3255 (void)ReREFCNT_inc(re);
3256 }
3257 else {
3258 STRLEN len;
3259 char *t = SvPV(ret, len);
3260 PMOP pm;
3261 char *oprecomp = PL_regprecomp;
3262 I32 osize = PL_regsize;
3263 I32 onpar = PL_regnpar;
3264
3265 pm.op_pmflags = 0;
3266 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
3267 if (!(SvFLAGS(ret)
3268 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
3269 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
3270 PL_regprecomp = oprecomp;
3271 PL_regsize = osize;
3272 PL_regnpar = onpar;
3273 }
3274 DEBUG_r(
3275 PerlIO_printf(Perl_debug_log,
3276 "Entering embedded `%s%.60s%s%s'\n",
3277 PL_colors[0],
3278 re->precomp,
3279 PL_colors[1],
3280 (strlen(re->precomp) > 60 ? "..." : ""))
3281 );
3282 state.node = next;
3283 state.prev = PL_reg_call_cc;
3284 state.cc = PL_regcc;
3285 state.re = PL_reg_re;
3286
3287 cctmp.cur = 0;
3288 cctmp.oldcc = 0;
3289 PL_regcc = &cctmp;
3290
3291 cp = regcppush(0); /* Save *all* the positions. */
3292 REGCP_SET;
3293 cache_re(re);
3294 state.ss = PL_savestack_ix;
3295 *PL_reglastparen = 0;
3296 PL_reg_call_cc = &state;
3297 PL_reginput = locinput;
3298 if (regmatch(re->program + 1)) {
3299 ReREFCNT_dec(re);
3300 regcpblow(cp);
3301 sayYES;
3302 }
3303 DEBUG_r(
3304 PerlIO_printf(Perl_debug_log,
3305 "%*s failed...\n",
3306 REPORT_CODE_OFF+PL_regindent*2, "")
3307 );
3308 ReREFCNT_dec(re);
3309 REGCP_UNWIND;
3310 regcppop();
3311 PL_reg_call_cc = state.prev;
3312 PL_regcc = state.cc;
3313 PL_reg_re = state.re;
3314 cache_re(PL_reg_re);
3315 sayNO;
3316 }
3317 sw = SvTRUE(ret);
3318 logical = 0;
3319 }
3320 else
3321 sv_setsv(save_scalar(PL_replgv), ret);
3322 break;
3323 }
3324 case OPEN:
3325 n = ARG(scan); /* which paren pair */
3326 PL_reg_start_tmp[n] = locinput;
3327 if (n > PL_regsize)
3328 PL_regsize = n;
3329 break;
3330 case CLOSE:
3331 n = ARG(scan); /* which paren pair */
3332 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3333 PL_regendp[n] = locinput - PL_bostr;
3334 if (n > *PL_reglastparen)
3335 *PL_reglastparen = n;
3336 break;
3337 case GROUPP:
3338 n = ARG(scan); /* which paren pair */
3339 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
3340 break;
3341 case IFTHEN:
3342 if (sw)
3343 next = NEXTOPER(NEXTOPER(scan));
3344 else {
3345 next = scan + ARG(scan);
3346 if (OP(next) == IFTHEN) /* Fake one. */
3347 next = NEXTOPER(NEXTOPER(next));
3348 }
3349 break;
3350 case LOGICAL:
3351 logical = scan->flags;
3352 break;
3353 case CURLYX: {
3354 CURCUR cc;
3355 CHECKPOINT cp = PL_savestack_ix;
3356
3357 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3358 next += ARG(next);
3359 cc.oldcc = PL_regcc;
3360 PL_regcc = &cc;
3361 cc.parenfloor = *PL_reglastparen;
3362 cc.cur = -1;
3363 cc.min = ARG1(scan);
3364 cc.max = ARG2(scan);
3365 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3366 cc.next = next;
3367 cc.minmod = minmod;
3368 cc.lastloc = 0;
3369 PL_reginput = locinput;
3370 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
3371 regcpblow(cp);
3372 PL_regcc = cc.oldcc;
3373 saySAME(n);
3374 }
3375 /* NOT REACHED */
3376 case WHILEM: {
3377 /*
3378 * This is really hard to understand, because after we match
3379 * what we're trying to match, we must make sure the rest of
3380 * the RE is going to match for sure, and to do that we have
3381 * to go back UP the parse tree by recursing ever deeper. And
3382 * if it fails, we have to reset our parent's current state
3383 * that we can try again after backing off.
3384 */
3385
3386 CHECKPOINT cp, lastcp;
3387 CURCUR* cc = PL_regcc;
3388 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3389
3390 n = cc->cur + 1; /* how many we know we matched */
3391 PL_reginput = locinput;
3392
3393 DEBUG_r(
3394 PerlIO_printf(Perl_debug_log,
3395 "%*s %ld out of %ld..%ld cc=%lx\n",
3396 REPORT_CODE_OFF+PL_regindent*2, "",
3397 (long)n, (long)cc->min,
3398 (long)cc->max, (long)cc)
3399 );
3400
3401 /* If degenerate scan matches "", assume scan done. */
3402
3403 if (locinput == cc->lastloc && n >= cc->min) {
3404 PL_regcc = cc->oldcc;
3405 ln = PL_regcc->cur;
3406 DEBUG_r(
3407 PerlIO_printf(Perl_debug_log,
3408 "%*s empty match detected, try continuation...\n",
3409 REPORT_CODE_OFF+PL_regindent*2, "")
3410 );
3411 if (regmatch(cc->next))
3412 sayYES;
3413 DEBUG_r(
3414 PerlIO_printf(Perl_debug_log,
3415 "%*s failed...\n",
3416 REPORT_CODE_OFF+PL_regindent*2, "")
3417 );
3418 PL_regcc->cur = ln;
3419 PL_regcc = cc;
3420 sayNO;
3421 }
3422
3423 /* First just match a string of min scans. */
3424
3425 if (n < cc->min) {
3426 cc->cur = n;
3427 cc->lastloc = locinput;
3428 if (regmatch(cc->scan))
3429 sayYES;
3430 cc->cur = n - 1;
3431 cc->lastloc = lastloc;
3432 DEBUG_r(
3433 PerlIO_printf(Perl_debug_log,
3434 "%*s failed...\n",
3435 REPORT_CODE_OFF+PL_regindent*2, "")
3436 );
3437 sayNO;
3438 }
3439
3440 /* Prefer next over scan for minimal matching. */
3441
3442 if (cc->minmod) {
3443 PL_regcc = cc->oldcc;
3444 ln = PL_regcc->cur;
3445 cp = regcppush(cc->parenfloor);
3446 REGCP_SET;
3447 if (regmatch(cc->next)) {
3448 regcpblow(cp);
3449 sayYES; /* All done. */
3450 }
3451 REGCP_UNWIND;
3452 regcppop();
3453 PL_regcc->cur = ln;
3454 PL_regcc = cc;
3455
3456 if (n >= cc->max) { /* Maximum greed exceeded? */
3457 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3458 && !(PL_reg_flags & RF_warned)) {
3459 PL_reg_flags |= RF_warned;
3460 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
3461 "Complex regular subexpression recursion",
3462 REG_INFTY - 1);
3463 }
3464 sayNO;
3465 }
3466
3467 DEBUG_r(
3468 PerlIO_printf(Perl_debug_log,
3469 "%*s trying longer...\n",
3470 REPORT_CODE_OFF+PL_regindent*2, "")
3471 );
3472 /* Try scanning more and see if it helps. */
3473 PL_reginput = locinput;
3474 cc->cur = n;
3475 cc->lastloc = locinput;
3476 cp = regcppush(cc->parenfloor);
3477 REGCP_SET;
3478 if (regmatch(cc->scan)) {
3479 regcpblow(cp);
3480 sayYES;
3481 }
3482 DEBUG_r(
3483 PerlIO_printf(Perl_debug_log,
3484 "%*s failed...\n",
3485 REPORT_CODE_OFF+PL_regindent*2, "")
3486 );
3487 REGCP_UNWIND;
3488 regcppop();
3489 cc->cur = n - 1;
3490 cc->lastloc = lastloc;
3491 sayNO;
3492 }
3493
3494 /* Prefer scan over next for maximal matching. */
3495
3496 if (n < cc->max) { /* More greed allowed? */
3497 cp = regcppush(cc->parenfloor);
3498 cc->cur = n;
3499 cc->lastloc = locinput;
3500 REGCP_SET;
3501 if (regmatch(cc->scan)) {
3502 regcpblow(cp);
3503 sayYES;
3504 }
3505 REGCP_UNWIND;
3506 regcppop(); /* Restore some previous $<digit>s? */
3507 PL_reginput = locinput;
3508 DEBUG_r(
3509 PerlIO_printf(Perl_debug_log,
3510 "%*s failed, try continuation...\n",
3511 REPORT_CODE_OFF+PL_regindent*2, "")
3512 );
3513 }
3514 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3515 && !(PL_reg_flags & RF_warned)) {
3516 PL_reg_flags |= RF_warned;
3517 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
3518 "Complex regular subexpression recursion",
3519 REG_INFTY - 1);
3520 }
3521
3522 /* Failed deeper matches of scan, so see if this one works. */
3523 PL_regcc = cc->oldcc;
3524 ln = PL_regcc->cur;
3525 if (regmatch(cc->next))
3526 sayYES;
3527 DEBUG_r(
3528 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3529 REPORT_CODE_OFF+PL_regindent*2, "")
3530 );
3531 PL_regcc->cur = ln;
3532 PL_regcc = cc;
3533 cc->cur = n - 1;
3534 cc->lastloc = lastloc;
3535 sayNO;
3536 }
3537 /* NOT REACHED */
3538 case BRANCHJ:
3539 next = scan + ARG(scan);
3540 if (next == scan)
3541 next = NULL;
3542 inner = NEXTOPER(NEXTOPER(scan));
3543 goto do_branch;
3544 case BRANCH:
3545 inner = NEXTOPER(scan);
3546 do_branch:
3547 {
3548 CHECKPOINT lastcp;
3549 c1 = OP(scan);
3550 if (OP(next) != c1) /* No choice. */
3551 next = inner; /* Avoid recursion. */
3552 else {
3553 int lastparen = *PL_reglastparen;
3554
3555 REGCP_SET;
3556 do {
3557 PL_reginput = locinput;
3558 if (regmatch(inner))
3559 sayYES;
3560 REGCP_UNWIND;
3561 for (n = *PL_reglastparen; n > lastparen; n--)
3562 PL_regendp[n] = -1;
3563 *PL_reglastparen = n;
3564 scan = next;
3565 /*SUPPRESS 560*/
3566 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
3567 next += n;
3568 else
3569 next = NULL;
3570 inner = NEXTOPER(scan);
3571 if (c1 == BRANCHJ) {
3572 inner = NEXTOPER(inner);
3573 }
3574 } while (scan != NULL && OP(scan) == c1);
3575 sayNO;
3576 /* NOTREACHED */
3577 }
3578 }
3579 break;
3580 case MINMOD:
3581 minmod = 1;
3582 break;
3583 case CURLYM:
3584 {
3585 I32 l = 0;
3586 CHECKPOINT lastcp;
3587
3588 /* We suppose that the next guy does not need
3589 backtracking: in particular, it is of constant length,
3590 and has no parenths to influence future backrefs. */
3591 ln = ARG1(scan); /* min to match */
3592 n = ARG2(scan); /* max to match */
3593 paren = scan->flags;
3594 if (paren) {
3595 if (paren > PL_regsize)
3596 PL_regsize = paren;
3597 if (paren > *PL_reglastparen)
3598 *PL_reglastparen = paren;
3599 }
3600 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3601 if (paren)
3602 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3603 PL_reginput = locinput;
3604 if (minmod) {
3605 minmod = 0;
3606 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3607 sayNO;
3608 if (ln && l == 0 && n >= ln
3609 /* In fact, this is tricky. If paren, then the
3610 fact that we did/didnot match may influence
3611 future execution. */
3612 && !(paren && ln == 0))
3613 ln = n;
3614 locinput = PL_reginput;
3615 if (PL_regkind[(U8)OP(next)] == EXACT) {
3616 c1 = UCHARAT(OPERAND(next) + 1);
3617 if (OP(next) == EXACTF)
3618 c2 = PL_fold[c1];
3619 else if (OP(next) == EXACTFL)
3620 c2 = PL_fold_locale[c1];
3621 else
3622 c2 = c1;
3623 }
3624 else
3625 c1 = c2 = -1000;
3626 REGCP_SET;
3627 /* This may be improved if l == 0. */
3628 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3629 /* If it could work, try it. */
3630 if (c1 == -1000 ||
3631 UCHARAT(PL_reginput) == c1 ||
3632 UCHARAT(PL_reginput) == c2)
3633 {
3634 if (paren) {
3635 if (n) {
3636 PL_regstartp[paren] =
3637 HOPc(PL_reginput, -l) - PL_bostr;
3638 PL_regendp[paren] = PL_reginput - PL_bostr;
3639 }
3640 else
3641 PL_regendp[paren] = -1;
3642 }
3643 if (regmatch(next))
3644 sayYES;
3645 REGCP_UNWIND;
3646 }
3647 /* Couldn't or didn't -- move forward. */
3648 PL_reginput = locinput;
3649 if (regrepeat_hard(scan, 1, &l)) {
3650 ln++;
3651 locinput = PL_reginput;
3652 }
3653 else
3654 sayNO;
3655 }
3656 }
3657 else {
3658 n = regrepeat_hard(scan, n, &l);
3659 if (n != 0 && l == 0
3660 /* In fact, this is tricky. If paren, then the
3661 fact that we did/didnot match may influence
3662 future execution. */
3663 && !(paren && ln == 0))
3664 ln = n;
3665 locinput = PL_reginput;
3666 DEBUG_r(
3667 PerlIO_printf(Perl_debug_log,
3668 "%*s matched %ld times, len=%ld...\n",
3669 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
3670 );
3671 if (n >= ln) {
3672 if (PL_regkind[(U8)OP(next)] == EXACT) {
3673 c1 = UCHARAT(OPERAND(next) + 1);
3674 if (OP(next) == EXACTF)
3675 c2 = PL_fold[c1];
3676 else if (OP(next) == EXACTFL)
3677 c2 = PL_fold_locale[c1];
3678 else
3679 c2 = c1;
3680 }
3681 else
3682 c1 = c2 = -1000;
3683 }
3684 REGCP_SET;
3685 while (n >= ln) {
3686 /* If it could work, try it. */
3687 if (c1 == -1000 ||
3688 UCHARAT(PL_reginput) == c1 ||
3689 UCHARAT(PL_reginput) == c2)
3690 {
3691 DEBUG_r(
3692 PerlIO_printf(Perl_debug_log,
3693 "%*s trying tail with n=%ld...\n",
3694 REPORT_CODE_OFF+PL_regindent*2, "", n)
3695 );
3696 if (paren) {
3697 if (n) {
3698 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3699 PL_regendp[paren] = PL_reginput - PL_bostr;
3700 }
3701 else
3702 PL_regendp[paren] = -1;
3703 }
3704 if (regmatch(next))
3705 sayYES;
3706 REGCP_UNWIND;
3707 }
3708 /* Couldn't or didn't -- back up. */
3709 n--;
3710 locinput = HOPc(locinput, -l);
3711 PL_reginput = locinput;
3712 }
3713 }
3714 sayNO;
3715 break;
3716 }
3717 case CURLYN:
3718 paren = scan->flags; /* Which paren to set */
3719 if (paren > PL_regsize)
3720 PL_regsize = paren;
3721 if (paren > *PL_reglastparen)
3722 *PL_reglastparen = paren;
3723 ln = ARG1(scan); /* min to match */
3724 n = ARG2(scan); /* max to match */
3725 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3726 goto repeat;
3727 case CURLY:
3728 paren = 0;
3729 ln = ARG1(scan); /* min to match */
3730 n = ARG2(scan); /* max to match */
3731 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3732 goto repeat;
3733 case STAR:
3734 ln = 0;
3735 n = REG_INFTY;
3736 scan = NEXTOPER(scan);
3737 paren = 0;
3738 goto repeat;
3739 case PLUS:
3740 ln = 1;
3741 n = REG_INFTY;
3742 scan = NEXTOPER(scan);
3743 paren = 0;
3744 repeat:
3745 /*
3746 * Lookahead to avoid useless match attempts
3747 * when we know what character comes next.
3748 */
3749 if (PL_regkind[(U8)OP(next)] == EXACT) {
3750 c1 = UCHARAT(OPERAND(next) + 1);
3751 if (OP(next) == EXACTF)
3752 c2 = PL_fold[c1];
3753 else if (OP(next) == EXACTFL)
3754 c2 = PL_fold_locale[c1];
3755 else
3756 c2 = c1;
3757 }
3758 else
3759 c1 = c2 = -1000;
3760 PL_reginput = locinput;
3761 if (minmod) {
3762 CHECKPOINT lastcp;
3763 minmod = 0;
3764 if (ln && regrepeat(scan, ln) < ln)
3765 sayNO;
3766 locinput = PL_reginput;
3767 REGCP_SET;
3768 if (c1 != -1000) {
3769 char *e = locinput + n - ln; /* Should not check after this */
3770 char *old = locinput;
3771
3772 if (e >= PL_regeol || (n == REG_INFTY))
3773 e = PL_regeol - 1;
3774 while (1) {
3775 /* Find place 'next' could work */
3776 if (c1 == c2) {
3777 while (locinput <= e && *locinput != c1)
3778 locinput++;
3779 } else {
3780 while (locinput <= e
3781 && *locinput != c1
3782 && *locinput != c2)
3783 locinput++;
3784 }
3785 if (locinput > e)
3786 sayNO;
3787 /* PL_reginput == old now */
3788 if (locinput != old) {
3789 ln = 1; /* Did some */
3790 if (regrepeat(scan, locinput - old) <
3791 locinput - old)
3792 sayNO;
3793 }
3794 /* PL_reginput == locinput now */
3795 if (paren) {
3796 if (ln) {
3797 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
3798 PL_regendp[paren] = locinput - PL_bostr;
3799 }
3800 else
3801 PL_regendp[paren] = -1;
3802 }
3803 if (regmatch(next))
3804 sayYES;
3805 PL_reginput = locinput; /* Could be reset... */
3806 REGCP_UNWIND;
3807 /* Couldn't or didn't -- move forward. */
3808 old = locinput++;
3809 }
3810 }
3811 else
3812 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3813 /* If it could work, try it. */
3814 if (c1 == -1000 ||
3815 UCHARAT(PL_reginput) == c1 ||
3816 UCHARAT(PL_reginput) == c2)
3817 {
3818 if (paren) {
3819 if (n) {
3820 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3821 PL_regendp[paren] = PL_reginput - PL_bostr;
3822 }
3823 else
3824 PL_regendp[paren] = -1;
3825 }
3826 if (regmatch(next))
3827 sayYES;
3828 REGCP_UNWIND;
3829 }
3830 /* Couldn't or didn't -- move forward. */
3831 PL_reginput = locinput;
3832 if (regrepeat(scan, 1)) {
3833 ln++;
3834 locinput = PL_reginput;
3835 }
3836 else
3837 sayNO;
3838 }
3839 }
3840 else {
3841 CHECKPOINT lastcp;
3842 n = regrepeat(scan, n);
3843 locinput = PL_reginput;
3844 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3845 (!PL_multiline || OP(next) == SEOL))
3846 ln = n; /* why back off? */
3847 REGCP_SET;
3848 if (paren) {
3849 while (n >= ln) {
3850 /* If it could work, try it. */
3851 if (c1 == -1000 ||
3852 UCHARAT(PL_reginput) == c1 ||
3853 UCHARAT(PL_reginput) == c2)
3854 {
3855 if (paren && n) {
3856 if (n) {
3857 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3858 PL_regendp[paren] = PL_reginput - PL_bostr;
3859 }
3860 else
3861 PL_regendp[paren] = -1;
3862 }
3863 if (regmatch(next))
3864 sayYES;
3865 REGCP_UNWIND;
3866 }
3867 /* Couldn't or didn't -- back up. */
3868 n--;
3869 PL_reginput = locinput = HOPc(locinput, -1);
3870 }
3871 }
3872 else {
3873 while (n >= ln) {
3874 /* If it could work, try it. */
3875 if (c1 == -1000 ||
3876 UCHARAT(PL_reginput) == c1 ||
3877 UCHARAT(PL_reginput) == c2)
3878 {
3879 if (regmatch(next))
3880 sayYES;
3881 REGCP_UNWIND;
3882 }
3883 /* Couldn't or didn't -- back up. */
3884 n--;
3885 PL_reginput = locinput = HOPc(locinput, -1);
3886 }
3887 }
3888 }
3889 sayNO;
3890 break;
3891 case END:
3892 if (PL_reg_call_cc) {
3893 re_cc_state *cur_call_cc = PL_reg_call_cc;
3894 CURCUR *cctmp = PL_regcc;
3895 regexp *re = PL_reg_re;
3896 CHECKPOINT cp, lastcp;
3897
3898 cp = regcppush(0); /* Save *all* the positions. */
3899 REGCP_SET;
3900 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3901 the caller. */
3902 PL_reginput = locinput; /* Make position available to
3903 the callcc. */
3904 cache_re(PL_reg_call_cc->re);
3905 PL_regcc = PL_reg_call_cc->cc;
3906 PL_reg_call_cc = PL_reg_call_cc->prev;
3907 if (regmatch(cur_call_cc->node)) {
3908 PL_reg_call_cc = cur_call_cc;
3909 regcpblow(cp);
3910 sayYES;
3911 }
3912 REGCP_UNWIND;
3913 regcppop();
3914 PL_reg_call_cc = cur_call_cc;
3915 PL_regcc = cctmp;
3916 PL_reg_re = re;
3917 cache_re(re);
3918
3919 DEBUG_r(
3920 PerlIO_printf(Perl_debug_log,
3921 "%*s continuation failed...\n",
3922 REPORT_CODE_OFF+PL_regindent*2, "")
3923 );
3924 sayNO;
3925 }
3926 if (locinput < PL_regtill)
3927 sayNO; /* Cannot match: too short. */
3928 /* Fall through */
3929 case SUCCEED:
3930 PL_reginput = locinput; /* put where regtry can find it */
3931 sayYES; /* Success! */
3932 case SUSPEND:
3933 n = 1;
3934 PL_reginput = locinput;
3935 goto do_ifmatch;
3936 case UNLESSM:
3937 n = 0;
3938 if (scan->flags) {
3939 if (UTF) { /* XXXX This is absolutely
3940 broken, we read before
3941 start of string. */
3942 s = HOPMAYBEc(locinput, -scan->flags);
3943 if (!s)
3944 goto say_yes;
3945 PL_reginput = s;
3946 }
3947 else {
3948 if (locinput < PL_bostr + scan->flags)
3949 goto say_yes;
3950 PL_reginput = locinput - scan->flags;
3951 goto do_ifmatch;
3952 }
3953 }
3954 else
3955 PL_reginput = locinput;
3956 goto do_ifmatch;
3957 case IFMATCH:
3958 n = 1;
3959 if (scan->flags) {
3960 if (UTF) { /* XXXX This is absolutely
3961 broken, we read before
3962 start of string. */
3963 s = HOPMAYBEc(locinput, -scan->flags);
3964 if (!s || s < PL_bostr)
3965 goto say_no;
3966 PL_reginput = s;
3967 }
3968 else {
3969 if (locinput < PL_bostr + scan->flags)
3970 goto say_no;
3971 PL_reginput = locinput - scan->flags;
3972 goto do_ifmatch;
3973 }
3974 }
3975 else
3976 PL_reginput = locinput;
3977
3978 do_ifmatch:
3979 inner = NEXTOPER(NEXTOPER(scan));
3980 if (regmatch(inner) != n) {
3981 say_no:
3982 if (logical) {
3983 logical = 0;
3984 sw = 0;
3985 goto do_longjump;
3986 }
3987 else
3988 sayNO;
3989 }
3990 say_yes:
3991 if (logical) {
3992 logical = 0;
3993 sw = 1;
3994 }
3995 if (OP(scan) == SUSPEND) {
3996 locinput = PL_reginput;
3997 nextchr = UCHARAT(locinput);
3998 }
3999 /* FALL THROUGH. */
4000 case LONGJMP:
4001 do_longjump:
4002 next = scan + ARG(scan);
4003 if (next == scan)
4004 next = NULL;
4005 break;
4006 default:
4007 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
4008 (unsigned long)scan, OP(scan));
4009 Perl_croak(aTHX_ "regexp memory corruption");
4010 }
4011 scan = next;
4012 }
4013
4014 /*
4015 * We get here only if there's trouble -- normally "case END" is
4016 * the terminating point.
4017 */
4018 Perl_croak(aTHX_ "corrupted regexp pointers");
4019 /*NOTREACHED*/
4020 sayNO;
4021
4022yes:
4023#ifdef DEBUGGING
4024 PL_regindent--;
4025#endif
4026 return 1;
4027
4028no:
4029#ifdef DEBUGGING
4030 PL_regindent--;
4031#endif
4032 return 0;
4033}
4034
4035/*
4036 - regrepeat - repeatedly match something simple, report how many
4037 */
4038/*
4039 * [This routine now assumes that it will only match on things of length 1.
4040 * That was true before, but now we assume scan - reginput is the count,
4041 * rather than incrementing count on every character. [Er, except utf8.]]
4042 */
4043STATIC I32
4044S_regrepeat(pTHX_ regnode *p, I32 max)
4045{
4046 dTHR;
4047 register char *scan;
4048 register char *opnd;
4049 register I32 c;
4050 register char *loceol = PL_regeol;
4051 register I32 hardcount = 0;
4052
4053 scan = PL_reginput;
4054 if (max != REG_INFTY && max < loceol - scan)
4055 loceol = scan + max;
4056 opnd = (char *) OPERAND(p);
4057 switch (OP(p)) {
4058 case REG_ANY:
4059 while (scan < loceol && *scan != '\n')
4060 scan++;
4061 break;
4062 case SANY:
4063 scan = loceol;
4064 break;
4065 case ANYUTF8:
4066 loceol = PL_regeol;
4067 while (scan < loceol && *scan != '\n') {
4068 scan += UTF8SKIP(scan);
4069 hardcount++;
4070 }
4071 break;
4072 case SANYUTF8:
4073 loceol = PL_regeol;
4074 while (scan < loceol) {
4075 scan += UTF8SKIP(scan);
4076 hardcount++;
4077 }
4078 break;
4079 case EXACT: /* length of string is 1 */
4080 c = UCHARAT(++opnd);
4081 while (scan < loceol && UCHARAT(scan) == c)
4082 scan++;
4083 break;
4084 case EXACTF: /* length of string is 1 */
4085 c = UCHARAT(++opnd);
4086 while (scan < loceol &&
4087 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
4088 scan++;
4089 break;
4090 case EXACTFL: /* length of string is 1 */
4091 PL_reg_flags |= RF_tainted;
4092 c = UCHARAT(++opnd);
4093 while (scan < loceol &&
4094 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
4095 scan++;
4096 break;
4097 case ANYOFUTF8:
4098 loceol = PL_regeol;
4099 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
4100 scan += UTF8SKIP(scan);
4101 hardcount++;
4102 }
4103 break;
4104 case ANYOF:
4105 while (scan < loceol && REGINCLASS(opnd, *scan))
4106 scan++;
4107 break;
4108 case ALNUM:
4109 while (scan < loceol && isALNUM(*scan))
4110 scan++;
4111 break;
4112 case ALNUMUTF8:
4113 loceol = PL_regeol;
4114 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
4115 scan += UTF8SKIP(scan);
4116 hardcount++;
4117 }
4118 break;
4119 case ALNUML:
4120 PL_reg_flags |= RF_tainted;
4121 while (scan < loceol && isALNUM_LC(*scan))
4122 scan++;
4123 break;
4124 case ALNUMLUTF8:
4125 PL_reg_flags |= RF_tainted;
4126 loceol = PL_regeol;
4127 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
4128 scan += UTF8SKIP(scan);
4129 hardcount++;
4130 }
4131 break;
4132 break;
4133 case NALNUM:
4134 while (scan < loceol && !isALNUM(*scan))
4135 scan++;
4136 break;
4137 case NALNUMUTF8:
4138 loceol = PL_regeol;
4139 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
4140 scan += UTF8SKIP(scan);
4141 hardcount++;
4142 }
4143 break;
4144 case NALNUML:
4145 PL_reg_flags |= RF_tainted;
4146 while (scan < loceol && !isALNUM_LC(*scan))
4147 scan++;
4148 break;
4149 case NALNUMLUTF8:
4150 PL_reg_flags |= RF_tainted;
4151 loceol = PL_regeol;
4152 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
4153 scan += UTF8SKIP(scan);
4154 hardcount++;
4155 }
4156 break;
4157 case SPACE:
4158 while (scan < loceol && isSPACE(*scan))
4159 scan++;
4160 break;
4161 case SPACEUTF8:
4162 loceol = PL_regeol;
4163 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
4164 scan += UTF8SKIP(scan);
4165 hardcount++;
4166 }
4167 break;
4168 case SPACEL:
4169 PL_reg_flags |= RF_tainted;
4170 while (scan < loceol && isSPACE_LC(*scan))
4171 scan++;
4172 break;
4173 case SPACELUTF8:
4174 PL_reg_flags |= RF_tainted;
4175 loceol = PL_regeol;
4176 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4177 scan += UTF8SKIP(scan);
4178 hardcount++;
4179 }
4180 break;
4181 case NSPACE:
4182 while (scan < loceol && !isSPACE(*scan))
4183 scan++;
4184 break;
4185 case NSPACEUTF8:
4186 loceol = PL_regeol;
4187 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
4188 scan += UTF8SKIP(scan);
4189 hardcount++;
4190 }
4191 break;
4192 case NSPACEL:
4193 PL_reg_flags |= RF_tainted;
4194 while (scan < loceol && !isSPACE_LC(*scan))
4195 scan++;
4196 break;
4197 case NSPACELUTF8:
4198 PL_reg_flags |= RF_tainted;
4199 loceol = PL_regeol;
4200 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4201 scan += UTF8SKIP(scan);
4202 hardcount++;
4203 }
4204 break;
4205 case DIGIT:
4206 while (scan < loceol && isDIGIT(*scan))
4207 scan++;
4208 break;
4209 case DIGITUTF8:
4210 loceol = PL_regeol;
4211 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
4212 scan += UTF8SKIP(scan);
4213 hardcount++;
4214 }
4215 break;
4216 break;
4217 case NDIGIT:
4218 while (scan < loceol && !isDIGIT(*scan))
4219 scan++;
4220 break;
4221 case NDIGITUTF8:
4222 loceol = PL_regeol;
4223 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
4224 scan += UTF8SKIP(scan);
4225 hardcount++;
4226 }
4227 break;
4228 default: /* Called on something of 0 width. */
4229 break; /* So match right here or not at all. */
4230 }
4231
4232 if (hardcount)
4233 c = hardcount;
4234 else
4235 c = scan - PL_reginput;
4236 PL_reginput = scan;
4237
4238 DEBUG_r(
4239 {
4240 SV *prop = sv_newmortal();
4241
4242 regprop(prop, p);
4243 PerlIO_printf(Perl_debug_log,
4244 "%*s %s can match %ld times out of %ld...\n",
4245 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
4246 });
4247
4248 return(c);
4249}
4250
4251/*
4252 - regrepeat_hard - repeatedly match something, report total lenth and length
4253 *
4254 * The repeater is supposed to have constant length.
4255 */
4256
4257STATIC I32
4258S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
4259{
4260 dTHR;
4261 register char *scan;
4262 register char *start;
4263 register char *loceol = PL_regeol;
4264 I32 l = 0;
4265 I32 count = 0, res = 1;
4266
4267 if (!max)
4268 return 0;
4269
4270 start = PL_reginput;
4271 if (UTF) {
4272 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4273 if (!count++) {
4274 l = 0;
4275 while (start < PL_reginput) {
4276 l++;
4277 start += UTF8SKIP(start);
4278 }
4279 *lp = l;
4280 if (l == 0)
4281 return max;
4282 }
4283 if (count == max)
4284 return count;
4285 }
4286 }
4287 else {
4288 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
4289 if (!count++) {
4290 *lp = l = PL_reginput - start;
4291 if (max != REG_INFTY && l*max < loceol - scan)
4292 loceol = scan + l*max;
4293 if (l == 0)
4294 return max;
4295 }
4296 }
4297 }
4298 if (!res)
4299 PL_reginput = scan;
4300
4301 return count;
4302}
4303
4304/*
4305 - reginclass - determine if a character falls into a character class
4306 */
4307
4308STATIC bool
4309S_reginclass(pTHX_ register char *p, register I32 c)
4310{
4311 dTHR;
4312 char flags = ANYOF_FLAGS(p);
4313 bool match = FALSE;
4314
4315 c &= 0xFF;
4316 if (ANYOF_BITMAP_TEST(p, c))
4317 match = TRUE;
4318 else if (flags & ANYOF_FOLD) {
4319 I32 cf;
4320 if (flags & ANYOF_LOCALE) {
4321 PL_reg_flags |= RF_tainted;
4322 cf = PL_fold_locale[c];
4323 }
4324 else
4325 cf = PL_fold[c];
4326 if (ANYOF_BITMAP_TEST(p, cf))
4327 match = TRUE;
4328 }
4329
4330 if (!match && (flags & ANYOF_CLASS)) {
4331 PL_reg_flags |= RF_tainted;
4332 if (
4333 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4334 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4335 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
4336 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4337 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4338 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4339 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4340 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4341 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4342 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4343 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
4344 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
4345 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4346 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4347 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4348 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4349 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
4350 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4351 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
4352 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4353 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4354 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4355 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
4356 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4357 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4358 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
4359 ) /* How's that for a conditional? */
4360 {
4361 match = TRUE;
4362 }
4363 }
4364
4365 return (flags & ANYOF_INVERT) ? !match : match;
4366}
4367
4368STATIC bool
4369S_reginclassutf8(pTHX_ regnode *f, U8 *p)
4370{
4371 dTHR;
4372 char flags = ARG1(f);
4373 bool match = FALSE;
4374 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
4375
4376 if (swash_fetch(sv, p))
4377 match = TRUE;
4378 else if (flags & ANYOF_FOLD) {
4379 I32 cf;
4380 U8 tmpbuf[10];
4381 if (flags & ANYOF_LOCALE) {
4382 PL_reg_flags |= RF_tainted;
4383 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
4384 }
4385 else
4386 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
4387 if (swash_fetch(sv, tmpbuf))
4388 match = TRUE;
4389 }
4390
4391 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
4392
4393 return (flags & ANYOF_INVERT) ? !match : match;
4394}
4395
4396STATIC U8 *
4397S_reghop(pTHX_ U8 *s, I32 off)
4398{
4399 dTHR;
4400 if (off >= 0) {
4401 while (off-- && s < (U8*)PL_regeol)
4402 s += UTF8SKIP(s);
4403 }
4404 else {
4405 while (off++) {
4406 if (s > (U8*)PL_bostr) {
4407 s--;
4408 if (*s & 0x80) {
4409 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
4410 s--;
4411 } /* XXX could check well-formedness here */
4412 }
4413 }
4414 }
4415 return s;
4416}
4417
4418STATIC U8 *
4419S_reghopmaybe(pTHX_ U8* s, I32 off)
4420{
4421 dTHR;
4422 if (off >= 0) {
4423 while (off-- && s < (U8*)PL_regeol)
4424 s += UTF8SKIP(s);
4425 if (off >= 0)
4426 return 0;
4427 }
4428 else {
4429 while (off++) {
4430 if (s > (U8*)PL_bostr) {
4431 s--;
4432 if (*s & 0x80) {
4433 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
4434 s--;
4435 } /* XXX could check well-formedness here */
4436 }
4437 else
4438 break;
4439 }
4440 if (off <= 0)
4441 return 0;
4442 }
4443 return s;
4444}
4445
4446#ifdef PERL_OBJECT
4447#define NO_XSLOCKS
4448#include "XSUB.h"
4449#endif
4450
4451static void
4452restore_pos(pTHXo_ void *arg)
4453{
4454 dTHR;
4455 if (PL_reg_eval_set) {
4456 if (PL_reg_oldsaved) {
4457 PL_reg_re->subbeg = PL_reg_oldsaved;
4458 PL_reg_re->sublen = PL_reg_oldsavedlen;
4459 RX_MATCH_COPIED_on(PL_reg_re);
4460 }
4461 PL_reg_magic->mg_len = PL_reg_oldpos;
4462 PL_reg_eval_set = 0;
4463 PL_curpm = PL_reg_oldcurpm;
4464 }
4465}
4466