5 * "One Ring to rule them all, One Ring to find them..."
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
12 /* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
17 /* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 # ifndef PERL_IN_XSUB_RE
25 # define PERL_IN_XSUB_RE
27 /* need access to debugger hooks */
28 # if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 # define Perl_regexec_flags my_regexec
36 # define Perl_regdump my_regdump
37 # define Perl_regprop my_regprop
38 # define Perl_re_intuit_start my_re_intuit_start
39 /* *These* symbols are masked to allow static link. */
40 # define Perl_pregexec my_pregexec
41 # define Perl_reginitcolors my_reginitcolors
43 # define PERL_NO_GET_CONTEXT
48 * pregcomp and pregexec -- regsub and regerror are not used in perl
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
67 **** Alterations to Henry's code are...
69 **** Copyright (c) 1991-2000, Larry Wall
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
79 #define PERL_IN_REGEXEC_C
82 #ifdef PERL_IN_XSUB_RE
83 # if defined(PERL_CAPI) || defined(PERL_OBJECT)
90 #define RF_tainted 1 /* tainted information used? */
91 #define RF_warned 2 /* warned about big count? */
92 #define RF_evaled 4 /* Did an EVAL with setting? */
93 #define RF_utf8 8 /* String contains multibyte chars? */
95 #define UTF (PL_reg_flags & RF_utf8)
97 #define RS_init 1 /* eval environment created */
98 #define RS_set 2 /* replsv value is set */
101 #define STATIC static
108 #define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
110 # define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch(*av_fetch((AV*)SvRV((SV*)PL_regdata->data[ARG2(f)]),0,FALSE),p))
112 # define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
115 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
116 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
118 #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
119 #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
120 #define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
121 #define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
122 #define HOPc(pos,off) ((char*)HOP(pos,off))
123 #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
125 static void restore_pos(pTHXo_ void *arg);
129 S_regcppush(pTHX_ I32 parenfloor)
131 int retval = PL_savestack_ix;
132 #define REGCP_PAREN_ELEMS 4
133 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
136 #define REGCP_OTHER_ELEMS 5
137 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
138 for (p = PL_regsize; p > parenfloor; p--) {
139 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
140 SSPUSHINT(PL_regendp[p]);
141 SSPUSHINT(PL_regstartp[p]);
142 SSPUSHPTR(PL_reg_start_tmp[p]);
145 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
146 SSPUSHINT(PL_regsize);
147 SSPUSHINT(*PL_reglastparen);
148 SSPUSHPTR(PL_reginput);
149 #define REGCP_FRAME_ELEMS 2
150 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
151 * are needed for the regexp context stack bookkeeping. */
152 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
153 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
158 /* These are needed since we do not localize EVAL nodes: */
159 # define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
160 " Setting an EVAL scope, savestack=%"IVdf"\n", \
161 (IV)PL_savestack_ix)); cp = PL_savestack_ix
163 # define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
164 PerlIO_printf(Perl_debug_log, \
165 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
166 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
176 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
178 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
179 i = SSPOPINT; /* Parentheses elements to pop. */
180 input = (char *) SSPOPPTR;
181 *PL_reglastparen = SSPOPINT;
182 PL_regsize = SSPOPINT;
184 /* Now restore the parentheses context. */
185 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
186 i > 0; i -= REGCP_PAREN_ELEMS) {
187 paren = (U32)SSPOPINT;
188 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
189 PL_regstartp[paren] = SSPOPINT;
191 if (paren <= *PL_reglastparen)
192 PL_regendp[paren] = tmps;
194 PerlIO_printf(Perl_debug_log,
195 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
196 (UV)paren, (IV)PL_regstartp[paren],
197 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
198 (IV)PL_regendp[paren],
199 (paren > *PL_reglastparen ? "(no)" : ""));
203 if (*PL_reglastparen + 1 <= PL_regnpar) {
204 PerlIO_printf(Perl_debug_log,
205 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
206 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
210 /* It would seem that the similar code in regtry()
211 * already takes care of this, and in fact it is in
212 * a better location to since this code can #if 0-ed out
213 * but the code in regtry() is needed or otherwise tests
214 * requiring null fields (pat.t#187 and split.t#{13,14}
215 * (as of patchlevel 7877) will fail. Then again,
216 * this code seems to be necessary or otherwise
217 * building DynaLoader will fail:
218 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
220 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
221 if (paren > PL_regsize)
222 PL_regstartp[paren] = -1;
223 PL_regendp[paren] = -1;
230 S_regcp_set_to(pTHX_ I32 ss)
232 I32 tmp = PL_savestack_ix;
234 PL_savestack_ix = ss;
236 PL_savestack_ix = tmp;
240 typedef struct re_cc_state
244 struct re_cc_state *prev;
249 #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
251 #define TRYPAREN(paren, n, input) { \
254 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
255 PL_regendp[paren] = input - PL_bostr; \
258 PL_regendp[paren] = -1; \
260 if (regmatch(next)) \
263 PL_regendp[paren] = -1; \
268 * pregexec and friends
272 - pregexec - match a regexp against a string
275 Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
276 char *strbeg, I32 minend, SV *screamer, U32 nosave)
277 /* strend: pointer to null at end of string */
278 /* strbeg: real beginning of string */
279 /* minend: end of match must be >=minend after stringarg. */
280 /* nosave: For optimizations. */
283 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
284 nosave ? 0 : REXEC_COPY_STR);
288 S_cache_re(pTHX_ regexp *prog)
290 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
292 PL_regprogram = prog->program;
294 PL_regnpar = prog->nparens;
295 PL_regdata = prog->data;
300 * Need to implement the following flags for reg_anch:
302 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
304 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
305 * INTUIT_AUTORITATIVE_ML
306 * INTUIT_ONCE_NOML - Intuit can match in one location only.
309 * Another flag for this function: SECOND_TIME (so that float substrs
310 * with giant delta may be not rechecked).
313 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
315 /* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
316 Otherwise, only SvCUR(sv) is used to get strbeg. */
318 /* XXXX We assume that strpos is strbeg unless sv. */
320 /* XXXX Some places assume that there is a fixed substring.
321 An update may be needed if optimizer marks as "INTUITable"
322 RExen without fixed substrings. Similarly, it is assumed that
323 lengths of all the strings are no more than minlen, thus they
324 cannot come from lookahead.
325 (Or minlen should take into account lookahead.) */
327 /* A failure to find a constant substring means that there is no need to make
328 an expensive call to REx engine, thus we celebrate a failure. Similarly,
329 finding a substring too deep into the string means that less calls to
330 regtry() should be needed.
332 REx compiler's optimizer found 4 possible hints:
333 a) Anchored substring;
335 c) Whether we are anchored (beginning-of-line or \G);
336 d) First node (of those at offset 0) which may distingush positions;
337 We use a)b)d) and multiline-part of c), and try to find a position in the
338 string which does not contradict any of them.
341 /* Most of decisions we do here should have been done at compile time.
342 The nodes of the REx which we used for the search should have been
343 deleted from the finite automaton. */
346 Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
347 char *strend, U32 flags, re_scream_pos_data *data)
349 register I32 start_shift;
350 /* Should be nonnegative! */
351 register I32 end_shift;
358 register char *other_last = Nullch; /* other substr checked before this */
359 char *check_at; /* check substr found at this pos */
361 char *i_strpos = strpos;
364 DEBUG_r( if (!PL_colorset) reginitcolors() );
365 DEBUG_r(PerlIO_printf(Perl_debug_log,
366 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
367 PL_colors[4],PL_colors[5],PL_colors[0],
370 (strlen(prog->precomp) > 60 ? "..." : ""),
372 (int)(strend - strpos > 60 ? 60 : strend - strpos),
373 strpos, PL_colors[1],
374 (strend - strpos > 60 ? "..." : ""))
377 if (prog->minlen > strend - strpos) {
378 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
381 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
382 check = prog->check_substr;
383 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
384 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
385 || ( (prog->reganch & ROPT_ANCH_BOL)
386 && !PL_multiline ) ); /* Check after \n? */
389 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
390 /* SvCUR is not set on references: SvRV and SvPVX overlap */
392 && (strpos != strbeg)) {
393 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
396 if (prog->check_offset_min == prog->check_offset_max) {
397 /* Substring at constant offset from beg-of-str... */
400 PL_regeol = strend; /* Used in HOP() */
401 s = HOPc(strpos, prog->check_offset_min);
403 slen = SvCUR(check); /* >= 1 */
405 if ( strend - s > slen || strend - s < slen - 1
406 || (strend - s == slen && strend[-1] != '\n')) {
407 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
410 /* Now should match s[0..slen-2] */
412 if (slen && (*SvPVX(check) != *s
414 && memNE(SvPVX(check), s, slen)))) {
416 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
420 else if (*SvPVX(check) != *s
421 || ((slen = SvCUR(check)) > 1
422 && memNE(SvPVX(check), s, slen)))
424 goto success_at_start;
427 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
429 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
430 end_shift = prog->minlen - start_shift -
431 CHR_SVLEN(check) + (SvTAIL(check) != 0);
433 I32 end = prog->check_offset_max + CHR_SVLEN(check)
434 - (SvTAIL(check) != 0);
435 I32 eshift = strend - s - end;
437 if (end_shift < eshift)
441 else { /* Can match at random position */
444 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
445 /* Should be nonnegative! */
446 end_shift = prog->minlen - start_shift -
447 CHR_SVLEN(check) + (SvTAIL(check) != 0);
450 #ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
452 Perl_croak(aTHX_ "panic: end_shift");
458 /* Find a possible match in the region s..strend by looking for
459 the "check" substring in the region corrected by start/end_shift. */
460 if (flags & REXEC_SCREAM) {
461 I32 p = -1; /* Internal iterator of scream. */
462 I32 *pp = data ? data->scream_pos : &p;
464 if (PL_screamfirst[BmRARE(check)] >= 0
465 || ( BmRARE(check) == '\n'
466 && (BmPREVIOUS(check) == SvCUR(check) - 1)
468 s = screaminstr(sv, check,
469 start_shift + (s - strbeg), end_shift, pp, 0);
473 *data->scream_olds = s;
476 s = fbm_instr((unsigned char*)s + start_shift,
477 (unsigned char*)strend - end_shift,
478 check, PL_multiline ? FBMrf_MULTILINE : 0);
480 /* Update the count-of-usability, remove useless subpatterns,
483 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
484 (s ? "Found" : "Did not find"),
485 ((check == prog->anchored_substr) ? "anchored" : "floating"),
487 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
489 PL_colors[1], (SvTAIL(check) ? "$" : ""),
490 (s ? " at offset " : "...\n") ) );
497 /* Finish the diagnostic message */
498 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
500 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
501 Start with the other substr.
502 XXXX no SCREAM optimization yet - and a very coarse implementation
503 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
504 *always* match. Probably should be marked during compile...
505 Probably it is right to do no SCREAM here...
508 if (prog->float_substr && prog->anchored_substr) {
509 /* Take into account the "other" substring. */
510 /* XXXX May be hopelessly wrong for UTF... */
513 if (check == prog->float_substr) {
516 char *last = s - start_shift, *last1, *last2;
520 t = s - prog->check_offset_max;
521 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
522 && (!(prog->reganch & ROPT_UTF8)
523 || (PL_bostr = strpos, /* Used in regcopmaybe() */
524 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
529 t += prog->anchored_offset;
530 if (t < other_last) /* These positions already checked */
533 last2 = last1 = strend - prog->minlen;
536 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
537 /* On end-of-str: see comment below. */
538 s = fbm_instr((unsigned char*)t,
539 (unsigned char*)last1 + prog->anchored_offset
540 + SvCUR(prog->anchored_substr)
541 - (SvTAIL(prog->anchored_substr)!=0),
542 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
543 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
544 (s ? "Found" : "Contradicts"),
546 (int)(SvCUR(prog->anchored_substr)
547 - (SvTAIL(prog->anchored_substr)!=0)),
548 SvPVX(prog->anchored_substr),
549 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
551 if (last1 >= last2) {
552 DEBUG_r(PerlIO_printf(Perl_debug_log,
553 ", giving up...\n"));
556 DEBUG_r(PerlIO_printf(Perl_debug_log,
557 ", trying floating at offset %ld...\n",
558 (long)(s1 + 1 - i_strpos)));
559 PL_regeol = strend; /* Used in HOP() */
560 other_last = last1 + prog->anchored_offset + 1;
565 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
566 (long)(s - i_strpos)));
567 t = s - prog->anchored_offset;
576 else { /* Take into account the floating substring. */
581 last1 = last = strend - prog->minlen + prog->float_min_offset;
582 if (last - t > prog->float_max_offset)
583 last = t + prog->float_max_offset;
584 s = t + prog->float_min_offset;
587 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
588 /* fbm_instr() takes into account exact value of end-of-str
589 if the check is SvTAIL(ed). Since false positives are OK,
590 and end-of-str is not later than strend we are OK. */
591 s = fbm_instr((unsigned char*)s,
592 (unsigned char*)last + SvCUR(prog->float_substr)
593 - (SvTAIL(prog->float_substr)!=0),
594 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
595 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
596 (s ? "Found" : "Contradicts"),
598 (int)(SvCUR(prog->float_substr)
599 - (SvTAIL(prog->float_substr)!=0)),
600 SvPVX(prog->float_substr),
601 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
604 DEBUG_r(PerlIO_printf(Perl_debug_log,
605 ", giving up...\n"));
608 DEBUG_r(PerlIO_printf(Perl_debug_log,
609 ", trying anchored starting at offset %ld...\n",
610 (long)(s1 + 1 - i_strpos)));
612 PL_regeol = strend; /* Used in HOP() */
617 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
618 (long)(s - i_strpos)));
619 other_last = s; /* Fix this later. --Hugo */
628 t = s - prog->check_offset_max;
630 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
631 && (!(prog->reganch & ROPT_UTF8)
632 || (PL_bostr = strpos, /* Used in regcopmaybe() */
633 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
636 /* Fixed substring is found far enough so that the match
637 cannot start at strpos. */
639 if (ml_anch && t[-1] != '\n') {
640 /* Eventually fbm_*() should handle this, but often
641 anchored_offset is not 0, so this check will not be wasted. */
642 /* XXXX In the code below we prefer to look for "^" even in
643 presence of anchored substrings. And we search even
644 beyond the found float position. These pessimizations
645 are historical artefacts only. */
647 while (t < strend - prog->minlen) {
649 if (t < check_at - prog->check_offset_min) {
650 if (prog->anchored_substr) {
651 /* Since we moved from the found position,
652 we definitely contradict the found anchored
653 substr. Due to the above check we do not
654 contradict "check" substr.
655 Thus we can arrive here only if check substr
656 is float. Redo checking for "other"=="fixed".
659 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
660 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
661 goto do_other_anchored;
663 /* We don't contradict the found floating substring. */
664 /* XXXX Why not check for STCLASS? */
666 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
667 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
670 /* Position contradicts check-string */
671 /* XXXX probably better to look for check-string
672 than for "\n", so one should lower the limit for t? */
673 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
674 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
675 other_last = strpos = s = t + 1;
680 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
681 PL_colors[0],PL_colors[1]));
685 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
686 PL_colors[0],PL_colors[1]));
690 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
694 /* The found string does not prohibit matching at strpos,
695 - no optimization of calling REx engine can be performed,
696 unless it was an MBOL and we are not after MBOL,
697 or a future STCLASS check will fail this. */
699 /* Even in this situation we may use MBOL flag if strpos is offset
700 wrt the start of the string. */
701 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
702 && (strpos != strbeg) && strpos[-1] != '\n'
703 /* May be due to an implicit anchor of m{.*foo} */
704 && !(prog->reganch & ROPT_IMPLICIT))
709 DEBUG_r( if (ml_anch)
710 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
711 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
714 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
715 && prog->check_substr /* Could be deleted already */
716 && --BmUSEFUL(prog->check_substr) < 0
717 && prog->check_substr == prog->float_substr)
719 /* If flags & SOMETHING - do not do it many times on the same match */
720 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
721 SvREFCNT_dec(prog->check_substr);
722 prog->check_substr = Nullsv; /* disable */
723 prog->float_substr = Nullsv; /* clear */
724 check = Nullsv; /* abort */
726 /* XXXX This is a remnant of the old implementation. It
727 looks wasteful, since now INTUIT can use many
729 prog->reganch &= ~RE_USE_INTUIT;
736 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
737 if (prog->regstclass) {
738 /* minlen == 0 is possible if regstclass is \b or \B,
739 and the fixed substr is ''$.
740 Since minlen is already taken into account, s+1 is before strend;
741 accidentally, minlen >= 1 guaranties no false positives at s + 1
742 even for \b or \B. But (minlen? 1 : 0) below assumes that
743 regstclass does not come from lookahead... */
744 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
745 This leaves EXACTF only, which is dealt with in find_byclass(). */
746 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
747 ? STR_LEN(prog->regstclass)
749 char *endpos = (prog->anchored_substr || ml_anch)
750 ? s + (prog->minlen? cl_l : 0)
751 : (prog->float_substr ? check_at - start_shift + cl_l
753 char *startpos = strbeg;
756 if (prog->reganch & ROPT_UTF8) {
757 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
760 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
765 if (endpos == strend) {
766 DEBUG_r( PerlIO_printf(Perl_debug_log,
767 "Could not match STCLASS...\n") );
770 DEBUG_r( PerlIO_printf(Perl_debug_log,
771 "This position contradicts STCLASS...\n") );
772 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
774 /* Contradict one of substrings */
775 if (prog->anchored_substr) {
776 if (prog->anchored_substr == check) {
777 DEBUG_r( what = "anchored" );
779 PL_regeol = strend; /* Used in HOP() */
781 if (s + start_shift + end_shift > strend) {
782 /* XXXX Should be taken into account earlier? */
783 DEBUG_r( PerlIO_printf(Perl_debug_log,
784 "Could not match STCLASS...\n") );
789 DEBUG_r( PerlIO_printf(Perl_debug_log,
790 "Looking for %s substr starting at offset %ld...\n",
791 what, (long)(s + start_shift - i_strpos)) );
794 /* Have both, check_string is floating */
795 if (t + start_shift >= check_at) /* Contradicts floating=check */
796 goto retry_floating_check;
797 /* Recheck anchored substring, but not floating... */
801 DEBUG_r( PerlIO_printf(Perl_debug_log,
802 "Looking for anchored substr starting at offset %ld...\n",
803 (long)(other_last - i_strpos)) );
804 goto do_other_anchored;
806 /* Another way we could have checked stclass at the
807 current position only: */
812 DEBUG_r( PerlIO_printf(Perl_debug_log,
813 "Looking for /%s^%s/m starting at offset %ld...\n",
814 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
817 if (!prog->float_substr) /* Could have been deleted */
819 /* Check is floating subtring. */
820 retry_floating_check:
821 t = check_at - start_shift;
822 DEBUG_r( what = "floating" );
823 goto hop_and_restart;
826 PerlIO_printf(Perl_debug_log,
827 "By STCLASS: moving %ld --> %ld\n",
828 (long)(t - i_strpos), (long)(s - i_strpos));
830 PerlIO_printf(Perl_debug_log,
831 "Does not contradict STCLASS...\n") );
834 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
835 PL_colors[4], (check ? "Guessed" : "Giving up"),
836 PL_colors[5], (long)(s - i_strpos)) );
839 fail_finish: /* Substring not found */
840 if (prog->check_substr) /* could be removed already */
841 BmUSEFUL(prog->check_substr) += 5; /* hooray */
843 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
844 PL_colors[4],PL_colors[5]));
848 /* We know what class REx starts with. Try to find this position... */
850 S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
852 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
858 register I32 tmp = 1; /* Scratch variable? */
860 /* We know what class it must start with. */
864 if (REGINCLASSUTF8(c, (U8*)s)) {
865 if (tmp && (norun || regtry(prog, s)))
877 if (REGINCLASS(c, *(U8*)s)) {
878 if (tmp && (norun || regtry(prog, s)))
898 c2 = PL_fold_locale[c1];
903 e = s; /* Due to minlen logic of intuit() */
904 /* Here it is NOT UTF! */
908 && (ln == 1 || !(OP(c) == EXACTF
910 : ibcmp_locale(s, m, ln)))
911 && (norun || regtry(prog, s)) )
917 if ( (*(U8*)s == c1 || *(U8*)s == c2)
918 && (ln == 1 || !(OP(c) == EXACTF
920 : ibcmp_locale(s, m, ln)))
921 && (norun || regtry(prog, s)) )
928 PL_reg_flags |= RF_tainted;
931 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
932 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
934 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
936 if ((norun || regtry(prog, s)))
941 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
945 PL_reg_flags |= RF_tainted;
951 U8 *r = reghop((U8*)s, -1);
953 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
955 tmp = ((OP(c) == BOUNDUTF8 ?
956 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
958 if (tmp == !(OP(c) == BOUNDUTF8 ?
959 swash_fetch(PL_utf8_alnum, (U8*)s) :
960 isALNUM_LC_utf8((U8*)s)))
963 if ((norun || regtry(prog, s)))
968 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
972 PL_reg_flags |= RF_tainted;
975 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
976 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
978 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
980 else if ((norun || regtry(prog, s)))
984 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
988 PL_reg_flags |= RF_tainted;
994 U8 *r = reghop((U8*)s, -1);
996 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
998 tmp = ((OP(c) == NBOUNDUTF8 ?
999 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
1000 while (s < strend) {
1001 if (tmp == !(OP(c) == NBOUNDUTF8 ?
1002 swash_fetch(PL_utf8_alnum, (U8*)s) :
1003 isALNUM_LC_utf8((U8*)s)))
1005 else if ((norun || regtry(prog, s)))
1009 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
1013 while (s < strend) {
1015 if (tmp && (norun || regtry(prog, s)))
1026 while (s < strend) {
1027 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1028 if (tmp && (norun || regtry(prog, s)))
1039 PL_reg_flags |= RF_tainted;
1040 while (s < strend) {
1041 if (isALNUM_LC(*s)) {
1042 if (tmp && (norun || regtry(prog, s)))
1053 PL_reg_flags |= RF_tainted;
1054 while (s < strend) {
1055 if (isALNUM_LC_utf8((U8*)s)) {
1056 if (tmp && (norun || regtry(prog, s)))
1067 while (s < strend) {
1069 if (tmp && (norun || regtry(prog, s)))
1080 while (s < strend) {
1081 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1082 if (tmp && (norun || regtry(prog, s)))
1093 PL_reg_flags |= RF_tainted;
1094 while (s < strend) {
1095 if (!isALNUM_LC(*s)) {
1096 if (tmp && (norun || regtry(prog, s)))
1107 PL_reg_flags |= RF_tainted;
1108 while (s < strend) {
1109 if (!isALNUM_LC_utf8((U8*)s)) {
1110 if (tmp && (norun || regtry(prog, s)))
1121 while (s < strend) {
1123 if (tmp && (norun || regtry(prog, s)))
1134 while (s < strend) {
1135 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1136 if (tmp && (norun || regtry(prog, s)))
1147 PL_reg_flags |= RF_tainted;
1148 while (s < strend) {
1149 if (isSPACE_LC(*s)) {
1150 if (tmp && (norun || regtry(prog, s)))
1161 PL_reg_flags |= RF_tainted;
1162 while (s < strend) {
1163 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1164 if (tmp && (norun || regtry(prog, s)))
1175 while (s < strend) {
1177 if (tmp && (norun || regtry(prog, s)))
1188 while (s < strend) {
1189 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1190 if (tmp && (norun || regtry(prog, s)))
1201 PL_reg_flags |= RF_tainted;
1202 while (s < strend) {
1203 if (!isSPACE_LC(*s)) {
1204 if (tmp && (norun || regtry(prog, s)))
1215 PL_reg_flags |= RF_tainted;
1216 while (s < strend) {
1217 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1218 if (tmp && (norun || regtry(prog, s)))
1229 while (s < strend) {
1231 if (tmp && (norun || regtry(prog, s)))
1242 while (s < strend) {
1243 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1244 if (tmp && (norun || regtry(prog, s)))
1255 PL_reg_flags |= RF_tainted;
1256 while (s < strend) {
1257 if (isDIGIT_LC(*s)) {
1258 if (tmp && (norun || regtry(prog, s)))
1269 PL_reg_flags |= RF_tainted;
1270 while (s < strend) {
1271 if (isDIGIT_LC_utf8((U8*)s)) {
1272 if (tmp && (norun || regtry(prog, s)))
1283 while (s < strend) {
1285 if (tmp && (norun || regtry(prog, s)))
1296 while (s < strend) {
1297 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1298 if (tmp && (norun || regtry(prog, s)))
1309 PL_reg_flags |= RF_tainted;
1310 while (s < strend) {
1311 if (!isDIGIT_LC(*s)) {
1312 if (tmp && (norun || regtry(prog, s)))
1323 PL_reg_flags |= RF_tainted;
1324 while (s < strend) {
1325 if (!isDIGIT_LC_utf8((U8*)s)) {
1326 if (tmp && (norun || regtry(prog, s)))
1337 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1346 - regexec_flags - match a regexp against a string
1349 Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1350 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1351 /* strend: pointer to null at end of string */
1352 /* strbeg: real beginning of string */
1353 /* minend: end of match must be >=minend after stringarg. */
1354 /* data: May be used for some additional optimizations. */
1355 /* nosave: For optimizations. */
1358 register regnode *c;
1359 register char *startpos = stringarg;
1360 I32 minlen; /* must match at least this many chars */
1361 I32 dontbother = 0; /* how many characters not to try at end */
1362 /* I32 start_shift = 0; */ /* Offset of the start to find
1363 constant substr. */ /* CC */
1364 I32 end_shift = 0; /* Same for the end. */ /* CC */
1365 I32 scream_pos = -1; /* Internal iterator of scream. */
1367 SV* oreplsv = GvSV(PL_replgv);
1373 PL_regnarrate = PL_debug & 512;
1376 /* Be paranoid... */
1377 if (prog == NULL || startpos == NULL) {
1378 Perl_croak(aTHX_ "NULL regexp parameter");
1382 minlen = prog->minlen;
1383 if (strend - startpos < minlen) goto phooey;
1385 if (startpos == strbeg) /* is ^ valid at stringarg? */
1388 PL_regprev = (U32)stringarg[-1];
1389 if (!PL_multiline && PL_regprev == '\n')
1390 PL_regprev = '\0'; /* force ^ to NOT match */
1393 /* Check validity of program. */
1394 if (UCHARAT(prog->program) != REG_MAGIC) {
1395 Perl_croak(aTHX_ "corrupted regexp program");
1399 PL_reg_eval_set = 0;
1402 if (prog->reganch & ROPT_UTF8)
1403 PL_reg_flags |= RF_utf8;
1405 /* Mark beginning of line for ^ and lookbehind. */
1406 PL_regbol = startpos;
1410 /* Mark end of line for $ (and such) */
1413 /* see how far we have to get to not match where we matched before */
1414 PL_regtill = startpos+minend;
1416 /* We start without call_cc context. */
1419 /* If there is a "must appear" string, look for it. */
1422 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1425 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1426 PL_reg_ganch = startpos;
1427 else if (sv && SvTYPE(sv) >= SVt_PVMG
1429 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1430 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1431 if (prog->reganch & ROPT_ANCH_GPOS) {
1432 if (s > PL_reg_ganch)
1437 else /* pos() not defined */
1438 PL_reg_ganch = strbeg;
1441 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1442 re_scream_pos_data d;
1444 d.scream_olds = &scream_olds;
1445 d.scream_pos = &scream_pos;
1446 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1448 goto phooey; /* not present */
1451 DEBUG_r( if (!PL_colorset) reginitcolors() );
1452 DEBUG_r(PerlIO_printf(Perl_debug_log,
1453 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1454 PL_colors[4],PL_colors[5],PL_colors[0],
1457 (strlen(prog->precomp) > 60 ? "..." : ""),
1459 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1460 startpos, PL_colors[1],
1461 (strend - startpos > 60 ? "..." : ""))
1464 /* Simplest case: anchored match need be tried only once. */
1465 /* [unless only anchor is BOL and multiline is set] */
1466 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1467 if (s == startpos && regtry(prog, startpos))
1469 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1470 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1475 dontbother = minlen - 1;
1476 end = HOPc(strend, -dontbother) - 1;
1477 /* for multiline we only have to try after newlines */
1478 if (prog->check_substr) {
1482 if (regtry(prog, s))
1487 if (prog->reganch & RE_USE_INTUIT) {
1488 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1499 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1500 if (regtry(prog, s))
1507 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1508 if (regtry(prog, PL_reg_ganch))
1513 /* Messy cases: unanchored match. */
1514 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1515 /* we have /x+whatever/ */
1516 /* it must be a one character string (XXXX Except UTF?) */
1517 char ch = SvPVX(prog->anchored_substr)[0];
1523 while (s < strend) {
1525 DEBUG_r( did_match = 1 );
1526 if (regtry(prog, s)) goto got_it;
1528 while (s < strend && *s == ch)
1535 while (s < strend) {
1537 DEBUG_r( did_match = 1 );
1538 if (regtry(prog, s)) goto got_it;
1540 while (s < strend && *s == ch)
1546 DEBUG_r(did_match ||
1547 PerlIO_printf(Perl_debug_log,
1548 "Did not find anchored character...\n"));
1551 else if (prog->anchored_substr != Nullsv
1552 || (prog->float_substr != Nullsv
1553 && prog->float_max_offset < strend - s)) {
1554 SV *must = prog->anchored_substr
1555 ? prog->anchored_substr : prog->float_substr;
1557 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1559 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1560 char *last = HOPc(strend, /* Cannot start after this */
1561 -(I32)(CHR_SVLEN(must)
1562 - (SvTAIL(must) != 0) + back_min));
1563 char *last1; /* Last position checked before */
1569 last1 = HOPc(s, -1);
1571 last1 = s - 1; /* bogus */
1573 /* XXXX check_substr already used to find `s', can optimize if
1574 check_substr==must. */
1576 dontbother = end_shift;
1577 strend = HOPc(strend, -dontbother);
1578 while ( (s <= last) &&
1579 ((flags & REXEC_SCREAM)
1580 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1581 end_shift, &scream_pos, 0))
1582 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1583 (unsigned char*)strend, must,
1584 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1585 DEBUG_r( did_match = 1 );
1586 if (HOPc(s, -back_max) > last1) {
1587 last1 = HOPc(s, -back_min);
1588 s = HOPc(s, -back_max);
1591 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1593 last1 = HOPc(s, -back_min);
1597 while (s <= last1) {
1598 if (regtry(prog, s))
1604 while (s <= last1) {
1605 if (regtry(prog, s))
1611 DEBUG_r(did_match ||
1612 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1613 ((must == prog->anchored_substr)
1614 ? "anchored" : "floating"),
1616 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1618 PL_colors[1], (SvTAIL(must) ? "$" : "")));
1621 else if ((c = prog->regstclass)) {
1622 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1623 /* don't bother with what can't match */
1624 strend = HOPc(strend, -(minlen - 1));
1625 if (find_byclass(prog, c, s, strend, startpos, 0))
1627 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
1631 if (prog->float_substr != Nullsv) { /* Trim the end. */
1634 if (flags & REXEC_SCREAM) {
1635 last = screaminstr(sv, prog->float_substr, s - strbeg,
1636 end_shift, &scream_pos, 1); /* last one */
1638 last = scream_olds; /* Only one occurence. */
1642 char *little = SvPV(prog->float_substr, len);
1644 if (SvTAIL(prog->float_substr)) {
1645 if (memEQ(strend - len + 1, little, len - 1))
1646 last = strend - len + 1;
1647 else if (!PL_multiline)
1648 last = memEQ(strend - len, little, len)
1649 ? strend - len : Nullch;
1655 last = rninstr(s, strend, little, little + len);
1657 last = strend; /* matching `$' */
1661 DEBUG_r(PerlIO_printf(Perl_debug_log,
1662 "%sCan't trim the tail, match fails (should not happen)%s\n",
1663 PL_colors[4],PL_colors[5]));
1664 goto phooey; /* Should not happen! */
1666 dontbother = strend - last + prog->float_min_offset;
1668 if (minlen && (dontbother < minlen))
1669 dontbother = minlen - 1;
1670 strend -= dontbother; /* this one's always in bytes! */
1671 /* We don't know much -- general case. */
1674 if (regtry(prog, s))
1683 if (regtry(prog, s))
1685 } while (s++ < strend);
1693 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1695 if (PL_reg_eval_set) {
1696 /* Preserve the current value of $^R */
1697 if (oreplsv != GvSV(PL_replgv))
1698 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1699 restored, the value remains
1701 restore_pos(aTHXo_ 0);
1704 /* make sure $`, $&, $', and $digit will work later */
1705 if ( !(flags & REXEC_NOT_FIRST) ) {
1706 if (RX_MATCH_COPIED(prog)) {
1707 Safefree(prog->subbeg);
1708 RX_MATCH_COPIED_off(prog);
1710 if (flags & REXEC_COPY_STR) {
1711 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1713 s = savepvn(strbeg, i);
1716 RX_MATCH_COPIED_on(prog);
1719 prog->subbeg = strbeg;
1720 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1727 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1728 PL_colors[4],PL_colors[5]));
1729 if (PL_reg_eval_set)
1730 restore_pos(aTHXo_ 0);
1735 - regtry - try match at specific point
1737 STATIC I32 /* 0 failure, 1 success */
1738 S_regtry(pTHX_ regexp *prog, char *startpos)
1746 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1748 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1751 PL_reg_eval_set = RS_init;
1753 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1754 (IV)(PL_stack_sp - PL_stack_base));
1756 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
1757 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1758 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1760 /* Apparently this is not needed, judging by wantarray. */
1761 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
1762 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1765 /* Make $_ available to executed code. */
1766 if (PL_reg_sv != DEFSV) {
1767 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1772 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1773 && (mg = mg_find(PL_reg_sv, 'g')))) {
1774 /* prepare for quick setting of pos */
1775 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1776 mg = mg_find(PL_reg_sv, 'g');
1780 PL_reg_oldpos = mg->mg_len;
1781 SAVEDESTRUCTOR_X(restore_pos, 0);
1784 Newz(22,PL_reg_curpm, 1, PMOP);
1785 PL_reg_curpm->op_pmregexp = prog;
1786 PL_reg_oldcurpm = PL_curpm;
1787 PL_curpm = PL_reg_curpm;
1788 if (RX_MATCH_COPIED(prog)) {
1789 /* Here is a serious problem: we cannot rewrite subbeg,
1790 since it may be needed if this match fails. Thus
1791 $` inside (?{}) could fail... */
1792 PL_reg_oldsaved = prog->subbeg;
1793 PL_reg_oldsavedlen = prog->sublen;
1794 RX_MATCH_COPIED_off(prog);
1797 PL_reg_oldsaved = Nullch;
1798 prog->subbeg = PL_bostr;
1799 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1801 prog->startp[0] = startpos - PL_bostr;
1802 PL_reginput = startpos;
1803 PL_regstartp = prog->startp;
1804 PL_regendp = prog->endp;
1805 PL_reglastparen = &prog->lastparen;
1806 prog->lastparen = 0;
1808 DEBUG_r(PL_reg_starttry = startpos);
1809 if (PL_reg_start_tmpl <= prog->nparens) {
1810 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1811 if(PL_reg_start_tmp)
1812 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1814 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1817 /* XXXX What this code is doing here?!!! There should be no need
1818 to do this again and again, PL_reglastparen should take care of
1821 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1822 * Actually, the code in regcppop() (which Ilya may be meaning by
1823 * PL_reglastparen), is not needed at all by the test suite
1824 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1825 * enough, for building DynaLoader, or otherwise this
1826 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1827 * will happen. Meanwhile, this code *is* needed for the
1828 * above-mentioned test suite tests to succeed. The common theme
1829 * on those tests seems to be returning null fields from matches.
1834 if (prog->nparens) {
1835 for (i = prog->nparens; i > *PL_reglastparen; i--) {
1842 if (regmatch(prog->program + 1)) {
1843 prog->endp[0] = PL_reginput - PL_bostr;
1846 REGCP_UNWIND(lastcp);
1850 #define RE_UNWIND_BRANCH 1
1851 #define RE_UNWIND_BRANCHJ 2
1855 typedef struct { /* XX: makes sense to enlarge it... */
1859 } re_unwind_generic_t;
1872 } re_unwind_branch_t;
1874 typedef union re_unwind_t {
1876 re_unwind_generic_t generic;
1877 re_unwind_branch_t branch;
1881 - regmatch - main matching routine
1883 * Conceptually the strategy is simple: check to see whether the current
1884 * node matches, call self recursively to see whether the rest matches,
1885 * and then act accordingly. In practice we make some effort to avoid
1886 * recursion, in particular by going through "ordinary" nodes (that don't
1887 * need to know whether the rest of the match failed) by a loop instead of
1890 /* [lwall] I've hoisted the register declarations to the outer block in order to
1891 * maybe save a little bit of pushing and popping on the stack. It also takes
1892 * advantage of machines that use a register save mask on subroutine entry.
1894 STATIC I32 /* 0 failure, 1 success */
1895 S_regmatch(pTHX_ regnode *prog)
1897 register regnode *scan; /* Current node. */
1898 regnode *next; /* Next node. */
1899 regnode *inner; /* Next node in internal branch. */
1900 register I32 nextchr; /* renamed nextchr - nextchar colides with
1901 function of same name */
1902 register I32 n; /* no or next */
1903 register I32 ln; /* len or last */
1904 register char *s; /* operand or save */
1905 register char *locinput = PL_reginput;
1906 register I32 c1, c2, paren; /* case fold search, parenth */
1907 int minmod = 0, sw = 0, logical = 0;
1909 I32 firstcp = PL_savestack_ix;
1915 /* Note that nextchr is a byte even in UTF */
1916 nextchr = UCHARAT(locinput);
1918 while (scan != NULL) {
1919 #define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1921 # define sayYES goto yes
1922 # define sayNO goto no
1923 # define sayYES_FINAL goto yes_final
1924 # define sayYES_LOUD goto yes_loud
1925 # define sayNO_FINAL goto no_final
1926 # define sayNO_SILENT goto do_no
1927 # define saySAME(x) if (x) goto yes; else goto no
1928 # define REPORT_CODE_OFF 24
1930 # define sayYES return 1
1931 # define sayNO return 0
1932 # define sayYES_FINAL return 1
1933 # define sayYES_LOUD return 1
1934 # define sayNO_FINAL return 0
1935 # define sayNO_SILENT return 0
1936 # define saySAME(x) return x
1939 SV *prop = sv_newmortal();
1940 int docolor = *PL_colors[0];
1941 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1942 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1943 /* The part of the string before starttry has one color
1944 (pref0_len chars), between starttry and current
1945 position another one (pref_len - pref0_len chars),
1946 after the current position the third one.
1947 We assume that pref0_len <= pref_len, otherwise we
1948 decrease pref0_len. */
1949 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1950 ? (5 + taill) - l : locinput - PL_bostr);
1951 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1953 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1954 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1955 ? (5 + taill) - pref_len : PL_regeol - locinput);
1958 if (pref0_len > pref_len)
1959 pref0_len = pref_len;
1960 regprop(prop, scan);
1961 PerlIO_printf(Perl_debug_log,
1962 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1963 (IV)(locinput - PL_bostr),
1964 PL_colors[4], pref0_len,
1965 locinput - pref_len, PL_colors[5],
1966 PL_colors[2], pref_len - pref0_len,
1967 locinput - pref_len + pref0_len, PL_colors[3],
1968 (docolor ? "" : "> <"),
1969 PL_colors[0], l, locinput, PL_colors[1],
1970 15 - l - pref_len + 1,
1972 (IV)(scan - PL_regprogram), PL_regindent*2, "",
1976 next = scan + NEXT_OFF(scan);
1982 if (locinput == PL_bostr
1983 ? PL_regprev == '\n'
1985 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1987 /* regtill = regbol; */
1992 if (locinput == PL_bostr
1993 ? PL_regprev == '\n'
1994 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2000 if (locinput == PL_bostr)
2004 if (locinput == PL_reg_ganch)
2014 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2019 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2021 if (PL_regeol - locinput > 1)
2025 if (PL_regeol != locinput)
2029 if (nextchr & 0x80) {
2030 locinput += PL_utf8skip[nextchr];
2031 if (locinput > PL_regeol)
2033 nextchr = UCHARAT(locinput);
2036 if (!nextchr && locinput >= PL_regeol)
2038 nextchr = UCHARAT(++locinput);
2041 if (!nextchr && locinput >= PL_regeol)
2043 nextchr = UCHARAT(++locinput);
2046 if (nextchr & 0x80) {
2047 locinput += PL_utf8skip[nextchr];
2048 if (locinput > PL_regeol)
2050 nextchr = UCHARAT(locinput);
2053 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2055 nextchr = UCHARAT(++locinput);
2058 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2060 nextchr = UCHARAT(++locinput);
2065 /* Inline the first character, for speed. */
2066 if (UCHARAT(s) != nextchr)
2068 if (PL_regeol - locinput < ln)
2070 if (ln > 1 && memNE(s, locinput, ln))
2073 nextchr = UCHARAT(locinput);
2076 PL_reg_flags |= RF_tainted;
2085 c1 = OP(scan) == EXACTF;
2089 if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
2091 toLOWER_utf8((U8*)l) :
2092 toLOWER_LC_utf8((U8*)l)))
2100 nextchr = UCHARAT(locinput);
2104 /* Inline the first character, for speed. */
2105 if (UCHARAT(s) != nextchr &&
2106 UCHARAT(s) != ((OP(scan) == EXACTF)
2107 ? PL_fold : PL_fold_locale)[nextchr])
2109 if (PL_regeol - locinput < ln)
2111 if (ln > 1 && (OP(scan) == EXACTF
2112 ? ibcmp(s, locinput, ln)
2113 : ibcmp_locale(s, locinput, ln)))
2116 nextchr = UCHARAT(locinput);
2119 if (!REGINCLASSUTF8(scan, (U8*)locinput))
2121 if (locinput >= PL_regeol)
2123 locinput += PL_utf8skip[nextchr];
2124 nextchr = UCHARAT(locinput);
2128 nextchr = UCHARAT(locinput);
2129 if (!REGINCLASS(scan, nextchr))
2131 if (!nextchr && locinput >= PL_regeol)
2133 nextchr = UCHARAT(++locinput);
2136 PL_reg_flags |= RF_tainted;
2141 if (!(OP(scan) == ALNUM
2142 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2144 nextchr = UCHARAT(++locinput);
2147 PL_reg_flags |= RF_tainted;
2152 if (nextchr & 0x80) {
2153 if (!(OP(scan) == ALNUMUTF8
2154 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2155 : isALNUM_LC_utf8((U8*)locinput)))
2159 locinput += PL_utf8skip[nextchr];
2160 nextchr = UCHARAT(locinput);
2163 if (!(OP(scan) == ALNUMUTF8
2164 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
2166 nextchr = UCHARAT(++locinput);
2169 PL_reg_flags |= RF_tainted;
2172 if (!nextchr && locinput >= PL_regeol)
2174 if (OP(scan) == NALNUM
2175 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2177 nextchr = UCHARAT(++locinput);
2180 PL_reg_flags |= RF_tainted;
2183 if (!nextchr && locinput >= PL_regeol)
2185 if (nextchr & 0x80) {
2186 if (OP(scan) == NALNUMUTF8
2187 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2188 : isALNUM_LC_utf8((U8*)locinput))
2192 locinput += PL_utf8skip[nextchr];
2193 nextchr = UCHARAT(locinput);
2196 if (OP(scan) == NALNUMUTF8
2197 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
2199 nextchr = UCHARAT(++locinput);
2203 PL_reg_flags |= RF_tainted;
2207 /* was last char in word? */
2208 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2209 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2211 n = isALNUM(nextchr);
2214 ln = isALNUM_LC(ln);
2215 n = isALNUM_LC(nextchr);
2217 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
2222 PL_reg_flags |= RF_tainted;
2226 /* was last char in word? */
2227 if (locinput == PL_regbol)
2230 U8 *r = reghop((U8*)locinput, -1);
2232 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2234 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2235 ln = isALNUM_uni(ln);
2236 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2239 ln = isALNUM_LC_uni(ln);
2240 n = isALNUM_LC_utf8((U8*)locinput);
2242 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
2246 PL_reg_flags |= RF_tainted;
2251 if (!(OP(scan) == SPACE
2252 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2254 nextchr = UCHARAT(++locinput);
2257 PL_reg_flags |= RF_tainted;
2262 if (nextchr & 0x80) {
2263 if (!(OP(scan) == SPACEUTF8
2264 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2265 : isSPACE_LC_utf8((U8*)locinput)))
2269 locinput += PL_utf8skip[nextchr];
2270 nextchr = UCHARAT(locinput);
2273 if (!(OP(scan) == SPACEUTF8
2274 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2276 nextchr = UCHARAT(++locinput);
2279 PL_reg_flags |= RF_tainted;
2282 if (!nextchr && locinput >= PL_regeol)
2284 if (OP(scan) == NSPACE
2285 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2287 nextchr = UCHARAT(++locinput);
2290 PL_reg_flags |= RF_tainted;
2293 if (!nextchr && locinput >= PL_regeol)
2295 if (nextchr & 0x80) {
2296 if (OP(scan) == NSPACEUTF8
2297 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2298 : isSPACE_LC_utf8((U8*)locinput))
2302 locinput += PL_utf8skip[nextchr];
2303 nextchr = UCHARAT(locinput);
2306 if (OP(scan) == NSPACEUTF8
2307 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
2309 nextchr = UCHARAT(++locinput);
2312 PL_reg_flags |= RF_tainted;
2317 if (!(OP(scan) == DIGIT
2318 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2320 nextchr = UCHARAT(++locinput);
2323 PL_reg_flags |= RF_tainted;
2328 if (nextchr & 0x80) {
2329 if (!(OP(scan) == DIGITUTF8
2330 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2331 : isDIGIT_LC_utf8((U8*)locinput)))
2335 locinput += PL_utf8skip[nextchr];
2336 nextchr = UCHARAT(locinput);
2339 if (!(OP(scan) == DIGITUTF8
2340 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
2342 nextchr = UCHARAT(++locinput);
2345 PL_reg_flags |= RF_tainted;
2348 if (!nextchr && locinput >= PL_regeol)
2350 if (OP(scan) == NDIGIT
2351 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2353 nextchr = UCHARAT(++locinput);
2356 PL_reg_flags |= RF_tainted;
2359 if (!nextchr && locinput >= PL_regeol)
2361 if (nextchr & 0x80) {
2362 if (OP(scan) == NDIGITUTF8
2363 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2364 : isDIGIT_LC_utf8((U8*)locinput))
2368 locinput += PL_utf8skip[nextchr];
2369 nextchr = UCHARAT(locinput);
2372 if (OP(scan) == NDIGITUTF8
2373 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
2375 nextchr = UCHARAT(++locinput);
2378 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
2380 locinput += PL_utf8skip[nextchr];
2381 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
2382 locinput += UTF8SKIP(locinput);
2383 if (locinput > PL_regeol)
2385 nextchr = UCHARAT(locinput);
2388 PL_reg_flags |= RF_tainted;
2392 n = ARG(scan); /* which paren pair */
2393 ln = PL_regstartp[n];
2394 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2395 if (*PL_reglastparen < n || ln == -1)
2396 sayNO; /* Do not match unless seen CLOSEn. */
2397 if (ln == PL_regendp[n])
2401 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2403 char *e = PL_bostr + PL_regendp[n];
2405 * Note that we can't do the "other character" lookup trick as
2406 * in the 8-bit case (no pun intended) because in Unicode we
2407 * have to map both upper and title case to lower case.
2409 if (OP(scan) == REFF) {
2413 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
2423 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
2430 nextchr = UCHARAT(locinput);
2434 /* Inline the first character, for speed. */
2435 if (UCHARAT(s) != nextchr &&
2437 (UCHARAT(s) != ((OP(scan) == REFF
2438 ? PL_fold : PL_fold_locale)[nextchr]))))
2440 ln = PL_regendp[n] - ln;
2441 if (locinput + ln > PL_regeol)
2443 if (ln > 1 && (OP(scan) == REF
2444 ? memNE(s, locinput, ln)
2446 ? ibcmp(s, locinput, ln)
2447 : ibcmp_locale(s, locinput, ln))))
2450 nextchr = UCHARAT(locinput);
2461 OP_4tree *oop = PL_op;
2462 COP *ocurcop = PL_curcop;
2463 SV **ocurpad = PL_curpad;
2467 PL_op = (OP_4tree*)PL_regdata->data[n];
2468 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
2469 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
2470 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
2472 CALLRUNOPS(aTHX); /* Scalar context. */
2478 PL_curpad = ocurpad;
2479 PL_curcop = ocurcop;
2481 if (logical == 2) { /* Postponed subexpression. */
2483 MAGIC *mg = Null(MAGIC*);
2485 CHECKPOINT cp, lastcp;
2487 if(SvROK(ret) || SvRMAGICAL(ret)) {
2488 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2491 mg = mg_find(sv, 'r');
2494 re = (regexp *)mg->mg_obj;
2495 (void)ReREFCNT_inc(re);
2499 char *t = SvPV(ret, len);
2501 char *oprecomp = PL_regprecomp;
2502 I32 osize = PL_regsize;
2503 I32 onpar = PL_regnpar;
2506 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
2507 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
2509 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2510 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2511 PL_regprecomp = oprecomp;
2516 PerlIO_printf(Perl_debug_log,
2517 "Entering embedded `%s%.60s%s%s'\n",
2521 (strlen(re->precomp) > 60 ? "..." : ""))
2524 state.prev = PL_reg_call_cc;
2525 state.cc = PL_regcc;
2526 state.re = PL_reg_re;
2530 cp = regcppush(0); /* Save *all* the positions. */
2533 state.ss = PL_savestack_ix;
2534 *PL_reglastparen = 0;
2535 PL_reg_call_cc = &state;
2536 PL_reginput = locinput;
2538 /* XXXX This is too dramatic a measure... */
2541 if (regmatch(re->program + 1)) {
2542 /* Even though we succeeded, we need to restore
2543 global variables, since we may be wrapped inside
2544 SUSPEND, thus the match may be not finished yet. */
2546 /* XXXX Do this only if SUSPENDed? */
2547 PL_reg_call_cc = state.prev;
2548 PL_regcc = state.cc;
2549 PL_reg_re = state.re;
2550 cache_re(PL_reg_re);
2552 /* XXXX This is too dramatic a measure... */
2555 /* These are needed even if not SUSPEND. */
2561 REGCP_UNWIND(lastcp);
2563 PL_reg_call_cc = state.prev;
2564 PL_regcc = state.cc;
2565 PL_reg_re = state.re;
2566 cache_re(PL_reg_re);
2568 /* XXXX This is too dramatic a measure... */
2577 sv_setsv(save_scalar(PL_replgv), ret);
2581 n = ARG(scan); /* which paren pair */
2582 PL_reg_start_tmp[n] = locinput;
2587 n = ARG(scan); /* which paren pair */
2588 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2589 PL_regendp[n] = locinput - PL_bostr;
2590 if (n > *PL_reglastparen)
2591 *PL_reglastparen = n;
2594 n = ARG(scan); /* which paren pair */
2595 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
2598 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
2600 next = NEXTOPER(NEXTOPER(scan));
2602 next = scan + ARG(scan);
2603 if (OP(next) == IFTHEN) /* Fake one. */
2604 next = NEXTOPER(NEXTOPER(next));
2608 logical = scan->flags;
2610 /*******************************************************************
2611 PL_regcc contains infoblock about the innermost (...)* loop, and
2612 a pointer to the next outer infoblock.
2614 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2616 1) After matching X, regnode for CURLYX is processed;
2618 2) This regnode creates infoblock on the stack, and calls
2619 regmatch() recursively with the starting point at WHILEM node;
2621 3) Each hit of WHILEM node tries to match A and Z (in the order
2622 depending on the current iteration, min/max of {min,max} and
2623 greediness). The information about where are nodes for "A"
2624 and "Z" is read from the infoblock, as is info on how many times "A"
2625 was already matched, and greediness.
2627 4) After A matches, the same WHILEM node is hit again.
2629 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2630 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2631 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2632 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2633 of the external loop.
2635 Currently present infoblocks form a tree with a stem formed by PL_curcc
2636 and whatever it mentions via ->next, and additional attached trees
2637 corresponding to temporarily unset infoblocks as in "5" above.
2639 In the following picture infoblocks for outer loop of
2640 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2641 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2642 infoblocks are drawn below the "reset" infoblock.
2644 In fact in the picture below we do not show failed matches for Z and T
2645 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2646 more obvious *why* one needs to *temporary* unset infoblocks.]
2648 Matched REx position InfoBlocks Comment
2652 Y A)*?Z)*?T x <- O <- I
2653 YA )*?Z)*?T x <- O <- I
2654 YA A)*?Z)*?T x <- O <- I
2655 YAA )*?Z)*?T x <- O <- I
2656 YAA Z)*?T x <- O # Temporary unset I
2659 YAAZ Y(A)*?Z)*?T x <- O
2662 YAAZY (A)*?Z)*?T x <- O
2665 YAAZY A)*?Z)*?T x <- O <- I
2668 YAAZYA )*?Z)*?T x <- O <- I
2671 YAAZYA Z)*?T x <- O # Temporary unset I
2677 YAAZYAZ T x # Temporary unset O
2684 *******************************************************************/
2687 CHECKPOINT cp = PL_savestack_ix;
2688 /* No need to save/restore up to this paren */
2689 I32 parenfloor = scan->flags;
2691 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2693 cc.oldcc = PL_regcc;
2695 /* XXXX Probably it is better to teach regpush to support
2696 parenfloor > PL_regsize... */
2697 if (parenfloor > *PL_reglastparen)
2698 parenfloor = *PL_reglastparen; /* Pessimization... */
2699 cc.parenfloor = parenfloor;
2701 cc.min = ARG1(scan);
2702 cc.max = ARG2(scan);
2703 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
2707 PL_reginput = locinput;
2708 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2710 PL_regcc = cc.oldcc;
2716 * This is really hard to understand, because after we match
2717 * what we're trying to match, we must make sure the rest of
2718 * the REx is going to match for sure, and to do that we have
2719 * to go back UP the parse tree by recursing ever deeper. And
2720 * if it fails, we have to reset our parent's current state
2721 * that we can try again after backing off.
2724 CHECKPOINT cp, lastcp;
2725 CURCUR* cc = PL_regcc;
2726 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2728 n = cc->cur + 1; /* how many we know we matched */
2729 PL_reginput = locinput;
2732 PerlIO_printf(Perl_debug_log,
2733 "%*s %ld out of %ld..%ld cc=%lx\n",
2734 REPORT_CODE_OFF+PL_regindent*2, "",
2735 (long)n, (long)cc->min,
2736 (long)cc->max, (long)cc)
2739 /* If degenerate scan matches "", assume scan done. */
2741 if (locinput == cc->lastloc && n >= cc->min) {
2742 PL_regcc = cc->oldcc;
2746 PerlIO_printf(Perl_debug_log,
2747 "%*s empty match detected, try continuation...\n",
2748 REPORT_CODE_OFF+PL_regindent*2, "")
2750 if (regmatch(cc->next))
2758 /* First just match a string of min scans. */
2762 cc->lastloc = locinput;
2763 if (regmatch(cc->scan))
2766 cc->lastloc = lastloc;
2771 /* Check whether we already were at this position.
2772 Postpone detection until we know the match is not
2773 *that* much linear. */
2774 if (!PL_reg_maxiter) {
2775 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2776 PL_reg_leftiter = PL_reg_maxiter;
2778 if (PL_reg_leftiter-- == 0) {
2779 I32 size = (PL_reg_maxiter + 7)/8;
2780 if (PL_reg_poscache) {
2781 if (PL_reg_poscache_size < size) {
2782 Renew(PL_reg_poscache, size, char);
2783 PL_reg_poscache_size = size;
2785 Zero(PL_reg_poscache, size, char);
2788 PL_reg_poscache_size = size;
2789 Newz(29, PL_reg_poscache, size, char);
2792 PerlIO_printf(Perl_debug_log,
2793 "%sDetected a super-linear match, switching on caching%s...\n",
2794 PL_colors[4], PL_colors[5])
2797 if (PL_reg_leftiter < 0) {
2798 I32 o = locinput - PL_bostr, b;
2800 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2803 if (PL_reg_poscache[o] & (1<<b)) {
2805 PerlIO_printf(Perl_debug_log,
2806 "%*s already tried at this position...\n",
2807 REPORT_CODE_OFF+PL_regindent*2, "")
2811 PL_reg_poscache[o] |= (1<<b);
2815 /* Prefer next over scan for minimal matching. */
2818 PL_regcc = cc->oldcc;
2821 cp = regcppush(cc->parenfloor);
2823 if (regmatch(cc->next)) {
2825 sayYES; /* All done. */
2827 REGCP_UNWIND(lastcp);
2833 if (n >= cc->max) { /* Maximum greed exceeded? */
2834 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2835 && !(PL_reg_flags & RF_warned)) {
2836 PL_reg_flags |= RF_warned;
2837 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2838 "Complex regular subexpression recursion",
2845 PerlIO_printf(Perl_debug_log,
2846 "%*s trying longer...\n",
2847 REPORT_CODE_OFF+PL_regindent*2, "")
2849 /* Try scanning more and see if it helps. */
2850 PL_reginput = locinput;
2852 cc->lastloc = locinput;
2853 cp = regcppush(cc->parenfloor);
2855 if (regmatch(cc->scan)) {
2859 REGCP_UNWIND(lastcp);
2862 cc->lastloc = lastloc;
2866 /* Prefer scan over next for maximal matching. */
2868 if (n < cc->max) { /* More greed allowed? */
2869 cp = regcppush(cc->parenfloor);
2871 cc->lastloc = locinput;
2873 if (regmatch(cc->scan)) {
2877 REGCP_UNWIND(lastcp);
2878 regcppop(); /* Restore some previous $<digit>s? */
2879 PL_reginput = locinput;
2881 PerlIO_printf(Perl_debug_log,
2882 "%*s failed, try continuation...\n",
2883 REPORT_CODE_OFF+PL_regindent*2, "")
2886 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
2887 && !(PL_reg_flags & RF_warned)) {
2888 PL_reg_flags |= RF_warned;
2889 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2890 "Complex regular subexpression recursion",
2894 /* Failed deeper matches of scan, so see if this one works. */
2895 PL_regcc = cc->oldcc;
2898 if (regmatch(cc->next))
2904 cc->lastloc = lastloc;
2909 next = scan + ARG(scan);
2912 inner = NEXTOPER(NEXTOPER(scan));
2915 inner = NEXTOPER(scan);
2920 if (OP(next) != c1) /* No choice. */
2921 next = inner; /* Avoid recursion. */
2923 I32 lastparen = *PL_reglastparen;
2925 re_unwind_branch_t *uw;
2927 /* Put unwinding data on stack */
2928 unwind1 = SSNEWt(1,re_unwind_branch_t);
2929 uw = SSPTRt(unwind1,re_unwind_branch_t);
2932 uw->type = ((c1 == BRANCH)
2934 : RE_UNWIND_BRANCHJ);
2935 uw->lastparen = lastparen;
2937 uw->locinput = locinput;
2938 uw->nextchr = nextchr;
2940 uw->regindent = ++PL_regindent;
2943 REGCP_SET(uw->lastcp);
2945 /* Now go into the first branch */
2958 /* We suppose that the next guy does not need
2959 backtracking: in particular, it is of constant length,
2960 and has no parenths to influence future backrefs. */
2961 ln = ARG1(scan); /* min to match */
2962 n = ARG2(scan); /* max to match */
2963 paren = scan->flags;
2965 if (paren > PL_regsize)
2967 if (paren > *PL_reglastparen)
2968 *PL_reglastparen = paren;
2970 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
2972 scan += NEXT_OFF(scan); /* Skip former OPEN. */
2973 PL_reginput = locinput;
2976 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2978 if (ln && l == 0 && n >= ln
2979 /* In fact, this is tricky. If paren, then the
2980 fact that we did/didnot match may influence
2981 future execution. */
2982 && !(paren && ln == 0))
2984 locinput = PL_reginput;
2985 if (PL_regkind[(U8)OP(next)] == EXACT) {
2986 c1 = (U8)*STRING(next);
2987 if (OP(next) == EXACTF)
2989 else if (OP(next) == EXACTFL)
2990 c2 = PL_fold_locale[c1];
2997 /* This may be improved if l == 0. */
2998 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2999 /* If it could work, try it. */
3001 UCHARAT(PL_reginput) == c1 ||
3002 UCHARAT(PL_reginput) == c2)
3006 PL_regstartp[paren] =
3007 HOPc(PL_reginput, -l) - PL_bostr;
3008 PL_regendp[paren] = PL_reginput - PL_bostr;
3011 PL_regendp[paren] = -1;
3015 REGCP_UNWIND(lastcp);
3017 /* Couldn't or didn't -- move forward. */
3018 PL_reginput = locinput;
3019 if (regrepeat_hard(scan, 1, &l)) {
3021 locinput = PL_reginput;
3028 n = regrepeat_hard(scan, n, &l);
3029 if (n != 0 && l == 0
3030 /* In fact, this is tricky. If paren, then the
3031 fact that we did/didnot match may influence
3032 future execution. */
3033 && !(paren && ln == 0))
3035 locinput = PL_reginput;
3037 PerlIO_printf(Perl_debug_log,
3038 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3039 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3043 if (PL_regkind[(U8)OP(next)] == EXACT) {
3044 c1 = (U8)*STRING(next);
3045 if (OP(next) == EXACTF)
3047 else if (OP(next) == EXACTFL)
3048 c2 = PL_fold_locale[c1];
3057 /* If it could work, try it. */
3059 UCHARAT(PL_reginput) == c1 ||
3060 UCHARAT(PL_reginput) == c2)
3063 PerlIO_printf(Perl_debug_log,
3064 "%*s trying tail with n=%"IVdf"...\n",
3065 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
3069 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3070 PL_regendp[paren] = PL_reginput - PL_bostr;
3073 PL_regendp[paren] = -1;
3077 REGCP_UNWIND(lastcp);
3079 /* Couldn't or didn't -- back up. */
3081 locinput = HOPc(locinput, -l);
3082 PL_reginput = locinput;
3089 paren = scan->flags; /* Which paren to set */
3090 if (paren > PL_regsize)
3092 if (paren > *PL_reglastparen)
3093 *PL_reglastparen = paren;
3094 ln = ARG1(scan); /* min to match */
3095 n = ARG2(scan); /* max to match */
3096 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
3100 ln = ARG1(scan); /* min to match */
3101 n = ARG2(scan); /* max to match */
3102 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
3107 scan = NEXTOPER(scan);
3113 scan = NEXTOPER(scan);
3117 * Lookahead to avoid useless match attempts
3118 * when we know what character comes next.
3120 if (PL_regkind[(U8)OP(next)] == EXACT) {
3121 c1 = (U8)*STRING(next);
3122 if (OP(next) == EXACTF)
3124 else if (OP(next) == EXACTFL)
3125 c2 = PL_fold_locale[c1];
3131 PL_reginput = locinput;
3135 if (ln && regrepeat(scan, ln) < ln)
3137 locinput = PL_reginput;
3140 char *e = locinput + n - ln; /* Should not check after this */
3141 char *old = locinput;
3143 if (e >= PL_regeol || (n == REG_INFTY))
3146 /* Find place 'next' could work */
3148 while (locinput <= e && *locinput != c1)
3151 while (locinput <= e
3158 /* PL_reginput == old now */
3159 if (locinput != old) {
3160 ln = 1; /* Did some */
3161 if (regrepeat(scan, locinput - old) <
3165 /* PL_reginput == locinput now */
3166 TRYPAREN(paren, ln, locinput);
3167 PL_reginput = locinput; /* Could be reset... */
3168 REGCP_UNWIND(lastcp);
3169 /* Couldn't or didn't -- move forward. */
3174 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
3175 /* If it could work, try it. */
3177 UCHARAT(PL_reginput) == c1 ||
3178 UCHARAT(PL_reginput) == c2)
3180 TRYPAREN(paren, n, PL_reginput);
3181 REGCP_UNWIND(lastcp);
3183 /* Couldn't or didn't -- move forward. */
3184 PL_reginput = locinput;
3185 if (regrepeat(scan, 1)) {
3187 locinput = PL_reginput;
3195 n = regrepeat(scan, n);
3196 locinput = PL_reginput;
3197 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3198 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
3199 ln = n; /* why back off? */
3200 /* ...because $ and \Z can match before *and* after
3201 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3202 We should back off by one in this case. */
3203 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3209 /* If it could work, try it. */
3211 UCHARAT(PL_reginput) == c1 ||
3212 UCHARAT(PL_reginput) == c2)
3214 TRYPAREN(paren, n, PL_reginput);
3215 REGCP_UNWIND(lastcp);
3217 /* Couldn't or didn't -- back up. */
3219 PL_reginput = locinput = HOPc(locinput, -1);
3224 /* If it could work, try it. */
3226 UCHARAT(PL_reginput) == c1 ||
3227 UCHARAT(PL_reginput) == c2)
3229 TRYPAREN(paren, n, PL_reginput);
3230 REGCP_UNWIND(lastcp);
3232 /* Couldn't or didn't -- back up. */
3234 PL_reginput = locinput = HOPc(locinput, -1);
3241 if (PL_reg_call_cc) {
3242 re_cc_state *cur_call_cc = PL_reg_call_cc;
3243 CURCUR *cctmp = PL_regcc;
3244 regexp *re = PL_reg_re;
3245 CHECKPOINT cp, lastcp;
3247 cp = regcppush(0); /* Save *all* the positions. */
3249 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3251 PL_reginput = locinput; /* Make position available to
3253 cache_re(PL_reg_call_cc->re);
3254 PL_regcc = PL_reg_call_cc->cc;
3255 PL_reg_call_cc = PL_reg_call_cc->prev;
3256 if (regmatch(cur_call_cc->node)) {
3257 PL_reg_call_cc = cur_call_cc;
3261 REGCP_UNWIND(lastcp);
3263 PL_reg_call_cc = cur_call_cc;
3269 PerlIO_printf(Perl_debug_log,
3270 "%*s continuation failed...\n",
3271 REPORT_CODE_OFF+PL_regindent*2, "")
3275 if (locinput < PL_regtill) {
3276 DEBUG_r(PerlIO_printf(Perl_debug_log,
3277 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3279 (long)(locinput - PL_reg_starttry),
3280 (long)(PL_regtill - PL_reg_starttry),
3282 sayNO_FINAL; /* Cannot match: too short. */
3284 PL_reginput = locinput; /* put where regtry can find it */
3285 sayYES_FINAL; /* Success! */
3287 PL_reginput = locinput; /* put where regtry can find it */
3288 sayYES_LOUD; /* Success! */
3291 PL_reginput = locinput;
3296 if (UTF) { /* XXXX This is absolutely
3297 broken, we read before
3299 s = HOPMAYBEc(locinput, -scan->flags);
3305 if (locinput < PL_bostr + scan->flags)
3307 PL_reginput = locinput - scan->flags;
3312 PL_reginput = locinput;
3317 if (UTF) { /* XXXX This is absolutely
3318 broken, we read before
3320 s = HOPMAYBEc(locinput, -scan->flags);
3321 if (!s || s < PL_bostr)
3326 if (locinput < PL_bostr + scan->flags)
3328 PL_reginput = locinput - scan->flags;
3333 PL_reginput = locinput;
3336 inner = NEXTOPER(NEXTOPER(scan));
3337 if (regmatch(inner) != n) {
3352 if (OP(scan) == SUSPEND) {
3353 locinput = PL_reginput;
3354 nextchr = UCHARAT(locinput);
3359 next = scan + ARG(scan);
3364 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
3365 PTR2UV(scan), OP(scan));
3366 Perl_croak(aTHX_ "regexp memory corruption");
3373 * We get here only if there's trouble -- normally "case END" is
3374 * the terminating point.
3376 Perl_croak(aTHX_ "corrupted regexp pointers");
3382 PerlIO_printf(Perl_debug_log,
3383 "%*s %scould match...%s\n",
3384 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3388 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3389 PL_colors[4],PL_colors[5]));
3395 #if 0 /* Breaks $^R */
3403 PerlIO_printf(Perl_debug_log,
3404 "%*s %sfailed...%s\n",
3405 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3411 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3414 case RE_UNWIND_BRANCH:
3415 case RE_UNWIND_BRANCHJ:
3417 re_unwind_branch_t *uwb = &(uw->branch);
3418 I32 lastparen = uwb->lastparen;
3420 REGCP_UNWIND(uwb->lastcp);
3421 for (n = *PL_reglastparen; n > lastparen; n--)
3423 *PL_reglastparen = n;
3424 scan = next = uwb->next;
3426 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3427 ? BRANCH : BRANCHJ) ) { /* Failure */
3434 /* Have more choice yet. Reuse the same uwb. */
3436 if ((n = (uwb->type == RE_UNWIND_BRANCH
3437 ? NEXT_OFF(next) : ARG(next))))
3440 next = NULL; /* XXXX Needn't unwinding in this case... */
3442 next = NEXTOPER(scan);
3443 if (uwb->type == RE_UNWIND_BRANCHJ)
3444 next = NEXTOPER(next);
3445 locinput = uwb->locinput;
3446 nextchr = uwb->nextchr;
3448 PL_regindent = uwb->regindent;
3455 Perl_croak(aTHX_ "regexp unwind memory corruption");
3466 - regrepeat - repeatedly match something simple, report how many
3469 * [This routine now assumes that it will only match on things of length 1.
3470 * That was true before, but now we assume scan - reginput is the count,
3471 * rather than incrementing count on every character. [Er, except utf8.]]
3474 S_regrepeat(pTHX_ regnode *p, I32 max)
3476 register char *scan;
3478 register char *loceol = PL_regeol;
3479 register I32 hardcount = 0;
3482 if (max != REG_INFTY && max < loceol - scan)
3483 loceol = scan + max;
3486 while (scan < loceol && *scan != '\n')
3494 while (scan < loceol && *scan != '\n') {
3495 scan += UTF8SKIP(scan);
3501 while (scan < loceol) {
3502 scan += UTF8SKIP(scan);
3506 case EXACT: /* length of string is 1 */
3508 while (scan < loceol && UCHARAT(scan) == c)
3511 case EXACTF: /* length of string is 1 */
3513 while (scan < loceol &&
3514 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
3517 case EXACTFL: /* length of string is 1 */
3518 PL_reg_flags |= RF_tainted;
3520 while (scan < loceol &&
3521 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
3526 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3527 scan += UTF8SKIP(scan);
3532 while (scan < loceol && REGINCLASS(p, *scan))
3536 while (scan < loceol && isALNUM(*scan))
3541 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3542 scan += UTF8SKIP(scan);
3547 PL_reg_flags |= RF_tainted;
3548 while (scan < loceol && isALNUM_LC(*scan))
3552 PL_reg_flags |= RF_tainted;
3554 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
3555 scan += UTF8SKIP(scan);
3561 while (scan < loceol && !isALNUM(*scan))
3566 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
3567 scan += UTF8SKIP(scan);
3572 PL_reg_flags |= RF_tainted;
3573 while (scan < loceol && !isALNUM_LC(*scan))
3577 PL_reg_flags |= RF_tainted;
3579 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
3580 scan += UTF8SKIP(scan);
3585 while (scan < loceol && isSPACE(*scan))
3590 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3591 scan += UTF8SKIP(scan);
3596 PL_reg_flags |= RF_tainted;
3597 while (scan < loceol && isSPACE_LC(*scan))
3601 PL_reg_flags |= RF_tainted;
3603 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3604 scan += UTF8SKIP(scan);
3609 while (scan < loceol && !isSPACE(*scan))
3614 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3615 scan += UTF8SKIP(scan);
3620 PL_reg_flags |= RF_tainted;
3621 while (scan < loceol && !isSPACE_LC(*scan))
3625 PL_reg_flags |= RF_tainted;
3627 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3628 scan += UTF8SKIP(scan);
3633 while (scan < loceol && isDIGIT(*scan))
3638 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
3639 scan += UTF8SKIP(scan);
3645 while (scan < loceol && !isDIGIT(*scan))
3650 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
3651 scan += UTF8SKIP(scan);
3655 default: /* Called on something of 0 width. */
3656 break; /* So match right here or not at all. */
3662 c = scan - PL_reginput;
3667 SV *prop = sv_newmortal();
3670 PerlIO_printf(Perl_debug_log,
3671 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3672 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
3679 - regrepeat_hard - repeatedly match something, report total lenth and length
3681 * The repeater is supposed to have constant length.
3685 S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
3687 register char *scan;
3688 register char *start;
3689 register char *loceol = PL_regeol;
3691 I32 count = 0, res = 1;
3696 start = PL_reginput;
3698 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3701 while (start < PL_reginput) {
3703 start += UTF8SKIP(start);
3714 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
3716 *lp = l = PL_reginput - start;
3717 if (max != REG_INFTY && l*max < loceol - scan)
3718 loceol = scan + l*max;
3731 - reginclass - determine if a character falls into a character class
3735 S_reginclass(pTHX_ register regnode *p, register I32 c)
3737 char flags = ANYOF_FLAGS(p);
3741 if (ANYOF_BITMAP_TEST(p, c))
3743 else if (flags & ANYOF_FOLD) {
3745 if (flags & ANYOF_LOCALE) {
3746 PL_reg_flags |= RF_tainted;
3747 cf = PL_fold_locale[c];
3751 if (ANYOF_BITMAP_TEST(p, cf))
3755 if (!match && (flags & ANYOF_CLASS)) {
3756 PL_reg_flags |= RF_tainted;
3758 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3759 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3760 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3761 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3762 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3763 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3764 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3765 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3766 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3767 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3768 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3769 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3770 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3771 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3772 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3773 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3774 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3775 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3776 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3777 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3778 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3779 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3780 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3781 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3782 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3783 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3784 (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3785 (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3786 (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
3787 (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c))
3788 ) /* How's that for a conditional? */
3794 return (flags & ANYOF_INVERT) ? !match : match;
3798 S_reginclassutf8(pTHX_ regnode *f, U8 *p)
3800 char flags = ARG1(f);
3803 SV *rv = (SV*)PL_regdata->data[ARG2(f)];
3804 AV *av = (AV*)SvRV((SV*)rv);
3805 SV *sw = *av_fetch(av, 0, FALSE);
3806 SV *lv = *av_fetch(av, 1, FALSE);
3808 SV *sw = (SV*)PL_regdata->data[ARG2(f)];
3811 if (swash_fetch(sw, p))
3813 else if (flags & ANYOF_FOLD) {
3814 U8 tmpbuf[UTF8_MAXLEN+1];
3815 if (flags & ANYOF_LOCALE) {
3816 PL_reg_flags |= RF_tainted;
3817 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3820 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3821 if (swash_fetch(sw, tmpbuf))
3825 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
3827 return (flags & ANYOF_INVERT) ? !match : match;
3831 S_reghop(pTHX_ U8 *s, I32 off)
3834 while (off-- && s < (U8*)PL_regeol)
3839 if (s > (U8*)PL_bostr) {
3842 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3844 } /* XXX could check well-formedness here */
3852 S_reghopmaybe(pTHX_ U8* s, I32 off)
3855 while (off-- && s < (U8*)PL_regeol)
3862 if (s > (U8*)PL_bostr) {
3865 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3867 } /* XXX could check well-formedness here */
3883 restore_pos(pTHXo_ void *arg)
3885 if (PL_reg_eval_set) {
3886 if (PL_reg_oldsaved) {
3887 PL_reg_re->subbeg = PL_reg_oldsaved;
3888 PL_reg_re->sublen = PL_reg_oldsavedlen;
3889 RX_MATCH_COPIED_on(PL_reg_re);
3891 PL_reg_magic->mg_len = PL_reg_oldpos;
3892 PL_reg_eval_set = 0;
3893 PL_curpm = PL_reg_oldcurpm;