This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate with Sarathy.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
6eb5f6b9
JH
278/* XXXX Some places assume that there is a fixed substring.
279 An update may be needed if optimizer marks as "INTUITable"
280 RExen without fixed substrings. Similarly, it is assumed that
281 lengths of all the strings are no more than minlen, thus they
282 cannot come from lookahead.
283 (Or minlen should take into account lookahead.) */
284
2c2d71f5
JH
285/* A failure to find a constant substring means that there is no need to make
286 an expensive call to REx engine, thus we celebrate a failure. Similarly,
287 finding a substring too deep into the string means that less calls to
30944b6d
IZ
288 regtry() should be needed.
289
290 REx compiler's optimizer found 4 possible hints:
291 a) Anchored substring;
292 b) Fixed substring;
293 c) Whether we are anchored (beginning-of-line or \G);
294 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 295 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
296 string which does not contradict any of them.
297 */
2c2d71f5 298
6eb5f6b9
JH
299/* Most of decisions we do here should have been done at compile time.
300 The nodes of the REx which we used for the search should have been
301 deleted from the finite automaton. */
302
cad2e5aa
JH
303char *
304Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
305 char *strend, U32 flags, re_scream_pos_data *data)
306{
2c2d71f5 307 register I32 start_shift;
cad2e5aa 308 /* Should be nonnegative! */
2c2d71f5
JH
309 register I32 end_shift;
310 register char *s;
311 register SV *check;
cad2e5aa
JH
312 char *t;
313 I32 ml_anch;
2c2d71f5 314 char *tmp;
6eb5f6b9
JH
315 register char *other_last = Nullch; /* other substr checked before this */
316 char *check_at; /* check substr found at this pos */
30944b6d
IZ
317#ifdef DEBUGGING
318 char *i_strpos = strpos;
319#endif
cad2e5aa
JH
320
321 DEBUG_r( if (!PL_colorset) reginitcolors() );
322 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 323 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
324 PL_colors[4],PL_colors[5],PL_colors[0],
325 prog->precomp,
326 PL_colors[1],
327 (strlen(prog->precomp) > 60 ? "..." : ""),
328 PL_colors[0],
b900a521 329 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
330 strpos, PL_colors[1],
331 (strend - strpos > 60 ? "..." : ""))
332 );
333
2c2d71f5
JH
334 if (prog->minlen > strend - strpos) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 336 goto fail;
2c2d71f5 337 }
653099ff 338 check = prog->check_substr;
2c2d71f5 339 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
340 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
341 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 342 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
343
344 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 345 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
346 I32 slen;
347
348 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
349 && (sv && (strpos + SvCUR(sv) != strend)) ) {
350 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 351 goto fail;
2c2d71f5 352 }
adac82c7 353 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 354 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
355 if (SvTAIL(check)) {
356 slen = SvCUR(check); /* >= 1 */
cad2e5aa 357
2c2d71f5
JH
358 if ( strend - s > slen || strend - s < slen - 1
359 || (strend - s == slen && strend[-1] != '\n')) {
360 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
361 goto fail_finish;
cad2e5aa
JH
362 }
363 /* Now should match s[0..slen-2] */
364 slen--;
653099ff 365 if (slen && (*SvPVX(check) != *s
cad2e5aa 366 || (slen > 1
653099ff 367 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
368 report_neq:
369 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
370 goto fail_finish;
371 }
cad2e5aa 372 }
653099ff
GS
373 else if (*SvPVX(check) != *s
374 || ((slen = SvCUR(check)) > 1
375 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
376 goto report_neq;
377 goto success_at_start;
cad2e5aa 378 }
2c2d71f5 379 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 380 s = strpos;
2c2d71f5 381 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 382 end_shift = prog->minlen - start_shift -
653099ff 383 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 384 if (!ml_anch) {
653099ff
GS
385 I32 end = prog->check_offset_max + CHR_SVLEN(check)
386 - (SvTAIL(check) != 0);
2c2d71f5
JH
387 I32 eshift = strend - s - end;
388
389 if (end_shift < eshift)
390 end_shift = eshift;
391 }
cad2e5aa 392 }
2c2d71f5 393 else { /* Can match at random position */
cad2e5aa
JH
394 ml_anch = 0;
395 s = strpos;
2c2d71f5
JH
396 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
397 /* Should be nonnegative! */
398 end_shift = prog->minlen - start_shift -
653099ff 399 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
400 }
401
2c2d71f5 402#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 403 if (end_shift < 0)
6bbae5e6 404 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
405#endif
406
2c2d71f5
JH
407 restart:
408 /* Find a possible match in the region s..strend by looking for
409 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 410 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
411 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
412 I32 p = -1; /* Internal iterator of scream. */
413 I32 *pp = data ? data->scream_pos : &p;
414
2c2d71f5
JH
415 if (PL_screamfirst[BmRARE(check)] >= 0
416 || ( BmRARE(check) == '\n'
417 && (BmPREVIOUS(check) == SvCUR(check) - 1)
418 && SvTAIL(check) ))
419 s = screaminstr(sv, check,
420 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 421 else
2c2d71f5 422 goto fail_finish;
cad2e5aa
JH
423 if (data)
424 *data->scream_olds = s;
425 }
426 else
427 s = fbm_instr((unsigned char*)s + start_shift,
428 (unsigned char*)strend - end_shift,
2c2d71f5 429 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
430
431 /* Update the count-of-usability, remove useless subpatterns,
432 unshift s. */
2c2d71f5
JH
433
434 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
435 (s ? "Found" : "Did not find"),
436 ((check == prog->anchored_substr) ? "anchored" : "floating"),
437 PL_colors[0],
7b0972df
JH
438 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
439 SvPVX(check),
2c2d71f5
JH
440 PL_colors[1], (SvTAIL(check) ? "$" : ""),
441 (s ? " at offset " : "...\n") ) );
442
443 if (!s)
444 goto fail_finish;
445
6eb5f6b9
JH
446 check_at = s;
447
2c2d71f5 448 /* Finish the diagnostic message */
30944b6d 449 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
450
451 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
452 Start with the other substr.
453 XXXX no SCREAM optimization yet - and a very coarse implementation
454 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
455 *always* match. Probably should be marked during compile...
456 Probably it is right to do no SCREAM here...
457 */
458
459 if (prog->float_substr && prog->anchored_substr) {
30944b6d 460 /* Take into account the "other" substring. */
2c2d71f5
JH
461 /* XXXX May be hopelessly wrong for UTF... */
462 if (!other_last)
6eb5f6b9 463 other_last = strpos;
2c2d71f5 464 if (check == prog->float_substr) {
30944b6d
IZ
465 do_other_anchored:
466 {
2c2d71f5
JH
467 char *last = s - start_shift, *last1, *last2;
468 char *s1 = s;
469
470 tmp = PL_bostr;
471 t = s - prog->check_offset_max;
472 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
473 && (!(prog->reganch & ROPT_UTF8)
474 || (PL_bostr = strpos, /* Used in regcopmaybe() */
475 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
476 && t > strpos)))
30944b6d 477 /* EMPTY */;
2c2d71f5
JH
478 else
479 t = strpos;
480 t += prog->anchored_offset;
6eb5f6b9
JH
481 if (t < other_last) /* These positions already checked */
482 t = other_last;
2c2d71f5
JH
483 PL_bostr = tmp;
484 last2 = last1 = strend - prog->minlen;
485 if (last < last1)
486 last1 = last;
487 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
488 /* On end-of-str: see comment below. */
489 s = fbm_instr((unsigned char*)t,
490 (unsigned char*)last1 + prog->anchored_offset
491 + SvCUR(prog->anchored_substr)
492 - (SvTAIL(prog->anchored_substr)!=0),
493 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
494 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
495 (s ? "Found" : "Contradicts"),
496 PL_colors[0],
7b0972df
JH
497 (int)(SvCUR(prog->anchored_substr)
498 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
499 SvPVX(prog->anchored_substr),
500 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
501 if (!s) {
502 if (last1 >= last2) {
503 DEBUG_r(PerlIO_printf(Perl_debug_log,
504 ", giving up...\n"));
505 goto fail_finish;
506 }
507 DEBUG_r(PerlIO_printf(Perl_debug_log,
508 ", trying floating at offset %ld...\n",
30944b6d 509 (long)(s1 + 1 - i_strpos)));
2c2d71f5 510 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 511 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
512 s = HOPc(last, 1);
513 goto restart;
514 }
515 else {
516 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 517 (long)(s - i_strpos)));
2c2d71f5 518 t = s - prog->anchored_offset;
6eb5f6b9 519 other_last = s + 1;
30944b6d 520 s = s1;
2c2d71f5
JH
521 if (t == strpos)
522 goto try_at_start;
2c2d71f5
JH
523 goto try_at_offset;
524 }
30944b6d 525 }
2c2d71f5
JH
526 }
527 else { /* Take into account the floating substring. */
528 char *last, *last1;
529 char *s1 = s;
530
531 t = s - start_shift;
532 last1 = last = strend - prog->minlen + prog->float_min_offset;
533 if (last - t > prog->float_max_offset)
534 last = t + prog->float_max_offset;
535 s = t + prog->float_min_offset;
6eb5f6b9
JH
536 if (s < other_last)
537 s = other_last;
2c2d71f5
JH
538 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
539 /* fbm_instr() takes into account exact value of end-of-str
540 if the check is SvTAIL(ed). Since false positives are OK,
541 and end-of-str is not later than strend we are OK. */
542 s = fbm_instr((unsigned char*)s,
543 (unsigned char*)last + SvCUR(prog->float_substr)
544 - (SvTAIL(prog->float_substr)!=0),
545 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
546 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
547 (s ? "Found" : "Contradicts"),
548 PL_colors[0],
7b0972df
JH
549 (int)(SvCUR(prog->float_substr)
550 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
551 SvPVX(prog->float_substr),
552 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
553 if (!s) {
554 if (last1 == last) {
555 DEBUG_r(PerlIO_printf(Perl_debug_log,
556 ", giving up...\n"));
557 goto fail_finish;
558 }
559 DEBUG_r(PerlIO_printf(Perl_debug_log,
560 ", trying anchored starting at offset %ld...\n",
30944b6d 561 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 562 other_last = last + 1;
2c2d71f5
JH
563 PL_regeol = strend; /* Used in HOP() */
564 s = HOPc(t, 1);
565 goto restart;
566 }
567 else {
568 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 569 (long)(s - i_strpos)));
6eb5f6b9 570 other_last = s + 1;
30944b6d 571 s = s1;
2c2d71f5
JH
572 if (t == strpos)
573 goto try_at_start;
2c2d71f5
JH
574 goto try_at_offset;
575 }
576 }
cad2e5aa 577 }
2c2d71f5
JH
578
579 t = s - prog->check_offset_max;
580 tmp = PL_bostr;
581 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
582 && (!(prog->reganch & ROPT_UTF8)
583 || (PL_bostr = strpos, /* Used in regcopmaybe() */
584 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
585 && t > strpos)))) {
586 PL_bostr = tmp;
587 /* Fixed substring is found far enough so that the match
588 cannot start at strpos. */
589 try_at_offset:
cad2e5aa 590 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
591 /* Eventually fbm_*() should handle this, but often
592 anchored_offset is not 0, so this check will not be wasted. */
593 /* XXXX In the code below we prefer to look for "^" even in
594 presence of anchored substrings. And we search even
595 beyond the found float position. These pessimizations
596 are historical artefacts only. */
597 find_anchor:
2c2d71f5 598 while (t < strend - prog->minlen) {
cad2e5aa
JH
599 if (*t == '\n') {
600 if (t < s - prog->check_offset_min) {
30944b6d
IZ
601 if (prog->anchored_substr) {
602 /* We definitely contradict the found anchored
603 substr. Due to the above check we do not
604 contradict "check" substr.
605 Thus we can arrive here only if check substr
606 is float. Redo checking for "other"=="fixed".
607 */
608 strpos = t + 1;
609 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
610 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
611 goto do_other_anchored;
612 }
cad2e5aa 613 s = t + 1;
2c2d71f5 614 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 615 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
616 goto set_useful;
617 }
2c2d71f5 618 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
619 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
620 strpos = s = t + 1;
cad2e5aa
JH
621 goto restart;
622 }
623 t++;
624 }
2c2d71f5
JH
625 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
626 PL_colors[0],PL_colors[1]));
627 goto fail_finish;
cad2e5aa
JH
628 }
629 s = t;
630 set_useful:
2c2d71f5 631 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
632 }
633 else {
2c2d71f5
JH
634 PL_bostr = tmp;
635 /* The found string does not prohibit matching at beg-of-str
636 - no optimization of calling REx engine can be performed,
637 unless it was an MBOL and we are not after MBOL. */
638 try_at_start:
639 /* Even in this situation we may use MBOL flag if strpos is offset
640 wrt the start of the string. */
641 if (ml_anch && sv
d506a20d
IZ
642 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n'
643 /* May be due to an implicit anchor of m{.*foo} */
644 && !(prog->reganch & ROPT_IMPLICIT))
645 {
cad2e5aa
JH
646 t = strpos;
647 goto find_anchor;
648 }
30944b6d
IZ
649 DEBUG_r( if (ml_anch)
650 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
651 PL_colors[0],PL_colors[1]);
652 );
2c2d71f5 653 success_at_start:
30944b6d 654 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 655 && prog->check_substr /* Could be deleted already */
cad2e5aa 656 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
657 && prog->check_substr == prog->float_substr)
658 {
cad2e5aa
JH
659 /* If flags & SOMETHING - do not do it many times on the same match */
660 SvREFCNT_dec(prog->check_substr);
661 prog->check_substr = Nullsv; /* disable */
662 prog->float_substr = Nullsv; /* clear */
663 s = strpos;
3cf5c195
IZ
664 /* XXXX This is a remnant of the old implementation. It
665 looks wasteful, since now INTUIT can use many
6eb5f6b9 666 other heuristics. */
cad2e5aa
JH
667 prog->reganch &= ~RE_USE_INTUIT;
668 }
669 else
670 s = strpos;
671 }
672
6eb5f6b9
JH
673 /* Last resort... */
674 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
675 if (prog->regstclass) {
676 /* minlen == 0 is possible if regstclass is \b or \B,
677 and the fixed substr is ''$.
678 Since minlen is already taken into account, s+1 is before strend;
679 accidentally, minlen >= 1 guaranties no false positives at s + 1
680 even for \b or \B. But (minlen? 1 : 0) below assumes that
681 regstclass does not come from lookahead... */
682 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
683 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
684 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
685 ? STR_LEN(prog->regstclass)
686 : 1);
6eb5f6b9 687 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
688 ? s + (prog->minlen? cl_l : 0)
689 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 690 : strend) ;
8fdd422b 691 char *startpos = sv && SvPOK(sv) ? strend - SvCUR(sv) : s;
6eb5f6b9
JH
692
693 t = s;
76384e4a
GS
694 if (prog->reganch & ROPT_UTF8) {
695 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
696 PL_bostr = startpos;
697 }
6eb5f6b9
JH
698 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
699 if (!s) {
700#ifdef DEBUGGING
701 char *what;
702#endif
703 if (endpos == strend) {
704 DEBUG_r( PerlIO_printf(Perl_debug_log,
705 "Could not match STCLASS...\n") );
706 goto fail;
707 }
66e933ab
GS
708 DEBUG_r( PerlIO_printf(Perl_debug_log,
709 "This position contradicts STCLASS...\n") );
653099ff
GS
710 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
711 goto fail;
6eb5f6b9
JH
712 /* Contradict one of substrings */
713 if (prog->anchored_substr) {
6eb5f6b9
JH
714 if (prog->anchored_substr == check) {
715 DEBUG_r( what = "anchored" );
716 hop_and_restart:
717 PL_regeol = strend; /* Used in HOP() */
718 s = HOPc(t, 1);
66e933ab
GS
719 if (s + start_shift + end_shift > strend) {
720 /* XXXX Should be taken into account earlier? */
721 DEBUG_r( PerlIO_printf(Perl_debug_log,
722 "Could not match STCLASS...\n") );
723 goto fail;
724 }
6eb5f6b9 725 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 726 "Trying %s substr starting at offset %ld...\n",
6eb5f6b9
JH
727 what, (long)(s + start_shift - i_strpos)) );
728 goto restart;
729 }
66e933ab 730 /* Have both, check_string is floating */
6eb5f6b9
JH
731 if (t + start_shift >= check_at) /* Contradicts floating=check */
732 goto retry_floating_check;
733 /* Recheck anchored substring, but not floating... */
734 s = check_at;
735 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 736 "Trying anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
737 (long)(other_last - i_strpos)) );
738 goto do_other_anchored;
739 }
60e71179
GS
740 /* Another way we could have checked stclass at the
741 current position only: */
742 if (ml_anch) {
743 s = t = t + 1;
744 DEBUG_r( PerlIO_printf(Perl_debug_log,
745 "Trying /^/m starting at offset %ld...\n",
746 (long)(t - i_strpos)) );
747 goto try_at_offset;
66e933ab 748 }
60e71179
GS
749 if (!prog->float_substr) /* Could have been deleted */
750 goto fail;
6eb5f6b9
JH
751 /* Check is floating subtring. */
752 retry_floating_check:
753 t = check_at - start_shift;
754 DEBUG_r( what = "floating" );
755 goto hop_and_restart;
756 }
757 DEBUG_r( if (t != s)
758 PerlIO_printf(Perl_debug_log,
759 "By STCLASS: moving %ld --> %ld\n",
760 (long)(t - i_strpos), (long)(s - i_strpos));
761 else
762 PerlIO_printf(Perl_debug_log,
763 "Does not contradict STCLASS...\n") );
764 }
2c2d71f5 765 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 766 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 767 return s;
2c2d71f5
JH
768
769 fail_finish: /* Substring not found */
66e933ab
GS
770 if (prog->check_substr) /* could be removed already */
771 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 772 fail:
2c2d71f5 773 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
774 PL_colors[4],PL_colors[5]));
775 return Nullch;
776}
9661b544 777
6eb5f6b9 778/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
779STATIC char *
780S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 781{
6eb5f6b9
JH
782 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
783 char *m;
d8093b23
G
784 STRLEN ln;
785 unsigned int c1;
786 unsigned int c2;
6eb5f6b9
JH
787 char *e;
788 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 789
6eb5f6b9
JH
790 /* We know what class it must start with. */
791 switch (OP(c)) {
792 case ANYOFUTF8:
793 while (s < strend) {
794 if (REGINCLASSUTF8(c, (U8*)s)) {
795 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 796 goto got_it;
3cf5c195 797 else
6eb5f6b9 798 tmp = doevery;
a0ed51b3 799 }
6eb5f6b9
JH
800 else
801 tmp = 1;
a0ed51b3
LW
802 s += UTF8SKIP(s);
803 }
6eb5f6b9
JH
804 break;
805 case ANYOF:
a0ed51b3 806 while (s < strend) {
d8093b23 807 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
808 if (tmp && (norun || regtry(prog, s)))
809 goto got_it;
810 else
811 tmp = doevery;
a0ed51b3 812 }
6eb5f6b9
JH
813 else
814 tmp = 1;
a0ed51b3 815 s++;
a0d0e21e 816 }
6eb5f6b9
JH
817 break;
818 case EXACTF:
819 m = STRING(c);
820 ln = STR_LEN(c);
d8093b23 821 c1 = *(U8*)m;
6eb5f6b9
JH
822 c2 = PL_fold[c1];
823 goto do_exactf;
824 case EXACTFL:
825 m = STRING(c);
826 ln = STR_LEN(c);
d8093b23 827 c1 = *(U8*)m;
6eb5f6b9
JH
828 c2 = PL_fold_locale[c1];
829 do_exactf:
830 e = strend - ln;
b3c9acc1 831
6eb5f6b9
JH
832 if (norun && e < s)
833 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
834 /* Here it is NOT UTF! */
835 if (c1 == c2) {
836 while (s <= e) {
d8093b23 837 if ( *(U8*)s == c1
66e933ab
GS
838 && (ln == 1 || !(OP(c) == EXACTF
839 ? ibcmp(s, m, ln)
840 : ibcmp_locale(s, m, ln)))
6eb5f6b9 841 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
842 goto got_it;
843 s++;
844 }
845 } else {
846 while (s <= e) {
d8093b23 847 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
848 && (ln == 1 || !(OP(c) == EXACTF
849 ? ibcmp(s, m, ln)
850 : ibcmp_locale(s, m, ln)))
6eb5f6b9 851 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
852 goto got_it;
853 s++;
854 }
855 }
856 break;
bbce6d69 857 case BOUNDL:
3280af22 858 PL_reg_flags |= RF_tainted;
bbce6d69 859 /* FALL THROUGH */
a0d0e21e 860 case BOUND:
6eb5f6b9 861 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 862 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 863 while (s < strend) {
95bac841 864 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 865 tmp = !tmp;
6eb5f6b9 866 if ((norun || regtry(prog, s)))
a0d0e21e 867 goto got_it;
a687059c 868 }
a0d0e21e
LW
869 s++;
870 }
6eb5f6b9 871 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
872 goto got_it;
873 break;
a0ed51b3
LW
874 case BOUNDLUTF8:
875 PL_reg_flags |= RF_tainted;
876 /* FALL THROUGH */
877 case BOUNDUTF8:
6eb5f6b9 878 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 879 tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 880 while (s < strend) {
76384e4a 881 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
882 swash_fetch(PL_utf8_alnum, (U8*)s) :
883 isALNUM_LC_utf8((U8*)s)))
884 {
a0ed51b3 885 tmp = !tmp;
6eb5f6b9 886 if ((norun || regtry(prog, s)))
a0ed51b3
LW
887 goto got_it;
888 }
889 s += UTF8SKIP(s);
890 }
6eb5f6b9 891 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
892 goto got_it;
893 break;
bbce6d69 894 case NBOUNDL:
3280af22 895 PL_reg_flags |= RF_tainted;
bbce6d69 896 /* FALL THROUGH */
a0d0e21e 897 case NBOUND:
6eb5f6b9 898 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 899 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 900 while (s < strend) {
95bac841 901 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 902 tmp = !tmp;
6eb5f6b9 903 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
904 goto got_it;
905 s++;
906 }
6eb5f6b9 907 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
908 goto got_it;
909 break;
a0ed51b3
LW
910 case NBOUNDLUTF8:
911 PL_reg_flags |= RF_tainted;
912 /* FALL THROUGH */
913 case NBOUNDUTF8:
6eb5f6b9 914 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 915 tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 916 while (s < strend) {
76384e4a 917 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
918 swash_fetch(PL_utf8_alnum, (U8*)s) :
919 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 920 tmp = !tmp;
6eb5f6b9 921 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
922 goto got_it;
923 s += UTF8SKIP(s);
924 }
6eb5f6b9 925 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
926 goto got_it;
927 break;
a0d0e21e
LW
928 case ALNUM:
929 while (s < strend) {
bbce6d69 930 if (isALNUM(*s)) {
6eb5f6b9 931 if (tmp && (norun || regtry(prog, s)))
bbce6d69 932 goto got_it;
933 else
934 tmp = doevery;
935 }
936 else
937 tmp = 1;
938 s++;
939 }
940 break;
a0ed51b3
LW
941 case ALNUMUTF8:
942 while (s < strend) {
dfe13c55 943 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 944 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
945 goto got_it;
946 else
947 tmp = doevery;
948 }
949 else
950 tmp = 1;
951 s += UTF8SKIP(s);
952 }
953 break;
bbce6d69 954 case ALNUML:
3280af22 955 PL_reg_flags |= RF_tainted;
bbce6d69 956 while (s < strend) {
957 if (isALNUM_LC(*s)) {
6eb5f6b9 958 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 959 goto got_it;
a687059c 960 else
a0d0e21e
LW
961 tmp = doevery;
962 }
963 else
964 tmp = 1;
965 s++;
966 }
967 break;
a0ed51b3
LW
968 case ALNUMLUTF8:
969 PL_reg_flags |= RF_tainted;
970 while (s < strend) {
dfe13c55 971 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 972 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
973 goto got_it;
974 else
975 tmp = doevery;
976 }
977 else
978 tmp = 1;
979 s += UTF8SKIP(s);
980 }
981 break;
a0d0e21e
LW
982 case NALNUM:
983 while (s < strend) {
bbce6d69 984 if (!isALNUM(*s)) {
6eb5f6b9 985 if (tmp && (norun || regtry(prog, s)))
bbce6d69 986 goto got_it;
987 else
988 tmp = doevery;
989 }
990 else
991 tmp = 1;
992 s++;
993 }
994 break;
a0ed51b3
LW
995 case NALNUMUTF8:
996 while (s < strend) {
dfe13c55 997 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 998 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
999 goto got_it;
1000 else
1001 tmp = doevery;
1002 }
1003 else
1004 tmp = 1;
1005 s += UTF8SKIP(s);
1006 }
1007 break;
bbce6d69 1008 case NALNUML:
3280af22 1009 PL_reg_flags |= RF_tainted;
bbce6d69 1010 while (s < strend) {
1011 if (!isALNUM_LC(*s)) {
6eb5f6b9 1012 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1013 goto got_it;
a687059c 1014 else
a0d0e21e 1015 tmp = doevery;
a687059c 1016 }
a0d0e21e
LW
1017 else
1018 tmp = 1;
1019 s++;
1020 }
1021 break;
a0ed51b3
LW
1022 case NALNUMLUTF8:
1023 PL_reg_flags |= RF_tainted;
1024 while (s < strend) {
dfe13c55 1025 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1026 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1027 goto got_it;
1028 else
1029 tmp = doevery;
1030 }
1031 else
1032 tmp = 1;
1033 s += UTF8SKIP(s);
1034 }
1035 break;
a0d0e21e
LW
1036 case SPACE:
1037 while (s < strend) {
1038 if (isSPACE(*s)) {
6eb5f6b9 1039 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1040 goto got_it;
1041 else
1042 tmp = doevery;
2304df62 1043 }
a0d0e21e
LW
1044 else
1045 tmp = 1;
1046 s++;
1047 }
1048 break;
a0ed51b3
LW
1049 case SPACEUTF8:
1050 while (s < strend) {
dfe13c55 1051 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1052 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1053 goto got_it;
1054 else
1055 tmp = doevery;
1056 }
1057 else
1058 tmp = 1;
1059 s += UTF8SKIP(s);
1060 }
1061 break;
bbce6d69 1062 case SPACEL:
3280af22 1063 PL_reg_flags |= RF_tainted;
bbce6d69 1064 while (s < strend) {
1065 if (isSPACE_LC(*s)) {
6eb5f6b9 1066 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1067 goto got_it;
1068 else
1069 tmp = doevery;
1070 }
1071 else
1072 tmp = 1;
1073 s++;
1074 }
1075 break;
a0ed51b3
LW
1076 case SPACELUTF8:
1077 PL_reg_flags |= RF_tainted;
1078 while (s < strend) {
dfe13c55 1079 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1080 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1081 goto got_it;
1082 else
1083 tmp = doevery;
1084 }
1085 else
1086 tmp = 1;
1087 s += UTF8SKIP(s);
1088 }
1089 break;
a0d0e21e
LW
1090 case NSPACE:
1091 while (s < strend) {
1092 if (!isSPACE(*s)) {
6eb5f6b9 1093 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1094 goto got_it;
1095 else
1096 tmp = doevery;
a687059c 1097 }
a0d0e21e
LW
1098 else
1099 tmp = 1;
1100 s++;
1101 }
1102 break;
a0ed51b3
LW
1103 case NSPACEUTF8:
1104 while (s < strend) {
dfe13c55 1105 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1106 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1107 goto got_it;
1108 else
1109 tmp = doevery;
1110 }
1111 else
1112 tmp = 1;
1113 s += UTF8SKIP(s);
1114 }
1115 break;
bbce6d69 1116 case NSPACEL:
3280af22 1117 PL_reg_flags |= RF_tainted;
bbce6d69 1118 while (s < strend) {
1119 if (!isSPACE_LC(*s)) {
6eb5f6b9 1120 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1121 goto got_it;
1122 else
1123 tmp = doevery;
1124 }
1125 else
1126 tmp = 1;
1127 s++;
1128 }
1129 break;
a0ed51b3
LW
1130 case NSPACELUTF8:
1131 PL_reg_flags |= RF_tainted;
1132 while (s < strend) {
dfe13c55 1133 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1134 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1135 goto got_it;
1136 else
1137 tmp = doevery;
1138 }
1139 else
1140 tmp = 1;
1141 s += UTF8SKIP(s);
1142 }
1143 break;
a0d0e21e
LW
1144 case DIGIT:
1145 while (s < strend) {
1146 if (isDIGIT(*s)) {
6eb5f6b9 1147 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1148 goto got_it;
1149 else
1150 tmp = doevery;
2b69d0c2 1151 }
a0d0e21e
LW
1152 else
1153 tmp = 1;
1154 s++;
1155 }
1156 break;
a0ed51b3
LW
1157 case DIGITUTF8:
1158 while (s < strend) {
dfe13c55 1159 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1160 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1161 goto got_it;
1162 else
1163 tmp = doevery;
1164 }
1165 else
1166 tmp = 1;
1167 s += UTF8SKIP(s);
1168 }
1169 break;
b8c5462f
JH
1170 case DIGITL:
1171 PL_reg_flags |= RF_tainted;
1172 while (s < strend) {
1173 if (isDIGIT_LC(*s)) {
6eb5f6b9 1174 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1175 goto got_it;
1176 else
1177 tmp = doevery;
1178 }
1179 else
1180 tmp = 1;
1181 s++;
1182 }
1183 break;
1184 case DIGITLUTF8:
1185 PL_reg_flags |= RF_tainted;
1186 while (s < strend) {
1187 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1188 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1189 goto got_it;
1190 else
1191 tmp = doevery;
1192 }
1193 else
1194 tmp = 1;
1195 s += UTF8SKIP(s);
1196 }
1197 break;
a0d0e21e
LW
1198 case NDIGIT:
1199 while (s < strend) {
1200 if (!isDIGIT(*s)) {
6eb5f6b9 1201 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1202 goto got_it;
1203 else
1204 tmp = doevery;
a687059c 1205 }
a0d0e21e
LW
1206 else
1207 tmp = 1;
1208 s++;
1209 }
1210 break;
a0ed51b3
LW
1211 case NDIGITUTF8:
1212 while (s < strend) {
dfe13c55 1213 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1214 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1215 goto got_it;
1216 else
1217 tmp = doevery;
1218 }
1219 else
1220 tmp = 1;
1221 s += UTF8SKIP(s);
1222 }
1223 break;
b8c5462f
JH
1224 case NDIGITL:
1225 PL_reg_flags |= RF_tainted;
1226 while (s < strend) {
1227 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1228 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1229 goto got_it;
1230 else
1231 tmp = doevery;
1232 }
1233 else
1234 tmp = 1;
1235 s++;
a0ed51b3 1236 }
b8c5462f
JH
1237 break;
1238 case NDIGITLUTF8:
1239 PL_reg_flags |= RF_tainted;
1240 while (s < strend) {
1241 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1242 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1243 goto got_it;
cf93c79d 1244 else
b8c5462f
JH
1245 tmp = doevery;
1246 }
1247 else
1248 tmp = 1;
1249 s += UTF8SKIP(s);
1250 }
1251 break;
b3c9acc1 1252 default:
3c3eec57
GS
1253 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1254 break;
d6a28714 1255 }
6eb5f6b9
JH
1256 return 0;
1257 got_it:
1258 return s;
1259}
1260
1261/*
1262 - regexec_flags - match a regexp against a string
1263 */
1264I32
1265Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1266 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1267/* strend: pointer to null at end of string */
1268/* strbeg: real beginning of string */
1269/* minend: end of match must be >=minend after stringarg. */
1270/* data: May be used for some additional optimizations. */
1271/* nosave: For optimizations. */
1272{
1273 dTHR;
1274 register char *s;
1275 register regnode *c;
1276 register char *startpos = stringarg;
6eb5f6b9
JH
1277 I32 minlen; /* must match at least this many chars */
1278 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1279 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1280 constant substr. */ /* CC */
1281 I32 end_shift = 0; /* Same for the end. */ /* CC */
1282 I32 scream_pos = -1; /* Internal iterator of scream. */
1283 char *scream_olds;
1284 SV* oreplsv = GvSV(PL_replgv);
1285
1286 PL_regcc = 0;
1287
1288 cache_re(prog);
1289#ifdef DEBUGGING
1290 PL_regnarrate = PL_debug & 512;
1291#endif
1292
1293 /* Be paranoid... */
1294 if (prog == NULL || startpos == NULL) {
1295 Perl_croak(aTHX_ "NULL regexp parameter");
1296 return 0;
1297 }
1298
1299 minlen = prog->minlen;
1300 if (strend - startpos < minlen) goto phooey;
1301
1302 if (startpos == strbeg) /* is ^ valid at stringarg? */
1303 PL_regprev = '\n';
1304 else {
1305 PL_regprev = (U32)stringarg[-1];
1306 if (!PL_multiline && PL_regprev == '\n')
1307 PL_regprev = '\0'; /* force ^ to NOT match */
1308 }
1309
1310 /* Check validity of program. */
1311 if (UCHARAT(prog->program) != REG_MAGIC) {
1312 Perl_croak(aTHX_ "corrupted regexp program");
1313 }
1314
1315 PL_reg_flags = 0;
1316 PL_reg_eval_set = 0;
1317 PL_reg_maxiter = 0;
1318
1319 if (prog->reganch & ROPT_UTF8)
1320 PL_reg_flags |= RF_utf8;
1321
1322 /* Mark beginning of line for ^ and lookbehind. */
1323 PL_regbol = startpos;
1324 PL_bostr = strbeg;
1325 PL_reg_sv = sv;
1326
1327 /* Mark end of line for $ (and such) */
1328 PL_regeol = strend;
1329
1330 /* see how far we have to get to not match where we matched before */
1331 PL_regtill = startpos+minend;
1332
1333 /* We start without call_cc context. */
1334 PL_reg_call_cc = 0;
1335
1336 /* If there is a "must appear" string, look for it. */
1337 s = startpos;
1338
1339 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1340 MAGIC *mg;
1341
1342 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1343 PL_reg_ganch = startpos;
1344 else if (sv && SvTYPE(sv) >= SVt_PVMG
1345 && SvMAGIC(sv)
1346 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1347 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1348 if (prog->reganch & ROPT_ANCH_GPOS) {
1349 if (s > PL_reg_ganch)
1350 goto phooey;
1351 s = PL_reg_ganch;
1352 }
1353 }
1354 else /* pos() not defined */
1355 PL_reg_ganch = strbeg;
1356 }
1357
1358 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1359 re_scream_pos_data d;
1360
1361 d.scream_olds = &scream_olds;
1362 d.scream_pos = &scream_pos;
1363 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1364 if (!s)
1365 goto phooey; /* not present */
1366 }
1367
1368 DEBUG_r( if (!PL_colorset) reginitcolors() );
1369 DEBUG_r(PerlIO_printf(Perl_debug_log,
1370 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1371 PL_colors[4],PL_colors[5],PL_colors[0],
1372 prog->precomp,
1373 PL_colors[1],
1374 (strlen(prog->precomp) > 60 ? "..." : ""),
1375 PL_colors[0],
1376 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1377 startpos, PL_colors[1],
1378 (strend - startpos > 60 ? "..." : ""))
1379 );
1380
1381 /* Simplest case: anchored match need be tried only once. */
1382 /* [unless only anchor is BOL and multiline is set] */
1383 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1384 if (s == startpos && regtry(prog, startpos))
1385 goto got_it;
1386 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1387 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1388 {
1389 char *end;
1390
1391 if (minlen)
1392 dontbother = minlen - 1;
1393 end = HOPc(strend, -dontbother) - 1;
1394 /* for multiline we only have to try after newlines */
1395 if (prog->check_substr) {
1396 if (s == startpos)
1397 goto after_try;
1398 while (1) {
1399 if (regtry(prog, s))
1400 goto got_it;
1401 after_try:
1402 if (s >= end)
1403 goto phooey;
1404 if (prog->reganch & RE_USE_INTUIT) {
1405 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1406 if (!s)
1407 goto phooey;
1408 }
1409 else
1410 s++;
1411 }
1412 } else {
1413 if (s > startpos)
1414 s--;
1415 while (s < end) {
1416 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1417 if (regtry(prog, s))
1418 goto got_it;
1419 }
1420 }
1421 }
1422 }
1423 goto phooey;
1424 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1425 if (regtry(prog, PL_reg_ganch))
1426 goto got_it;
1427 goto phooey;
1428 }
1429
1430 /* Messy cases: unanchored match. */
1431 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1432 /* we have /x+whatever/ */
1433 /* it must be a one character string (XXXX Except UTF?) */
1434 char ch = SvPVX(prog->anchored_substr)[0];
1435 if (UTF) {
1436 while (s < strend) {
1437 if (*s == ch) {
1438 if (regtry(prog, s)) goto got_it;
1439 s += UTF8SKIP(s);
1440 while (s < strend && *s == ch)
1441 s += UTF8SKIP(s);
1442 }
1443 s += UTF8SKIP(s);
1444 }
1445 }
1446 else {
1447 while (s < strend) {
1448 if (*s == ch) {
1449 if (regtry(prog, s)) goto got_it;
1450 s++;
1451 while (s < strend && *s == ch)
1452 s++;
1453 }
1454 s++;
1455 }
1456 }
1457 }
1458 /*SUPPRESS 560*/
1459 else if (prog->anchored_substr != Nullsv
1460 || (prog->float_substr != Nullsv
1461 && prog->float_max_offset < strend - s)) {
1462 SV *must = prog->anchored_substr
1463 ? prog->anchored_substr : prog->float_substr;
1464 I32 back_max =
1465 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1466 I32 back_min =
1467 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1468 char *last = HOPc(strend, /* Cannot start after this */
1469 -(I32)(CHR_SVLEN(must)
1470 - (SvTAIL(must) != 0) + back_min));
1471 char *last1; /* Last position checked before */
1472
1473 if (s > PL_bostr)
1474 last1 = HOPc(s, -1);
1475 else
1476 last1 = s - 1; /* bogus */
1477
1478 /* XXXX check_substr already used to find `s', can optimize if
1479 check_substr==must. */
1480 scream_pos = -1;
1481 dontbother = end_shift;
1482 strend = HOPc(strend, -dontbother);
1483 while ( (s <= last) &&
1484 ((flags & REXEC_SCREAM)
1485 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1486 end_shift, &scream_pos, 0))
1487 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1488 (unsigned char*)strend, must,
1489 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1490 if (HOPc(s, -back_max) > last1) {
1491 last1 = HOPc(s, -back_min);
1492 s = HOPc(s, -back_max);
1493 }
1494 else {
1495 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1496
1497 last1 = HOPc(s, -back_min);
1498 s = t;
1499 }
1500 if (UTF) {
1501 while (s <= last1) {
1502 if (regtry(prog, s))
1503 goto got_it;
1504 s += UTF8SKIP(s);
1505 }
1506 }
1507 else {
1508 while (s <= last1) {
1509 if (regtry(prog, s))
1510 goto got_it;
1511 s++;
1512 }
1513 }
1514 }
1515 goto phooey;
1516 }
155aba94 1517 else if ((c = prog->regstclass)) {
66e933ab
GS
1518 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1519 /* don't bother with what can't match */
6eb5f6b9
JH
1520 strend = HOPc(strend, -(minlen - 1));
1521 if (find_byclass(prog, c, s, strend, startpos, 0))
1522 goto got_it;
d6a28714
JH
1523 }
1524 else {
1525 dontbother = 0;
1526 if (prog->float_substr != Nullsv) { /* Trim the end. */
1527 char *last;
d6a28714
JH
1528
1529 if (flags & REXEC_SCREAM) {
1530 last = screaminstr(sv, prog->float_substr, s - strbeg,
1531 end_shift, &scream_pos, 1); /* last one */
1532 if (!last)
1533 last = scream_olds; /* Only one occurence. */
b8c5462f 1534 }
d6a28714
JH
1535 else {
1536 STRLEN len;
1537 char *little = SvPV(prog->float_substr, len);
1538
1539 if (SvTAIL(prog->float_substr)) {
1540 if (memEQ(strend - len + 1, little, len - 1))
1541 last = strend - len + 1;
1542 else if (!PL_multiline)
1543 last = memEQ(strend - len, little, len)
1544 ? strend - len : Nullch;
b8c5462f 1545 else
d6a28714
JH
1546 goto find_last;
1547 } else {
1548 find_last:
1549 if (len)
1550 last = rninstr(s, strend, little, little + len);
b8c5462f 1551 else
d6a28714 1552 last = strend; /* matching `$' */
b8c5462f 1553 }
b8c5462f 1554 }
d6a28714
JH
1555 if (last == NULL) goto phooey; /* Should not happen! */
1556 dontbother = strend - last + prog->float_min_offset;
1557 }
1558 if (minlen && (dontbother < minlen))
1559 dontbother = minlen - 1;
1560 strend -= dontbother; /* this one's always in bytes! */
1561 /* We don't know much -- general case. */
1562 if (UTF) {
1563 for (;;) {
1564 if (regtry(prog, s))
1565 goto got_it;
1566 if (s >= strend)
1567 break;
b8c5462f 1568 s += UTF8SKIP(s);
d6a28714
JH
1569 };
1570 }
1571 else {
1572 do {
1573 if (regtry(prog, s))
1574 goto got_it;
1575 } while (s++ < strend);
1576 }
1577 }
1578
1579 /* Failure. */
1580 goto phooey;
1581
1582got_it:
1583 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1584
1585 if (PL_reg_eval_set) {
1586 /* Preserve the current value of $^R */
1587 if (oreplsv != GvSV(PL_replgv))
1588 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1589 restored, the value remains
1590 the same. */
1591 restore_pos(aTHXo_ 0);
1592 }
1593
1594 /* make sure $`, $&, $', and $digit will work later */
1595 if ( !(flags & REXEC_NOT_FIRST) ) {
1596 if (RX_MATCH_COPIED(prog)) {
1597 Safefree(prog->subbeg);
1598 RX_MATCH_COPIED_off(prog);
1599 }
1600 if (flags & REXEC_COPY_STR) {
1601 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1602
1603 s = savepvn(strbeg, i);
1604 prog->subbeg = s;
1605 prog->sublen = i;
1606 RX_MATCH_COPIED_on(prog);
1607 }
1608 else {
1609 prog->subbeg = strbeg;
1610 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1611 }
1612 }
1613
1614 return 1;
1615
1616phooey:
1617 if (PL_reg_eval_set)
1618 restore_pos(aTHXo_ 0);
1619 return 0;
1620}
1621
1622/*
1623 - regtry - try match at specific point
1624 */
1625STATIC I32 /* 0 failure, 1 success */
1626S_regtry(pTHX_ regexp *prog, char *startpos)
1627{
1628 dTHR;
1629 register I32 i;
1630 register I32 *sp;
1631 register I32 *ep;
1632 CHECKPOINT lastcp;
1633
1634 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1635 MAGIC *mg;
1636
1637 PL_reg_eval_set = RS_init;
1638 DEBUG_r(DEBUG_s(
b900a521
JH
1639 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1640 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1641 ));
e8347627 1642 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1643 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1644 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1645 SAVETMPS;
1646 /* Apparently this is not needed, judging by wantarray. */
e8347627 1647 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1648 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1649
1650 if (PL_reg_sv) {
1651 /* Make $_ available to executed code. */
1652 if (PL_reg_sv != DEFSV) {
1653 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1654 SAVESPTR(DEFSV);
1655 DEFSV = PL_reg_sv;
b8c5462f 1656 }
d6a28714
JH
1657
1658 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1659 && (mg = mg_find(PL_reg_sv, 'g')))) {
1660 /* prepare for quick setting of pos */
1661 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1662 mg = mg_find(PL_reg_sv, 'g');
1663 mg->mg_len = -1;
b8c5462f 1664 }
d6a28714
JH
1665 PL_reg_magic = mg;
1666 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1667 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1668 }
1669 if (!PL_reg_curpm)
0f79a09d 1670 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1671 PL_reg_curpm->op_pmregexp = prog;
1672 PL_reg_oldcurpm = PL_curpm;
1673 PL_curpm = PL_reg_curpm;
1674 if (RX_MATCH_COPIED(prog)) {
1675 /* Here is a serious problem: we cannot rewrite subbeg,
1676 since it may be needed if this match fails. Thus
1677 $` inside (?{}) could fail... */
1678 PL_reg_oldsaved = prog->subbeg;
1679 PL_reg_oldsavedlen = prog->sublen;
1680 RX_MATCH_COPIED_off(prog);
1681 }
1682 else
1683 PL_reg_oldsaved = Nullch;
1684 prog->subbeg = PL_bostr;
1685 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1686 }
1687 prog->startp[0] = startpos - PL_bostr;
1688 PL_reginput = startpos;
1689 PL_regstartp = prog->startp;
1690 PL_regendp = prog->endp;
1691 PL_reglastparen = &prog->lastparen;
1692 prog->lastparen = 0;
1693 PL_regsize = 0;
1694 DEBUG_r(PL_reg_starttry = startpos);
1695 if (PL_reg_start_tmpl <= prog->nparens) {
1696 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1697 if(PL_reg_start_tmp)
1698 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1699 else
1700 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1701 }
1702
1703 /* XXXX What this code is doing here?!!! There should be no need
1704 to do this again and again, PL_reglastparen should take care of
1705 this! */
1706 sp = prog->startp;
1707 ep = prog->endp;
1708 if (prog->nparens) {
1709 for (i = prog->nparens; i >= 1; i--) {
1710 *++sp = -1;
1711 *++ep = -1;
1712 }
1713 }
1714 REGCP_SET;
1715 if (regmatch(prog->program + 1)) {
1716 prog->endp[0] = PL_reginput - PL_bostr;
1717 return 1;
1718 }
1719 REGCP_UNWIND;
1720 return 0;
1721}
1722
1723/*
1724 - regmatch - main matching routine
1725 *
1726 * Conceptually the strategy is simple: check to see whether the current
1727 * node matches, call self recursively to see whether the rest matches,
1728 * and then act accordingly. In practice we make some effort to avoid
1729 * recursion, in particular by going through "ordinary" nodes (that don't
1730 * need to know whether the rest of the match failed) by a loop instead of
1731 * by recursion.
1732 */
1733/* [lwall] I've hoisted the register declarations to the outer block in order to
1734 * maybe save a little bit of pushing and popping on the stack. It also takes
1735 * advantage of machines that use a register save mask on subroutine entry.
1736 */
1737STATIC I32 /* 0 failure, 1 success */
1738S_regmatch(pTHX_ regnode *prog)
1739{
1740 dTHR;
1741 register regnode *scan; /* Current node. */
1742 regnode *next; /* Next node. */
1743 regnode *inner; /* Next node in internal branch. */
1744 register I32 nextchr; /* renamed nextchr - nextchar colides with
1745 function of same name */
1746 register I32 n; /* no or next */
1747 register I32 ln; /* len or last */
1748 register char *s; /* operand or save */
1749 register char *locinput = PL_reginput;
1750 register I32 c1, c2, paren; /* case fold search, parenth */
1751 int minmod = 0, sw = 0, logical = 0;
1752#ifdef DEBUGGING
1753 PL_regindent++;
1754#endif
1755
1756 /* Note that nextchr is a byte even in UTF */
1757 nextchr = UCHARAT(locinput);
1758 scan = prog;
1759 while (scan != NULL) {
1760#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1761#ifdef DEBUGGING
1762# define sayYES goto yes
1763# define sayNO goto no
7821416a
IZ
1764# define sayYES_FINAL goto yes_final
1765# define sayYES_LOUD goto yes_loud
1766# define sayNO_FINAL goto no_final
1767# define sayNO_SILENT goto do_no
d6a28714
JH
1768# define saySAME(x) if (x) goto yes; else goto no
1769# define REPORT_CODE_OFF 24
1770#else
1771# define sayYES return 1
1772# define sayNO return 0
7821416a
IZ
1773# define sayYES_FINAL return 1
1774# define sayYES_LOUD return 1
1775# define sayNO_FINAL return 0
1776# define sayNO_SILENT return 0
d6a28714
JH
1777# define saySAME(x) return x
1778#endif
1779 DEBUG_r( {
1780 SV *prop = sv_newmortal();
1781 int docolor = *PL_colors[0];
1782 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1783 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1784 /* The part of the string before starttry has one color
1785 (pref0_len chars), between starttry and current
1786 position another one (pref_len - pref0_len chars),
1787 after the current position the third one.
1788 We assume that pref0_len <= pref_len, otherwise we
1789 decrease pref0_len. */
1790 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1791 ? (5 + taill) - l : locinput - PL_bostr);
1792 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1793
1794 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1795 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1796 ? (5 + taill) - pref_len : PL_regeol - locinput);
1797 if (pref0_len < 0)
1798 pref0_len = 0;
1799 if (pref0_len > pref_len)
1800 pref0_len = pref_len;
1801 regprop(prop, scan);
1802 PerlIO_printf(Perl_debug_log,
b900a521
JH
1803 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1804 (IV)(locinput - PL_bostr),
d6a28714
JH
1805 PL_colors[4], pref0_len,
1806 locinput - pref_len, PL_colors[5],
1807 PL_colors[2], pref_len - pref0_len,
1808 locinput - pref_len + pref0_len, PL_colors[3],
1809 (docolor ? "" : "> <"),
1810 PL_colors[0], l, locinput, PL_colors[1],
1811 15 - l - pref_len + 1,
1812 "",
b900a521 1813 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1814 SvPVX(prop));
1815 } );
1816
1817 next = scan + NEXT_OFF(scan);
1818 if (next == scan)
1819 next = NULL;
1820
1821 switch (OP(scan)) {
1822 case BOL:
1823 if (locinput == PL_bostr
1824 ? PL_regprev == '\n'
1825 : (PL_multiline &&
1826 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1827 {
1828 /* regtill = regbol; */
b8c5462f
JH
1829 break;
1830 }
d6a28714
JH
1831 sayNO;
1832 case MBOL:
1833 if (locinput == PL_bostr
1834 ? PL_regprev == '\n'
1835 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1836 {
b8c5462f
JH
1837 break;
1838 }
d6a28714
JH
1839 sayNO;
1840 case SBOL:
1841 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1842 break;
d6a28714
JH
1843 sayNO;
1844 case GPOS:
1845 if (locinput == PL_reg_ganch)
1846 break;
1847 sayNO;
1848 case EOL:
1849 if (PL_multiline)
1850 goto meol;
1851 else
1852 goto seol;
1853 case MEOL:
1854 meol:
1855 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1856 sayNO;
b8c5462f 1857 break;
d6a28714
JH
1858 case SEOL:
1859 seol:
1860 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1861 sayNO;
d6a28714 1862 if (PL_regeol - locinput > 1)
b8c5462f 1863 sayNO;
b8c5462f 1864 break;
d6a28714
JH
1865 case EOS:
1866 if (PL_regeol != locinput)
b8c5462f 1867 sayNO;
d6a28714
JH
1868 break;
1869 case SANYUTF8:
b8c5462f 1870 if (nextchr & 0x80) {
b8c5462f 1871 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1872 if (locinput > PL_regeol)
1873 sayNO;
b8c5462f
JH
1874 nextchr = UCHARAT(locinput);
1875 break;
1876 }
d6a28714 1877 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1878 sayNO;
b8c5462f 1879 nextchr = UCHARAT(++locinput);
a0d0e21e 1880 break;
d6a28714
JH
1881 case SANY:
1882 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1883 sayNO;
1884 nextchr = UCHARAT(++locinput);
b85d18e9 1885 break;
d6a28714 1886 case ANYUTF8:
a0ed51b3 1887 if (nextchr & 0x80) {
b8c5462f 1888 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1889 if (locinput > PL_regeol)
1890 sayNO;
a0ed51b3
LW
1891 nextchr = UCHARAT(locinput);
1892 break;
1893 }
155aba94 1894 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
1895 sayNO;
1896 nextchr = UCHARAT(++locinput);
1897 break;
d6a28714 1898 case REG_ANY:
155aba94 1899 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 1900 sayNO;
76e3520e 1901 nextchr = UCHARAT(++locinput);
a0d0e21e 1902 break;
d6a28714 1903 case EXACT:
cd439c50
IZ
1904 s = STRING(scan);
1905 ln = STR_LEN(scan);
d6a28714
JH
1906 /* Inline the first character, for speed. */
1907 if (UCHARAT(s) != nextchr)
1908 sayNO;
1909 if (PL_regeol - locinput < ln)
1910 sayNO;
1911 if (ln > 1 && memNE(s, locinput, ln))
1912 sayNO;
1913 locinput += ln;
1914 nextchr = UCHARAT(locinput);
1915 break;
1916 case EXACTFL:
b8c5462f
JH
1917 PL_reg_flags |= RF_tainted;
1918 /* FALL THROUGH */
d6a28714 1919 case EXACTF:
cd439c50
IZ
1920 s = STRING(scan);
1921 ln = STR_LEN(scan);
d6a28714
JH
1922
1923 if (UTF) {
1924 char *l = locinput;
1925 char *e = s + ln;
1926 c1 = OP(scan) == EXACTF;
1927 while (s < e) {
1928 if (l >= PL_regeol)
1929 sayNO;
1930 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1931 toLOWER_utf8((U8*)l) :
1932 toLOWER_LC_utf8((U8*)l)))
1933 {
1934 sayNO;
1935 }
1936 s += UTF8SKIP(s);
1937 l += UTF8SKIP(l);
b8c5462f 1938 }
d6a28714 1939 locinput = l;
a0ed51b3
LW
1940 nextchr = UCHARAT(locinput);
1941 break;
1942 }
d6a28714
JH
1943
1944 /* Inline the first character, for speed. */
1945 if (UCHARAT(s) != nextchr &&
1946 UCHARAT(s) != ((OP(scan) == EXACTF)
1947 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1948 sayNO;
d6a28714 1949 if (PL_regeol - locinput < ln)
b8c5462f 1950 sayNO;
d6a28714
JH
1951 if (ln > 1 && (OP(scan) == EXACTF
1952 ? ibcmp(s, locinput, ln)
1953 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1954 sayNO;
d6a28714
JH
1955 locinput += ln;
1956 nextchr = UCHARAT(locinput);
a0d0e21e 1957 break;
d6a28714 1958 case ANYOFUTF8:
d6a28714 1959 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1960 sayNO;
d6a28714
JH
1961 if (locinput >= PL_regeol)
1962 sayNO;
1963 locinput += PL_utf8skip[nextchr];
1964 nextchr = UCHARAT(locinput);
1965 break;
1966 case ANYOF:
d6a28714 1967 if (nextchr < 0)
b8c5462f 1968 nextchr = UCHARAT(locinput);
936ed897 1969 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
1970 sayNO;
1971 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1972 sayNO;
b8c5462f
JH
1973 nextchr = UCHARAT(++locinput);
1974 break;
d6a28714 1975 case ALNUML:
b8c5462f
JH
1976 PL_reg_flags |= RF_tainted;
1977 /* FALL THROUGH */
d6a28714 1978 case ALNUM:
b8c5462f 1979 if (!nextchr)
4633a7c4 1980 sayNO;
d6a28714
JH
1981 if (!(OP(scan) == ALNUM
1982 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1983 sayNO;
1984 nextchr = UCHARAT(++locinput);
bbce6d69 1985 break;
d6a28714 1986 case ALNUMLUTF8:
3280af22 1987 PL_reg_flags |= RF_tainted;
bbce6d69 1988 /* FALL THROUGH */
d6a28714 1989 case ALNUMUTF8:
b8c5462f
JH
1990 if (!nextchr)
1991 sayNO;
1992 if (nextchr & 0x80) {
d6a28714
JH
1993 if (!(OP(scan) == ALNUMUTF8
1994 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1995 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1996 {
1997 sayNO;
a0ed51b3 1998 }
b8c5462f 1999 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2000 nextchr = UCHARAT(locinput);
2001 break;
2002 }
d6a28714
JH
2003 if (!(OP(scan) == ALNUMUTF8
2004 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2005 sayNO;
b8c5462f 2006 nextchr = UCHARAT(++locinput);
a0d0e21e 2007 break;
d6a28714 2008 case NALNUML:
b8c5462f
JH
2009 PL_reg_flags |= RF_tainted;
2010 /* FALL THROUGH */
d6a28714
JH
2011 case NALNUM:
2012 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2013 sayNO;
d6a28714
JH
2014 if (OP(scan) == NALNUM
2015 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2016 sayNO;
b8c5462f 2017 nextchr = UCHARAT(++locinput);
a0ed51b3 2018 break;
d6a28714 2019 case NALNUMLUTF8:
b8c5462f
JH
2020 PL_reg_flags |= RF_tainted;
2021 /* FALL THROUGH */
d6a28714 2022 case NALNUMUTF8:
3280af22 2023 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2024 sayNO;
b8c5462f 2025 if (nextchr & 0x80) {
d6a28714
JH
2026 if (OP(scan) == NALNUMUTF8
2027 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2028 : isALNUM_LC_utf8((U8*)locinput))
2029 {
b8c5462f 2030 sayNO;
d6a28714 2031 }
b8c5462f
JH
2032 locinput += PL_utf8skip[nextchr];
2033 nextchr = UCHARAT(locinput);
2034 break;
2035 }
d6a28714
JH
2036 if (OP(scan) == NALNUMUTF8
2037 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2038 sayNO;
76e3520e 2039 nextchr = UCHARAT(++locinput);
a0d0e21e 2040 break;
d6a28714
JH
2041 case BOUNDL:
2042 case NBOUNDL:
3280af22 2043 PL_reg_flags |= RF_tainted;
bbce6d69 2044 /* FALL THROUGH */
d6a28714
JH
2045 case BOUND:
2046 case NBOUND:
2047 /* was last char in word? */
2048 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2049 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2050 ln = isALNUM(ln);
2051 n = isALNUM(nextchr);
2052 }
2053 else {
2054 ln = isALNUM_LC(ln);
2055 n = isALNUM_LC(nextchr);
2056 }
2057 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2058 sayNO;
a0d0e21e 2059 break;
d6a28714
JH
2060 case BOUNDLUTF8:
2061 case NBOUNDLUTF8:
a0ed51b3
LW
2062 PL_reg_flags |= RF_tainted;
2063 /* FALL THROUGH */
d6a28714
JH
2064 case BOUNDUTF8:
2065 case NBOUNDUTF8:
2066 /* was last char in word? */
2067 ln = (locinput != PL_regbol)
2068 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2069 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2070 ln = isALNUM_uni(ln);
2071 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2072 }
d6a28714
JH
2073 else {
2074 ln = isALNUM_LC_uni(ln);
2075 n = isALNUM_LC_utf8((U8*)locinput);
2076 }
2077 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2078 sayNO;
a0ed51b3 2079 break;
d6a28714 2080 case SPACEL:
3280af22 2081 PL_reg_flags |= RF_tainted;
bbce6d69 2082 /* FALL THROUGH */
d6a28714 2083 case SPACE:
9442cb0e 2084 if (!nextchr)
4633a7c4 2085 sayNO;
d6a28714
JH
2086 if (!(OP(scan) == SPACE
2087 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2088 sayNO;
76e3520e 2089 nextchr = UCHARAT(++locinput);
a0d0e21e 2090 break;
d6a28714 2091 case SPACELUTF8:
a0ed51b3
LW
2092 PL_reg_flags |= RF_tainted;
2093 /* FALL THROUGH */
d6a28714 2094 case SPACEUTF8:
9442cb0e 2095 if (!nextchr)
a0ed51b3
LW
2096 sayNO;
2097 if (nextchr & 0x80) {
d6a28714 2098 if (!(OP(scan) == SPACEUTF8
9442cb0e 2099 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2100 : isSPACE_LC_utf8((U8*)locinput)))
2101 {
a0ed51b3 2102 sayNO;
d6a28714 2103 }
6f06b55f 2104 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2105 nextchr = UCHARAT(locinput);
2106 break;
2107 }
d6a28714
JH
2108 if (!(OP(scan) == SPACEUTF8
2109 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2110 sayNO;
2111 nextchr = UCHARAT(++locinput);
2112 break;
d6a28714 2113 case NSPACEL:
3280af22 2114 PL_reg_flags |= RF_tainted;
bbce6d69 2115 /* FALL THROUGH */
d6a28714 2116 case NSPACE:
9442cb0e 2117 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2118 sayNO;
9442cb0e 2119 if (OP(scan) == NSPACE
d6a28714 2120 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2121 sayNO;
b8c5462f 2122 nextchr = UCHARAT(++locinput);
a0d0e21e 2123 break;
d6a28714 2124 case NSPACELUTF8:
a0ed51b3
LW
2125 PL_reg_flags |= RF_tainted;
2126 /* FALL THROUGH */
d6a28714 2127 case NSPACEUTF8:
9442cb0e 2128 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2129 sayNO;
2130 if (nextchr & 0x80) {
d6a28714 2131 if (OP(scan) == NSPACEUTF8
9442cb0e 2132 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2133 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2134 {
2135 sayNO;
2136 }
2137 locinput += PL_utf8skip[nextchr];
2138 nextchr = UCHARAT(locinput);
2139 break;
a0ed51b3 2140 }
d6a28714
JH
2141 if (OP(scan) == NSPACEUTF8
2142 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2143 sayNO;
76e3520e 2144 nextchr = UCHARAT(++locinput);
a0d0e21e 2145 break;
d6a28714 2146 case DIGITL:
a0ed51b3
LW
2147 PL_reg_flags |= RF_tainted;
2148 /* FALL THROUGH */
d6a28714 2149 case DIGIT:
9442cb0e 2150 if (!nextchr)
a0ed51b3 2151 sayNO;
d6a28714
JH
2152 if (!(OP(scan) == DIGIT
2153 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2154 sayNO;
76e3520e 2155 nextchr = UCHARAT(++locinput);
a0d0e21e 2156 break;
d6a28714 2157 case DIGITLUTF8:
a0ed51b3
LW
2158 PL_reg_flags |= RF_tainted;
2159 /* FALL THROUGH */
d6a28714 2160 case DIGITUTF8:
a0ed51b3
LW
2161 if (!nextchr)
2162 sayNO;
2163 if (nextchr & 0x80) {
9442cb0e
GS
2164 if (!(OP(scan) == DIGITUTF8
2165 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2166 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2167 {
a0ed51b3 2168 sayNO;
dfe13c55 2169 }
6f06b55f 2170 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2171 nextchr = UCHARAT(locinput);
2172 break;
2173 }
9442cb0e
GS
2174 if (!(OP(scan) == DIGITUTF8
2175 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2176 sayNO;
2177 nextchr = UCHARAT(++locinput);
2178 break;
d6a28714 2179 case NDIGITL:
b8c5462f
JH
2180 PL_reg_flags |= RF_tainted;
2181 /* FALL THROUGH */
d6a28714 2182 case NDIGIT:
9442cb0e 2183 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2184 sayNO;
9442cb0e 2185 if (OP(scan) == NDIGIT
d6a28714 2186 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2187 sayNO;
76e3520e 2188 nextchr = UCHARAT(++locinput);
a0d0e21e 2189 break;
d6a28714 2190 case NDIGITLUTF8:
b8c5462f
JH
2191 PL_reg_flags |= RF_tainted;
2192 /* FALL THROUGH */
d6a28714 2193 case NDIGITUTF8:
b8c5462f
JH
2194 if (!nextchr && locinput >= PL_regeol)
2195 sayNO;
a0ed51b3 2196 if (nextchr & 0x80) {
9442cb0e
GS
2197 if (OP(scan) == NDIGITUTF8
2198 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2199 : isDIGIT_LC_utf8((U8*)locinput))
2200 {
a0ed51b3 2201 sayNO;
9442cb0e 2202 }
6f06b55f 2203 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2204 nextchr = UCHARAT(locinput);
2205 break;
2206 }
9442cb0e
GS
2207 if (OP(scan) == NDIGITUTF8
2208 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2209 sayNO;
2210 nextchr = UCHARAT(++locinput);
2211 break;
2212 case CLUMP:
dfe13c55 2213 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2214 sayNO;
6f06b55f 2215 locinput += PL_utf8skip[nextchr];
dfe13c55 2216 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2217 locinput += UTF8SKIP(locinput);
2218 if (locinput > PL_regeol)
2219 sayNO;
2220 nextchr = UCHARAT(locinput);
2221 break;
c8756f30 2222 case REFFL:
3280af22 2223 PL_reg_flags |= RF_tainted;
c8756f30 2224 /* FALL THROUGH */
c277df42 2225 case REF:
c8756f30 2226 case REFF:
c277df42 2227 n = ARG(scan); /* which paren pair */
cf93c79d 2228 ln = PL_regstartp[n];
2c2d71f5 2229 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2230 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2231 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2232 if (ln == PL_regendp[n])
a0d0e21e 2233 break;
a0ed51b3 2234
cf93c79d 2235 s = PL_bostr + ln;
a0ed51b3
LW
2236 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2237 char *l = locinput;
cf93c79d 2238 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2239 /*
2240 * Note that we can't do the "other character" lookup trick as
2241 * in the 8-bit case (no pun intended) because in Unicode we
2242 * have to map both upper and title case to lower case.
2243 */
2244 if (OP(scan) == REFF) {
2245 while (s < e) {
2246 if (l >= PL_regeol)
2247 sayNO;
dfe13c55 2248 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2249 sayNO;
2250 s += UTF8SKIP(s);
2251 l += UTF8SKIP(l);
2252 }
2253 }
2254 else {
2255 while (s < e) {
2256 if (l >= PL_regeol)
2257 sayNO;
dfe13c55 2258 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2259 sayNO;
2260 s += UTF8SKIP(s);
2261 l += UTF8SKIP(l);
2262 }
2263 }
2264 locinput = l;
2265 nextchr = UCHARAT(locinput);
2266 break;
2267 }
2268
a0d0e21e 2269 /* Inline the first character, for speed. */
76e3520e 2270 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2271 (OP(scan) == REF ||
2272 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2273 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2274 sayNO;
cf93c79d 2275 ln = PL_regendp[n] - ln;
3280af22 2276 if (locinput + ln > PL_regeol)
4633a7c4 2277 sayNO;
c8756f30
AK
2278 if (ln > 1 && (OP(scan) == REF
2279 ? memNE(s, locinput, ln)
2280 : (OP(scan) == REFF
2281 ? ibcmp(s, locinput, ln)
2282 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2283 sayNO;
a0d0e21e 2284 locinput += ln;
76e3520e 2285 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2286 break;
2287
2288 case NOTHING:
c277df42 2289 case TAIL:
a0d0e21e
LW
2290 break;
2291 case BACK:
2292 break;
c277df42
IZ
2293 case EVAL:
2294 {
2295 dSP;
533c011a 2296 OP_4tree *oop = PL_op;
3280af22
NIS
2297 COP *ocurcop = PL_curcop;
2298 SV **ocurpad = PL_curpad;
c277df42
IZ
2299 SV *ret;
2300
2301 n = ARG(scan);
533c011a 2302 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2303 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2304 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2305 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2306
cea2e8a9 2307 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2308 SPAGAIN;
2309 ret = POPs;
2310 PUTBACK;
2311
0f5d15d6
IZ
2312 PL_op = oop;
2313 PL_curpad = ocurpad;
2314 PL_curcop = ocurcop;
c277df42 2315 if (logical) {
0f5d15d6
IZ
2316 if (logical == 2) { /* Postponed subexpression. */
2317 regexp *re;
22c35a8c 2318 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2319 re_cc_state state;
0f5d15d6
IZ
2320 CHECKPOINT cp, lastcp;
2321
2322 if(SvROK(ret) || SvRMAGICAL(ret)) {
2323 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2324
2325 if(SvMAGICAL(sv))
2326 mg = mg_find(sv, 'r');
2327 }
2328 if (mg) {
2329 re = (regexp *)mg->mg_obj;
df0003d4 2330 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2331 }
2332 else {
2333 STRLEN len;
2334 char *t = SvPV(ret, len);
2335 PMOP pm;
2336 char *oprecomp = PL_regprecomp;
2337 I32 osize = PL_regsize;
2338 I32 onpar = PL_regnpar;
2339
2340 pm.op_pmflags = 0;
393fec97 2341 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2342 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2343 if (!(SvFLAGS(ret)
2344 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2345 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2346 PL_regprecomp = oprecomp;
2347 PL_regsize = osize;
2348 PL_regnpar = onpar;
2349 }
2350 DEBUG_r(
2351 PerlIO_printf(Perl_debug_log,
2352 "Entering embedded `%s%.60s%s%s'\n",
2353 PL_colors[0],
2354 re->precomp,
2355 PL_colors[1],
2356 (strlen(re->precomp) > 60 ? "..." : ""))
2357 );
2358 state.node = next;
2359 state.prev = PL_reg_call_cc;
2360 state.cc = PL_regcc;
2361 state.re = PL_reg_re;
2362
2ab05381 2363 PL_regcc = 0;
0f5d15d6
IZ
2364
2365 cp = regcppush(0); /* Save *all* the positions. */
2366 REGCP_SET;
2367 cache_re(re);
2368 state.ss = PL_savestack_ix;
2369 *PL_reglastparen = 0;
2370 PL_reg_call_cc = &state;
2371 PL_reginput = locinput;
2c2d71f5
JH
2372
2373 /* XXXX This is too dramatic a measure... */
2374 PL_reg_maxiter = 0;
2375
0f5d15d6 2376 if (regmatch(re->program + 1)) {
2c914db6
IZ
2377 /* Even though we succeeded, we need to restore
2378 global variables, since we may be wrapped inside
2379 SUSPEND, thus the match may be not finished yet. */
2380
2381 /* XXXX Do this only if SUSPENDed? */
2382 PL_reg_call_cc = state.prev;
2383 PL_regcc = state.cc;
2384 PL_reg_re = state.re;
2385 cache_re(PL_reg_re);
2386
2387 /* XXXX This is too dramatic a measure... */
2388 PL_reg_maxiter = 0;
2389
2390 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2391 ReREFCNT_dec(re);
2392 regcpblow(cp);
2393 sayYES;
2394 }
0f5d15d6
IZ
2395 ReREFCNT_dec(re);
2396 REGCP_UNWIND;
2397 regcppop();
2398 PL_reg_call_cc = state.prev;
2399 PL_regcc = state.cc;
2400 PL_reg_re = state.re;
d3790889 2401 cache_re(PL_reg_re);
2c2d71f5
JH
2402
2403 /* XXXX This is too dramatic a measure... */
2404 PL_reg_maxiter = 0;
2405
0f5d15d6
IZ
2406 sayNO;
2407 }
c277df42 2408 sw = SvTRUE(ret);
0f5d15d6 2409 logical = 0;
a0ed51b3
LW
2410 }
2411 else
3280af22 2412 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2413 break;
2414 }
a0d0e21e 2415 case OPEN:
c277df42 2416 n = ARG(scan); /* which paren pair */
3280af22
NIS
2417 PL_reg_start_tmp[n] = locinput;
2418 if (n > PL_regsize)
2419 PL_regsize = n;
a0d0e21e
LW
2420 break;
2421 case CLOSE:
c277df42 2422 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2423 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2424 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2425 if (n > *PL_reglastparen)
2426 *PL_reglastparen = n;
a0d0e21e 2427 break;
c277df42
IZ
2428 case GROUPP:
2429 n = ARG(scan); /* which paren pair */
cf93c79d 2430 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2431 break;
2432 case IFTHEN:
2c2d71f5 2433 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2434 if (sw)
2435 next = NEXTOPER(NEXTOPER(scan));
2436 else {
2437 next = scan + ARG(scan);
2438 if (OP(next) == IFTHEN) /* Fake one. */
2439 next = NEXTOPER(NEXTOPER(next));
2440 }
2441 break;
2442 case LOGICAL:
0f5d15d6 2443 logical = scan->flags;
c277df42 2444 break;
2ab05381
IZ
2445/*******************************************************************
2446 PL_regcc contains infoblock about the innermost (...)* loop, and
2447 a pointer to the next outer infoblock.
2448
2449 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2450
2451 1) After matching X, regnode for CURLYX is processed;
2452
2453 2) This regnode creates infoblock on the stack, and calls
2454 regmatch() recursively with the starting point at WHILEM node;
2455
2456 3) Each hit of WHILEM node tries to match A and Z (in the order
2457 depending on the current iteration, min/max of {min,max} and
2458 greediness). The information about where are nodes for "A"
2459 and "Z" is read from the infoblock, as is info on how many times "A"
2460 was already matched, and greediness.
2461
2462 4) After A matches, the same WHILEM node is hit again.
2463
2464 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2465 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2466 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2467 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2468 of the external loop.
2469
2470 Currently present infoblocks form a tree with a stem formed by PL_curcc
2471 and whatever it mentions via ->next, and additional attached trees
2472 corresponding to temporarily unset infoblocks as in "5" above.
2473
2474 In the following picture infoblocks for outer loop of
2475 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2476 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2477 infoblocks are drawn below the "reset" infoblock.
2478
2479 In fact in the picture below we do not show failed matches for Z and T
2480 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2481 more obvious *why* one needs to *temporary* unset infoblocks.]
2482
2483 Matched REx position InfoBlocks Comment
2484 (Y(A)*?Z)*?T x
2485 Y(A)*?Z)*?T x <- O
2486 Y (A)*?Z)*?T x <- O
2487 Y A)*?Z)*?T x <- O <- I
2488 YA )*?Z)*?T x <- O <- I
2489 YA A)*?Z)*?T x <- O <- I
2490 YAA )*?Z)*?T x <- O <- I
2491 YAA Z)*?T x <- O # Temporary unset I
2492 I
2493
2494 YAAZ Y(A)*?Z)*?T x <- O
2495 I
2496
2497 YAAZY (A)*?Z)*?T x <- O
2498 I
2499
2500 YAAZY A)*?Z)*?T x <- O <- I
2501 I
2502
2503 YAAZYA )*?Z)*?T x <- O <- I
2504 I
2505
2506 YAAZYA Z)*?T x <- O # Temporary unset I
2507 I,I
2508
2509 YAAZYAZ )*?T x <- O
2510 I,I
2511
2512 YAAZYAZ T x # Temporary unset O
2513 O
2514 I,I
2515
2516 YAAZYAZT x
2517 O
2518 I,I
2519 *******************************************************************/
a0d0e21e
LW
2520 case CURLYX: {
2521 CURCUR cc;
3280af22 2522 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2523
2524 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2525 next += ARG(next);
3280af22
NIS
2526 cc.oldcc = PL_regcc;
2527 PL_regcc = &cc;
2528 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2529 cc.cur = -1;
2530 cc.min = ARG1(scan);
2531 cc.max = ARG2(scan);
c277df42 2532 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2533 cc.next = next;
2534 cc.minmod = minmod;
2535 cc.lastloc = 0;
3280af22 2536 PL_reginput = locinput;
a0d0e21e
LW
2537 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2538 regcpblow(cp);
3280af22 2539 PL_regcc = cc.oldcc;
4633a7c4 2540 saySAME(n);
a0d0e21e
LW
2541 }
2542 /* NOT REACHED */
2543 case WHILEM: {
2544 /*
2545 * This is really hard to understand, because after we match
2546 * what we're trying to match, we must make sure the rest of
2c2d71f5 2547 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2548 * to go back UP the parse tree by recursing ever deeper. And
2549 * if it fails, we have to reset our parent's current state
2550 * that we can try again after backing off.
2551 */
2552
c277df42 2553 CHECKPOINT cp, lastcp;
3280af22 2554 CURCUR* cc = PL_regcc;
c277df42
IZ
2555 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2556
4633a7c4 2557 n = cc->cur + 1; /* how many we know we matched */
3280af22 2558 PL_reginput = locinput;
a0d0e21e 2559
c277df42
IZ
2560 DEBUG_r(
2561 PerlIO_printf(Perl_debug_log,
2562 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2563 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2564 (long)n, (long)cc->min,
2565 (long)cc->max, (long)cc)
2566 );
4633a7c4 2567
a0d0e21e
LW
2568 /* If degenerate scan matches "", assume scan done. */
2569
579cf2c3 2570 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2571 PL_regcc = cc->oldcc;
2ab05381
IZ
2572 if (PL_regcc)
2573 ln = PL_regcc->cur;
c277df42 2574 DEBUG_r(
c3464db5
DD
2575 PerlIO_printf(Perl_debug_log,
2576 "%*s empty match detected, try continuation...\n",
3280af22 2577 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2578 );
a0d0e21e 2579 if (regmatch(cc->next))
4633a7c4 2580 sayYES;
2ab05381
IZ
2581 if (PL_regcc)
2582 PL_regcc->cur = ln;
3280af22 2583 PL_regcc = cc;
4633a7c4 2584 sayNO;
a0d0e21e
LW
2585 }
2586
2587 /* First just match a string of min scans. */
2588
2589 if (n < cc->min) {
2590 cc->cur = n;
2591 cc->lastloc = locinput;
4633a7c4
LW
2592 if (regmatch(cc->scan))
2593 sayYES;
2594 cc->cur = n - 1;
c277df42 2595 cc->lastloc = lastloc;
4633a7c4 2596 sayNO;
a0d0e21e
LW
2597 }
2598
2c2d71f5
JH
2599 if (scan->flags) {
2600 /* Check whether we already were at this position.
2601 Postpone detection until we know the match is not
2602 *that* much linear. */
2603 if (!PL_reg_maxiter) {
2604 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2605 PL_reg_leftiter = PL_reg_maxiter;
2606 }
2607 if (PL_reg_leftiter-- == 0) {
2608 I32 size = (PL_reg_maxiter + 7)/8;
2609 if (PL_reg_poscache) {
2610 if (PL_reg_poscache_size < size) {
2611 Renew(PL_reg_poscache, size, char);
2612 PL_reg_poscache_size = size;
2613 }
2614 Zero(PL_reg_poscache, size, char);
2615 }
2616 else {
2617 PL_reg_poscache_size = size;
2618 Newz(29, PL_reg_poscache, size, char);
2619 }
2620 DEBUG_r(
2621 PerlIO_printf(Perl_debug_log,
2622 "%sDetected a super-linear match, switching on caching%s...\n",
2623 PL_colors[4], PL_colors[5])
2624 );
2625 }
2626 if (PL_reg_leftiter < 0) {
2627 I32 o = locinput - PL_bostr, b;
2628
2629 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2630 b = o % 8;
2631 o /= 8;
2632 if (PL_reg_poscache[o] & (1<<b)) {
2633 DEBUG_r(
2634 PerlIO_printf(Perl_debug_log,
2635 "%*s already tried at this position...\n",
2636 REPORT_CODE_OFF+PL_regindent*2, "")
2637 );
7821416a 2638 sayNO_SILENT;
2c2d71f5
JH
2639 }
2640 PL_reg_poscache[o] |= (1<<b);
2641 }
2642 }
2643
a0d0e21e
LW
2644 /* Prefer next over scan for minimal matching. */
2645
2646 if (cc->minmod) {
3280af22 2647 PL_regcc = cc->oldcc;
2ab05381
IZ
2648 if (PL_regcc)
2649 ln = PL_regcc->cur;
5f05dabc 2650 cp = regcppush(cc->parenfloor);
c277df42 2651 REGCP_SET;
5f05dabc 2652 if (regmatch(cc->next)) {
c277df42 2653 regcpblow(cp);
4633a7c4 2654 sayYES; /* All done. */
5f05dabc 2655 }
c277df42 2656 REGCP_UNWIND;
5f05dabc 2657 regcppop();
2ab05381
IZ
2658 if (PL_regcc)
2659 PL_regcc->cur = ln;
3280af22 2660 PL_regcc = cc;
a0d0e21e 2661
c277df42 2662 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2663 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2664 && !(PL_reg_flags & RF_warned)) {
2665 PL_reg_flags |= RF_warned;
e476b1b5 2666 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2667 "Complex regular subexpression recursion",
2668 REG_INFTY - 1);
c277df42 2669 }
4633a7c4 2670 sayNO;
c277df42 2671 }
a687059c 2672
c277df42 2673 DEBUG_r(
c3464db5
DD
2674 PerlIO_printf(Perl_debug_log,
2675 "%*s trying longer...\n",
3280af22 2676 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2677 );
a0d0e21e 2678 /* Try scanning more and see if it helps. */
3280af22 2679 PL_reginput = locinput;
a0d0e21e
LW
2680 cc->cur = n;
2681 cc->lastloc = locinput;
5f05dabc 2682 cp = regcppush(cc->parenfloor);
c277df42 2683 REGCP_SET;
5f05dabc 2684 if (regmatch(cc->scan)) {
c277df42 2685 regcpblow(cp);
4633a7c4 2686 sayYES;
5f05dabc 2687 }
c277df42 2688 REGCP_UNWIND;
5f05dabc 2689 regcppop();
4633a7c4 2690 cc->cur = n - 1;
c277df42 2691 cc->lastloc = lastloc;
4633a7c4 2692 sayNO;
a0d0e21e
LW
2693 }
2694
2695 /* Prefer scan over next for maximal matching. */
2696
2697 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2698 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2699 cc->cur = n;
2700 cc->lastloc = locinput;
c277df42 2701 REGCP_SET;
5f05dabc 2702 if (regmatch(cc->scan)) {
c277df42 2703 regcpblow(cp);
4633a7c4 2704 sayYES;
5f05dabc 2705 }
c277df42 2706 REGCP_UNWIND;
a0d0e21e 2707 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2708 PL_reginput = locinput;
c277df42 2709 DEBUG_r(
c3464db5
DD
2710 PerlIO_printf(Perl_debug_log,
2711 "%*s failed, try continuation...\n",
3280af22 2712 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2713 );
2714 }
e476b1b5 2715 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2716 && !(PL_reg_flags & RF_warned)) {
3280af22 2717 PL_reg_flags |= RF_warned;
e476b1b5 2718 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2719 "Complex regular subexpression recursion",
2720 REG_INFTY - 1);
a0d0e21e
LW
2721 }
2722
2723 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2724 PL_regcc = cc->oldcc;
2ab05381
IZ
2725 if (PL_regcc)
2726 ln = PL_regcc->cur;
a0d0e21e 2727 if (regmatch(cc->next))
4633a7c4 2728 sayYES;
2ab05381
IZ
2729 if (PL_regcc)
2730 PL_regcc->cur = ln;
3280af22 2731 PL_regcc = cc;
4633a7c4 2732 cc->cur = n - 1;
c277df42 2733 cc->lastloc = lastloc;
4633a7c4 2734 sayNO;
a0d0e21e
LW
2735 }
2736 /* NOT REACHED */
c277df42
IZ
2737 case BRANCHJ:
2738 next = scan + ARG(scan);
2739 if (next == scan)
2740 next = NULL;
2741 inner = NEXTOPER(NEXTOPER(scan));
2742 goto do_branch;
2743 case BRANCH:
2744 inner = NEXTOPER(scan);
2745 do_branch:
2746 {
2747 CHECKPOINT lastcp;
2748 c1 = OP(scan);
2749 if (OP(next) != c1) /* No choice. */
2750 next = inner; /* Avoid recursion. */
a0d0e21e 2751 else {
3280af22 2752 int lastparen = *PL_reglastparen;
c277df42
IZ
2753
2754 REGCP_SET;
a0d0e21e 2755 do {
3280af22 2756 PL_reginput = locinput;
c277df42 2757 if (regmatch(inner))
4633a7c4 2758 sayYES;
c277df42 2759 REGCP_UNWIND;
3280af22 2760 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2761 PL_regendp[n] = -1;
3280af22 2762 *PL_reglastparen = n;
c277df42 2763 scan = next;
a0d0e21e 2764 /*SUPPRESS 560*/
155aba94 2765 if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))))
c277df42 2766 next += n;
a0d0e21e 2767 else
c277df42 2768 next = NULL;
c277df42
IZ
2769 inner = NEXTOPER(scan);
2770 if (c1 == BRANCHJ) {
2771 inner = NEXTOPER(inner);
2772 }
2773 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2774 sayNO;
a0d0e21e 2775 /* NOTREACHED */
a687059c 2776 }
a0d0e21e
LW
2777 }
2778 break;
2779 case MINMOD:
2780 minmod = 1;
2781 break;
c277df42
IZ
2782 case CURLYM:
2783 {
00db4c45 2784 I32 l = 0;
c277df42
IZ
2785 CHECKPOINT lastcp;
2786
2787 /* We suppose that the next guy does not need
2788 backtracking: in particular, it is of constant length,
2789 and has no parenths to influence future backrefs. */
2790 ln = ARG1(scan); /* min to match */
2791 n = ARG2(scan); /* max to match */
c277df42
IZ
2792 paren = scan->flags;
2793 if (paren) {
3280af22
NIS
2794 if (paren > PL_regsize)
2795 PL_regsize = paren;
2796 if (paren > *PL_reglastparen)
2797 *PL_reglastparen = paren;
c277df42 2798 }
dc45a647 2799 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2800 if (paren)
2801 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2802 PL_reginput = locinput;
c277df42
IZ
2803 if (minmod) {
2804 minmod = 0;
2805 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2806 sayNO;
5f4b28b2 2807 if (ln && l == 0 && n >= ln
c277df42
IZ
2808 /* In fact, this is tricky. If paren, then the
2809 fact that we did/didnot match may influence
2810 future execution. */
2811 && !(paren && ln == 0))
2812 ln = n;
3280af22 2813 locinput = PL_reginput;
22c35a8c 2814 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2815 c1 = (U8)*STRING(next);
c277df42 2816 if (OP(next) == EXACTF)
22c35a8c 2817 c2 = PL_fold[c1];
c277df42 2818 else if (OP(next) == EXACTFL)
22c35a8c 2819 c2 = PL_fold_locale[c1];
c277df42
IZ
2820 else
2821 c2 = c1;
a0ed51b3
LW
2822 }
2823 else
c277df42
IZ
2824 c1 = c2 = -1000;
2825 REGCP_SET;
5f4b28b2 2826 /* This may be improved if l == 0. */
c277df42
IZ
2827 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2828 /* If it could work, try it. */
2829 if (c1 == -1000 ||
3280af22
NIS
2830 UCHARAT(PL_reginput) == c1 ||
2831 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2832 {
2833 if (paren) {
2834 if (n) {
cf93c79d
IZ
2835 PL_regstartp[paren] =
2836 HOPc(PL_reginput, -l) - PL_bostr;
2837 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2838 }
2839 else
cf93c79d 2840 PL_regendp[paren] = -1;
c277df42
IZ
2841 }
2842 if (regmatch(next))
2843 sayYES;
2844 REGCP_UNWIND;
2845 }
2846 /* Couldn't or didn't -- move forward. */
3280af22 2847 PL_reginput = locinput;
c277df42
IZ
2848 if (regrepeat_hard(scan, 1, &l)) {
2849 ln++;
3280af22 2850 locinput = PL_reginput;
c277df42
IZ
2851 }
2852 else
2853 sayNO;
2854 }
a0ed51b3
LW
2855 }
2856 else {
c277df42
IZ
2857 n = regrepeat_hard(scan, n, &l);
2858 if (n != 0 && l == 0
2859 /* In fact, this is tricky. If paren, then the
2860 fact that we did/didnot match may influence
2861 future execution. */
2862 && !(paren && ln == 0))
2863 ln = n;
3280af22 2864 locinput = PL_reginput;
c277df42 2865 DEBUG_r(
5c0ca799 2866 PerlIO_printf(Perl_debug_log,
faccc32b 2867 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2868 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2869 (IV) n, (IV)l)
c277df42
IZ
2870 );
2871 if (n >= ln) {
22c35a8c 2872 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2873 c1 = (U8)*STRING(next);
c277df42 2874 if (OP(next) == EXACTF)
22c35a8c 2875 c2 = PL_fold[c1];
c277df42 2876 else if (OP(next) == EXACTFL)
22c35a8c 2877 c2 = PL_fold_locale[c1];
c277df42
IZ
2878 else
2879 c2 = c1;
a0ed51b3
LW
2880 }
2881 else
c277df42
IZ
2882 c1 = c2 = -1000;
2883 }
2884 REGCP_SET;
2885 while (n >= ln) {
2886 /* If it could work, try it. */
2887 if (c1 == -1000 ||
3280af22
NIS
2888 UCHARAT(PL_reginput) == c1 ||
2889 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2890 {
2891 DEBUG_r(
c3464db5 2892 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2893 "%*s trying tail with n=%"IVdf"...\n",
2894 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2895 );
2896 if (paren) {
2897 if (n) {
cf93c79d
IZ
2898 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2899 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2900 }
a0ed51b3 2901 else
cf93c79d 2902 PL_regendp[paren] = -1;
c277df42 2903 }
a0ed51b3
LW
2904 if (regmatch(next))
2905 sayYES;
2906 REGCP_UNWIND;
2907 }
c277df42
IZ
2908 /* Couldn't or didn't -- back up. */
2909 n--;
dfe13c55 2910 locinput = HOPc(locinput, -l);
3280af22 2911 PL_reginput = locinput;
c277df42
IZ
2912 }
2913 }
2914 sayNO;
2915 break;
2916 }
2917 case CURLYN:
2918 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2919 if (paren > PL_regsize)
2920 PL_regsize = paren;
2921 if (paren > *PL_reglastparen)
2922 *PL_reglastparen = paren;
c277df42
IZ
2923 ln = ARG1(scan); /* min to match */
2924 n = ARG2(scan); /* max to match */
dc45a647 2925 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2926 goto repeat;
a0d0e21e 2927 case CURLY:
c277df42 2928 paren = 0;
a0d0e21e
LW
2929 ln = ARG1(scan); /* min to match */
2930 n = ARG2(scan); /* max to match */
dc45a647 2931 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2932 goto repeat;
2933 case STAR:
2934 ln = 0;
c277df42 2935 n = REG_INFTY;
a0d0e21e 2936 scan = NEXTOPER(scan);
c277df42 2937 paren = 0;
a0d0e21e
LW
2938 goto repeat;
2939 case PLUS:
c277df42
IZ
2940 ln = 1;
2941 n = REG_INFTY;
2942 scan = NEXTOPER(scan);
2943 paren = 0;
2944 repeat:
a0d0e21e
LW
2945 /*
2946 * Lookahead to avoid useless match attempts
2947 * when we know what character comes next.
2948 */
22c35a8c 2949 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2950 c1 = (U8)*STRING(next);
bbce6d69 2951 if (OP(next) == EXACTF)
22c35a8c 2952 c2 = PL_fold[c1];
bbce6d69 2953 else if (OP(next) == EXACTFL)
22c35a8c 2954 c2 = PL_fold_locale[c1];
bbce6d69 2955 else
2956 c2 = c1;
2957 }
a0d0e21e 2958 else
bbce6d69 2959 c1 = c2 = -1000;
3280af22 2960 PL_reginput = locinput;
a0d0e21e 2961 if (minmod) {
c277df42 2962 CHECKPOINT lastcp;
a0d0e21e
LW
2963 minmod = 0;
2964 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2965 sayNO;
a0ed51b3 2966 locinput = PL_reginput;
c277df42 2967 REGCP_SET;
0fe9bf95
IZ
2968 if (c1 != -1000) {
2969 char *e = locinput + n - ln; /* Should not check after this */
2970 char *old = locinput;
2971
2972 if (e >= PL_regeol || (n == REG_INFTY))
2973 e = PL_regeol - 1;
2974 while (1) {
2975 /* Find place 'next' could work */
2976 if (c1 == c2) {
2977 while (locinput <= e && *locinput != c1)
2978 locinput++;
2979 } else {
2980 while (locinput <= e
2981 && *locinput != c1
2982 && *locinput != c2)
2983 locinput++;
2984 }
2985 if (locinput > e)
2986 sayNO;
2987 /* PL_reginput == old now */
2988 if (locinput != old) {
2989 ln = 1; /* Did some */
2990 if (regrepeat(scan, locinput - old) <
2991 locinput - old)
2992 sayNO;
2993 }
2994 /* PL_reginput == locinput now */
2995 if (paren) {
2996 if (ln) {
cf93c79d
IZ
2997 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2998 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2999 }
3000 else
cf93c79d 3001 PL_regendp[paren] = -1;
0fe9bf95
IZ
3002 }
3003 if (regmatch(next))
3004 sayYES;
3005 PL_reginput = locinput; /* Could be reset... */
3006 REGCP_UNWIND;
3007 /* Couldn't or didn't -- move forward. */
3008 old = locinput++;
3009 }
3010 }
3011 else
c277df42 3012 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3013 /* If it could work, try it. */
bbce6d69 3014 if (c1 == -1000 ||
3280af22
NIS
3015 UCHARAT(PL_reginput) == c1 ||
3016 UCHARAT(PL_reginput) == c2)
bbce6d69 3017 {
c277df42
IZ
3018 if (paren) {
3019 if (n) {
cf93c79d
IZ
3020 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3021 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3022 }
3023 else
cf93c79d 3024 PL_regendp[paren] = -1;
c277df42 3025 }
a0d0e21e 3026 if (regmatch(next))
4633a7c4 3027 sayYES;
c277df42 3028 REGCP_UNWIND;
bbce6d69 3029 }
c277df42 3030 /* Couldn't or didn't -- move forward. */
a0ed51b3 3031 PL_reginput = locinput;
a0d0e21e
LW
3032 if (regrepeat(scan, 1)) {
3033 ln++;
a0ed51b3
LW
3034 locinput = PL_reginput;
3035 }
3036 else
4633a7c4 3037 sayNO;
a0d0e21e
LW
3038 }
3039 }
3040 else {
c277df42 3041 CHECKPOINT lastcp;
a0d0e21e 3042 n = regrepeat(scan, n);
a0ed51b3 3043 locinput = PL_reginput;
22c35a8c 3044 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3045 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3046 ln = n; /* why back off? */
1aeab75a
GS
3047 /* ...because $ and \Z can match before *and* after
3048 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3049 We should back off by one in this case. */
3050 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3051 ln--;
3052 }
c277df42
IZ
3053 REGCP_SET;
3054 if (paren) {
3055 while (n >= ln) {
3056 /* If it could work, try it. */
3057 if (c1 == -1000 ||
3280af22
NIS
3058 UCHARAT(PL_reginput) == c1 ||
3059 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3060 {
3061 if (paren && n) {
3062 if (n) {
cf93c79d
IZ
3063 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3064 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3065 }
3066 else
cf93c79d 3067 PL_regendp[paren] = -1;
c277df42
IZ
3068 }
3069 if (regmatch(next))
3070 sayYES;
3071 REGCP_UNWIND;
3072 }
3073 /* Couldn't or didn't -- back up. */
3074 n--;
dfe13c55 3075 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3076 }
a0ed51b3
LW
3077 }
3078 else {
c277df42
IZ
3079 while (n >= ln) {
3080 /* If it could work, try it. */
3081 if (c1 == -1000 ||
3280af22
NIS
3082 UCHARAT(PL_reginput) == c1 ||
3083 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3084 {
3085 if (regmatch(next))
3086 sayYES;
3087 REGCP_UNWIND;
3088 }
3089 /* Couldn't or didn't -- back up. */
3090 n--;
dfe13c55 3091 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3092 }
a0d0e21e
LW
3093 }
3094 }
4633a7c4 3095 sayNO;
c277df42 3096 break;
a0d0e21e 3097 case END:
0f5d15d6
IZ
3098 if (PL_reg_call_cc) {
3099 re_cc_state *cur_call_cc = PL_reg_call_cc;
3100 CURCUR *cctmp = PL_regcc;
3101 regexp *re = PL_reg_re;
3102 CHECKPOINT cp, lastcp;
3103
3104 cp = regcppush(0); /* Save *all* the positions. */
3105 REGCP_SET;
3106 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3107 the caller. */
3108 PL_reginput = locinput; /* Make position available to
3109 the callcc. */
3110 cache_re(PL_reg_call_cc->re);
3111 PL_regcc = PL_reg_call_cc->cc;
3112 PL_reg_call_cc = PL_reg_call_cc->prev;
3113 if (regmatch(cur_call_cc->node)) {
3114 PL_reg_call_cc = cur_call_cc;
3115 regcpblow(cp);
3116 sayYES;
3117 }
3118 REGCP_UNWIND;
3119 regcppop();
3120 PL_reg_call_cc = cur_call_cc;
3121 PL_regcc = cctmp;
3122 PL_reg_re = re;
3123 cache_re(re);
3124
3125 DEBUG_r(
3126 PerlIO_printf(Perl_debug_log,
3127 "%*s continuation failed...\n",
3128 REPORT_CODE_OFF+PL_regindent*2, "")
3129 );
7821416a 3130 sayNO_SILENT;
0f5d15d6 3131 }
7821416a
IZ
3132 if (locinput < PL_regtill) {
3133 DEBUG_r(PerlIO_printf(Perl_debug_log,
3134 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3135 PL_colors[4],
3136 (long)(locinput - PL_reg_starttry),
3137 (long)(PL_regtill - PL_reg_starttry),
3138 PL_colors[5]));
3139 sayNO_FINAL; /* Cannot match: too short. */
3140 }
3141 PL_reginput = locinput; /* put where regtry can find it */
3142 sayYES_FINAL; /* Success! */
7e5428c5 3143 case SUCCEED:
3280af22 3144 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3145 sayYES_LOUD; /* Success! */
c277df42
IZ
3146 case SUSPEND:
3147 n = 1;
9fe1d20c 3148 PL_reginput = locinput;
c277df42 3149 goto do_ifmatch;
a0d0e21e 3150 case UNLESSM:
c277df42 3151 n = 0;
a0ed51b3 3152 if (scan->flags) {
0fe9bf95
IZ
3153 if (UTF) { /* XXXX This is absolutely
3154 broken, we read before
3155 start of string. */
3156 s = HOPMAYBEc(locinput, -scan->flags);
3157 if (!s)
3158 goto say_yes;
3159 PL_reginput = s;
3160 }
3161 else {
3162 if (locinput < PL_bostr + scan->flags)
3163 goto say_yes;
3164 PL_reginput = locinput - scan->flags;
3165 goto do_ifmatch;
3166 }
a0ed51b3
LW
3167 }
3168 else
3169 PL_reginput = locinput;
c277df42
IZ
3170 goto do_ifmatch;
3171 case IFMATCH:
3172 n = 1;
a0ed51b3 3173 if (scan->flags) {
0fe9bf95
IZ
3174 if (UTF) { /* XXXX This is absolutely
3175 broken, we read before
3176 start of string. */
3177 s = HOPMAYBEc(locinput, -scan->flags);
3178 if (!s || s < PL_bostr)
3179 goto say_no;
3180 PL_reginput = s;
3181 }
3182 else {
3183 if (locinput < PL_bostr + scan->flags)
3184 goto say_no;
3185 PL_reginput = locinput - scan->flags;
3186 goto do_ifmatch;
3187 }
a0ed51b3
LW
3188 }
3189 else
3190 PL_reginput = locinput;
3191
c277df42 3192 do_ifmatch:
c277df42
IZ
3193 inner = NEXTOPER(NEXTOPER(scan));
3194 if (regmatch(inner) != n) {
3195 say_no:
3196 if (logical) {
3197 logical = 0;
3198 sw = 0;
3199 goto do_longjump;
a0ed51b3
LW
3200 }
3201 else
c277df42
IZ
3202 sayNO;
3203 }
3204 say_yes:
3205 if (logical) {
3206 logical = 0;
3207 sw = 1;
3208 }
fe44a5e8 3209 if (OP(scan) == SUSPEND) {
3280af22 3210 locinput = PL_reginput;
565764a8 3211 nextchr = UCHARAT(locinput);
fe44a5e8 3212 }
c277df42
IZ
3213 /* FALL THROUGH. */
3214 case LONGJMP:
3215 do_longjump:
3216 next = scan + ARG(scan);
3217 if (next == scan)
3218 next = NULL;
a0d0e21e
LW
3219 break;
3220 default:
b900a521 3221 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3222 PTR2UV(scan), OP(scan));
cea2e8a9 3223 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3224 }
a0d0e21e
LW
3225 scan = next;
3226 }
a687059c 3227
a0d0e21e
LW
3228 /*
3229 * We get here only if there's trouble -- normally "case END" is
3230 * the terminating point.
3231 */
cea2e8a9 3232 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3233 /*NOTREACHED*/
4633a7c4
LW
3234 sayNO;
3235
7821416a
IZ
3236yes_loud:
3237 DEBUG_r(
3238 PerlIO_printf(Perl_debug_log,
3239 "%*s %scould match...%s\n",
3240 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3241 );
3242 goto yes;
3243yes_final:
3244 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3245 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3246yes:
3247#ifdef DEBUGGING
3280af22 3248 PL_regindent--;
4633a7c4
LW
3249#endif
3250 return 1;
3251
3252no:
7821416a
IZ
3253 DEBUG_r(
3254 PerlIO_printf(Perl_debug_log,
3255 "%*s %sfailed...%s\n",
3256 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3257 );
3258 goto do_no;
3259no_final:
3260do_no:
4633a7c4 3261#ifdef DEBUGGING
3280af22 3262 PL_regindent--;
4633a7c4 3263#endif
a0d0e21e 3264 return 0;
a687059c
LW
3265}
3266
3267/*
3268 - regrepeat - repeatedly match something simple, report how many
3269 */
3270/*
3271 * [This routine now assumes that it will only match on things of length 1.
3272 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3273 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3274 */
76e3520e 3275STATIC I32
cea2e8a9 3276S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3277{
5c0ca799 3278 dTHR;
a0d0e21e 3279 register char *scan;
a0d0e21e 3280 register I32 c;
3280af22 3281 register char *loceol = PL_regeol;
a0ed51b3 3282 register I32 hardcount = 0;
a0d0e21e 3283
3280af22 3284 scan = PL_reginput;
c277df42 3285 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3286 loceol = scan + max;
a0d0e21e 3287 switch (OP(p)) {
22c35a8c 3288 case REG_ANY:
a0d0e21e
LW
3289 while (scan < loceol && *scan != '\n')
3290 scan++;
3291 break;
3292 case SANY:
3293 scan = loceol;
3294 break;
a0ed51b3
LW
3295 case ANYUTF8:
3296 loceol = PL_regeol;
3297 while (scan < loceol && *scan != '\n') {
3298 scan += UTF8SKIP(scan);
3299 hardcount++;
3300 }
3301 break;
3302 case SANYUTF8:
3303 loceol = PL_regeol;
3304 while (scan < loceol) {
3305 scan += UTF8SKIP(scan);
3306 hardcount++;
3307 }
3308 break;
bbce6d69 3309 case EXACT: /* length of string is 1 */
cd439c50 3310 c = (U8)*STRING(p);
bbce6d69 3311 while (scan < loceol && UCHARAT(scan) == c)
3312 scan++;
3313 break;
3314 case EXACTF: /* length of string is 1 */
cd439c50 3315 c = (U8)*STRING(p);
bbce6d69 3316 while (scan < loceol &&
22c35a8c 3317 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3318 scan++;
3319 break;
3320 case EXACTFL: /* length of string is 1 */
3280af22 3321 PL_reg_flags |= RF_tainted;
cd439c50 3322 c = (U8)*STRING(p);
bbce6d69 3323 while (scan < loceol &&
22c35a8c 3324 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3325 scan++;
3326 break;
a0ed51b3
LW
3327 case ANYOFUTF8:
3328 loceol = PL_regeol;
3329 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3330 scan += UTF8SKIP(scan);
3331 hardcount++;
3332 }
3333 break;
a0d0e21e 3334 case ANYOF:
936ed897 3335 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3336 scan++;
a0d0e21e
LW
3337 break;
3338 case ALNUM:
3339 while (scan < loceol && isALNUM(*scan))
3340 scan++;
3341 break;
a0ed51b3
LW
3342 case ALNUMUTF8:
3343 loceol = PL_regeol;
dfe13c55 3344 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3345 scan += UTF8SKIP(scan);
3346 hardcount++;
3347 }
3348 break;
bbce6d69 3349 case ALNUML:
3280af22 3350 PL_reg_flags |= RF_tainted;
bbce6d69 3351 while (scan < loceol && isALNUM_LC(*scan))
3352 scan++;
3353 break;
a0ed51b3
LW
3354 case ALNUMLUTF8:
3355 PL_reg_flags |= RF_tainted;
3356 loceol = PL_regeol;
dfe13c55 3357 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3358 scan += UTF8SKIP(scan);
3359 hardcount++;
3360 }
3361 break;
3362 break;
a0d0e21e
LW
3363 case NALNUM:
3364 while (scan < loceol && !isALNUM(*scan))
3365 scan++;
3366 break;
a0ed51b3
LW
3367 case NALNUMUTF8:
3368 loceol = PL_regeol;
dfe13c55 3369 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3370 scan += UTF8SKIP(scan);
3371 hardcount++;
3372 }
3373 break;
bbce6d69 3374 case NALNUML:
3280af22 3375 PL_reg_flags |= RF_tainted;
bbce6d69 3376 while (scan < loceol && !isALNUM_LC(*scan))
3377 scan++;
3378 break;
a0ed51b3
LW
3379 case NALNUMLUTF8:
3380 PL_reg_flags |= RF_tainted;
3381 loceol = PL_regeol;
dfe13c55 3382 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3383 scan += UTF8SKIP(scan);
3384 hardcount++;
3385 }
3386 break;
a0d0e21e
LW
3387 case SPACE:
3388 while (scan < loceol && isSPACE(*scan))
3389 scan++;
3390 break;
a0ed51b3
LW
3391 case SPACEUTF8:
3392 loceol = PL_regeol;
dfe13c55 3393 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3394 scan += UTF8SKIP(scan);
3395 hardcount++;
3396 }
3397 break;
bbce6d69 3398 case SPACEL:
3280af22 3399 PL_reg_flags |= RF_tainted;
bbce6d69 3400 while (scan < loceol && isSPACE_LC(*scan))
3401 scan++;
3402 break;
a0ed51b3
LW
3403 case SPACELUTF8:
3404 PL_reg_flags |= RF_tainted;
3405 loceol = PL_regeol;
dfe13c55 3406 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3407 scan += UTF8SKIP(scan);
3408 hardcount++;
3409 }
3410 break;
a0d0e21e
LW
3411 case NSPACE:
3412 while (scan < loceol && !isSPACE(*scan))
3413 scan++;
3414 break;
a0ed51b3
LW
3415 case NSPACEUTF8:
3416 loceol = PL_regeol;
dfe13c55 3417 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3418 scan += UTF8SKIP(scan);
3419 hardcount++;
3420 }
3421 break;
bbce6d69 3422 case NSPACEL:
3280af22 3423 PL_reg_flags |= RF_tainted;
bbce6d69 3424 while (scan < loceol && !isSPACE_LC(*scan))
3425 scan++;
3426 break;
a0ed51b3
LW
3427 case NSPACELUTF8:
3428 PL_reg_flags |= RF_tainted;
3429 loceol = PL_regeol;
dfe13c55 3430 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3431 scan += UTF8SKIP(scan);
3432 hardcount++;
3433 }
3434 break;
a0d0e21e
LW
3435 case DIGIT:
3436 while (scan < loceol && isDIGIT(*scan))
3437 scan++;
3438 break;
a0ed51b3
LW
3439 case DIGITUTF8:
3440 loceol = PL_regeol;
dfe13c55 3441 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3442 scan += UTF8SKIP(scan);
3443 hardcount++;
3444 }
3445 break;
3446 break;
a0d0e21e
LW
3447 case NDIGIT:
3448 while (scan < loceol && !isDIGIT(*scan))
3449 scan++;
3450 break;
a0ed51b3
LW
3451 case NDIGITUTF8:
3452 loceol = PL_regeol;
dfe13c55 3453 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3454 scan += UTF8SKIP(scan);
3455 hardcount++;
3456 }
3457 break;
a0d0e21e
LW
3458 default: /* Called on something of 0 width. */
3459 break; /* So match right here or not at all. */
3460 }
a687059c 3461
a0ed51b3
LW
3462 if (hardcount)
3463 c = hardcount;
3464 else
3465 c = scan - PL_reginput;
3280af22 3466 PL_reginput = scan;
a687059c 3467
c277df42
IZ
3468 DEBUG_r(
3469 {
3470 SV *prop = sv_newmortal();
3471
3472 regprop(prop, p);
3473 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3474 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3475 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3476 });
3477
a0d0e21e 3478 return(c);
a687059c
LW
3479}
3480
3481/*
c277df42
IZ
3482 - regrepeat_hard - repeatedly match something, report total lenth and length
3483 *
3484 * The repeater is supposed to have constant length.
3485 */
3486
76e3520e 3487STATIC I32
cea2e8a9 3488S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3489{
5c0ca799 3490 dTHR;
c277df42
IZ
3491 register char *scan;
3492 register char *start;
3280af22 3493 register char *loceol = PL_regeol;
a0ed51b3 3494 I32 l = 0;
708e3b05 3495 I32 count = 0, res = 1;
a0ed51b3
LW
3496
3497 if (!max)
3498 return 0;
c277df42 3499
3280af22 3500 start = PL_reginput;
a0ed51b3 3501 if (UTF) {
708e3b05 3502 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3503 if (!count++) {
3504 l = 0;
3505 while (start < PL_reginput) {
3506 l++;
3507 start += UTF8SKIP(start);
3508 }
3509 *lp = l;
3510 if (l == 0)
3511 return max;
3512 }
3513 if (count == max)
3514 return count;
3515 }
3516 }
3517 else {
708e3b05 3518 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3519 if (!count++) {
3520 *lp = l = PL_reginput - start;
3521 if (max != REG_INFTY && l*max < loceol - scan)
3522 loceol = scan + l*max;
3523 if (l == 0)
3524 return max;
c277df42
IZ
3525 }
3526 }
3527 }
708e3b05 3528 if (!res)
3280af22 3529 PL_reginput = scan;
c277df42 3530
a0ed51b3 3531 return count;
c277df42
IZ
3532}
3533
3534/*
cb8d8820 3535 - reginclass - determine if a character falls into a character class
bbce6d69 3536 */
3537
76e3520e 3538STATIC bool
936ed897 3539S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3540{
5c0ca799 3541 dTHR;
b8c5462f 3542 char flags = ANYOF_FLAGS(p);
bbce6d69 3543 bool match = FALSE;
3544
3545 c &= 0xFF;
b8c5462f 3546 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3547 match = TRUE;
3548 else if (flags & ANYOF_FOLD) {
3549 I32 cf;
3550 if (flags & ANYOF_LOCALE) {
3280af22 3551 PL_reg_flags |= RF_tainted;
22c35a8c 3552 cf = PL_fold_locale[c];
bbce6d69 3553 }
3554 else
22c35a8c 3555 cf = PL_fold[c];
b8c5462f 3556 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3557 match = TRUE;
3558 }
3559
b8c5462f 3560 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3561 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3562 if (
3563 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3564 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3565 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3566 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3567 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3568 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3569 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3570 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3571 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3572 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3573 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3574 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3575 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3576 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3577 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3578 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3579 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3580 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3581 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3582 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3583 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3584 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3585 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3586 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3587 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3588 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3589 ) /* How's that for a conditional? */
bbce6d69 3590 {
3591 match = TRUE;
3592 }
3593 }
3594
ae5c130c 3595 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3596}
3597
a0ed51b3 3598STATIC bool
cea2e8a9 3599S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3600{
3601 dTHR;
a0ed51b3
LW
3602 char flags = ARG1(f);
3603 bool match = FALSE;
3604 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3605
3606 if (swash_fetch(sv, p))
3607 match = TRUE;
3608 else if (flags & ANYOF_FOLD) {
806e7201 3609 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3610 if (flags & ANYOF_LOCALE) {
3611 PL_reg_flags |= RF_tainted;
3612 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3613 }
3614 else
3615 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3616 if (swash_fetch(sv, tmpbuf))
3617 match = TRUE;
3618 }
3619
b8c5462f 3620 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3621
3622 return (flags & ANYOF_INVERT) ? !match : match;
3623}
161b471a 3624
dfe13c55 3625STATIC U8 *
cea2e8a9 3626S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3627{
3628 dTHR;
a0ed51b3
LW
3629 if (off >= 0) {
3630 while (off-- && s < (U8*)PL_regeol)
3631 s += UTF8SKIP(s);
3632 }
3633 else {
3634 while (off++) {
3635 if (s > (U8*)PL_bostr) {
3636 s--;
3637 if (*s & 0x80) {
3638 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3639 s--;
3640 } /* XXX could check well-formedness here */
3641 }
3642 }
3643 }
3644 return s;
3645}
161b471a 3646
dfe13c55 3647STATIC U8 *
cea2e8a9 3648S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3649{
c485e607 3650 dTHR;
a0ed51b3
LW
3651 if (off >= 0) {
3652 while (off-- && s < (U8*)PL_regeol)
3653 s += UTF8SKIP(s);
3654 if (off >= 0)
3655 return 0;
3656 }
3657 else {
3658 while (off++) {
3659 if (s > (U8*)PL_bostr) {
3660 s--;
3661 if (*s & 0x80) {
3662 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3663 s--;
3664 } /* XXX could check well-formedness here */
3665 }
3666 else
3667 break;
3668 }
3669 if (off <= 0)
3670 return 0;
3671 }
3672 return s;
3673}
51371543
GS
3674
3675#ifdef PERL_OBJECT
51371543
GS
3676#include "XSUB.h"
3677#endif
3678
3679static void
3680restore_pos(pTHXo_ void *arg)
3681{
3682 dTHR;
3683 if (PL_reg_eval_set) {
3684 if (PL_reg_oldsaved) {
3685 PL_reg_re->subbeg = PL_reg_oldsaved;
3686 PL_reg_re->sublen = PL_reg_oldsavedlen;
3687 RX_MATCH_COPIED_on(PL_reg_re);
3688 }
3689 PL_reg_magic->mg_len = PL_reg_oldpos;
3690 PL_reg_eval_set = 0;
3691 PL_curpm = PL_reg_oldcurpm;
3692 }
3693}
3694