This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
The op/stat.t patch in #5341 wasn't robust enough,
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
6eb5f6b9
JH
278/* XXXX Some places assume that there is a fixed substring.
279 An update may be needed if optimizer marks as "INTUITable"
280 RExen without fixed substrings. Similarly, it is assumed that
281 lengths of all the strings are no more than minlen, thus they
282 cannot come from lookahead.
283 (Or minlen should take into account lookahead.) */
284
2c2d71f5
JH
285/* A failure to find a constant substring means that there is no need to make
286 an expensive call to REx engine, thus we celebrate a failure. Similarly,
287 finding a substring too deep into the string means that less calls to
30944b6d
IZ
288 regtry() should be needed.
289
290 REx compiler's optimizer found 4 possible hints:
291 a) Anchored substring;
292 b) Fixed substring;
293 c) Whether we are anchored (beginning-of-line or \G);
294 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 295 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
296 string which does not contradict any of them.
297 */
2c2d71f5 298
6eb5f6b9
JH
299/* Most of decisions we do here should have been done at compile time.
300 The nodes of the REx which we used for the search should have been
301 deleted from the finite automaton. */
302
cad2e5aa
JH
303char *
304Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
305 char *strend, U32 flags, re_scream_pos_data *data)
306{
2c2d71f5 307 register I32 start_shift;
cad2e5aa 308 /* Should be nonnegative! */
2c2d71f5
JH
309 register I32 end_shift;
310 register char *s;
311 register SV *check;
cad2e5aa
JH
312 char *t;
313 I32 ml_anch;
2c2d71f5 314 char *tmp;
6eb5f6b9
JH
315 register char *other_last = Nullch; /* other substr checked before this */
316 char *check_at; /* check substr found at this pos */
30944b6d
IZ
317#ifdef DEBUGGING
318 char *i_strpos = strpos;
319#endif
cad2e5aa
JH
320
321 DEBUG_r( if (!PL_colorset) reginitcolors() );
322 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 323 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
324 PL_colors[4],PL_colors[5],PL_colors[0],
325 prog->precomp,
326 PL_colors[1],
327 (strlen(prog->precomp) > 60 ? "..." : ""),
328 PL_colors[0],
b900a521 329 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
330 strpos, PL_colors[1],
331 (strend - strpos > 60 ? "..." : ""))
332 );
333
2c2d71f5
JH
334 if (prog->minlen > strend - strpos) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 336 goto fail;
2c2d71f5 337 }
653099ff 338 check = prog->check_substr;
2c2d71f5 339 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
340 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
341 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 342 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
343
344 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 345 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
346 I32 slen;
347
348 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
349 && (sv && (strpos + SvCUR(sv) != strend)) ) {
350 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 351 goto fail;
2c2d71f5 352 }
adac82c7 353 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 354 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
355 if (SvTAIL(check)) {
356 slen = SvCUR(check); /* >= 1 */
cad2e5aa 357
2c2d71f5
JH
358 if ( strend - s > slen || strend - s < slen - 1
359 || (strend - s == slen && strend[-1] != '\n')) {
360 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
361 goto fail_finish;
cad2e5aa
JH
362 }
363 /* Now should match s[0..slen-2] */
364 slen--;
653099ff 365 if (slen && (*SvPVX(check) != *s
cad2e5aa 366 || (slen > 1
653099ff 367 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
368 report_neq:
369 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
370 goto fail_finish;
371 }
cad2e5aa 372 }
653099ff
GS
373 else if (*SvPVX(check) != *s
374 || ((slen = SvCUR(check)) > 1
375 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
376 goto report_neq;
377 goto success_at_start;
cad2e5aa 378 }
2c2d71f5 379 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 380 s = strpos;
2c2d71f5 381 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 382 end_shift = prog->minlen - start_shift -
653099ff 383 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 384 if (!ml_anch) {
653099ff
GS
385 I32 end = prog->check_offset_max + CHR_SVLEN(check)
386 - (SvTAIL(check) != 0);
2c2d71f5
JH
387 I32 eshift = strend - s - end;
388
389 if (end_shift < eshift)
390 end_shift = eshift;
391 }
cad2e5aa 392 }
2c2d71f5 393 else { /* Can match at random position */
cad2e5aa
JH
394 ml_anch = 0;
395 s = strpos;
2c2d71f5
JH
396 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
397 /* Should be nonnegative! */
398 end_shift = prog->minlen - start_shift -
653099ff 399 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
400 }
401
2c2d71f5 402#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 403 if (end_shift < 0)
6bbae5e6 404 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
405#endif
406
2c2d71f5
JH
407 restart:
408 /* Find a possible match in the region s..strend by looking for
409 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 410 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
411 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
412 I32 p = -1; /* Internal iterator of scream. */
413 I32 *pp = data ? data->scream_pos : &p;
414
2c2d71f5
JH
415 if (PL_screamfirst[BmRARE(check)] >= 0
416 || ( BmRARE(check) == '\n'
417 && (BmPREVIOUS(check) == SvCUR(check) - 1)
418 && SvTAIL(check) ))
419 s = screaminstr(sv, check,
420 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 421 else
2c2d71f5 422 goto fail_finish;
cad2e5aa
JH
423 if (data)
424 *data->scream_olds = s;
425 }
426 else
427 s = fbm_instr((unsigned char*)s + start_shift,
428 (unsigned char*)strend - end_shift,
2c2d71f5 429 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
430
431 /* Update the count-of-usability, remove useless subpatterns,
432 unshift s. */
2c2d71f5
JH
433
434 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
435 (s ? "Found" : "Did not find"),
436 ((check == prog->anchored_substr) ? "anchored" : "floating"),
437 PL_colors[0],
7b0972df
JH
438 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
439 SvPVX(check),
2c2d71f5
JH
440 PL_colors[1], (SvTAIL(check) ? "$" : ""),
441 (s ? " at offset " : "...\n") ) );
442
443 if (!s)
444 goto fail_finish;
445
6eb5f6b9
JH
446 check_at = s;
447
2c2d71f5 448 /* Finish the diagnostic message */
30944b6d 449 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
450
451 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
452 Start with the other substr.
453 XXXX no SCREAM optimization yet - and a very coarse implementation
454 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
455 *always* match. Probably should be marked during compile...
456 Probably it is right to do no SCREAM here...
457 */
458
459 if (prog->float_substr && prog->anchored_substr) {
30944b6d 460 /* Take into account the "other" substring. */
2c2d71f5
JH
461 /* XXXX May be hopelessly wrong for UTF... */
462 if (!other_last)
6eb5f6b9 463 other_last = strpos;
2c2d71f5 464 if (check == prog->float_substr) {
30944b6d
IZ
465 do_other_anchored:
466 {
2c2d71f5
JH
467 char *last = s - start_shift, *last1, *last2;
468 char *s1 = s;
469
470 tmp = PL_bostr;
471 t = s - prog->check_offset_max;
472 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
473 && (!(prog->reganch & ROPT_UTF8)
474 || (PL_bostr = strpos, /* Used in regcopmaybe() */
475 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
476 && t > strpos)))
30944b6d 477 /* EMPTY */;
2c2d71f5
JH
478 else
479 t = strpos;
480 t += prog->anchored_offset;
6eb5f6b9
JH
481 if (t < other_last) /* These positions already checked */
482 t = other_last;
2c2d71f5
JH
483 PL_bostr = tmp;
484 last2 = last1 = strend - prog->minlen;
485 if (last < last1)
486 last1 = last;
487 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
488 /* On end-of-str: see comment below. */
489 s = fbm_instr((unsigned char*)t,
490 (unsigned char*)last1 + prog->anchored_offset
491 + SvCUR(prog->anchored_substr)
492 - (SvTAIL(prog->anchored_substr)!=0),
493 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
494 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
495 (s ? "Found" : "Contradicts"),
496 PL_colors[0],
7b0972df
JH
497 (int)(SvCUR(prog->anchored_substr)
498 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
499 SvPVX(prog->anchored_substr),
500 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
501 if (!s) {
502 if (last1 >= last2) {
503 DEBUG_r(PerlIO_printf(Perl_debug_log,
504 ", giving up...\n"));
505 goto fail_finish;
506 }
507 DEBUG_r(PerlIO_printf(Perl_debug_log,
508 ", trying floating at offset %ld...\n",
30944b6d 509 (long)(s1 + 1 - i_strpos)));
2c2d71f5 510 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 511 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
512 s = HOPc(last, 1);
513 goto restart;
514 }
515 else {
516 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 517 (long)(s - i_strpos)));
2c2d71f5 518 t = s - prog->anchored_offset;
6eb5f6b9 519 other_last = s + 1;
30944b6d 520 s = s1;
2c2d71f5
JH
521 if (t == strpos)
522 goto try_at_start;
2c2d71f5
JH
523 goto try_at_offset;
524 }
30944b6d 525 }
2c2d71f5
JH
526 }
527 else { /* Take into account the floating substring. */
528 char *last, *last1;
529 char *s1 = s;
530
531 t = s - start_shift;
532 last1 = last = strend - prog->minlen + prog->float_min_offset;
533 if (last - t > prog->float_max_offset)
534 last = t + prog->float_max_offset;
535 s = t + prog->float_min_offset;
6eb5f6b9
JH
536 if (s < other_last)
537 s = other_last;
2c2d71f5
JH
538 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
539 /* fbm_instr() takes into account exact value of end-of-str
540 if the check is SvTAIL(ed). Since false positives are OK,
541 and end-of-str is not later than strend we are OK. */
542 s = fbm_instr((unsigned char*)s,
543 (unsigned char*)last + SvCUR(prog->float_substr)
544 - (SvTAIL(prog->float_substr)!=0),
545 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
546 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
547 (s ? "Found" : "Contradicts"),
548 PL_colors[0],
7b0972df
JH
549 (int)(SvCUR(prog->float_substr)
550 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
551 SvPVX(prog->float_substr),
552 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
553 if (!s) {
554 if (last1 == last) {
555 DEBUG_r(PerlIO_printf(Perl_debug_log,
556 ", giving up...\n"));
557 goto fail_finish;
558 }
559 DEBUG_r(PerlIO_printf(Perl_debug_log,
560 ", trying anchored starting at offset %ld...\n",
30944b6d 561 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 562 other_last = last + 1;
2c2d71f5
JH
563 PL_regeol = strend; /* Used in HOP() */
564 s = HOPc(t, 1);
565 goto restart;
566 }
567 else {
568 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 569 (long)(s - i_strpos)));
6eb5f6b9 570 other_last = s + 1;
30944b6d 571 s = s1;
2c2d71f5
JH
572 if (t == strpos)
573 goto try_at_start;
2c2d71f5
JH
574 goto try_at_offset;
575 }
576 }
cad2e5aa 577 }
2c2d71f5
JH
578
579 t = s - prog->check_offset_max;
580 tmp = PL_bostr;
581 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
582 && (!(prog->reganch & ROPT_UTF8)
583 || (PL_bostr = strpos, /* Used in regcopmaybe() */
584 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
585 && t > strpos)))) {
586 PL_bostr = tmp;
587 /* Fixed substring is found far enough so that the match
588 cannot start at strpos. */
589 try_at_offset:
cad2e5aa 590 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
591 /* Eventually fbm_*() should handle this, but often
592 anchored_offset is not 0, so this check will not be wasted. */
593 /* XXXX In the code below we prefer to look for "^" even in
594 presence of anchored substrings. And we search even
595 beyond the found float position. These pessimizations
596 are historical artefacts only. */
597 find_anchor:
2c2d71f5 598 while (t < strend - prog->minlen) {
cad2e5aa
JH
599 if (*t == '\n') {
600 if (t < s - prog->check_offset_min) {
30944b6d
IZ
601 if (prog->anchored_substr) {
602 /* We definitely contradict the found anchored
603 substr. Due to the above check we do not
604 contradict "check" substr.
605 Thus we can arrive here only if check substr
606 is float. Redo checking for "other"=="fixed".
607 */
608 strpos = t + 1;
609 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
610 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
611 goto do_other_anchored;
612 }
cad2e5aa 613 s = t + 1;
2c2d71f5 614 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 615 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
616 goto set_useful;
617 }
2c2d71f5 618 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
619 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
620 strpos = s = t + 1;
cad2e5aa
JH
621 goto restart;
622 }
623 t++;
624 }
2c2d71f5
JH
625 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
626 PL_colors[0],PL_colors[1]));
627 goto fail_finish;
cad2e5aa
JH
628 }
629 s = t;
630 set_useful:
2c2d71f5 631 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
632 }
633 else {
2c2d71f5
JH
634 PL_bostr = tmp;
635 /* The found string does not prohibit matching at beg-of-str
636 - no optimization of calling REx engine can be performed,
637 unless it was an MBOL and we are not after MBOL. */
638 try_at_start:
639 /* Even in this situation we may use MBOL flag if strpos is offset
640 wrt the start of the string. */
641 if (ml_anch && sv
d506a20d
IZ
642 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n'
643 /* May be due to an implicit anchor of m{.*foo} */
644 && !(prog->reganch & ROPT_IMPLICIT))
645 {
cad2e5aa
JH
646 t = strpos;
647 goto find_anchor;
648 }
30944b6d
IZ
649 DEBUG_r( if (ml_anch)
650 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
651 PL_colors[0],PL_colors[1]);
652 );
2c2d71f5 653 success_at_start:
30944b6d 654 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 655 && prog->check_substr /* Could be deleted already */
cad2e5aa 656 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
657 && prog->check_substr == prog->float_substr)
658 {
cad2e5aa
JH
659 /* If flags & SOMETHING - do not do it many times on the same match */
660 SvREFCNT_dec(prog->check_substr);
661 prog->check_substr = Nullsv; /* disable */
662 prog->float_substr = Nullsv; /* clear */
663 s = strpos;
3cf5c195
IZ
664 /* XXXX This is a remnant of the old implementation. It
665 looks wasteful, since now INTUIT can use many
6eb5f6b9 666 other heuristics. */
cad2e5aa
JH
667 prog->reganch &= ~RE_USE_INTUIT;
668 }
669 else
670 s = strpos;
671 }
672
6eb5f6b9
JH
673 /* Last resort... */
674 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
675 if (prog->regstclass) {
676 /* minlen == 0 is possible if regstclass is \b or \B,
677 and the fixed substr is ''$.
678 Since minlen is already taken into account, s+1 is before strend;
679 accidentally, minlen >= 1 guaranties no false positives at s + 1
680 even for \b or \B. But (minlen? 1 : 0) below assumes that
681 regstclass does not come from lookahead... */
682 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
683 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
684 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
685 ? STR_LEN(prog->regstclass)
686 : 1);
6eb5f6b9 687 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
688 ? s + (prog->minlen? cl_l : 0)
689 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 690 : strend) ;
8fdd422b 691 char *startpos = sv && SvPOK(sv) ? strend - SvCUR(sv) : s;
6eb5f6b9
JH
692
693 t = s;
76384e4a
GS
694 if (prog->reganch & ROPT_UTF8) {
695 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
696 PL_bostr = startpos;
697 }
6eb5f6b9
JH
698 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
699 if (!s) {
700#ifdef DEBUGGING
701 char *what;
702#endif
703 if (endpos == strend) {
704 DEBUG_r( PerlIO_printf(Perl_debug_log,
705 "Could not match STCLASS...\n") );
706 goto fail;
707 }
66e933ab
GS
708 DEBUG_r( PerlIO_printf(Perl_debug_log,
709 "This position contradicts STCLASS...\n") );
653099ff
GS
710 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
711 goto fail;
6eb5f6b9
JH
712 /* Contradict one of substrings */
713 if (prog->anchored_substr) {
6eb5f6b9
JH
714 if (prog->anchored_substr == check) {
715 DEBUG_r( what = "anchored" );
716 hop_and_restart:
717 PL_regeol = strend; /* Used in HOP() */
718 s = HOPc(t, 1);
66e933ab
GS
719 if (s + start_shift + end_shift > strend) {
720 /* XXXX Should be taken into account earlier? */
721 DEBUG_r( PerlIO_printf(Perl_debug_log,
722 "Could not match STCLASS...\n") );
723 goto fail;
724 }
6eb5f6b9 725 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 726 "Trying %s substr starting at offset %ld...\n",
6eb5f6b9
JH
727 what, (long)(s + start_shift - i_strpos)) );
728 goto restart;
729 }
66e933ab 730 /* Have both, check_string is floating */
6eb5f6b9
JH
731 if (t + start_shift >= check_at) /* Contradicts floating=check */
732 goto retry_floating_check;
733 /* Recheck anchored substring, but not floating... */
734 s = check_at;
735 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 736 "Trying anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
737 (long)(other_last - i_strpos)) );
738 goto do_other_anchored;
739 }
60e71179
GS
740 /* Another way we could have checked stclass at the
741 current position only: */
742 if (ml_anch) {
743 s = t = t + 1;
744 DEBUG_r( PerlIO_printf(Perl_debug_log,
745 "Trying /^/m starting at offset %ld...\n",
746 (long)(t - i_strpos)) );
747 goto try_at_offset;
66e933ab 748 }
60e71179
GS
749 if (!prog->float_substr) /* Could have been deleted */
750 goto fail;
6eb5f6b9
JH
751 /* Check is floating subtring. */
752 retry_floating_check:
753 t = check_at - start_shift;
754 DEBUG_r( what = "floating" );
755 goto hop_and_restart;
756 }
757 DEBUG_r( if (t != s)
758 PerlIO_printf(Perl_debug_log,
759 "By STCLASS: moving %ld --> %ld\n",
760 (long)(t - i_strpos), (long)(s - i_strpos));
761 else
762 PerlIO_printf(Perl_debug_log,
763 "Does not contradict STCLASS...\n") );
764 }
2c2d71f5 765 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 766 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 767 return s;
2c2d71f5
JH
768
769 fail_finish: /* Substring not found */
66e933ab
GS
770 if (prog->check_substr) /* could be removed already */
771 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 772 fail:
2c2d71f5 773 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
774 PL_colors[4],PL_colors[5]));
775 return Nullch;
776}
9661b544 777
6eb5f6b9 778/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
779STATIC char *
780S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 781{
6eb5f6b9
JH
782 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
783 char *m;
784 int ln;
785 int c1;
786 int c2;
787 char *e;
788 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 789
6eb5f6b9
JH
790 /* We know what class it must start with. */
791 switch (OP(c)) {
792 case ANYOFUTF8:
793 while (s < strend) {
794 if (REGINCLASSUTF8(c, (U8*)s)) {
795 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 796 goto got_it;
3cf5c195 797 else
6eb5f6b9 798 tmp = doevery;
a0ed51b3 799 }
6eb5f6b9
JH
800 else
801 tmp = 1;
a0ed51b3
LW
802 s += UTF8SKIP(s);
803 }
6eb5f6b9
JH
804 break;
805 case ANYOF:
a0ed51b3 806 while (s < strend) {
6eb5f6b9
JH
807 if (REGINCLASS(c, *s)) {
808 if (tmp && (norun || regtry(prog, s)))
809 goto got_it;
810 else
811 tmp = doevery;
a0ed51b3 812 }
6eb5f6b9
JH
813 else
814 tmp = 1;
a0ed51b3 815 s++;
a0d0e21e 816 }
6eb5f6b9
JH
817 break;
818 case EXACTF:
819 m = STRING(c);
820 ln = STR_LEN(c);
821 c1 = *m;
822 c2 = PL_fold[c1];
823 goto do_exactf;
824 case EXACTFL:
825 m = STRING(c);
826 ln = STR_LEN(c);
827 c1 = *m;
828 c2 = PL_fold_locale[c1];
829 do_exactf:
830 e = strend - ln;
b3c9acc1 831
6eb5f6b9
JH
832 if (norun && e < s)
833 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
834 /* Here it is NOT UTF! */
835 if (c1 == c2) {
836 while (s <= e) {
837 if ( *s == c1
66e933ab
GS
838 && (ln == 1 || !(OP(c) == EXACTF
839 ? ibcmp(s, m, ln)
840 : ibcmp_locale(s, m, ln)))
6eb5f6b9 841 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
842 goto got_it;
843 s++;
844 }
845 } else {
846 while (s <= e) {
847 if ( (*s == c1 || *s == c2)
66e933ab
GS
848 && (ln == 1 || !(OP(c) == EXACTF
849 ? ibcmp(s, m, ln)
850 : ibcmp_locale(s, m, ln)))
6eb5f6b9 851 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
852 goto got_it;
853 s++;
854 }
855 }
856 break;
bbce6d69 857 case BOUNDL:
3280af22 858 PL_reg_flags |= RF_tainted;
bbce6d69 859 /* FALL THROUGH */
a0d0e21e 860 case BOUND:
6eb5f6b9 861 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 862 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 863 while (s < strend) {
95bac841 864 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 865 tmp = !tmp;
6eb5f6b9 866 if ((norun || regtry(prog, s)))
a0d0e21e 867 goto got_it;
a687059c 868 }
a0d0e21e
LW
869 s++;
870 }
6eb5f6b9 871 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
872 goto got_it;
873 break;
a0ed51b3
LW
874 case BOUNDLUTF8:
875 PL_reg_flags |= RF_tainted;
876 /* FALL THROUGH */
877 case BOUNDUTF8:
6eb5f6b9 878 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 879 tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 880 while (s < strend) {
76384e4a 881 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
882 swash_fetch(PL_utf8_alnum, (U8*)s) :
883 isALNUM_LC_utf8((U8*)s)))
884 {
a0ed51b3 885 tmp = !tmp;
6eb5f6b9 886 if ((norun || regtry(prog, s)))
a0ed51b3
LW
887 goto got_it;
888 }
889 s += UTF8SKIP(s);
890 }
6eb5f6b9 891 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
892 goto got_it;
893 break;
bbce6d69 894 case NBOUNDL:
3280af22 895 PL_reg_flags |= RF_tainted;
bbce6d69 896 /* FALL THROUGH */
a0d0e21e 897 case NBOUND:
6eb5f6b9 898 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 899 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 900 while (s < strend) {
95bac841 901 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 902 tmp = !tmp;
6eb5f6b9 903 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
904 goto got_it;
905 s++;
906 }
6eb5f6b9 907 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
908 goto got_it;
909 break;
a0ed51b3
LW
910 case NBOUNDLUTF8:
911 PL_reg_flags |= RF_tainted;
912 /* FALL THROUGH */
913 case NBOUNDUTF8:
6eb5f6b9 914 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 915 tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 916 while (s < strend) {
76384e4a 917 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
918 swash_fetch(PL_utf8_alnum, (U8*)s) :
919 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 920 tmp = !tmp;
6eb5f6b9 921 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
922 goto got_it;
923 s += UTF8SKIP(s);
924 }
6eb5f6b9 925 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
926 goto got_it;
927 break;
a0d0e21e
LW
928 case ALNUM:
929 while (s < strend) {
bbce6d69 930 if (isALNUM(*s)) {
6eb5f6b9 931 if (tmp && (norun || regtry(prog, s)))
bbce6d69 932 goto got_it;
933 else
934 tmp = doevery;
935 }
936 else
937 tmp = 1;
938 s++;
939 }
940 break;
a0ed51b3
LW
941 case ALNUMUTF8:
942 while (s < strend) {
dfe13c55 943 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 944 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
945 goto got_it;
946 else
947 tmp = doevery;
948 }
949 else
950 tmp = 1;
951 s += UTF8SKIP(s);
952 }
953 break;
bbce6d69 954 case ALNUML:
3280af22 955 PL_reg_flags |= RF_tainted;
bbce6d69 956 while (s < strend) {
957 if (isALNUM_LC(*s)) {
6eb5f6b9 958 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 959 goto got_it;
a687059c 960 else
a0d0e21e
LW
961 tmp = doevery;
962 }
963 else
964 tmp = 1;
965 s++;
966 }
967 break;
a0ed51b3
LW
968 case ALNUMLUTF8:
969 PL_reg_flags |= RF_tainted;
970 while (s < strend) {
dfe13c55 971 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 972 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
973 goto got_it;
974 else
975 tmp = doevery;
976 }
977 else
978 tmp = 1;
979 s += UTF8SKIP(s);
980 }
981 break;
a0d0e21e
LW
982 case NALNUM:
983 while (s < strend) {
bbce6d69 984 if (!isALNUM(*s)) {
6eb5f6b9 985 if (tmp && (norun || regtry(prog, s)))
bbce6d69 986 goto got_it;
987 else
988 tmp = doevery;
989 }
990 else
991 tmp = 1;
992 s++;
993 }
994 break;
a0ed51b3
LW
995 case NALNUMUTF8:
996 while (s < strend) {
dfe13c55 997 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 998 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
999 goto got_it;
1000 else
1001 tmp = doevery;
1002 }
1003 else
1004 tmp = 1;
1005 s += UTF8SKIP(s);
1006 }
1007 break;
bbce6d69 1008 case NALNUML:
3280af22 1009 PL_reg_flags |= RF_tainted;
bbce6d69 1010 while (s < strend) {
1011 if (!isALNUM_LC(*s)) {
6eb5f6b9 1012 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1013 goto got_it;
a687059c 1014 else
a0d0e21e 1015 tmp = doevery;
a687059c 1016 }
a0d0e21e
LW
1017 else
1018 tmp = 1;
1019 s++;
1020 }
1021 break;
a0ed51b3
LW
1022 case NALNUMLUTF8:
1023 PL_reg_flags |= RF_tainted;
1024 while (s < strend) {
dfe13c55 1025 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1026 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1027 goto got_it;
1028 else
1029 tmp = doevery;
1030 }
1031 else
1032 tmp = 1;
1033 s += UTF8SKIP(s);
1034 }
1035 break;
a0d0e21e
LW
1036 case SPACE:
1037 while (s < strend) {
1038 if (isSPACE(*s)) {
6eb5f6b9 1039 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1040 goto got_it;
1041 else
1042 tmp = doevery;
2304df62 1043 }
a0d0e21e
LW
1044 else
1045 tmp = 1;
1046 s++;
1047 }
1048 break;
a0ed51b3
LW
1049 case SPACEUTF8:
1050 while (s < strend) {
dfe13c55 1051 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1052 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1053 goto got_it;
1054 else
1055 tmp = doevery;
1056 }
1057 else
1058 tmp = 1;
1059 s += UTF8SKIP(s);
1060 }
1061 break;
bbce6d69 1062 case SPACEL:
3280af22 1063 PL_reg_flags |= RF_tainted;
bbce6d69 1064 while (s < strend) {
1065 if (isSPACE_LC(*s)) {
6eb5f6b9 1066 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1067 goto got_it;
1068 else
1069 tmp = doevery;
1070 }
1071 else
1072 tmp = 1;
1073 s++;
1074 }
1075 break;
a0ed51b3
LW
1076 case SPACELUTF8:
1077 PL_reg_flags |= RF_tainted;
1078 while (s < strend) {
dfe13c55 1079 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1080 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1081 goto got_it;
1082 else
1083 tmp = doevery;
1084 }
1085 else
1086 tmp = 1;
1087 s += UTF8SKIP(s);
1088 }
1089 break;
a0d0e21e
LW
1090 case NSPACE:
1091 while (s < strend) {
1092 if (!isSPACE(*s)) {
6eb5f6b9 1093 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1094 goto got_it;
1095 else
1096 tmp = doevery;
a687059c 1097 }
a0d0e21e
LW
1098 else
1099 tmp = 1;
1100 s++;
1101 }
1102 break;
a0ed51b3
LW
1103 case NSPACEUTF8:
1104 while (s < strend) {
dfe13c55 1105 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1106 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1107 goto got_it;
1108 else
1109 tmp = doevery;
1110 }
1111 else
1112 tmp = 1;
1113 s += UTF8SKIP(s);
1114 }
1115 break;
bbce6d69 1116 case NSPACEL:
3280af22 1117 PL_reg_flags |= RF_tainted;
bbce6d69 1118 while (s < strend) {
1119 if (!isSPACE_LC(*s)) {
6eb5f6b9 1120 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1121 goto got_it;
1122 else
1123 tmp = doevery;
1124 }
1125 else
1126 tmp = 1;
1127 s++;
1128 }
1129 break;
a0ed51b3
LW
1130 case NSPACELUTF8:
1131 PL_reg_flags |= RF_tainted;
1132 while (s < strend) {
dfe13c55 1133 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1134 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1135 goto got_it;
1136 else
1137 tmp = doevery;
1138 }
1139 else
1140 tmp = 1;
1141 s += UTF8SKIP(s);
1142 }
1143 break;
a0d0e21e
LW
1144 case DIGIT:
1145 while (s < strend) {
1146 if (isDIGIT(*s)) {
6eb5f6b9 1147 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1148 goto got_it;
1149 else
1150 tmp = doevery;
2b69d0c2 1151 }
a0d0e21e
LW
1152 else
1153 tmp = 1;
1154 s++;
1155 }
1156 break;
a0ed51b3
LW
1157 case DIGITUTF8:
1158 while (s < strend) {
dfe13c55 1159 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1160 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1161 goto got_it;
1162 else
1163 tmp = doevery;
1164 }
1165 else
1166 tmp = 1;
1167 s += UTF8SKIP(s);
1168 }
1169 break;
b8c5462f
JH
1170 case DIGITL:
1171 PL_reg_flags |= RF_tainted;
1172 while (s < strend) {
1173 if (isDIGIT_LC(*s)) {
6eb5f6b9 1174 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1175 goto got_it;
1176 else
1177 tmp = doevery;
1178 }
1179 else
1180 tmp = 1;
1181 s++;
1182 }
1183 break;
1184 case DIGITLUTF8:
1185 PL_reg_flags |= RF_tainted;
1186 while (s < strend) {
1187 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1188 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1189 goto got_it;
1190 else
1191 tmp = doevery;
1192 }
1193 else
1194 tmp = 1;
1195 s += UTF8SKIP(s);
1196 }
1197 break;
a0d0e21e
LW
1198 case NDIGIT:
1199 while (s < strend) {
1200 if (!isDIGIT(*s)) {
6eb5f6b9 1201 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1202 goto got_it;
1203 else
1204 tmp = doevery;
a687059c 1205 }
a0d0e21e
LW
1206 else
1207 tmp = 1;
1208 s++;
1209 }
1210 break;
a0ed51b3
LW
1211 case NDIGITUTF8:
1212 while (s < strend) {
dfe13c55 1213 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1214 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1215 goto got_it;
1216 else
1217 tmp = doevery;
1218 }
1219 else
1220 tmp = 1;
1221 s += UTF8SKIP(s);
1222 }
1223 break;
b8c5462f
JH
1224 case NDIGITL:
1225 PL_reg_flags |= RF_tainted;
1226 while (s < strend) {
1227 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1228 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1229 goto got_it;
1230 else
1231 tmp = doevery;
1232 }
1233 else
1234 tmp = 1;
1235 s++;
a0ed51b3 1236 }
b8c5462f
JH
1237 break;
1238 case NDIGITLUTF8:
1239 PL_reg_flags |= RF_tainted;
1240 while (s < strend) {
1241 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1242 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1243 goto got_it;
cf93c79d 1244 else
b8c5462f
JH
1245 tmp = doevery;
1246 }
1247 else
1248 tmp = 1;
1249 s += UTF8SKIP(s);
1250 }
1251 break;
b3c9acc1 1252 default:
3c3eec57
GS
1253 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1254 break;
d6a28714 1255 }
6eb5f6b9
JH
1256 return 0;
1257 got_it:
1258 return s;
1259}
1260
1261/*
1262 - regexec_flags - match a regexp against a string
1263 */
1264I32
1265Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1266 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1267/* strend: pointer to null at end of string */
1268/* strbeg: real beginning of string */
1269/* minend: end of match must be >=minend after stringarg. */
1270/* data: May be used for some additional optimizations. */
1271/* nosave: For optimizations. */
1272{
1273 dTHR;
1274 register char *s;
1275 register regnode *c;
1276 register char *startpos = stringarg;
1277 register I32 tmp;
1278 I32 minlen; /* must match at least this many chars */
1279 I32 dontbother = 0; /* how many characters not to try at end */
1280 I32 start_shift = 0; /* Offset of the start to find
1281 constant substr. */ /* CC */
1282 I32 end_shift = 0; /* Same for the end. */ /* CC */
1283 I32 scream_pos = -1; /* Internal iterator of scream. */
1284 char *scream_olds;
1285 SV* oreplsv = GvSV(PL_replgv);
1286
1287 PL_regcc = 0;
1288
1289 cache_re(prog);
1290#ifdef DEBUGGING
1291 PL_regnarrate = PL_debug & 512;
1292#endif
1293
1294 /* Be paranoid... */
1295 if (prog == NULL || startpos == NULL) {
1296 Perl_croak(aTHX_ "NULL regexp parameter");
1297 return 0;
1298 }
1299
1300 minlen = prog->minlen;
1301 if (strend - startpos < minlen) goto phooey;
1302
1303 if (startpos == strbeg) /* is ^ valid at stringarg? */
1304 PL_regprev = '\n';
1305 else {
1306 PL_regprev = (U32)stringarg[-1];
1307 if (!PL_multiline && PL_regprev == '\n')
1308 PL_regprev = '\0'; /* force ^ to NOT match */
1309 }
1310
1311 /* Check validity of program. */
1312 if (UCHARAT(prog->program) != REG_MAGIC) {
1313 Perl_croak(aTHX_ "corrupted regexp program");
1314 }
1315
1316 PL_reg_flags = 0;
1317 PL_reg_eval_set = 0;
1318 PL_reg_maxiter = 0;
1319
1320 if (prog->reganch & ROPT_UTF8)
1321 PL_reg_flags |= RF_utf8;
1322
1323 /* Mark beginning of line for ^ and lookbehind. */
1324 PL_regbol = startpos;
1325 PL_bostr = strbeg;
1326 PL_reg_sv = sv;
1327
1328 /* Mark end of line for $ (and such) */
1329 PL_regeol = strend;
1330
1331 /* see how far we have to get to not match where we matched before */
1332 PL_regtill = startpos+minend;
1333
1334 /* We start without call_cc context. */
1335 PL_reg_call_cc = 0;
1336
1337 /* If there is a "must appear" string, look for it. */
1338 s = startpos;
1339
1340 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1341 MAGIC *mg;
1342
1343 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1344 PL_reg_ganch = startpos;
1345 else if (sv && SvTYPE(sv) >= SVt_PVMG
1346 && SvMAGIC(sv)
1347 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1348 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1349 if (prog->reganch & ROPT_ANCH_GPOS) {
1350 if (s > PL_reg_ganch)
1351 goto phooey;
1352 s = PL_reg_ganch;
1353 }
1354 }
1355 else /* pos() not defined */
1356 PL_reg_ganch = strbeg;
1357 }
1358
1359 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1360 re_scream_pos_data d;
1361
1362 d.scream_olds = &scream_olds;
1363 d.scream_pos = &scream_pos;
1364 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1365 if (!s)
1366 goto phooey; /* not present */
1367 }
1368
1369 DEBUG_r( if (!PL_colorset) reginitcolors() );
1370 DEBUG_r(PerlIO_printf(Perl_debug_log,
1371 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1372 PL_colors[4],PL_colors[5],PL_colors[0],
1373 prog->precomp,
1374 PL_colors[1],
1375 (strlen(prog->precomp) > 60 ? "..." : ""),
1376 PL_colors[0],
1377 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1378 startpos, PL_colors[1],
1379 (strend - startpos > 60 ? "..." : ""))
1380 );
1381
1382 /* Simplest case: anchored match need be tried only once. */
1383 /* [unless only anchor is BOL and multiline is set] */
1384 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1385 if (s == startpos && regtry(prog, startpos))
1386 goto got_it;
1387 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1388 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1389 {
1390 char *end;
1391
1392 if (minlen)
1393 dontbother = minlen - 1;
1394 end = HOPc(strend, -dontbother) - 1;
1395 /* for multiline we only have to try after newlines */
1396 if (prog->check_substr) {
1397 if (s == startpos)
1398 goto after_try;
1399 while (1) {
1400 if (regtry(prog, s))
1401 goto got_it;
1402 after_try:
1403 if (s >= end)
1404 goto phooey;
1405 if (prog->reganch & RE_USE_INTUIT) {
1406 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1407 if (!s)
1408 goto phooey;
1409 }
1410 else
1411 s++;
1412 }
1413 } else {
1414 if (s > startpos)
1415 s--;
1416 while (s < end) {
1417 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1418 if (regtry(prog, s))
1419 goto got_it;
1420 }
1421 }
1422 }
1423 }
1424 goto phooey;
1425 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1426 if (regtry(prog, PL_reg_ganch))
1427 goto got_it;
1428 goto phooey;
1429 }
1430
1431 /* Messy cases: unanchored match. */
1432 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1433 /* we have /x+whatever/ */
1434 /* it must be a one character string (XXXX Except UTF?) */
1435 char ch = SvPVX(prog->anchored_substr)[0];
1436 if (UTF) {
1437 while (s < strend) {
1438 if (*s == ch) {
1439 if (regtry(prog, s)) goto got_it;
1440 s += UTF8SKIP(s);
1441 while (s < strend && *s == ch)
1442 s += UTF8SKIP(s);
1443 }
1444 s += UTF8SKIP(s);
1445 }
1446 }
1447 else {
1448 while (s < strend) {
1449 if (*s == ch) {
1450 if (regtry(prog, s)) goto got_it;
1451 s++;
1452 while (s < strend && *s == ch)
1453 s++;
1454 }
1455 s++;
1456 }
1457 }
1458 }
1459 /*SUPPRESS 560*/
1460 else if (prog->anchored_substr != Nullsv
1461 || (prog->float_substr != Nullsv
1462 && prog->float_max_offset < strend - s)) {
1463 SV *must = prog->anchored_substr
1464 ? prog->anchored_substr : prog->float_substr;
1465 I32 back_max =
1466 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1467 I32 back_min =
1468 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1469 I32 delta = back_max - back_min;
1470 char *last = HOPc(strend, /* Cannot start after this */
1471 -(I32)(CHR_SVLEN(must)
1472 - (SvTAIL(must) != 0) + back_min));
1473 char *last1; /* Last position checked before */
1474
1475 if (s > PL_bostr)
1476 last1 = HOPc(s, -1);
1477 else
1478 last1 = s - 1; /* bogus */
1479
1480 /* XXXX check_substr already used to find `s', can optimize if
1481 check_substr==must. */
1482 scream_pos = -1;
1483 dontbother = end_shift;
1484 strend = HOPc(strend, -dontbother);
1485 while ( (s <= last) &&
1486 ((flags & REXEC_SCREAM)
1487 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1488 end_shift, &scream_pos, 0))
1489 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1490 (unsigned char*)strend, must,
1491 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1492 if (HOPc(s, -back_max) > last1) {
1493 last1 = HOPc(s, -back_min);
1494 s = HOPc(s, -back_max);
1495 }
1496 else {
1497 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1498
1499 last1 = HOPc(s, -back_min);
1500 s = t;
1501 }
1502 if (UTF) {
1503 while (s <= last1) {
1504 if (regtry(prog, s))
1505 goto got_it;
1506 s += UTF8SKIP(s);
1507 }
1508 }
1509 else {
1510 while (s <= last1) {
1511 if (regtry(prog, s))
1512 goto got_it;
1513 s++;
1514 }
1515 }
1516 }
1517 goto phooey;
1518 }
1519 else if (c = prog->regstclass) {
66e933ab
GS
1520 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1521 /* don't bother with what can't match */
6eb5f6b9
JH
1522 strend = HOPc(strend, -(minlen - 1));
1523 if (find_byclass(prog, c, s, strend, startpos, 0))
1524 goto got_it;
d6a28714
JH
1525 }
1526 else {
1527 dontbother = 0;
1528 if (prog->float_substr != Nullsv) { /* Trim the end. */
1529 char *last;
1530 I32 oldpos = scream_pos;
1531
1532 if (flags & REXEC_SCREAM) {
1533 last = screaminstr(sv, prog->float_substr, s - strbeg,
1534 end_shift, &scream_pos, 1); /* last one */
1535 if (!last)
1536 last = scream_olds; /* Only one occurence. */
b8c5462f 1537 }
d6a28714
JH
1538 else {
1539 STRLEN len;
1540 char *little = SvPV(prog->float_substr, len);
1541
1542 if (SvTAIL(prog->float_substr)) {
1543 if (memEQ(strend - len + 1, little, len - 1))
1544 last = strend - len + 1;
1545 else if (!PL_multiline)
1546 last = memEQ(strend - len, little, len)
1547 ? strend - len : Nullch;
b8c5462f 1548 else
d6a28714
JH
1549 goto find_last;
1550 } else {
1551 find_last:
1552 if (len)
1553 last = rninstr(s, strend, little, little + len);
b8c5462f 1554 else
d6a28714 1555 last = strend; /* matching `$' */
b8c5462f 1556 }
b8c5462f 1557 }
d6a28714
JH
1558 if (last == NULL) goto phooey; /* Should not happen! */
1559 dontbother = strend - last + prog->float_min_offset;
1560 }
1561 if (minlen && (dontbother < minlen))
1562 dontbother = minlen - 1;
1563 strend -= dontbother; /* this one's always in bytes! */
1564 /* We don't know much -- general case. */
1565 if (UTF) {
1566 for (;;) {
1567 if (regtry(prog, s))
1568 goto got_it;
1569 if (s >= strend)
1570 break;
b8c5462f 1571 s += UTF8SKIP(s);
d6a28714
JH
1572 };
1573 }
1574 else {
1575 do {
1576 if (regtry(prog, s))
1577 goto got_it;
1578 } while (s++ < strend);
1579 }
1580 }
1581
1582 /* Failure. */
1583 goto phooey;
1584
1585got_it:
1586 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1587
1588 if (PL_reg_eval_set) {
1589 /* Preserve the current value of $^R */
1590 if (oreplsv != GvSV(PL_replgv))
1591 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1592 restored, the value remains
1593 the same. */
1594 restore_pos(aTHXo_ 0);
1595 }
1596
1597 /* make sure $`, $&, $', and $digit will work later */
1598 if ( !(flags & REXEC_NOT_FIRST) ) {
1599 if (RX_MATCH_COPIED(prog)) {
1600 Safefree(prog->subbeg);
1601 RX_MATCH_COPIED_off(prog);
1602 }
1603 if (flags & REXEC_COPY_STR) {
1604 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1605
1606 s = savepvn(strbeg, i);
1607 prog->subbeg = s;
1608 prog->sublen = i;
1609 RX_MATCH_COPIED_on(prog);
1610 }
1611 else {
1612 prog->subbeg = strbeg;
1613 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1614 }
1615 }
1616
1617 return 1;
1618
1619phooey:
1620 if (PL_reg_eval_set)
1621 restore_pos(aTHXo_ 0);
1622 return 0;
1623}
1624
1625/*
1626 - regtry - try match at specific point
1627 */
1628STATIC I32 /* 0 failure, 1 success */
1629S_regtry(pTHX_ regexp *prog, char *startpos)
1630{
1631 dTHR;
1632 register I32 i;
1633 register I32 *sp;
1634 register I32 *ep;
1635 CHECKPOINT lastcp;
1636
1637 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1638 MAGIC *mg;
1639
1640 PL_reg_eval_set = RS_init;
1641 DEBUG_r(DEBUG_s(
b900a521
JH
1642 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1643 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1644 ));
e8347627 1645 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1646 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1647 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1648 SAVETMPS;
1649 /* Apparently this is not needed, judging by wantarray. */
e8347627 1650 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1651 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1652
1653 if (PL_reg_sv) {
1654 /* Make $_ available to executed code. */
1655 if (PL_reg_sv != DEFSV) {
1656 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1657 SAVESPTR(DEFSV);
1658 DEFSV = PL_reg_sv;
b8c5462f 1659 }
d6a28714
JH
1660
1661 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1662 && (mg = mg_find(PL_reg_sv, 'g')))) {
1663 /* prepare for quick setting of pos */
1664 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1665 mg = mg_find(PL_reg_sv, 'g');
1666 mg->mg_len = -1;
b8c5462f 1667 }
d6a28714
JH
1668 PL_reg_magic = mg;
1669 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1670 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1671 }
1672 if (!PL_reg_curpm)
0f79a09d 1673 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1674 PL_reg_curpm->op_pmregexp = prog;
1675 PL_reg_oldcurpm = PL_curpm;
1676 PL_curpm = PL_reg_curpm;
1677 if (RX_MATCH_COPIED(prog)) {
1678 /* Here is a serious problem: we cannot rewrite subbeg,
1679 since it may be needed if this match fails. Thus
1680 $` inside (?{}) could fail... */
1681 PL_reg_oldsaved = prog->subbeg;
1682 PL_reg_oldsavedlen = prog->sublen;
1683 RX_MATCH_COPIED_off(prog);
1684 }
1685 else
1686 PL_reg_oldsaved = Nullch;
1687 prog->subbeg = PL_bostr;
1688 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1689 }
1690 prog->startp[0] = startpos - PL_bostr;
1691 PL_reginput = startpos;
1692 PL_regstartp = prog->startp;
1693 PL_regendp = prog->endp;
1694 PL_reglastparen = &prog->lastparen;
1695 prog->lastparen = 0;
1696 PL_regsize = 0;
1697 DEBUG_r(PL_reg_starttry = startpos);
1698 if (PL_reg_start_tmpl <= prog->nparens) {
1699 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1700 if(PL_reg_start_tmp)
1701 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1702 else
1703 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1704 }
1705
1706 /* XXXX What this code is doing here?!!! There should be no need
1707 to do this again and again, PL_reglastparen should take care of
1708 this! */
1709 sp = prog->startp;
1710 ep = prog->endp;
1711 if (prog->nparens) {
1712 for (i = prog->nparens; i >= 1; i--) {
1713 *++sp = -1;
1714 *++ep = -1;
1715 }
1716 }
1717 REGCP_SET;
1718 if (regmatch(prog->program + 1)) {
1719 prog->endp[0] = PL_reginput - PL_bostr;
1720 return 1;
1721 }
1722 REGCP_UNWIND;
1723 return 0;
1724}
1725
1726/*
1727 - regmatch - main matching routine
1728 *
1729 * Conceptually the strategy is simple: check to see whether the current
1730 * node matches, call self recursively to see whether the rest matches,
1731 * and then act accordingly. In practice we make some effort to avoid
1732 * recursion, in particular by going through "ordinary" nodes (that don't
1733 * need to know whether the rest of the match failed) by a loop instead of
1734 * by recursion.
1735 */
1736/* [lwall] I've hoisted the register declarations to the outer block in order to
1737 * maybe save a little bit of pushing and popping on the stack. It also takes
1738 * advantage of machines that use a register save mask on subroutine entry.
1739 */
1740STATIC I32 /* 0 failure, 1 success */
1741S_regmatch(pTHX_ regnode *prog)
1742{
1743 dTHR;
1744 register regnode *scan; /* Current node. */
1745 regnode *next; /* Next node. */
1746 regnode *inner; /* Next node in internal branch. */
1747 register I32 nextchr; /* renamed nextchr - nextchar colides with
1748 function of same name */
1749 register I32 n; /* no or next */
1750 register I32 ln; /* len or last */
1751 register char *s; /* operand or save */
1752 register char *locinput = PL_reginput;
1753 register I32 c1, c2, paren; /* case fold search, parenth */
1754 int minmod = 0, sw = 0, logical = 0;
1755#ifdef DEBUGGING
1756 PL_regindent++;
1757#endif
1758
1759 /* Note that nextchr is a byte even in UTF */
1760 nextchr = UCHARAT(locinput);
1761 scan = prog;
1762 while (scan != NULL) {
1763#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1764#ifdef DEBUGGING
1765# define sayYES goto yes
1766# define sayNO goto no
7821416a
IZ
1767# define sayYES_FINAL goto yes_final
1768# define sayYES_LOUD goto yes_loud
1769# define sayNO_FINAL goto no_final
1770# define sayNO_SILENT goto do_no
d6a28714
JH
1771# define saySAME(x) if (x) goto yes; else goto no
1772# define REPORT_CODE_OFF 24
1773#else
1774# define sayYES return 1
1775# define sayNO return 0
7821416a
IZ
1776# define sayYES_FINAL return 1
1777# define sayYES_LOUD return 1
1778# define sayNO_FINAL return 0
1779# define sayNO_SILENT return 0
d6a28714
JH
1780# define saySAME(x) return x
1781#endif
1782 DEBUG_r( {
1783 SV *prop = sv_newmortal();
1784 int docolor = *PL_colors[0];
1785 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1786 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1787 /* The part of the string before starttry has one color
1788 (pref0_len chars), between starttry and current
1789 position another one (pref_len - pref0_len chars),
1790 after the current position the third one.
1791 We assume that pref0_len <= pref_len, otherwise we
1792 decrease pref0_len. */
1793 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1794 ? (5 + taill) - l : locinput - PL_bostr);
1795 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1796
1797 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1798 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1799 ? (5 + taill) - pref_len : PL_regeol - locinput);
1800 if (pref0_len < 0)
1801 pref0_len = 0;
1802 if (pref0_len > pref_len)
1803 pref0_len = pref_len;
1804 regprop(prop, scan);
1805 PerlIO_printf(Perl_debug_log,
b900a521
JH
1806 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1807 (IV)(locinput - PL_bostr),
d6a28714
JH
1808 PL_colors[4], pref0_len,
1809 locinput - pref_len, PL_colors[5],
1810 PL_colors[2], pref_len - pref0_len,
1811 locinput - pref_len + pref0_len, PL_colors[3],
1812 (docolor ? "" : "> <"),
1813 PL_colors[0], l, locinput, PL_colors[1],
1814 15 - l - pref_len + 1,
1815 "",
b900a521 1816 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1817 SvPVX(prop));
1818 } );
1819
1820 next = scan + NEXT_OFF(scan);
1821 if (next == scan)
1822 next = NULL;
1823
1824 switch (OP(scan)) {
1825 case BOL:
1826 if (locinput == PL_bostr
1827 ? PL_regprev == '\n'
1828 : (PL_multiline &&
1829 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1830 {
1831 /* regtill = regbol; */
b8c5462f
JH
1832 break;
1833 }
d6a28714
JH
1834 sayNO;
1835 case MBOL:
1836 if (locinput == PL_bostr
1837 ? PL_regprev == '\n'
1838 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1839 {
b8c5462f
JH
1840 break;
1841 }
d6a28714
JH
1842 sayNO;
1843 case SBOL:
1844 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1845 break;
d6a28714
JH
1846 sayNO;
1847 case GPOS:
1848 if (locinput == PL_reg_ganch)
1849 break;
1850 sayNO;
1851 case EOL:
1852 if (PL_multiline)
1853 goto meol;
1854 else
1855 goto seol;
1856 case MEOL:
1857 meol:
1858 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1859 sayNO;
b8c5462f 1860 break;
d6a28714
JH
1861 case SEOL:
1862 seol:
1863 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1864 sayNO;
d6a28714 1865 if (PL_regeol - locinput > 1)
b8c5462f 1866 sayNO;
b8c5462f 1867 break;
d6a28714
JH
1868 case EOS:
1869 if (PL_regeol != locinput)
b8c5462f 1870 sayNO;
d6a28714
JH
1871 break;
1872 case SANYUTF8:
b8c5462f 1873 if (nextchr & 0x80) {
b8c5462f 1874 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1875 if (locinput > PL_regeol)
1876 sayNO;
b8c5462f
JH
1877 nextchr = UCHARAT(locinput);
1878 break;
1879 }
d6a28714 1880 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1881 sayNO;
b8c5462f 1882 nextchr = UCHARAT(++locinput);
a0d0e21e 1883 break;
d6a28714
JH
1884 case SANY:
1885 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1886 sayNO;
1887 nextchr = UCHARAT(++locinput);
b85d18e9 1888 break;
d6a28714 1889 case ANYUTF8:
a0ed51b3 1890 if (nextchr & 0x80) {
b8c5462f 1891 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1892 if (locinput > PL_regeol)
1893 sayNO;
a0ed51b3
LW
1894 nextchr = UCHARAT(locinput);
1895 break;
1896 }
d6a28714 1897 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3
LW
1898 sayNO;
1899 nextchr = UCHARAT(++locinput);
1900 break;
d6a28714
JH
1901 case REG_ANY:
1902 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1903 sayNO;
76e3520e 1904 nextchr = UCHARAT(++locinput);
a0d0e21e 1905 break;
d6a28714 1906 case EXACT:
cd439c50
IZ
1907 s = STRING(scan);
1908 ln = STR_LEN(scan);
d6a28714
JH
1909 /* Inline the first character, for speed. */
1910 if (UCHARAT(s) != nextchr)
1911 sayNO;
1912 if (PL_regeol - locinput < ln)
1913 sayNO;
1914 if (ln > 1 && memNE(s, locinput, ln))
1915 sayNO;
1916 locinput += ln;
1917 nextchr = UCHARAT(locinput);
1918 break;
1919 case EXACTFL:
b8c5462f
JH
1920 PL_reg_flags |= RF_tainted;
1921 /* FALL THROUGH */
d6a28714 1922 case EXACTF:
cd439c50
IZ
1923 s = STRING(scan);
1924 ln = STR_LEN(scan);
d6a28714
JH
1925
1926 if (UTF) {
1927 char *l = locinput;
1928 char *e = s + ln;
1929 c1 = OP(scan) == EXACTF;
1930 while (s < e) {
1931 if (l >= PL_regeol)
1932 sayNO;
1933 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1934 toLOWER_utf8((U8*)l) :
1935 toLOWER_LC_utf8((U8*)l)))
1936 {
1937 sayNO;
1938 }
1939 s += UTF8SKIP(s);
1940 l += UTF8SKIP(l);
b8c5462f 1941 }
d6a28714 1942 locinput = l;
a0ed51b3
LW
1943 nextchr = UCHARAT(locinput);
1944 break;
1945 }
d6a28714
JH
1946
1947 /* Inline the first character, for speed. */
1948 if (UCHARAT(s) != nextchr &&
1949 UCHARAT(s) != ((OP(scan) == EXACTF)
1950 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1951 sayNO;
d6a28714 1952 if (PL_regeol - locinput < ln)
b8c5462f 1953 sayNO;
d6a28714
JH
1954 if (ln > 1 && (OP(scan) == EXACTF
1955 ? ibcmp(s, locinput, ln)
1956 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1957 sayNO;
d6a28714
JH
1958 locinput += ln;
1959 nextchr = UCHARAT(locinput);
a0d0e21e 1960 break;
d6a28714 1961 case ANYOFUTF8:
d6a28714 1962 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1963 sayNO;
d6a28714
JH
1964 if (locinput >= PL_regeol)
1965 sayNO;
1966 locinput += PL_utf8skip[nextchr];
1967 nextchr = UCHARAT(locinput);
1968 break;
1969 case ANYOF:
d6a28714 1970 if (nextchr < 0)
b8c5462f 1971 nextchr = UCHARAT(locinput);
936ed897 1972 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
1973 sayNO;
1974 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1975 sayNO;
b8c5462f
JH
1976 nextchr = UCHARAT(++locinput);
1977 break;
d6a28714 1978 case ALNUML:
b8c5462f
JH
1979 PL_reg_flags |= RF_tainted;
1980 /* FALL THROUGH */
d6a28714 1981 case ALNUM:
b8c5462f 1982 if (!nextchr)
4633a7c4 1983 sayNO;
d6a28714
JH
1984 if (!(OP(scan) == ALNUM
1985 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1986 sayNO;
1987 nextchr = UCHARAT(++locinput);
bbce6d69 1988 break;
d6a28714 1989 case ALNUMLUTF8:
3280af22 1990 PL_reg_flags |= RF_tainted;
bbce6d69 1991 /* FALL THROUGH */
d6a28714 1992 case ALNUMUTF8:
b8c5462f
JH
1993 if (!nextchr)
1994 sayNO;
1995 if (nextchr & 0x80) {
d6a28714
JH
1996 if (!(OP(scan) == ALNUMUTF8
1997 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1998 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1999 {
2000 sayNO;
a0ed51b3 2001 }
b8c5462f 2002 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2003 nextchr = UCHARAT(locinput);
2004 break;
2005 }
d6a28714
JH
2006 if (!(OP(scan) == ALNUMUTF8
2007 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2008 sayNO;
b8c5462f 2009 nextchr = UCHARAT(++locinput);
a0d0e21e 2010 break;
d6a28714 2011 case NALNUML:
b8c5462f
JH
2012 PL_reg_flags |= RF_tainted;
2013 /* FALL THROUGH */
d6a28714
JH
2014 case NALNUM:
2015 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2016 sayNO;
d6a28714
JH
2017 if (OP(scan) == NALNUM
2018 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2019 sayNO;
b8c5462f 2020 nextchr = UCHARAT(++locinput);
a0ed51b3 2021 break;
d6a28714 2022 case NALNUMLUTF8:
b8c5462f
JH
2023 PL_reg_flags |= RF_tainted;
2024 /* FALL THROUGH */
d6a28714 2025 case NALNUMUTF8:
3280af22 2026 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2027 sayNO;
b8c5462f 2028 if (nextchr & 0x80) {
d6a28714
JH
2029 if (OP(scan) == NALNUMUTF8
2030 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2031 : isALNUM_LC_utf8((U8*)locinput))
2032 {
b8c5462f 2033 sayNO;
d6a28714 2034 }
b8c5462f
JH
2035 locinput += PL_utf8skip[nextchr];
2036 nextchr = UCHARAT(locinput);
2037 break;
2038 }
d6a28714
JH
2039 if (OP(scan) == NALNUMUTF8
2040 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2041 sayNO;
76e3520e 2042 nextchr = UCHARAT(++locinput);
a0d0e21e 2043 break;
d6a28714
JH
2044 case BOUNDL:
2045 case NBOUNDL:
3280af22 2046 PL_reg_flags |= RF_tainted;
bbce6d69 2047 /* FALL THROUGH */
d6a28714
JH
2048 case BOUND:
2049 case NBOUND:
2050 /* was last char in word? */
2051 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2052 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2053 ln = isALNUM(ln);
2054 n = isALNUM(nextchr);
2055 }
2056 else {
2057 ln = isALNUM_LC(ln);
2058 n = isALNUM_LC(nextchr);
2059 }
2060 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2061 sayNO;
a0d0e21e 2062 break;
d6a28714
JH
2063 case BOUNDLUTF8:
2064 case NBOUNDLUTF8:
a0ed51b3
LW
2065 PL_reg_flags |= RF_tainted;
2066 /* FALL THROUGH */
d6a28714
JH
2067 case BOUNDUTF8:
2068 case NBOUNDUTF8:
2069 /* was last char in word? */
2070 ln = (locinput != PL_regbol)
2071 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2072 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2073 ln = isALNUM_uni(ln);
2074 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2075 }
d6a28714
JH
2076 else {
2077 ln = isALNUM_LC_uni(ln);
2078 n = isALNUM_LC_utf8((U8*)locinput);
2079 }
2080 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2081 sayNO;
a0ed51b3 2082 break;
d6a28714 2083 case SPACEL:
3280af22 2084 PL_reg_flags |= RF_tainted;
bbce6d69 2085 /* FALL THROUGH */
d6a28714 2086 case SPACE:
9442cb0e 2087 if (!nextchr)
4633a7c4 2088 sayNO;
d6a28714
JH
2089 if (!(OP(scan) == SPACE
2090 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2091 sayNO;
76e3520e 2092 nextchr = UCHARAT(++locinput);
a0d0e21e 2093 break;
d6a28714 2094 case SPACELUTF8:
a0ed51b3
LW
2095 PL_reg_flags |= RF_tainted;
2096 /* FALL THROUGH */
d6a28714 2097 case SPACEUTF8:
9442cb0e 2098 if (!nextchr)
a0ed51b3
LW
2099 sayNO;
2100 if (nextchr & 0x80) {
d6a28714 2101 if (!(OP(scan) == SPACEUTF8
9442cb0e 2102 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2103 : isSPACE_LC_utf8((U8*)locinput)))
2104 {
a0ed51b3 2105 sayNO;
d6a28714 2106 }
6f06b55f 2107 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2108 nextchr = UCHARAT(locinput);
2109 break;
2110 }
d6a28714
JH
2111 if (!(OP(scan) == SPACEUTF8
2112 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2113 sayNO;
2114 nextchr = UCHARAT(++locinput);
2115 break;
d6a28714 2116 case NSPACEL:
3280af22 2117 PL_reg_flags |= RF_tainted;
bbce6d69 2118 /* FALL THROUGH */
d6a28714 2119 case NSPACE:
9442cb0e 2120 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2121 sayNO;
9442cb0e 2122 if (OP(scan) == NSPACE
d6a28714 2123 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2124 sayNO;
b8c5462f 2125 nextchr = UCHARAT(++locinput);
a0d0e21e 2126 break;
d6a28714 2127 case NSPACELUTF8:
a0ed51b3
LW
2128 PL_reg_flags |= RF_tainted;
2129 /* FALL THROUGH */
d6a28714 2130 case NSPACEUTF8:
9442cb0e 2131 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2132 sayNO;
2133 if (nextchr & 0x80) {
d6a28714 2134 if (OP(scan) == NSPACEUTF8
9442cb0e 2135 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2136 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2137 {
2138 sayNO;
2139 }
2140 locinput += PL_utf8skip[nextchr];
2141 nextchr = UCHARAT(locinput);
2142 break;
a0ed51b3 2143 }
d6a28714
JH
2144 if (OP(scan) == NSPACEUTF8
2145 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2146 sayNO;
76e3520e 2147 nextchr = UCHARAT(++locinput);
a0d0e21e 2148 break;
d6a28714 2149 case DIGITL:
a0ed51b3
LW
2150 PL_reg_flags |= RF_tainted;
2151 /* FALL THROUGH */
d6a28714 2152 case DIGIT:
9442cb0e 2153 if (!nextchr)
a0ed51b3 2154 sayNO;
d6a28714
JH
2155 if (!(OP(scan) == DIGIT
2156 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2157 sayNO;
76e3520e 2158 nextchr = UCHARAT(++locinput);
a0d0e21e 2159 break;
d6a28714 2160 case DIGITLUTF8:
a0ed51b3
LW
2161 PL_reg_flags |= RF_tainted;
2162 /* FALL THROUGH */
d6a28714 2163 case DIGITUTF8:
a0ed51b3
LW
2164 if (!nextchr)
2165 sayNO;
2166 if (nextchr & 0x80) {
9442cb0e
GS
2167 if (!(OP(scan) == DIGITUTF8
2168 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2169 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2170 {
a0ed51b3 2171 sayNO;
dfe13c55 2172 }
6f06b55f 2173 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2174 nextchr = UCHARAT(locinput);
2175 break;
2176 }
9442cb0e
GS
2177 if (!(OP(scan) == DIGITUTF8
2178 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2179 sayNO;
2180 nextchr = UCHARAT(++locinput);
2181 break;
d6a28714 2182 case NDIGITL:
b8c5462f
JH
2183 PL_reg_flags |= RF_tainted;
2184 /* FALL THROUGH */
d6a28714 2185 case NDIGIT:
9442cb0e 2186 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2187 sayNO;
9442cb0e 2188 if (OP(scan) == NDIGIT
d6a28714 2189 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2190 sayNO;
76e3520e 2191 nextchr = UCHARAT(++locinput);
a0d0e21e 2192 break;
d6a28714 2193 case NDIGITLUTF8:
b8c5462f
JH
2194 PL_reg_flags |= RF_tainted;
2195 /* FALL THROUGH */
d6a28714 2196 case NDIGITUTF8:
b8c5462f
JH
2197 if (!nextchr && locinput >= PL_regeol)
2198 sayNO;
a0ed51b3 2199 if (nextchr & 0x80) {
9442cb0e
GS
2200 if (OP(scan) == NDIGITUTF8
2201 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2202 : isDIGIT_LC_utf8((U8*)locinput))
2203 {
a0ed51b3 2204 sayNO;
9442cb0e 2205 }
6f06b55f 2206 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2207 nextchr = UCHARAT(locinput);
2208 break;
2209 }
9442cb0e
GS
2210 if (OP(scan) == NDIGITUTF8
2211 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2212 sayNO;
2213 nextchr = UCHARAT(++locinput);
2214 break;
2215 case CLUMP:
dfe13c55 2216 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2217 sayNO;
6f06b55f 2218 locinput += PL_utf8skip[nextchr];
dfe13c55 2219 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2220 locinput += UTF8SKIP(locinput);
2221 if (locinput > PL_regeol)
2222 sayNO;
2223 nextchr = UCHARAT(locinput);
2224 break;
c8756f30 2225 case REFFL:
3280af22 2226 PL_reg_flags |= RF_tainted;
c8756f30 2227 /* FALL THROUGH */
c277df42 2228 case REF:
c8756f30 2229 case REFF:
c277df42 2230 n = ARG(scan); /* which paren pair */
cf93c79d 2231 ln = PL_regstartp[n];
2c2d71f5 2232 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2233 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2234 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2235 if (ln == PL_regendp[n])
a0d0e21e 2236 break;
a0ed51b3 2237
cf93c79d 2238 s = PL_bostr + ln;
a0ed51b3
LW
2239 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2240 char *l = locinput;
cf93c79d 2241 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2242 /*
2243 * Note that we can't do the "other character" lookup trick as
2244 * in the 8-bit case (no pun intended) because in Unicode we
2245 * have to map both upper and title case to lower case.
2246 */
2247 if (OP(scan) == REFF) {
2248 while (s < e) {
2249 if (l >= PL_regeol)
2250 sayNO;
dfe13c55 2251 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2252 sayNO;
2253 s += UTF8SKIP(s);
2254 l += UTF8SKIP(l);
2255 }
2256 }
2257 else {
2258 while (s < e) {
2259 if (l >= PL_regeol)
2260 sayNO;
dfe13c55 2261 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2262 sayNO;
2263 s += UTF8SKIP(s);
2264 l += UTF8SKIP(l);
2265 }
2266 }
2267 locinput = l;
2268 nextchr = UCHARAT(locinput);
2269 break;
2270 }
2271
a0d0e21e 2272 /* Inline the first character, for speed. */
76e3520e 2273 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2274 (OP(scan) == REF ||
2275 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2276 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2277 sayNO;
cf93c79d 2278 ln = PL_regendp[n] - ln;
3280af22 2279 if (locinput + ln > PL_regeol)
4633a7c4 2280 sayNO;
c8756f30
AK
2281 if (ln > 1 && (OP(scan) == REF
2282 ? memNE(s, locinput, ln)
2283 : (OP(scan) == REFF
2284 ? ibcmp(s, locinput, ln)
2285 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2286 sayNO;
a0d0e21e 2287 locinput += ln;
76e3520e 2288 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2289 break;
2290
2291 case NOTHING:
c277df42 2292 case TAIL:
a0d0e21e
LW
2293 break;
2294 case BACK:
2295 break;
c277df42
IZ
2296 case EVAL:
2297 {
2298 dSP;
533c011a 2299 OP_4tree *oop = PL_op;
3280af22
NIS
2300 COP *ocurcop = PL_curcop;
2301 SV **ocurpad = PL_curpad;
c277df42
IZ
2302 SV *ret;
2303
2304 n = ARG(scan);
533c011a 2305 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2306 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2307 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2308 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2309
cea2e8a9 2310 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2311 SPAGAIN;
2312 ret = POPs;
2313 PUTBACK;
2314
0f5d15d6
IZ
2315 PL_op = oop;
2316 PL_curpad = ocurpad;
2317 PL_curcop = ocurcop;
c277df42 2318 if (logical) {
0f5d15d6
IZ
2319 if (logical == 2) { /* Postponed subexpression. */
2320 regexp *re;
22c35a8c 2321 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2322 re_cc_state state;
0f5d15d6
IZ
2323 CHECKPOINT cp, lastcp;
2324
2325 if(SvROK(ret) || SvRMAGICAL(ret)) {
2326 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2327
2328 if(SvMAGICAL(sv))
2329 mg = mg_find(sv, 'r');
2330 }
2331 if (mg) {
2332 re = (regexp *)mg->mg_obj;
df0003d4 2333 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2334 }
2335 else {
2336 STRLEN len;
2337 char *t = SvPV(ret, len);
2338 PMOP pm;
2339 char *oprecomp = PL_regprecomp;
2340 I32 osize = PL_regsize;
2341 I32 onpar = PL_regnpar;
2342
2343 pm.op_pmflags = 0;
393fec97 2344 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2345 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2346 if (!(SvFLAGS(ret)
2347 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2348 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2349 PL_regprecomp = oprecomp;
2350 PL_regsize = osize;
2351 PL_regnpar = onpar;
2352 }
2353 DEBUG_r(
2354 PerlIO_printf(Perl_debug_log,
2355 "Entering embedded `%s%.60s%s%s'\n",
2356 PL_colors[0],
2357 re->precomp,
2358 PL_colors[1],
2359 (strlen(re->precomp) > 60 ? "..." : ""))
2360 );
2361 state.node = next;
2362 state.prev = PL_reg_call_cc;
2363 state.cc = PL_regcc;
2364 state.re = PL_reg_re;
2365
2ab05381 2366 PL_regcc = 0;
0f5d15d6
IZ
2367
2368 cp = regcppush(0); /* Save *all* the positions. */
2369 REGCP_SET;
2370 cache_re(re);
2371 state.ss = PL_savestack_ix;
2372 *PL_reglastparen = 0;
2373 PL_reg_call_cc = &state;
2374 PL_reginput = locinput;
2c2d71f5
JH
2375
2376 /* XXXX This is too dramatic a measure... */
2377 PL_reg_maxiter = 0;
2378
0f5d15d6 2379 if (regmatch(re->program + 1)) {
2c914db6
IZ
2380 /* Even though we succeeded, we need to restore
2381 global variables, since we may be wrapped inside
2382 SUSPEND, thus the match may be not finished yet. */
2383
2384 /* XXXX Do this only if SUSPENDed? */
2385 PL_reg_call_cc = state.prev;
2386 PL_regcc = state.cc;
2387 PL_reg_re = state.re;
2388 cache_re(PL_reg_re);
2389
2390 /* XXXX This is too dramatic a measure... */
2391 PL_reg_maxiter = 0;
2392
2393 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2394 ReREFCNT_dec(re);
2395 regcpblow(cp);
2396 sayYES;
2397 }
0f5d15d6
IZ
2398 ReREFCNT_dec(re);
2399 REGCP_UNWIND;
2400 regcppop();
2401 PL_reg_call_cc = state.prev;
2402 PL_regcc = state.cc;
2403 PL_reg_re = state.re;
d3790889 2404 cache_re(PL_reg_re);
2c2d71f5
JH
2405
2406 /* XXXX This is too dramatic a measure... */
2407 PL_reg_maxiter = 0;
2408
0f5d15d6
IZ
2409 sayNO;
2410 }
c277df42 2411 sw = SvTRUE(ret);
0f5d15d6 2412 logical = 0;
a0ed51b3
LW
2413 }
2414 else
3280af22 2415 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2416 break;
2417 }
a0d0e21e 2418 case OPEN:
c277df42 2419 n = ARG(scan); /* which paren pair */
3280af22
NIS
2420 PL_reg_start_tmp[n] = locinput;
2421 if (n > PL_regsize)
2422 PL_regsize = n;
a0d0e21e
LW
2423 break;
2424 case CLOSE:
c277df42 2425 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2426 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2427 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2428 if (n > *PL_reglastparen)
2429 *PL_reglastparen = n;
a0d0e21e 2430 break;
c277df42
IZ
2431 case GROUPP:
2432 n = ARG(scan); /* which paren pair */
cf93c79d 2433 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2434 break;
2435 case IFTHEN:
2c2d71f5 2436 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2437 if (sw)
2438 next = NEXTOPER(NEXTOPER(scan));
2439 else {
2440 next = scan + ARG(scan);
2441 if (OP(next) == IFTHEN) /* Fake one. */
2442 next = NEXTOPER(NEXTOPER(next));
2443 }
2444 break;
2445 case LOGICAL:
0f5d15d6 2446 logical = scan->flags;
c277df42 2447 break;
2ab05381
IZ
2448/*******************************************************************
2449 PL_regcc contains infoblock about the innermost (...)* loop, and
2450 a pointer to the next outer infoblock.
2451
2452 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2453
2454 1) After matching X, regnode for CURLYX is processed;
2455
2456 2) This regnode creates infoblock on the stack, and calls
2457 regmatch() recursively with the starting point at WHILEM node;
2458
2459 3) Each hit of WHILEM node tries to match A and Z (in the order
2460 depending on the current iteration, min/max of {min,max} and
2461 greediness). The information about where are nodes for "A"
2462 and "Z" is read from the infoblock, as is info on how many times "A"
2463 was already matched, and greediness.
2464
2465 4) After A matches, the same WHILEM node is hit again.
2466
2467 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2468 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2469 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2470 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2471 of the external loop.
2472
2473 Currently present infoblocks form a tree with a stem formed by PL_curcc
2474 and whatever it mentions via ->next, and additional attached trees
2475 corresponding to temporarily unset infoblocks as in "5" above.
2476
2477 In the following picture infoblocks for outer loop of
2478 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2479 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2480 infoblocks are drawn below the "reset" infoblock.
2481
2482 In fact in the picture below we do not show failed matches for Z and T
2483 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2484 more obvious *why* one needs to *temporary* unset infoblocks.]
2485
2486 Matched REx position InfoBlocks Comment
2487 (Y(A)*?Z)*?T x
2488 Y(A)*?Z)*?T x <- O
2489 Y (A)*?Z)*?T x <- O
2490 Y A)*?Z)*?T x <- O <- I
2491 YA )*?Z)*?T x <- O <- I
2492 YA A)*?Z)*?T x <- O <- I
2493 YAA )*?Z)*?T x <- O <- I
2494 YAA Z)*?T x <- O # Temporary unset I
2495 I
2496
2497 YAAZ Y(A)*?Z)*?T x <- O
2498 I
2499
2500 YAAZY (A)*?Z)*?T x <- O
2501 I
2502
2503 YAAZY A)*?Z)*?T x <- O <- I
2504 I
2505
2506 YAAZYA )*?Z)*?T x <- O <- I
2507 I
2508
2509 YAAZYA Z)*?T x <- O # Temporary unset I
2510 I,I
2511
2512 YAAZYAZ )*?T x <- O
2513 I,I
2514
2515 YAAZYAZ T x # Temporary unset O
2516 O
2517 I,I
2518
2519 YAAZYAZT x
2520 O
2521 I,I
2522 *******************************************************************/
a0d0e21e
LW
2523 case CURLYX: {
2524 CURCUR cc;
3280af22 2525 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2526
2527 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2528 next += ARG(next);
3280af22
NIS
2529 cc.oldcc = PL_regcc;
2530 PL_regcc = &cc;
2531 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2532 cc.cur = -1;
2533 cc.min = ARG1(scan);
2534 cc.max = ARG2(scan);
c277df42 2535 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2536 cc.next = next;
2537 cc.minmod = minmod;
2538 cc.lastloc = 0;
3280af22 2539 PL_reginput = locinput;
a0d0e21e
LW
2540 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2541 regcpblow(cp);
3280af22 2542 PL_regcc = cc.oldcc;
4633a7c4 2543 saySAME(n);
a0d0e21e
LW
2544 }
2545 /* NOT REACHED */
2546 case WHILEM: {
2547 /*
2548 * This is really hard to understand, because after we match
2549 * what we're trying to match, we must make sure the rest of
2c2d71f5 2550 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2551 * to go back UP the parse tree by recursing ever deeper. And
2552 * if it fails, we have to reset our parent's current state
2553 * that we can try again after backing off.
2554 */
2555
c277df42 2556 CHECKPOINT cp, lastcp;
3280af22 2557 CURCUR* cc = PL_regcc;
c277df42
IZ
2558 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2559
4633a7c4 2560 n = cc->cur + 1; /* how many we know we matched */
3280af22 2561 PL_reginput = locinput;
a0d0e21e 2562
c277df42
IZ
2563 DEBUG_r(
2564 PerlIO_printf(Perl_debug_log,
2565 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2566 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2567 (long)n, (long)cc->min,
2568 (long)cc->max, (long)cc)
2569 );
4633a7c4 2570
a0d0e21e
LW
2571 /* If degenerate scan matches "", assume scan done. */
2572
579cf2c3 2573 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2574 PL_regcc = cc->oldcc;
2ab05381
IZ
2575 if (PL_regcc)
2576 ln = PL_regcc->cur;
c277df42 2577 DEBUG_r(
c3464db5
DD
2578 PerlIO_printf(Perl_debug_log,
2579 "%*s empty match detected, try continuation...\n",
3280af22 2580 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2581 );
a0d0e21e 2582 if (regmatch(cc->next))
4633a7c4 2583 sayYES;
2ab05381
IZ
2584 if (PL_regcc)
2585 PL_regcc->cur = ln;
3280af22 2586 PL_regcc = cc;
4633a7c4 2587 sayNO;
a0d0e21e
LW
2588 }
2589
2590 /* First just match a string of min scans. */
2591
2592 if (n < cc->min) {
2593 cc->cur = n;
2594 cc->lastloc = locinput;
4633a7c4
LW
2595 if (regmatch(cc->scan))
2596 sayYES;
2597 cc->cur = n - 1;
c277df42 2598 cc->lastloc = lastloc;
4633a7c4 2599 sayNO;
a0d0e21e
LW
2600 }
2601
2c2d71f5
JH
2602 if (scan->flags) {
2603 /* Check whether we already were at this position.
2604 Postpone detection until we know the match is not
2605 *that* much linear. */
2606 if (!PL_reg_maxiter) {
2607 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2608 PL_reg_leftiter = PL_reg_maxiter;
2609 }
2610 if (PL_reg_leftiter-- == 0) {
2611 I32 size = (PL_reg_maxiter + 7)/8;
2612 if (PL_reg_poscache) {
2613 if (PL_reg_poscache_size < size) {
2614 Renew(PL_reg_poscache, size, char);
2615 PL_reg_poscache_size = size;
2616 }
2617 Zero(PL_reg_poscache, size, char);
2618 }
2619 else {
2620 PL_reg_poscache_size = size;
2621 Newz(29, PL_reg_poscache, size, char);
2622 }
2623 DEBUG_r(
2624 PerlIO_printf(Perl_debug_log,
2625 "%sDetected a super-linear match, switching on caching%s...\n",
2626 PL_colors[4], PL_colors[5])
2627 );
2628 }
2629 if (PL_reg_leftiter < 0) {
2630 I32 o = locinput - PL_bostr, b;
2631
2632 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2633 b = o % 8;
2634 o /= 8;
2635 if (PL_reg_poscache[o] & (1<<b)) {
2636 DEBUG_r(
2637 PerlIO_printf(Perl_debug_log,
2638 "%*s already tried at this position...\n",
2639 REPORT_CODE_OFF+PL_regindent*2, "")
2640 );
7821416a 2641 sayNO_SILENT;
2c2d71f5
JH
2642 }
2643 PL_reg_poscache[o] |= (1<<b);
2644 }
2645 }
2646
a0d0e21e
LW
2647 /* Prefer next over scan for minimal matching. */
2648
2649 if (cc->minmod) {
3280af22 2650 PL_regcc = cc->oldcc;
2ab05381
IZ
2651 if (PL_regcc)
2652 ln = PL_regcc->cur;
5f05dabc 2653 cp = regcppush(cc->parenfloor);
c277df42 2654 REGCP_SET;
5f05dabc 2655 if (regmatch(cc->next)) {
c277df42 2656 regcpblow(cp);
4633a7c4 2657 sayYES; /* All done. */
5f05dabc 2658 }
c277df42 2659 REGCP_UNWIND;
5f05dabc 2660 regcppop();
2ab05381
IZ
2661 if (PL_regcc)
2662 PL_regcc->cur = ln;
3280af22 2663 PL_regcc = cc;
a0d0e21e 2664
c277df42 2665 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2666 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2667 && !(PL_reg_flags & RF_warned)) {
2668 PL_reg_flags |= RF_warned;
e476b1b5 2669 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2670 "Complex regular subexpression recursion",
2671 REG_INFTY - 1);
c277df42 2672 }
4633a7c4 2673 sayNO;
c277df42 2674 }
a687059c 2675
c277df42 2676 DEBUG_r(
c3464db5
DD
2677 PerlIO_printf(Perl_debug_log,
2678 "%*s trying longer...\n",
3280af22 2679 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2680 );
a0d0e21e 2681 /* Try scanning more and see if it helps. */
3280af22 2682 PL_reginput = locinput;
a0d0e21e
LW
2683 cc->cur = n;
2684 cc->lastloc = locinput;
5f05dabc 2685 cp = regcppush(cc->parenfloor);
c277df42 2686 REGCP_SET;
5f05dabc 2687 if (regmatch(cc->scan)) {
c277df42 2688 regcpblow(cp);
4633a7c4 2689 sayYES;
5f05dabc 2690 }
c277df42 2691 REGCP_UNWIND;
5f05dabc 2692 regcppop();
4633a7c4 2693 cc->cur = n - 1;
c277df42 2694 cc->lastloc = lastloc;
4633a7c4 2695 sayNO;
a0d0e21e
LW
2696 }
2697
2698 /* Prefer scan over next for maximal matching. */
2699
2700 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2701 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2702 cc->cur = n;
2703 cc->lastloc = locinput;
c277df42 2704 REGCP_SET;
5f05dabc 2705 if (regmatch(cc->scan)) {
c277df42 2706 regcpblow(cp);
4633a7c4 2707 sayYES;
5f05dabc 2708 }
c277df42 2709 REGCP_UNWIND;
a0d0e21e 2710 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2711 PL_reginput = locinput;
c277df42 2712 DEBUG_r(
c3464db5
DD
2713 PerlIO_printf(Perl_debug_log,
2714 "%*s failed, try continuation...\n",
3280af22 2715 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2716 );
2717 }
e476b1b5 2718 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2719 && !(PL_reg_flags & RF_warned)) {
3280af22 2720 PL_reg_flags |= RF_warned;
e476b1b5 2721 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2722 "Complex regular subexpression recursion",
2723 REG_INFTY - 1);
a0d0e21e
LW
2724 }
2725
2726 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2727 PL_regcc = cc->oldcc;
2ab05381
IZ
2728 if (PL_regcc)
2729 ln = PL_regcc->cur;
a0d0e21e 2730 if (regmatch(cc->next))
4633a7c4 2731 sayYES;
2ab05381
IZ
2732 if (PL_regcc)
2733 PL_regcc->cur = ln;
3280af22 2734 PL_regcc = cc;
4633a7c4 2735 cc->cur = n - 1;
c277df42 2736 cc->lastloc = lastloc;
4633a7c4 2737 sayNO;
a0d0e21e
LW
2738 }
2739 /* NOT REACHED */
c277df42
IZ
2740 case BRANCHJ:
2741 next = scan + ARG(scan);
2742 if (next == scan)
2743 next = NULL;
2744 inner = NEXTOPER(NEXTOPER(scan));
2745 goto do_branch;
2746 case BRANCH:
2747 inner = NEXTOPER(scan);
2748 do_branch:
2749 {
2750 CHECKPOINT lastcp;
2751 c1 = OP(scan);
2752 if (OP(next) != c1) /* No choice. */
2753 next = inner; /* Avoid recursion. */
a0d0e21e 2754 else {
3280af22 2755 int lastparen = *PL_reglastparen;
c277df42
IZ
2756
2757 REGCP_SET;
a0d0e21e 2758 do {
3280af22 2759 PL_reginput = locinput;
c277df42 2760 if (regmatch(inner))
4633a7c4 2761 sayYES;
c277df42 2762 REGCP_UNWIND;
3280af22 2763 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2764 PL_regendp[n] = -1;
3280af22 2765 *PL_reglastparen = n;
c277df42 2766 scan = next;
a0d0e21e 2767 /*SUPPRESS 560*/
c277df42
IZ
2768 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2769 next += n;
a0d0e21e 2770 else
c277df42 2771 next = NULL;
c277df42
IZ
2772 inner = NEXTOPER(scan);
2773 if (c1 == BRANCHJ) {
2774 inner = NEXTOPER(inner);
2775 }
2776 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2777 sayNO;
a0d0e21e 2778 /* NOTREACHED */
a687059c 2779 }
a0d0e21e
LW
2780 }
2781 break;
2782 case MINMOD:
2783 minmod = 1;
2784 break;
c277df42
IZ
2785 case CURLYM:
2786 {
00db4c45 2787 I32 l = 0;
c277df42
IZ
2788 CHECKPOINT lastcp;
2789
2790 /* We suppose that the next guy does not need
2791 backtracking: in particular, it is of constant length,
2792 and has no parenths to influence future backrefs. */
2793 ln = ARG1(scan); /* min to match */
2794 n = ARG2(scan); /* max to match */
c277df42
IZ
2795 paren = scan->flags;
2796 if (paren) {
3280af22
NIS
2797 if (paren > PL_regsize)
2798 PL_regsize = paren;
2799 if (paren > *PL_reglastparen)
2800 *PL_reglastparen = paren;
c277df42 2801 }
dc45a647 2802 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2803 if (paren)
2804 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2805 PL_reginput = locinput;
c277df42
IZ
2806 if (minmod) {
2807 minmod = 0;
2808 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2809 sayNO;
5f4b28b2 2810 if (ln && l == 0 && n >= ln
c277df42
IZ
2811 /* In fact, this is tricky. If paren, then the
2812 fact that we did/didnot match may influence
2813 future execution. */
2814 && !(paren && ln == 0))
2815 ln = n;
3280af22 2816 locinput = PL_reginput;
22c35a8c 2817 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2818 c1 = (U8)*STRING(next);
c277df42 2819 if (OP(next) == EXACTF)
22c35a8c 2820 c2 = PL_fold[c1];
c277df42 2821 else if (OP(next) == EXACTFL)
22c35a8c 2822 c2 = PL_fold_locale[c1];
c277df42
IZ
2823 else
2824 c2 = c1;
a0ed51b3
LW
2825 }
2826 else
c277df42
IZ
2827 c1 = c2 = -1000;
2828 REGCP_SET;
5f4b28b2 2829 /* This may be improved if l == 0. */
c277df42
IZ
2830 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2831 /* If it could work, try it. */
2832 if (c1 == -1000 ||
3280af22
NIS
2833 UCHARAT(PL_reginput) == c1 ||
2834 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2835 {
2836 if (paren) {
2837 if (n) {
cf93c79d
IZ
2838 PL_regstartp[paren] =
2839 HOPc(PL_reginput, -l) - PL_bostr;
2840 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2841 }
2842 else
cf93c79d 2843 PL_regendp[paren] = -1;
c277df42
IZ
2844 }
2845 if (regmatch(next))
2846 sayYES;
2847 REGCP_UNWIND;
2848 }
2849 /* Couldn't or didn't -- move forward. */
3280af22 2850 PL_reginput = locinput;
c277df42
IZ
2851 if (regrepeat_hard(scan, 1, &l)) {
2852 ln++;
3280af22 2853 locinput = PL_reginput;
c277df42
IZ
2854 }
2855 else
2856 sayNO;
2857 }
a0ed51b3
LW
2858 }
2859 else {
c277df42
IZ
2860 n = regrepeat_hard(scan, n, &l);
2861 if (n != 0 && l == 0
2862 /* In fact, this is tricky. If paren, then the
2863 fact that we did/didnot match may influence
2864 future execution. */
2865 && !(paren && ln == 0))
2866 ln = n;
3280af22 2867 locinput = PL_reginput;
c277df42 2868 DEBUG_r(
5c0ca799 2869 PerlIO_printf(Perl_debug_log,
faccc32b 2870 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2871 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2872 (IV) n, (IV)l)
c277df42
IZ
2873 );
2874 if (n >= ln) {
22c35a8c 2875 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2876 c1 = (U8)*STRING(next);
c277df42 2877 if (OP(next) == EXACTF)
22c35a8c 2878 c2 = PL_fold[c1];
c277df42 2879 else if (OP(next) == EXACTFL)
22c35a8c 2880 c2 = PL_fold_locale[c1];
c277df42
IZ
2881 else
2882 c2 = c1;
a0ed51b3
LW
2883 }
2884 else
c277df42
IZ
2885 c1 = c2 = -1000;
2886 }
2887 REGCP_SET;
2888 while (n >= ln) {
2889 /* If it could work, try it. */
2890 if (c1 == -1000 ||
3280af22
NIS
2891 UCHARAT(PL_reginput) == c1 ||
2892 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2893 {
2894 DEBUG_r(
c3464db5 2895 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2896 "%*s trying tail with n=%"IVdf"...\n",
2897 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2898 );
2899 if (paren) {
2900 if (n) {
cf93c79d
IZ
2901 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2902 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2903 }
a0ed51b3 2904 else
cf93c79d 2905 PL_regendp[paren] = -1;
c277df42 2906 }
a0ed51b3
LW
2907 if (regmatch(next))
2908 sayYES;
2909 REGCP_UNWIND;
2910 }
c277df42
IZ
2911 /* Couldn't or didn't -- back up. */
2912 n--;
dfe13c55 2913 locinput = HOPc(locinput, -l);
3280af22 2914 PL_reginput = locinput;
c277df42
IZ
2915 }
2916 }
2917 sayNO;
2918 break;
2919 }
2920 case CURLYN:
2921 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2922 if (paren > PL_regsize)
2923 PL_regsize = paren;
2924 if (paren > *PL_reglastparen)
2925 *PL_reglastparen = paren;
c277df42
IZ
2926 ln = ARG1(scan); /* min to match */
2927 n = ARG2(scan); /* max to match */
dc45a647 2928 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2929 goto repeat;
a0d0e21e 2930 case CURLY:
c277df42 2931 paren = 0;
a0d0e21e
LW
2932 ln = ARG1(scan); /* min to match */
2933 n = ARG2(scan); /* max to match */
dc45a647 2934 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2935 goto repeat;
2936 case STAR:
2937 ln = 0;
c277df42 2938 n = REG_INFTY;
a0d0e21e 2939 scan = NEXTOPER(scan);
c277df42 2940 paren = 0;
a0d0e21e
LW
2941 goto repeat;
2942 case PLUS:
c277df42
IZ
2943 ln = 1;
2944 n = REG_INFTY;
2945 scan = NEXTOPER(scan);
2946 paren = 0;
2947 repeat:
a0d0e21e
LW
2948 /*
2949 * Lookahead to avoid useless match attempts
2950 * when we know what character comes next.
2951 */
22c35a8c 2952 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2953 c1 = (U8)*STRING(next);
bbce6d69 2954 if (OP(next) == EXACTF)
22c35a8c 2955 c2 = PL_fold[c1];
bbce6d69 2956 else if (OP(next) == EXACTFL)
22c35a8c 2957 c2 = PL_fold_locale[c1];
bbce6d69 2958 else
2959 c2 = c1;
2960 }
a0d0e21e 2961 else
bbce6d69 2962 c1 = c2 = -1000;
3280af22 2963 PL_reginput = locinput;
a0d0e21e 2964 if (minmod) {
c277df42 2965 CHECKPOINT lastcp;
a0d0e21e
LW
2966 minmod = 0;
2967 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2968 sayNO;
a0ed51b3 2969 locinput = PL_reginput;
c277df42 2970 REGCP_SET;
0fe9bf95
IZ
2971 if (c1 != -1000) {
2972 char *e = locinput + n - ln; /* Should not check after this */
2973 char *old = locinput;
2974
2975 if (e >= PL_regeol || (n == REG_INFTY))
2976 e = PL_regeol - 1;
2977 while (1) {
2978 /* Find place 'next' could work */
2979 if (c1 == c2) {
2980 while (locinput <= e && *locinput != c1)
2981 locinput++;
2982 } else {
2983 while (locinput <= e
2984 && *locinput != c1
2985 && *locinput != c2)
2986 locinput++;
2987 }
2988 if (locinput > e)
2989 sayNO;
2990 /* PL_reginput == old now */
2991 if (locinput != old) {
2992 ln = 1; /* Did some */
2993 if (regrepeat(scan, locinput - old) <
2994 locinput - old)
2995 sayNO;
2996 }
2997 /* PL_reginput == locinput now */
2998 if (paren) {
2999 if (ln) {
cf93c79d
IZ
3000 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
3001 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
3002 }
3003 else
cf93c79d 3004 PL_regendp[paren] = -1;
0fe9bf95
IZ
3005 }
3006 if (regmatch(next))
3007 sayYES;
3008 PL_reginput = locinput; /* Could be reset... */
3009 REGCP_UNWIND;
3010 /* Couldn't or didn't -- move forward. */
3011 old = locinput++;
3012 }
3013 }
3014 else
c277df42 3015 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3016 /* If it could work, try it. */
bbce6d69 3017 if (c1 == -1000 ||
3280af22
NIS
3018 UCHARAT(PL_reginput) == c1 ||
3019 UCHARAT(PL_reginput) == c2)
bbce6d69 3020 {
c277df42
IZ
3021 if (paren) {
3022 if (n) {
cf93c79d
IZ
3023 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3024 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3025 }
3026 else
cf93c79d 3027 PL_regendp[paren] = -1;
c277df42 3028 }
a0d0e21e 3029 if (regmatch(next))
4633a7c4 3030 sayYES;
c277df42 3031 REGCP_UNWIND;
bbce6d69 3032 }
c277df42 3033 /* Couldn't or didn't -- move forward. */
a0ed51b3 3034 PL_reginput = locinput;
a0d0e21e
LW
3035 if (regrepeat(scan, 1)) {
3036 ln++;
a0ed51b3
LW
3037 locinput = PL_reginput;
3038 }
3039 else
4633a7c4 3040 sayNO;
a0d0e21e
LW
3041 }
3042 }
3043 else {
c277df42 3044 CHECKPOINT lastcp;
a0d0e21e 3045 n = regrepeat(scan, n);
a0ed51b3 3046 locinput = PL_reginput;
22c35a8c 3047 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3048 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3049 ln = n; /* why back off? */
1aeab75a
GS
3050 /* ...because $ and \Z can match before *and* after
3051 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3052 We should back off by one in this case. */
3053 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3054 ln--;
3055 }
c277df42
IZ
3056 REGCP_SET;
3057 if (paren) {
3058 while (n >= ln) {
3059 /* If it could work, try it. */
3060 if (c1 == -1000 ||
3280af22
NIS
3061 UCHARAT(PL_reginput) == c1 ||
3062 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3063 {
3064 if (paren && n) {
3065 if (n) {
cf93c79d
IZ
3066 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3067 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3068 }
3069 else
cf93c79d 3070 PL_regendp[paren] = -1;
c277df42
IZ
3071 }
3072 if (regmatch(next))
3073 sayYES;
3074 REGCP_UNWIND;
3075 }
3076 /* Couldn't or didn't -- back up. */
3077 n--;
dfe13c55 3078 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3079 }
a0ed51b3
LW
3080 }
3081 else {
c277df42
IZ
3082 while (n >= ln) {
3083 /* If it could work, try it. */
3084 if (c1 == -1000 ||
3280af22
NIS
3085 UCHARAT(PL_reginput) == c1 ||
3086 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3087 {
3088 if (regmatch(next))
3089 sayYES;
3090 REGCP_UNWIND;
3091 }
3092 /* Couldn't or didn't -- back up. */
3093 n--;
dfe13c55 3094 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3095 }
a0d0e21e
LW
3096 }
3097 }
4633a7c4 3098 sayNO;
c277df42 3099 break;
a0d0e21e 3100 case END:
0f5d15d6
IZ
3101 if (PL_reg_call_cc) {
3102 re_cc_state *cur_call_cc = PL_reg_call_cc;
3103 CURCUR *cctmp = PL_regcc;
3104 regexp *re = PL_reg_re;
3105 CHECKPOINT cp, lastcp;
3106
3107 cp = regcppush(0); /* Save *all* the positions. */
3108 REGCP_SET;
3109 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3110 the caller. */
3111 PL_reginput = locinput; /* Make position available to
3112 the callcc. */
3113 cache_re(PL_reg_call_cc->re);
3114 PL_regcc = PL_reg_call_cc->cc;
3115 PL_reg_call_cc = PL_reg_call_cc->prev;
3116 if (regmatch(cur_call_cc->node)) {
3117 PL_reg_call_cc = cur_call_cc;
3118 regcpblow(cp);
3119 sayYES;
3120 }
3121 REGCP_UNWIND;
3122 regcppop();
3123 PL_reg_call_cc = cur_call_cc;
3124 PL_regcc = cctmp;
3125 PL_reg_re = re;
3126 cache_re(re);
3127
3128 DEBUG_r(
3129 PerlIO_printf(Perl_debug_log,
3130 "%*s continuation failed...\n",
3131 REPORT_CODE_OFF+PL_regindent*2, "")
3132 );
7821416a 3133 sayNO_SILENT;
0f5d15d6 3134 }
7821416a
IZ
3135 if (locinput < PL_regtill) {
3136 DEBUG_r(PerlIO_printf(Perl_debug_log,
3137 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3138 PL_colors[4],
3139 (long)(locinput - PL_reg_starttry),
3140 (long)(PL_regtill - PL_reg_starttry),
3141 PL_colors[5]));
3142 sayNO_FINAL; /* Cannot match: too short. */
3143 }
3144 PL_reginput = locinput; /* put where regtry can find it */
3145 sayYES_FINAL; /* Success! */
7e5428c5 3146 case SUCCEED:
3280af22 3147 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3148 sayYES_LOUD; /* Success! */
c277df42
IZ
3149 case SUSPEND:
3150 n = 1;
9fe1d20c 3151 PL_reginput = locinput;
c277df42 3152 goto do_ifmatch;
a0d0e21e 3153 case UNLESSM:
c277df42 3154 n = 0;
a0ed51b3 3155 if (scan->flags) {
0fe9bf95
IZ
3156 if (UTF) { /* XXXX This is absolutely
3157 broken, we read before
3158 start of string. */
3159 s = HOPMAYBEc(locinput, -scan->flags);
3160 if (!s)
3161 goto say_yes;
3162 PL_reginput = s;
3163 }
3164 else {
3165 if (locinput < PL_bostr + scan->flags)
3166 goto say_yes;
3167 PL_reginput = locinput - scan->flags;
3168 goto do_ifmatch;
3169 }
a0ed51b3
LW
3170 }
3171 else
3172 PL_reginput = locinput;
c277df42
IZ
3173 goto do_ifmatch;
3174 case IFMATCH:
3175 n = 1;
a0ed51b3 3176 if (scan->flags) {
0fe9bf95
IZ
3177 if (UTF) { /* XXXX This is absolutely
3178 broken, we read before
3179 start of string. */
3180 s = HOPMAYBEc(locinput, -scan->flags);
3181 if (!s || s < PL_bostr)
3182 goto say_no;
3183 PL_reginput = s;
3184 }
3185 else {
3186 if (locinput < PL_bostr + scan->flags)
3187 goto say_no;
3188 PL_reginput = locinput - scan->flags;
3189 goto do_ifmatch;
3190 }
a0ed51b3
LW
3191 }
3192 else
3193 PL_reginput = locinput;
3194
c277df42 3195 do_ifmatch:
c277df42
IZ
3196 inner = NEXTOPER(NEXTOPER(scan));
3197 if (regmatch(inner) != n) {
3198 say_no:
3199 if (logical) {
3200 logical = 0;
3201 sw = 0;
3202 goto do_longjump;
a0ed51b3
LW
3203 }
3204 else
c277df42
IZ
3205 sayNO;
3206 }
3207 say_yes:
3208 if (logical) {
3209 logical = 0;
3210 sw = 1;
3211 }
fe44a5e8 3212 if (OP(scan) == SUSPEND) {
3280af22 3213 locinput = PL_reginput;
565764a8 3214 nextchr = UCHARAT(locinput);
fe44a5e8 3215 }
c277df42
IZ
3216 /* FALL THROUGH. */
3217 case LONGJMP:
3218 do_longjump:
3219 next = scan + ARG(scan);
3220 if (next == scan)
3221 next = NULL;
a0d0e21e
LW
3222 break;
3223 default:
b900a521 3224 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3225 PTR2UV(scan), OP(scan));
cea2e8a9 3226 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3227 }
a0d0e21e
LW
3228 scan = next;
3229 }
a687059c 3230
a0d0e21e
LW
3231 /*
3232 * We get here only if there's trouble -- normally "case END" is
3233 * the terminating point.
3234 */
cea2e8a9 3235 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3236 /*NOTREACHED*/
4633a7c4
LW
3237 sayNO;
3238
7821416a
IZ
3239yes_loud:
3240 DEBUG_r(
3241 PerlIO_printf(Perl_debug_log,
3242 "%*s %scould match...%s\n",
3243 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3244 );
3245 goto yes;
3246yes_final:
3247 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3248 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3249yes:
3250#ifdef DEBUGGING
3280af22 3251 PL_regindent--;
4633a7c4
LW
3252#endif
3253 return 1;
3254
3255no:
7821416a
IZ
3256 DEBUG_r(
3257 PerlIO_printf(Perl_debug_log,
3258 "%*s %sfailed...%s\n",
3259 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3260 );
3261 goto do_no;
3262no_final:
3263do_no:
4633a7c4 3264#ifdef DEBUGGING
3280af22 3265 PL_regindent--;
4633a7c4 3266#endif
a0d0e21e 3267 return 0;
a687059c
LW
3268}
3269
3270/*
3271 - regrepeat - repeatedly match something simple, report how many
3272 */
3273/*
3274 * [This routine now assumes that it will only match on things of length 1.
3275 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3276 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3277 */
76e3520e 3278STATIC I32
cea2e8a9 3279S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3280{
5c0ca799 3281 dTHR;
a0d0e21e 3282 register char *scan;
a0d0e21e 3283 register I32 c;
3280af22 3284 register char *loceol = PL_regeol;
a0ed51b3 3285 register I32 hardcount = 0;
a0d0e21e 3286
3280af22 3287 scan = PL_reginput;
c277df42 3288 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3289 loceol = scan + max;
a0d0e21e 3290 switch (OP(p)) {
22c35a8c 3291 case REG_ANY:
a0d0e21e
LW
3292 while (scan < loceol && *scan != '\n')
3293 scan++;
3294 break;
3295 case SANY:
3296 scan = loceol;
3297 break;
a0ed51b3
LW
3298 case ANYUTF8:
3299 loceol = PL_regeol;
3300 while (scan < loceol && *scan != '\n') {
3301 scan += UTF8SKIP(scan);
3302 hardcount++;
3303 }
3304 break;
3305 case SANYUTF8:
3306 loceol = PL_regeol;
3307 while (scan < loceol) {
3308 scan += UTF8SKIP(scan);
3309 hardcount++;
3310 }
3311 break;
bbce6d69 3312 case EXACT: /* length of string is 1 */
cd439c50 3313 c = (U8)*STRING(p);
bbce6d69 3314 while (scan < loceol && UCHARAT(scan) == c)
3315 scan++;
3316 break;
3317 case EXACTF: /* length of string is 1 */
cd439c50 3318 c = (U8)*STRING(p);
bbce6d69 3319 while (scan < loceol &&
22c35a8c 3320 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3321 scan++;
3322 break;
3323 case EXACTFL: /* length of string is 1 */
3280af22 3324 PL_reg_flags |= RF_tainted;
cd439c50 3325 c = (U8)*STRING(p);
bbce6d69 3326 while (scan < loceol &&
22c35a8c 3327 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3328 scan++;
3329 break;
a0ed51b3
LW
3330 case ANYOFUTF8:
3331 loceol = PL_regeol;
3332 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3333 scan += UTF8SKIP(scan);
3334 hardcount++;
3335 }
3336 break;
a0d0e21e 3337 case ANYOF:
936ed897 3338 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3339 scan++;
a0d0e21e
LW
3340 break;
3341 case ALNUM:
3342 while (scan < loceol && isALNUM(*scan))
3343 scan++;
3344 break;
a0ed51b3
LW
3345 case ALNUMUTF8:
3346 loceol = PL_regeol;
dfe13c55 3347 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3348 scan += UTF8SKIP(scan);
3349 hardcount++;
3350 }
3351 break;
bbce6d69 3352 case ALNUML:
3280af22 3353 PL_reg_flags |= RF_tainted;
bbce6d69 3354 while (scan < loceol && isALNUM_LC(*scan))
3355 scan++;
3356 break;
a0ed51b3
LW
3357 case ALNUMLUTF8:
3358 PL_reg_flags |= RF_tainted;
3359 loceol = PL_regeol;
dfe13c55 3360 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3361 scan += UTF8SKIP(scan);
3362 hardcount++;
3363 }
3364 break;
3365 break;
a0d0e21e
LW
3366 case NALNUM:
3367 while (scan < loceol && !isALNUM(*scan))
3368 scan++;
3369 break;
a0ed51b3
LW
3370 case NALNUMUTF8:
3371 loceol = PL_regeol;
dfe13c55 3372 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3373 scan += UTF8SKIP(scan);
3374 hardcount++;
3375 }
3376 break;
bbce6d69 3377 case NALNUML:
3280af22 3378 PL_reg_flags |= RF_tainted;
bbce6d69 3379 while (scan < loceol && !isALNUM_LC(*scan))
3380 scan++;
3381 break;
a0ed51b3
LW
3382 case NALNUMLUTF8:
3383 PL_reg_flags |= RF_tainted;
3384 loceol = PL_regeol;
dfe13c55 3385 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3386 scan += UTF8SKIP(scan);
3387 hardcount++;
3388 }
3389 break;
a0d0e21e
LW
3390 case SPACE:
3391 while (scan < loceol && isSPACE(*scan))
3392 scan++;
3393 break;
a0ed51b3
LW
3394 case SPACEUTF8:
3395 loceol = PL_regeol;
dfe13c55 3396 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3397 scan += UTF8SKIP(scan);
3398 hardcount++;
3399 }
3400 break;
bbce6d69 3401 case SPACEL:
3280af22 3402 PL_reg_flags |= RF_tainted;
bbce6d69 3403 while (scan < loceol && isSPACE_LC(*scan))
3404 scan++;
3405 break;
a0ed51b3
LW
3406 case SPACELUTF8:
3407 PL_reg_flags |= RF_tainted;
3408 loceol = PL_regeol;
dfe13c55 3409 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3410 scan += UTF8SKIP(scan);
3411 hardcount++;
3412 }
3413 break;
a0d0e21e
LW
3414 case NSPACE:
3415 while (scan < loceol && !isSPACE(*scan))
3416 scan++;
3417 break;
a0ed51b3
LW
3418 case NSPACEUTF8:
3419 loceol = PL_regeol;
dfe13c55 3420 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3421 scan += UTF8SKIP(scan);
3422 hardcount++;
3423 }
3424 break;
bbce6d69 3425 case NSPACEL:
3280af22 3426 PL_reg_flags |= RF_tainted;
bbce6d69 3427 while (scan < loceol && !isSPACE_LC(*scan))
3428 scan++;
3429 break;
a0ed51b3
LW
3430 case NSPACELUTF8:
3431 PL_reg_flags |= RF_tainted;
3432 loceol = PL_regeol;
dfe13c55 3433 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3434 scan += UTF8SKIP(scan);
3435 hardcount++;
3436 }
3437 break;
a0d0e21e
LW
3438 case DIGIT:
3439 while (scan < loceol && isDIGIT(*scan))
3440 scan++;
3441 break;
a0ed51b3
LW
3442 case DIGITUTF8:
3443 loceol = PL_regeol;
dfe13c55 3444 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3445 scan += UTF8SKIP(scan);
3446 hardcount++;
3447 }
3448 break;
3449 break;
a0d0e21e
LW
3450 case NDIGIT:
3451 while (scan < loceol && !isDIGIT(*scan))
3452 scan++;
3453 break;
a0ed51b3
LW
3454 case NDIGITUTF8:
3455 loceol = PL_regeol;
dfe13c55 3456 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3457 scan += UTF8SKIP(scan);
3458 hardcount++;
3459 }
3460 break;
a0d0e21e
LW
3461 default: /* Called on something of 0 width. */
3462 break; /* So match right here or not at all. */
3463 }
a687059c 3464
a0ed51b3
LW
3465 if (hardcount)
3466 c = hardcount;
3467 else
3468 c = scan - PL_reginput;
3280af22 3469 PL_reginput = scan;
a687059c 3470
c277df42
IZ
3471 DEBUG_r(
3472 {
3473 SV *prop = sv_newmortal();
3474
3475 regprop(prop, p);
3476 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3477 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3478 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3479 });
3480
a0d0e21e 3481 return(c);
a687059c
LW
3482}
3483
3484/*
c277df42
IZ
3485 - regrepeat_hard - repeatedly match something, report total lenth and length
3486 *
3487 * The repeater is supposed to have constant length.
3488 */
3489
76e3520e 3490STATIC I32
cea2e8a9 3491S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3492{
5c0ca799 3493 dTHR;
c277df42
IZ
3494 register char *scan;
3495 register char *start;
3280af22 3496 register char *loceol = PL_regeol;
a0ed51b3 3497 I32 l = 0;
708e3b05 3498 I32 count = 0, res = 1;
a0ed51b3
LW
3499
3500 if (!max)
3501 return 0;
c277df42 3502
3280af22 3503 start = PL_reginput;
a0ed51b3 3504 if (UTF) {
708e3b05 3505 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3506 if (!count++) {
3507 l = 0;
3508 while (start < PL_reginput) {
3509 l++;
3510 start += UTF8SKIP(start);
3511 }
3512 *lp = l;
3513 if (l == 0)
3514 return max;
3515 }
3516 if (count == max)
3517 return count;
3518 }
3519 }
3520 else {
708e3b05 3521 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3522 if (!count++) {
3523 *lp = l = PL_reginput - start;
3524 if (max != REG_INFTY && l*max < loceol - scan)
3525 loceol = scan + l*max;
3526 if (l == 0)
3527 return max;
c277df42
IZ
3528 }
3529 }
3530 }
708e3b05 3531 if (!res)
3280af22 3532 PL_reginput = scan;
c277df42 3533
a0ed51b3 3534 return count;
c277df42
IZ
3535}
3536
3537/*
cb8d8820 3538 - reginclass - determine if a character falls into a character class
bbce6d69 3539 */
3540
76e3520e 3541STATIC bool
936ed897 3542S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3543{
5c0ca799 3544 dTHR;
b8c5462f 3545 char flags = ANYOF_FLAGS(p);
bbce6d69 3546 bool match = FALSE;
3547
3548 c &= 0xFF;
b8c5462f 3549 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3550 match = TRUE;
3551 else if (flags & ANYOF_FOLD) {
3552 I32 cf;
3553 if (flags & ANYOF_LOCALE) {
3280af22 3554 PL_reg_flags |= RF_tainted;
22c35a8c 3555 cf = PL_fold_locale[c];
bbce6d69 3556 }
3557 else
22c35a8c 3558 cf = PL_fold[c];
b8c5462f 3559 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3560 match = TRUE;
3561 }
3562
b8c5462f 3563 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3564 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3565 if (
3566 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3567 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3568 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3569 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3570 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3571 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3572 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3573 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3574 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3575 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3576 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3577 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3578 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3579 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3580 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3581 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3582 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3583 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3584 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3585 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3586 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3587 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3588 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3589 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3590 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3591 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3592 ) /* How's that for a conditional? */
bbce6d69 3593 {
3594 match = TRUE;
3595 }
3596 }
3597
ae5c130c 3598 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3599}
3600
a0ed51b3 3601STATIC bool
cea2e8a9 3602S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3603{
3604 dTHR;
a0ed51b3
LW
3605 char flags = ARG1(f);
3606 bool match = FALSE;
3607 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3608
3609 if (swash_fetch(sv, p))
3610 match = TRUE;
3611 else if (flags & ANYOF_FOLD) {
3612 I32 cf;
dfe13c55 3613 U8 tmpbuf[10];
a0ed51b3
LW
3614 if (flags & ANYOF_LOCALE) {
3615 PL_reg_flags |= RF_tainted;
3616 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3617 }
3618 else
3619 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3620 if (swash_fetch(sv, tmpbuf))
3621 match = TRUE;
3622 }
3623
b8c5462f 3624 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3625
3626 return (flags & ANYOF_INVERT) ? !match : match;
3627}
161b471a 3628
dfe13c55 3629STATIC U8 *
cea2e8a9 3630S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3631{
3632 dTHR;
a0ed51b3
LW
3633 if (off >= 0) {
3634 while (off-- && s < (U8*)PL_regeol)
3635 s += UTF8SKIP(s);
3636 }
3637 else {
3638 while (off++) {
3639 if (s > (U8*)PL_bostr) {
3640 s--;
3641 if (*s & 0x80) {
3642 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3643 s--;
3644 } /* XXX could check well-formedness here */
3645 }
3646 }
3647 }
3648 return s;
3649}
161b471a 3650
dfe13c55 3651STATIC U8 *
cea2e8a9 3652S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3653{
c485e607 3654 dTHR;
a0ed51b3
LW
3655 if (off >= 0) {
3656 while (off-- && s < (U8*)PL_regeol)
3657 s += UTF8SKIP(s);
3658 if (off >= 0)
3659 return 0;
3660 }
3661 else {
3662 while (off++) {
3663 if (s > (U8*)PL_bostr) {
3664 s--;
3665 if (*s & 0x80) {
3666 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3667 s--;
3668 } /* XXX could check well-formedness here */
3669 }
3670 else
3671 break;
3672 }
3673 if (off <= 0)
3674 return 0;
3675 }
3676 return s;
3677}
51371543
GS
3678
3679#ifdef PERL_OBJECT
51371543
GS
3680#include "XSUB.h"
3681#endif
3682
3683static void
3684restore_pos(pTHXo_ void *arg)
3685{
3686 dTHR;
3687 if (PL_reg_eval_set) {
3688 if (PL_reg_oldsaved) {
3689 PL_reg_re->subbeg = PL_reg_oldsaved;
3690 PL_reg_re->sublen = PL_reg_oldsavedlen;
3691 RX_MATCH_COPIED_on(PL_reg_re);
3692 }
3693 PL_reg_magic->mg_len = PL_reg_oldpos;
3694 PL_reg_eval_set = 0;
3695 PL_curpm = PL_reg_oldcurpm;
3696 }
3697}
3698