This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix the BOM bug: not a byteorder bug, a signedness bug.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
6eb5f6b9
JH
278/* XXXX Some places assume that there is a fixed substring.
279 An update may be needed if optimizer marks as "INTUITable"
280 RExen without fixed substrings. Similarly, it is assumed that
281 lengths of all the strings are no more than minlen, thus they
282 cannot come from lookahead.
283 (Or minlen should take into account lookahead.) */
284
2c2d71f5
JH
285/* A failure to find a constant substring means that there is no need to make
286 an expensive call to REx engine, thus we celebrate a failure. Similarly,
287 finding a substring too deep into the string means that less calls to
30944b6d
IZ
288 regtry() should be needed.
289
290 REx compiler's optimizer found 4 possible hints:
291 a) Anchored substring;
292 b) Fixed substring;
293 c) Whether we are anchored (beginning-of-line or \G);
294 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 295 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
296 string which does not contradict any of them.
297 */
2c2d71f5 298
6eb5f6b9
JH
299/* Most of decisions we do here should have been done at compile time.
300 The nodes of the REx which we used for the search should have been
301 deleted from the finite automaton. */
302
cad2e5aa
JH
303char *
304Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
305 char *strend, U32 flags, re_scream_pos_data *data)
306{
2c2d71f5 307 register I32 start_shift;
cad2e5aa 308 /* Should be nonnegative! */
2c2d71f5
JH
309 register I32 end_shift;
310 register char *s;
311 register SV *check;
cad2e5aa
JH
312 char *t;
313 I32 ml_anch;
2c2d71f5 314 char *tmp;
6eb5f6b9
JH
315 register char *other_last = Nullch; /* other substr checked before this */
316 char *check_at; /* check substr found at this pos */
30944b6d
IZ
317#ifdef DEBUGGING
318 char *i_strpos = strpos;
319#endif
cad2e5aa
JH
320
321 DEBUG_r( if (!PL_colorset) reginitcolors() );
322 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 323 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
324 PL_colors[4],PL_colors[5],PL_colors[0],
325 prog->precomp,
326 PL_colors[1],
327 (strlen(prog->precomp) > 60 ? "..." : ""),
328 PL_colors[0],
b900a521 329 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
330 strpos, PL_colors[1],
331 (strend - strpos > 60 ? "..." : ""))
332 );
333
2c2d71f5
JH
334 if (prog->minlen > strend - strpos) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 336 goto fail;
2c2d71f5 337 }
653099ff 338 check = prog->check_substr;
2c2d71f5 339 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
340 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
341 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 342 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
343
344 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 345 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
346 I32 slen;
347
348 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
05b4157f
GS
349 /* SvCUR is not set on references: SvRV and SvPVX overlap */
350 && sv && !SvROK(sv)
351 && (strpos + SvCUR(sv) != strend)) {
2c2d71f5 352 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 353 goto fail;
2c2d71f5 354 }
adac82c7 355 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 356 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
357 if (SvTAIL(check)) {
358 slen = SvCUR(check); /* >= 1 */
cad2e5aa 359
2c2d71f5
JH
360 if ( strend - s > slen || strend - s < slen - 1
361 || (strend - s == slen && strend[-1] != '\n')) {
362 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
363 goto fail_finish;
cad2e5aa
JH
364 }
365 /* Now should match s[0..slen-2] */
366 slen--;
653099ff 367 if (slen && (*SvPVX(check) != *s
cad2e5aa 368 || (slen > 1
653099ff 369 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
370 report_neq:
371 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
372 goto fail_finish;
373 }
cad2e5aa 374 }
653099ff
GS
375 else if (*SvPVX(check) != *s
376 || ((slen = SvCUR(check)) > 1
377 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
378 goto report_neq;
379 goto success_at_start;
cad2e5aa 380 }
2c2d71f5 381 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 382 s = strpos;
2c2d71f5 383 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 384 end_shift = prog->minlen - start_shift -
653099ff 385 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 386 if (!ml_anch) {
653099ff
GS
387 I32 end = prog->check_offset_max + CHR_SVLEN(check)
388 - (SvTAIL(check) != 0);
2c2d71f5
JH
389 I32 eshift = strend - s - end;
390
391 if (end_shift < eshift)
392 end_shift = eshift;
393 }
cad2e5aa 394 }
2c2d71f5 395 else { /* Can match at random position */
cad2e5aa
JH
396 ml_anch = 0;
397 s = strpos;
2c2d71f5
JH
398 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
399 /* Should be nonnegative! */
400 end_shift = prog->minlen - start_shift -
653099ff 401 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
402 }
403
2c2d71f5 404#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 405 if (end_shift < 0)
6bbae5e6 406 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
407#endif
408
2c2d71f5
JH
409 restart:
410 /* Find a possible match in the region s..strend by looking for
411 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 412 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
413 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
414 I32 p = -1; /* Internal iterator of scream. */
415 I32 *pp = data ? data->scream_pos : &p;
416
2c2d71f5
JH
417 if (PL_screamfirst[BmRARE(check)] >= 0
418 || ( BmRARE(check) == '\n'
419 && (BmPREVIOUS(check) == SvCUR(check) - 1)
420 && SvTAIL(check) ))
421 s = screaminstr(sv, check,
422 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 423 else
2c2d71f5 424 goto fail_finish;
cad2e5aa
JH
425 if (data)
426 *data->scream_olds = s;
427 }
428 else
429 s = fbm_instr((unsigned char*)s + start_shift,
430 (unsigned char*)strend - end_shift,
2c2d71f5 431 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
432
433 /* Update the count-of-usability, remove useless subpatterns,
434 unshift s. */
2c2d71f5
JH
435
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
437 (s ? "Found" : "Did not find"),
438 ((check == prog->anchored_substr) ? "anchored" : "floating"),
439 PL_colors[0],
7b0972df
JH
440 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
441 SvPVX(check),
2c2d71f5
JH
442 PL_colors[1], (SvTAIL(check) ? "$" : ""),
443 (s ? " at offset " : "...\n") ) );
444
445 if (!s)
446 goto fail_finish;
447
6eb5f6b9
JH
448 check_at = s;
449
2c2d71f5 450 /* Finish the diagnostic message */
30944b6d 451 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
452
453 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
454 Start with the other substr.
455 XXXX no SCREAM optimization yet - and a very coarse implementation
456 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
457 *always* match. Probably should be marked during compile...
458 Probably it is right to do no SCREAM here...
459 */
460
461 if (prog->float_substr && prog->anchored_substr) {
30944b6d 462 /* Take into account the "other" substring. */
2c2d71f5
JH
463 /* XXXX May be hopelessly wrong for UTF... */
464 if (!other_last)
6eb5f6b9 465 other_last = strpos;
2c2d71f5 466 if (check == prog->float_substr) {
30944b6d
IZ
467 do_other_anchored:
468 {
2c2d71f5
JH
469 char *last = s - start_shift, *last1, *last2;
470 char *s1 = s;
471
472 tmp = PL_bostr;
473 t = s - prog->check_offset_max;
474 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
475 && (!(prog->reganch & ROPT_UTF8)
476 || (PL_bostr = strpos, /* Used in regcopmaybe() */
477 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
478 && t > strpos)))
30944b6d 479 /* EMPTY */;
2c2d71f5
JH
480 else
481 t = strpos;
482 t += prog->anchored_offset;
6eb5f6b9
JH
483 if (t < other_last) /* These positions already checked */
484 t = other_last;
2c2d71f5
JH
485 PL_bostr = tmp;
486 last2 = last1 = strend - prog->minlen;
487 if (last < last1)
488 last1 = last;
489 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
490 /* On end-of-str: see comment below. */
491 s = fbm_instr((unsigned char*)t,
492 (unsigned char*)last1 + prog->anchored_offset
493 + SvCUR(prog->anchored_substr)
494 - (SvTAIL(prog->anchored_substr)!=0),
495 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
496 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
497 (s ? "Found" : "Contradicts"),
498 PL_colors[0],
7b0972df
JH
499 (int)(SvCUR(prog->anchored_substr)
500 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
501 SvPVX(prog->anchored_substr),
502 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
503 if (!s) {
504 if (last1 >= last2) {
505 DEBUG_r(PerlIO_printf(Perl_debug_log,
506 ", giving up...\n"));
507 goto fail_finish;
508 }
509 DEBUG_r(PerlIO_printf(Perl_debug_log,
510 ", trying floating at offset %ld...\n",
30944b6d 511 (long)(s1 + 1 - i_strpos)));
2c2d71f5 512 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 513 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
514 s = HOPc(last, 1);
515 goto restart;
516 }
517 else {
518 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 519 (long)(s - i_strpos)));
2c2d71f5 520 t = s - prog->anchored_offset;
6eb5f6b9 521 other_last = s + 1;
30944b6d 522 s = s1;
2c2d71f5
JH
523 if (t == strpos)
524 goto try_at_start;
2c2d71f5
JH
525 goto try_at_offset;
526 }
30944b6d 527 }
2c2d71f5
JH
528 }
529 else { /* Take into account the floating substring. */
530 char *last, *last1;
531 char *s1 = s;
532
533 t = s - start_shift;
534 last1 = last = strend - prog->minlen + prog->float_min_offset;
535 if (last - t > prog->float_max_offset)
536 last = t + prog->float_max_offset;
537 s = t + prog->float_min_offset;
6eb5f6b9
JH
538 if (s < other_last)
539 s = other_last;
2c2d71f5
JH
540 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
541 /* fbm_instr() takes into account exact value of end-of-str
542 if the check is SvTAIL(ed). Since false positives are OK,
543 and end-of-str is not later than strend we are OK. */
544 s = fbm_instr((unsigned char*)s,
545 (unsigned char*)last + SvCUR(prog->float_substr)
546 - (SvTAIL(prog->float_substr)!=0),
547 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
548 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
549 (s ? "Found" : "Contradicts"),
550 PL_colors[0],
7b0972df
JH
551 (int)(SvCUR(prog->float_substr)
552 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
553 SvPVX(prog->float_substr),
554 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
555 if (!s) {
556 if (last1 == last) {
557 DEBUG_r(PerlIO_printf(Perl_debug_log,
558 ", giving up...\n"));
559 goto fail_finish;
560 }
561 DEBUG_r(PerlIO_printf(Perl_debug_log,
562 ", trying anchored starting at offset %ld...\n",
30944b6d 563 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 564 other_last = last + 1;
2c2d71f5
JH
565 PL_regeol = strend; /* Used in HOP() */
566 s = HOPc(t, 1);
567 goto restart;
568 }
569 else {
570 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 571 (long)(s - i_strpos)));
6eb5f6b9 572 other_last = s + 1;
30944b6d 573 s = s1;
2c2d71f5
JH
574 if (t == strpos)
575 goto try_at_start;
2c2d71f5
JH
576 goto try_at_offset;
577 }
578 }
cad2e5aa 579 }
2c2d71f5
JH
580
581 t = s - prog->check_offset_max;
582 tmp = PL_bostr;
583 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
584 && (!(prog->reganch & ROPT_UTF8)
585 || (PL_bostr = strpos, /* Used in regcopmaybe() */
586 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
587 && t > strpos)))) {
588 PL_bostr = tmp;
589 /* Fixed substring is found far enough so that the match
590 cannot start at strpos. */
591 try_at_offset:
cad2e5aa 592 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
593 /* Eventually fbm_*() should handle this, but often
594 anchored_offset is not 0, so this check will not be wasted. */
595 /* XXXX In the code below we prefer to look for "^" even in
596 presence of anchored substrings. And we search even
597 beyond the found float position. These pessimizations
598 are historical artefacts only. */
599 find_anchor:
2c2d71f5 600 while (t < strend - prog->minlen) {
cad2e5aa 601 if (*t == '\n') {
4ee3650e 602 if (t < check_at - prog->check_offset_min) {
30944b6d 603 if (prog->anchored_substr) {
4ee3650e
GS
604 /* Since we moved from the found position,
605 we definitely contradict the found anchored
30944b6d
IZ
606 substr. Due to the above check we do not
607 contradict "check" substr.
608 Thus we can arrive here only if check substr
609 is float. Redo checking for "other"=="fixed".
610 */
611 strpos = t + 1;
612 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
613 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
614 goto do_other_anchored;
615 }
4ee3650e
GS
616 /* We don't contradict the found floating substring. */
617 /* XXXX Why not check for STCLASS? */
cad2e5aa 618 s = t + 1;
2c2d71f5 619 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 620 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
621 goto set_useful;
622 }
4ee3650e
GS
623 /* Position contradicts check-string */
624 /* XXXX probably better to look for check-string
625 than for "\n", so one should lower the limit for t? */
626 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d
IZ
627 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
628 strpos = s = t + 1;
cad2e5aa
JH
629 goto restart;
630 }
631 t++;
632 }
2c2d71f5
JH
633 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
634 PL_colors[0],PL_colors[1]));
635 goto fail_finish;
cad2e5aa 636 }
f5952150
GS
637 else {
638 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
639 PL_colors[0],PL_colors[1]));
640 }
cad2e5aa
JH
641 s = t;
642 set_useful:
2c2d71f5 643 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
644 }
645 else {
2c2d71f5 646 PL_bostr = tmp;
f5952150 647 /* The found string does not prohibit matching at strpos,
2c2d71f5 648 - no optimization of calling REx engine can be performed,
f5952150
GS
649 unless it was an MBOL and we are not after MBOL,
650 or a future STCLASS check will fail this. */
2c2d71f5
JH
651 try_at_start:
652 /* Even in this situation we may use MBOL flag if strpos is offset
653 wrt the start of the string. */
05b4157f 654 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
d506a20d
IZ
655 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n'
656 /* May be due to an implicit anchor of m{.*foo} */
657 && !(prog->reganch & ROPT_IMPLICIT))
658 {
cad2e5aa
JH
659 t = strpos;
660 goto find_anchor;
661 }
30944b6d 662 DEBUG_r( if (ml_anch)
f5952150
GS
663 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
664 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 665 );
2c2d71f5 666 success_at_start:
30944b6d 667 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 668 && prog->check_substr /* Could be deleted already */
cad2e5aa 669 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
670 && prog->check_substr == prog->float_substr)
671 {
cad2e5aa 672 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 673 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
674 SvREFCNT_dec(prog->check_substr);
675 prog->check_substr = Nullsv; /* disable */
676 prog->float_substr = Nullsv; /* clear */
677 s = strpos;
3cf5c195
IZ
678 /* XXXX This is a remnant of the old implementation. It
679 looks wasteful, since now INTUIT can use many
6eb5f6b9 680 other heuristics. */
cad2e5aa
JH
681 prog->reganch &= ~RE_USE_INTUIT;
682 }
683 else
684 s = strpos;
685 }
686
6eb5f6b9
JH
687 /* Last resort... */
688 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
689 if (prog->regstclass) {
690 /* minlen == 0 is possible if regstclass is \b or \B,
691 and the fixed substr is ''$.
692 Since minlen is already taken into account, s+1 is before strend;
693 accidentally, minlen >= 1 guaranties no false positives at s + 1
694 even for \b or \B. But (minlen? 1 : 0) below assumes that
695 regstclass does not come from lookahead... */
696 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
697 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
698 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
699 ? STR_LEN(prog->regstclass)
700 : 1);
6eb5f6b9 701 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
702 ? s + (prog->minlen? cl_l : 0)
703 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 704 : strend) ;
8fdd422b 705 char *startpos = sv && SvPOK(sv) ? strend - SvCUR(sv) : s;
6eb5f6b9
JH
706
707 t = s;
76384e4a
GS
708 if (prog->reganch & ROPT_UTF8) {
709 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
710 PL_bostr = startpos;
711 }
6eb5f6b9
JH
712 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
713 if (!s) {
714#ifdef DEBUGGING
715 char *what;
716#endif
717 if (endpos == strend) {
718 DEBUG_r( PerlIO_printf(Perl_debug_log,
719 "Could not match STCLASS...\n") );
720 goto fail;
721 }
66e933ab
GS
722 DEBUG_r( PerlIO_printf(Perl_debug_log,
723 "This position contradicts STCLASS...\n") );
653099ff
GS
724 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
725 goto fail;
6eb5f6b9
JH
726 /* Contradict one of substrings */
727 if (prog->anchored_substr) {
6eb5f6b9
JH
728 if (prog->anchored_substr == check) {
729 DEBUG_r( what = "anchored" );
730 hop_and_restart:
731 PL_regeol = strend; /* Used in HOP() */
732 s = HOPc(t, 1);
66e933ab
GS
733 if (s + start_shift + end_shift > strend) {
734 /* XXXX Should be taken into account earlier? */
735 DEBUG_r( PerlIO_printf(Perl_debug_log,
736 "Could not match STCLASS...\n") );
737 goto fail;
738 }
6eb5f6b9 739 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 740 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
741 what, (long)(s + start_shift - i_strpos)) );
742 goto restart;
743 }
66e933ab 744 /* Have both, check_string is floating */
6eb5f6b9
JH
745 if (t + start_shift >= check_at) /* Contradicts floating=check */
746 goto retry_floating_check;
747 /* Recheck anchored substring, but not floating... */
748 s = check_at;
749 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 750 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
751 (long)(other_last - i_strpos)) );
752 goto do_other_anchored;
753 }
60e71179
GS
754 /* Another way we could have checked stclass at the
755 current position only: */
756 if (ml_anch) {
757 s = t = t + 1;
758 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
759 "Looking for /%s^%s/m starting at offset %ld...\n",
760 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 761 goto try_at_offset;
66e933ab 762 }
60e71179
GS
763 if (!prog->float_substr) /* Could have been deleted */
764 goto fail;
6eb5f6b9
JH
765 /* Check is floating subtring. */
766 retry_floating_check:
767 t = check_at - start_shift;
768 DEBUG_r( what = "floating" );
769 goto hop_and_restart;
770 }
771 DEBUG_r( if (t != s)
772 PerlIO_printf(Perl_debug_log,
773 "By STCLASS: moving %ld --> %ld\n",
774 (long)(t - i_strpos), (long)(s - i_strpos));
775 else
776 PerlIO_printf(Perl_debug_log,
777 "Does not contradict STCLASS...\n") );
778 }
2c2d71f5 779 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 780 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 781 return s;
2c2d71f5
JH
782
783 fail_finish: /* Substring not found */
66e933ab
GS
784 if (prog->check_substr) /* could be removed already */
785 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 786 fail:
2c2d71f5 787 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
788 PL_colors[4],PL_colors[5]));
789 return Nullch;
790}
9661b544 791
6eb5f6b9 792/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
793STATIC char *
794S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 795{
6eb5f6b9
JH
796 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
797 char *m;
d8093b23 798 STRLEN ln;
799 unsigned int c1;
800 unsigned int c2;
6eb5f6b9
JH
801 char *e;
802 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 803
6eb5f6b9
JH
804 /* We know what class it must start with. */
805 switch (OP(c)) {
806 case ANYOFUTF8:
807 while (s < strend) {
808 if (REGINCLASSUTF8(c, (U8*)s)) {
809 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 810 goto got_it;
3cf5c195 811 else
6eb5f6b9 812 tmp = doevery;
a0ed51b3 813 }
6eb5f6b9
JH
814 else
815 tmp = 1;
a0ed51b3
LW
816 s += UTF8SKIP(s);
817 }
6eb5f6b9
JH
818 break;
819 case ANYOF:
a0ed51b3 820 while (s < strend) {
d8093b23 821 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
822 if (tmp && (norun || regtry(prog, s)))
823 goto got_it;
824 else
825 tmp = doevery;
a0ed51b3 826 }
6eb5f6b9
JH
827 else
828 tmp = 1;
a0ed51b3 829 s++;
a0d0e21e 830 }
6eb5f6b9
JH
831 break;
832 case EXACTF:
833 m = STRING(c);
834 ln = STR_LEN(c);
d8093b23 835 c1 = *(U8*)m;
6eb5f6b9
JH
836 c2 = PL_fold[c1];
837 goto do_exactf;
838 case EXACTFL:
839 m = STRING(c);
840 ln = STR_LEN(c);
d8093b23 841 c1 = *(U8*)m;
6eb5f6b9
JH
842 c2 = PL_fold_locale[c1];
843 do_exactf:
844 e = strend - ln;
b3c9acc1 845
6eb5f6b9
JH
846 if (norun && e < s)
847 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
848 /* Here it is NOT UTF! */
849 if (c1 == c2) {
850 while (s <= e) {
d8093b23 851 if ( *(U8*)s == c1
66e933ab
GS
852 && (ln == 1 || !(OP(c) == EXACTF
853 ? ibcmp(s, m, ln)
854 : ibcmp_locale(s, m, ln)))
6eb5f6b9 855 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
856 goto got_it;
857 s++;
858 }
859 } else {
860 while (s <= e) {
d8093b23 861 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
862 && (ln == 1 || !(OP(c) == EXACTF
863 ? ibcmp(s, m, ln)
864 : ibcmp_locale(s, m, ln)))
6eb5f6b9 865 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
866 goto got_it;
867 s++;
868 }
869 }
870 break;
bbce6d69 871 case BOUNDL:
3280af22 872 PL_reg_flags |= RF_tainted;
bbce6d69 873 /* FALL THROUGH */
a0d0e21e 874 case BOUND:
6eb5f6b9 875 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 876 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 877 while (s < strend) {
95bac841 878 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 879 tmp = !tmp;
6eb5f6b9 880 if ((norun || regtry(prog, s)))
a0d0e21e 881 goto got_it;
a687059c 882 }
a0d0e21e
LW
883 s++;
884 }
6eb5f6b9 885 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
886 goto got_it;
887 break;
a0ed51b3
LW
888 case BOUNDLUTF8:
889 PL_reg_flags |= RF_tainted;
890 /* FALL THROUGH */
891 case BOUNDUTF8:
6eb5f6b9 892 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 893 tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 894 while (s < strend) {
76384e4a 895 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
896 swash_fetch(PL_utf8_alnum, (U8*)s) :
897 isALNUM_LC_utf8((U8*)s)))
898 {
a0ed51b3 899 tmp = !tmp;
6eb5f6b9 900 if ((norun || regtry(prog, s)))
a0ed51b3
LW
901 goto got_it;
902 }
903 s += UTF8SKIP(s);
904 }
6eb5f6b9 905 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
906 goto got_it;
907 break;
bbce6d69 908 case NBOUNDL:
3280af22 909 PL_reg_flags |= RF_tainted;
bbce6d69 910 /* FALL THROUGH */
a0d0e21e 911 case NBOUND:
6eb5f6b9 912 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 913 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 914 while (s < strend) {
95bac841 915 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 916 tmp = !tmp;
6eb5f6b9 917 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
918 goto got_it;
919 s++;
920 }
6eb5f6b9 921 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
922 goto got_it;
923 break;
a0ed51b3
LW
924 case NBOUNDLUTF8:
925 PL_reg_flags |= RF_tainted;
926 /* FALL THROUGH */
927 case NBOUNDUTF8:
6eb5f6b9 928 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 929 tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 930 while (s < strend) {
76384e4a 931 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
932 swash_fetch(PL_utf8_alnum, (U8*)s) :
933 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 934 tmp = !tmp;
6eb5f6b9 935 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
936 goto got_it;
937 s += UTF8SKIP(s);
938 }
6eb5f6b9 939 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
940 goto got_it;
941 break;
a0d0e21e
LW
942 case ALNUM:
943 while (s < strend) {
bbce6d69 944 if (isALNUM(*s)) {
6eb5f6b9 945 if (tmp && (norun || regtry(prog, s)))
bbce6d69
PP
946 goto got_it;
947 else
948 tmp = doevery;
949 }
950 else
951 tmp = 1;
952 s++;
953 }
954 break;
a0ed51b3
LW
955 case ALNUMUTF8:
956 while (s < strend) {
dfe13c55 957 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 958 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
959 goto got_it;
960 else
961 tmp = doevery;
962 }
963 else
964 tmp = 1;
965 s += UTF8SKIP(s);
966 }
967 break;
bbce6d69 968 case ALNUML:
3280af22 969 PL_reg_flags |= RF_tainted;
bbce6d69
PP
970 while (s < strend) {
971 if (isALNUM_LC(*s)) {
6eb5f6b9 972 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 973 goto got_it;
a687059c 974 else
a0d0e21e
LW
975 tmp = doevery;
976 }
977 else
978 tmp = 1;
979 s++;
980 }
981 break;
a0ed51b3
LW
982 case ALNUMLUTF8:
983 PL_reg_flags |= RF_tainted;
984 while (s < strend) {
dfe13c55 985 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 986 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
987 goto got_it;
988 else
989 tmp = doevery;
990 }
991 else
992 tmp = 1;
993 s += UTF8SKIP(s);
994 }
995 break;
a0d0e21e
LW
996 case NALNUM:
997 while (s < strend) {
bbce6d69 998 if (!isALNUM(*s)) {
6eb5f6b9 999 if (tmp && (norun || regtry(prog, s)))
bbce6d69
PP
1000 goto got_it;
1001 else
1002 tmp = doevery;
1003 }
1004 else
1005 tmp = 1;
1006 s++;
1007 }
1008 break;
a0ed51b3
LW
1009 case NALNUMUTF8:
1010 while (s < strend) {
dfe13c55 1011 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1012 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1013 goto got_it;
1014 else
1015 tmp = doevery;
1016 }
1017 else
1018 tmp = 1;
1019 s += UTF8SKIP(s);
1020 }
1021 break;
bbce6d69 1022 case NALNUML:
3280af22 1023 PL_reg_flags |= RF_tainted;
bbce6d69
PP
1024 while (s < strend) {
1025 if (!isALNUM_LC(*s)) {
6eb5f6b9 1026 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1027 goto got_it;
a687059c 1028 else
a0d0e21e 1029 tmp = doevery;
a687059c 1030 }
a0d0e21e
LW
1031 else
1032 tmp = 1;
1033 s++;
1034 }
1035 break;
a0ed51b3
LW
1036 case NALNUMLUTF8:
1037 PL_reg_flags |= RF_tainted;
1038 while (s < strend) {
dfe13c55 1039 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1040 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1041 goto got_it;
1042 else
1043 tmp = doevery;
1044 }
1045 else
1046 tmp = 1;
1047 s += UTF8SKIP(s);
1048 }
1049 break;
a0d0e21e
LW
1050 case SPACE:
1051 while (s < strend) {
1052 if (isSPACE(*s)) {
6eb5f6b9 1053 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1054 goto got_it;
1055 else
1056 tmp = doevery;
2304df62 1057 }
a0d0e21e
LW
1058 else
1059 tmp = 1;
1060 s++;
1061 }
1062 break;
a0ed51b3
LW
1063 case SPACEUTF8:
1064 while (s < strend) {
dfe13c55 1065 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1066 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1067 goto got_it;
1068 else
1069 tmp = doevery;
1070 }
1071 else
1072 tmp = 1;
1073 s += UTF8SKIP(s);
1074 }
1075 break;
bbce6d69 1076 case SPACEL:
3280af22 1077 PL_reg_flags |= RF_tainted;
bbce6d69
PP
1078 while (s < strend) {
1079 if (isSPACE_LC(*s)) {
6eb5f6b9 1080 if (tmp && (norun || regtry(prog, s)))
bbce6d69
PP
1081 goto got_it;
1082 else
1083 tmp = doevery;
1084 }
1085 else
1086 tmp = 1;
1087 s++;
1088 }
1089 break;
a0ed51b3
LW
1090 case SPACELUTF8:
1091 PL_reg_flags |= RF_tainted;
1092 while (s < strend) {
dfe13c55 1093 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1094 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1095 goto got_it;
1096 else
1097 tmp = doevery;
1098 }
1099 else
1100 tmp = 1;
1101 s += UTF8SKIP(s);
1102 }
1103 break;
a0d0e21e
LW
1104 case NSPACE:
1105 while (s < strend) {
1106 if (!isSPACE(*s)) {
6eb5f6b9 1107 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1108 goto got_it;
1109 else
1110 tmp = doevery;
a687059c 1111 }
a0d0e21e
LW
1112 else
1113 tmp = 1;
1114 s++;
1115 }
1116 break;
a0ed51b3
LW
1117 case NSPACEUTF8:
1118 while (s < strend) {
dfe13c55 1119 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1120 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1121 goto got_it;
1122 else
1123 tmp = doevery;
1124 }
1125 else
1126 tmp = 1;
1127 s += UTF8SKIP(s);
1128 }
1129 break;
bbce6d69 1130 case NSPACEL:
3280af22 1131 PL_reg_flags |= RF_tainted;
bbce6d69
PP
1132 while (s < strend) {
1133 if (!isSPACE_LC(*s)) {
6eb5f6b9 1134 if (tmp && (norun || regtry(prog, s)))
bbce6d69
PP
1135 goto got_it;
1136 else
1137 tmp = doevery;
1138 }
1139 else
1140 tmp = 1;
1141 s++;
1142 }
1143 break;
a0ed51b3
LW
1144 case NSPACELUTF8:
1145 PL_reg_flags |= RF_tainted;
1146 while (s < strend) {
dfe13c55 1147 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1148 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1149 goto got_it;
1150 else
1151 tmp = doevery;
1152 }
1153 else
1154 tmp = 1;
1155 s += UTF8SKIP(s);
1156 }
1157 break;
a0d0e21e
LW
1158 case DIGIT:
1159 while (s < strend) {
1160 if (isDIGIT(*s)) {
6eb5f6b9 1161 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1162 goto got_it;
1163 else
1164 tmp = doevery;
2b69d0c2 1165 }
a0d0e21e
LW
1166 else
1167 tmp = 1;
1168 s++;
1169 }
1170 break;
a0ed51b3
LW
1171 case DIGITUTF8:
1172 while (s < strend) {
dfe13c55 1173 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1174 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1175 goto got_it;
1176 else
1177 tmp = doevery;
1178 }
1179 else
1180 tmp = 1;
1181 s += UTF8SKIP(s);
1182 }
1183 break;
b8c5462f
JH
1184 case DIGITL:
1185 PL_reg_flags |= RF_tainted;
1186 while (s < strend) {
1187 if (isDIGIT_LC(*s)) {
6eb5f6b9 1188 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1189 goto got_it;
1190 else
1191 tmp = doevery;
1192 }
1193 else
1194 tmp = 1;
1195 s++;
1196 }
1197 break;
1198 case DIGITLUTF8:
1199 PL_reg_flags |= RF_tainted;
1200 while (s < strend) {
1201 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1202 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1203 goto got_it;
1204 else
1205 tmp = doevery;
1206 }
1207 else
1208 tmp = 1;
1209 s += UTF8SKIP(s);
1210 }
1211 break;
a0d0e21e
LW
1212 case NDIGIT:
1213 while (s < strend) {
1214 if (!isDIGIT(*s)) {
6eb5f6b9 1215 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1216 goto got_it;
1217 else
1218 tmp = doevery;
a687059c 1219 }
a0d0e21e
LW
1220 else
1221 tmp = 1;
1222 s++;
1223 }
1224 break;
a0ed51b3
LW
1225 case NDIGITUTF8:
1226 while (s < strend) {
dfe13c55 1227 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1228 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1229 goto got_it;
1230 else
1231 tmp = doevery;
1232 }
1233 else
1234 tmp = 1;
1235 s += UTF8SKIP(s);
1236 }
1237 break;
b8c5462f
JH
1238 case NDIGITL:
1239 PL_reg_flags |= RF_tainted;
1240 while (s < strend) {
1241 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1242 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1243 goto got_it;
1244 else
1245 tmp = doevery;
1246 }
1247 else
1248 tmp = 1;
1249 s++;
a0ed51b3 1250 }
b8c5462f
JH
1251 break;
1252 case NDIGITLUTF8:
1253 PL_reg_flags |= RF_tainted;
1254 while (s < strend) {
1255 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1256 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1257 goto got_it;
cf93c79d 1258 else
b8c5462f
JH
1259 tmp = doevery;
1260 }
1261 else
1262 tmp = 1;
1263 s += UTF8SKIP(s);
1264 }
1265 break;
b3c9acc1 1266 default:
3c3eec57
GS
1267 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1268 break;
d6a28714 1269 }
6eb5f6b9
JH
1270 return 0;
1271 got_it:
1272 return s;
1273}
1274
1275/*
1276 - regexec_flags - match a regexp against a string
1277 */
1278I32
1279Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1280 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1281/* strend: pointer to null at end of string */
1282/* strbeg: real beginning of string */
1283/* minend: end of match must be >=minend after stringarg. */
1284/* data: May be used for some additional optimizations. */
1285/* nosave: For optimizations. */
1286{
1287 dTHR;
1288 register char *s;
1289 register regnode *c;
1290 register char *startpos = stringarg;
6eb5f6b9
JH
1291 I32 minlen; /* must match at least this many chars */
1292 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1293 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1294 constant substr. */ /* CC */
1295 I32 end_shift = 0; /* Same for the end. */ /* CC */
1296 I32 scream_pos = -1; /* Internal iterator of scream. */
1297 char *scream_olds;
1298 SV* oreplsv = GvSV(PL_replgv);
1299
1300 PL_regcc = 0;
1301
1302 cache_re(prog);
1303#ifdef DEBUGGING
1304 PL_regnarrate = PL_debug & 512;
1305#endif
1306
1307 /* Be paranoid... */
1308 if (prog == NULL || startpos == NULL) {
1309 Perl_croak(aTHX_ "NULL regexp parameter");
1310 return 0;
1311 }
1312
1313 minlen = prog->minlen;
1314 if (strend - startpos < minlen) goto phooey;
1315
1316 if (startpos == strbeg) /* is ^ valid at stringarg? */
1317 PL_regprev = '\n';
1318 else {
1319 PL_regprev = (U32)stringarg[-1];
1320 if (!PL_multiline && PL_regprev == '\n')
1321 PL_regprev = '\0'; /* force ^ to NOT match */
1322 }
1323
1324 /* Check validity of program. */
1325 if (UCHARAT(prog->program) != REG_MAGIC) {
1326 Perl_croak(aTHX_ "corrupted regexp program");
1327 }
1328
1329 PL_reg_flags = 0;
1330 PL_reg_eval_set = 0;
1331 PL_reg_maxiter = 0;
1332
1333 if (prog->reganch & ROPT_UTF8)
1334 PL_reg_flags |= RF_utf8;
1335
1336 /* Mark beginning of line for ^ and lookbehind. */
1337 PL_regbol = startpos;
1338 PL_bostr = strbeg;
1339 PL_reg_sv = sv;
1340
1341 /* Mark end of line for $ (and such) */
1342 PL_regeol = strend;
1343
1344 /* see how far we have to get to not match where we matched before */
1345 PL_regtill = startpos+minend;
1346
1347 /* We start without call_cc context. */
1348 PL_reg_call_cc = 0;
1349
1350 /* If there is a "must appear" string, look for it. */
1351 s = startpos;
1352
1353 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1354 MAGIC *mg;
1355
1356 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1357 PL_reg_ganch = startpos;
1358 else if (sv && SvTYPE(sv) >= SVt_PVMG
1359 && SvMAGIC(sv)
1360 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1361 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1362 if (prog->reganch & ROPT_ANCH_GPOS) {
1363 if (s > PL_reg_ganch)
1364 goto phooey;
1365 s = PL_reg_ganch;
1366 }
1367 }
1368 else /* pos() not defined */
1369 PL_reg_ganch = strbeg;
1370 }
1371
1372 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1373 re_scream_pos_data d;
1374
1375 d.scream_olds = &scream_olds;
1376 d.scream_pos = &scream_pos;
1377 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1378 if (!s)
1379 goto phooey; /* not present */
1380 }
1381
1382 DEBUG_r( if (!PL_colorset) reginitcolors() );
1383 DEBUG_r(PerlIO_printf(Perl_debug_log,
1384 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1385 PL_colors[4],PL_colors[5],PL_colors[0],
1386 prog->precomp,
1387 PL_colors[1],
1388 (strlen(prog->precomp) > 60 ? "..." : ""),
1389 PL_colors[0],
1390 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1391 startpos, PL_colors[1],
1392 (strend - startpos > 60 ? "..." : ""))
1393 );
1394
1395 /* Simplest case: anchored match need be tried only once. */
1396 /* [unless only anchor is BOL and multiline is set] */
1397 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1398 if (s == startpos && regtry(prog, startpos))
1399 goto got_it;
1400 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1401 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1402 {
1403 char *end;
1404
1405 if (minlen)
1406 dontbother = minlen - 1;
1407 end = HOPc(strend, -dontbother) - 1;
1408 /* for multiline we only have to try after newlines */
1409 if (prog->check_substr) {
1410 if (s == startpos)
1411 goto after_try;
1412 while (1) {
1413 if (regtry(prog, s))
1414 goto got_it;
1415 after_try:
1416 if (s >= end)
1417 goto phooey;
1418 if (prog->reganch & RE_USE_INTUIT) {
1419 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1420 if (!s)
1421 goto phooey;
1422 }
1423 else
1424 s++;
1425 }
1426 } else {
1427 if (s > startpos)
1428 s--;
1429 while (s < end) {
1430 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1431 if (regtry(prog, s))
1432 goto got_it;
1433 }
1434 }
1435 }
1436 }
1437 goto phooey;
1438 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1439 if (regtry(prog, PL_reg_ganch))
1440 goto got_it;
1441 goto phooey;
1442 }
1443
1444 /* Messy cases: unanchored match. */
1445 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1446 /* we have /x+whatever/ */
1447 /* it must be a one character string (XXXX Except UTF?) */
1448 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1449#ifdef DEBUGGING
1450 int did_match = 0;
1451#endif
1452
6eb5f6b9
JH
1453 if (UTF) {
1454 while (s < strend) {
1455 if (*s == ch) {
bf93d4cc 1456 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1457 if (regtry(prog, s)) goto got_it;
1458 s += UTF8SKIP(s);
1459 while (s < strend && *s == ch)
1460 s += UTF8SKIP(s);
1461 }
1462 s += UTF8SKIP(s);
1463 }
1464 }
1465 else {
1466 while (s < strend) {
1467 if (*s == ch) {
bf93d4cc 1468 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1469 if (regtry(prog, s)) goto got_it;
1470 s++;
1471 while (s < strend && *s == ch)
1472 s++;
1473 }
1474 s++;
1475 }
1476 }
bf93d4cc
GS
1477 DEBUG_r(did_match ||
1478 PerlIO_printf(Perl_debug_log,
1479 "Did not find anchored character...\n"));
6eb5f6b9
JH
1480 }
1481 /*SUPPRESS 560*/
1482 else if (prog->anchored_substr != Nullsv
1483 || (prog->float_substr != Nullsv
1484 && prog->float_max_offset < strend - s)) {
1485 SV *must = prog->anchored_substr
1486 ? prog->anchored_substr : prog->float_substr;
1487 I32 back_max =
1488 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1489 I32 back_min =
1490 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1491 char *last = HOPc(strend, /* Cannot start after this */
1492 -(I32)(CHR_SVLEN(must)
1493 - (SvTAIL(must) != 0) + back_min));
1494 char *last1; /* Last position checked before */
bf93d4cc
GS
1495#ifdef DEBUGGING
1496 int did_match = 0;
1497#endif
6eb5f6b9
JH
1498
1499 if (s > PL_bostr)
1500 last1 = HOPc(s, -1);
1501 else
1502 last1 = s - 1; /* bogus */
1503
1504 /* XXXX check_substr already used to find `s', can optimize if
1505 check_substr==must. */
1506 scream_pos = -1;
1507 dontbother = end_shift;
1508 strend = HOPc(strend, -dontbother);
1509 while ( (s <= last) &&
1510 ((flags & REXEC_SCREAM)
1511 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1512 end_shift, &scream_pos, 0))
1513 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1514 (unsigned char*)strend, must,
1515 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1516 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1517 if (HOPc(s, -back_max) > last1) {
1518 last1 = HOPc(s, -back_min);
1519 s = HOPc(s, -back_max);
1520 }
1521 else {
1522 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1523
1524 last1 = HOPc(s, -back_min);
1525 s = t;
1526 }
1527 if (UTF) {
1528 while (s <= last1) {
1529 if (regtry(prog, s))
1530 goto got_it;
1531 s += UTF8SKIP(s);
1532 }
1533 }
1534 else {
1535 while (s <= last1) {
1536 if (regtry(prog, s))
1537 goto got_it;
1538 s++;
1539 }
1540 }
1541 }
bf93d4cc
GS
1542 DEBUG_r(did_match ||
1543 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1544 ((must == prog->anchored_substr)
1545 ? "anchored" : "floating"),
1546 PL_colors[0],
1547 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1548 SvPVX(must),
1549 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1550 goto phooey;
1551 }
155aba94 1552 else if ((c = prog->regstclass)) {
66e933ab
GS
1553 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1554 /* don't bother with what can't match */
6eb5f6b9
JH
1555 strend = HOPc(strend, -(minlen - 1));
1556 if (find_byclass(prog, c, s, strend, startpos, 0))
1557 goto got_it;
bf93d4cc 1558 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1559 }
1560 else {
1561 dontbother = 0;
1562 if (prog->float_substr != Nullsv) { /* Trim the end. */
1563 char *last;
d6a28714
JH
1564
1565 if (flags & REXEC_SCREAM) {
1566 last = screaminstr(sv, prog->float_substr, s - strbeg,
1567 end_shift, &scream_pos, 1); /* last one */
1568 if (!last)
1569 last = scream_olds; /* Only one occurence. */
b8c5462f 1570 }
d6a28714
JH
1571 else {
1572 STRLEN len;
1573 char *little = SvPV(prog->float_substr, len);
1574
1575 if (SvTAIL(prog->float_substr)) {
1576 if (memEQ(strend - len + 1, little, len - 1))
1577 last = strend - len + 1;
1578 else if (!PL_multiline)
1579 last = memEQ(strend - len, little, len)
1580 ? strend - len : Nullch;
b8c5462f 1581 else
d6a28714
JH
1582 goto find_last;
1583 } else {
1584 find_last:
1585 if (len)
1586 last = rninstr(s, strend, little, little + len);
b8c5462f 1587 else
d6a28714 1588 last = strend; /* matching `$' */
b8c5462f 1589 }
b8c5462f 1590 }
bf93d4cc
GS
1591 if (last == NULL) {
1592 DEBUG_r(PerlIO_printf(Perl_debug_log,
1593 "%sCan't trim the tail, match fails (should not happen)%s\n",
1594 PL_colors[4],PL_colors[5]));
1595 goto phooey; /* Should not happen! */
1596 }
d6a28714
JH
1597 dontbother = strend - last + prog->float_min_offset;
1598 }
1599 if (minlen && (dontbother < minlen))
1600 dontbother = minlen - 1;
1601 strend -= dontbother; /* this one's always in bytes! */
1602 /* We don't know much -- general case. */
1603 if (UTF) {
1604 for (;;) {
1605 if (regtry(prog, s))
1606 goto got_it;
1607 if (s >= strend)
1608 break;
b8c5462f 1609 s += UTF8SKIP(s);
d6a28714
JH
1610 };
1611 }
1612 else {
1613 do {
1614 if (regtry(prog, s))
1615 goto got_it;
1616 } while (s++ < strend);
1617 }
1618 }
1619
1620 /* Failure. */
1621 goto phooey;
1622
1623got_it:
1624 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1625
1626 if (PL_reg_eval_set) {
1627 /* Preserve the current value of $^R */
1628 if (oreplsv != GvSV(PL_replgv))
1629 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1630 restored, the value remains
1631 the same. */
1632 restore_pos(aTHXo_ 0);
1633 }
1634
1635 /* make sure $`, $&, $', and $digit will work later */
1636 if ( !(flags & REXEC_NOT_FIRST) ) {
1637 if (RX_MATCH_COPIED(prog)) {
1638 Safefree(prog->subbeg);
1639 RX_MATCH_COPIED_off(prog);
1640 }
1641 if (flags & REXEC_COPY_STR) {
1642 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1643
1644 s = savepvn(strbeg, i);
1645 prog->subbeg = s;
1646 prog->sublen = i;
1647 RX_MATCH_COPIED_on(prog);
1648 }
1649 else {
1650 prog->subbeg = strbeg;
1651 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1652 }
1653 }
1654
1655 return 1;
1656
1657phooey:
bf93d4cc
GS
1658 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1659 PL_colors[4],PL_colors[5]));
d6a28714
JH
1660 if (PL_reg_eval_set)
1661 restore_pos(aTHXo_ 0);
1662 return 0;
1663}
1664
1665/*
1666 - regtry - try match at specific point
1667 */
1668STATIC I32 /* 0 failure, 1 success */
1669S_regtry(pTHX_ regexp *prog, char *startpos)
1670{
1671 dTHR;
1672 register I32 i;
1673 register I32 *sp;
1674 register I32 *ep;
1675 CHECKPOINT lastcp;
1676
1677 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1678 MAGIC *mg;
1679
1680 PL_reg_eval_set = RS_init;
1681 DEBUG_r(DEBUG_s(
b900a521
JH
1682 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1683 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1684 ));
e8347627 1685 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1686 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1687 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1688 SAVETMPS;
1689 /* Apparently this is not needed, judging by wantarray. */
e8347627 1690 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1691 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1692
1693 if (PL_reg_sv) {
1694 /* Make $_ available to executed code. */
1695 if (PL_reg_sv != DEFSV) {
1696 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1697 SAVESPTR(DEFSV);
1698 DEFSV = PL_reg_sv;
b8c5462f 1699 }
d6a28714
JH
1700
1701 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1702 && (mg = mg_find(PL_reg_sv, 'g')))) {
1703 /* prepare for quick setting of pos */
1704 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1705 mg = mg_find(PL_reg_sv, 'g');
1706 mg->mg_len = -1;
b8c5462f 1707 }
d6a28714
JH
1708 PL_reg_magic = mg;
1709 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1710 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1711 }
1712 if (!PL_reg_curpm)
0f79a09d 1713 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1714 PL_reg_curpm->op_pmregexp = prog;
1715 PL_reg_oldcurpm = PL_curpm;
1716 PL_curpm = PL_reg_curpm;
1717 if (RX_MATCH_COPIED(prog)) {
1718 /* Here is a serious problem: we cannot rewrite subbeg,
1719 since it may be needed if this match fails. Thus
1720 $` inside (?{}) could fail... */
1721 PL_reg_oldsaved = prog->subbeg;
1722 PL_reg_oldsavedlen = prog->sublen;
1723 RX_MATCH_COPIED_off(prog);
1724 }
1725 else
1726 PL_reg_oldsaved = Nullch;
1727 prog->subbeg = PL_bostr;
1728 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1729 }
1730 prog->startp[0] = startpos - PL_bostr;
1731 PL_reginput = startpos;
1732 PL_regstartp = prog->startp;
1733 PL_regendp = prog->endp;
1734 PL_reglastparen = &prog->lastparen;
1735 prog->lastparen = 0;
1736 PL_regsize = 0;
1737 DEBUG_r(PL_reg_starttry = startpos);
1738 if (PL_reg_start_tmpl <= prog->nparens) {
1739 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1740 if(PL_reg_start_tmp)
1741 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1742 else
1743 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1744 }
1745
1746 /* XXXX What this code is doing here?!!! There should be no need
1747 to do this again and again, PL_reglastparen should take care of
1748 this! */
1749 sp = prog->startp;
1750 ep = prog->endp;
1751 if (prog->nparens) {
1752 for (i = prog->nparens; i >= 1; i--) {
1753 *++sp = -1;
1754 *++ep = -1;
1755 }
1756 }
1757 REGCP_SET;
1758 if (regmatch(prog->program + 1)) {
1759 prog->endp[0] = PL_reginput - PL_bostr;
1760 return 1;
1761 }
1762 REGCP_UNWIND;
1763 return 0;
1764}
1765
1766/*
1767 - regmatch - main matching routine
1768 *
1769 * Conceptually the strategy is simple: check to see whether the current
1770 * node matches, call self recursively to see whether the rest matches,
1771 * and then act accordingly. In practice we make some effort to avoid
1772 * recursion, in particular by going through "ordinary" nodes (that don't
1773 * need to know whether the rest of the match failed) by a loop instead of
1774 * by recursion.
1775 */
1776/* [lwall] I've hoisted the register declarations to the outer block in order to
1777 * maybe save a little bit of pushing and popping on the stack. It also takes
1778 * advantage of machines that use a register save mask on subroutine entry.
1779 */
1780STATIC I32 /* 0 failure, 1 success */
1781S_regmatch(pTHX_ regnode *prog)
1782{
1783 dTHR;
1784 register regnode *scan; /* Current node. */
1785 regnode *next; /* Next node. */
1786 regnode *inner; /* Next node in internal branch. */
1787 register I32 nextchr; /* renamed nextchr - nextchar colides with
1788 function of same name */
1789 register I32 n; /* no or next */
1790 register I32 ln; /* len or last */
1791 register char *s; /* operand or save */
1792 register char *locinput = PL_reginput;
1793 register I32 c1, c2, paren; /* case fold search, parenth */
1794 int minmod = 0, sw = 0, logical = 0;
1795#ifdef DEBUGGING
1796 PL_regindent++;
1797#endif
1798
1799 /* Note that nextchr is a byte even in UTF */
1800 nextchr = UCHARAT(locinput);
1801 scan = prog;
1802 while (scan != NULL) {
1803#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1804#ifdef DEBUGGING
1805# define sayYES goto yes
1806# define sayNO goto no
7821416a
IZ
1807# define sayYES_FINAL goto yes_final
1808# define sayYES_LOUD goto yes_loud
1809# define sayNO_FINAL goto no_final
1810# define sayNO_SILENT goto do_no
d6a28714
JH
1811# define saySAME(x) if (x) goto yes; else goto no
1812# define REPORT_CODE_OFF 24
1813#else
1814# define sayYES return 1
1815# define sayNO return 0
7821416a
IZ
1816# define sayYES_FINAL return 1
1817# define sayYES_LOUD return 1
1818# define sayNO_FINAL return 0
1819# define sayNO_SILENT return 0
d6a28714
JH
1820# define saySAME(x) return x
1821#endif
1822 DEBUG_r( {
1823 SV *prop = sv_newmortal();
1824 int docolor = *PL_colors[0];
1825 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1826 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1827 /* The part of the string before starttry has one color
1828 (pref0_len chars), between starttry and current
1829 position another one (pref_len - pref0_len chars),
1830 after the current position the third one.
1831 We assume that pref0_len <= pref_len, otherwise we
1832 decrease pref0_len. */
1833 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1834 ? (5 + taill) - l : locinput - PL_bostr);
1835 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1836
1837 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1838 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1839 ? (5 + taill) - pref_len : PL_regeol - locinput);
1840 if (pref0_len < 0)
1841 pref0_len = 0;
1842 if (pref0_len > pref_len)
1843 pref0_len = pref_len;
1844 regprop(prop, scan);
1845 PerlIO_printf(Perl_debug_log,
b900a521
JH
1846 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1847 (IV)(locinput - PL_bostr),
d6a28714
JH
1848 PL_colors[4], pref0_len,
1849 locinput - pref_len, PL_colors[5],
1850 PL_colors[2], pref_len - pref0_len,
1851 locinput - pref_len + pref0_len, PL_colors[3],
1852 (docolor ? "" : "> <"),
1853 PL_colors[0], l, locinput, PL_colors[1],
1854 15 - l - pref_len + 1,
1855 "",
b900a521 1856 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1857 SvPVX(prop));
1858 } );
1859
1860 next = scan + NEXT_OFF(scan);
1861 if (next == scan)
1862 next = NULL;
1863
1864 switch (OP(scan)) {
1865 case BOL:
1866 if (locinput == PL_bostr
1867 ? PL_regprev == '\n'
1868 : (PL_multiline &&
1869 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1870 {
1871 /* regtill = regbol; */
b8c5462f
JH
1872 break;
1873 }
d6a28714
JH
1874 sayNO;
1875 case MBOL:
1876 if (locinput == PL_bostr
1877 ? PL_regprev == '\n'
1878 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1879 {
b8c5462f
JH
1880 break;
1881 }
d6a28714
JH
1882 sayNO;
1883 case SBOL:
c2a73568 1884 if (locinput == PL_bostr)
b8c5462f 1885 break;
d6a28714
JH
1886 sayNO;
1887 case GPOS:
1888 if (locinput == PL_reg_ganch)
1889 break;
1890 sayNO;
1891 case EOL:
1892 if (PL_multiline)
1893 goto meol;
1894 else
1895 goto seol;
1896 case MEOL:
1897 meol:
1898 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1899 sayNO;
b8c5462f 1900 break;
d6a28714
JH
1901 case SEOL:
1902 seol:
1903 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1904 sayNO;
d6a28714 1905 if (PL_regeol - locinput > 1)
b8c5462f 1906 sayNO;
b8c5462f 1907 break;
d6a28714
JH
1908 case EOS:
1909 if (PL_regeol != locinput)
b8c5462f 1910 sayNO;
d6a28714
JH
1911 break;
1912 case SANYUTF8:
b8c5462f 1913 if (nextchr & 0x80) {
b8c5462f 1914 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1915 if (locinput > PL_regeol)
1916 sayNO;
b8c5462f
JH
1917 nextchr = UCHARAT(locinput);
1918 break;
1919 }
d6a28714 1920 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1921 sayNO;
b8c5462f 1922 nextchr = UCHARAT(++locinput);
a0d0e21e 1923 break;
d6a28714
JH
1924 case SANY:
1925 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1926 sayNO;
1927 nextchr = UCHARAT(++locinput);
b85d18e9 1928 break;
d6a28714 1929 case ANYUTF8:
a0ed51b3 1930 if (nextchr & 0x80) {
b8c5462f 1931 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1932 if (locinput > PL_regeol)
1933 sayNO;
a0ed51b3
LW
1934 nextchr = UCHARAT(locinput);
1935 break;
1936 }
155aba94 1937 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
1938 sayNO;
1939 nextchr = UCHARAT(++locinput);
1940 break;
d6a28714 1941 case REG_ANY:
155aba94 1942 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 1943 sayNO;
76e3520e 1944 nextchr = UCHARAT(++locinput);
a0d0e21e 1945 break;
d6a28714 1946 case EXACT:
cd439c50
IZ
1947 s = STRING(scan);
1948 ln = STR_LEN(scan);
d6a28714
JH
1949 /* Inline the first character, for speed. */
1950 if (UCHARAT(s) != nextchr)
1951 sayNO;
1952 if (PL_regeol - locinput < ln)
1953 sayNO;
1954 if (ln > 1 && memNE(s, locinput, ln))
1955 sayNO;
1956 locinput += ln;
1957 nextchr = UCHARAT(locinput);
1958 break;
1959 case EXACTFL:
b8c5462f
JH
1960 PL_reg_flags |= RF_tainted;
1961 /* FALL THROUGH */
d6a28714 1962 case EXACTF:
cd439c50
IZ
1963 s = STRING(scan);
1964 ln = STR_LEN(scan);
d6a28714
JH
1965
1966 if (UTF) {
1967 char *l = locinput;
1968 char *e = s + ln;
1969 c1 = OP(scan) == EXACTF;
1970 while (s < e) {
1971 if (l >= PL_regeol)
1972 sayNO;
1973 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1974 toLOWER_utf8((U8*)l) :
1975 toLOWER_LC_utf8((U8*)l)))
1976 {
1977 sayNO;
1978 }
1979 s += UTF8SKIP(s);
1980 l += UTF8SKIP(l);
b8c5462f 1981 }
d6a28714 1982 locinput = l;
a0ed51b3
LW
1983 nextchr = UCHARAT(locinput);
1984 break;
1985 }
d6a28714
JH
1986
1987 /* Inline the first character, for speed. */
1988 if (UCHARAT(s) != nextchr &&
1989 UCHARAT(s) != ((OP(scan) == EXACTF)
1990 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1991 sayNO;
d6a28714 1992 if (PL_regeol - locinput < ln)
b8c5462f 1993 sayNO;
d6a28714
JH
1994 if (ln > 1 && (OP(scan) == EXACTF
1995 ? ibcmp(s, locinput, ln)
1996 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1997 sayNO;
d6a28714
JH
1998 locinput += ln;
1999 nextchr = UCHARAT(locinput);
a0d0e21e 2000 break;
d6a28714 2001 case ANYOFUTF8:
d6a28714 2002 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 2003 sayNO;
d6a28714
JH
2004 if (locinput >= PL_regeol)
2005 sayNO;
2006 locinput += PL_utf8skip[nextchr];
2007 nextchr = UCHARAT(locinput);
2008 break;
2009 case ANYOF:
d6a28714 2010 if (nextchr < 0)
b8c5462f 2011 nextchr = UCHARAT(locinput);
936ed897 2012 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2013 sayNO;
2014 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2015 sayNO;
b8c5462f
JH
2016 nextchr = UCHARAT(++locinput);
2017 break;
d6a28714 2018 case ALNUML:
b8c5462f
JH
2019 PL_reg_flags |= RF_tainted;
2020 /* FALL THROUGH */
d6a28714 2021 case ALNUM:
b8c5462f 2022 if (!nextchr)
4633a7c4 2023 sayNO;
d6a28714
JH
2024 if (!(OP(scan) == ALNUM
2025 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2026 sayNO;
2027 nextchr = UCHARAT(++locinput);
bbce6d69 2028 break;
d6a28714 2029 case ALNUMLUTF8:
3280af22 2030 PL_reg_flags |= RF_tainted;
bbce6d69 2031 /* FALL THROUGH */
d6a28714 2032 case ALNUMUTF8:
b8c5462f
JH
2033 if (!nextchr)
2034 sayNO;
2035 if (nextchr & 0x80) {
d6a28714
JH
2036 if (!(OP(scan) == ALNUMUTF8
2037 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2038 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2039 {
2040 sayNO;
a0ed51b3 2041 }
b8c5462f 2042 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2043 nextchr = UCHARAT(locinput);
2044 break;
2045 }
d6a28714
JH
2046 if (!(OP(scan) == ALNUMUTF8
2047 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2048 sayNO;
b8c5462f 2049 nextchr = UCHARAT(++locinput);
a0d0e21e 2050 break;
d6a28714 2051 case NALNUML:
b8c5462f
JH
2052 PL_reg_flags |= RF_tainted;
2053 /* FALL THROUGH */
d6a28714
JH
2054 case NALNUM:
2055 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2056 sayNO;
d6a28714
JH
2057 if (OP(scan) == NALNUM
2058 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2059 sayNO;
b8c5462f 2060 nextchr = UCHARAT(++locinput);
a0ed51b3 2061 break;
d6a28714 2062 case NALNUMLUTF8:
b8c5462f
JH
2063 PL_reg_flags |= RF_tainted;
2064 /* FALL THROUGH */
d6a28714 2065 case NALNUMUTF8:
3280af22 2066 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2067 sayNO;
b8c5462f 2068 if (nextchr & 0x80) {
d6a28714
JH
2069 if (OP(scan) == NALNUMUTF8
2070 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2071 : isALNUM_LC_utf8((U8*)locinput))
2072 {
b8c5462f 2073 sayNO;
d6a28714 2074 }
b8c5462f
JH
2075 locinput += PL_utf8skip[nextchr];
2076 nextchr = UCHARAT(locinput);
2077 break;
2078 }
d6a28714
JH
2079 if (OP(scan) == NALNUMUTF8
2080 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2081 sayNO;
76e3520e 2082 nextchr = UCHARAT(++locinput);
a0d0e21e 2083 break;
d6a28714
JH
2084 case BOUNDL:
2085 case NBOUNDL:
3280af22 2086 PL_reg_flags |= RF_tainted;
bbce6d69 2087 /* FALL THROUGH */
d6a28714
JH
2088 case BOUND:
2089 case NBOUND:
2090 /* was last char in word? */
2091 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2092 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2093 ln = isALNUM(ln);
2094 n = isALNUM(nextchr);
2095 }
2096 else {
2097 ln = isALNUM_LC(ln);
2098 n = isALNUM_LC(nextchr);
2099 }
2100 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2101 sayNO;
a0d0e21e 2102 break;
d6a28714
JH
2103 case BOUNDLUTF8:
2104 case NBOUNDLUTF8:
a0ed51b3
LW
2105 PL_reg_flags |= RF_tainted;
2106 /* FALL THROUGH */
d6a28714
JH
2107 case BOUNDUTF8:
2108 case NBOUNDUTF8:
2109 /* was last char in word? */
2110 ln = (locinput != PL_regbol)
2111 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2112 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2113 ln = isALNUM_uni(ln);
2114 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2115 }
d6a28714
JH
2116 else {
2117 ln = isALNUM_LC_uni(ln);
2118 n = isALNUM_LC_utf8((U8*)locinput);
2119 }
2120 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2121 sayNO;
a0ed51b3 2122 break;
d6a28714 2123 case SPACEL:
3280af22 2124 PL_reg_flags |= RF_tainted;
bbce6d69 2125 /* FALL THROUGH */
d6a28714 2126 case SPACE:
9442cb0e 2127 if (!nextchr)
4633a7c4 2128 sayNO;
d6a28714
JH
2129 if (!(OP(scan) == SPACE
2130 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2131 sayNO;
76e3520e 2132 nextchr = UCHARAT(++locinput);
a0d0e21e 2133 break;
d6a28714 2134 case SPACELUTF8:
a0ed51b3
LW
2135 PL_reg_flags |= RF_tainted;
2136 /* FALL THROUGH */
d6a28714 2137 case SPACEUTF8:
9442cb0e 2138 if (!nextchr)
a0ed51b3
LW
2139 sayNO;
2140 if (nextchr & 0x80) {
d6a28714 2141 if (!(OP(scan) == SPACEUTF8
9442cb0e 2142 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2143 : isSPACE_LC_utf8((U8*)locinput)))
2144 {
a0ed51b3 2145 sayNO;
d6a28714 2146 }
6f06b55f 2147 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2148 nextchr = UCHARAT(locinput);
2149 break;
2150 }
d6a28714
JH
2151 if (!(OP(scan) == SPACEUTF8
2152 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2153 sayNO;
2154 nextchr = UCHARAT(++locinput);
2155 break;
d6a28714 2156 case NSPACEL:
3280af22 2157 PL_reg_flags |= RF_tainted;
bbce6d69 2158 /* FALL THROUGH */
d6a28714 2159 case NSPACE:
9442cb0e 2160 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2161 sayNO;
9442cb0e 2162 if (OP(scan) == NSPACE
d6a28714 2163 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2164 sayNO;
b8c5462f 2165 nextchr = UCHARAT(++locinput);
a0d0e21e 2166 break;
d6a28714 2167 case NSPACELUTF8:
a0ed51b3
LW
2168 PL_reg_flags |= RF_tainted;
2169 /* FALL THROUGH */
d6a28714 2170 case NSPACEUTF8:
9442cb0e 2171 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2172 sayNO;
2173 if (nextchr & 0x80) {
d6a28714 2174 if (OP(scan) == NSPACEUTF8
9442cb0e 2175 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2176 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2177 {
2178 sayNO;
2179 }
2180 locinput += PL_utf8skip[nextchr];
2181 nextchr = UCHARAT(locinput);
2182 break;
a0ed51b3 2183 }
d6a28714
JH
2184 if (OP(scan) == NSPACEUTF8
2185 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2186 sayNO;
76e3520e 2187 nextchr = UCHARAT(++locinput);
a0d0e21e 2188 break;
d6a28714 2189 case DIGITL:
a0ed51b3
LW
2190 PL_reg_flags |= RF_tainted;
2191 /* FALL THROUGH */
d6a28714 2192 case DIGIT:
9442cb0e 2193 if (!nextchr)
a0ed51b3 2194 sayNO;
d6a28714
JH
2195 if (!(OP(scan) == DIGIT
2196 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2197 sayNO;
76e3520e 2198 nextchr = UCHARAT(++locinput);
a0d0e21e 2199 break;
d6a28714 2200 case DIGITLUTF8:
a0ed51b3
LW
2201 PL_reg_flags |= RF_tainted;
2202 /* FALL THROUGH */
d6a28714 2203 case DIGITUTF8:
a0ed51b3
LW
2204 if (!nextchr)
2205 sayNO;
2206 if (nextchr & 0x80) {
9442cb0e
GS
2207 if (!(OP(scan) == DIGITUTF8
2208 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2209 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2210 {
a0ed51b3 2211 sayNO;
dfe13c55 2212 }
6f06b55f 2213 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2214 nextchr = UCHARAT(locinput);
2215 break;
2216 }
9442cb0e
GS
2217 if (!(OP(scan) == DIGITUTF8
2218 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2219 sayNO;
2220 nextchr = UCHARAT(++locinput);
2221 break;
d6a28714 2222 case NDIGITL:
b8c5462f
JH
2223 PL_reg_flags |= RF_tainted;
2224 /* FALL THROUGH */
d6a28714 2225 case NDIGIT:
9442cb0e 2226 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2227 sayNO;
9442cb0e 2228 if (OP(scan) == NDIGIT
d6a28714 2229 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2230 sayNO;
76e3520e 2231 nextchr = UCHARAT(++locinput);
a0d0e21e 2232 break;
d6a28714 2233 case NDIGITLUTF8:
b8c5462f
JH
2234 PL_reg_flags |= RF_tainted;
2235 /* FALL THROUGH */
d6a28714 2236 case NDIGITUTF8:
b8c5462f
JH
2237 if (!nextchr && locinput >= PL_regeol)
2238 sayNO;
a0ed51b3 2239 if (nextchr & 0x80) {
9442cb0e
GS
2240 if (OP(scan) == NDIGITUTF8
2241 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2242 : isDIGIT_LC_utf8((U8*)locinput))
2243 {
a0ed51b3 2244 sayNO;
9442cb0e 2245 }
6f06b55f 2246 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2247 nextchr = UCHARAT(locinput);
2248 break;
2249 }
9442cb0e
GS
2250 if (OP(scan) == NDIGITUTF8
2251 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2252 sayNO;
2253 nextchr = UCHARAT(++locinput);
2254 break;
2255 case CLUMP:
dfe13c55 2256 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2257 sayNO;
6f06b55f 2258 locinput += PL_utf8skip[nextchr];
dfe13c55 2259 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2260 locinput += UTF8SKIP(locinput);
2261 if (locinput > PL_regeol)
2262 sayNO;
2263 nextchr = UCHARAT(locinput);
2264 break;
c8756f30 2265 case REFFL:
3280af22 2266 PL_reg_flags |= RF_tainted;
c8756f30 2267 /* FALL THROUGH */
c277df42 2268 case REF:
c8756f30 2269 case REFF:
c277df42 2270 n = ARG(scan); /* which paren pair */
cf93c79d 2271 ln = PL_regstartp[n];
2c2d71f5 2272 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2273 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2274 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2275 if (ln == PL_regendp[n])
a0d0e21e 2276 break;
a0ed51b3 2277
cf93c79d 2278 s = PL_bostr + ln;
a0ed51b3
LW
2279 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2280 char *l = locinput;
cf93c79d 2281 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2282 /*
2283 * Note that we can't do the "other character" lookup trick as
2284 * in the 8-bit case (no pun intended) because in Unicode we
2285 * have to map both upper and title case to lower case.
2286 */
2287 if (OP(scan) == REFF) {
2288 while (s < e) {
2289 if (l >= PL_regeol)
2290 sayNO;
dfe13c55 2291 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2292 sayNO;
2293 s += UTF8SKIP(s);
2294 l += UTF8SKIP(l);
2295 }
2296 }
2297 else {
2298 while (s < e) {
2299 if (l >= PL_regeol)
2300 sayNO;
dfe13c55 2301 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2302 sayNO;
2303 s += UTF8SKIP(s);
2304 l += UTF8SKIP(l);
2305 }
2306 }
2307 locinput = l;
2308 nextchr = UCHARAT(locinput);
2309 break;
2310 }
2311
a0d0e21e 2312 /* Inline the first character, for speed. */
76e3520e 2313 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2314 (OP(scan) == REF ||
2315 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2316 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2317 sayNO;
cf93c79d 2318 ln = PL_regendp[n] - ln;
3280af22 2319 if (locinput + ln > PL_regeol)
4633a7c4 2320 sayNO;
c8756f30
AK
2321 if (ln > 1 && (OP(scan) == REF
2322 ? memNE(s, locinput, ln)
2323 : (OP(scan) == REFF
2324 ? ibcmp(s, locinput, ln)
2325 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2326 sayNO;
a0d0e21e 2327 locinput += ln;
76e3520e 2328 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2329 break;
2330
2331 case NOTHING:
c277df42 2332 case TAIL:
a0d0e21e
LW
2333 break;
2334 case BACK:
2335 break;
c277df42
IZ
2336 case EVAL:
2337 {
2338 dSP;
533c011a 2339 OP_4tree *oop = PL_op;
3280af22
NIS
2340 COP *ocurcop = PL_curcop;
2341 SV **ocurpad = PL_curpad;
c277df42
IZ
2342 SV *ret;
2343
2344 n = ARG(scan);
533c011a 2345 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2346 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2347 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2348 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2349
cea2e8a9 2350 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2351 SPAGAIN;
2352 ret = POPs;
2353 PUTBACK;
2354
0f5d15d6
IZ
2355 PL_op = oop;
2356 PL_curpad = ocurpad;
2357 PL_curcop = ocurcop;
c277df42 2358 if (logical) {
0f5d15d6
IZ
2359 if (logical == 2) { /* Postponed subexpression. */
2360 regexp *re;
22c35a8c 2361 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2362 re_cc_state state;
0f5d15d6
IZ
2363 CHECKPOINT cp, lastcp;
2364
2365 if(SvROK(ret) || SvRMAGICAL(ret)) {
2366 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2367
2368 if(SvMAGICAL(sv))
2369 mg = mg_find(sv, 'r');
2370 }
2371 if (mg) {
2372 re = (regexp *)mg->mg_obj;
df0003d4 2373 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2374 }
2375 else {
2376 STRLEN len;
2377 char *t = SvPV(ret, len);
2378 PMOP pm;
2379 char *oprecomp = PL_regprecomp;
2380 I32 osize = PL_regsize;
2381 I32 onpar = PL_regnpar;
2382
2383 pm.op_pmflags = 0;
393fec97 2384 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2385 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2386 if (!(SvFLAGS(ret)
2387 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2388 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2389 PL_regprecomp = oprecomp;
2390 PL_regsize = osize;
2391 PL_regnpar = onpar;
2392 }
2393 DEBUG_r(
2394 PerlIO_printf(Perl_debug_log,
2395 "Entering embedded `%s%.60s%s%s'\n",
2396 PL_colors[0],
2397 re->precomp,
2398 PL_colors[1],
2399 (strlen(re->precomp) > 60 ? "..." : ""))
2400 );
2401 state.node = next;
2402 state.prev = PL_reg_call_cc;
2403 state.cc = PL_regcc;
2404 state.re = PL_reg_re;
2405
2ab05381 2406 PL_regcc = 0;
0f5d15d6
IZ
2407
2408 cp = regcppush(0); /* Save *all* the positions. */
2409 REGCP_SET;
2410 cache_re(re);
2411 state.ss = PL_savestack_ix;
2412 *PL_reglastparen = 0;
2413 PL_reg_call_cc = &state;
2414 PL_reginput = locinput;
2c2d71f5
JH
2415
2416 /* XXXX This is too dramatic a measure... */
2417 PL_reg_maxiter = 0;
2418
0f5d15d6 2419 if (regmatch(re->program + 1)) {
2c914db6
IZ
2420 /* Even though we succeeded, we need to restore
2421 global variables, since we may be wrapped inside
2422 SUSPEND, thus the match may be not finished yet. */
2423
2424 /* XXXX Do this only if SUSPENDed? */
2425 PL_reg_call_cc = state.prev;
2426 PL_regcc = state.cc;
2427 PL_reg_re = state.re;
2428 cache_re(PL_reg_re);
2429
2430 /* XXXX This is too dramatic a measure... */
2431 PL_reg_maxiter = 0;
2432
2433 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2434 ReREFCNT_dec(re);
2435 regcpblow(cp);
2436 sayYES;
2437 }
0f5d15d6
IZ
2438 ReREFCNT_dec(re);
2439 REGCP_UNWIND;
2440 regcppop();
2441 PL_reg_call_cc = state.prev;
2442 PL_regcc = state.cc;
2443 PL_reg_re = state.re;
d3790889 2444 cache_re(PL_reg_re);
2c2d71f5
JH
2445
2446 /* XXXX This is too dramatic a measure... */
2447 PL_reg_maxiter = 0;
2448
0f5d15d6
IZ
2449 sayNO;
2450 }
c277df42 2451 sw = SvTRUE(ret);
0f5d15d6 2452 logical = 0;
a0ed51b3
LW
2453 }
2454 else
3280af22 2455 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2456 break;
2457 }
a0d0e21e 2458 case OPEN:
c277df42 2459 n = ARG(scan); /* which paren pair */
3280af22
NIS
2460 PL_reg_start_tmp[n] = locinput;
2461 if (n > PL_regsize)
2462 PL_regsize = n;
a0d0e21e
LW
2463 break;
2464 case CLOSE:
c277df42 2465 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2466 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2467 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2468 if (n > *PL_reglastparen)
2469 *PL_reglastparen = n;
a0d0e21e 2470 break;
c277df42
IZ
2471 case GROUPP:
2472 n = ARG(scan); /* which paren pair */
cf93c79d 2473 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2474 break;
2475 case IFTHEN:
2c2d71f5 2476 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2477 if (sw)
2478 next = NEXTOPER(NEXTOPER(scan));
2479 else {
2480 next = scan + ARG(scan);
2481 if (OP(next) == IFTHEN) /* Fake one. */
2482 next = NEXTOPER(NEXTOPER(next));
2483 }
2484 break;
2485 case LOGICAL:
0f5d15d6 2486 logical = scan->flags;
c277df42 2487 break;
2ab05381
IZ
2488/*******************************************************************
2489 PL_regcc contains infoblock about the innermost (...)* loop, and
2490 a pointer to the next outer infoblock.
2491
2492 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2493
2494 1) After matching X, regnode for CURLYX is processed;
2495
2496 2) This regnode creates infoblock on the stack, and calls
2497 regmatch() recursively with the starting point at WHILEM node;
2498
2499 3) Each hit of WHILEM node tries to match A and Z (in the order
2500 depending on the current iteration, min/max of {min,max} and
2501 greediness). The information about where are nodes for "A"
2502 and "Z" is read from the infoblock, as is info on how many times "A"
2503 was already matched, and greediness.
2504
2505 4) After A matches, the same WHILEM node is hit again.
2506
2507 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2508 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2509 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2510 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2511 of the external loop.
2512
2513 Currently present infoblocks form a tree with a stem formed by PL_curcc
2514 and whatever it mentions via ->next, and additional attached trees
2515 corresponding to temporarily unset infoblocks as in "5" above.
2516
2517 In the following picture infoblocks for outer loop of
2518 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2519 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2520 infoblocks are drawn below the "reset" infoblock.
2521
2522 In fact in the picture below we do not show failed matches for Z and T
2523 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2524 more obvious *why* one needs to *temporary* unset infoblocks.]
2525
2526 Matched REx position InfoBlocks Comment
2527 (Y(A)*?Z)*?T x
2528 Y(A)*?Z)*?T x <- O
2529 Y (A)*?Z)*?T x <- O
2530 Y A)*?Z)*?T x <- O <- I
2531 YA )*?Z)*?T x <- O <- I
2532 YA A)*?Z)*?T x <- O <- I
2533 YAA )*?Z)*?T x <- O <- I
2534 YAA Z)*?T x <- O # Temporary unset I
2535 I
2536
2537 YAAZ Y(A)*?Z)*?T x <- O
2538 I
2539
2540 YAAZY (A)*?Z)*?T x <- O
2541 I
2542
2543 YAAZY A)*?Z)*?T x <- O <- I
2544 I
2545
2546 YAAZYA )*?Z)*?T x <- O <- I
2547 I
2548
2549 YAAZYA Z)*?T x <- O # Temporary unset I
2550 I,I
2551
2552 YAAZYAZ )*?T x <- O
2553 I,I
2554
2555 YAAZYAZ T x # Temporary unset O
2556 O
2557 I,I
2558
2559 YAAZYAZT x
2560 O
2561 I,I
2562 *******************************************************************/
a0d0e21e
LW
2563 case CURLYX: {
2564 CURCUR cc;
3280af22 2565 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2566
2567 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2568 next += ARG(next);
3280af22
NIS
2569 cc.oldcc = PL_regcc;
2570 PL_regcc = &cc;
2571 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2572 cc.cur = -1;
2573 cc.min = ARG1(scan);
2574 cc.max = ARG2(scan);
c277df42 2575 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2576 cc.next = next;
2577 cc.minmod = minmod;
2578 cc.lastloc = 0;
3280af22 2579 PL_reginput = locinput;
a0d0e21e
LW
2580 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2581 regcpblow(cp);
3280af22 2582 PL_regcc = cc.oldcc;
4633a7c4 2583 saySAME(n);
a0d0e21e
LW
2584 }
2585 /* NOT REACHED */
2586 case WHILEM: {
2587 /*
2588 * This is really hard to understand, because after we match
2589 * what we're trying to match, we must make sure the rest of
2c2d71f5 2590 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2591 * to go back UP the parse tree by recursing ever deeper. And
2592 * if it fails, we have to reset our parent's current state
2593 * that we can try again after backing off.
2594 */
2595
c277df42 2596 CHECKPOINT cp, lastcp;
3280af22 2597 CURCUR* cc = PL_regcc;
c277df42
IZ
2598 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2599
4633a7c4 2600 n = cc->cur + 1; /* how many we know we matched */
3280af22 2601 PL_reginput = locinput;
a0d0e21e 2602
c277df42
IZ
2603 DEBUG_r(
2604 PerlIO_printf(Perl_debug_log,
2605 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2606 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2607 (long)n, (long)cc->min,
2608 (long)cc->max, (long)cc)
2609 );
4633a7c4 2610
a0d0e21e
LW
2611 /* If degenerate scan matches "", assume scan done. */
2612
579cf2c3 2613 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2614 PL_regcc = cc->oldcc;
2ab05381
IZ
2615 if (PL_regcc)
2616 ln = PL_regcc->cur;
c277df42 2617 DEBUG_r(
c3464db5
DD
2618 PerlIO_printf(Perl_debug_log,
2619 "%*s empty match detected, try continuation...\n",
3280af22 2620 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2621 );
a0d0e21e 2622 if (regmatch(cc->next))
4633a7c4 2623 sayYES;
2ab05381
IZ
2624 if (PL_regcc)
2625 PL_regcc->cur = ln;
3280af22 2626 PL_regcc = cc;
4633a7c4 2627 sayNO;
a0d0e21e
LW
2628 }
2629
2630 /* First just match a string of min scans. */
2631
2632 if (n < cc->min) {
2633 cc->cur = n;
2634 cc->lastloc = locinput;
4633a7c4
LW
2635 if (regmatch(cc->scan))
2636 sayYES;
2637 cc->cur = n - 1;
c277df42 2638 cc->lastloc = lastloc;
4633a7c4 2639 sayNO;
a0d0e21e
LW
2640 }
2641
2c2d71f5
JH
2642 if (scan->flags) {
2643 /* Check whether we already were at this position.
2644 Postpone detection until we know the match is not
2645 *that* much linear. */
2646 if (!PL_reg_maxiter) {
2647 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2648 PL_reg_leftiter = PL_reg_maxiter;
2649 }
2650 if (PL_reg_leftiter-- == 0) {
2651 I32 size = (PL_reg_maxiter + 7)/8;
2652 if (PL_reg_poscache) {
2653 if (PL_reg_poscache_size < size) {
2654 Renew(PL_reg_poscache, size, char);
2655 PL_reg_poscache_size = size;
2656 }
2657 Zero(PL_reg_poscache, size, char);
2658 }
2659 else {
2660 PL_reg_poscache_size = size;
2661 Newz(29, PL_reg_poscache, size, char);
2662 }
2663 DEBUG_r(
2664 PerlIO_printf(Perl_debug_log,
2665 "%sDetected a super-linear match, switching on caching%s...\n",
2666 PL_colors[4], PL_colors[5])
2667 );
2668 }
2669 if (PL_reg_leftiter < 0) {
2670 I32 o = locinput - PL_bostr, b;
2671
2672 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2673 b = o % 8;
2674 o /= 8;
2675 if (PL_reg_poscache[o] & (1<<b)) {
2676 DEBUG_r(
2677 PerlIO_printf(Perl_debug_log,
2678 "%*s already tried at this position...\n",
2679 REPORT_CODE_OFF+PL_regindent*2, "")
2680 );
7821416a 2681 sayNO_SILENT;
2c2d71f5
JH
2682 }
2683 PL_reg_poscache[o] |= (1<<b);
2684 }
2685 }
2686
a0d0e21e
LW
2687 /* Prefer next over scan for minimal matching. */
2688
2689 if (cc->minmod) {
3280af22 2690 PL_regcc = cc->oldcc;
2ab05381
IZ
2691 if (PL_regcc)
2692 ln = PL_regcc->cur;
5f05dabc 2693 cp = regcppush(cc->parenfloor);
c277df42 2694 REGCP_SET;
5f05dabc 2695 if (regmatch(cc->next)) {
c277df42 2696 regcpblow(cp);
4633a7c4 2697 sayYES; /* All done. */
5f05dabc 2698 }
c277df42 2699 REGCP_UNWIND;
5f05dabc 2700 regcppop();
2ab05381
IZ
2701 if (PL_regcc)
2702 PL_regcc->cur = ln;
3280af22 2703 PL_regcc = cc;
a0d0e21e 2704
c277df42 2705 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2706 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2707 && !(PL_reg_flags & RF_warned)) {
2708 PL_reg_flags |= RF_warned;
e476b1b5 2709 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2710 "Complex regular subexpression recursion",
2711 REG_INFTY - 1);
c277df42 2712 }
4633a7c4 2713 sayNO;
c277df42 2714 }
a687059c 2715
c277df42 2716 DEBUG_r(
c3464db5
DD
2717 PerlIO_printf(Perl_debug_log,
2718 "%*s trying longer...\n",
3280af22 2719 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2720 );
a0d0e21e 2721 /* Try scanning more and see if it helps. */
3280af22 2722 PL_reginput = locinput;
a0d0e21e
LW
2723 cc->cur = n;
2724 cc->lastloc = locinput;
5f05dabc 2725 cp = regcppush(cc->parenfloor);
c277df42 2726 REGCP_SET;
5f05dabc 2727 if (regmatch(cc->scan)) {
c277df42 2728 regcpblow(cp);
4633a7c4 2729 sayYES;
5f05dabc 2730 }
c277df42 2731 REGCP_UNWIND;
5f05dabc 2732 regcppop();
4633a7c4 2733 cc->cur = n - 1;
c277df42 2734 cc->lastloc = lastloc;
4633a7c4 2735 sayNO;
a0d0e21e
LW
2736 }
2737
2738 /* Prefer scan over next for maximal matching. */
2739
2740 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2741 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2742 cc->cur = n;
2743 cc->lastloc = locinput;
c277df42 2744 REGCP_SET;
5f05dabc 2745 if (regmatch(cc->scan)) {
c277df42 2746 regcpblow(cp);
4633a7c4 2747 sayYES;
5f05dabc 2748 }
c277df42 2749 REGCP_UNWIND;
a0d0e21e 2750 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2751 PL_reginput = locinput;
c277df42 2752 DEBUG_r(
c3464db5
DD
2753 PerlIO_printf(Perl_debug_log,
2754 "%*s failed, try continuation...\n",
3280af22 2755 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2756 );
2757 }
e476b1b5 2758 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2759 && !(PL_reg_flags & RF_warned)) {
3280af22 2760 PL_reg_flags |= RF_warned;
e476b1b5 2761 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2762 "Complex regular subexpression recursion",
2763 REG_INFTY - 1);
a0d0e21e
LW
2764 }
2765
2766 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2767 PL_regcc = cc->oldcc;
2ab05381
IZ
2768 if (PL_regcc)
2769 ln = PL_regcc->cur;
a0d0e21e 2770 if (regmatch(cc->next))
4633a7c4 2771 sayYES;
2ab05381
IZ
2772 if (PL_regcc)
2773 PL_regcc->cur = ln;
3280af22 2774 PL_regcc = cc;
4633a7c4 2775 cc->cur = n - 1;
c277df42 2776 cc->lastloc = lastloc;
4633a7c4 2777 sayNO;
a0d0e21e
LW
2778 }
2779 /* NOT REACHED */
c277df42
IZ
2780 case BRANCHJ:
2781 next = scan + ARG(scan);
2782 if (next == scan)
2783 next = NULL;
2784 inner = NEXTOPER(NEXTOPER(scan));
2785 goto do_branch;
2786 case BRANCH:
2787 inner = NEXTOPER(scan);
2788 do_branch:
2789 {
2790 CHECKPOINT lastcp;
2791 c1 = OP(scan);
2792 if (OP(next) != c1) /* No choice. */
2793 next = inner; /* Avoid recursion. */
a0d0e21e 2794 else {
3280af22 2795 int lastparen = *PL_reglastparen;
c277df42
IZ
2796
2797 REGCP_SET;
a0d0e21e 2798 do {
3280af22 2799 PL_reginput = locinput;
c277df42 2800 if (regmatch(inner))
4633a7c4 2801 sayYES;
c277df42 2802 REGCP_UNWIND;
3280af22 2803 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2804 PL_regendp[n] = -1;
3280af22 2805 *PL_reglastparen = n;
c277df42 2806 scan = next;
a0d0e21e 2807 /*SUPPRESS 560*/
155aba94 2808 if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))))
c277df42 2809 next += n;
a0d0e21e 2810 else
c277df42 2811 next = NULL;
c277df42
IZ
2812 inner = NEXTOPER(scan);
2813 if (c1 == BRANCHJ) {
2814 inner = NEXTOPER(inner);
2815 }
2816 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2817 sayNO;
a0d0e21e 2818 /* NOTREACHED */
a687059c 2819 }
a0d0e21e
LW
2820 }
2821 break;
2822 case MINMOD:
2823 minmod = 1;
2824 break;
c277df42
IZ
2825 case CURLYM:
2826 {
00db4c45 2827 I32 l = 0;
c277df42
IZ
2828 CHECKPOINT lastcp;
2829
2830 /* We suppose that the next guy does not need
2831 backtracking: in particular, it is of constant length,
2832 and has no parenths to influence future backrefs. */
2833 ln = ARG1(scan); /* min to match */
2834 n = ARG2(scan); /* max to match */
c277df42
IZ
2835 paren = scan->flags;
2836 if (paren) {
3280af22
NIS
2837 if (paren > PL_regsize)
2838 PL_regsize = paren;
2839 if (paren > *PL_reglastparen)
2840 *PL_reglastparen = paren;
c277df42 2841 }
dc45a647 2842 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2843 if (paren)
2844 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2845 PL_reginput = locinput;
c277df42
IZ
2846 if (minmod) {
2847 minmod = 0;
2848 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2849 sayNO;
5f4b28b2 2850 if (ln && l == 0 && n >= ln
c277df42
IZ
2851 /* In fact, this is tricky. If paren, then the
2852 fact that we did/didnot match may influence
2853 future execution. */
2854 && !(paren && ln == 0))
2855 ln = n;
3280af22 2856 locinput = PL_reginput;
22c35a8c 2857 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2858 c1 = (U8)*STRING(next);
c277df42 2859 if (OP(next) == EXACTF)
22c35a8c 2860 c2 = PL_fold[c1];
c277df42 2861 else if (OP(next) == EXACTFL)
22c35a8c 2862 c2 = PL_fold_locale[c1];
c277df42
IZ
2863 else
2864 c2 = c1;
a0ed51b3
LW
2865 }
2866 else
c277df42
IZ
2867 c1 = c2 = -1000;
2868 REGCP_SET;
5f4b28b2 2869 /* This may be improved if l == 0. */
c277df42
IZ
2870 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2871 /* If it could work, try it. */
2872 if (c1 == -1000 ||
3280af22
NIS
2873 UCHARAT(PL_reginput) == c1 ||
2874 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2875 {
2876 if (paren) {
2877 if (n) {
cf93c79d
IZ
2878 PL_regstartp[paren] =
2879 HOPc(PL_reginput, -l) - PL_bostr;
2880 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2881 }
2882 else
cf93c79d 2883 PL_regendp[paren] = -1;
c277df42
IZ
2884 }
2885 if (regmatch(next))
2886 sayYES;
2887 REGCP_UNWIND;
2888 }
2889 /* Couldn't or didn't -- move forward. */
3280af22 2890 PL_reginput = locinput;
c277df42
IZ
2891 if (regrepeat_hard(scan, 1, &l)) {
2892 ln++;
3280af22 2893 locinput = PL_reginput;
c277df42
IZ
2894 }
2895 else
2896 sayNO;
2897 }
a0ed51b3
LW
2898 }
2899 else {
c277df42
IZ
2900 n = regrepeat_hard(scan, n, &l);
2901 if (n != 0 && l == 0
2902 /* In fact, this is tricky. If paren, then the
2903 fact that we did/didnot match may influence
2904 future execution. */
2905 && !(paren && ln == 0))
2906 ln = n;
3280af22 2907 locinput = PL_reginput;
c277df42 2908 DEBUG_r(
5c0ca799 2909 PerlIO_printf(Perl_debug_log,
faccc32b 2910 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2911 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2912 (IV) n, (IV)l)
c277df42
IZ
2913 );
2914 if (n >= ln) {
22c35a8c 2915 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2916 c1 = (U8)*STRING(next);
c277df42 2917 if (OP(next) == EXACTF)
22c35a8c 2918 c2 = PL_fold[c1];
c277df42 2919 else if (OP(next) == EXACTFL)
22c35a8c 2920 c2 = PL_fold_locale[c1];
c277df42
IZ
2921 else
2922 c2 = c1;
a0ed51b3
LW
2923 }
2924 else
c277df42
IZ
2925 c1 = c2 = -1000;
2926 }
2927 REGCP_SET;
2928 while (n >= ln) {
2929 /* If it could work, try it. */
2930 if (c1 == -1000 ||
3280af22
NIS
2931 UCHARAT(PL_reginput) == c1 ||
2932 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2933 {
2934 DEBUG_r(
c3464db5 2935 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2936 "%*s trying tail with n=%"IVdf"...\n",
2937 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2938 );
2939 if (paren) {
2940 if (n) {
cf93c79d
IZ
2941 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2942 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2943 }
a0ed51b3 2944 else
cf93c79d 2945 PL_regendp[paren] = -1;
c277df42 2946 }
a0ed51b3
LW
2947 if (regmatch(next))
2948 sayYES;
2949 REGCP_UNWIND;
2950 }
c277df42
IZ
2951 /* Couldn't or didn't -- back up. */
2952 n--;
dfe13c55 2953 locinput = HOPc(locinput, -l);
3280af22 2954 PL_reginput = locinput;
c277df42
IZ
2955 }
2956 }
2957 sayNO;
2958 break;
2959 }
2960 case CURLYN:
2961 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2962 if (paren > PL_regsize)
2963 PL_regsize = paren;
2964 if (paren > *PL_reglastparen)
2965 *PL_reglastparen = paren;
c277df42
IZ
2966 ln = ARG1(scan); /* min to match */
2967 n = ARG2(scan); /* max to match */
dc45a647 2968 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2969 goto repeat;
a0d0e21e 2970 case CURLY:
c277df42 2971 paren = 0;
a0d0e21e
LW
2972 ln = ARG1(scan); /* min to match */
2973 n = ARG2(scan); /* max to match */
dc45a647 2974 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2975 goto repeat;
2976 case STAR:
2977 ln = 0;
c277df42 2978 n = REG_INFTY;
a0d0e21e 2979 scan = NEXTOPER(scan);
c277df42 2980 paren = 0;
a0d0e21e
LW
2981 goto repeat;
2982 case PLUS:
c277df42
IZ
2983 ln = 1;
2984 n = REG_INFTY;
2985 scan = NEXTOPER(scan);
2986 paren = 0;
2987 repeat:
a0d0e21e
LW
2988 /*
2989 * Lookahead to avoid useless match attempts
2990 * when we know what character comes next.
2991 */
22c35a8c 2992 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2993 c1 = (U8)*STRING(next);
bbce6d69 2994 if (OP(next) == EXACTF)
22c35a8c 2995 c2 = PL_fold[c1];
bbce6d69 2996 else if (OP(next) == EXACTFL)
22c35a8c 2997 c2 = PL_fold_locale[c1];
bbce6d69
PP
2998 else
2999 c2 = c1;
3000 }
a0d0e21e 3001 else
bbce6d69 3002 c1 = c2 = -1000;
3280af22 3003 PL_reginput = locinput;
f06a1d4e
HS
3004 if (paren)
3005 PL_regendp[paren] = -1;
a0d0e21e 3006 if (minmod) {
c277df42 3007 CHECKPOINT lastcp;
a0d0e21e
LW
3008 minmod = 0;
3009 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3010 sayNO;
a0ed51b3 3011 locinput = PL_reginput;
c277df42 3012 REGCP_SET;
0fe9bf95
IZ
3013 if (c1 != -1000) {
3014 char *e = locinput + n - ln; /* Should not check after this */
3015 char *old = locinput;
3016
3017 if (e >= PL_regeol || (n == REG_INFTY))
3018 e = PL_regeol - 1;
3019 while (1) {
3020 /* Find place 'next' could work */
3021 if (c1 == c2) {
3022 while (locinput <= e && *locinput != c1)
3023 locinput++;
3024 } else {
3025 while (locinput <= e
3026 && *locinput != c1
3027 && *locinput != c2)
3028 locinput++;
3029 }
3030 if (locinput > e)
3031 sayNO;
3032 /* PL_reginput == old now */
3033 if (locinput != old) {
3034 ln = 1; /* Did some */
3035 if (regrepeat(scan, locinput - old) <
3036 locinput - old)
3037 sayNO;
3038 }
3039 /* PL_reginput == locinput now */
3040 if (paren) {
3041 if (ln) {
cf93c79d
IZ
3042 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
3043 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
3044 }
3045 else
cf93c79d 3046 PL_regendp[paren] = -1;
0fe9bf95
IZ
3047 }
3048 if (regmatch(next))
3049 sayYES;
3050 PL_reginput = locinput; /* Could be reset... */
3051 REGCP_UNWIND;
3052 /* Couldn't or didn't -- move forward. */
3053 old = locinput++;
3054 }
3055 }
3056 else
c277df42 3057 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3058 /* If it could work, try it. */
bbce6d69 3059 if (c1 == -1000 ||
3280af22
NIS
3060 UCHARAT(PL_reginput) == c1 ||
3061 UCHARAT(PL_reginput) == c2)
bbce6d69 3062 {
c277df42
IZ
3063 if (paren) {
3064 if (n) {
cf93c79d
IZ
3065 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3066 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3067 }
3068 else
cf93c79d 3069 PL_regendp[paren] = -1;
c277df42 3070 }
a0d0e21e 3071 if (regmatch(next))
4633a7c4 3072 sayYES;
c277df42 3073 REGCP_UNWIND;
bbce6d69 3074 }
c277df42 3075 /* Couldn't or didn't -- move forward. */
a0ed51b3 3076 PL_reginput = locinput;
a0d0e21e
LW
3077 if (regrepeat(scan, 1)) {
3078 ln++;
a0ed51b3
LW
3079 locinput = PL_reginput;
3080 }
3081 else
4633a7c4 3082 sayNO;
a0d0e21e
LW
3083 }
3084 }
3085 else {
c277df42 3086 CHECKPOINT lastcp;
a0d0e21e 3087 n = regrepeat(scan, n);
a0ed51b3 3088 locinput = PL_reginput;
22c35a8c 3089 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3090 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3091 ln = n; /* why back off? */
1aeab75a
GS
3092 /* ...because $ and \Z can match before *and* after
3093 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3094 We should back off by one in this case. */
3095 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3096 ln--;
3097 }
c277df42
IZ
3098 REGCP_SET;
3099 if (paren) {
3100 while (n >= ln) {
3101 /* If it could work, try it. */
3102 if (c1 == -1000 ||
3280af22
NIS
3103 UCHARAT(PL_reginput) == c1 ||
3104 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3105 {
3106 if (paren && n) {
3107 if (n) {
cf93c79d
IZ
3108 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3109 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3110 }
3111 else
cf93c79d 3112 PL_regendp[paren] = -1;
c277df42
IZ
3113 }
3114 if (regmatch(next))
3115 sayYES;
3116 REGCP_UNWIND;
3117 }
3118 /* Couldn't or didn't -- back up. */
3119 n--;
dfe13c55 3120 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3121 }
a0ed51b3
LW
3122 }
3123 else {
c277df42
IZ
3124 while (n >= ln) {
3125 /* If it could work, try it. */
3126 if (c1 == -1000 ||
3280af22
NIS
3127 UCHARAT(PL_reginput) == c1 ||
3128 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3129 {
3130 if (regmatch(next))
3131 sayYES;
3132 REGCP_UNWIND;
3133 }
3134 /* Couldn't or didn't -- back up. */
3135 n--;
dfe13c55 3136 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3137 }
a0d0e21e
LW
3138 }
3139 }
4633a7c4 3140 sayNO;
c277df42 3141 break;
a0d0e21e 3142 case END:
0f5d15d6
IZ
3143 if (PL_reg_call_cc) {
3144 re_cc_state *cur_call_cc = PL_reg_call_cc;
3145 CURCUR *cctmp = PL_regcc;
3146 regexp *re = PL_reg_re;
3147 CHECKPOINT cp, lastcp;
3148
3149 cp = regcppush(0); /* Save *all* the positions. */
3150 REGCP_SET;
3151 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3152 the caller. */
3153 PL_reginput = locinput; /* Make position available to
3154 the callcc. */
3155 cache_re(PL_reg_call_cc->re);
3156 PL_regcc = PL_reg_call_cc->cc;
3157 PL_reg_call_cc = PL_reg_call_cc->prev;
3158 if (regmatch(cur_call_cc->node)) {
3159 PL_reg_call_cc = cur_call_cc;
3160 regcpblow(cp);
3161 sayYES;
3162 }
3163 REGCP_UNWIND;
3164 regcppop();
3165 PL_reg_call_cc = cur_call_cc;
3166 PL_regcc = cctmp;
3167 PL_reg_re = re;
3168 cache_re(re);
3169
3170 DEBUG_r(
3171 PerlIO_printf(Perl_debug_log,
3172 "%*s continuation failed...\n",
3173 REPORT_CODE_OFF+PL_regindent*2, "")
3174 );
7821416a 3175 sayNO_SILENT;
0f5d15d6 3176 }
7821416a
IZ
3177 if (locinput < PL_regtill) {
3178 DEBUG_r(PerlIO_printf(Perl_debug_log,
3179 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3180 PL_colors[4],
3181 (long)(locinput - PL_reg_starttry),
3182 (long)(PL_regtill - PL_reg_starttry),
3183 PL_colors[5]));
3184 sayNO_FINAL; /* Cannot match: too short. */
3185 }
3186 PL_reginput = locinput; /* put where regtry can find it */
3187 sayYES_FINAL; /* Success! */
7e5428c5 3188 case SUCCEED:
3280af22 3189 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3190 sayYES_LOUD; /* Success! */
c277df42
IZ
3191 case SUSPEND:
3192 n = 1;
9fe1d20c 3193 PL_reginput = locinput;
c277df42 3194 goto do_ifmatch;
a0d0e21e 3195 case UNLESSM:
c277df42 3196 n = 0;
a0ed51b3 3197 if (scan->flags) {
0fe9bf95
IZ
3198 if (UTF) { /* XXXX This is absolutely
3199 broken, we read before
3200 start of string. */
3201 s = HOPMAYBEc(locinput, -scan->flags);
3202 if (!s)
3203 goto say_yes;
3204 PL_reginput = s;
3205 }
3206 else {
3207 if (locinput < PL_bostr + scan->flags)
3208 goto say_yes;
3209 PL_reginput = locinput - scan->flags;
3210 goto do_ifmatch;
3211 }
a0ed51b3
LW
3212 }
3213 else
3214 PL_reginput = locinput;
c277df42
IZ
3215 goto do_ifmatch;
3216 case IFMATCH:
3217 n = 1;
a0ed51b3 3218 if (scan->flags) {
0fe9bf95
IZ
3219 if (UTF) { /* XXXX This is absolutely
3220 broken, we read before
3221 start of string. */
3222 s = HOPMAYBEc(locinput, -scan->flags);
3223 if (!s || s < PL_bostr)
3224 goto say_no;
3225 PL_reginput = s;
3226 }
3227 else {
3228 if (locinput < PL_bostr + scan->flags)
3229 goto say_no;
3230 PL_reginput = locinput - scan->flags;
3231 goto do_ifmatch;
3232 }
a0ed51b3
LW
3233 }
3234 else
3235 PL_reginput = locinput;
3236
c277df42 3237 do_ifmatch:
c277df42
IZ
3238 inner = NEXTOPER(NEXTOPER(scan));
3239 if (regmatch(inner) != n) {
3240 say_no:
3241 if (logical) {
3242 logical = 0;
3243 sw = 0;
3244 goto do_longjump;
a0ed51b3
LW
3245 }
3246 else
c277df42
IZ
3247 sayNO;
3248 }
3249 say_yes:
3250 if (logical) {
3251 logical = 0;
3252 sw = 1;
3253 }
fe44a5e8 3254 if (OP(scan) == SUSPEND) {
3280af22 3255 locinput = PL_reginput;
565764a8 3256 nextchr = UCHARAT(locinput);
fe44a5e8 3257 }
c277df42
IZ
3258 /* FALL THROUGH. */
3259 case LONGJMP:
3260 do_longjump:
3261 next = scan + ARG(scan);
3262 if (next == scan)
3263 next = NULL;
a0d0e21e
LW
3264 break;
3265 default:
b900a521 3266 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3267 PTR2UV(scan), OP(scan));
cea2e8a9 3268 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3269 }
a0d0e21e
LW
3270 scan = next;
3271 }
a687059c 3272
a0d0e21e
LW
3273 /*
3274 * We get here only if there's trouble -- normally "case END" is
3275 * the terminating point.
3276 */
cea2e8a9 3277 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3278 /*NOTREACHED*/
4633a7c4
LW
3279 sayNO;
3280
7821416a
IZ
3281yes_loud:
3282 DEBUG_r(
3283 PerlIO_printf(Perl_debug_log,
3284 "%*s %scould match...%s\n",
3285 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3286 );
3287 goto yes;
3288yes_final:
3289 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3290 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3291yes:
3292#ifdef DEBUGGING
3280af22 3293 PL_regindent--;
4633a7c4
LW
3294#endif
3295 return 1;
3296
3297no:
7821416a
IZ
3298 DEBUG_r(
3299 PerlIO_printf(Perl_debug_log,
3300 "%*s %sfailed...%s\n",
3301 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3302 );
3303 goto do_no;
3304no_final:
3305do_no:
4633a7c4 3306#ifdef DEBUGGING
3280af22 3307 PL_regindent--;
4633a7c4 3308#endif
a0d0e21e 3309 return 0;
a687059c
LW
3310}
3311
3312/*
3313 - regrepeat - repeatedly match something simple, report how many
3314 */
3315/*
3316 * [This routine now assumes that it will only match on things of length 1.
3317 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3318 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3319 */
76e3520e 3320STATIC I32
cea2e8a9 3321S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3322{
5c0ca799 3323 dTHR;
a0d0e21e 3324 register char *scan;
a0d0e21e 3325 register I32 c;
3280af22 3326 register char *loceol = PL_regeol;
a0ed51b3 3327 register I32 hardcount = 0;
a0d0e21e 3328
3280af22 3329 scan = PL_reginput;
c277df42 3330 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3331 loceol = scan + max;
a0d0e21e 3332 switch (OP(p)) {
22c35a8c 3333 case REG_ANY:
a0d0e21e
LW
3334 while (scan < loceol && *scan != '\n')
3335 scan++;
3336 break;
3337 case SANY:
3338 scan = loceol;
3339 break;
a0ed51b3
LW
3340 case ANYUTF8:
3341 loceol = PL_regeol;
3342 while (scan < loceol && *scan != '\n') {
3343 scan += UTF8SKIP(scan);
3344 hardcount++;
3345 }
3346 break;
3347 case SANYUTF8:
3348 loceol = PL_regeol;
3349 while (scan < loceol) {
3350 scan += UTF8SKIP(scan);
3351 hardcount++;
3352 }
3353 break;
bbce6d69 3354 case EXACT: /* length of string is 1 */
cd439c50 3355 c = (U8)*STRING(p);
bbce6d69
PP
3356 while (scan < loceol && UCHARAT(scan) == c)
3357 scan++;
3358 break;
3359 case EXACTF: /* length of string is 1 */
cd439c50 3360 c = (U8)*STRING(p);
bbce6d69 3361 while (scan < loceol &&
22c35a8c 3362 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69
PP
3363 scan++;
3364 break;
3365 case EXACTFL: /* length of string is 1 */
3280af22 3366 PL_reg_flags |= RF_tainted;
cd439c50 3367 c = (U8)*STRING(p);
bbce6d69 3368 while (scan < loceol &&
22c35a8c 3369 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3370 scan++;
3371 break;
a0ed51b3
LW
3372 case ANYOFUTF8:
3373 loceol = PL_regeol;
3374 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3375 scan += UTF8SKIP(scan);
3376 hardcount++;
3377 }
3378 break;
a0d0e21e 3379 case ANYOF:
936ed897 3380 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3381 scan++;
a0d0e21e
LW
3382 break;
3383 case ALNUM:
3384 while (scan < loceol && isALNUM(*scan))
3385 scan++;
3386 break;
a0ed51b3
LW
3387 case ALNUMUTF8:
3388 loceol = PL_regeol;
dfe13c55 3389 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3390 scan += UTF8SKIP(scan);
3391 hardcount++;
3392 }
3393 break;
bbce6d69 3394 case ALNUML:
3280af22 3395 PL_reg_flags |= RF_tainted;
bbce6d69
PP
3396 while (scan < loceol && isALNUM_LC(*scan))
3397 scan++;
3398 break;
a0ed51b3
LW
3399 case ALNUMLUTF8:
3400 PL_reg_flags |= RF_tainted;
3401 loceol = PL_regeol;
dfe13c55 3402 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3403 scan += UTF8SKIP(scan);
3404 hardcount++;
3405 }
3406 break;
3407 break;
a0d0e21e
LW
3408 case NALNUM:
3409 while (scan < loceol && !isALNUM(*scan))
3410 scan++;
3411 break;
a0ed51b3
LW
3412 case NALNUMUTF8:
3413 loceol = PL_regeol;
dfe13c55 3414 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3415 scan += UTF8SKIP(scan);
3416 hardcount++;
3417 }
3418 break;
bbce6d69 3419 case NALNUML:
3280af22 3420 PL_reg_flags |= RF_tainted;
bbce6d69
PP
3421 while (scan < loceol && !isALNUM_LC(*scan))
3422 scan++;
3423 break;
a0ed51b3
LW
3424 case NALNUMLUTF8:
3425 PL_reg_flags |= RF_tainted;
3426 loceol = PL_regeol;
dfe13c55 3427 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3428 scan += UTF8SKIP(scan);
3429 hardcount++;
3430 }
3431 break;
a0d0e21e
LW
3432 case SPACE:
3433 while (scan < loceol && isSPACE(*scan))
3434 scan++;
3435 break;
a0ed51b3
LW
3436 case SPACEUTF8:
3437 loceol = PL_regeol;
dfe13c55 3438 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3439 scan += UTF8SKIP(scan);
3440 hardcount++;
3441 }
3442 break;
bbce6d69 3443 case SPACEL:
3280af22 3444 PL_reg_flags |= RF_tainted;
bbce6d69
PP
3445 while (scan < loceol && isSPACE_LC(*scan))
3446 scan++;
3447 break;
a0ed51b3
LW
3448 case SPACELUTF8:
3449 PL_reg_flags |= RF_tainted;
3450 loceol = PL_regeol;
dfe13c55 3451 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3452 scan += UTF8SKIP(scan);
3453 hardcount++;
3454 }
3455 break;
a0d0e21e
LW
3456 case NSPACE:
3457 while (scan < loceol && !isSPACE(*scan))
3458 scan++;
3459 break;
a0ed51b3
LW
3460 case NSPACEUTF8:
3461 loceol = PL_regeol;
dfe13c55 3462 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3463 scan += UTF8SKIP(scan);
3464 hardcount++;
3465 }
3466 break;
bbce6d69 3467 case NSPACEL:
3280af22 3468 PL_reg_flags |= RF_tainted;
bbce6d69
PP
3469 while (scan < loceol && !isSPACE_LC(*scan))
3470 scan++;
3471 break;
a0ed51b3
LW
3472 case NSPACELUTF8:
3473 PL_reg_flags |= RF_tainted;
3474 loceol = PL_regeol;
dfe13c55 3475 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3476 scan += UTF8SKIP(scan);
3477 hardcount++;
3478 }
3479 break;
a0d0e21e
LW
3480 case DIGIT:
3481 while (scan < loceol && isDIGIT(*scan))
3482 scan++;
3483 break;
a0ed51b3
LW
3484 case DIGITUTF8:
3485 loceol = PL_regeol;
dfe13c55 3486 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3487 scan += UTF8SKIP(scan);
3488 hardcount++;
3489 }
3490 break;
3491 break;
a0d0e21e
LW
3492 case NDIGIT:
3493 while (scan < loceol && !isDIGIT(*scan))
3494 scan++;
3495 break;
a0ed51b3
LW
3496 case NDIGITUTF8:
3497 loceol = PL_regeol;
dfe13c55 3498 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3499 scan += UTF8SKIP(scan);
3500 hardcount++;
3501 }
3502 break;
a0d0e21e
LW
3503 default: /* Called on something of 0 width. */
3504 break; /* So match right here or not at all. */
3505 }
a687059c 3506
a0ed51b3
LW
3507 if (hardcount)
3508 c = hardcount;
3509 else
3510 c = scan - PL_reginput;
3280af22 3511 PL_reginput = scan;
a687059c 3512
c277df42
IZ
3513 DEBUG_r(
3514 {
3515 SV *prop = sv_newmortal();
3516
3517 regprop(prop, p);
3518 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3519 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3520 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3521 });
3522
a0d0e21e 3523 return(c);
a687059c
LW
3524}
3525
3526/*
c277df42
IZ
3527 - regrepeat_hard - repeatedly match something, report total lenth and length
3528 *
3529 * The repeater is supposed to have constant length.
3530 */
3531
76e3520e 3532STATIC I32
cea2e8a9 3533S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3534{
5c0ca799 3535 dTHR;
c277df42
IZ
3536 register char *scan;
3537 register char *start;
3280af22 3538 register char *loceol = PL_regeol;
a0ed51b3 3539 I32 l = 0;
708e3b05 3540 I32 count = 0, res = 1;
a0ed51b3
LW
3541
3542 if (!max)
3543 return 0;
c277df42 3544
3280af22 3545 start = PL_reginput;
a0ed51b3 3546 if (UTF) {
708e3b05 3547 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3548 if (!count++) {
3549 l = 0;
3550 while (start < PL_reginput) {
3551 l++;
3552 start += UTF8SKIP(start);
3553 }
3554 *lp = l;
3555 if (l == 0)
3556 return max;
3557 }
3558 if (count == max)
3559 return count;
3560 }
3561 }
3562 else {
708e3b05 3563 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3564 if (!count++) {
3565 *lp = l = PL_reginput - start;
3566 if (max != REG_INFTY && l*max < loceol - scan)
3567 loceol = scan + l*max;
3568 if (l == 0)
3569 return max;
c277df42
IZ
3570 }
3571 }
3572 }
708e3b05 3573 if (!res)
3280af22 3574 PL_reginput = scan;
c277df42 3575
a0ed51b3 3576 return count;
c277df42
IZ
3577}
3578
3579/*
cb8d8820 3580 - reginclass - determine if a character falls into a character class
bbce6d69
PP
3581 */
3582
76e3520e 3583STATIC bool
936ed897 3584S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3585{
5c0ca799 3586 dTHR;
b8c5462f 3587 char flags = ANYOF_FLAGS(p);
bbce6d69
PP
3588 bool match = FALSE;
3589
3590 c &= 0xFF;
b8c5462f 3591 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69
PP
3592 match = TRUE;
3593 else if (flags & ANYOF_FOLD) {
3594 I32 cf;
3595 if (flags & ANYOF_LOCALE) {
3280af22 3596 PL_reg_flags |= RF_tainted;
22c35a8c 3597 cf = PL_fold_locale[c];
bbce6d69
PP
3598 }
3599 else
22c35a8c 3600 cf = PL_fold[c];
b8c5462f 3601 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69
PP
3602 match = TRUE;
3603 }
3604
b8c5462f 3605 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3606 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3607 if (
3608 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3609 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3610 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3611 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3612 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3613 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3614 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3615 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3616 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3617 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3618 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3619 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3620 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3621 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3622 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3623 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3624 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3625 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3626 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3627 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3628 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3629 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3630 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3631 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3632 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3633 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3634 ) /* How's that for a conditional? */
bbce6d69
PP
3635 {
3636 match = TRUE;
3637 }
3638 }
3639
ae5c130c 3640 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69
PP
3641}
3642
a0ed51b3 3643STATIC bool
cea2e8a9 3644S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3645{
3646 dTHR;
a0ed51b3
LW
3647 char flags = ARG1(f);
3648 bool match = FALSE;
3649 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3650
3651 if (swash_fetch(sv, p))
3652 match = TRUE;
3653 else if (flags & ANYOF_FOLD) {
806e7201 3654 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3655 if (flags & ANYOF_LOCALE) {
3656 PL_reg_flags |= RF_tainted;
3657 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3658 }
3659 else
3660 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3661 if (swash_fetch(sv, tmpbuf))
3662 match = TRUE;
3663 }
3664
b8c5462f 3665 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3666
3667 return (flags & ANYOF_INVERT) ? !match : match;
3668}
161b471a 3669
dfe13c55 3670STATIC U8 *
cea2e8a9 3671S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3672{
3673 dTHR;
a0ed51b3
LW
3674 if (off >= 0) {
3675 while (off-- && s < (U8*)PL_regeol)
3676 s += UTF8SKIP(s);
3677 }
3678 else {
3679 while (off++) {
3680 if (s > (U8*)PL_bostr) {
3681 s--;
3682 if (*s & 0x80) {
3683 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3684 s--;
3685 } /* XXX could check well-formedness here */
3686 }
3687 }
3688 }
3689 return s;
3690}
161b471a 3691
dfe13c55 3692STATIC U8 *
cea2e8a9 3693S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3694{
c485e607 3695 dTHR;
a0ed51b3
LW
3696 if (off >= 0) {
3697 while (off-- && s < (U8*)PL_regeol)
3698 s += UTF8SKIP(s);
3699 if (off >= 0)
3700 return 0;
3701 }
3702 else {
3703 while (off++) {
3704 if (s > (U8*)PL_bostr) {
3705 s--;
3706 if (*s & 0x80) {
3707 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3708 s--;
3709 } /* XXX could check well-formedness here */
3710 }
3711 else
3712 break;
3713 }
3714 if (off <= 0)
3715 return 0;
3716 }
3717 return s;
3718}
51371543
GS
3719
3720#ifdef PERL_OBJECT
51371543
GS
3721#include "XSUB.h"
3722#endif
3723
3724static void
3725restore_pos(pTHXo_ void *arg)