This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perlrequick.pod updates (from Mark Kvale <kvale@phy.ucsf.edu>)
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
6eb5f6b9
JH
278/* XXXX Some places assume that there is a fixed substring.
279 An update may be needed if optimizer marks as "INTUITable"
280 RExen without fixed substrings. Similarly, it is assumed that
281 lengths of all the strings are no more than minlen, thus they
282 cannot come from lookahead.
283 (Or minlen should take into account lookahead.) */
284
2c2d71f5
JH
285/* A failure to find a constant substring means that there is no need to make
286 an expensive call to REx engine, thus we celebrate a failure. Similarly,
287 finding a substring too deep into the string means that less calls to
30944b6d
IZ
288 regtry() should be needed.
289
290 REx compiler's optimizer found 4 possible hints:
291 a) Anchored substring;
292 b) Fixed substring;
293 c) Whether we are anchored (beginning-of-line or \G);
294 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 295 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
296 string which does not contradict any of them.
297 */
2c2d71f5 298
6eb5f6b9
JH
299/* Most of decisions we do here should have been done at compile time.
300 The nodes of the REx which we used for the search should have been
301 deleted from the finite automaton. */
302
cad2e5aa
JH
303char *
304Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
305 char *strend, U32 flags, re_scream_pos_data *data)
306{
2c2d71f5 307 register I32 start_shift;
cad2e5aa 308 /* Should be nonnegative! */
2c2d71f5
JH
309 register I32 end_shift;
310 register char *s;
311 register SV *check;
cad2e5aa
JH
312 char *t;
313 I32 ml_anch;
2c2d71f5 314 char *tmp;
6eb5f6b9
JH
315 register char *other_last = Nullch; /* other substr checked before this */
316 char *check_at; /* check substr found at this pos */
30944b6d
IZ
317#ifdef DEBUGGING
318 char *i_strpos = strpos;
319#endif
cad2e5aa
JH
320
321 DEBUG_r( if (!PL_colorset) reginitcolors() );
322 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 323 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
324 PL_colors[4],PL_colors[5],PL_colors[0],
325 prog->precomp,
326 PL_colors[1],
327 (strlen(prog->precomp) > 60 ? "..." : ""),
328 PL_colors[0],
b900a521 329 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
330 strpos, PL_colors[1],
331 (strend - strpos > 60 ? "..." : ""))
332 );
333
2c2d71f5
JH
334 if (prog->minlen > strend - strpos) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 336 goto fail;
2c2d71f5 337 }
653099ff 338 check = prog->check_substr;
2c2d71f5 339 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
340 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
341 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 342 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
343
344 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 345 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
346 I32 slen;
347
348 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
05b4157f
GS
349 /* SvCUR is not set on references: SvRV and SvPVX overlap */
350 && sv && !SvROK(sv)
351 && (strpos + SvCUR(sv) != strend)) {
2c2d71f5 352 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 353 goto fail;
2c2d71f5 354 }
adac82c7 355 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 356 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
357 if (SvTAIL(check)) {
358 slen = SvCUR(check); /* >= 1 */
cad2e5aa 359
2c2d71f5
JH
360 if ( strend - s > slen || strend - s < slen - 1
361 || (strend - s == slen && strend[-1] != '\n')) {
362 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
363 goto fail_finish;
cad2e5aa
JH
364 }
365 /* Now should match s[0..slen-2] */
366 slen--;
653099ff 367 if (slen && (*SvPVX(check) != *s
cad2e5aa 368 || (slen > 1
653099ff 369 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
370 report_neq:
371 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
372 goto fail_finish;
373 }
cad2e5aa 374 }
653099ff
GS
375 else if (*SvPVX(check) != *s
376 || ((slen = SvCUR(check)) > 1
377 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
378 goto report_neq;
379 goto success_at_start;
cad2e5aa 380 }
2c2d71f5 381 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 382 s = strpos;
2c2d71f5 383 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 384 end_shift = prog->minlen - start_shift -
653099ff 385 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 386 if (!ml_anch) {
653099ff
GS
387 I32 end = prog->check_offset_max + CHR_SVLEN(check)
388 - (SvTAIL(check) != 0);
2c2d71f5
JH
389 I32 eshift = strend - s - end;
390
391 if (end_shift < eshift)
392 end_shift = eshift;
393 }
cad2e5aa 394 }
2c2d71f5 395 else { /* Can match at random position */
cad2e5aa
JH
396 ml_anch = 0;
397 s = strpos;
2c2d71f5
JH
398 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
399 /* Should be nonnegative! */
400 end_shift = prog->minlen - start_shift -
653099ff 401 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
402 }
403
2c2d71f5 404#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 405 if (end_shift < 0)
6bbae5e6 406 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
407#endif
408
2c2d71f5
JH
409 restart:
410 /* Find a possible match in the region s..strend by looking for
411 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 412 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
413 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
414 I32 p = -1; /* Internal iterator of scream. */
415 I32 *pp = data ? data->scream_pos : &p;
416
2c2d71f5
JH
417 if (PL_screamfirst[BmRARE(check)] >= 0
418 || ( BmRARE(check) == '\n'
419 && (BmPREVIOUS(check) == SvCUR(check) - 1)
420 && SvTAIL(check) ))
421 s = screaminstr(sv, check,
422 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 423 else
2c2d71f5 424 goto fail_finish;
cad2e5aa
JH
425 if (data)
426 *data->scream_olds = s;
427 }
428 else
429 s = fbm_instr((unsigned char*)s + start_shift,
430 (unsigned char*)strend - end_shift,
2c2d71f5 431 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
432
433 /* Update the count-of-usability, remove useless subpatterns,
434 unshift s. */
2c2d71f5
JH
435
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
437 (s ? "Found" : "Did not find"),
438 ((check == prog->anchored_substr) ? "anchored" : "floating"),
439 PL_colors[0],
7b0972df
JH
440 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
441 SvPVX(check),
2c2d71f5
JH
442 PL_colors[1], (SvTAIL(check) ? "$" : ""),
443 (s ? " at offset " : "...\n") ) );
444
445 if (!s)
446 goto fail_finish;
447
6eb5f6b9
JH
448 check_at = s;
449
2c2d71f5 450 /* Finish the diagnostic message */
30944b6d 451 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
452
453 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
454 Start with the other substr.
455 XXXX no SCREAM optimization yet - and a very coarse implementation
456 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
457 *always* match. Probably should be marked during compile...
458 Probably it is right to do no SCREAM here...
459 */
460
461 if (prog->float_substr && prog->anchored_substr) {
30944b6d 462 /* Take into account the "other" substring. */
2c2d71f5
JH
463 /* XXXX May be hopelessly wrong for UTF... */
464 if (!other_last)
6eb5f6b9 465 other_last = strpos;
2c2d71f5 466 if (check == prog->float_substr) {
30944b6d
IZ
467 do_other_anchored:
468 {
2c2d71f5
JH
469 char *last = s - start_shift, *last1, *last2;
470 char *s1 = s;
471
472 tmp = PL_bostr;
473 t = s - prog->check_offset_max;
474 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
475 && (!(prog->reganch & ROPT_UTF8)
476 || (PL_bostr = strpos, /* Used in regcopmaybe() */
477 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
478 && t > strpos)))
30944b6d 479 /* EMPTY */;
2c2d71f5
JH
480 else
481 t = strpos;
482 t += prog->anchored_offset;
6eb5f6b9
JH
483 if (t < other_last) /* These positions already checked */
484 t = other_last;
2c2d71f5
JH
485 PL_bostr = tmp;
486 last2 = last1 = strend - prog->minlen;
487 if (last < last1)
488 last1 = last;
489 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
490 /* On end-of-str: see comment below. */
491 s = fbm_instr((unsigned char*)t,
492 (unsigned char*)last1 + prog->anchored_offset
493 + SvCUR(prog->anchored_substr)
494 - (SvTAIL(prog->anchored_substr)!=0),
495 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
496 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
497 (s ? "Found" : "Contradicts"),
498 PL_colors[0],
7b0972df
JH
499 (int)(SvCUR(prog->anchored_substr)
500 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
501 SvPVX(prog->anchored_substr),
502 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
503 if (!s) {
504 if (last1 >= last2) {
505 DEBUG_r(PerlIO_printf(Perl_debug_log,
506 ", giving up...\n"));
507 goto fail_finish;
508 }
509 DEBUG_r(PerlIO_printf(Perl_debug_log,
510 ", trying floating at offset %ld...\n",
30944b6d 511 (long)(s1 + 1 - i_strpos)));
2c2d71f5 512 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 513 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
514 s = HOPc(last, 1);
515 goto restart;
516 }
517 else {
518 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 519 (long)(s - i_strpos)));
2c2d71f5 520 t = s - prog->anchored_offset;
6eb5f6b9 521 other_last = s + 1;
30944b6d 522 s = s1;
2c2d71f5
JH
523 if (t == strpos)
524 goto try_at_start;
2c2d71f5
JH
525 goto try_at_offset;
526 }
30944b6d 527 }
2c2d71f5
JH
528 }
529 else { /* Take into account the floating substring. */
530 char *last, *last1;
531 char *s1 = s;
532
533 t = s - start_shift;
534 last1 = last = strend - prog->minlen + prog->float_min_offset;
535 if (last - t > prog->float_max_offset)
536 last = t + prog->float_max_offset;
537 s = t + prog->float_min_offset;
6eb5f6b9
JH
538 if (s < other_last)
539 s = other_last;
2c2d71f5
JH
540 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
541 /* fbm_instr() takes into account exact value of end-of-str
542 if the check is SvTAIL(ed). Since false positives are OK,
543 and end-of-str is not later than strend we are OK. */
544 s = fbm_instr((unsigned char*)s,
545 (unsigned char*)last + SvCUR(prog->float_substr)
546 - (SvTAIL(prog->float_substr)!=0),
547 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
548 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
549 (s ? "Found" : "Contradicts"),
550 PL_colors[0],
7b0972df
JH
551 (int)(SvCUR(prog->float_substr)
552 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
553 SvPVX(prog->float_substr),
554 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
555 if (!s) {
556 if (last1 == last) {
557 DEBUG_r(PerlIO_printf(Perl_debug_log,
558 ", giving up...\n"));
559 goto fail_finish;
560 }
561 DEBUG_r(PerlIO_printf(Perl_debug_log,
562 ", trying anchored starting at offset %ld...\n",
30944b6d 563 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 564 other_last = last + 1;
2c2d71f5
JH
565 PL_regeol = strend; /* Used in HOP() */
566 s = HOPc(t, 1);
567 goto restart;
568 }
569 else {
570 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 571 (long)(s - i_strpos)));
6eb5f6b9 572 other_last = s + 1;
30944b6d 573 s = s1;
2c2d71f5
JH
574 if (t == strpos)
575 goto try_at_start;
2c2d71f5
JH
576 goto try_at_offset;
577 }
578 }
cad2e5aa 579 }
2c2d71f5
JH
580
581 t = s - prog->check_offset_max;
582 tmp = PL_bostr;
583 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
584 && (!(prog->reganch & ROPT_UTF8)
585 || (PL_bostr = strpos, /* Used in regcopmaybe() */
586 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
587 && t > strpos)))) {
588 PL_bostr = tmp;
589 /* Fixed substring is found far enough so that the match
590 cannot start at strpos. */
591 try_at_offset:
cad2e5aa 592 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
593 /* Eventually fbm_*() should handle this, but often
594 anchored_offset is not 0, so this check will not be wasted. */
595 /* XXXX In the code below we prefer to look for "^" even in
596 presence of anchored substrings. And we search even
597 beyond the found float position. These pessimizations
598 are historical artefacts only. */
599 find_anchor:
2c2d71f5 600 while (t < strend - prog->minlen) {
cad2e5aa
JH
601 if (*t == '\n') {
602 if (t < s - prog->check_offset_min) {
30944b6d
IZ
603 if (prog->anchored_substr) {
604 /* We definitely contradict the found anchored
605 substr. Due to the above check we do not
606 contradict "check" substr.
607 Thus we can arrive here only if check substr
608 is float. Redo checking for "other"=="fixed".
609 */
610 strpos = t + 1;
611 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
612 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
613 goto do_other_anchored;
614 }
cad2e5aa 615 s = t + 1;
2c2d71f5 616 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 617 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
618 goto set_useful;
619 }
2c2d71f5 620 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
621 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
622 strpos = s = t + 1;
cad2e5aa
JH
623 goto restart;
624 }
625 t++;
626 }
2c2d71f5
JH
627 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
628 PL_colors[0],PL_colors[1]));
629 goto fail_finish;
cad2e5aa
JH
630 }
631 s = t;
632 set_useful:
2c2d71f5 633 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
634 }
635 else {
2c2d71f5
JH
636 PL_bostr = tmp;
637 /* The found string does not prohibit matching at beg-of-str
638 - no optimization of calling REx engine can be performed,
639 unless it was an MBOL and we are not after MBOL. */
640 try_at_start:
641 /* Even in this situation we may use MBOL flag if strpos is offset
642 wrt the start of the string. */
05b4157f 643 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
d506a20d
IZ
644 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n'
645 /* May be due to an implicit anchor of m{.*foo} */
646 && !(prog->reganch & ROPT_IMPLICIT))
647 {
cad2e5aa
JH
648 t = strpos;
649 goto find_anchor;
650 }
30944b6d
IZ
651 DEBUG_r( if (ml_anch)
652 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
653 PL_colors[0],PL_colors[1]);
654 );
2c2d71f5 655 success_at_start:
30944b6d 656 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 657 && prog->check_substr /* Could be deleted already */
cad2e5aa 658 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
659 && prog->check_substr == prog->float_substr)
660 {
cad2e5aa
JH
661 /* If flags & SOMETHING - do not do it many times on the same match */
662 SvREFCNT_dec(prog->check_substr);
663 prog->check_substr = Nullsv; /* disable */
664 prog->float_substr = Nullsv; /* clear */
665 s = strpos;
3cf5c195
IZ
666 /* XXXX This is a remnant of the old implementation. It
667 looks wasteful, since now INTUIT can use many
6eb5f6b9 668 other heuristics. */
cad2e5aa
JH
669 prog->reganch &= ~RE_USE_INTUIT;
670 }
671 else
672 s = strpos;
673 }
674
6eb5f6b9
JH
675 /* Last resort... */
676 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
677 if (prog->regstclass) {
678 /* minlen == 0 is possible if regstclass is \b or \B,
679 and the fixed substr is ''$.
680 Since minlen is already taken into account, s+1 is before strend;
681 accidentally, minlen >= 1 guaranties no false positives at s + 1
682 even for \b or \B. But (minlen? 1 : 0) below assumes that
683 regstclass does not come from lookahead... */
684 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
685 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
686 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
687 ? STR_LEN(prog->regstclass)
688 : 1);
6eb5f6b9 689 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
690 ? s + (prog->minlen? cl_l : 0)
691 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 692 : strend) ;
8fdd422b 693 char *startpos = sv && SvPOK(sv) ? strend - SvCUR(sv) : s;
6eb5f6b9
JH
694
695 t = s;
76384e4a
GS
696 if (prog->reganch & ROPT_UTF8) {
697 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
698 PL_bostr = startpos;
699 }
6eb5f6b9
JH
700 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
701 if (!s) {
702#ifdef DEBUGGING
703 char *what;
704#endif
705 if (endpos == strend) {
706 DEBUG_r( PerlIO_printf(Perl_debug_log,
707 "Could not match STCLASS...\n") );
708 goto fail;
709 }
66e933ab
GS
710 DEBUG_r( PerlIO_printf(Perl_debug_log,
711 "This position contradicts STCLASS...\n") );
653099ff
GS
712 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
713 goto fail;
6eb5f6b9
JH
714 /* Contradict one of substrings */
715 if (prog->anchored_substr) {
6eb5f6b9
JH
716 if (prog->anchored_substr == check) {
717 DEBUG_r( what = "anchored" );
718 hop_and_restart:
719 PL_regeol = strend; /* Used in HOP() */
720 s = HOPc(t, 1);
66e933ab
GS
721 if (s + start_shift + end_shift > strend) {
722 /* XXXX Should be taken into account earlier? */
723 DEBUG_r( PerlIO_printf(Perl_debug_log,
724 "Could not match STCLASS...\n") );
725 goto fail;
726 }
6eb5f6b9 727 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 728 "Trying %s substr starting at offset %ld...\n",
6eb5f6b9
JH
729 what, (long)(s + start_shift - i_strpos)) );
730 goto restart;
731 }
66e933ab 732 /* Have both, check_string is floating */
6eb5f6b9
JH
733 if (t + start_shift >= check_at) /* Contradicts floating=check */
734 goto retry_floating_check;
735 /* Recheck anchored substring, but not floating... */
736 s = check_at;
737 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 738 "Trying anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
739 (long)(other_last - i_strpos)) );
740 goto do_other_anchored;
741 }
60e71179
GS
742 /* Another way we could have checked stclass at the
743 current position only: */
744 if (ml_anch) {
745 s = t = t + 1;
746 DEBUG_r( PerlIO_printf(Perl_debug_log,
747 "Trying /^/m starting at offset %ld...\n",
748 (long)(t - i_strpos)) );
749 goto try_at_offset;
66e933ab 750 }
60e71179
GS
751 if (!prog->float_substr) /* Could have been deleted */
752 goto fail;
6eb5f6b9
JH
753 /* Check is floating subtring. */
754 retry_floating_check:
755 t = check_at - start_shift;
756 DEBUG_r( what = "floating" );
757 goto hop_and_restart;
758 }
759 DEBUG_r( if (t != s)
760 PerlIO_printf(Perl_debug_log,
761 "By STCLASS: moving %ld --> %ld\n",
762 (long)(t - i_strpos), (long)(s - i_strpos));
763 else
764 PerlIO_printf(Perl_debug_log,
765 "Does not contradict STCLASS...\n") );
766 }
2c2d71f5 767 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 768 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 769 return s;
2c2d71f5
JH
770
771 fail_finish: /* Substring not found */
66e933ab
GS
772 if (prog->check_substr) /* could be removed already */
773 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 774 fail:
2c2d71f5 775 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
776 PL_colors[4],PL_colors[5]));
777 return Nullch;
778}
9661b544 779
6eb5f6b9 780/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
781STATIC char *
782S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 783{
6eb5f6b9
JH
784 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
785 char *m;
d8093b23
G
786 STRLEN ln;
787 unsigned int c1;
788 unsigned int c2;
6eb5f6b9
JH
789 char *e;
790 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 791
6eb5f6b9
JH
792 /* We know what class it must start with. */
793 switch (OP(c)) {
794 case ANYOFUTF8:
795 while (s < strend) {
796 if (REGINCLASSUTF8(c, (U8*)s)) {
797 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 798 goto got_it;
3cf5c195 799 else
6eb5f6b9 800 tmp = doevery;
a0ed51b3 801 }
6eb5f6b9
JH
802 else
803 tmp = 1;
a0ed51b3
LW
804 s += UTF8SKIP(s);
805 }
6eb5f6b9
JH
806 break;
807 case ANYOF:
a0ed51b3 808 while (s < strend) {
d8093b23 809 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
810 if (tmp && (norun || regtry(prog, s)))
811 goto got_it;
812 else
813 tmp = doevery;
a0ed51b3 814 }
6eb5f6b9
JH
815 else
816 tmp = 1;
a0ed51b3 817 s++;
a0d0e21e 818 }
6eb5f6b9
JH
819 break;
820 case EXACTF:
821 m = STRING(c);
822 ln = STR_LEN(c);
d8093b23 823 c1 = *(U8*)m;
6eb5f6b9
JH
824 c2 = PL_fold[c1];
825 goto do_exactf;
826 case EXACTFL:
827 m = STRING(c);
828 ln = STR_LEN(c);
d8093b23 829 c1 = *(U8*)m;
6eb5f6b9
JH
830 c2 = PL_fold_locale[c1];
831 do_exactf:
832 e = strend - ln;
b3c9acc1 833
6eb5f6b9
JH
834 if (norun && e < s)
835 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
836 /* Here it is NOT UTF! */
837 if (c1 == c2) {
838 while (s <= e) {
d8093b23 839 if ( *(U8*)s == c1
66e933ab
GS
840 && (ln == 1 || !(OP(c) == EXACTF
841 ? ibcmp(s, m, ln)
842 : ibcmp_locale(s, m, ln)))
6eb5f6b9 843 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
844 goto got_it;
845 s++;
846 }
847 } else {
848 while (s <= e) {
d8093b23 849 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
850 && (ln == 1 || !(OP(c) == EXACTF
851 ? ibcmp(s, m, ln)
852 : ibcmp_locale(s, m, ln)))
6eb5f6b9 853 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
854 goto got_it;
855 s++;
856 }
857 }
858 break;
bbce6d69 859 case BOUNDL:
3280af22 860 PL_reg_flags |= RF_tainted;
bbce6d69 861 /* FALL THROUGH */
a0d0e21e 862 case BOUND:
6eb5f6b9 863 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 864 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 865 while (s < strend) {
95bac841 866 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 867 tmp = !tmp;
6eb5f6b9 868 if ((norun || regtry(prog, s)))
a0d0e21e 869 goto got_it;
a687059c 870 }
a0d0e21e
LW
871 s++;
872 }
6eb5f6b9 873 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
874 goto got_it;
875 break;
a0ed51b3
LW
876 case BOUNDLUTF8:
877 PL_reg_flags |= RF_tainted;
878 /* FALL THROUGH */
879 case BOUNDUTF8:
6eb5f6b9 880 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 881 tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 882 while (s < strend) {
76384e4a 883 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
884 swash_fetch(PL_utf8_alnum, (U8*)s) :
885 isALNUM_LC_utf8((U8*)s)))
886 {
a0ed51b3 887 tmp = !tmp;
6eb5f6b9 888 if ((norun || regtry(prog, s)))
a0ed51b3
LW
889 goto got_it;
890 }
891 s += UTF8SKIP(s);
892 }
6eb5f6b9 893 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
894 goto got_it;
895 break;
bbce6d69 896 case NBOUNDL:
3280af22 897 PL_reg_flags |= RF_tainted;
bbce6d69 898 /* FALL THROUGH */
a0d0e21e 899 case NBOUND:
6eb5f6b9 900 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 901 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 902 while (s < strend) {
95bac841 903 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 904 tmp = !tmp;
6eb5f6b9 905 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
906 goto got_it;
907 s++;
908 }
6eb5f6b9 909 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
910 goto got_it;
911 break;
a0ed51b3
LW
912 case NBOUNDLUTF8:
913 PL_reg_flags |= RF_tainted;
914 /* FALL THROUGH */
915 case NBOUNDUTF8:
6eb5f6b9 916 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 917 tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 918 while (s < strend) {
76384e4a 919 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
920 swash_fetch(PL_utf8_alnum, (U8*)s) :
921 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 922 tmp = !tmp;
6eb5f6b9 923 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
924 goto got_it;
925 s += UTF8SKIP(s);
926 }
6eb5f6b9 927 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
928 goto got_it;
929 break;
a0d0e21e
LW
930 case ALNUM:
931 while (s < strend) {
bbce6d69 932 if (isALNUM(*s)) {
6eb5f6b9 933 if (tmp && (norun || regtry(prog, s)))
bbce6d69 934 goto got_it;
935 else
936 tmp = doevery;
937 }
938 else
939 tmp = 1;
940 s++;
941 }
942 break;
a0ed51b3
LW
943 case ALNUMUTF8:
944 while (s < strend) {
dfe13c55 945 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 946 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
947 goto got_it;
948 else
949 tmp = doevery;
950 }
951 else
952 tmp = 1;
953 s += UTF8SKIP(s);
954 }
955 break;
bbce6d69 956 case ALNUML:
3280af22 957 PL_reg_flags |= RF_tainted;
bbce6d69 958 while (s < strend) {
959 if (isALNUM_LC(*s)) {
6eb5f6b9 960 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 961 goto got_it;
a687059c 962 else
a0d0e21e
LW
963 tmp = doevery;
964 }
965 else
966 tmp = 1;
967 s++;
968 }
969 break;
a0ed51b3
LW
970 case ALNUMLUTF8:
971 PL_reg_flags |= RF_tainted;
972 while (s < strend) {
dfe13c55 973 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 974 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
975 goto got_it;
976 else
977 tmp = doevery;
978 }
979 else
980 tmp = 1;
981 s += UTF8SKIP(s);
982 }
983 break;
a0d0e21e
LW
984 case NALNUM:
985 while (s < strend) {
bbce6d69 986 if (!isALNUM(*s)) {
6eb5f6b9 987 if (tmp && (norun || regtry(prog, s)))
bbce6d69 988 goto got_it;
989 else
990 tmp = doevery;
991 }
992 else
993 tmp = 1;
994 s++;
995 }
996 break;
a0ed51b3
LW
997 case NALNUMUTF8:
998 while (s < strend) {
dfe13c55 999 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1000 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1001 goto got_it;
1002 else
1003 tmp = doevery;
1004 }
1005 else
1006 tmp = 1;
1007 s += UTF8SKIP(s);
1008 }
1009 break;
bbce6d69 1010 case NALNUML:
3280af22 1011 PL_reg_flags |= RF_tainted;
bbce6d69 1012 while (s < strend) {
1013 if (!isALNUM_LC(*s)) {
6eb5f6b9 1014 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1015 goto got_it;
a687059c 1016 else
a0d0e21e 1017 tmp = doevery;
a687059c 1018 }
a0d0e21e
LW
1019 else
1020 tmp = 1;
1021 s++;
1022 }
1023 break;
a0ed51b3
LW
1024 case NALNUMLUTF8:
1025 PL_reg_flags |= RF_tainted;
1026 while (s < strend) {
dfe13c55 1027 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1028 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1029 goto got_it;
1030 else
1031 tmp = doevery;
1032 }
1033 else
1034 tmp = 1;
1035 s += UTF8SKIP(s);
1036 }
1037 break;
a0d0e21e
LW
1038 case SPACE:
1039 while (s < strend) {
1040 if (isSPACE(*s)) {
6eb5f6b9 1041 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1042 goto got_it;
1043 else
1044 tmp = doevery;
2304df62 1045 }
a0d0e21e
LW
1046 else
1047 tmp = 1;
1048 s++;
1049 }
1050 break;
a0ed51b3
LW
1051 case SPACEUTF8:
1052 while (s < strend) {
dfe13c55 1053 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1054 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1055 goto got_it;
1056 else
1057 tmp = doevery;
1058 }
1059 else
1060 tmp = 1;
1061 s += UTF8SKIP(s);
1062 }
1063 break;
bbce6d69 1064 case SPACEL:
3280af22 1065 PL_reg_flags |= RF_tainted;
bbce6d69 1066 while (s < strend) {
1067 if (isSPACE_LC(*s)) {
6eb5f6b9 1068 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1069 goto got_it;
1070 else
1071 tmp = doevery;
1072 }
1073 else
1074 tmp = 1;
1075 s++;
1076 }
1077 break;
a0ed51b3
LW
1078 case SPACELUTF8:
1079 PL_reg_flags |= RF_tainted;
1080 while (s < strend) {
dfe13c55 1081 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1082 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1083 goto got_it;
1084 else
1085 tmp = doevery;
1086 }
1087 else
1088 tmp = 1;
1089 s += UTF8SKIP(s);
1090 }
1091 break;
a0d0e21e
LW
1092 case NSPACE:
1093 while (s < strend) {
1094 if (!isSPACE(*s)) {
6eb5f6b9 1095 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1096 goto got_it;
1097 else
1098 tmp = doevery;
a687059c 1099 }
a0d0e21e
LW
1100 else
1101 tmp = 1;
1102 s++;
1103 }
1104 break;
a0ed51b3
LW
1105 case NSPACEUTF8:
1106 while (s < strend) {
dfe13c55 1107 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1108 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1109 goto got_it;
1110 else
1111 tmp = doevery;
1112 }
1113 else
1114 tmp = 1;
1115 s += UTF8SKIP(s);
1116 }
1117 break;
bbce6d69 1118 case NSPACEL:
3280af22 1119 PL_reg_flags |= RF_tainted;
bbce6d69 1120 while (s < strend) {
1121 if (!isSPACE_LC(*s)) {
6eb5f6b9 1122 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1123 goto got_it;
1124 else
1125 tmp = doevery;
1126 }
1127 else
1128 tmp = 1;
1129 s++;
1130 }
1131 break;
a0ed51b3
LW
1132 case NSPACELUTF8:
1133 PL_reg_flags |= RF_tainted;
1134 while (s < strend) {
dfe13c55 1135 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1136 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1137 goto got_it;
1138 else
1139 tmp = doevery;
1140 }
1141 else
1142 tmp = 1;
1143 s += UTF8SKIP(s);
1144 }
1145 break;
a0d0e21e
LW
1146 case DIGIT:
1147 while (s < strend) {
1148 if (isDIGIT(*s)) {
6eb5f6b9 1149 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1150 goto got_it;
1151 else
1152 tmp = doevery;
2b69d0c2 1153 }
a0d0e21e
LW
1154 else
1155 tmp = 1;
1156 s++;
1157 }
1158 break;
a0ed51b3
LW
1159 case DIGITUTF8:
1160 while (s < strend) {
dfe13c55 1161 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1162 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1163 goto got_it;
1164 else
1165 tmp = doevery;
1166 }
1167 else
1168 tmp = 1;
1169 s += UTF8SKIP(s);
1170 }
1171 break;
b8c5462f
JH
1172 case DIGITL:
1173 PL_reg_flags |= RF_tainted;
1174 while (s < strend) {
1175 if (isDIGIT_LC(*s)) {
6eb5f6b9 1176 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1177 goto got_it;
1178 else
1179 tmp = doevery;
1180 }
1181 else
1182 tmp = 1;
1183 s++;
1184 }
1185 break;
1186 case DIGITLUTF8:
1187 PL_reg_flags |= RF_tainted;
1188 while (s < strend) {
1189 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1190 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
1195 else
1196 tmp = 1;
1197 s += UTF8SKIP(s);
1198 }
1199 break;
a0d0e21e
LW
1200 case NDIGIT:
1201 while (s < strend) {
1202 if (!isDIGIT(*s)) {
6eb5f6b9 1203 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1204 goto got_it;
1205 else
1206 tmp = doevery;
a687059c 1207 }
a0d0e21e
LW
1208 else
1209 tmp = 1;
1210 s++;
1211 }
1212 break;
a0ed51b3
LW
1213 case NDIGITUTF8:
1214 while (s < strend) {
dfe13c55 1215 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1216 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1217 goto got_it;
1218 else
1219 tmp = doevery;
1220 }
1221 else
1222 tmp = 1;
1223 s += UTF8SKIP(s);
1224 }
1225 break;
b8c5462f
JH
1226 case NDIGITL:
1227 PL_reg_flags |= RF_tainted;
1228 while (s < strend) {
1229 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1230 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1231 goto got_it;
1232 else
1233 tmp = doevery;
1234 }
1235 else
1236 tmp = 1;
1237 s++;
a0ed51b3 1238 }
b8c5462f
JH
1239 break;
1240 case NDIGITLUTF8:
1241 PL_reg_flags |= RF_tainted;
1242 while (s < strend) {
1243 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1244 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1245 goto got_it;
cf93c79d 1246 else
b8c5462f
JH
1247 tmp = doevery;
1248 }
1249 else
1250 tmp = 1;
1251 s += UTF8SKIP(s);
1252 }
1253 break;
b3c9acc1 1254 default:
3c3eec57
GS
1255 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1256 break;
d6a28714 1257 }
6eb5f6b9
JH
1258 return 0;
1259 got_it:
1260 return s;
1261}
1262
1263/*
1264 - regexec_flags - match a regexp against a string
1265 */
1266I32
1267Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1268 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1269/* strend: pointer to null at end of string */
1270/* strbeg: real beginning of string */
1271/* minend: end of match must be >=minend after stringarg. */
1272/* data: May be used for some additional optimizations. */
1273/* nosave: For optimizations. */
1274{
1275 dTHR;
1276 register char *s;
1277 register regnode *c;
1278 register char *startpos = stringarg;
6eb5f6b9
JH
1279 I32 minlen; /* must match at least this many chars */
1280 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1281 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1282 constant substr. */ /* CC */
1283 I32 end_shift = 0; /* Same for the end. */ /* CC */
1284 I32 scream_pos = -1; /* Internal iterator of scream. */
1285 char *scream_olds;
1286 SV* oreplsv = GvSV(PL_replgv);
1287
1288 PL_regcc = 0;
1289
1290 cache_re(prog);
1291#ifdef DEBUGGING
1292 PL_regnarrate = PL_debug & 512;
1293#endif
1294
1295 /* Be paranoid... */
1296 if (prog == NULL || startpos == NULL) {
1297 Perl_croak(aTHX_ "NULL regexp parameter");
1298 return 0;
1299 }
1300
1301 minlen = prog->minlen;
1302 if (strend - startpos < minlen) goto phooey;
1303
1304 if (startpos == strbeg) /* is ^ valid at stringarg? */
1305 PL_regprev = '\n';
1306 else {
1307 PL_regprev = (U32)stringarg[-1];
1308 if (!PL_multiline && PL_regprev == '\n')
1309 PL_regprev = '\0'; /* force ^ to NOT match */
1310 }
1311
1312 /* Check validity of program. */
1313 if (UCHARAT(prog->program) != REG_MAGIC) {
1314 Perl_croak(aTHX_ "corrupted regexp program");
1315 }
1316
1317 PL_reg_flags = 0;
1318 PL_reg_eval_set = 0;
1319 PL_reg_maxiter = 0;
1320
1321 if (prog->reganch & ROPT_UTF8)
1322 PL_reg_flags |= RF_utf8;
1323
1324 /* Mark beginning of line for ^ and lookbehind. */
1325 PL_regbol = startpos;
1326 PL_bostr = strbeg;
1327 PL_reg_sv = sv;
1328
1329 /* Mark end of line for $ (and such) */
1330 PL_regeol = strend;
1331
1332 /* see how far we have to get to not match where we matched before */
1333 PL_regtill = startpos+minend;
1334
1335 /* We start without call_cc context. */
1336 PL_reg_call_cc = 0;
1337
1338 /* If there is a "must appear" string, look for it. */
1339 s = startpos;
1340
1341 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1342 MAGIC *mg;
1343
1344 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1345 PL_reg_ganch = startpos;
1346 else if (sv && SvTYPE(sv) >= SVt_PVMG
1347 && SvMAGIC(sv)
1348 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1349 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1350 if (prog->reganch & ROPT_ANCH_GPOS) {
1351 if (s > PL_reg_ganch)
1352 goto phooey;
1353 s = PL_reg_ganch;
1354 }
1355 }
1356 else /* pos() not defined */
1357 PL_reg_ganch = strbeg;
1358 }
1359
1360 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1361 re_scream_pos_data d;
1362
1363 d.scream_olds = &scream_olds;
1364 d.scream_pos = &scream_pos;
1365 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1366 if (!s)
1367 goto phooey; /* not present */
1368 }
1369
1370 DEBUG_r( if (!PL_colorset) reginitcolors() );
1371 DEBUG_r(PerlIO_printf(Perl_debug_log,
1372 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1373 PL_colors[4],PL_colors[5],PL_colors[0],
1374 prog->precomp,
1375 PL_colors[1],
1376 (strlen(prog->precomp) > 60 ? "..." : ""),
1377 PL_colors[0],
1378 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1379 startpos, PL_colors[1],
1380 (strend - startpos > 60 ? "..." : ""))
1381 );
1382
1383 /* Simplest case: anchored match need be tried only once. */
1384 /* [unless only anchor is BOL and multiline is set] */
1385 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1386 if (s == startpos && regtry(prog, startpos))
1387 goto got_it;
1388 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1389 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1390 {
1391 char *end;
1392
1393 if (minlen)
1394 dontbother = minlen - 1;
1395 end = HOPc(strend, -dontbother) - 1;
1396 /* for multiline we only have to try after newlines */
1397 if (prog->check_substr) {
1398 if (s == startpos)
1399 goto after_try;
1400 while (1) {
1401 if (regtry(prog, s))
1402 goto got_it;
1403 after_try:
1404 if (s >= end)
1405 goto phooey;
1406 if (prog->reganch & RE_USE_INTUIT) {
1407 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1408 if (!s)
1409 goto phooey;
1410 }
1411 else
1412 s++;
1413 }
1414 } else {
1415 if (s > startpos)
1416 s--;
1417 while (s < end) {
1418 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1419 if (regtry(prog, s))
1420 goto got_it;
1421 }
1422 }
1423 }
1424 }
1425 goto phooey;
1426 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1427 if (regtry(prog, PL_reg_ganch))
1428 goto got_it;
1429 goto phooey;
1430 }
1431
1432 /* Messy cases: unanchored match. */
1433 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1434 /* we have /x+whatever/ */
1435 /* it must be a one character string (XXXX Except UTF?) */
1436 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1437#ifdef DEBUGGING
1438 int did_match = 0;
1439#endif
1440
6eb5f6b9
JH
1441 if (UTF) {
1442 while (s < strend) {
1443 if (*s == ch) {
bf93d4cc 1444 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1445 if (regtry(prog, s)) goto got_it;
1446 s += UTF8SKIP(s);
1447 while (s < strend && *s == ch)
1448 s += UTF8SKIP(s);
1449 }
1450 s += UTF8SKIP(s);
1451 }
1452 }
1453 else {
1454 while (s < strend) {
1455 if (*s == ch) {
bf93d4cc 1456 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1457 if (regtry(prog, s)) goto got_it;
1458 s++;
1459 while (s < strend && *s == ch)
1460 s++;
1461 }
1462 s++;
1463 }
1464 }
bf93d4cc
GS
1465 DEBUG_r(did_match ||
1466 PerlIO_printf(Perl_debug_log,
1467 "Did not find anchored character...\n"));
6eb5f6b9
JH
1468 }
1469 /*SUPPRESS 560*/
1470 else if (prog->anchored_substr != Nullsv
1471 || (prog->float_substr != Nullsv
1472 && prog->float_max_offset < strend - s)) {
1473 SV *must = prog->anchored_substr
1474 ? prog->anchored_substr : prog->float_substr;
1475 I32 back_max =
1476 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1477 I32 back_min =
1478 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1479 char *last = HOPc(strend, /* Cannot start after this */
1480 -(I32)(CHR_SVLEN(must)
1481 - (SvTAIL(must) != 0) + back_min));
1482 char *last1; /* Last position checked before */
bf93d4cc
GS
1483#ifdef DEBUGGING
1484 int did_match = 0;
1485#endif
6eb5f6b9
JH
1486
1487 if (s > PL_bostr)
1488 last1 = HOPc(s, -1);
1489 else
1490 last1 = s - 1; /* bogus */
1491
1492 /* XXXX check_substr already used to find `s', can optimize if
1493 check_substr==must. */
1494 scream_pos = -1;
1495 dontbother = end_shift;
1496 strend = HOPc(strend, -dontbother);
1497 while ( (s <= last) &&
1498 ((flags & REXEC_SCREAM)
1499 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1500 end_shift, &scream_pos, 0))
1501 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1502 (unsigned char*)strend, must,
1503 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1504 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1505 if (HOPc(s, -back_max) > last1) {
1506 last1 = HOPc(s, -back_min);
1507 s = HOPc(s, -back_max);
1508 }
1509 else {
1510 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1511
1512 last1 = HOPc(s, -back_min);
1513 s = t;
1514 }
1515 if (UTF) {
1516 while (s <= last1) {
1517 if (regtry(prog, s))
1518 goto got_it;
1519 s += UTF8SKIP(s);
1520 }
1521 }
1522 else {
1523 while (s <= last1) {
1524 if (regtry(prog, s))
1525 goto got_it;
1526 s++;
1527 }
1528 }
1529 }
bf93d4cc
GS
1530 DEBUG_r(did_match ||
1531 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1532 ((must == prog->anchored_substr)
1533 ? "anchored" : "floating"),
1534 PL_colors[0],
1535 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1536 SvPVX(must),
1537 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1538 goto phooey;
1539 }
155aba94 1540 else if ((c = prog->regstclass)) {
66e933ab
GS
1541 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1542 /* don't bother with what can't match */
6eb5f6b9
JH
1543 strend = HOPc(strend, -(minlen - 1));
1544 if (find_byclass(prog, c, s, strend, startpos, 0))
1545 goto got_it;
bf93d4cc 1546 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1547 }
1548 else {
1549 dontbother = 0;
1550 if (prog->float_substr != Nullsv) { /* Trim the end. */
1551 char *last;
d6a28714
JH
1552
1553 if (flags & REXEC_SCREAM) {
1554 last = screaminstr(sv, prog->float_substr, s - strbeg,
1555 end_shift, &scream_pos, 1); /* last one */
1556 if (!last)
1557 last = scream_olds; /* Only one occurence. */
b8c5462f 1558 }
d6a28714
JH
1559 else {
1560 STRLEN len;
1561 char *little = SvPV(prog->float_substr, len);
1562
1563 if (SvTAIL(prog->float_substr)) {
1564 if (memEQ(strend - len + 1, little, len - 1))
1565 last = strend - len + 1;
1566 else if (!PL_multiline)
1567 last = memEQ(strend - len, little, len)
1568 ? strend - len : Nullch;
b8c5462f 1569 else
d6a28714
JH
1570 goto find_last;
1571 } else {
1572 find_last:
1573 if (len)
1574 last = rninstr(s, strend, little, little + len);
b8c5462f 1575 else
d6a28714 1576 last = strend; /* matching `$' */
b8c5462f 1577 }
b8c5462f 1578 }
bf93d4cc
GS
1579 if (last == NULL) {
1580 DEBUG_r(PerlIO_printf(Perl_debug_log,
1581 "%sCan't trim the tail, match fails (should not happen)%s\n",
1582 PL_colors[4],PL_colors[5]));
1583 goto phooey; /* Should not happen! */
1584 }
d6a28714
JH
1585 dontbother = strend - last + prog->float_min_offset;
1586 }
1587 if (minlen && (dontbother < minlen))
1588 dontbother = minlen - 1;
1589 strend -= dontbother; /* this one's always in bytes! */
1590 /* We don't know much -- general case. */
1591 if (UTF) {
1592 for (;;) {
1593 if (regtry(prog, s))
1594 goto got_it;
1595 if (s >= strend)
1596 break;
b8c5462f 1597 s += UTF8SKIP(s);
d6a28714
JH
1598 };
1599 }
1600 else {
1601 do {
1602 if (regtry(prog, s))
1603 goto got_it;
1604 } while (s++ < strend);
1605 }
1606 }
1607
1608 /* Failure. */
1609 goto phooey;
1610
1611got_it:
1612 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1613
1614 if (PL_reg_eval_set) {
1615 /* Preserve the current value of $^R */
1616 if (oreplsv != GvSV(PL_replgv))
1617 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1618 restored, the value remains
1619 the same. */
1620 restore_pos(aTHXo_ 0);
1621 }
1622
1623 /* make sure $`, $&, $', and $digit will work later */
1624 if ( !(flags & REXEC_NOT_FIRST) ) {
1625 if (RX_MATCH_COPIED(prog)) {
1626 Safefree(prog->subbeg);
1627 RX_MATCH_COPIED_off(prog);
1628 }
1629 if (flags & REXEC_COPY_STR) {
1630 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1631
1632 s = savepvn(strbeg, i);
1633 prog->subbeg = s;
1634 prog->sublen = i;
1635 RX_MATCH_COPIED_on(prog);
1636 }
1637 else {
1638 prog->subbeg = strbeg;
1639 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1640 }
1641 }
1642
1643 return 1;
1644
1645phooey:
bf93d4cc
GS
1646 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1647 PL_colors[4],PL_colors[5]));
d6a28714
JH
1648 if (PL_reg_eval_set)
1649 restore_pos(aTHXo_ 0);
1650 return 0;
1651}
1652
1653/*
1654 - regtry - try match at specific point
1655 */
1656STATIC I32 /* 0 failure, 1 success */
1657S_regtry(pTHX_ regexp *prog, char *startpos)
1658{
1659 dTHR;
1660 register I32 i;
1661 register I32 *sp;
1662 register I32 *ep;
1663 CHECKPOINT lastcp;
1664
1665 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1666 MAGIC *mg;
1667
1668 PL_reg_eval_set = RS_init;
1669 DEBUG_r(DEBUG_s(
b900a521
JH
1670 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1671 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1672 ));
e8347627 1673 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1674 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1675 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1676 SAVETMPS;
1677 /* Apparently this is not needed, judging by wantarray. */
e8347627 1678 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1679 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1680
1681 if (PL_reg_sv) {
1682 /* Make $_ available to executed code. */
1683 if (PL_reg_sv != DEFSV) {
1684 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1685 SAVESPTR(DEFSV);
1686 DEFSV = PL_reg_sv;
b8c5462f 1687 }
d6a28714
JH
1688
1689 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1690 && (mg = mg_find(PL_reg_sv, 'g')))) {
1691 /* prepare for quick setting of pos */
1692 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1693 mg = mg_find(PL_reg_sv, 'g');
1694 mg->mg_len = -1;
b8c5462f 1695 }
d6a28714
JH
1696 PL_reg_magic = mg;
1697 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1698 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1699 }
1700 if (!PL_reg_curpm)
0f79a09d 1701 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1702 PL_reg_curpm->op_pmregexp = prog;
1703 PL_reg_oldcurpm = PL_curpm;
1704 PL_curpm = PL_reg_curpm;
1705 if (RX_MATCH_COPIED(prog)) {
1706 /* Here is a serious problem: we cannot rewrite subbeg,
1707 since it may be needed if this match fails. Thus
1708 $` inside (?{}) could fail... */
1709 PL_reg_oldsaved = prog->subbeg;
1710 PL_reg_oldsavedlen = prog->sublen;
1711 RX_MATCH_COPIED_off(prog);
1712 }
1713 else
1714 PL_reg_oldsaved = Nullch;
1715 prog->subbeg = PL_bostr;
1716 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1717 }
1718 prog->startp[0] = startpos - PL_bostr;
1719 PL_reginput = startpos;
1720 PL_regstartp = prog->startp;
1721 PL_regendp = prog->endp;
1722 PL_reglastparen = &prog->lastparen;
1723 prog->lastparen = 0;
1724 PL_regsize = 0;
1725 DEBUG_r(PL_reg_starttry = startpos);
1726 if (PL_reg_start_tmpl <= prog->nparens) {
1727 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1728 if(PL_reg_start_tmp)
1729 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1730 else
1731 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1732 }
1733
1734 /* XXXX What this code is doing here?!!! There should be no need
1735 to do this again and again, PL_reglastparen should take care of
1736 this! */
1737 sp = prog->startp;
1738 ep = prog->endp;
1739 if (prog->nparens) {
1740 for (i = prog->nparens; i >= 1; i--) {
1741 *++sp = -1;
1742 *++ep = -1;
1743 }
1744 }
1745 REGCP_SET;
1746 if (regmatch(prog->program + 1)) {
1747 prog->endp[0] = PL_reginput - PL_bostr;
1748 return 1;
1749 }
1750 REGCP_UNWIND;
1751 return 0;
1752}
1753
1754/*
1755 - regmatch - main matching routine
1756 *
1757 * Conceptually the strategy is simple: check to see whether the current
1758 * node matches, call self recursively to see whether the rest matches,
1759 * and then act accordingly. In practice we make some effort to avoid
1760 * recursion, in particular by going through "ordinary" nodes (that don't
1761 * need to know whether the rest of the match failed) by a loop instead of
1762 * by recursion.
1763 */
1764/* [lwall] I've hoisted the register declarations to the outer block in order to
1765 * maybe save a little bit of pushing and popping on the stack. It also takes
1766 * advantage of machines that use a register save mask on subroutine entry.
1767 */
1768STATIC I32 /* 0 failure, 1 success */
1769S_regmatch(pTHX_ regnode *prog)
1770{
1771 dTHR;
1772 register regnode *scan; /* Current node. */
1773 regnode *next; /* Next node. */
1774 regnode *inner; /* Next node in internal branch. */
1775 register I32 nextchr; /* renamed nextchr - nextchar colides with
1776 function of same name */
1777 register I32 n; /* no or next */
1778 register I32 ln; /* len or last */
1779 register char *s; /* operand or save */
1780 register char *locinput = PL_reginput;
1781 register I32 c1, c2, paren; /* case fold search, parenth */
1782 int minmod = 0, sw = 0, logical = 0;
1783#ifdef DEBUGGING
1784 PL_regindent++;
1785#endif
1786
1787 /* Note that nextchr is a byte even in UTF */
1788 nextchr = UCHARAT(locinput);
1789 scan = prog;
1790 while (scan != NULL) {
1791#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1792#ifdef DEBUGGING
1793# define sayYES goto yes
1794# define sayNO goto no
7821416a
IZ
1795# define sayYES_FINAL goto yes_final
1796# define sayYES_LOUD goto yes_loud
1797# define sayNO_FINAL goto no_final
1798# define sayNO_SILENT goto do_no
d6a28714
JH
1799# define saySAME(x) if (x) goto yes; else goto no
1800# define REPORT_CODE_OFF 24
1801#else
1802# define sayYES return 1
1803# define sayNO return 0
7821416a
IZ
1804# define sayYES_FINAL return 1
1805# define sayYES_LOUD return 1
1806# define sayNO_FINAL return 0
1807# define sayNO_SILENT return 0
d6a28714
JH
1808# define saySAME(x) return x
1809#endif
1810 DEBUG_r( {
1811 SV *prop = sv_newmortal();
1812 int docolor = *PL_colors[0];
1813 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1814 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1815 /* The part of the string before starttry has one color
1816 (pref0_len chars), between starttry and current
1817 position another one (pref_len - pref0_len chars),
1818 after the current position the third one.
1819 We assume that pref0_len <= pref_len, otherwise we
1820 decrease pref0_len. */
1821 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1822 ? (5 + taill) - l : locinput - PL_bostr);
1823 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1824
1825 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1826 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1827 ? (5 + taill) - pref_len : PL_regeol - locinput);
1828 if (pref0_len < 0)
1829 pref0_len = 0;
1830 if (pref0_len > pref_len)
1831 pref0_len = pref_len;
1832 regprop(prop, scan);
1833 PerlIO_printf(Perl_debug_log,
b900a521
JH
1834 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1835 (IV)(locinput - PL_bostr),
d6a28714
JH
1836 PL_colors[4], pref0_len,
1837 locinput - pref_len, PL_colors[5],
1838 PL_colors[2], pref_len - pref0_len,
1839 locinput - pref_len + pref0_len, PL_colors[3],
1840 (docolor ? "" : "> <"),
1841 PL_colors[0], l, locinput, PL_colors[1],
1842 15 - l - pref_len + 1,
1843 "",
b900a521 1844 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1845 SvPVX(prop));
1846 } );
1847
1848 next = scan + NEXT_OFF(scan);
1849 if (next == scan)
1850 next = NULL;
1851
1852 switch (OP(scan)) {
1853 case BOL:
1854 if (locinput == PL_bostr
1855 ? PL_regprev == '\n'
1856 : (PL_multiline &&
1857 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1858 {
1859 /* regtill = regbol; */
b8c5462f
JH
1860 break;
1861 }
d6a28714
JH
1862 sayNO;
1863 case MBOL:
1864 if (locinput == PL_bostr
1865 ? PL_regprev == '\n'
1866 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1867 {
b8c5462f
JH
1868 break;
1869 }
d6a28714
JH
1870 sayNO;
1871 case SBOL:
1872 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1873 break;
d6a28714
JH
1874 sayNO;
1875 case GPOS:
1876 if (locinput == PL_reg_ganch)
1877 break;
1878 sayNO;
1879 case EOL:
1880 if (PL_multiline)
1881 goto meol;
1882 else
1883 goto seol;
1884 case MEOL:
1885 meol:
1886 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1887 sayNO;
b8c5462f 1888 break;
d6a28714
JH
1889 case SEOL:
1890 seol:
1891 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1892 sayNO;
d6a28714 1893 if (PL_regeol - locinput > 1)
b8c5462f 1894 sayNO;
b8c5462f 1895 break;
d6a28714
JH
1896 case EOS:
1897 if (PL_regeol != locinput)
b8c5462f 1898 sayNO;
d6a28714
JH
1899 break;
1900 case SANYUTF8:
b8c5462f 1901 if (nextchr & 0x80) {
b8c5462f 1902 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1903 if (locinput > PL_regeol)
1904 sayNO;
b8c5462f
JH
1905 nextchr = UCHARAT(locinput);
1906 break;
1907 }
d6a28714 1908 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1909 sayNO;
b8c5462f 1910 nextchr = UCHARAT(++locinput);
a0d0e21e 1911 break;
d6a28714
JH
1912 case SANY:
1913 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1914 sayNO;
1915 nextchr = UCHARAT(++locinput);
b85d18e9 1916 break;
d6a28714 1917 case ANYUTF8:
a0ed51b3 1918 if (nextchr & 0x80) {
b8c5462f 1919 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1920 if (locinput > PL_regeol)
1921 sayNO;
a0ed51b3
LW
1922 nextchr = UCHARAT(locinput);
1923 break;
1924 }
155aba94 1925 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
1926 sayNO;
1927 nextchr = UCHARAT(++locinput);
1928 break;
d6a28714 1929 case REG_ANY:
155aba94 1930 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 1931 sayNO;
76e3520e 1932 nextchr = UCHARAT(++locinput);
a0d0e21e 1933 break;
d6a28714 1934 case EXACT:
cd439c50
IZ
1935 s = STRING(scan);
1936 ln = STR_LEN(scan);
d6a28714
JH
1937 /* Inline the first character, for speed. */
1938 if (UCHARAT(s) != nextchr)
1939 sayNO;
1940 if (PL_regeol - locinput < ln)
1941 sayNO;
1942 if (ln > 1 && memNE(s, locinput, ln))
1943 sayNO;
1944 locinput += ln;
1945 nextchr = UCHARAT(locinput);
1946 break;
1947 case EXACTFL:
b8c5462f
JH
1948 PL_reg_flags |= RF_tainted;
1949 /* FALL THROUGH */
d6a28714 1950 case EXACTF:
cd439c50
IZ
1951 s = STRING(scan);
1952 ln = STR_LEN(scan);
d6a28714
JH
1953
1954 if (UTF) {
1955 char *l = locinput;
1956 char *e = s + ln;
1957 c1 = OP(scan) == EXACTF;
1958 while (s < e) {
1959 if (l >= PL_regeol)
1960 sayNO;
1961 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1962 toLOWER_utf8((U8*)l) :
1963 toLOWER_LC_utf8((U8*)l)))
1964 {
1965 sayNO;
1966 }
1967 s += UTF8SKIP(s);
1968 l += UTF8SKIP(l);
b8c5462f 1969 }
d6a28714 1970 locinput = l;
a0ed51b3
LW
1971 nextchr = UCHARAT(locinput);
1972 break;
1973 }
d6a28714
JH
1974
1975 /* Inline the first character, for speed. */
1976 if (UCHARAT(s) != nextchr &&
1977 UCHARAT(s) != ((OP(scan) == EXACTF)
1978 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1979 sayNO;
d6a28714 1980 if (PL_regeol - locinput < ln)
b8c5462f 1981 sayNO;
d6a28714
JH
1982 if (ln > 1 && (OP(scan) == EXACTF
1983 ? ibcmp(s, locinput, ln)
1984 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1985 sayNO;
d6a28714
JH
1986 locinput += ln;
1987 nextchr = UCHARAT(locinput);
a0d0e21e 1988 break;
d6a28714 1989 case ANYOFUTF8:
d6a28714 1990 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1991 sayNO;
d6a28714
JH
1992 if (locinput >= PL_regeol)
1993 sayNO;
1994 locinput += PL_utf8skip[nextchr];
1995 nextchr = UCHARAT(locinput);
1996 break;
1997 case ANYOF:
d6a28714 1998 if (nextchr < 0)
b8c5462f 1999 nextchr = UCHARAT(locinput);
936ed897 2000 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2001 sayNO;
2002 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2003 sayNO;
b8c5462f
JH
2004 nextchr = UCHARAT(++locinput);
2005 break;
d6a28714 2006 case ALNUML:
b8c5462f
JH
2007 PL_reg_flags |= RF_tainted;
2008 /* FALL THROUGH */
d6a28714 2009 case ALNUM:
b8c5462f 2010 if (!nextchr)
4633a7c4 2011 sayNO;
d6a28714
JH
2012 if (!(OP(scan) == ALNUM
2013 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2014 sayNO;
2015 nextchr = UCHARAT(++locinput);
bbce6d69 2016 break;
d6a28714 2017 case ALNUMLUTF8:
3280af22 2018 PL_reg_flags |= RF_tainted;
bbce6d69 2019 /* FALL THROUGH */
d6a28714 2020 case ALNUMUTF8:
b8c5462f
JH
2021 if (!nextchr)
2022 sayNO;
2023 if (nextchr & 0x80) {
d6a28714
JH
2024 if (!(OP(scan) == ALNUMUTF8
2025 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2026 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2027 {
2028 sayNO;
a0ed51b3 2029 }
b8c5462f 2030 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2031 nextchr = UCHARAT(locinput);
2032 break;
2033 }
d6a28714
JH
2034 if (!(OP(scan) == ALNUMUTF8
2035 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2036 sayNO;
b8c5462f 2037 nextchr = UCHARAT(++locinput);
a0d0e21e 2038 break;
d6a28714 2039 case NALNUML:
b8c5462f
JH
2040 PL_reg_flags |= RF_tainted;
2041 /* FALL THROUGH */
d6a28714
JH
2042 case NALNUM:
2043 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2044 sayNO;
d6a28714
JH
2045 if (OP(scan) == NALNUM
2046 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2047 sayNO;
b8c5462f 2048 nextchr = UCHARAT(++locinput);
a0ed51b3 2049 break;
d6a28714 2050 case NALNUMLUTF8:
b8c5462f
JH
2051 PL_reg_flags |= RF_tainted;
2052 /* FALL THROUGH */
d6a28714 2053 case NALNUMUTF8:
3280af22 2054 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2055 sayNO;
b8c5462f 2056 if (nextchr & 0x80) {
d6a28714
JH
2057 if (OP(scan) == NALNUMUTF8
2058 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2059 : isALNUM_LC_utf8((U8*)locinput))
2060 {
b8c5462f 2061 sayNO;
d6a28714 2062 }
b8c5462f
JH
2063 locinput += PL_utf8skip[nextchr];
2064 nextchr = UCHARAT(locinput);
2065 break;
2066 }
d6a28714
JH
2067 if (OP(scan) == NALNUMUTF8
2068 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2069 sayNO;
76e3520e 2070 nextchr = UCHARAT(++locinput);
a0d0e21e 2071 break;
d6a28714
JH
2072 case BOUNDL:
2073 case NBOUNDL:
3280af22 2074 PL_reg_flags |= RF_tainted;
bbce6d69 2075 /* FALL THROUGH */
d6a28714
JH
2076 case BOUND:
2077 case NBOUND:
2078 /* was last char in word? */
2079 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2080 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2081 ln = isALNUM(ln);
2082 n = isALNUM(nextchr);
2083 }
2084 else {
2085 ln = isALNUM_LC(ln);
2086 n = isALNUM_LC(nextchr);
2087 }
2088 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2089 sayNO;
a0d0e21e 2090 break;
d6a28714
JH
2091 case BOUNDLUTF8:
2092 case NBOUNDLUTF8:
a0ed51b3
LW
2093 PL_reg_flags |= RF_tainted;
2094 /* FALL THROUGH */
d6a28714
JH
2095 case BOUNDUTF8:
2096 case NBOUNDUTF8:
2097 /* was last char in word? */
2098 ln = (locinput != PL_regbol)
2099 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2100 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2101 ln = isALNUM_uni(ln);
2102 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2103 }
d6a28714
JH
2104 else {
2105 ln = isALNUM_LC_uni(ln);
2106 n = isALNUM_LC_utf8((U8*)locinput);
2107 }
2108 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2109 sayNO;
a0ed51b3 2110 break;
d6a28714 2111 case SPACEL:
3280af22 2112 PL_reg_flags |= RF_tainted;
bbce6d69 2113 /* FALL THROUGH */
d6a28714 2114 case SPACE:
9442cb0e 2115 if (!nextchr)
4633a7c4 2116 sayNO;
d6a28714
JH
2117 if (!(OP(scan) == SPACE
2118 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2119 sayNO;
76e3520e 2120 nextchr = UCHARAT(++locinput);
a0d0e21e 2121 break;
d6a28714 2122 case SPACELUTF8:
a0ed51b3
LW
2123 PL_reg_flags |= RF_tainted;
2124 /* FALL THROUGH */
d6a28714 2125 case SPACEUTF8:
9442cb0e 2126 if (!nextchr)
a0ed51b3
LW
2127 sayNO;
2128 if (nextchr & 0x80) {
d6a28714 2129 if (!(OP(scan) == SPACEUTF8
9442cb0e 2130 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2131 : isSPACE_LC_utf8((U8*)locinput)))
2132 {
a0ed51b3 2133 sayNO;
d6a28714 2134 }
6f06b55f 2135 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2136 nextchr = UCHARAT(locinput);
2137 break;
2138 }
d6a28714
JH
2139 if (!(OP(scan) == SPACEUTF8
2140 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2141 sayNO;
2142 nextchr = UCHARAT(++locinput);
2143 break;
d6a28714 2144 case NSPACEL:
3280af22 2145 PL_reg_flags |= RF_tainted;
bbce6d69 2146 /* FALL THROUGH */
d6a28714 2147 case NSPACE:
9442cb0e 2148 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2149 sayNO;
9442cb0e 2150 if (OP(scan) == NSPACE
d6a28714 2151 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2152 sayNO;
b8c5462f 2153 nextchr = UCHARAT(++locinput);
a0d0e21e 2154 break;
d6a28714 2155 case NSPACELUTF8:
a0ed51b3
LW
2156 PL_reg_flags |= RF_tainted;
2157 /* FALL THROUGH */
d6a28714 2158 case NSPACEUTF8:
9442cb0e 2159 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2160 sayNO;
2161 if (nextchr & 0x80) {
d6a28714 2162 if (OP(scan) == NSPACEUTF8
9442cb0e 2163 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2164 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2165 {
2166 sayNO;
2167 }
2168 locinput += PL_utf8skip[nextchr];
2169 nextchr = UCHARAT(locinput);
2170 break;
a0ed51b3 2171 }
d6a28714
JH
2172 if (OP(scan) == NSPACEUTF8
2173 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2174 sayNO;
76e3520e 2175 nextchr = UCHARAT(++locinput);
a0d0e21e 2176 break;
d6a28714 2177 case DIGITL:
a0ed51b3
LW
2178 PL_reg_flags |= RF_tainted;
2179 /* FALL THROUGH */
d6a28714 2180 case DIGIT:
9442cb0e 2181 if (!nextchr)
a0ed51b3 2182 sayNO;
d6a28714
JH
2183 if (!(OP(scan) == DIGIT
2184 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2185 sayNO;
76e3520e 2186 nextchr = UCHARAT(++locinput);
a0d0e21e 2187 break;
d6a28714 2188 case DIGITLUTF8:
a0ed51b3
LW
2189 PL_reg_flags |= RF_tainted;
2190 /* FALL THROUGH */
d6a28714 2191 case DIGITUTF8:
a0ed51b3
LW
2192 if (!nextchr)
2193 sayNO;
2194 if (nextchr & 0x80) {
9442cb0e
GS
2195 if (!(OP(scan) == DIGITUTF8
2196 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2197 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2198 {
a0ed51b3 2199 sayNO;
dfe13c55 2200 }
6f06b55f 2201 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2202 nextchr = UCHARAT(locinput);
2203 break;
2204 }
9442cb0e
GS
2205 if (!(OP(scan) == DIGITUTF8
2206 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2207 sayNO;
2208 nextchr = UCHARAT(++locinput);
2209 break;
d6a28714 2210 case NDIGITL:
b8c5462f
JH
2211 PL_reg_flags |= RF_tainted;
2212 /* FALL THROUGH */
d6a28714 2213 case NDIGIT:
9442cb0e 2214 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2215 sayNO;
9442cb0e 2216 if (OP(scan) == NDIGIT
d6a28714 2217 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2218 sayNO;
76e3520e 2219 nextchr = UCHARAT(++locinput);
a0d0e21e 2220 break;
d6a28714 2221 case NDIGITLUTF8:
b8c5462f
JH
2222 PL_reg_flags |= RF_tainted;
2223 /* FALL THROUGH */
d6a28714 2224 case NDIGITUTF8:
b8c5462f
JH
2225 if (!nextchr && locinput >= PL_regeol)
2226 sayNO;
a0ed51b3 2227 if (nextchr & 0x80) {
9442cb0e
GS
2228 if (OP(scan) == NDIGITUTF8
2229 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2230 : isDIGIT_LC_utf8((U8*)locinput))
2231 {
a0ed51b3 2232 sayNO;
9442cb0e 2233 }
6f06b55f 2234 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2235 nextchr = UCHARAT(locinput);
2236 break;
2237 }
9442cb0e
GS
2238 if (OP(scan) == NDIGITUTF8
2239 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2240 sayNO;
2241 nextchr = UCHARAT(++locinput);
2242 break;
2243 case CLUMP:
dfe13c55 2244 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2245 sayNO;
6f06b55f 2246 locinput += PL_utf8skip[nextchr];
dfe13c55 2247 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2248 locinput += UTF8SKIP(locinput);
2249 if (locinput > PL_regeol)
2250 sayNO;
2251 nextchr = UCHARAT(locinput);
2252 break;
c8756f30 2253 case REFFL:
3280af22 2254 PL_reg_flags |= RF_tainted;
c8756f30 2255 /* FALL THROUGH */
c277df42 2256 case REF:
c8756f30 2257 case REFF:
c277df42 2258 n = ARG(scan); /* which paren pair */
cf93c79d 2259 ln = PL_regstartp[n];
2c2d71f5 2260 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2261 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2262 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2263 if (ln == PL_regendp[n])
a0d0e21e 2264 break;
a0ed51b3 2265
cf93c79d 2266 s = PL_bostr + ln;
a0ed51b3
LW
2267 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2268 char *l = locinput;
cf93c79d 2269 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2270 /*
2271 * Note that we can't do the "other character" lookup trick as
2272 * in the 8-bit case (no pun intended) because in Unicode we
2273 * have to map both upper and title case to lower case.
2274 */
2275 if (OP(scan) == REFF) {
2276 while (s < e) {
2277 if (l >= PL_regeol)
2278 sayNO;
dfe13c55 2279 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2280 sayNO;
2281 s += UTF8SKIP(s);
2282 l += UTF8SKIP(l);
2283 }
2284 }
2285 else {
2286 while (s < e) {
2287 if (l >= PL_regeol)
2288 sayNO;
dfe13c55 2289 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2290 sayNO;
2291 s += UTF8SKIP(s);
2292 l += UTF8SKIP(l);
2293 }
2294 }
2295 locinput = l;
2296 nextchr = UCHARAT(locinput);
2297 break;
2298 }
2299
a0d0e21e 2300 /* Inline the first character, for speed. */
76e3520e 2301 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2302 (OP(scan) == REF ||
2303 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2304 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2305 sayNO;
cf93c79d 2306 ln = PL_regendp[n] - ln;
3280af22 2307 if (locinput + ln > PL_regeol)
4633a7c4 2308 sayNO;
c8756f30
AK
2309 if (ln > 1 && (OP(scan) == REF
2310 ? memNE(s, locinput, ln)
2311 : (OP(scan) == REFF
2312 ? ibcmp(s, locinput, ln)
2313 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2314 sayNO;
a0d0e21e 2315 locinput += ln;
76e3520e 2316 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2317 break;
2318
2319 case NOTHING:
c277df42 2320 case TAIL:
a0d0e21e
LW
2321 break;
2322 case BACK:
2323 break;
c277df42
IZ
2324 case EVAL:
2325 {
2326 dSP;
533c011a 2327 OP_4tree *oop = PL_op;
3280af22
NIS
2328 COP *ocurcop = PL_curcop;
2329 SV **ocurpad = PL_curpad;
c277df42
IZ
2330 SV *ret;
2331
2332 n = ARG(scan);
533c011a 2333 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2334 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2335 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2336 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2337
cea2e8a9 2338 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2339 SPAGAIN;
2340 ret = POPs;
2341 PUTBACK;
2342
0f5d15d6
IZ
2343 PL_op = oop;
2344 PL_curpad = ocurpad;
2345 PL_curcop = ocurcop;
c277df42 2346 if (logical) {
0f5d15d6
IZ
2347 if (logical == 2) { /* Postponed subexpression. */
2348 regexp *re;
22c35a8c 2349 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2350 re_cc_state state;
0f5d15d6
IZ
2351 CHECKPOINT cp, lastcp;
2352
2353 if(SvROK(ret) || SvRMAGICAL(ret)) {
2354 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2355
2356 if(SvMAGICAL(sv))
2357 mg = mg_find(sv, 'r');
2358 }
2359 if (mg) {
2360 re = (regexp *)mg->mg_obj;
df0003d4 2361 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2362 }
2363 else {
2364 STRLEN len;
2365 char *t = SvPV(ret, len);
2366 PMOP pm;
2367 char *oprecomp = PL_regprecomp;
2368 I32 osize = PL_regsize;
2369 I32 onpar = PL_regnpar;
2370
2371 pm.op_pmflags = 0;
393fec97 2372 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2373 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2374 if (!(SvFLAGS(ret)
2375 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2376 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2377 PL_regprecomp = oprecomp;
2378 PL_regsize = osize;
2379 PL_regnpar = onpar;
2380 }
2381 DEBUG_r(
2382 PerlIO_printf(Perl_debug_log,
2383 "Entering embedded `%s%.60s%s%s'\n",
2384 PL_colors[0],
2385 re->precomp,
2386 PL_colors[1],
2387 (strlen(re->precomp) > 60 ? "..." : ""))
2388 );
2389 state.node = next;
2390 state.prev = PL_reg_call_cc;
2391 state.cc = PL_regcc;
2392 state.re = PL_reg_re;
2393
2ab05381 2394 PL_regcc = 0;
0f5d15d6
IZ
2395
2396 cp = regcppush(0); /* Save *all* the positions. */
2397 REGCP_SET;
2398 cache_re(re);
2399 state.ss = PL_savestack_ix;
2400 *PL_reglastparen = 0;
2401 PL_reg_call_cc = &state;
2402 PL_reginput = locinput;
2c2d71f5
JH
2403
2404 /* XXXX This is too dramatic a measure... */
2405 PL_reg_maxiter = 0;
2406
0f5d15d6 2407 if (regmatch(re->program + 1)) {
2c914db6
IZ
2408 /* Even though we succeeded, we need to restore
2409 global variables, since we may be wrapped inside
2410 SUSPEND, thus the match may be not finished yet. */
2411
2412 /* XXXX Do this only if SUSPENDed? */
2413 PL_reg_call_cc = state.prev;
2414 PL_regcc = state.cc;
2415 PL_reg_re = state.re;
2416 cache_re(PL_reg_re);
2417
2418 /* XXXX This is too dramatic a measure... */
2419 PL_reg_maxiter = 0;
2420
2421 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2422 ReREFCNT_dec(re);
2423 regcpblow(cp);
2424 sayYES;
2425 }
0f5d15d6
IZ
2426 ReREFCNT_dec(re);
2427 REGCP_UNWIND;
2428 regcppop();
2429 PL_reg_call_cc = state.prev;
2430 PL_regcc = state.cc;
2431 PL_reg_re = state.re;
d3790889 2432 cache_re(PL_reg_re);
2c2d71f5
JH
2433
2434 /* XXXX This is too dramatic a measure... */
2435 PL_reg_maxiter = 0;
2436
0f5d15d6
IZ
2437 sayNO;
2438 }
c277df42 2439 sw = SvTRUE(ret);
0f5d15d6 2440 logical = 0;
a0ed51b3
LW
2441 }
2442 else
3280af22 2443 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2444 break;
2445 }
a0d0e21e 2446 case OPEN:
c277df42 2447 n = ARG(scan); /* which paren pair */
3280af22
NIS
2448 PL_reg_start_tmp[n] = locinput;
2449 if (n > PL_regsize)
2450 PL_regsize = n;
a0d0e21e
LW
2451 break;
2452 case CLOSE:
c277df42 2453 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2454 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2455 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2456 if (n > *PL_reglastparen)
2457 *PL_reglastparen = n;
a0d0e21e 2458 break;
c277df42
IZ
2459 case GROUPP:
2460 n = ARG(scan); /* which paren pair */
cf93c79d 2461 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2462 break;
2463 case IFTHEN:
2c2d71f5 2464 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2465 if (sw)
2466 next = NEXTOPER(NEXTOPER(scan));
2467 else {
2468 next = scan + ARG(scan);
2469 if (OP(next) == IFTHEN) /* Fake one. */
2470 next = NEXTOPER(NEXTOPER(next));
2471 }
2472 break;
2473 case LOGICAL:
0f5d15d6 2474 logical = scan->flags;
c277df42 2475 break;
2ab05381
IZ
2476/*******************************************************************
2477 PL_regcc contains infoblock about the innermost (...)* loop, and
2478 a pointer to the next outer infoblock.
2479
2480 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2481
2482 1) After matching X, regnode for CURLYX is processed;
2483
2484 2) This regnode creates infoblock on the stack, and calls
2485 regmatch() recursively with the starting point at WHILEM node;
2486
2487 3) Each hit of WHILEM node tries to match A and Z (in the order
2488 depending on the current iteration, min/max of {min,max} and
2489 greediness). The information about where are nodes for "A"
2490 and "Z" is read from the infoblock, as is info on how many times "A"
2491 was already matched, and greediness.
2492
2493 4) After A matches, the same WHILEM node is hit again.
2494
2495 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2496 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2497 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2498 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2499 of the external loop.
2500
2501 Currently present infoblocks form a tree with a stem formed by PL_curcc
2502 and whatever it mentions via ->next, and additional attached trees
2503 corresponding to temporarily unset infoblocks as in "5" above.
2504
2505 In the following picture infoblocks for outer loop of
2506 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2507 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2508 infoblocks are drawn below the "reset" infoblock.
2509
2510 In fact in the picture below we do not show failed matches for Z and T
2511 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2512 more obvious *why* one needs to *temporary* unset infoblocks.]
2513
2514 Matched REx position InfoBlocks Comment
2515 (Y(A)*?Z)*?T x
2516 Y(A)*?Z)*?T x <- O
2517 Y (A)*?Z)*?T x <- O
2518 Y A)*?Z)*?T x <- O <- I
2519 YA )*?Z)*?T x <- O <- I
2520 YA A)*?Z)*?T x <- O <- I
2521 YAA )*?Z)*?T x <- O <- I
2522 YAA Z)*?T x <- O # Temporary unset I
2523 I
2524
2525 YAAZ Y(A)*?Z)*?T x <- O
2526 I
2527
2528 YAAZY (A)*?Z)*?T x <- O
2529 I
2530
2531 YAAZY A)*?Z)*?T x <- O <- I
2532 I
2533
2534 YAAZYA )*?Z)*?T x <- O <- I
2535 I
2536
2537 YAAZYA Z)*?T x <- O # Temporary unset I
2538 I,I
2539
2540 YAAZYAZ )*?T x <- O
2541 I,I
2542
2543 YAAZYAZ T x # Temporary unset O
2544 O
2545 I,I
2546
2547 YAAZYAZT x
2548 O
2549 I,I
2550 *******************************************************************/
a0d0e21e
LW
2551 case CURLYX: {
2552 CURCUR cc;
3280af22 2553 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2554
2555 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2556 next += ARG(next);
3280af22
NIS
2557 cc.oldcc = PL_regcc;
2558 PL_regcc = &cc;
2559 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2560 cc.cur = -1;
2561 cc.min = ARG1(scan);
2562 cc.max = ARG2(scan);
c277df42 2563 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2564 cc.next = next;
2565 cc.minmod = minmod;
2566 cc.lastloc = 0;
3280af22 2567 PL_reginput = locinput;
a0d0e21e
LW
2568 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2569 regcpblow(cp);
3280af22 2570 PL_regcc = cc.oldcc;
4633a7c4 2571 saySAME(n);
a0d0e21e
LW
2572 }
2573 /* NOT REACHED */
2574 case WHILEM: {
2575 /*
2576 * This is really hard to understand, because after we match
2577 * what we're trying to match, we must make sure the rest of
2c2d71f5 2578 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2579 * to go back UP the parse tree by recursing ever deeper. And
2580 * if it fails, we have to reset our parent's current state
2581 * that we can try again after backing off.
2582 */
2583
c277df42 2584 CHECKPOINT cp, lastcp;
3280af22 2585 CURCUR* cc = PL_regcc;
c277df42
IZ
2586 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2587
4633a7c4 2588 n = cc->cur + 1; /* how many we know we matched */
3280af22 2589 PL_reginput = locinput;
a0d0e21e 2590
c277df42
IZ
2591 DEBUG_r(
2592 PerlIO_printf(Perl_debug_log,
2593 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2594 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2595 (long)n, (long)cc->min,
2596 (long)cc->max, (long)cc)
2597 );
4633a7c4 2598
a0d0e21e
LW
2599 /* If degenerate scan matches "", assume scan done. */
2600
579cf2c3 2601 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2602 PL_regcc = cc->oldcc;
2ab05381
IZ
2603 if (PL_regcc)
2604 ln = PL_regcc->cur;
c277df42 2605 DEBUG_r(
c3464db5
DD
2606 PerlIO_printf(Perl_debug_log,
2607 "%*s empty match detected, try continuation...\n",
3280af22 2608 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2609 );
a0d0e21e 2610 if (regmatch(cc->next))
4633a7c4 2611 sayYES;
2ab05381
IZ
2612 if (PL_regcc)
2613 PL_regcc->cur = ln;
3280af22 2614 PL_regcc = cc;
4633a7c4 2615 sayNO;
a0d0e21e
LW
2616 }
2617
2618 /* First just match a string of min scans. */
2619
2620 if (n < cc->min) {
2621 cc->cur = n;
2622 cc->lastloc = locinput;
4633a7c4
LW
2623 if (regmatch(cc->scan))
2624 sayYES;
2625 cc->cur = n - 1;
c277df42 2626 cc->lastloc = lastloc;
4633a7c4 2627 sayNO;
a0d0e21e
LW
2628 }
2629
2c2d71f5
JH
2630 if (scan->flags) {
2631 /* Check whether we already were at this position.
2632 Postpone detection until we know the match is not
2633 *that* much linear. */
2634 if (!PL_reg_maxiter) {
2635 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2636 PL_reg_leftiter = PL_reg_maxiter;
2637 }
2638 if (PL_reg_leftiter-- == 0) {
2639 I32 size = (PL_reg_maxiter + 7)/8;
2640 if (PL_reg_poscache) {
2641 if (PL_reg_poscache_size < size) {
2642 Renew(PL_reg_poscache, size, char);
2643 PL_reg_poscache_size = size;
2644 }
2645 Zero(PL_reg_poscache, size, char);
2646 }
2647 else {
2648 PL_reg_poscache_size = size;
2649 Newz(29, PL_reg_poscache, size, char);
2650 }
2651 DEBUG_r(
2652 PerlIO_printf(Perl_debug_log,
2653 "%sDetected a super-linear match, switching on caching%s...\n",
2654 PL_colors[4], PL_colors[5])
2655 );
2656 }
2657 if (PL_reg_leftiter < 0) {
2658 I32 o = locinput - PL_bostr, b;
2659
2660 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2661 b = o % 8;
2662 o /= 8;
2663 if (PL_reg_poscache[o] & (1<<b)) {
2664 DEBUG_r(
2665 PerlIO_printf(Perl_debug_log,
2666 "%*s already tried at this position...\n",
2667 REPORT_CODE_OFF+PL_regindent*2, "")
2668 );
7821416a 2669 sayNO_SILENT;
2c2d71f5
JH
2670 }
2671 PL_reg_poscache[o] |= (1<<b);
2672 }
2673 }
2674
a0d0e21e
LW
2675 /* Prefer next over scan for minimal matching. */
2676
2677 if (cc->minmod) {
3280af22 2678 PL_regcc = cc->oldcc;
2ab05381
IZ
2679 if (PL_regcc)
2680 ln = PL_regcc->cur;
5f05dabc 2681 cp = regcppush(cc->parenfloor);
c277df42 2682 REGCP_SET;
5f05dabc 2683 if (regmatch(cc->next)) {
c277df42 2684 regcpblow(cp);
4633a7c4 2685 sayYES; /* All done. */
5f05dabc 2686 }
c277df42 2687 REGCP_UNWIND;
5f05dabc 2688 regcppop();
2ab05381
IZ
2689 if (PL_regcc)
2690 PL_regcc->cur = ln;
3280af22 2691 PL_regcc = cc;
a0d0e21e 2692
c277df42 2693 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2694 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2695 && !(PL_reg_flags & RF_warned)) {
2696 PL_reg_flags |= RF_warned;
e476b1b5 2697 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2698 "Complex regular subexpression recursion",
2699 REG_INFTY - 1);
c277df42 2700 }
4633a7c4 2701 sayNO;
c277df42 2702 }
a687059c 2703
c277df42 2704 DEBUG_r(
c3464db5
DD
2705 PerlIO_printf(Perl_debug_log,
2706 "%*s trying longer...\n",
3280af22 2707 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2708 );
a0d0e21e 2709 /* Try scanning more and see if it helps. */
3280af22 2710 PL_reginput = locinput;
a0d0e21e
LW
2711 cc->cur = n;
2712 cc->lastloc = locinput;
5f05dabc 2713 cp = regcppush(cc->parenfloor);
c277df42 2714 REGCP_SET;
5f05dabc 2715 if (regmatch(cc->scan)) {
c277df42 2716 regcpblow(cp);
4633a7c4 2717 sayYES;
5f05dabc 2718 }
c277df42 2719 REGCP_UNWIND;
5f05dabc 2720 regcppop();
4633a7c4 2721 cc->cur = n - 1;
c277df42 2722 cc->lastloc = lastloc;
4633a7c4 2723 sayNO;
a0d0e21e
LW
2724 }
2725
2726 /* Prefer scan over next for maximal matching. */
2727
2728 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2729 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2730 cc->cur = n;
2731 cc->lastloc = locinput;
c277df42 2732 REGCP_SET;
5f05dabc 2733 if (regmatch(cc->scan)) {
c277df42 2734 regcpblow(cp);
4633a7c4 2735 sayYES;
5f05dabc 2736 }
c277df42 2737 REGCP_UNWIND;
a0d0e21e 2738 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2739 PL_reginput = locinput;
c277df42 2740 DEBUG_r(
c3464db5
DD
2741 PerlIO_printf(Perl_debug_log,
2742 "%*s failed, try continuation...\n",
3280af22 2743 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2744 );
2745 }
e476b1b5 2746 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2747 && !(PL_reg_flags & RF_warned)) {
3280af22 2748 PL_reg_flags |= RF_warned;
e476b1b5 2749 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2750 "Complex regular subexpression recursion",
2751 REG_INFTY - 1);
a0d0e21e
LW
2752 }
2753
2754 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2755 PL_regcc = cc->oldcc;
2ab05381
IZ
2756 if (PL_regcc)
2757 ln = PL_regcc->cur;
a0d0e21e 2758 if (regmatch(cc->next))
4633a7c4 2759 sayYES;
2ab05381
IZ
2760 if (PL_regcc)
2761 PL_regcc->cur = ln;
3280af22 2762 PL_regcc = cc;
4633a7c4 2763 cc->cur = n - 1;
c277df42 2764 cc->lastloc = lastloc;
4633a7c4 2765 sayNO;
a0d0e21e
LW
2766 }
2767 /* NOT REACHED */
c277df42
IZ
2768 case BRANCHJ:
2769 next = scan + ARG(scan);
2770 if (next == scan)
2771 next = NULL;
2772 inner = NEXTOPER(NEXTOPER(scan));
2773 goto do_branch;
2774 case BRANCH:
2775 inner = NEXTOPER(scan);
2776 do_branch:
2777 {
2778 CHECKPOINT lastcp;
2779 c1 = OP(scan);
2780 if (OP(next) != c1) /* No choice. */
2781 next = inner; /* Avoid recursion. */
a0d0e21e 2782 else {
3280af22 2783 int lastparen = *PL_reglastparen;
c277df42
IZ
2784
2785 REGCP_SET;
a0d0e21e 2786 do {
3280af22 2787 PL_reginput = locinput;
c277df42 2788 if (regmatch(inner))
4633a7c4 2789 sayYES;
c277df42 2790 REGCP_UNWIND;
3280af22 2791 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2792 PL_regendp[n] = -1;
3280af22 2793 *PL_reglastparen = n;
c277df42 2794 scan = next;
a0d0e21e 2795 /*SUPPRESS 560*/
155aba94 2796 if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))))
c277df42 2797 next += n;
a0d0e21e 2798 else
c277df42 2799 next = NULL;
c277df42
IZ
2800 inner = NEXTOPER(scan);
2801 if (c1 == BRANCHJ) {
2802 inner = NEXTOPER(inner);
2803 }
2804 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2805 sayNO;
a0d0e21e 2806 /* NOTREACHED */
a687059c 2807 }
a0d0e21e
LW
2808 }
2809 break;
2810 case MINMOD:
2811 minmod = 1;
2812 break;
c277df42
IZ
2813 case CURLYM:
2814 {
00db4c45 2815 I32 l = 0;
c277df42
IZ
2816 CHECKPOINT lastcp;
2817
2818 /* We suppose that the next guy does not need
2819 backtracking: in particular, it is of constant length,
2820 and has no parenths to influence future backrefs. */
2821 ln = ARG1(scan); /* min to match */
2822 n = ARG2(scan); /* max to match */
c277df42
IZ
2823 paren = scan->flags;
2824 if (paren) {
3280af22
NIS
2825 if (paren > PL_regsize)
2826 PL_regsize = paren;
2827 if (paren > *PL_reglastparen)
2828 *PL_reglastparen = paren;
c277df42 2829 }
dc45a647 2830 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2831 if (paren)
2832 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2833 PL_reginput = locinput;
c277df42
IZ
2834 if (minmod) {
2835 minmod = 0;
2836 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2837 sayNO;
5f4b28b2 2838 if (ln && l == 0 && n >= ln
c277df42
IZ
2839 /* In fact, this is tricky. If paren, then the
2840 fact that we did/didnot match may influence
2841 future execution. */
2842 && !(paren && ln == 0))
2843 ln = n;
3280af22 2844 locinput = PL_reginput;
22c35a8c 2845 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2846 c1 = (U8)*STRING(next);
c277df42 2847 if (OP(next) == EXACTF)
22c35a8c 2848 c2 = PL_fold[c1];
c277df42 2849 else if (OP(next) == EXACTFL)
22c35a8c 2850 c2 = PL_fold_locale[c1];
c277df42
IZ
2851 else
2852 c2 = c1;
a0ed51b3
LW
2853 }
2854 else
c277df42
IZ
2855 c1 = c2 = -1000;
2856 REGCP_SET;
5f4b28b2 2857 /* This may be improved if l == 0. */
c277df42
IZ
2858 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2859 /* If it could work, try it. */
2860 if (c1 == -1000 ||
3280af22
NIS
2861 UCHARAT(PL_reginput) == c1 ||
2862 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2863 {
2864 if (paren) {
2865 if (n) {
cf93c79d
IZ
2866 PL_regstartp[paren] =
2867 HOPc(PL_reginput, -l) - PL_bostr;
2868 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2869 }
2870 else
cf93c79d 2871 PL_regendp[paren] = -1;
c277df42
IZ
2872 }
2873 if (regmatch(next))
2874 sayYES;
2875 REGCP_UNWIND;
2876 }
2877 /* Couldn't or didn't -- move forward. */
3280af22 2878 PL_reginput = locinput;
c277df42
IZ
2879 if (regrepeat_hard(scan, 1, &l)) {
2880 ln++;
3280af22 2881 locinput = PL_reginput;
c277df42
IZ
2882 }
2883 else
2884 sayNO;
2885 }
a0ed51b3
LW
2886 }
2887 else {
c277df42
IZ
2888 n = regrepeat_hard(scan, n, &l);
2889 if (n != 0 && l == 0
2890 /* In fact, this is tricky. If paren, then the
2891 fact that we did/didnot match may influence
2892 future execution. */
2893 && !(paren && ln == 0))
2894 ln = n;
3280af22 2895 locinput = PL_reginput;
c277df42 2896 DEBUG_r(
5c0ca799 2897 PerlIO_printf(Perl_debug_log,
faccc32b 2898 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2899 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2900 (IV) n, (IV)l)
c277df42
IZ
2901 );
2902 if (n >= ln) {
22c35a8c 2903 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2904 c1 = (U8)*STRING(next);
c277df42 2905 if (OP(next) == EXACTF)
22c35a8c 2906 c2 = PL_fold[c1];
c277df42 2907 else if (OP(next) == EXACTFL)
22c35a8c 2908 c2 = PL_fold_locale[c1];
c277df42
IZ
2909 else
2910 c2 = c1;
a0ed51b3
LW
2911 }
2912 else
c277df42
IZ
2913 c1 = c2 = -1000;
2914 }
2915 REGCP_SET;
2916 while (n >= ln) {
2917 /* If it could work, try it. */
2918 if (c1 == -1000 ||
3280af22
NIS
2919 UCHARAT(PL_reginput) == c1 ||
2920 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2921 {
2922 DEBUG_r(
c3464db5 2923 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2924 "%*s trying tail with n=%"IVdf"...\n",
2925 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2926 );
2927 if (paren) {
2928 if (n) {
cf93c79d
IZ
2929 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2930 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2931 }
a0ed51b3 2932 else
cf93c79d 2933 PL_regendp[paren] = -1;
c277df42 2934 }
a0ed51b3
LW
2935 if (regmatch(next))
2936 sayYES;
2937 REGCP_UNWIND;
2938 }
c277df42
IZ
2939 /* Couldn't or didn't -- back up. */
2940 n--;
dfe13c55 2941 locinput = HOPc(locinput, -l);
3280af22 2942 PL_reginput = locinput;
c277df42
IZ
2943 }
2944 }
2945 sayNO;
2946 break;
2947 }
2948 case CURLYN:
2949 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2950 if (paren > PL_regsize)
2951 PL_regsize = paren;
2952 if (paren > *PL_reglastparen)
2953 *PL_reglastparen = paren;
c277df42
IZ
2954 ln = ARG1(scan); /* min to match */
2955 n = ARG2(scan); /* max to match */
dc45a647 2956 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2957 goto repeat;
a0d0e21e 2958 case CURLY:
c277df42 2959 paren = 0;
a0d0e21e
LW
2960 ln = ARG1(scan); /* min to match */
2961 n = ARG2(scan); /* max to match */
dc45a647 2962 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2963 goto repeat;
2964 case STAR:
2965 ln = 0;
c277df42 2966 n = REG_INFTY;
a0d0e21e 2967 scan = NEXTOPER(scan);
c277df42 2968 paren = 0;
a0d0e21e
LW
2969 goto repeat;
2970 case PLUS:
c277df42
IZ
2971 ln = 1;
2972 n = REG_INFTY;
2973 scan = NEXTOPER(scan);
2974 paren = 0;
2975 repeat:
a0d0e21e
LW
2976 /*
2977 * Lookahead to avoid useless match attempts
2978 * when we know what character comes next.
2979 */
22c35a8c 2980 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2981 c1 = (U8)*STRING(next);
bbce6d69 2982 if (OP(next) == EXACTF)
22c35a8c 2983 c2 = PL_fold[c1];
bbce6d69 2984 else if (OP(next) == EXACTFL)
22c35a8c 2985 c2 = PL_fold_locale[c1];
bbce6d69 2986 else
2987 c2 = c1;
2988 }
a0d0e21e 2989 else
bbce6d69 2990 c1 = c2 = -1000;
3280af22 2991 PL_reginput = locinput;
a0d0e21e 2992 if (minmod) {
c277df42 2993 CHECKPOINT lastcp;
a0d0e21e
LW
2994 minmod = 0;
2995 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2996 sayNO;
a0ed51b3 2997 locinput = PL_reginput;
c277df42 2998 REGCP_SET;
0fe9bf95
IZ
2999 if (c1 != -1000) {
3000 char *e = locinput + n - ln; /* Should not check after this */
3001 char *old = locinput;
3002
3003 if (e >= PL_regeol || (n == REG_INFTY))
3004 e = PL_regeol - 1;
3005 while (1) {
3006 /* Find place 'next' could work */
3007 if (c1 == c2) {
3008 while (locinput <= e && *locinput != c1)
3009 locinput++;
3010 } else {
3011 while (locinput <= e
3012 && *locinput != c1
3013 && *locinput != c2)
3014 locinput++;
3015 }
3016 if (locinput > e)
3017 sayNO;
3018 /* PL_reginput == old now */
3019 if (locinput != old) {
3020 ln = 1; /* Did some */
3021 if (regrepeat(scan, locinput - old) <
3022 locinput - old)
3023 sayNO;
3024 }
3025 /* PL_reginput == locinput now */
3026 if (paren) {
3027 if (ln) {
cf93c79d
IZ
3028 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
3029 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
3030 }
3031 else
cf93c79d 3032 PL_regendp[paren] = -1;
0fe9bf95
IZ
3033 }
3034 if (regmatch(next))
3035 sayYES;
3036 PL_reginput = locinput; /* Could be reset... */
3037 REGCP_UNWIND;
3038 /* Couldn't or didn't -- move forward. */
3039 old = locinput++;
3040 }
3041 }
3042 else
c277df42 3043 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3044 /* If it could work, try it. */
bbce6d69 3045 if (c1 == -1000 ||
3280af22
NIS
3046 UCHARAT(PL_reginput) == c1 ||
3047 UCHARAT(PL_reginput) == c2)
bbce6d69 3048 {
c277df42
IZ
3049 if (paren) {
3050 if (n) {
cf93c79d
IZ
3051 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3052 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3053 }
3054 else
cf93c79d 3055 PL_regendp[paren] = -1;
c277df42 3056 }
a0d0e21e 3057 if (regmatch(next))
4633a7c4 3058 sayYES;
c277df42 3059 REGCP_UNWIND;
bbce6d69 3060 }
c277df42 3061 /* Couldn't or didn't -- move forward. */
a0ed51b3 3062 PL_reginput = locinput;
a0d0e21e
LW
3063 if (regrepeat(scan, 1)) {
3064 ln++;
a0ed51b3
LW
3065 locinput = PL_reginput;
3066 }
3067 else
4633a7c4 3068 sayNO;
a0d0e21e
LW
3069 }
3070 }
3071 else {
c277df42 3072 CHECKPOINT lastcp;
a0d0e21e 3073 n = regrepeat(scan, n);
a0ed51b3 3074 locinput = PL_reginput;
22c35a8c 3075 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3076 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3077 ln = n; /* why back off? */
1aeab75a
GS
3078 /* ...because $ and \Z can match before *and* after
3079 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3080 We should back off by one in this case. */
3081 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3082 ln--;
3083 }
c277df42
IZ
3084 REGCP_SET;
3085 if (paren) {
3086 while (n >= ln) {
3087 /* If it could work, try it. */
3088 if (c1 == -1000 ||
3280af22
NIS
3089 UCHARAT(PL_reginput) == c1 ||
3090 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3091 {
3092 if (paren && n) {
3093 if (n) {
cf93c79d
IZ
3094 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3095 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3096 }
3097 else
cf93c79d 3098 PL_regendp[paren] = -1;
c277df42
IZ
3099 }
3100 if (regmatch(next))
3101 sayYES;
3102 REGCP_UNWIND;
3103 }
3104 /* Couldn't or didn't -- back up. */
3105 n--;
dfe13c55 3106 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3107 }
a0ed51b3
LW
3108 }
3109 else {
c277df42
IZ
3110 while (n >= ln) {
3111 /* If it could work, try it. */
3112 if (c1 == -1000 ||
3280af22
NIS
3113 UCHARAT(PL_reginput) == c1 ||
3114 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3115 {
3116 if (regmatch(next))
3117 sayYES;
3118 REGCP_UNWIND;
3119 }
3120 /* Couldn't or didn't -- back up. */
3121 n--;
dfe13c55 3122 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3123 }
a0d0e21e
LW
3124 }
3125 }
4633a7c4 3126 sayNO;
c277df42 3127 break;
a0d0e21e 3128 case END:
0f5d15d6
IZ
3129 if (PL_reg_call_cc) {
3130 re_cc_state *cur_call_cc = PL_reg_call_cc;
3131 CURCUR *cctmp = PL_regcc;
3132 regexp *re = PL_reg_re;
3133 CHECKPOINT cp, lastcp;
3134
3135 cp = regcppush(0); /* Save *all* the positions. */
3136 REGCP_SET;
3137 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3138 the caller. */
3139 PL_reginput = locinput; /* Make position available to
3140 the callcc. */
3141 cache_re(PL_reg_call_cc->re);
3142 PL_regcc = PL_reg_call_cc->cc;
3143 PL_reg_call_cc = PL_reg_call_cc->prev;
3144 if (regmatch(cur_call_cc->node)) {
3145 PL_reg_call_cc = cur_call_cc;
3146 regcpblow(cp);
3147 sayYES;
3148 }
3149 REGCP_UNWIND;
3150 regcppop();
3151 PL_reg_call_cc = cur_call_cc;
3152 PL_regcc = cctmp;
3153 PL_reg_re = re;
3154 cache_re(re);
3155
3156 DEBUG_r(
3157 PerlIO_printf(Perl_debug_log,
3158 "%*s continuation failed...\n",
3159 REPORT_CODE_OFF+PL_regindent*2, "")
3160 );
7821416a 3161 sayNO_SILENT;
0f5d15d6 3162 }
7821416a
IZ
3163 if (locinput < PL_regtill) {
3164 DEBUG_r(PerlIO_printf(Perl_debug_log,
3165 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3166 PL_colors[4],
3167 (long)(locinput - PL_reg_starttry),
3168 (long)(PL_regtill - PL_reg_starttry),
3169 PL_colors[5]));
3170 sayNO_FINAL; /* Cannot match: too short. */
3171 }
3172 PL_reginput = locinput; /* put where regtry can find it */
3173 sayYES_FINAL; /* Success! */
7e5428c5 3174 case SUCCEED:
3280af22 3175 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3176 sayYES_LOUD; /* Success! */
c277df42
IZ
3177 case SUSPEND:
3178 n = 1;
9fe1d20c 3179 PL_reginput = locinput;
c277df42 3180 goto do_ifmatch;
a0d0e21e 3181 case UNLESSM:
c277df42 3182 n = 0;
a0ed51b3 3183 if (scan->flags) {
0fe9bf95
IZ
3184 if (UTF) { /* XXXX This is absolutely
3185 broken, we read before
3186 start of string. */
3187 s = HOPMAYBEc(locinput, -scan->flags);
3188 if (!s)
3189 goto say_yes;
3190 PL_reginput = s;
3191 }
3192 else {
3193 if (locinput < PL_bostr + scan->flags)
3194 goto say_yes;
3195 PL_reginput = locinput - scan->flags;
3196 goto do_ifmatch;
3197 }
a0ed51b3
LW
3198 }
3199 else
3200 PL_reginput = locinput;
c277df42
IZ
3201 goto do_ifmatch;
3202 case IFMATCH:
3203 n = 1;
a0ed51b3 3204 if (scan->flags) {
0fe9bf95
IZ
3205 if (UTF) { /* XXXX This is absolutely
3206 broken, we read before
3207 start of string. */
3208 s = HOPMAYBEc(locinput, -scan->flags);
3209 if (!s || s < PL_bostr)
3210 goto say_no;
3211 PL_reginput = s;
3212 }
3213 else {
3214 if (locinput < PL_bostr + scan->flags)
3215 goto say_no;
3216 PL_reginput = locinput - scan->flags;
3217 goto do_ifmatch;
3218 }
a0ed51b3
LW
3219 }
3220 else
3221 PL_reginput = locinput;
3222
c277df42 3223 do_ifmatch:
c277df42
IZ
3224 inner = NEXTOPER(NEXTOPER(scan));
3225 if (regmatch(inner) != n) {
3226 say_no:
3227 if (logical) {
3228 logical = 0;
3229 sw = 0;
3230 goto do_longjump;
a0ed51b3
LW
3231 }
3232 else
c277df42
IZ
3233 sayNO;
3234 }
3235 say_yes:
3236 if (logical) {
3237 logical = 0;
3238 sw = 1;
3239 }
fe44a5e8 3240 if (OP(scan) == SUSPEND) {
3280af22 3241 locinput = PL_reginput;
565764a8 3242 nextchr = UCHARAT(locinput);
fe44a5e8 3243 }
c277df42
IZ
3244 /* FALL THROUGH. */
3245 case LONGJMP:
3246 do_longjump:
3247 next = scan + ARG(scan);
3248 if (next == scan)
3249 next = NULL;
a0d0e21e
LW
3250 break;
3251 default:
b900a521 3252 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3253 PTR2UV(scan), OP(scan));
cea2e8a9 3254 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3255 }
a0d0e21e
LW
3256 scan = next;
3257 }
a687059c 3258
a0d0e21e
LW
3259 /*
3260 * We get here only if there's trouble -- normally "case END" is
3261 * the terminating point.
3262 */
cea2e8a9 3263 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3264 /*NOTREACHED*/
4633a7c4
LW
3265 sayNO;
3266
7821416a
IZ
3267yes_loud:
3268 DEBUG_r(
3269 PerlIO_printf(Perl_debug_log,
3270 "%*s %scould match...%s\n",
3271 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3272 );
3273 goto yes;
3274yes_final:
3275 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3276 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3277yes:
3278#ifdef DEBUGGING
3280af22 3279 PL_regindent--;
4633a7c4
LW
3280#endif
3281 return 1;
3282
3283no:
7821416a
IZ
3284 DEBUG_r(
3285 PerlIO_printf(Perl_debug_log,
3286 "%*s %sfailed...%s\n",
3287 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3288 );
3289 goto do_no;
3290no_final:
3291do_no:
4633a7c4 3292#ifdef DEBUGGING
3280af22 3293 PL_regindent--;
4633a7c4 3294#endif
a0d0e21e 3295 return 0;
a687059c
LW
3296}
3297
3298/*
3299 - regrepeat - repeatedly match something simple, report how many
3300 */
3301/*
3302 * [This routine now assumes that it will only match on things of length 1.
3303 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3304 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3305 */
76e3520e 3306STATIC I32
cea2e8a9 3307S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3308{
5c0ca799 3309 dTHR;
a0d0e21e 3310 register char *scan;
a0d0e21e 3311 register I32 c;
3280af22 3312 register char *loceol = PL_regeol;
a0ed51b3 3313 register I32 hardcount = 0;
a0d0e21e 3314
3280af22 3315 scan = PL_reginput;
c277df42 3316 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3317 loceol = scan + max;
a0d0e21e 3318 switch (OP(p)) {
22c35a8c 3319 case REG_ANY:
a0d0e21e
LW
3320 while (scan < loceol && *scan != '\n')
3321 scan++;
3322 break;
3323 case SANY:
3324 scan = loceol;
3325 break;
a0ed51b3
LW
3326 case ANYUTF8:
3327 loceol = PL_regeol;
3328 while (scan < loceol && *scan != '\n') {
3329 scan += UTF8SKIP(scan);
3330 hardcount++;
3331 }
3332 break;
3333 case SANYUTF8:
3334 loceol = PL_regeol;
3335 while (scan < loceol) {
3336 scan += UTF8SKIP(scan);
3337 hardcount++;
3338 }
3339 break;
bbce6d69 3340 case EXACT: /* length of string is 1 */
cd439c50 3341 c = (U8)*STRING(p);
bbce6d69 3342 while (scan < loceol && UCHARAT(scan) == c)
3343 scan++;
3344 break;
3345 case EXACTF: /* length of string is 1 */
cd439c50 3346 c = (U8)*STRING(p);
bbce6d69 3347 while (scan < loceol &&
22c35a8c 3348 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3349 scan++;
3350 break;
3351 case EXACTFL: /* length of string is 1 */
3280af22 3352 PL_reg_flags |= RF_tainted;
cd439c50 3353 c = (U8)*STRING(p);
bbce6d69 3354 while (scan < loceol &&
22c35a8c 3355 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3356 scan++;
3357 break;
a0ed51b3
LW
3358 case ANYOFUTF8:
3359 loceol = PL_regeol;
3360 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3361 scan += UTF8SKIP(scan);
3362 hardcount++;
3363 }
3364 break;
a0d0e21e 3365 case ANYOF:
936ed897 3366 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3367 scan++;
a0d0e21e
LW
3368 break;
3369 case ALNUM:
3370 while (scan < loceol && isALNUM(*scan))
3371 scan++;
3372 break;
a0ed51b3
LW
3373 case ALNUMUTF8:
3374 loceol = PL_regeol;
dfe13c55 3375 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3376 scan += UTF8SKIP(scan);
3377 hardcount++;
3378 }
3379 break;
bbce6d69 3380 case ALNUML:
3280af22 3381 PL_reg_flags |= RF_tainted;
bbce6d69 3382 while (scan < loceol && isALNUM_LC(*scan))
3383 scan++;
3384 break;
a0ed51b3
LW
3385 case ALNUMLUTF8:
3386 PL_reg_flags |= RF_tainted;
3387 loceol = PL_regeol;
dfe13c55 3388 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3389 scan += UTF8SKIP(scan);
3390 hardcount++;
3391 }
3392 break;
3393 break;
a0d0e21e
LW
3394 case NALNUM:
3395 while (scan < loceol && !isALNUM(*scan))
3396 scan++;
3397 break;
a0ed51b3
LW
3398 case NALNUMUTF8:
3399 loceol = PL_regeol;
dfe13c55 3400 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3401 scan += UTF8SKIP(scan);
3402 hardcount++;
3403 }
3404 break;
bbce6d69 3405 case NALNUML:
3280af22 3406 PL_reg_flags |= RF_tainted;
bbce6d69 3407 while (scan < loceol && !isALNUM_LC(*scan))
3408 scan++;
3409 break;
a0ed51b3
LW
3410 case NALNUMLUTF8:
3411 PL_reg_flags |= RF_tainted;
3412 loceol = PL_regeol;
dfe13c55 3413 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3414 scan += UTF8SKIP(scan);
3415 hardcount++;
3416 }
3417 break;
a0d0e21e
LW
3418 case SPACE:
3419 while (scan < loceol && isSPACE(*scan))
3420 scan++;
3421 break;
a0ed51b3
LW
3422 case SPACEUTF8:
3423 loceol = PL_regeol;
dfe13c55 3424 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3425 scan += UTF8SKIP(scan);
3426 hardcount++;
3427 }
3428 break;
bbce6d69 3429 case SPACEL:
3280af22 3430 PL_reg_flags |= RF_tainted;
bbce6d69 3431 while (scan < loceol && isSPACE_LC(*scan))
3432 scan++;
3433 break;
a0ed51b3
LW
3434 case SPACELUTF8:
3435 PL_reg_flags |= RF_tainted;
3436 loceol = PL_regeol;
dfe13c55 3437 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3438 scan += UTF8SKIP(scan);
3439 hardcount++;
3440 }
3441 break;
a0d0e21e
LW
3442 case NSPACE:
3443 while (scan < loceol && !isSPACE(*scan))
3444 scan++;
3445 break;
a0ed51b3
LW
3446 case NSPACEUTF8:
3447 loceol = PL_regeol;
dfe13c55 3448 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3449 scan += UTF8SKIP(scan);
3450 hardcount++;
3451 }
3452 break;
bbce6d69 3453 case NSPACEL:
3280af22 3454 PL_reg_flags |= RF_tainted;
bbce6d69 3455 while (scan < loceol && !isSPACE_LC(*scan))
3456 scan++;
3457 break;
a0ed51b3
LW
3458 case NSPACELUTF8:
3459 PL_reg_flags |= RF_tainted;
3460 loceol = PL_regeol;
dfe13c55 3461 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3462 scan += UTF8SKIP(scan);
3463 hardcount++;
3464 }
3465 break;
a0d0e21e
LW
3466 case DIGIT:
3467 while (scan < loceol && isDIGIT(*scan))
3468 scan++;
3469 break;
a0ed51b3
LW
3470 case DIGITUTF8:
3471 loceol = PL_regeol;
dfe13c55 3472 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3473 scan += UTF8SKIP(scan);
3474 hardcount++;
3475 }
3476 break;
3477 break;
a0d0e21e
LW
3478 case NDIGIT:
3479 while (scan < loceol && !isDIGIT(*scan))
3480 scan++;
3481 break;
a0ed51b3
LW
3482 case NDIGITUTF8:
3483 loceol = PL_regeol;
dfe13c55 3484 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3485 scan += UTF8SKIP(scan);
3486 hardcount++;
3487 }
3488 break;
a0d0e21e
LW
3489 default: /* Called on something of 0 width. */
3490 break; /* So match right here or not at all. */
3491 }
a687059c 3492
a0ed51b3
LW
3493 if (hardcount)
3494 c = hardcount;
3495 else
3496 c = scan - PL_reginput;
3280af22 3497 PL_reginput = scan;
a687059c 3498
c277df42
IZ
3499 DEBUG_r(
3500 {
3501 SV *prop = sv_newmortal();
3502
3503 regprop(prop, p);
3504 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3505 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3506 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3507 });
3508
a0d0e21e 3509 return(c);
a687059c
LW
3510}
3511
3512/*
c277df42
IZ
3513 - regrepeat_hard - repeatedly match something, report total lenth and length
3514 *
3515 * The repeater is supposed to have constant length.
3516 */
3517
76e3520e 3518STATIC I32
cea2e8a9 3519S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3520{
5c0ca799 3521 dTHR;
c277df42
IZ
3522 register char *scan;
3523 register char *start;
3280af22 3524 register char *loceol = PL_regeol;
a0ed51b3 3525 I32 l = 0;
708e3b05 3526 I32 count = 0, res = 1;
a0ed51b3
LW
3527
3528 if (!max)
3529 return 0;
c277df42 3530
3280af22 3531 start = PL_reginput;
a0ed51b3 3532 if (UTF) {
708e3b05 3533 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3534 if (!count++) {
3535 l = 0;
3536 while (start < PL_reginput) {
3537 l++;
3538 start += UTF8SKIP(start);
3539 }
3540 *lp = l;
3541 if (l == 0)
3542 return max;
3543 }
3544 if (count == max)
3545 return count;
3546 }
3547 }
3548 else {
708e3b05 3549 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3550 if (!count++) {
3551 *lp = l = PL_reginput - start;
3552 if (max != REG_INFTY && l*max < loceol - scan)
3553 loceol = scan + l*max;
3554 if (l == 0)
3555 return max;
c277df42
IZ
3556 }
3557 }
3558 }
708e3b05 3559 if (!res)
3280af22 3560 PL_reginput = scan;
c277df42 3561
a0ed51b3 3562 return count;
c277df42
IZ
3563}
3564
3565/*
cb8d8820 3566 - reginclass - determine if a character falls into a character class
bbce6d69 3567 */
3568
76e3520e 3569STATIC bool
936ed897 3570S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3571{
5c0ca799 3572 dTHR;
b8c5462f 3573 char flags = ANYOF_FLAGS(p);
bbce6d69 3574 bool match = FALSE;
3575
3576 c &= 0xFF;
b8c5462f 3577 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3578 match = TRUE;
3579 else if (flags & ANYOF_FOLD) {
3580 I32 cf;
3581 if (flags & ANYOF_LOCALE) {
3280af22 3582 PL_reg_flags |= RF_tainted;
22c35a8c 3583 cf = PL_fold_locale[c];
bbce6d69 3584 }
3585 else
22c35a8c 3586 cf = PL_fold[c];
b8c5462f 3587 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3588 match = TRUE;
3589 }
3590
b8c5462f 3591 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3592 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3593 if (
3594 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3595 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3596 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3597 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3598 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3599 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3600 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3601 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3602 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3603 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3604 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3605 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3606 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3607 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3608 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3609 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3610 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3611 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3612 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3613 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3614 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3615 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3616 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3617 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3618 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3619 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3620 ) /* How's that for a conditional? */
bbce6d69 3621 {
3622 match = TRUE;
3623 }
3624 }
3625
ae5c130c 3626 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3627}
3628
a0ed51b3 3629STATIC bool
cea2e8a9 3630S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3631{
3632 dTHR;
a0ed51b3
LW
3633 char flags = ARG1(f);
3634 bool match = FALSE;
3635 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3636
3637 if (swash_fetch(sv, p))
3638 match = TRUE;
3639 else if (flags & ANYOF_FOLD) {
806e7201 3640 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3641 if (flags & ANYOF_LOCALE) {
3642 PL_reg_flags |= RF_tainted;
3643 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3644 }
3645 else
3646 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3647 if (swash_fetch(sv, tmpbuf))
3648 match = TRUE;
3649 }
3650
b8c5462f 3651 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3652
3653 return (flags & ANYOF_INVERT) ? !match : match;
3654}
161b471a 3655
dfe13c55 3656STATIC U8 *
cea2e8a9 3657S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3658{
3659 dTHR;
a0ed51b3
LW
3660 if (off >= 0) {
3661 while (off-- && s < (U8*)PL_regeol)
3662 s += UTF8SKIP(s);
3663 }
3664 else {
3665 while (off++) {
3666 if (s > (U8*)PL_bostr) {
3667 s--;
3668 if (*s & 0x80) {
3669 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3670 s--;
3671 } /* XXX could check well-formedness here */
3672 }
3673 }
3674 }
3675 return s;
3676}
161b471a 3677
dfe13c55 3678STATIC U8 *
cea2e8a9 3679S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3680{
c485e607 3681 dTHR;
a0ed51b3
LW
3682 if (off >= 0) {
3683 while (off-- && s < (U8*)PL_regeol)
3684 s += UTF8SKIP(s);
3685 if (off >= 0)
3686 return 0;
3687 }
3688 else {
3689 while (off++) {
3690 if (s > (U8*)PL_bostr) {
3691 s--;
3692 if (*s & 0x80) {
3693 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3694 s--;
3695 } /* XXX could check well-formedness here */
3696 }
3697 else
3698 break;
3699 }
3700 if (off <= 0)
3701 return 0;
3702 }
3703 return s;
3704}
51371543
GS
3705
3706#ifdef PERL_OBJECT
51371543
GS
3707#include "XSUB.h"
3708#endif
3709
3710static void
3711restore_pos(pTHXo_ void *arg)
3712{
3713 dTHR;
3714 if (PL_reg_eval_set) {
3715 if (PL_reg_oldsaved) {
3716 PL_reg_re->subbeg = PL_reg_oldsaved;
3717 PL_reg_re->sublen = PL_reg_oldsavedlen;
3718 RX_MATCH_COPIED_on(PL_reg_re);
3719 }
3720 PL_reg_magic->mg_len = PL_reg_oldpos;
3721 PL_reg_eval_set = 0;
3722 PL_curpm = PL_reg_oldcurpm;
3723 }
3724}
3725