This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
use SAVEINT() rather than SAVEDESTRUCTOR() for saving PL_expect etc.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
6eb5f6b9
JH
278/* XXXX Some places assume that there is a fixed substring.
279 An update may be needed if optimizer marks as "INTUITable"
280 RExen without fixed substrings. Similarly, it is assumed that
281 lengths of all the strings are no more than minlen, thus they
282 cannot come from lookahead.
283 (Or minlen should take into account lookahead.) */
284
2c2d71f5
JH
285/* A failure to find a constant substring means that there is no need to make
286 an expensive call to REx engine, thus we celebrate a failure. Similarly,
287 finding a substring too deep into the string means that less calls to
30944b6d
IZ
288 regtry() should be needed.
289
290 REx compiler's optimizer found 4 possible hints:
291 a) Anchored substring;
292 b) Fixed substring;
293 c) Whether we are anchored (beginning-of-line or \G);
294 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 295 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
296 string which does not contradict any of them.
297 */
2c2d71f5 298
6eb5f6b9
JH
299/* Most of decisions we do here should have been done at compile time.
300 The nodes of the REx which we used for the search should have been
301 deleted from the finite automaton. */
302
cad2e5aa
JH
303char *
304Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
305 char *strend, U32 flags, re_scream_pos_data *data)
306{
2c2d71f5 307 register I32 start_shift;
cad2e5aa 308 /* Should be nonnegative! */
2c2d71f5
JH
309 register I32 end_shift;
310 register char *s;
311 register SV *check;
cad2e5aa
JH
312 char *t;
313 I32 ml_anch;
2c2d71f5 314 char *tmp;
6eb5f6b9
JH
315 register char *other_last = Nullch; /* other substr checked before this */
316 char *check_at; /* check substr found at this pos */
30944b6d
IZ
317#ifdef DEBUGGING
318 char *i_strpos = strpos;
319#endif
cad2e5aa
JH
320
321 DEBUG_r( if (!PL_colorset) reginitcolors() );
322 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 323 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
324 PL_colors[4],PL_colors[5],PL_colors[0],
325 prog->precomp,
326 PL_colors[1],
327 (strlen(prog->precomp) > 60 ? "..." : ""),
328 PL_colors[0],
b900a521 329 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
330 strpos, PL_colors[1],
331 (strend - strpos > 60 ? "..." : ""))
332 );
333
2c2d71f5
JH
334 if (prog->minlen > strend - strpos) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 336 goto fail;
2c2d71f5
JH
337 }
338 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
339 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
340 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 341 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
342
343 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 344 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
345 I32 slen;
346
347 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
348 && (sv && (strpos + SvCUR(sv) != strend)) ) {
349 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 350 goto fail;
2c2d71f5 351 }
adac82c7 352 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 353 s = HOPc(strpos, prog->check_offset_min);
cad2e5aa
JH
354 if (SvTAIL(prog->check_substr)) {
355 slen = SvCUR(prog->check_substr); /* >= 1 */
356
2c2d71f5
JH
357 if ( strend - s > slen || strend - s < slen - 1
358 || (strend - s == slen && strend[-1] != '\n')) {
359 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
360 goto fail_finish;
cad2e5aa
JH
361 }
362 /* Now should match s[0..slen-2] */
363 slen--;
364 if (slen && (*SvPVX(prog->check_substr) != *s
365 || (slen > 1
2c2d71f5
JH
366 && memNE(SvPVX(prog->check_substr), s, slen)))) {
367 report_neq:
368 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
369 goto fail_finish;
370 }
cad2e5aa
JH
371 }
372 else if (*SvPVX(prog->check_substr) != *s
373 || ((slen = SvCUR(prog->check_substr)) > 1
374 && memNE(SvPVX(prog->check_substr), s, slen)))
2c2d71f5
JH
375 goto report_neq;
376 goto success_at_start;
cad2e5aa 377 }
2c2d71f5 378 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 379 s = strpos;
2c2d71f5
JH
380 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
381 /* Should be nonnegative! */
382 end_shift = prog->minlen - start_shift -
383 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
384 if (!ml_anch) {
385 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
386 - (SvTAIL(prog->check_substr) != 0);
387 I32 eshift = strend - s - end;
388
389 if (end_shift < eshift)
390 end_shift = eshift;
391 }
cad2e5aa 392 }
2c2d71f5 393 else { /* Can match at random position */
cad2e5aa
JH
394 ml_anch = 0;
395 s = strpos;
2c2d71f5
JH
396 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
397 /* Should be nonnegative! */
398 end_shift = prog->minlen - start_shift -
399 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
cad2e5aa
JH
400 }
401
2c2d71f5 402#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 403 if (end_shift < 0)
6bbae5e6 404 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
405#endif
406
407 check = prog->check_substr;
408 restart:
409 /* Find a possible match in the region s..strend by looking for
410 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 411 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
412 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
413 I32 p = -1; /* Internal iterator of scream. */
414 I32 *pp = data ? data->scream_pos : &p;
415
2c2d71f5
JH
416 if (PL_screamfirst[BmRARE(check)] >= 0
417 || ( BmRARE(check) == '\n'
418 && (BmPREVIOUS(check) == SvCUR(check) - 1)
419 && SvTAIL(check) ))
420 s = screaminstr(sv, check,
421 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 422 else
2c2d71f5 423 goto fail_finish;
cad2e5aa
JH
424 if (data)
425 *data->scream_olds = s;
426 }
427 else
428 s = fbm_instr((unsigned char*)s + start_shift,
429 (unsigned char*)strend - end_shift,
2c2d71f5 430 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
431
432 /* Update the count-of-usability, remove useless subpatterns,
433 unshift s. */
2c2d71f5
JH
434
435 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
436 (s ? "Found" : "Did not find"),
437 ((check == prog->anchored_substr) ? "anchored" : "floating"),
438 PL_colors[0],
7b0972df
JH
439 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
440 SvPVX(check),
2c2d71f5
JH
441 PL_colors[1], (SvTAIL(check) ? "$" : ""),
442 (s ? " at offset " : "...\n") ) );
443
444 if (!s)
445 goto fail_finish;
446
6eb5f6b9
JH
447 check_at = s;
448
2c2d71f5 449 /* Finish the diagnostic message */
30944b6d 450 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
451
452 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
453 Start with the other substr.
454 XXXX no SCREAM optimization yet - and a very coarse implementation
455 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
456 *always* match. Probably should be marked during compile...
457 Probably it is right to do no SCREAM here...
458 */
459
460 if (prog->float_substr && prog->anchored_substr) {
30944b6d 461 /* Take into account the "other" substring. */
2c2d71f5
JH
462 /* XXXX May be hopelessly wrong for UTF... */
463 if (!other_last)
6eb5f6b9 464 other_last = strpos;
2c2d71f5 465 if (check == prog->float_substr) {
30944b6d
IZ
466 do_other_anchored:
467 {
2c2d71f5
JH
468 char *last = s - start_shift, *last1, *last2;
469 char *s1 = s;
470
471 tmp = PL_bostr;
472 t = s - prog->check_offset_max;
473 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
474 && (!(prog->reganch & ROPT_UTF8)
475 || (PL_bostr = strpos, /* Used in regcopmaybe() */
476 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
477 && t > strpos)))
30944b6d 478 /* EMPTY */;
2c2d71f5
JH
479 else
480 t = strpos;
481 t += prog->anchored_offset;
6eb5f6b9
JH
482 if (t < other_last) /* These positions already checked */
483 t = other_last;
2c2d71f5
JH
484 PL_bostr = tmp;
485 last2 = last1 = strend - prog->minlen;
486 if (last < last1)
487 last1 = last;
488 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
489 /* On end-of-str: see comment below. */
490 s = fbm_instr((unsigned char*)t,
491 (unsigned char*)last1 + prog->anchored_offset
492 + SvCUR(prog->anchored_substr)
493 - (SvTAIL(prog->anchored_substr)!=0),
494 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
495 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
496 (s ? "Found" : "Contradicts"),
497 PL_colors[0],
7b0972df
JH
498 (int)(SvCUR(prog->anchored_substr)
499 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
500 SvPVX(prog->anchored_substr),
501 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
502 if (!s) {
503 if (last1 >= last2) {
504 DEBUG_r(PerlIO_printf(Perl_debug_log,
505 ", giving up...\n"));
506 goto fail_finish;
507 }
508 DEBUG_r(PerlIO_printf(Perl_debug_log,
509 ", trying floating at offset %ld...\n",
30944b6d 510 (long)(s1 + 1 - i_strpos)));
2c2d71f5 511 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 512 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
513 s = HOPc(last, 1);
514 goto restart;
515 }
516 else {
517 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 518 (long)(s - i_strpos)));
2c2d71f5 519 t = s - prog->anchored_offset;
6eb5f6b9 520 other_last = s + 1;
30944b6d 521 s = s1;
2c2d71f5
JH
522 if (t == strpos)
523 goto try_at_start;
2c2d71f5
JH
524 goto try_at_offset;
525 }
30944b6d 526 }
2c2d71f5
JH
527 }
528 else { /* Take into account the floating substring. */
529 char *last, *last1;
530 char *s1 = s;
531
532 t = s - start_shift;
533 last1 = last = strend - prog->minlen + prog->float_min_offset;
534 if (last - t > prog->float_max_offset)
535 last = t + prog->float_max_offset;
536 s = t + prog->float_min_offset;
6eb5f6b9
JH
537 if (s < other_last)
538 s = other_last;
2c2d71f5
JH
539 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
540 /* fbm_instr() takes into account exact value of end-of-str
541 if the check is SvTAIL(ed). Since false positives are OK,
542 and end-of-str is not later than strend we are OK. */
543 s = fbm_instr((unsigned char*)s,
544 (unsigned char*)last + SvCUR(prog->float_substr)
545 - (SvTAIL(prog->float_substr)!=0),
546 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
547 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
548 (s ? "Found" : "Contradicts"),
549 PL_colors[0],
7b0972df
JH
550 (int)(SvCUR(prog->float_substr)
551 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
552 SvPVX(prog->float_substr),
553 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
554 if (!s) {
555 if (last1 == last) {
556 DEBUG_r(PerlIO_printf(Perl_debug_log,
557 ", giving up...\n"));
558 goto fail_finish;
559 }
560 DEBUG_r(PerlIO_printf(Perl_debug_log,
561 ", trying anchored starting at offset %ld...\n",
30944b6d 562 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 563 other_last = last + 1;
2c2d71f5
JH
564 PL_regeol = strend; /* Used in HOP() */
565 s = HOPc(t, 1);
566 goto restart;
567 }
568 else {
569 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 570 (long)(s - i_strpos)));
6eb5f6b9 571 other_last = s + 1;
30944b6d 572 s = s1;
2c2d71f5
JH
573 if (t == strpos)
574 goto try_at_start;
2c2d71f5
JH
575 goto try_at_offset;
576 }
577 }
cad2e5aa 578 }
2c2d71f5
JH
579
580 t = s - prog->check_offset_max;
581 tmp = PL_bostr;
582 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
583 && (!(prog->reganch & ROPT_UTF8)
584 || (PL_bostr = strpos, /* Used in regcopmaybe() */
585 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
586 && t > strpos)))) {
587 PL_bostr = tmp;
588 /* Fixed substring is found far enough so that the match
589 cannot start at strpos. */
590 try_at_offset:
cad2e5aa 591 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
592 /* Eventually fbm_*() should handle this, but often
593 anchored_offset is not 0, so this check will not be wasted. */
594 /* XXXX In the code below we prefer to look for "^" even in
595 presence of anchored substrings. And we search even
596 beyond the found float position. These pessimizations
597 are historical artefacts only. */
598 find_anchor:
2c2d71f5 599 while (t < strend - prog->minlen) {
cad2e5aa
JH
600 if (*t == '\n') {
601 if (t < s - prog->check_offset_min) {
30944b6d
IZ
602 if (prog->anchored_substr) {
603 /* We definitely contradict the found anchored
604 substr. Due to the above check we do not
605 contradict "check" substr.
606 Thus we can arrive here only if check substr
607 is float. Redo checking for "other"=="fixed".
608 */
609 strpos = t + 1;
610 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
611 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
612 goto do_other_anchored;
613 }
cad2e5aa 614 s = t + 1;
2c2d71f5 615 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 616 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
617 goto set_useful;
618 }
2c2d71f5 619 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
620 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
621 strpos = s = t + 1;
cad2e5aa
JH
622 goto restart;
623 }
624 t++;
625 }
2c2d71f5
JH
626 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
627 PL_colors[0],PL_colors[1]));
628 goto fail_finish;
cad2e5aa
JH
629 }
630 s = t;
631 set_useful:
2c2d71f5 632 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
633 }
634 else {
2c2d71f5
JH
635 PL_bostr = tmp;
636 /* The found string does not prohibit matching at beg-of-str
637 - no optimization of calling REx engine can be performed,
638 unless it was an MBOL and we are not after MBOL. */
639 try_at_start:
640 /* Even in this situation we may use MBOL flag if strpos is offset
641 wrt the start of the string. */
642 if (ml_anch && sv
cad2e5aa
JH
643 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
644 t = strpos;
645 goto find_anchor;
646 }
30944b6d
IZ
647 DEBUG_r( if (ml_anch)
648 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
649 PL_colors[0],PL_colors[1]);
650 );
2c2d71f5 651 success_at_start:
30944b6d 652 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 653 && prog->check_substr /* Could be deleted already */
cad2e5aa 654 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
655 && prog->check_substr == prog->float_substr)
656 {
cad2e5aa
JH
657 /* If flags & SOMETHING - do not do it many times on the same match */
658 SvREFCNT_dec(prog->check_substr);
659 prog->check_substr = Nullsv; /* disable */
660 prog->float_substr = Nullsv; /* clear */
661 s = strpos;
3cf5c195
IZ
662 /* XXXX This is a remnant of the old implementation. It
663 looks wasteful, since now INTUIT can use many
6eb5f6b9 664 other heuristics. */
cad2e5aa
JH
665 prog->reganch &= ~RE_USE_INTUIT;
666 }
667 else
668 s = strpos;
669 }
670
6eb5f6b9
JH
671 /* Last resort... */
672 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
673 if (prog->regstclass) {
674 /* minlen == 0 is possible if regstclass is \b or \B,
675 and the fixed substr is ''$.
676 Since minlen is already taken into account, s+1 is before strend;
677 accidentally, minlen >= 1 guaranties no false positives at s + 1
678 even for \b or \B. But (minlen? 1 : 0) below assumes that
679 regstclass does not come from lookahead... */
680 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
681 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
682 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
683 ? STR_LEN(prog->regstclass)
684 : 1);
6eb5f6b9 685 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
686 ? s + (prog->minlen? cl_l : 0)
687 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9
JH
688 : strend) ;
689 char *startpos = sv ? strend - SvCUR(sv) : s;
690
691 t = s;
692 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
693 if (!s) {
694#ifdef DEBUGGING
695 char *what;
696#endif
697 if (endpos == strend) {
698 DEBUG_r( PerlIO_printf(Perl_debug_log,
699 "Could not match STCLASS...\n") );
700 goto fail;
701 }
66e933ab
GS
702 DEBUG_r( PerlIO_printf(Perl_debug_log,
703 "This position contradicts STCLASS...\n") );
6eb5f6b9
JH
704 /* Contradict one of substrings */
705 if (prog->anchored_substr) {
6eb5f6b9
JH
706 if (prog->anchored_substr == check) {
707 DEBUG_r( what = "anchored" );
708 hop_and_restart:
709 PL_regeol = strend; /* Used in HOP() */
710 s = HOPc(t, 1);
66e933ab
GS
711 if (s + start_shift + end_shift > strend) {
712 /* XXXX Should be taken into account earlier? */
713 DEBUG_r( PerlIO_printf(Perl_debug_log,
714 "Could not match STCLASS...\n") );
715 goto fail;
716 }
6eb5f6b9 717 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 718 "Trying %s substr starting at offset %ld...\n",
6eb5f6b9
JH
719 what, (long)(s + start_shift - i_strpos)) );
720 goto restart;
721 }
66e933ab 722 /* Have both, check_string is floating */
6eb5f6b9
JH
723 if (t + start_shift >= check_at) /* Contradicts floating=check */
724 goto retry_floating_check;
725 /* Recheck anchored substring, but not floating... */
726 s = check_at;
727 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 728 "Trying anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
729 (long)(other_last - i_strpos)) );
730 goto do_other_anchored;
731 }
66e933ab
GS
732 if (!prog->float_substr) { /* Could have been deleted */
733 if (ml_anch) {
734 s = t = t + 1;
735 goto try_at_offset;
736 }
737 goto fail;
738 }
6eb5f6b9
JH
739 /* Check is floating subtring. */
740 retry_floating_check:
741 t = check_at - start_shift;
742 DEBUG_r( what = "floating" );
743 goto hop_and_restart;
744 }
745 DEBUG_r( if (t != s)
746 PerlIO_printf(Perl_debug_log,
747 "By STCLASS: moving %ld --> %ld\n",
748 (long)(t - i_strpos), (long)(s - i_strpos));
749 else
750 PerlIO_printf(Perl_debug_log,
751 "Does not contradict STCLASS...\n") );
752 }
2c2d71f5 753 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 754 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 755 return s;
2c2d71f5
JH
756
757 fail_finish: /* Substring not found */
66e933ab
GS
758 if (prog->check_substr) /* could be removed already */
759 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 760 fail:
2c2d71f5 761 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
762 PL_colors[4],PL_colors[5]));
763 return Nullch;
764}
9661b544 765
6eb5f6b9 766/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
767STATIC char *
768S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 769{
6eb5f6b9
JH
770 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
771 char *m;
772 int ln;
773 int c1;
774 int c2;
775 char *e;
776 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 777
6eb5f6b9
JH
778 /* We know what class it must start with. */
779 switch (OP(c)) {
780 case ANYOFUTF8:
781 while (s < strend) {
782 if (REGINCLASSUTF8(c, (U8*)s)) {
783 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 784 goto got_it;
3cf5c195 785 else
6eb5f6b9 786 tmp = doevery;
a0ed51b3 787 }
6eb5f6b9
JH
788 else
789 tmp = 1;
a0ed51b3
LW
790 s += UTF8SKIP(s);
791 }
6eb5f6b9
JH
792 break;
793 case ANYOF:
a0ed51b3 794 while (s < strend) {
6eb5f6b9
JH
795 if (REGINCLASS(c, *s)) {
796 if (tmp && (norun || regtry(prog, s)))
797 goto got_it;
798 else
799 tmp = doevery;
a0ed51b3 800 }
6eb5f6b9
JH
801 else
802 tmp = 1;
a0ed51b3 803 s++;
a0d0e21e 804 }
6eb5f6b9
JH
805 break;
806 case EXACTF:
807 m = STRING(c);
808 ln = STR_LEN(c);
809 c1 = *m;
810 c2 = PL_fold[c1];
811 goto do_exactf;
812 case EXACTFL:
813 m = STRING(c);
814 ln = STR_LEN(c);
815 c1 = *m;
816 c2 = PL_fold_locale[c1];
817 do_exactf:
818 e = strend - ln;
b3c9acc1 819
6eb5f6b9
JH
820 if (norun && e < s)
821 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
822 /* Here it is NOT UTF! */
823 if (c1 == c2) {
824 while (s <= e) {
825 if ( *s == c1
66e933ab
GS
826 && (ln == 1 || !(OP(c) == EXACTF
827 ? ibcmp(s, m, ln)
828 : ibcmp_locale(s, m, ln)))
6eb5f6b9 829 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
830 goto got_it;
831 s++;
832 }
833 } else {
834 while (s <= e) {
835 if ( (*s == c1 || *s == c2)
66e933ab
GS
836 && (ln == 1 || !(OP(c) == EXACTF
837 ? ibcmp(s, m, ln)
838 : ibcmp_locale(s, m, ln)))
6eb5f6b9 839 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
840 goto got_it;
841 s++;
842 }
843 }
844 break;
bbce6d69 845 case BOUNDL:
3280af22 846 PL_reg_flags |= RF_tainted;
bbce6d69 847 /* FALL THROUGH */
a0d0e21e 848 case BOUND:
6eb5f6b9 849 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 850 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 851 while (s < strend) {
95bac841 852 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 853 tmp = !tmp;
6eb5f6b9 854 if ((norun || regtry(prog, s)))
a0d0e21e 855 goto got_it;
a687059c 856 }
a0d0e21e
LW
857 s++;
858 }
6eb5f6b9 859 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
860 goto got_it;
861 break;
a0ed51b3
LW
862 case BOUNDLUTF8:
863 PL_reg_flags |= RF_tainted;
864 /* FALL THROUGH */
865 case BOUNDUTF8:
6eb5f6b9 866 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
a0ed51b3
LW
867 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
868 while (s < strend) {
dfe13c55
GS
869 if (tmp == !(OP(c) == BOUND ?
870 swash_fetch(PL_utf8_alnum, (U8*)s) :
871 isALNUM_LC_utf8((U8*)s)))
872 {
a0ed51b3 873 tmp = !tmp;
6eb5f6b9 874 if ((norun || regtry(prog, s)))
a0ed51b3
LW
875 goto got_it;
876 }
877 s += UTF8SKIP(s);
878 }
6eb5f6b9 879 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
880 goto got_it;
881 break;
bbce6d69 882 case NBOUNDL:
3280af22 883 PL_reg_flags |= RF_tainted;
bbce6d69 884 /* FALL THROUGH */
a0d0e21e 885 case NBOUND:
6eb5f6b9 886 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 887 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 888 while (s < strend) {
95bac841 889 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 890 tmp = !tmp;
6eb5f6b9 891 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
892 goto got_it;
893 s++;
894 }
6eb5f6b9 895 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
896 goto got_it;
897 break;
a0ed51b3
LW
898 case NBOUNDLUTF8:
899 PL_reg_flags |= RF_tainted;
900 /* FALL THROUGH */
901 case NBOUNDUTF8:
6eb5f6b9 902 if (prog->minlen)
dfe13c55 903 strend = reghop_c(strend, -1);
6eb5f6b9 904 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
a0ed51b3
LW
905 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
906 while (s < strend) {
dfe13c55
GS
907 if (tmp == !(OP(c) == NBOUND ?
908 swash_fetch(PL_utf8_alnum, (U8*)s) :
909 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 910 tmp = !tmp;
6eb5f6b9 911 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
912 goto got_it;
913 s += UTF8SKIP(s);
914 }
6eb5f6b9 915 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
916 goto got_it;
917 break;
a0d0e21e
LW
918 case ALNUM:
919 while (s < strend) {
bbce6d69 920 if (isALNUM(*s)) {
6eb5f6b9 921 if (tmp && (norun || regtry(prog, s)))
bbce6d69 922 goto got_it;
923 else
924 tmp = doevery;
925 }
926 else
927 tmp = 1;
928 s++;
929 }
930 break;
a0ed51b3
LW
931 case ALNUMUTF8:
932 while (s < strend) {
dfe13c55 933 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 934 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
935 goto got_it;
936 else
937 tmp = doevery;
938 }
939 else
940 tmp = 1;
941 s += UTF8SKIP(s);
942 }
943 break;
bbce6d69 944 case ALNUML:
3280af22 945 PL_reg_flags |= RF_tainted;
bbce6d69 946 while (s < strend) {
947 if (isALNUM_LC(*s)) {
6eb5f6b9 948 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 949 goto got_it;
a687059c 950 else
a0d0e21e
LW
951 tmp = doevery;
952 }
953 else
954 tmp = 1;
955 s++;
956 }
957 break;
a0ed51b3
LW
958 case ALNUMLUTF8:
959 PL_reg_flags |= RF_tainted;
960 while (s < strend) {
dfe13c55 961 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 962 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
963 goto got_it;
964 else
965 tmp = doevery;
966 }
967 else
968 tmp = 1;
969 s += UTF8SKIP(s);
970 }
971 break;
a0d0e21e
LW
972 case NALNUM:
973 while (s < strend) {
bbce6d69 974 if (!isALNUM(*s)) {
6eb5f6b9 975 if (tmp && (norun || regtry(prog, s)))
bbce6d69 976 goto got_it;
977 else
978 tmp = doevery;
979 }
980 else
981 tmp = 1;
982 s++;
983 }
984 break;
a0ed51b3
LW
985 case NALNUMUTF8:
986 while (s < strend) {
dfe13c55 987 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 988 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
989 goto got_it;
990 else
991 tmp = doevery;
992 }
993 else
994 tmp = 1;
995 s += UTF8SKIP(s);
996 }
997 break;
bbce6d69 998 case NALNUML:
3280af22 999 PL_reg_flags |= RF_tainted;
bbce6d69 1000 while (s < strend) {
1001 if (!isALNUM_LC(*s)) {
6eb5f6b9 1002 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1003 goto got_it;
a687059c 1004 else
a0d0e21e 1005 tmp = doevery;
a687059c 1006 }
a0d0e21e
LW
1007 else
1008 tmp = 1;
1009 s++;
1010 }
1011 break;
a0ed51b3
LW
1012 case NALNUMLUTF8:
1013 PL_reg_flags |= RF_tainted;
1014 while (s < strend) {
dfe13c55 1015 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1016 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1017 goto got_it;
1018 else
1019 tmp = doevery;
1020 }
1021 else
1022 tmp = 1;
1023 s += UTF8SKIP(s);
1024 }
1025 break;
a0d0e21e
LW
1026 case SPACE:
1027 while (s < strend) {
1028 if (isSPACE(*s)) {
6eb5f6b9 1029 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1030 goto got_it;
1031 else
1032 tmp = doevery;
2304df62 1033 }
a0d0e21e
LW
1034 else
1035 tmp = 1;
1036 s++;
1037 }
1038 break;
a0ed51b3
LW
1039 case SPACEUTF8:
1040 while (s < strend) {
dfe13c55 1041 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1042 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1043 goto got_it;
1044 else
1045 tmp = doevery;
1046 }
1047 else
1048 tmp = 1;
1049 s += UTF8SKIP(s);
1050 }
1051 break;
bbce6d69 1052 case SPACEL:
3280af22 1053 PL_reg_flags |= RF_tainted;
bbce6d69 1054 while (s < strend) {
1055 if (isSPACE_LC(*s)) {
6eb5f6b9 1056 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1057 goto got_it;
1058 else
1059 tmp = doevery;
1060 }
1061 else
1062 tmp = 1;
1063 s++;
1064 }
1065 break;
a0ed51b3
LW
1066 case SPACELUTF8:
1067 PL_reg_flags |= RF_tainted;
1068 while (s < strend) {
dfe13c55 1069 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1070 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1071 goto got_it;
1072 else
1073 tmp = doevery;
1074 }
1075 else
1076 tmp = 1;
1077 s += UTF8SKIP(s);
1078 }
1079 break;
a0d0e21e
LW
1080 case NSPACE:
1081 while (s < strend) {
1082 if (!isSPACE(*s)) {
6eb5f6b9 1083 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1084 goto got_it;
1085 else
1086 tmp = doevery;
a687059c 1087 }
a0d0e21e
LW
1088 else
1089 tmp = 1;
1090 s++;
1091 }
1092 break;
a0ed51b3
LW
1093 case NSPACEUTF8:
1094 while (s < strend) {
dfe13c55 1095 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1096 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1097 goto got_it;
1098 else
1099 tmp = doevery;
1100 }
1101 else
1102 tmp = 1;
1103 s += UTF8SKIP(s);
1104 }
1105 break;
bbce6d69 1106 case NSPACEL:
3280af22 1107 PL_reg_flags |= RF_tainted;
bbce6d69 1108 while (s < strend) {
1109 if (!isSPACE_LC(*s)) {
6eb5f6b9 1110 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1111 goto got_it;
1112 else
1113 tmp = doevery;
1114 }
1115 else
1116 tmp = 1;
1117 s++;
1118 }
1119 break;
a0ed51b3
LW
1120 case NSPACELUTF8:
1121 PL_reg_flags |= RF_tainted;
1122 while (s < strend) {
dfe13c55 1123 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1124 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1125 goto got_it;
1126 else
1127 tmp = doevery;
1128 }
1129 else
1130 tmp = 1;
1131 s += UTF8SKIP(s);
1132 }
1133 break;
a0d0e21e
LW
1134 case DIGIT:
1135 while (s < strend) {
1136 if (isDIGIT(*s)) {
6eb5f6b9 1137 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1138 goto got_it;
1139 else
1140 tmp = doevery;
2b69d0c2 1141 }
a0d0e21e
LW
1142 else
1143 tmp = 1;
1144 s++;
1145 }
1146 break;
a0ed51b3
LW
1147 case DIGITUTF8:
1148 while (s < strend) {
dfe13c55 1149 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1150 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1151 goto got_it;
1152 else
1153 tmp = doevery;
1154 }
1155 else
1156 tmp = 1;
1157 s += UTF8SKIP(s);
1158 }
1159 break;
b8c5462f
JH
1160 case DIGITL:
1161 PL_reg_flags |= RF_tainted;
1162 while (s < strend) {
1163 if (isDIGIT_LC(*s)) {
6eb5f6b9 1164 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1165 goto got_it;
1166 else
1167 tmp = doevery;
1168 }
1169 else
1170 tmp = 1;
1171 s++;
1172 }
1173 break;
1174 case DIGITLUTF8:
1175 PL_reg_flags |= RF_tainted;
1176 while (s < strend) {
1177 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1178 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1179 goto got_it;
1180 else
1181 tmp = doevery;
1182 }
1183 else
1184 tmp = 1;
1185 s += UTF8SKIP(s);
1186 }
1187 break;
a0d0e21e
LW
1188 case NDIGIT:
1189 while (s < strend) {
1190 if (!isDIGIT(*s)) {
6eb5f6b9 1191 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1192 goto got_it;
1193 else
1194 tmp = doevery;
a687059c 1195 }
a0d0e21e
LW
1196 else
1197 tmp = 1;
1198 s++;
1199 }
1200 break;
a0ed51b3
LW
1201 case NDIGITUTF8:
1202 while (s < strend) {
dfe13c55 1203 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1204 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1205 goto got_it;
1206 else
1207 tmp = doevery;
1208 }
1209 else
1210 tmp = 1;
1211 s += UTF8SKIP(s);
1212 }
1213 break;
b8c5462f
JH
1214 case NDIGITL:
1215 PL_reg_flags |= RF_tainted;
1216 while (s < strend) {
1217 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1218 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1219 goto got_it;
1220 else
1221 tmp = doevery;
1222 }
1223 else
1224 tmp = 1;
1225 s++;
a0ed51b3 1226 }
b8c5462f
JH
1227 break;
1228 case NDIGITLUTF8:
1229 PL_reg_flags |= RF_tainted;
1230 while (s < strend) {
1231 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1232 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1233 goto got_it;
cf93c79d 1234 else
b8c5462f
JH
1235 tmp = doevery;
1236 }
1237 else
1238 tmp = 1;
1239 s += UTF8SKIP(s);
1240 }
1241 break;
b3c9acc1 1242 default:
3c3eec57
GS
1243 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1244 break;
d6a28714 1245 }
6eb5f6b9
JH
1246 return 0;
1247 got_it:
1248 return s;
1249}
1250
1251/*
1252 - regexec_flags - match a regexp against a string
1253 */
1254I32
1255Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1256 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1257/* strend: pointer to null at end of string */
1258/* strbeg: real beginning of string */
1259/* minend: end of match must be >=minend after stringarg. */
1260/* data: May be used for some additional optimizations. */
1261/* nosave: For optimizations. */
1262{
1263 dTHR;
1264 register char *s;
1265 register regnode *c;
1266 register char *startpos = stringarg;
1267 register I32 tmp;
1268 I32 minlen; /* must match at least this many chars */
1269 I32 dontbother = 0; /* how many characters not to try at end */
1270 I32 start_shift = 0; /* Offset of the start to find
1271 constant substr. */ /* CC */
1272 I32 end_shift = 0; /* Same for the end. */ /* CC */
1273 I32 scream_pos = -1; /* Internal iterator of scream. */
1274 char *scream_olds;
1275 SV* oreplsv = GvSV(PL_replgv);
1276
1277 PL_regcc = 0;
1278
1279 cache_re(prog);
1280#ifdef DEBUGGING
1281 PL_regnarrate = PL_debug & 512;
1282#endif
1283
1284 /* Be paranoid... */
1285 if (prog == NULL || startpos == NULL) {
1286 Perl_croak(aTHX_ "NULL regexp parameter");
1287 return 0;
1288 }
1289
1290 minlen = prog->minlen;
1291 if (strend - startpos < minlen) goto phooey;
1292
1293 if (startpos == strbeg) /* is ^ valid at stringarg? */
1294 PL_regprev = '\n';
1295 else {
1296 PL_regprev = (U32)stringarg[-1];
1297 if (!PL_multiline && PL_regprev == '\n')
1298 PL_regprev = '\0'; /* force ^ to NOT match */
1299 }
1300
1301 /* Check validity of program. */
1302 if (UCHARAT(prog->program) != REG_MAGIC) {
1303 Perl_croak(aTHX_ "corrupted regexp program");
1304 }
1305
1306 PL_reg_flags = 0;
1307 PL_reg_eval_set = 0;
1308 PL_reg_maxiter = 0;
1309
1310 if (prog->reganch & ROPT_UTF8)
1311 PL_reg_flags |= RF_utf8;
1312
1313 /* Mark beginning of line for ^ and lookbehind. */
1314 PL_regbol = startpos;
1315 PL_bostr = strbeg;
1316 PL_reg_sv = sv;
1317
1318 /* Mark end of line for $ (and such) */
1319 PL_regeol = strend;
1320
1321 /* see how far we have to get to not match where we matched before */
1322 PL_regtill = startpos+minend;
1323
1324 /* We start without call_cc context. */
1325 PL_reg_call_cc = 0;
1326
1327 /* If there is a "must appear" string, look for it. */
1328 s = startpos;
1329
1330 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1331 MAGIC *mg;
1332
1333 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1334 PL_reg_ganch = startpos;
1335 else if (sv && SvTYPE(sv) >= SVt_PVMG
1336 && SvMAGIC(sv)
1337 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1338 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1339 if (prog->reganch & ROPT_ANCH_GPOS) {
1340 if (s > PL_reg_ganch)
1341 goto phooey;
1342 s = PL_reg_ganch;
1343 }
1344 }
1345 else /* pos() not defined */
1346 PL_reg_ganch = strbeg;
1347 }
1348
1349 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1350 re_scream_pos_data d;
1351
1352 d.scream_olds = &scream_olds;
1353 d.scream_pos = &scream_pos;
1354 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1355 if (!s)
1356 goto phooey; /* not present */
1357 }
1358
1359 DEBUG_r( if (!PL_colorset) reginitcolors() );
1360 DEBUG_r(PerlIO_printf(Perl_debug_log,
1361 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1362 PL_colors[4],PL_colors[5],PL_colors[0],
1363 prog->precomp,
1364 PL_colors[1],
1365 (strlen(prog->precomp) > 60 ? "..." : ""),
1366 PL_colors[0],
1367 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1368 startpos, PL_colors[1],
1369 (strend - startpos > 60 ? "..." : ""))
1370 );
1371
1372 /* Simplest case: anchored match need be tried only once. */
1373 /* [unless only anchor is BOL and multiline is set] */
1374 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1375 if (s == startpos && regtry(prog, startpos))
1376 goto got_it;
1377 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1378 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1379 {
1380 char *end;
1381
1382 if (minlen)
1383 dontbother = minlen - 1;
1384 end = HOPc(strend, -dontbother) - 1;
1385 /* for multiline we only have to try after newlines */
1386 if (prog->check_substr) {
1387 if (s == startpos)
1388 goto after_try;
1389 while (1) {
1390 if (regtry(prog, s))
1391 goto got_it;
1392 after_try:
1393 if (s >= end)
1394 goto phooey;
1395 if (prog->reganch & RE_USE_INTUIT) {
1396 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1397 if (!s)
1398 goto phooey;
1399 }
1400 else
1401 s++;
1402 }
1403 } else {
1404 if (s > startpos)
1405 s--;
1406 while (s < end) {
1407 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1408 if (regtry(prog, s))
1409 goto got_it;
1410 }
1411 }
1412 }
1413 }
1414 goto phooey;
1415 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1416 if (regtry(prog, PL_reg_ganch))
1417 goto got_it;
1418 goto phooey;
1419 }
1420
1421 /* Messy cases: unanchored match. */
1422 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1423 /* we have /x+whatever/ */
1424 /* it must be a one character string (XXXX Except UTF?) */
1425 char ch = SvPVX(prog->anchored_substr)[0];
1426 if (UTF) {
1427 while (s < strend) {
1428 if (*s == ch) {
1429 if (regtry(prog, s)) goto got_it;
1430 s += UTF8SKIP(s);
1431 while (s < strend && *s == ch)
1432 s += UTF8SKIP(s);
1433 }
1434 s += UTF8SKIP(s);
1435 }
1436 }
1437 else {
1438 while (s < strend) {
1439 if (*s == ch) {
1440 if (regtry(prog, s)) goto got_it;
1441 s++;
1442 while (s < strend && *s == ch)
1443 s++;
1444 }
1445 s++;
1446 }
1447 }
1448 }
1449 /*SUPPRESS 560*/
1450 else if (prog->anchored_substr != Nullsv
1451 || (prog->float_substr != Nullsv
1452 && prog->float_max_offset < strend - s)) {
1453 SV *must = prog->anchored_substr
1454 ? prog->anchored_substr : prog->float_substr;
1455 I32 back_max =
1456 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1457 I32 back_min =
1458 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1459 I32 delta = back_max - back_min;
1460 char *last = HOPc(strend, /* Cannot start after this */
1461 -(I32)(CHR_SVLEN(must)
1462 - (SvTAIL(must) != 0) + back_min));
1463 char *last1; /* Last position checked before */
1464
1465 if (s > PL_bostr)
1466 last1 = HOPc(s, -1);
1467 else
1468 last1 = s - 1; /* bogus */
1469
1470 /* XXXX check_substr already used to find `s', can optimize if
1471 check_substr==must. */
1472 scream_pos = -1;
1473 dontbother = end_shift;
1474 strend = HOPc(strend, -dontbother);
1475 while ( (s <= last) &&
1476 ((flags & REXEC_SCREAM)
1477 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1478 end_shift, &scream_pos, 0))
1479 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1480 (unsigned char*)strend, must,
1481 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1482 if (HOPc(s, -back_max) > last1) {
1483 last1 = HOPc(s, -back_min);
1484 s = HOPc(s, -back_max);
1485 }
1486 else {
1487 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1488
1489 last1 = HOPc(s, -back_min);
1490 s = t;
1491 }
1492 if (UTF) {
1493 while (s <= last1) {
1494 if (regtry(prog, s))
1495 goto got_it;
1496 s += UTF8SKIP(s);
1497 }
1498 }
1499 else {
1500 while (s <= last1) {
1501 if (regtry(prog, s))
1502 goto got_it;
1503 s++;
1504 }
1505 }
1506 }
1507 goto phooey;
1508 }
1509 else if (c = prog->regstclass) {
66e933ab
GS
1510 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1511 /* don't bother with what can't match */
6eb5f6b9
JH
1512 strend = HOPc(strend, -(minlen - 1));
1513 if (find_byclass(prog, c, s, strend, startpos, 0))
1514 goto got_it;
d6a28714
JH
1515 }
1516 else {
1517 dontbother = 0;
1518 if (prog->float_substr != Nullsv) { /* Trim the end. */
1519 char *last;
1520 I32 oldpos = scream_pos;
1521
1522 if (flags & REXEC_SCREAM) {
1523 last = screaminstr(sv, prog->float_substr, s - strbeg,
1524 end_shift, &scream_pos, 1); /* last one */
1525 if (!last)
1526 last = scream_olds; /* Only one occurence. */
b8c5462f 1527 }
d6a28714
JH
1528 else {
1529 STRLEN len;
1530 char *little = SvPV(prog->float_substr, len);
1531
1532 if (SvTAIL(prog->float_substr)) {
1533 if (memEQ(strend - len + 1, little, len - 1))
1534 last = strend - len + 1;
1535 else if (!PL_multiline)
1536 last = memEQ(strend - len, little, len)
1537 ? strend - len : Nullch;
b8c5462f 1538 else
d6a28714
JH
1539 goto find_last;
1540 } else {
1541 find_last:
1542 if (len)
1543 last = rninstr(s, strend, little, little + len);
b8c5462f 1544 else
d6a28714 1545 last = strend; /* matching `$' */
b8c5462f 1546 }
b8c5462f 1547 }
d6a28714
JH
1548 if (last == NULL) goto phooey; /* Should not happen! */
1549 dontbother = strend - last + prog->float_min_offset;
1550 }
1551 if (minlen && (dontbother < minlen))
1552 dontbother = minlen - 1;
1553 strend -= dontbother; /* this one's always in bytes! */
1554 /* We don't know much -- general case. */
1555 if (UTF) {
1556 for (;;) {
1557 if (regtry(prog, s))
1558 goto got_it;
1559 if (s >= strend)
1560 break;
b8c5462f 1561 s += UTF8SKIP(s);
d6a28714
JH
1562 };
1563 }
1564 else {
1565 do {
1566 if (regtry(prog, s))
1567 goto got_it;
1568 } while (s++ < strend);
1569 }
1570 }
1571
1572 /* Failure. */
1573 goto phooey;
1574
1575got_it:
1576 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1577
1578 if (PL_reg_eval_set) {
1579 /* Preserve the current value of $^R */
1580 if (oreplsv != GvSV(PL_replgv))
1581 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1582 restored, the value remains
1583 the same. */
1584 restore_pos(aTHXo_ 0);
1585 }
1586
1587 /* make sure $`, $&, $', and $digit will work later */
1588 if ( !(flags & REXEC_NOT_FIRST) ) {
1589 if (RX_MATCH_COPIED(prog)) {
1590 Safefree(prog->subbeg);
1591 RX_MATCH_COPIED_off(prog);
1592 }
1593 if (flags & REXEC_COPY_STR) {
1594 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1595
1596 s = savepvn(strbeg, i);
1597 prog->subbeg = s;
1598 prog->sublen = i;
1599 RX_MATCH_COPIED_on(prog);
1600 }
1601 else {
1602 prog->subbeg = strbeg;
1603 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1604 }
1605 }
1606
1607 return 1;
1608
1609phooey:
1610 if (PL_reg_eval_set)
1611 restore_pos(aTHXo_ 0);
1612 return 0;
1613}
1614
1615/*
1616 - regtry - try match at specific point
1617 */
1618STATIC I32 /* 0 failure, 1 success */
1619S_regtry(pTHX_ regexp *prog, char *startpos)
1620{
1621 dTHR;
1622 register I32 i;
1623 register I32 *sp;
1624 register I32 *ep;
1625 CHECKPOINT lastcp;
1626
1627 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1628 MAGIC *mg;
1629
1630 PL_reg_eval_set = RS_init;
1631 DEBUG_r(DEBUG_s(
b900a521
JH
1632 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1633 (IV)(PL_stack_sp - PL_stack_base));
d6a28714
JH
1634 ));
1635 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1636 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1637 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1638 SAVETMPS;
1639 /* Apparently this is not needed, judging by wantarray. */
1640 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1641 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1642
1643 if (PL_reg_sv) {
1644 /* Make $_ available to executed code. */
1645 if (PL_reg_sv != DEFSV) {
1646 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1647 SAVESPTR(DEFSV);
1648 DEFSV = PL_reg_sv;
b8c5462f 1649 }
d6a28714
JH
1650
1651 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1652 && (mg = mg_find(PL_reg_sv, 'g')))) {
1653 /* prepare for quick setting of pos */
1654 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1655 mg = mg_find(PL_reg_sv, 'g');
1656 mg->mg_len = -1;
b8c5462f 1657 }
d6a28714
JH
1658 PL_reg_magic = mg;
1659 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1660 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1661 }
1662 if (!PL_reg_curpm)
1663 New(22,PL_reg_curpm, 1, PMOP);
1664 PL_reg_curpm->op_pmregexp = prog;
1665 PL_reg_oldcurpm = PL_curpm;
1666 PL_curpm = PL_reg_curpm;
1667 if (RX_MATCH_COPIED(prog)) {
1668 /* Here is a serious problem: we cannot rewrite subbeg,
1669 since it may be needed if this match fails. Thus
1670 $` inside (?{}) could fail... */
1671 PL_reg_oldsaved = prog->subbeg;
1672 PL_reg_oldsavedlen = prog->sublen;
1673 RX_MATCH_COPIED_off(prog);
1674 }
1675 else
1676 PL_reg_oldsaved = Nullch;
1677 prog->subbeg = PL_bostr;
1678 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1679 }
1680 prog->startp[0] = startpos - PL_bostr;
1681 PL_reginput = startpos;
1682 PL_regstartp = prog->startp;
1683 PL_regendp = prog->endp;
1684 PL_reglastparen = &prog->lastparen;
1685 prog->lastparen = 0;
1686 PL_regsize = 0;
1687 DEBUG_r(PL_reg_starttry = startpos);
1688 if (PL_reg_start_tmpl <= prog->nparens) {
1689 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1690 if(PL_reg_start_tmp)
1691 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1692 else
1693 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1694 }
1695
1696 /* XXXX What this code is doing here?!!! There should be no need
1697 to do this again and again, PL_reglastparen should take care of
1698 this! */
1699 sp = prog->startp;
1700 ep = prog->endp;
1701 if (prog->nparens) {
1702 for (i = prog->nparens; i >= 1; i--) {
1703 *++sp = -1;
1704 *++ep = -1;
1705 }
1706 }
1707 REGCP_SET;
1708 if (regmatch(prog->program + 1)) {
1709 prog->endp[0] = PL_reginput - PL_bostr;
1710 return 1;
1711 }
1712 REGCP_UNWIND;
1713 return 0;
1714}
1715
1716/*
1717 - regmatch - main matching routine
1718 *
1719 * Conceptually the strategy is simple: check to see whether the current
1720 * node matches, call self recursively to see whether the rest matches,
1721 * and then act accordingly. In practice we make some effort to avoid
1722 * recursion, in particular by going through "ordinary" nodes (that don't
1723 * need to know whether the rest of the match failed) by a loop instead of
1724 * by recursion.
1725 */
1726/* [lwall] I've hoisted the register declarations to the outer block in order to
1727 * maybe save a little bit of pushing and popping on the stack. It also takes
1728 * advantage of machines that use a register save mask on subroutine entry.
1729 */
1730STATIC I32 /* 0 failure, 1 success */
1731S_regmatch(pTHX_ regnode *prog)
1732{
1733 dTHR;
1734 register regnode *scan; /* Current node. */
1735 regnode *next; /* Next node. */
1736 regnode *inner; /* Next node in internal branch. */
1737 register I32 nextchr; /* renamed nextchr - nextchar colides with
1738 function of same name */
1739 register I32 n; /* no or next */
1740 register I32 ln; /* len or last */
1741 register char *s; /* operand or save */
1742 register char *locinput = PL_reginput;
1743 register I32 c1, c2, paren; /* case fold search, parenth */
1744 int minmod = 0, sw = 0, logical = 0;
1745#ifdef DEBUGGING
1746 PL_regindent++;
1747#endif
1748
1749 /* Note that nextchr is a byte even in UTF */
1750 nextchr = UCHARAT(locinput);
1751 scan = prog;
1752 while (scan != NULL) {
1753#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1754#ifdef DEBUGGING
1755# define sayYES goto yes
1756# define sayNO goto no
7821416a
IZ
1757# define sayYES_FINAL goto yes_final
1758# define sayYES_LOUD goto yes_loud
1759# define sayNO_FINAL goto no_final
1760# define sayNO_SILENT goto do_no
d6a28714
JH
1761# define saySAME(x) if (x) goto yes; else goto no
1762# define REPORT_CODE_OFF 24
1763#else
1764# define sayYES return 1
1765# define sayNO return 0
7821416a
IZ
1766# define sayYES_FINAL return 1
1767# define sayYES_LOUD return 1
1768# define sayNO_FINAL return 0
1769# define sayNO_SILENT return 0
d6a28714
JH
1770# define saySAME(x) return x
1771#endif
1772 DEBUG_r( {
1773 SV *prop = sv_newmortal();
1774 int docolor = *PL_colors[0];
1775 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1776 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1777 /* The part of the string before starttry has one color
1778 (pref0_len chars), between starttry and current
1779 position another one (pref_len - pref0_len chars),
1780 after the current position the third one.
1781 We assume that pref0_len <= pref_len, otherwise we
1782 decrease pref0_len. */
1783 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1784 ? (5 + taill) - l : locinput - PL_bostr);
1785 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1786
1787 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1788 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1789 ? (5 + taill) - pref_len : PL_regeol - locinput);
1790 if (pref0_len < 0)
1791 pref0_len = 0;
1792 if (pref0_len > pref_len)
1793 pref0_len = pref_len;
1794 regprop(prop, scan);
1795 PerlIO_printf(Perl_debug_log,
b900a521
JH
1796 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1797 (IV)(locinput - PL_bostr),
d6a28714
JH
1798 PL_colors[4], pref0_len,
1799 locinput - pref_len, PL_colors[5],
1800 PL_colors[2], pref_len - pref0_len,
1801 locinput - pref_len + pref0_len, PL_colors[3],
1802 (docolor ? "" : "> <"),
1803 PL_colors[0], l, locinput, PL_colors[1],
1804 15 - l - pref_len + 1,
1805 "",
b900a521 1806 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1807 SvPVX(prop));
1808 } );
1809
1810 next = scan + NEXT_OFF(scan);
1811 if (next == scan)
1812 next = NULL;
1813
1814 switch (OP(scan)) {
1815 case BOL:
1816 if (locinput == PL_bostr
1817 ? PL_regprev == '\n'
1818 : (PL_multiline &&
1819 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1820 {
1821 /* regtill = regbol; */
b8c5462f
JH
1822 break;
1823 }
d6a28714
JH
1824 sayNO;
1825 case MBOL:
1826 if (locinput == PL_bostr
1827 ? PL_regprev == '\n'
1828 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1829 {
b8c5462f
JH
1830 break;
1831 }
d6a28714
JH
1832 sayNO;
1833 case SBOL:
1834 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1835 break;
d6a28714
JH
1836 sayNO;
1837 case GPOS:
1838 if (locinput == PL_reg_ganch)
1839 break;
1840 sayNO;
1841 case EOL:
1842 if (PL_multiline)
1843 goto meol;
1844 else
1845 goto seol;
1846 case MEOL:
1847 meol:
1848 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1849 sayNO;
b8c5462f 1850 break;
d6a28714
JH
1851 case SEOL:
1852 seol:
1853 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1854 sayNO;
d6a28714 1855 if (PL_regeol - locinput > 1)
b8c5462f 1856 sayNO;
b8c5462f 1857 break;
d6a28714
JH
1858 case EOS:
1859 if (PL_regeol != locinput)
b8c5462f 1860 sayNO;
d6a28714
JH
1861 break;
1862 case SANYUTF8:
b8c5462f 1863 if (nextchr & 0x80) {
b8c5462f 1864 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1865 if (locinput > PL_regeol)
1866 sayNO;
b8c5462f
JH
1867 nextchr = UCHARAT(locinput);
1868 break;
1869 }
d6a28714 1870 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1871 sayNO;
b8c5462f 1872 nextchr = UCHARAT(++locinput);
a0d0e21e 1873 break;
d6a28714
JH
1874 case SANY:
1875 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1876 sayNO;
1877 nextchr = UCHARAT(++locinput);
b85d18e9 1878 break;
d6a28714 1879 case ANYUTF8:
a0ed51b3 1880 if (nextchr & 0x80) {
b8c5462f 1881 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1882 if (locinput > PL_regeol)
1883 sayNO;
a0ed51b3
LW
1884 nextchr = UCHARAT(locinput);
1885 break;
1886 }
d6a28714 1887 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3
LW
1888 sayNO;
1889 nextchr = UCHARAT(++locinput);
1890 break;
d6a28714
JH
1891 case REG_ANY:
1892 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1893 sayNO;
76e3520e 1894 nextchr = UCHARAT(++locinput);
a0d0e21e 1895 break;
d6a28714 1896 case EXACT:
cd439c50
IZ
1897 s = STRING(scan);
1898 ln = STR_LEN(scan);
d6a28714
JH
1899 /* Inline the first character, for speed. */
1900 if (UCHARAT(s) != nextchr)
1901 sayNO;
1902 if (PL_regeol - locinput < ln)
1903 sayNO;
1904 if (ln > 1 && memNE(s, locinput, ln))
1905 sayNO;
1906 locinput += ln;
1907 nextchr = UCHARAT(locinput);
1908 break;
1909 case EXACTFL:
b8c5462f
JH
1910 PL_reg_flags |= RF_tainted;
1911 /* FALL THROUGH */
d6a28714 1912 case EXACTF:
cd439c50
IZ
1913 s = STRING(scan);
1914 ln = STR_LEN(scan);
d6a28714
JH
1915
1916 if (UTF) {
1917 char *l = locinput;
1918 char *e = s + ln;
1919 c1 = OP(scan) == EXACTF;
1920 while (s < e) {
1921 if (l >= PL_regeol)
1922 sayNO;
1923 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1924 toLOWER_utf8((U8*)l) :
1925 toLOWER_LC_utf8((U8*)l)))
1926 {
1927 sayNO;
1928 }
1929 s += UTF8SKIP(s);
1930 l += UTF8SKIP(l);
b8c5462f 1931 }
d6a28714 1932 locinput = l;
a0ed51b3
LW
1933 nextchr = UCHARAT(locinput);
1934 break;
1935 }
d6a28714
JH
1936
1937 /* Inline the first character, for speed. */
1938 if (UCHARAT(s) != nextchr &&
1939 UCHARAT(s) != ((OP(scan) == EXACTF)
1940 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1941 sayNO;
d6a28714 1942 if (PL_regeol - locinput < ln)
b8c5462f 1943 sayNO;
d6a28714
JH
1944 if (ln > 1 && (OP(scan) == EXACTF
1945 ? ibcmp(s, locinput, ln)
1946 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1947 sayNO;
d6a28714
JH
1948 locinput += ln;
1949 nextchr = UCHARAT(locinput);
a0d0e21e 1950 break;
d6a28714 1951 case ANYOFUTF8:
d6a28714 1952 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1953 sayNO;
d6a28714
JH
1954 if (locinput >= PL_regeol)
1955 sayNO;
1956 locinput += PL_utf8skip[nextchr];
1957 nextchr = UCHARAT(locinput);
1958 break;
1959 case ANYOF:
d6a28714 1960 if (nextchr < 0)
b8c5462f 1961 nextchr = UCHARAT(locinput);
936ed897 1962 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
1963 sayNO;
1964 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1965 sayNO;
b8c5462f
JH
1966 nextchr = UCHARAT(++locinput);
1967 break;
d6a28714 1968 case ALNUML:
b8c5462f
JH
1969 PL_reg_flags |= RF_tainted;
1970 /* FALL THROUGH */
d6a28714 1971 case ALNUM:
b8c5462f 1972 if (!nextchr)
4633a7c4 1973 sayNO;
d6a28714
JH
1974 if (!(OP(scan) == ALNUM
1975 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1976 sayNO;
1977 nextchr = UCHARAT(++locinput);
bbce6d69 1978 break;
d6a28714 1979 case ALNUMLUTF8:
3280af22 1980 PL_reg_flags |= RF_tainted;
bbce6d69 1981 /* FALL THROUGH */
d6a28714 1982 case ALNUMUTF8:
b8c5462f
JH
1983 if (!nextchr)
1984 sayNO;
1985 if (nextchr & 0x80) {
d6a28714
JH
1986 if (!(OP(scan) == ALNUMUTF8
1987 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1988 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1989 {
1990 sayNO;
a0ed51b3 1991 }
b8c5462f 1992 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1993 nextchr = UCHARAT(locinput);
1994 break;
1995 }
d6a28714
JH
1996 if (!(OP(scan) == ALNUMUTF8
1997 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 1998 sayNO;
b8c5462f 1999 nextchr = UCHARAT(++locinput);
a0d0e21e 2000 break;
d6a28714 2001 case NALNUML:
b8c5462f
JH
2002 PL_reg_flags |= RF_tainted;
2003 /* FALL THROUGH */
d6a28714
JH
2004 case NALNUM:
2005 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2006 sayNO;
d6a28714
JH
2007 if (OP(scan) == NALNUM
2008 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2009 sayNO;
b8c5462f 2010 nextchr = UCHARAT(++locinput);
a0ed51b3 2011 break;
d6a28714 2012 case NALNUMLUTF8:
b8c5462f
JH
2013 PL_reg_flags |= RF_tainted;
2014 /* FALL THROUGH */
d6a28714 2015 case NALNUMUTF8:
3280af22 2016 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2017 sayNO;
b8c5462f 2018 if (nextchr & 0x80) {
d6a28714
JH
2019 if (OP(scan) == NALNUMUTF8
2020 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2021 : isALNUM_LC_utf8((U8*)locinput))
2022 {
b8c5462f 2023 sayNO;
d6a28714 2024 }
b8c5462f
JH
2025 locinput += PL_utf8skip[nextchr];
2026 nextchr = UCHARAT(locinput);
2027 break;
2028 }
d6a28714
JH
2029 if (OP(scan) == NALNUMUTF8
2030 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2031 sayNO;
76e3520e 2032 nextchr = UCHARAT(++locinput);
a0d0e21e 2033 break;
d6a28714
JH
2034 case BOUNDL:
2035 case NBOUNDL:
3280af22 2036 PL_reg_flags |= RF_tainted;
bbce6d69 2037 /* FALL THROUGH */
d6a28714
JH
2038 case BOUND:
2039 case NBOUND:
2040 /* was last char in word? */
2041 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2042 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2043 ln = isALNUM(ln);
2044 n = isALNUM(nextchr);
2045 }
2046 else {
2047 ln = isALNUM_LC(ln);
2048 n = isALNUM_LC(nextchr);
2049 }
2050 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2051 sayNO;
a0d0e21e 2052 break;
d6a28714
JH
2053 case BOUNDLUTF8:
2054 case NBOUNDLUTF8:
a0ed51b3
LW
2055 PL_reg_flags |= RF_tainted;
2056 /* FALL THROUGH */
d6a28714
JH
2057 case BOUNDUTF8:
2058 case NBOUNDUTF8:
2059 /* was last char in word? */
2060 ln = (locinput != PL_regbol)
2061 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2062 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2063 ln = isALNUM_uni(ln);
2064 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2065 }
d6a28714
JH
2066 else {
2067 ln = isALNUM_LC_uni(ln);
2068 n = isALNUM_LC_utf8((U8*)locinput);
2069 }
2070 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2071 sayNO;
a0ed51b3 2072 break;
d6a28714 2073 case SPACEL:
3280af22 2074 PL_reg_flags |= RF_tainted;
bbce6d69 2075 /* FALL THROUGH */
d6a28714
JH
2076 case SPACE:
2077 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2078 sayNO;
d6a28714
JH
2079 if (!(OP(scan) == SPACE
2080 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2081 sayNO;
76e3520e 2082 nextchr = UCHARAT(++locinput);
a0d0e21e 2083 break;
d6a28714 2084 case SPACELUTF8:
a0ed51b3
LW
2085 PL_reg_flags |= RF_tainted;
2086 /* FALL THROUGH */
d6a28714 2087 case SPACEUTF8:
a0ed51b3
LW
2088 if (!nextchr && locinput >= PL_regeol)
2089 sayNO;
2090 if (nextchr & 0x80) {
d6a28714
JH
2091 if (!(OP(scan) == SPACEUTF8
2092 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2093 : isSPACE_LC_utf8((U8*)locinput)))
2094 {
a0ed51b3 2095 sayNO;
d6a28714 2096 }
6f06b55f 2097 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2098 nextchr = UCHARAT(locinput);
2099 break;
2100 }
d6a28714
JH
2101 if (!(OP(scan) == SPACEUTF8
2102 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2103 sayNO;
2104 nextchr = UCHARAT(++locinput);
2105 break;
d6a28714 2106 case NSPACEL:
3280af22 2107 PL_reg_flags |= RF_tainted;
bbce6d69 2108 /* FALL THROUGH */
d6a28714 2109 case NSPACE:
b8c5462f
JH
2110 if (!nextchr)
2111 sayNO;
d6a28714
JH
2112 if (OP(scan) == SPACE
2113 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2114 sayNO;
b8c5462f 2115 nextchr = UCHARAT(++locinput);
a0d0e21e 2116 break;
d6a28714 2117 case NSPACELUTF8:
a0ed51b3
LW
2118 PL_reg_flags |= RF_tainted;
2119 /* FALL THROUGH */
d6a28714 2120 case NSPACEUTF8:
b8c5462f
JH
2121 if (!nextchr)
2122 sayNO;
2123 if (nextchr & 0x80) {
d6a28714
JH
2124 if (OP(scan) == NSPACEUTF8
2125 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2126 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2127 {
2128 sayNO;
2129 }
2130 locinput += PL_utf8skip[nextchr];
2131 nextchr = UCHARAT(locinput);
2132 break;
a0ed51b3 2133 }
d6a28714
JH
2134 if (OP(scan) == NSPACEUTF8
2135 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2136 sayNO;
76e3520e 2137 nextchr = UCHARAT(++locinput);
a0d0e21e 2138 break;
d6a28714 2139 case DIGITL:
a0ed51b3
LW
2140 PL_reg_flags |= RF_tainted;
2141 /* FALL THROUGH */
d6a28714 2142 case DIGIT:
a0ed51b3
LW
2143 if (!nextchr && locinput >= PL_regeol)
2144 sayNO;
d6a28714
JH
2145 if (!(OP(scan) == DIGIT
2146 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2147 sayNO;
76e3520e 2148 nextchr = UCHARAT(++locinput);
a0d0e21e 2149 break;
d6a28714 2150 case DIGITLUTF8:
a0ed51b3
LW
2151 PL_reg_flags |= RF_tainted;
2152 /* FALL THROUGH */
d6a28714 2153 case DIGITUTF8:
a0ed51b3
LW
2154 if (!nextchr)
2155 sayNO;
2156 if (nextchr & 0x80) {
d6a28714
JH
2157 if (OP(scan) == NDIGITUTF8
2158 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2159 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 2160 {
a0ed51b3 2161 sayNO;
dfe13c55 2162 }
6f06b55f 2163 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2164 nextchr = UCHARAT(locinput);
2165 break;
2166 }
d6a28714 2167 if (!isDIGIT(nextchr))
a0ed51b3
LW
2168 sayNO;
2169 nextchr = UCHARAT(++locinput);
2170 break;
d6a28714 2171 case NDIGITL:
b8c5462f
JH
2172 PL_reg_flags |= RF_tainted;
2173 /* FALL THROUGH */
d6a28714 2174 case NDIGIT:
b8c5462f
JH
2175 if (!nextchr)
2176 sayNO;
d6a28714
JH
2177 if (OP(scan) == DIGIT
2178 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2179 sayNO;
76e3520e 2180 nextchr = UCHARAT(++locinput);
a0d0e21e 2181 break;
d6a28714 2182 case NDIGITLUTF8:
b8c5462f
JH
2183 PL_reg_flags |= RF_tainted;
2184 /* FALL THROUGH */
d6a28714 2185 case NDIGITUTF8:
b8c5462f
JH
2186 if (!nextchr && locinput >= PL_regeol)
2187 sayNO;
a0ed51b3 2188 if (nextchr & 0x80) {
d6a28714 2189 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 2190 sayNO;
6f06b55f 2191 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2192 nextchr = UCHARAT(locinput);
2193 break;
2194 }
d6a28714 2195 if (isDIGIT(nextchr))
a0ed51b3
LW
2196 sayNO;
2197 nextchr = UCHARAT(++locinput);
2198 break;
2199 case CLUMP:
dfe13c55 2200 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2201 sayNO;
6f06b55f 2202 locinput += PL_utf8skip[nextchr];
dfe13c55 2203 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2204 locinput += UTF8SKIP(locinput);
2205 if (locinput > PL_regeol)
2206 sayNO;
2207 nextchr = UCHARAT(locinput);
2208 break;
c8756f30 2209 case REFFL:
3280af22 2210 PL_reg_flags |= RF_tainted;
c8756f30 2211 /* FALL THROUGH */
c277df42 2212 case REF:
c8756f30 2213 case REFF:
c277df42 2214 n = ARG(scan); /* which paren pair */
cf93c79d 2215 ln = PL_regstartp[n];
2c2d71f5 2216 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2217 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2218 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2219 if (ln == PL_regendp[n])
a0d0e21e 2220 break;
a0ed51b3 2221
cf93c79d 2222 s = PL_bostr + ln;
a0ed51b3
LW
2223 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2224 char *l = locinput;
cf93c79d 2225 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2226 /*
2227 * Note that we can't do the "other character" lookup trick as
2228 * in the 8-bit case (no pun intended) because in Unicode we
2229 * have to map both upper and title case to lower case.
2230 */
2231 if (OP(scan) == REFF) {
2232 while (s < e) {
2233 if (l >= PL_regeol)
2234 sayNO;
dfe13c55 2235 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2236 sayNO;
2237 s += UTF8SKIP(s);
2238 l += UTF8SKIP(l);
2239 }
2240 }
2241 else {
2242 while (s < e) {
2243 if (l >= PL_regeol)
2244 sayNO;
dfe13c55 2245 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2246 sayNO;
2247 s += UTF8SKIP(s);
2248 l += UTF8SKIP(l);
2249 }
2250 }
2251 locinput = l;
2252 nextchr = UCHARAT(locinput);
2253 break;
2254 }
2255
a0d0e21e 2256 /* Inline the first character, for speed. */
76e3520e 2257 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2258 (OP(scan) == REF ||
2259 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2260 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2261 sayNO;
cf93c79d 2262 ln = PL_regendp[n] - ln;
3280af22 2263 if (locinput + ln > PL_regeol)
4633a7c4 2264 sayNO;
c8756f30
AK
2265 if (ln > 1 && (OP(scan) == REF
2266 ? memNE(s, locinput, ln)
2267 : (OP(scan) == REFF
2268 ? ibcmp(s, locinput, ln)
2269 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2270 sayNO;
a0d0e21e 2271 locinput += ln;
76e3520e 2272 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2273 break;
2274
2275 case NOTHING:
c277df42 2276 case TAIL:
a0d0e21e
LW
2277 break;
2278 case BACK:
2279 break;
c277df42
IZ
2280 case EVAL:
2281 {
2282 dSP;
533c011a 2283 OP_4tree *oop = PL_op;
3280af22
NIS
2284 COP *ocurcop = PL_curcop;
2285 SV **ocurpad = PL_curpad;
c277df42
IZ
2286 SV *ret;
2287
2288 n = ARG(scan);
533c011a 2289 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2290 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2291 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2292 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2293
cea2e8a9 2294 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2295 SPAGAIN;
2296 ret = POPs;
2297 PUTBACK;
2298
0f5d15d6
IZ
2299 PL_op = oop;
2300 PL_curpad = ocurpad;
2301 PL_curcop = ocurcop;
c277df42 2302 if (logical) {
0f5d15d6
IZ
2303 if (logical == 2) { /* Postponed subexpression. */
2304 regexp *re;
22c35a8c 2305 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2306 re_cc_state state;
0f5d15d6
IZ
2307 CHECKPOINT cp, lastcp;
2308
2309 if(SvROK(ret) || SvRMAGICAL(ret)) {
2310 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2311
2312 if(SvMAGICAL(sv))
2313 mg = mg_find(sv, 'r');
2314 }
2315 if (mg) {
2316 re = (regexp *)mg->mg_obj;
df0003d4 2317 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2318 }
2319 else {
2320 STRLEN len;
2321 char *t = SvPV(ret, len);
2322 PMOP pm;
2323 char *oprecomp = PL_regprecomp;
2324 I32 osize = PL_regsize;
2325 I32 onpar = PL_regnpar;
2326
2327 pm.op_pmflags = 0;
cea2e8a9 2328 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2329 if (!(SvFLAGS(ret)
2330 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2331 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2332 PL_regprecomp = oprecomp;
2333 PL_regsize = osize;
2334 PL_regnpar = onpar;
2335 }
2336 DEBUG_r(
2337 PerlIO_printf(Perl_debug_log,
2338 "Entering embedded `%s%.60s%s%s'\n",
2339 PL_colors[0],
2340 re->precomp,
2341 PL_colors[1],
2342 (strlen(re->precomp) > 60 ? "..." : ""))
2343 );
2344 state.node = next;
2345 state.prev = PL_reg_call_cc;
2346 state.cc = PL_regcc;
2347 state.re = PL_reg_re;
2348
2ab05381 2349 PL_regcc = 0;
0f5d15d6
IZ
2350
2351 cp = regcppush(0); /* Save *all* the positions. */
2352 REGCP_SET;
2353 cache_re(re);
2354 state.ss = PL_savestack_ix;
2355 *PL_reglastparen = 0;
2356 PL_reg_call_cc = &state;
2357 PL_reginput = locinput;
2c2d71f5
JH
2358
2359 /* XXXX This is too dramatic a measure... */
2360 PL_reg_maxiter = 0;
2361
0f5d15d6 2362 if (regmatch(re->program + 1)) {
2c914db6
IZ
2363 /* Even though we succeeded, we need to restore
2364 global variables, since we may be wrapped inside
2365 SUSPEND, thus the match may be not finished yet. */
2366
2367 /* XXXX Do this only if SUSPENDed? */
2368 PL_reg_call_cc = state.prev;
2369 PL_regcc = state.cc;
2370 PL_reg_re = state.re;
2371 cache_re(PL_reg_re);
2372
2373 /* XXXX This is too dramatic a measure... */
2374 PL_reg_maxiter = 0;
2375
2376 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2377 ReREFCNT_dec(re);
2378 regcpblow(cp);
2379 sayYES;
2380 }
0f5d15d6
IZ
2381 ReREFCNT_dec(re);
2382 REGCP_UNWIND;
2383 regcppop();
2384 PL_reg_call_cc = state.prev;
2385 PL_regcc = state.cc;
2386 PL_reg_re = state.re;
d3790889 2387 cache_re(PL_reg_re);
2c2d71f5
JH
2388
2389 /* XXXX This is too dramatic a measure... */
2390 PL_reg_maxiter = 0;
2391
0f5d15d6
IZ
2392 sayNO;
2393 }
c277df42 2394 sw = SvTRUE(ret);
0f5d15d6 2395 logical = 0;
a0ed51b3
LW
2396 }
2397 else
3280af22 2398 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2399 break;
2400 }
a0d0e21e 2401 case OPEN:
c277df42 2402 n = ARG(scan); /* which paren pair */
3280af22
NIS
2403 PL_reg_start_tmp[n] = locinput;
2404 if (n > PL_regsize)
2405 PL_regsize = n;
a0d0e21e
LW
2406 break;
2407 case CLOSE:
c277df42 2408 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2409 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2410 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2411 if (n > *PL_reglastparen)
2412 *PL_reglastparen = n;
a0d0e21e 2413 break;
c277df42
IZ
2414 case GROUPP:
2415 n = ARG(scan); /* which paren pair */
cf93c79d 2416 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2417 break;
2418 case IFTHEN:
2c2d71f5 2419 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2420 if (sw)
2421 next = NEXTOPER(NEXTOPER(scan));
2422 else {
2423 next = scan + ARG(scan);
2424 if (OP(next) == IFTHEN) /* Fake one. */
2425 next = NEXTOPER(NEXTOPER(next));
2426 }
2427 break;
2428 case LOGICAL:
0f5d15d6 2429 logical = scan->flags;
c277df42 2430 break;
2ab05381
IZ
2431/*******************************************************************
2432 PL_regcc contains infoblock about the innermost (...)* loop, and
2433 a pointer to the next outer infoblock.
2434
2435 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2436
2437 1) After matching X, regnode for CURLYX is processed;
2438
2439 2) This regnode creates infoblock on the stack, and calls
2440 regmatch() recursively with the starting point at WHILEM node;
2441
2442 3) Each hit of WHILEM node tries to match A and Z (in the order
2443 depending on the current iteration, min/max of {min,max} and
2444 greediness). The information about where are nodes for "A"
2445 and "Z" is read from the infoblock, as is info on how many times "A"
2446 was already matched, and greediness.
2447
2448 4) After A matches, the same WHILEM node is hit again.
2449
2450 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2451 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2452 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2453 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2454 of the external loop.
2455
2456 Currently present infoblocks form a tree with a stem formed by PL_curcc
2457 and whatever it mentions via ->next, and additional attached trees
2458 corresponding to temporarily unset infoblocks as in "5" above.
2459
2460 In the following picture infoblocks for outer loop of
2461 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2462 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2463 infoblocks are drawn below the "reset" infoblock.
2464
2465 In fact in the picture below we do not show failed matches for Z and T
2466 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2467 more obvious *why* one needs to *temporary* unset infoblocks.]
2468
2469 Matched REx position InfoBlocks Comment
2470 (Y(A)*?Z)*?T x
2471 Y(A)*?Z)*?T x <- O
2472 Y (A)*?Z)*?T x <- O
2473 Y A)*?Z)*?T x <- O <- I
2474 YA )*?Z)*?T x <- O <- I
2475 YA A)*?Z)*?T x <- O <- I
2476 YAA )*?Z)*?T x <- O <- I
2477 YAA Z)*?T x <- O # Temporary unset I
2478 I
2479
2480 YAAZ Y(A)*?Z)*?T x <- O
2481 I
2482
2483 YAAZY (A)*?Z)*?T x <- O
2484 I
2485
2486 YAAZY A)*?Z)*?T x <- O <- I
2487 I
2488
2489 YAAZYA )*?Z)*?T x <- O <- I
2490 I
2491
2492 YAAZYA Z)*?T x <- O # Temporary unset I
2493 I,I
2494
2495 YAAZYAZ )*?T x <- O
2496 I,I
2497
2498 YAAZYAZ T x # Temporary unset O
2499 O
2500 I,I
2501
2502 YAAZYAZT x
2503 O
2504 I,I
2505 *******************************************************************/
a0d0e21e
LW
2506 case CURLYX: {
2507 CURCUR cc;
3280af22 2508 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2509
2510 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2511 next += ARG(next);
3280af22
NIS
2512 cc.oldcc = PL_regcc;
2513 PL_regcc = &cc;
2514 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2515 cc.cur = -1;
2516 cc.min = ARG1(scan);
2517 cc.max = ARG2(scan);
c277df42 2518 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2519 cc.next = next;
2520 cc.minmod = minmod;
2521 cc.lastloc = 0;
3280af22 2522 PL_reginput = locinput;
a0d0e21e
LW
2523 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2524 regcpblow(cp);
3280af22 2525 PL_regcc = cc.oldcc;
4633a7c4 2526 saySAME(n);
a0d0e21e
LW
2527 }
2528 /* NOT REACHED */
2529 case WHILEM: {
2530 /*
2531 * This is really hard to understand, because after we match
2532 * what we're trying to match, we must make sure the rest of
2c2d71f5 2533 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2534 * to go back UP the parse tree by recursing ever deeper. And
2535 * if it fails, we have to reset our parent's current state
2536 * that we can try again after backing off.
2537 */
2538
c277df42 2539 CHECKPOINT cp, lastcp;
3280af22 2540 CURCUR* cc = PL_regcc;
c277df42
IZ
2541 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2542
4633a7c4 2543 n = cc->cur + 1; /* how many we know we matched */
3280af22 2544 PL_reginput = locinput;
a0d0e21e 2545
c277df42
IZ
2546 DEBUG_r(
2547 PerlIO_printf(Perl_debug_log,
2548 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2549 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2550 (long)n, (long)cc->min,
2551 (long)cc->max, (long)cc)
2552 );
4633a7c4 2553
a0d0e21e
LW
2554 /* If degenerate scan matches "", assume scan done. */
2555
579cf2c3 2556 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2557 PL_regcc = cc->oldcc;
2ab05381
IZ
2558 if (PL_regcc)
2559 ln = PL_regcc->cur;
c277df42 2560 DEBUG_r(
c3464db5
DD
2561 PerlIO_printf(Perl_debug_log,
2562 "%*s empty match detected, try continuation...\n",
3280af22 2563 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2564 );
a0d0e21e 2565 if (regmatch(cc->next))
4633a7c4 2566 sayYES;
2ab05381
IZ
2567 if (PL_regcc)
2568 PL_regcc->cur = ln;
3280af22 2569 PL_regcc = cc;
4633a7c4 2570 sayNO;
a0d0e21e
LW
2571 }
2572
2573 /* First just match a string of min scans. */
2574
2575 if (n < cc->min) {
2576 cc->cur = n;
2577 cc->lastloc = locinput;
4633a7c4
LW
2578 if (regmatch(cc->scan))
2579 sayYES;
2580 cc->cur = n - 1;
c277df42 2581 cc->lastloc = lastloc;
4633a7c4 2582 sayNO;
a0d0e21e
LW
2583 }
2584
2c2d71f5
JH
2585 if (scan->flags) {
2586 /* Check whether we already were at this position.
2587 Postpone detection until we know the match is not
2588 *that* much linear. */
2589 if (!PL_reg_maxiter) {
2590 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2591 PL_reg_leftiter = PL_reg_maxiter;
2592 }
2593 if (PL_reg_leftiter-- == 0) {
2594 I32 size = (PL_reg_maxiter + 7)/8;
2595 if (PL_reg_poscache) {
2596 if (PL_reg_poscache_size < size) {
2597 Renew(PL_reg_poscache, size, char);
2598 PL_reg_poscache_size = size;
2599 }
2600 Zero(PL_reg_poscache, size, char);
2601 }
2602 else {
2603 PL_reg_poscache_size = size;
2604 Newz(29, PL_reg_poscache, size, char);
2605 }
2606 DEBUG_r(
2607 PerlIO_printf(Perl_debug_log,
2608 "%sDetected a super-linear match, switching on caching%s...\n",
2609 PL_colors[4], PL_colors[5])
2610 );
2611 }
2612 if (PL_reg_leftiter < 0) {
2613 I32 o = locinput - PL_bostr, b;
2614
2615 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2616 b = o % 8;
2617 o /= 8;
2618 if (PL_reg_poscache[o] & (1<<b)) {
2619 DEBUG_r(
2620 PerlIO_printf(Perl_debug_log,
2621 "%*s already tried at this position...\n",
2622 REPORT_CODE_OFF+PL_regindent*2, "")
2623 );
7821416a 2624 sayNO_SILENT;
2c2d71f5
JH
2625 }
2626 PL_reg_poscache[o] |= (1<<b);
2627 }
2628 }
2629
a0d0e21e
LW
2630 /* Prefer next over scan for minimal matching. */
2631
2632 if (cc->minmod) {
3280af22 2633 PL_regcc = cc->oldcc;
2ab05381
IZ
2634 if (PL_regcc)
2635 ln = PL_regcc->cur;
5f05dabc 2636 cp = regcppush(cc->parenfloor);
c277df42 2637 REGCP_SET;
5f05dabc 2638 if (regmatch(cc->next)) {
c277df42 2639 regcpblow(cp);
4633a7c4 2640 sayYES; /* All done. */
5f05dabc 2641 }
c277df42 2642 REGCP_UNWIND;
5f05dabc 2643 regcppop();
2ab05381
IZ
2644 if (PL_regcc)
2645 PL_regcc->cur = ln;
3280af22 2646 PL_regcc = cc;
a0d0e21e 2647
c277df42 2648 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2649 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
2650 && !(PL_reg_flags & RF_warned)) {
2651 PL_reg_flags |= RF_warned;
cea2e8a9 2652 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
2653 "Complex regular subexpression recursion",
2654 REG_INFTY - 1);
c277df42 2655 }
4633a7c4 2656 sayNO;
c277df42 2657 }
a687059c 2658
c277df42 2659 DEBUG_r(
c3464db5
DD
2660 PerlIO_printf(Perl_debug_log,
2661 "%*s trying longer...\n",
3280af22 2662 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2663 );
a0d0e21e 2664 /* Try scanning more and see if it helps. */
3280af22 2665 PL_reginput = locinput;
a0d0e21e
LW
2666 cc->cur = n;
2667 cc->lastloc = locinput;
5f05dabc 2668 cp = regcppush(cc->parenfloor);
c277df42 2669 REGCP_SET;
5f05dabc 2670 if (regmatch(cc->scan)) {
c277df42 2671 regcpblow(cp);
4633a7c4 2672 sayYES;
5f05dabc 2673 }
c277df42 2674 REGCP_UNWIND;
5f05dabc 2675 regcppop();
4633a7c4 2676 cc->cur = n - 1;
c277df42 2677 cc->lastloc = lastloc;
4633a7c4 2678 sayNO;
a0d0e21e
LW
2679 }
2680
2681 /* Prefer scan over next for maximal matching. */
2682
2683 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2684 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2685 cc->cur = n;
2686 cc->lastloc = locinput;
c277df42 2687 REGCP_SET;
5f05dabc 2688 if (regmatch(cc->scan)) {
c277df42 2689 regcpblow(cp);
4633a7c4 2690 sayYES;
5f05dabc 2691 }
c277df42 2692 REGCP_UNWIND;
a0d0e21e 2693 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2694 PL_reginput = locinput;
c277df42 2695 DEBUG_r(
c3464db5
DD
2696 PerlIO_printf(Perl_debug_log,
2697 "%*s failed, try continuation...\n",
3280af22 2698 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2699 );
2700 }
599cee73
PM
2701 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2702 && !(PL_reg_flags & RF_warned)) {
3280af22 2703 PL_reg_flags |= RF_warned;
cea2e8a9 2704 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
2705 "Complex regular subexpression recursion",
2706 REG_INFTY - 1);
a0d0e21e
LW
2707 }
2708
2709 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2710 PL_regcc = cc->oldcc;
2ab05381
IZ
2711 if (PL_regcc)
2712 ln = PL_regcc->cur;
a0d0e21e 2713 if (regmatch(cc->next))
4633a7c4 2714 sayYES;
2ab05381
IZ
2715 if (PL_regcc)
2716 PL_regcc->cur = ln;
3280af22 2717 PL_regcc = cc;
4633a7c4 2718 cc->cur = n - 1;
c277df42 2719 cc->lastloc = lastloc;
4633a7c4 2720 sayNO;
a0d0e21e
LW
2721 }
2722 /* NOT REACHED */
c277df42
IZ
2723 case BRANCHJ:
2724 next = scan + ARG(scan);
2725 if (next == scan)
2726 next = NULL;
2727 inner = NEXTOPER(NEXTOPER(scan));
2728 goto do_branch;
2729 case BRANCH:
2730 inner = NEXTOPER(scan);
2731 do_branch:
2732 {
2733 CHECKPOINT lastcp;
2734 c1 = OP(scan);
2735 if (OP(next) != c1) /* No choice. */
2736 next = inner; /* Avoid recursion. */
a0d0e21e 2737 else {
3280af22 2738 int lastparen = *PL_reglastparen;
c277df42
IZ
2739
2740 REGCP_SET;
a0d0e21e 2741 do {
3280af22 2742 PL_reginput = locinput;
c277df42 2743 if (regmatch(inner))
4633a7c4 2744 sayYES;
c277df42 2745 REGCP_UNWIND;
3280af22 2746 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2747 PL_regendp[n] = -1;
3280af22 2748 *PL_reglastparen = n;
c277df42 2749 scan = next;
a0d0e21e 2750 /*SUPPRESS 560*/
c277df42
IZ
2751 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2752 next += n;
a0d0e21e 2753 else
c277df42 2754 next = NULL;
c277df42
IZ
2755 inner = NEXTOPER(scan);
2756 if (c1 == BRANCHJ) {
2757 inner = NEXTOPER(inner);
2758 }
2759 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2760 sayNO;
a0d0e21e 2761 /* NOTREACHED */
a687059c 2762 }
a0d0e21e
LW
2763 }
2764 break;
2765 case MINMOD:
2766 minmod = 1;
2767 break;
c277df42
IZ
2768 case CURLYM:
2769 {
00db4c45 2770 I32 l = 0;
c277df42
IZ
2771 CHECKPOINT lastcp;
2772
2773 /* We suppose that the next guy does not need
2774 backtracking: in particular, it is of constant length,
2775 and has no parenths to influence future backrefs. */
2776 ln = ARG1(scan); /* min to match */
2777 n = ARG2(scan); /* max to match */
c277df42
IZ
2778 paren = scan->flags;
2779 if (paren) {
3280af22
NIS
2780 if (paren > PL_regsize)
2781 PL_regsize = paren;
2782 if (paren > *PL_reglastparen)
2783 *PL_reglastparen = paren;
c277df42 2784 }
dc45a647 2785 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2786 if (paren)
2787 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2788 PL_reginput = locinput;
c277df42
IZ
2789 if (minmod) {
2790 minmod = 0;
2791 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2792 sayNO;
5f4b28b2 2793 if (ln && l == 0 && n >= ln
c277df42
IZ
2794 /* In fact, this is tricky. If paren, then the
2795 fact that we did/didnot match may influence
2796 future execution. */
2797 && !(paren && ln == 0))
2798 ln = n;
3280af22 2799 locinput = PL_reginput;
22c35a8c 2800 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2801 c1 = (U8)*STRING(next);
c277df42 2802 if (OP(next) == EXACTF)
22c35a8c 2803 c2 = PL_fold[c1];
c277df42 2804 else if (OP(next) == EXACTFL)
22c35a8c 2805 c2 = PL_fold_locale[c1];
c277df42
IZ
2806 else
2807 c2 = c1;
a0ed51b3
LW
2808 }
2809 else
c277df42
IZ
2810 c1 = c2 = -1000;
2811 REGCP_SET;
5f4b28b2 2812 /* This may be improved if l == 0. */
c277df42
IZ
2813 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2814 /* If it could work, try it. */
2815 if (c1 == -1000 ||
3280af22
NIS
2816 UCHARAT(PL_reginput) == c1 ||
2817 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2818 {
2819 if (paren) {
2820 if (n) {
cf93c79d
IZ
2821 PL_regstartp[paren] =
2822 HOPc(PL_reginput, -l) - PL_bostr;
2823 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2824 }
2825 else
cf93c79d 2826 PL_regendp[paren] = -1;
c277df42
IZ
2827 }
2828 if (regmatch(next))
2829 sayYES;
2830 REGCP_UNWIND;
2831 }
2832 /* Couldn't or didn't -- move forward. */
3280af22 2833 PL_reginput = locinput;
c277df42
IZ
2834 if (regrepeat_hard(scan, 1, &l)) {
2835 ln++;
3280af22 2836 locinput = PL_reginput;
c277df42
IZ
2837 }
2838 else
2839 sayNO;
2840 }
a0ed51b3
LW
2841 }
2842 else {
c277df42
IZ
2843 n = regrepeat_hard(scan, n, &l);
2844 if (n != 0 && l == 0
2845 /* In fact, this is tricky. If paren, then the
2846 fact that we did/didnot match may influence
2847 future execution. */
2848 && !(paren && ln == 0))
2849 ln = n;
3280af22 2850 locinput = PL_reginput;
c277df42 2851 DEBUG_r(
5c0ca799 2852 PerlIO_printf(Perl_debug_log,
faccc32b 2853 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2854 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2855 (IV) n, (IV)l)
c277df42
IZ
2856 );
2857 if (n >= ln) {
22c35a8c 2858 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2859 c1 = (U8)*STRING(next);
c277df42 2860 if (OP(next) == EXACTF)
22c35a8c 2861 c2 = PL_fold[c1];
c277df42 2862 else if (OP(next) == EXACTFL)
22c35a8c 2863 c2 = PL_fold_locale[c1];
c277df42
IZ
2864 else
2865 c2 = c1;
a0ed51b3
LW
2866 }
2867 else
c277df42
IZ
2868 c1 = c2 = -1000;
2869 }
2870 REGCP_SET;
2871 while (n >= ln) {
2872 /* If it could work, try it. */
2873 if (c1 == -1000 ||
3280af22
NIS
2874 UCHARAT(PL_reginput) == c1 ||
2875 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2876 {
2877 DEBUG_r(
c3464db5 2878 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2879 "%*s trying tail with n=%"IVdf"...\n",
2880 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2881 );
2882 if (paren) {
2883 if (n) {
cf93c79d
IZ
2884 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2885 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2886 }
a0ed51b3 2887 else
cf93c79d 2888 PL_regendp[paren] = -1;
c277df42 2889 }
a0ed51b3
LW
2890 if (regmatch(next))
2891 sayYES;
2892 REGCP_UNWIND;
2893 }
c277df42
IZ
2894 /* Couldn't or didn't -- back up. */
2895 n--;
dfe13c55 2896 locinput = HOPc(locinput, -l);
3280af22 2897 PL_reginput = locinput;
c277df42
IZ
2898 }
2899 }
2900 sayNO;
2901 break;
2902 }
2903 case CURLYN:
2904 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2905 if (paren > PL_regsize)
2906 PL_regsize = paren;
2907 if (paren > *PL_reglastparen)
2908 *PL_reglastparen = paren;
c277df42
IZ
2909 ln = ARG1(scan); /* min to match */
2910 n = ARG2(scan); /* max to match */
dc45a647 2911 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2912 goto repeat;
a0d0e21e 2913 case CURLY:
c277df42 2914 paren = 0;
a0d0e21e
LW
2915 ln = ARG1(scan); /* min to match */
2916 n = ARG2(scan); /* max to match */
dc45a647 2917 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2918 goto repeat;
2919 case STAR:
2920 ln = 0;
c277df42 2921 n = REG_INFTY;
a0d0e21e 2922 scan = NEXTOPER(scan);
c277df42 2923 paren = 0;
a0d0e21e
LW
2924 goto repeat;
2925 case PLUS:
c277df42
IZ
2926 ln = 1;
2927 n = REG_INFTY;
2928 scan = NEXTOPER(scan);
2929 paren = 0;
2930 repeat:
a0d0e21e
LW
2931 /*
2932 * Lookahead to avoid useless match attempts
2933 * when we know what character comes next.
2934 */
22c35a8c 2935 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2936 c1 = (U8)*STRING(next);
bbce6d69 2937 if (OP(next) == EXACTF)
22c35a8c 2938 c2 = PL_fold[c1];
bbce6d69 2939 else if (OP(next) == EXACTFL)
22c35a8c 2940 c2 = PL_fold_locale[c1];
bbce6d69 2941 else
2942 c2 = c1;
2943 }
a0d0e21e 2944 else
bbce6d69 2945 c1 = c2 = -1000;
3280af22 2946 PL_reginput = locinput;
a0d0e21e 2947 if (minmod) {
c277df42 2948 CHECKPOINT lastcp;
a0d0e21e
LW
2949 minmod = 0;
2950 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2951 sayNO;
a0ed51b3 2952 locinput = PL_reginput;
c277df42 2953 REGCP_SET;
0fe9bf95
IZ
2954 if (c1 != -1000) {
2955 char *e = locinput + n - ln; /* Should not check after this */
2956 char *old = locinput;
2957
2958 if (e >= PL_regeol || (n == REG_INFTY))
2959 e = PL_regeol - 1;
2960 while (1) {
2961 /* Find place 'next' could work */
2962 if (c1 == c2) {
2963 while (locinput <= e && *locinput != c1)
2964 locinput++;
2965 } else {
2966 while (locinput <= e
2967 && *locinput != c1
2968 && *locinput != c2)
2969 locinput++;
2970 }
2971 if (locinput > e)
2972 sayNO;
2973 /* PL_reginput == old now */
2974 if (locinput != old) {
2975 ln = 1; /* Did some */
2976 if (regrepeat(scan, locinput - old) <
2977 locinput - old)
2978 sayNO;
2979 }
2980 /* PL_reginput == locinput now */
2981 if (paren) {
2982 if (ln) {
cf93c79d
IZ
2983 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2984 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2985 }
2986 else
cf93c79d 2987 PL_regendp[paren] = -1;
0fe9bf95
IZ
2988 }
2989 if (regmatch(next))
2990 sayYES;
2991 PL_reginput = locinput; /* Could be reset... */
2992 REGCP_UNWIND;
2993 /* Couldn't or didn't -- move forward. */
2994 old = locinput++;
2995 }
2996 }
2997 else
c277df42 2998 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2999 /* If it could work, try it. */
bbce6d69 3000 if (c1 == -1000 ||
3280af22
NIS
3001 UCHARAT(PL_reginput) == c1 ||
3002 UCHARAT(PL_reginput) == c2)
bbce6d69 3003 {
c277df42
IZ
3004 if (paren) {
3005 if (n) {
cf93c79d
IZ
3006 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3007 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3008 }
3009 else
cf93c79d 3010 PL_regendp[paren] = -1;
c277df42 3011 }
a0d0e21e 3012 if (regmatch(next))
4633a7c4 3013 sayYES;
c277df42 3014 REGCP_UNWIND;
bbce6d69 3015 }
c277df42 3016 /* Couldn't or didn't -- move forward. */
a0ed51b3 3017 PL_reginput = locinput;
a0d0e21e
LW
3018 if (regrepeat(scan, 1)) {
3019 ln++;
a0ed51b3
LW
3020 locinput = PL_reginput;
3021 }
3022 else
4633a7c4 3023 sayNO;
a0d0e21e
LW
3024 }
3025 }
3026 else {
c277df42 3027 CHECKPOINT lastcp;
a0d0e21e 3028 n = regrepeat(scan, n);
a0ed51b3 3029 locinput = PL_reginput;
22c35a8c 3030 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 3031 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 3032 ln = n; /* why back off? */
c277df42
IZ
3033 REGCP_SET;
3034 if (paren) {
3035 while (n >= ln) {
3036 /* If it could work, try it. */
3037 if (c1 == -1000 ||
3280af22
NIS
3038 UCHARAT(PL_reginput) == c1 ||
3039 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3040 {
3041 if (paren && n) {
3042 if (n) {
cf93c79d
IZ
3043 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3044 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3045 }
3046 else
cf93c79d 3047 PL_regendp[paren] = -1;
c277df42
IZ
3048 }
3049 if (regmatch(next))
3050 sayYES;
3051 REGCP_UNWIND;
3052 }
3053 /* Couldn't or didn't -- back up. */
3054 n--;
dfe13c55 3055 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3056 }
a0ed51b3
LW
3057 }
3058 else {
c277df42
IZ
3059 while (n >= ln) {
3060 /* If it could work, try it. */
3061 if (c1 == -1000 ||
3280af22
NIS
3062 UCHARAT(PL_reginput) == c1 ||
3063 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3064 {
3065 if (regmatch(next))
3066 sayYES;
3067 REGCP_UNWIND;
3068 }
3069 /* Couldn't or didn't -- back up. */
3070 n--;
dfe13c55 3071 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3072 }
a0d0e21e
LW
3073 }
3074 }
4633a7c4 3075 sayNO;
c277df42 3076 break;
a0d0e21e 3077 case END:
0f5d15d6
IZ
3078 if (PL_reg_call_cc) {
3079 re_cc_state *cur_call_cc = PL_reg_call_cc;
3080 CURCUR *cctmp = PL_regcc;
3081 regexp *re = PL_reg_re;
3082 CHECKPOINT cp, lastcp;
3083
3084 cp = regcppush(0); /* Save *all* the positions. */
3085 REGCP_SET;
3086 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3087 the caller. */
3088 PL_reginput = locinput; /* Make position available to
3089 the callcc. */
3090 cache_re(PL_reg_call_cc->re);
3091 PL_regcc = PL_reg_call_cc->cc;
3092 PL_reg_call_cc = PL_reg_call_cc->prev;
3093 if (regmatch(cur_call_cc->node)) {
3094 PL_reg_call_cc = cur_call_cc;
3095 regcpblow(cp);
3096 sayYES;
3097 }
3098 REGCP_UNWIND;
3099 regcppop();
3100 PL_reg_call_cc = cur_call_cc;
3101 PL_regcc = cctmp;
3102 PL_reg_re = re;
3103 cache_re(re);
3104
3105 DEBUG_r(
3106 PerlIO_printf(Perl_debug_log,
3107 "%*s continuation failed...\n",
3108 REPORT_CODE_OFF+PL_regindent*2, "")
3109 );
7821416a 3110 sayNO_SILENT;
0f5d15d6 3111 }
7821416a
IZ
3112 if (locinput < PL_regtill) {
3113 DEBUG_r(PerlIO_printf(Perl_debug_log,
3114 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3115 PL_colors[4],
3116 (long)(locinput - PL_reg_starttry),
3117 (long)(PL_regtill - PL_reg_starttry),
3118 PL_colors[5]));
3119 sayNO_FINAL; /* Cannot match: too short. */
3120 }
3121 PL_reginput = locinput; /* put where regtry can find it */
3122 sayYES_FINAL; /* Success! */
7e5428c5 3123 case SUCCEED:
3280af22 3124 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3125 sayYES_LOUD; /* Success! */
c277df42
IZ
3126 case SUSPEND:
3127 n = 1;
9fe1d20c 3128 PL_reginput = locinput;
c277df42 3129 goto do_ifmatch;
a0d0e21e 3130 case UNLESSM:
c277df42 3131 n = 0;
a0ed51b3 3132 if (scan->flags) {
0fe9bf95
IZ
3133 if (UTF) { /* XXXX This is absolutely
3134 broken, we read before
3135 start of string. */
3136 s = HOPMAYBEc(locinput, -scan->flags);
3137 if (!s)
3138 goto say_yes;
3139 PL_reginput = s;
3140 }
3141 else {
3142 if (locinput < PL_bostr + scan->flags)
3143 goto say_yes;
3144 PL_reginput = locinput - scan->flags;
3145 goto do_ifmatch;
3146 }
a0ed51b3
LW
3147 }
3148 else
3149 PL_reginput = locinput;
c277df42
IZ
3150 goto do_ifmatch;
3151 case IFMATCH:
3152 n = 1;
a0ed51b3 3153 if (scan->flags) {
0fe9bf95
IZ
3154 if (UTF) { /* XXXX This is absolutely
3155 broken, we read before
3156 start of string. */
3157 s = HOPMAYBEc(locinput, -scan->flags);
3158 if (!s || s < PL_bostr)
3159 goto say_no;
3160 PL_reginput = s;
3161 }
3162 else {
3163 if (locinput < PL_bostr + scan->flags)
3164 goto say_no;
3165 PL_reginput = locinput - scan->flags;
3166 goto do_ifmatch;
3167 }
a0ed51b3
LW
3168 }
3169 else
3170 PL_reginput = locinput;
3171
c277df42 3172 do_ifmatch:
c277df42
IZ
3173 inner = NEXTOPER(NEXTOPER(scan));
3174 if (regmatch(inner) != n) {
3175 say_no:
3176 if (logical) {
3177 logical = 0;
3178 sw = 0;
3179 goto do_longjump;
a0ed51b3
LW
3180 }
3181 else
c277df42
IZ
3182 sayNO;
3183 }
3184 say_yes:
3185 if (logical) {
3186 logical = 0;
3187 sw = 1;
3188 }
fe44a5e8 3189 if (OP(scan) == SUSPEND) {
3280af22 3190 locinput = PL_reginput;
565764a8 3191 nextchr = UCHARAT(locinput);
fe44a5e8 3192 }
c277df42
IZ
3193 /* FALL THROUGH. */
3194 case LONGJMP:
3195 do_longjump:
3196 next = scan + ARG(scan);
3197 if (next == scan)
3198 next = NULL;
a0d0e21e
LW
3199 break;
3200 default:
b900a521 3201 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3202 PTR2UV(scan), OP(scan));
cea2e8a9 3203 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3204 }
a0d0e21e
LW
3205 scan = next;
3206 }
a687059c 3207
a0d0e21e
LW
3208 /*
3209 * We get here only if there's trouble -- normally "case END" is
3210 * the terminating point.
3211 */
cea2e8a9 3212 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3213 /*NOTREACHED*/
4633a7c4
LW
3214 sayNO;
3215
7821416a
IZ
3216yes_loud:
3217 DEBUG_r(
3218 PerlIO_printf(Perl_debug_log,
3219 "%*s %scould match...%s\n",
3220 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3221 );
3222 goto yes;
3223yes_final:
3224 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3225 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3226yes:
3227#ifdef DEBUGGING
3280af22 3228 PL_regindent--;
4633a7c4
LW
3229#endif
3230 return 1;
3231
3232no:
7821416a
IZ
3233 DEBUG_r(
3234 PerlIO_printf(Perl_debug_log,
3235 "%*s %sfailed...%s\n",
3236 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3237 );
3238 goto do_no;
3239no_final:
3240do_no:
4633a7c4 3241#ifdef DEBUGGING
3280af22 3242 PL_regindent--;
4633a7c4 3243#endif
a0d0e21e 3244 return 0;
a687059c
LW
3245}
3246
3247/*
3248 - regrepeat - repeatedly match something simple, report how many
3249 */
3250/*
3251 * [This routine now assumes that it will only match on things of length 1.
3252 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3253 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3254 */
76e3520e 3255STATIC I32
cea2e8a9 3256S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3257{
5c0ca799 3258 dTHR;
a0d0e21e 3259 register char *scan;
a0d0e21e 3260 register I32 c;
3280af22 3261 register char *loceol = PL_regeol;
a0ed51b3 3262 register I32 hardcount = 0;
a0d0e21e 3263
3280af22 3264 scan = PL_reginput;
c277df42 3265 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3266 loceol = scan + max;
a0d0e21e 3267 switch (OP(p)) {
22c35a8c 3268 case REG_ANY:
a0d0e21e
LW
3269 while (scan < loceol && *scan != '\n')
3270 scan++;
3271 break;
3272 case SANY:
3273 scan = loceol;
3274 break;
a0ed51b3
LW
3275 case ANYUTF8:
3276 loceol = PL_regeol;
3277 while (scan < loceol && *scan != '\n') {
3278 scan += UTF8SKIP(scan);
3279 hardcount++;
3280 }
3281 break;
3282 case SANYUTF8:
3283 loceol = PL_regeol;
3284 while (scan < loceol) {
3285 scan += UTF8SKIP(scan);
3286 hardcount++;
3287 }
3288 break;
bbce6d69 3289 case EXACT: /* length of string is 1 */
cd439c50 3290 c = (U8)*STRING(p);
bbce6d69 3291 while (scan < loceol && UCHARAT(scan) == c)
3292 scan++;
3293 break;
3294 case EXACTF: /* length of string is 1 */
cd439c50 3295 c = (U8)*STRING(p);
bbce6d69 3296 while (scan < loceol &&
22c35a8c 3297 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3298 scan++;
3299 break;
3300 case EXACTFL: /* length of string is 1 */
3280af22 3301 PL_reg_flags |= RF_tainted;
cd439c50 3302 c = (U8)*STRING(p);
bbce6d69 3303 while (scan < loceol &&
22c35a8c 3304 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3305 scan++;
3306 break;
a0ed51b3
LW
3307 case ANYOFUTF8:
3308 loceol = PL_regeol;
3309 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3310 scan += UTF8SKIP(scan);
3311 hardcount++;
3312 }
3313 break;
a0d0e21e 3314 case ANYOF:
936ed897 3315 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3316 scan++;
a0d0e21e
LW
3317 break;
3318 case ALNUM:
3319 while (scan < loceol && isALNUM(*scan))
3320 scan++;
3321 break;
a0ed51b3
LW
3322 case ALNUMUTF8:
3323 loceol = PL_regeol;
dfe13c55 3324 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3325 scan += UTF8SKIP(scan);
3326 hardcount++;
3327 }
3328 break;
bbce6d69 3329 case ALNUML:
3280af22 3330 PL_reg_flags |= RF_tainted;
bbce6d69 3331 while (scan < loceol && isALNUM_LC(*scan))
3332 scan++;
3333 break;
a0ed51b3
LW
3334 case ALNUMLUTF8:
3335 PL_reg_flags |= RF_tainted;
3336 loceol = PL_regeol;
dfe13c55 3337 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3338 scan += UTF8SKIP(scan);
3339 hardcount++;
3340 }
3341 break;
3342 break;
a0d0e21e
LW
3343 case NALNUM:
3344 while (scan < loceol && !isALNUM(*scan))
3345 scan++;
3346 break;
a0ed51b3
LW
3347 case NALNUMUTF8:
3348 loceol = PL_regeol;
dfe13c55 3349 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3350 scan += UTF8SKIP(scan);
3351 hardcount++;
3352 }
3353 break;
bbce6d69 3354 case NALNUML:
3280af22 3355 PL_reg_flags |= RF_tainted;
bbce6d69 3356 while (scan < loceol && !isALNUM_LC(*scan))
3357 scan++;
3358 break;
a0ed51b3
LW
3359 case NALNUMLUTF8:
3360 PL_reg_flags |= RF_tainted;
3361 loceol = PL_regeol;
dfe13c55 3362 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3363 scan += UTF8SKIP(scan);
3364 hardcount++;
3365 }
3366 break;
a0d0e21e
LW
3367 case SPACE:
3368 while (scan < loceol && isSPACE(*scan))
3369 scan++;
3370 break;
a0ed51b3
LW
3371 case SPACEUTF8:
3372 loceol = PL_regeol;
dfe13c55 3373 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3374 scan += UTF8SKIP(scan);
3375 hardcount++;
3376 }
3377 break;
bbce6d69 3378 case SPACEL:
3280af22 3379 PL_reg_flags |= RF_tainted;
bbce6d69 3380 while (scan < loceol && isSPACE_LC(*scan))
3381 scan++;
3382 break;
a0ed51b3
LW
3383 case SPACELUTF8:
3384 PL_reg_flags |= RF_tainted;
3385 loceol = PL_regeol;
dfe13c55 3386 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3387 scan += UTF8SKIP(scan);
3388 hardcount++;
3389 }
3390 break;
a0d0e21e
LW
3391 case NSPACE:
3392 while (scan < loceol && !isSPACE(*scan))
3393 scan++;
3394 break;
a0ed51b3
LW
3395 case NSPACEUTF8:
3396 loceol = PL_regeol;
dfe13c55 3397 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3398 scan += UTF8SKIP(scan);
3399 hardcount++;
3400 }
3401 break;
bbce6d69 3402 case NSPACEL:
3280af22 3403 PL_reg_flags |= RF_tainted;
bbce6d69 3404 while (scan < loceol && !isSPACE_LC(*scan))
3405 scan++;
3406 break;
a0ed51b3
LW
3407 case NSPACELUTF8:
3408 PL_reg_flags |= RF_tainted;
3409 loceol = PL_regeol;
dfe13c55 3410 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3411 scan += UTF8SKIP(scan);
3412 hardcount++;
3413 }
3414 break;
a0d0e21e
LW
3415 case DIGIT:
3416 while (scan < loceol && isDIGIT(*scan))
3417 scan++;
3418 break;
a0ed51b3
LW
3419 case DIGITUTF8:
3420 loceol = PL_regeol;
dfe13c55 3421 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3422 scan += UTF8SKIP(scan);
3423 hardcount++;
3424 }
3425 break;
3426 break;
a0d0e21e
LW
3427 case NDIGIT:
3428 while (scan < loceol && !isDIGIT(*scan))
3429 scan++;
3430 break;
a0ed51b3
LW
3431 case NDIGITUTF8:
3432 loceol = PL_regeol;
dfe13c55 3433 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3434 scan += UTF8SKIP(scan);
3435 hardcount++;
3436 }
3437 break;
a0d0e21e
LW
3438 default: /* Called on something of 0 width. */
3439 break; /* So match right here or not at all. */
3440 }
a687059c 3441
a0ed51b3
LW
3442 if (hardcount)
3443 c = hardcount;
3444 else
3445 c = scan - PL_reginput;
3280af22 3446 PL_reginput = scan;
a687059c 3447
c277df42
IZ
3448 DEBUG_r(
3449 {
3450 SV *prop = sv_newmortal();
3451
3452 regprop(prop, p);
3453 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3454 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3455 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3456 });
3457
a0d0e21e 3458 return(c);
a687059c
LW
3459}
3460
3461/*
c277df42
IZ
3462 - regrepeat_hard - repeatedly match something, report total lenth and length
3463 *
3464 * The repeater is supposed to have constant length.
3465 */
3466
76e3520e 3467STATIC I32
cea2e8a9 3468S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3469{
5c0ca799 3470 dTHR;
c277df42
IZ
3471 register char *scan;
3472 register char *start;
3280af22 3473 register char *loceol = PL_regeol;
a0ed51b3 3474 I32 l = 0;
708e3b05 3475 I32 count = 0, res = 1;
a0ed51b3
LW
3476
3477 if (!max)
3478 return 0;
c277df42 3479
3280af22 3480 start = PL_reginput;
a0ed51b3 3481 if (UTF) {
708e3b05 3482 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3483 if (!count++) {
3484 l = 0;
3485 while (start < PL_reginput) {
3486 l++;
3487 start += UTF8SKIP(start);
3488 }
3489 *lp = l;
3490 if (l == 0)
3491 return max;
3492 }
3493 if (count == max)
3494 return count;
3495 }
3496 }
3497 else {
708e3b05 3498 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3499 if (!count++) {
3500 *lp = l = PL_reginput - start;
3501 if (max != REG_INFTY && l*max < loceol - scan)
3502 loceol = scan + l*max;
3503 if (l == 0)
3504 return max;
c277df42
IZ
3505 }
3506 }
3507 }
708e3b05 3508 if (!res)
3280af22 3509 PL_reginput = scan;
c277df42 3510
a0ed51b3 3511 return count;
c277df42
IZ
3512}
3513
3514/*
cb8d8820 3515 - reginclass - determine if a character falls into a character class
bbce6d69 3516 */
3517
76e3520e 3518STATIC bool
936ed897 3519S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3520{
5c0ca799 3521 dTHR;
b8c5462f 3522 char flags = ANYOF_FLAGS(p);
bbce6d69 3523 bool match = FALSE;
3524
3525 c &= 0xFF;
b8c5462f 3526 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3527 match = TRUE;
3528 else if (flags & ANYOF_FOLD) {
3529 I32 cf;
3530 if (flags & ANYOF_LOCALE) {
3280af22 3531 PL_reg_flags |= RF_tainted;
22c35a8c 3532 cf = PL_fold_locale[c];
bbce6d69 3533 }
3534 else
22c35a8c 3535 cf = PL_fold[c];
b8c5462f 3536 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3537 match = TRUE;
3538 }
3539
b8c5462f 3540 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3541 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3542 if (
3543 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3544 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3545 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3546 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3547 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3548 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3549 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3550 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3551 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3552 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3553 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3554 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3555 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3556 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3557 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3558 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3559 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3560 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3561 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3562 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3563 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3564 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3565 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3566 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3567 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3568 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3569 ) /* How's that for a conditional? */
bbce6d69 3570 {
3571 match = TRUE;
3572 }
3573 }
3574
ae5c130c 3575 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3576}
3577
a0ed51b3 3578STATIC bool
cea2e8a9 3579S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3580{
3581 dTHR;
a0ed51b3
LW
3582 char flags = ARG1(f);
3583 bool match = FALSE;
3584 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3585
3586 if (swash_fetch(sv, p))
3587 match = TRUE;
3588 else if (flags & ANYOF_FOLD) {
3589 I32 cf;
dfe13c55 3590 U8 tmpbuf[10];
a0ed51b3
LW
3591 if (flags & ANYOF_LOCALE) {
3592 PL_reg_flags |= RF_tainted;
3593 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3594 }
3595 else
3596 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3597 if (swash_fetch(sv, tmpbuf))
3598 match = TRUE;
3599 }
3600
b8c5462f 3601 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3602
3603 return (flags & ANYOF_INVERT) ? !match : match;
3604}
161b471a 3605
dfe13c55 3606STATIC U8 *
cea2e8a9 3607S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3608{
3609 dTHR;
a0ed51b3
LW
3610 if (off >= 0) {
3611 while (off-- && s < (U8*)PL_regeol)
3612 s += UTF8SKIP(s);
3613 }
3614 else {
3615 while (off++) {
3616 if (s > (U8*)PL_bostr) {
3617 s--;
3618 if (*s & 0x80) {
3619 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3620 s--;
3621 } /* XXX could check well-formedness here */
3622 }
3623 }
3624 }
3625 return s;
3626}
161b471a 3627
dfe13c55 3628STATIC U8 *
cea2e8a9 3629S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3630{
c485e607 3631 dTHR;
a0ed51b3
LW
3632 if (off >= 0) {
3633 while (off-- && s < (U8*)PL_regeol)
3634 s += UTF8SKIP(s);
3635 if (off >= 0)
3636 return 0;
3637 }
3638 else {
3639 while (off++) {
3640 if (s > (U8*)PL_bostr) {
3641 s--;
3642 if (*s & 0x80) {
3643 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3644 s--;
3645 } /* XXX could check well-formedness here */
3646 }
3647 else
3648 break;
3649 }
3650 if (off <= 0)
3651 return 0;
3652 }
3653 return s;
3654}
51371543
GS
3655
3656#ifdef PERL_OBJECT
51371543
GS
3657#include "XSUB.h"
3658#endif
3659
3660static void
3661restore_pos(pTHXo_ void *arg)
3662{
3663 dTHR;
3664 if (PL_reg_eval_set) {
3665 if (PL_reg_oldsaved) {
3666 PL_reg_re->subbeg = PL_reg_oldsaved;
3667 PL_reg_re->sublen = PL_reg_oldsavedlen;
3668 RX_MATCH_COPIED_on(PL_reg_re);
3669 }
3670 PL_reg_magic->mg_len = PL_reg_oldpos;
3671 PL_reg_eval_set = 0;
3672 PL_curpm = PL_reg_oldcurpm;
3673 }
3674}
3675