This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
various nits identified by warnings unmasked by recent changes
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
6eb5f6b9
JH
278/* XXXX Some places assume that there is a fixed substring.
279 An update may be needed if optimizer marks as "INTUITable"
280 RExen without fixed substrings. Similarly, it is assumed that
281 lengths of all the strings are no more than minlen, thus they
282 cannot come from lookahead.
283 (Or minlen should take into account lookahead.) */
284
2c2d71f5
JH
285/* A failure to find a constant substring means that there is no need to make
286 an expensive call to REx engine, thus we celebrate a failure. Similarly,
287 finding a substring too deep into the string means that less calls to
30944b6d
IZ
288 regtry() should be needed.
289
290 REx compiler's optimizer found 4 possible hints:
291 a) Anchored substring;
292 b) Fixed substring;
293 c) Whether we are anchored (beginning-of-line or \G);
294 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 295 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
296 string which does not contradict any of them.
297 */
2c2d71f5 298
6eb5f6b9
JH
299/* Most of decisions we do here should have been done at compile time.
300 The nodes of the REx which we used for the search should have been
301 deleted from the finite automaton. */
302
cad2e5aa
JH
303char *
304Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
305 char *strend, U32 flags, re_scream_pos_data *data)
306{
2c2d71f5 307 register I32 start_shift;
cad2e5aa 308 /* Should be nonnegative! */
2c2d71f5
JH
309 register I32 end_shift;
310 register char *s;
311 register SV *check;
cad2e5aa
JH
312 char *t;
313 I32 ml_anch;
2c2d71f5 314 char *tmp;
6eb5f6b9
JH
315 register char *other_last = Nullch; /* other substr checked before this */
316 char *check_at; /* check substr found at this pos */
30944b6d
IZ
317#ifdef DEBUGGING
318 char *i_strpos = strpos;
319#endif
cad2e5aa
JH
320
321 DEBUG_r( if (!PL_colorset) reginitcolors() );
322 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 323 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
324 PL_colors[4],PL_colors[5],PL_colors[0],
325 prog->precomp,
326 PL_colors[1],
327 (strlen(prog->precomp) > 60 ? "..." : ""),
328 PL_colors[0],
b900a521 329 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
330 strpos, PL_colors[1],
331 (strend - strpos > 60 ? "..." : ""))
332 );
333
2c2d71f5
JH
334 if (prog->minlen > strend - strpos) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 336 goto fail;
2c2d71f5 337 }
653099ff 338 check = prog->check_substr;
2c2d71f5 339 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
340 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
341 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 342 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
343
344 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 345 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
346 I32 slen;
347
348 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
349 && (sv && (strpos + SvCUR(sv) != strend)) ) {
350 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 351 goto fail;
2c2d71f5 352 }
adac82c7 353 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 354 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
355 if (SvTAIL(check)) {
356 slen = SvCUR(check); /* >= 1 */
cad2e5aa 357
2c2d71f5
JH
358 if ( strend - s > slen || strend - s < slen - 1
359 || (strend - s == slen && strend[-1] != '\n')) {
360 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
361 goto fail_finish;
cad2e5aa
JH
362 }
363 /* Now should match s[0..slen-2] */
364 slen--;
653099ff 365 if (slen && (*SvPVX(check) != *s
cad2e5aa 366 || (slen > 1
653099ff 367 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
368 report_neq:
369 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
370 goto fail_finish;
371 }
cad2e5aa 372 }
653099ff
GS
373 else if (*SvPVX(check) != *s
374 || ((slen = SvCUR(check)) > 1
375 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
376 goto report_neq;
377 goto success_at_start;
cad2e5aa 378 }
2c2d71f5 379 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 380 s = strpos;
2c2d71f5 381 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 382 end_shift = prog->minlen - start_shift -
653099ff 383 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 384 if (!ml_anch) {
653099ff
GS
385 I32 end = prog->check_offset_max + CHR_SVLEN(check)
386 - (SvTAIL(check) != 0);
2c2d71f5
JH
387 I32 eshift = strend - s - end;
388
389 if (end_shift < eshift)
390 end_shift = eshift;
391 }
cad2e5aa 392 }
2c2d71f5 393 else { /* Can match at random position */
cad2e5aa
JH
394 ml_anch = 0;
395 s = strpos;
2c2d71f5
JH
396 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
397 /* Should be nonnegative! */
398 end_shift = prog->minlen - start_shift -
653099ff 399 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
400 }
401
2c2d71f5 402#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 403 if (end_shift < 0)
6bbae5e6 404 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
405#endif
406
2c2d71f5
JH
407 restart:
408 /* Find a possible match in the region s..strend by looking for
409 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 410 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
411 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
412 I32 p = -1; /* Internal iterator of scream. */
413 I32 *pp = data ? data->scream_pos : &p;
414
2c2d71f5
JH
415 if (PL_screamfirst[BmRARE(check)] >= 0
416 || ( BmRARE(check) == '\n'
417 && (BmPREVIOUS(check) == SvCUR(check) - 1)
418 && SvTAIL(check) ))
419 s = screaminstr(sv, check,
420 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 421 else
2c2d71f5 422 goto fail_finish;
cad2e5aa
JH
423 if (data)
424 *data->scream_olds = s;
425 }
426 else
427 s = fbm_instr((unsigned char*)s + start_shift,
428 (unsigned char*)strend - end_shift,
2c2d71f5 429 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
430
431 /* Update the count-of-usability, remove useless subpatterns,
432 unshift s. */
2c2d71f5
JH
433
434 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
435 (s ? "Found" : "Did not find"),
436 ((check == prog->anchored_substr) ? "anchored" : "floating"),
437 PL_colors[0],
7b0972df
JH
438 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
439 SvPVX(check),
2c2d71f5
JH
440 PL_colors[1], (SvTAIL(check) ? "$" : ""),
441 (s ? " at offset " : "...\n") ) );
442
443 if (!s)
444 goto fail_finish;
445
6eb5f6b9
JH
446 check_at = s;
447
2c2d71f5 448 /* Finish the diagnostic message */
30944b6d 449 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
450
451 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
452 Start with the other substr.
453 XXXX no SCREAM optimization yet - and a very coarse implementation
454 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
455 *always* match. Probably should be marked during compile...
456 Probably it is right to do no SCREAM here...
457 */
458
459 if (prog->float_substr && prog->anchored_substr) {
30944b6d 460 /* Take into account the "other" substring. */
2c2d71f5
JH
461 /* XXXX May be hopelessly wrong for UTF... */
462 if (!other_last)
6eb5f6b9 463 other_last = strpos;
2c2d71f5 464 if (check == prog->float_substr) {
30944b6d
IZ
465 do_other_anchored:
466 {
2c2d71f5
JH
467 char *last = s - start_shift, *last1, *last2;
468 char *s1 = s;
469
470 tmp = PL_bostr;
471 t = s - prog->check_offset_max;
472 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
473 && (!(prog->reganch & ROPT_UTF8)
474 || (PL_bostr = strpos, /* Used in regcopmaybe() */
475 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
476 && t > strpos)))
30944b6d 477 /* EMPTY */;
2c2d71f5
JH
478 else
479 t = strpos;
480 t += prog->anchored_offset;
6eb5f6b9
JH
481 if (t < other_last) /* These positions already checked */
482 t = other_last;
2c2d71f5
JH
483 PL_bostr = tmp;
484 last2 = last1 = strend - prog->minlen;
485 if (last < last1)
486 last1 = last;
487 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
488 /* On end-of-str: see comment below. */
489 s = fbm_instr((unsigned char*)t,
490 (unsigned char*)last1 + prog->anchored_offset
491 + SvCUR(prog->anchored_substr)
492 - (SvTAIL(prog->anchored_substr)!=0),
493 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
494 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
495 (s ? "Found" : "Contradicts"),
496 PL_colors[0],
7b0972df
JH
497 (int)(SvCUR(prog->anchored_substr)
498 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
499 SvPVX(prog->anchored_substr),
500 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
501 if (!s) {
502 if (last1 >= last2) {
503 DEBUG_r(PerlIO_printf(Perl_debug_log,
504 ", giving up...\n"));
505 goto fail_finish;
506 }
507 DEBUG_r(PerlIO_printf(Perl_debug_log,
508 ", trying floating at offset %ld...\n",
30944b6d 509 (long)(s1 + 1 - i_strpos)));
2c2d71f5 510 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 511 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
512 s = HOPc(last, 1);
513 goto restart;
514 }
515 else {
516 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 517 (long)(s - i_strpos)));
2c2d71f5 518 t = s - prog->anchored_offset;
6eb5f6b9 519 other_last = s + 1;
30944b6d 520 s = s1;
2c2d71f5
JH
521 if (t == strpos)
522 goto try_at_start;
2c2d71f5
JH
523 goto try_at_offset;
524 }
30944b6d 525 }
2c2d71f5
JH
526 }
527 else { /* Take into account the floating substring. */
528 char *last, *last1;
529 char *s1 = s;
530
531 t = s - start_shift;
532 last1 = last = strend - prog->minlen + prog->float_min_offset;
533 if (last - t > prog->float_max_offset)
534 last = t + prog->float_max_offset;
535 s = t + prog->float_min_offset;
6eb5f6b9
JH
536 if (s < other_last)
537 s = other_last;
2c2d71f5
JH
538 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
539 /* fbm_instr() takes into account exact value of end-of-str
540 if the check is SvTAIL(ed). Since false positives are OK,
541 and end-of-str is not later than strend we are OK. */
542 s = fbm_instr((unsigned char*)s,
543 (unsigned char*)last + SvCUR(prog->float_substr)
544 - (SvTAIL(prog->float_substr)!=0),
545 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
546 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
547 (s ? "Found" : "Contradicts"),
548 PL_colors[0],
7b0972df
JH
549 (int)(SvCUR(prog->float_substr)
550 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
551 SvPVX(prog->float_substr),
552 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
553 if (!s) {
554 if (last1 == last) {
555 DEBUG_r(PerlIO_printf(Perl_debug_log,
556 ", giving up...\n"));
557 goto fail_finish;
558 }
559 DEBUG_r(PerlIO_printf(Perl_debug_log,
560 ", trying anchored starting at offset %ld...\n",
30944b6d 561 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 562 other_last = last + 1;
2c2d71f5
JH
563 PL_regeol = strend; /* Used in HOP() */
564 s = HOPc(t, 1);
565 goto restart;
566 }
567 else {
568 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 569 (long)(s - i_strpos)));
6eb5f6b9 570 other_last = s + 1;
30944b6d 571 s = s1;
2c2d71f5
JH
572 if (t == strpos)
573 goto try_at_start;
2c2d71f5
JH
574 goto try_at_offset;
575 }
576 }
cad2e5aa 577 }
2c2d71f5
JH
578
579 t = s - prog->check_offset_max;
580 tmp = PL_bostr;
581 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
582 && (!(prog->reganch & ROPT_UTF8)
583 || (PL_bostr = strpos, /* Used in regcopmaybe() */
584 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
585 && t > strpos)))) {
586 PL_bostr = tmp;
587 /* Fixed substring is found far enough so that the match
588 cannot start at strpos. */
589 try_at_offset:
cad2e5aa 590 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
591 /* Eventually fbm_*() should handle this, but often
592 anchored_offset is not 0, so this check will not be wasted. */
593 /* XXXX In the code below we prefer to look for "^" even in
594 presence of anchored substrings. And we search even
595 beyond the found float position. These pessimizations
596 are historical artefacts only. */
597 find_anchor:
2c2d71f5 598 while (t < strend - prog->minlen) {
cad2e5aa
JH
599 if (*t == '\n') {
600 if (t < s - prog->check_offset_min) {
30944b6d
IZ
601 if (prog->anchored_substr) {
602 /* We definitely contradict the found anchored
603 substr. Due to the above check we do not
604 contradict "check" substr.
605 Thus we can arrive here only if check substr
606 is float. Redo checking for "other"=="fixed".
607 */
608 strpos = t + 1;
609 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
610 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
611 goto do_other_anchored;
612 }
cad2e5aa 613 s = t + 1;
2c2d71f5 614 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 615 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
616 goto set_useful;
617 }
2c2d71f5 618 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
619 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
620 strpos = s = t + 1;
cad2e5aa
JH
621 goto restart;
622 }
623 t++;
624 }
2c2d71f5
JH
625 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
626 PL_colors[0],PL_colors[1]));
627 goto fail_finish;
cad2e5aa
JH
628 }
629 s = t;
630 set_useful:
2c2d71f5 631 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
632 }
633 else {
2c2d71f5
JH
634 PL_bostr = tmp;
635 /* The found string does not prohibit matching at beg-of-str
636 - no optimization of calling REx engine can be performed,
637 unless it was an MBOL and we are not after MBOL. */
638 try_at_start:
639 /* Even in this situation we may use MBOL flag if strpos is offset
640 wrt the start of the string. */
641 if (ml_anch && sv
cad2e5aa
JH
642 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
643 t = strpos;
644 goto find_anchor;
645 }
30944b6d
IZ
646 DEBUG_r( if (ml_anch)
647 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
648 PL_colors[0],PL_colors[1]);
649 );
2c2d71f5 650 success_at_start:
30944b6d 651 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 652 && prog->check_substr /* Could be deleted already */
cad2e5aa 653 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
654 && prog->check_substr == prog->float_substr)
655 {
cad2e5aa
JH
656 /* If flags & SOMETHING - do not do it many times on the same match */
657 SvREFCNT_dec(prog->check_substr);
658 prog->check_substr = Nullsv; /* disable */
659 prog->float_substr = Nullsv; /* clear */
660 s = strpos;
3cf5c195
IZ
661 /* XXXX This is a remnant of the old implementation. It
662 looks wasteful, since now INTUIT can use many
6eb5f6b9 663 other heuristics. */
cad2e5aa
JH
664 prog->reganch &= ~RE_USE_INTUIT;
665 }
666 else
667 s = strpos;
668 }
669
6eb5f6b9
JH
670 /* Last resort... */
671 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
672 if (prog->regstclass) {
673 /* minlen == 0 is possible if regstclass is \b or \B,
674 and the fixed substr is ''$.
675 Since minlen is already taken into account, s+1 is before strend;
676 accidentally, minlen >= 1 guaranties no false positives at s + 1
677 even for \b or \B. But (minlen? 1 : 0) below assumes that
678 regstclass does not come from lookahead... */
679 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
680 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
681 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
682 ? STR_LEN(prog->regstclass)
683 : 1);
6eb5f6b9 684 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
685 ? s + (prog->minlen? cl_l : 0)
686 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9
JH
687 : strend) ;
688 char *startpos = sv ? strend - SvCUR(sv) : s;
689
690 t = s;
691 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
692 if (!s) {
693#ifdef DEBUGGING
694 char *what;
695#endif
696 if (endpos == strend) {
697 DEBUG_r( PerlIO_printf(Perl_debug_log,
698 "Could not match STCLASS...\n") );
699 goto fail;
700 }
66e933ab
GS
701 DEBUG_r( PerlIO_printf(Perl_debug_log,
702 "This position contradicts STCLASS...\n") );
653099ff
GS
703 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
704 goto fail;
6eb5f6b9
JH
705 /* Contradict one of substrings */
706 if (prog->anchored_substr) {
6eb5f6b9
JH
707 if (prog->anchored_substr == check) {
708 DEBUG_r( what = "anchored" );
709 hop_and_restart:
710 PL_regeol = strend; /* Used in HOP() */
711 s = HOPc(t, 1);
66e933ab
GS
712 if (s + start_shift + end_shift > strend) {
713 /* XXXX Should be taken into account earlier? */
714 DEBUG_r( PerlIO_printf(Perl_debug_log,
715 "Could not match STCLASS...\n") );
716 goto fail;
717 }
6eb5f6b9 718 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 719 "Trying %s substr starting at offset %ld...\n",
6eb5f6b9
JH
720 what, (long)(s + start_shift - i_strpos)) );
721 goto restart;
722 }
66e933ab 723 /* Have both, check_string is floating */
6eb5f6b9
JH
724 if (t + start_shift >= check_at) /* Contradicts floating=check */
725 goto retry_floating_check;
726 /* Recheck anchored substring, but not floating... */
727 s = check_at;
728 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 729 "Trying anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
730 (long)(other_last - i_strpos)) );
731 goto do_other_anchored;
732 }
60e71179
GS
733 /* Another way we could have checked stclass at the
734 current position only: */
735 if (ml_anch) {
736 s = t = t + 1;
737 DEBUG_r( PerlIO_printf(Perl_debug_log,
738 "Trying /^/m starting at offset %ld...\n",
739 (long)(t - i_strpos)) );
740 goto try_at_offset;
66e933ab 741 }
60e71179
GS
742 if (!prog->float_substr) /* Could have been deleted */
743 goto fail;
6eb5f6b9
JH
744 /* Check is floating subtring. */
745 retry_floating_check:
746 t = check_at - start_shift;
747 DEBUG_r( what = "floating" );
748 goto hop_and_restart;
749 }
750 DEBUG_r( if (t != s)
751 PerlIO_printf(Perl_debug_log,
752 "By STCLASS: moving %ld --> %ld\n",
753 (long)(t - i_strpos), (long)(s - i_strpos));
754 else
755 PerlIO_printf(Perl_debug_log,
756 "Does not contradict STCLASS...\n") );
757 }
2c2d71f5 758 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 759 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 760 return s;
2c2d71f5
JH
761
762 fail_finish: /* Substring not found */
66e933ab
GS
763 if (prog->check_substr) /* could be removed already */
764 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 765 fail:
2c2d71f5 766 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
767 PL_colors[4],PL_colors[5]));
768 return Nullch;
769}
9661b544 770
6eb5f6b9 771/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
772STATIC char *
773S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 774{
6eb5f6b9
JH
775 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
776 char *m;
777 int ln;
778 int c1;
779 int c2;
780 char *e;
781 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 782
6eb5f6b9
JH
783 /* We know what class it must start with. */
784 switch (OP(c)) {
785 case ANYOFUTF8:
786 while (s < strend) {
787 if (REGINCLASSUTF8(c, (U8*)s)) {
788 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 789 goto got_it;
3cf5c195 790 else
6eb5f6b9 791 tmp = doevery;
a0ed51b3 792 }
6eb5f6b9
JH
793 else
794 tmp = 1;
a0ed51b3
LW
795 s += UTF8SKIP(s);
796 }
6eb5f6b9
JH
797 break;
798 case ANYOF:
a0ed51b3 799 while (s < strend) {
6eb5f6b9
JH
800 if (REGINCLASS(c, *s)) {
801 if (tmp && (norun || regtry(prog, s)))
802 goto got_it;
803 else
804 tmp = doevery;
a0ed51b3 805 }
6eb5f6b9
JH
806 else
807 tmp = 1;
a0ed51b3 808 s++;
a0d0e21e 809 }
6eb5f6b9
JH
810 break;
811 case EXACTF:
812 m = STRING(c);
813 ln = STR_LEN(c);
814 c1 = *m;
815 c2 = PL_fold[c1];
816 goto do_exactf;
817 case EXACTFL:
818 m = STRING(c);
819 ln = STR_LEN(c);
820 c1 = *m;
821 c2 = PL_fold_locale[c1];
822 do_exactf:
823 e = strend - ln;
b3c9acc1 824
6eb5f6b9
JH
825 if (norun && e < s)
826 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
827 /* Here it is NOT UTF! */
828 if (c1 == c2) {
829 while (s <= e) {
830 if ( *s == c1
66e933ab
GS
831 && (ln == 1 || !(OP(c) == EXACTF
832 ? ibcmp(s, m, ln)
833 : ibcmp_locale(s, m, ln)))
6eb5f6b9 834 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
835 goto got_it;
836 s++;
837 }
838 } else {
839 while (s <= e) {
840 if ( (*s == c1 || *s == c2)
66e933ab
GS
841 && (ln == 1 || !(OP(c) == EXACTF
842 ? ibcmp(s, m, ln)
843 : ibcmp_locale(s, m, ln)))
6eb5f6b9 844 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
845 goto got_it;
846 s++;
847 }
848 }
849 break;
bbce6d69 850 case BOUNDL:
3280af22 851 PL_reg_flags |= RF_tainted;
bbce6d69 852 /* FALL THROUGH */
a0d0e21e 853 case BOUND:
6eb5f6b9 854 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 855 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 856 while (s < strend) {
95bac841 857 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 858 tmp = !tmp;
6eb5f6b9 859 if ((norun || regtry(prog, s)))
a0d0e21e 860 goto got_it;
a687059c 861 }
a0d0e21e
LW
862 s++;
863 }
6eb5f6b9 864 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
865 goto got_it;
866 break;
a0ed51b3
LW
867 case BOUNDLUTF8:
868 PL_reg_flags |= RF_tainted;
869 /* FALL THROUGH */
870 case BOUNDUTF8:
6eb5f6b9 871 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
a0ed51b3
LW
872 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
873 while (s < strend) {
dfe13c55
GS
874 if (tmp == !(OP(c) == BOUND ?
875 swash_fetch(PL_utf8_alnum, (U8*)s) :
876 isALNUM_LC_utf8((U8*)s)))
877 {
a0ed51b3 878 tmp = !tmp;
6eb5f6b9 879 if ((norun || regtry(prog, s)))
a0ed51b3
LW
880 goto got_it;
881 }
882 s += UTF8SKIP(s);
883 }
6eb5f6b9 884 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
885 goto got_it;
886 break;
bbce6d69 887 case NBOUNDL:
3280af22 888 PL_reg_flags |= RF_tainted;
bbce6d69 889 /* FALL THROUGH */
a0d0e21e 890 case NBOUND:
6eb5f6b9 891 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 892 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 893 while (s < strend) {
95bac841 894 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 895 tmp = !tmp;
6eb5f6b9 896 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
897 goto got_it;
898 s++;
899 }
6eb5f6b9 900 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
901 goto got_it;
902 break;
a0ed51b3
LW
903 case NBOUNDLUTF8:
904 PL_reg_flags |= RF_tainted;
905 /* FALL THROUGH */
906 case NBOUNDUTF8:
6eb5f6b9 907 if (prog->minlen)
dfe13c55 908 strend = reghop_c(strend, -1);
6eb5f6b9 909 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
a0ed51b3
LW
910 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
911 while (s < strend) {
dfe13c55
GS
912 if (tmp == !(OP(c) == NBOUND ?
913 swash_fetch(PL_utf8_alnum, (U8*)s) :
914 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 915 tmp = !tmp;
6eb5f6b9 916 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
917 goto got_it;
918 s += UTF8SKIP(s);
919 }
6eb5f6b9 920 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
921 goto got_it;
922 break;
a0d0e21e
LW
923 case ALNUM:
924 while (s < strend) {
bbce6d69 925 if (isALNUM(*s)) {
6eb5f6b9 926 if (tmp && (norun || regtry(prog, s)))
bbce6d69 927 goto got_it;
928 else
929 tmp = doevery;
930 }
931 else
932 tmp = 1;
933 s++;
934 }
935 break;
a0ed51b3
LW
936 case ALNUMUTF8:
937 while (s < strend) {
dfe13c55 938 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 939 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
940 goto got_it;
941 else
942 tmp = doevery;
943 }
944 else
945 tmp = 1;
946 s += UTF8SKIP(s);
947 }
948 break;
bbce6d69 949 case ALNUML:
3280af22 950 PL_reg_flags |= RF_tainted;
bbce6d69 951 while (s < strend) {
952 if (isALNUM_LC(*s)) {
6eb5f6b9 953 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 954 goto got_it;
a687059c 955 else
a0d0e21e
LW
956 tmp = doevery;
957 }
958 else
959 tmp = 1;
960 s++;
961 }
962 break;
a0ed51b3
LW
963 case ALNUMLUTF8:
964 PL_reg_flags |= RF_tainted;
965 while (s < strend) {
dfe13c55 966 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 967 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
968 goto got_it;
969 else
970 tmp = doevery;
971 }
972 else
973 tmp = 1;
974 s += UTF8SKIP(s);
975 }
976 break;
a0d0e21e
LW
977 case NALNUM:
978 while (s < strend) {
bbce6d69 979 if (!isALNUM(*s)) {
6eb5f6b9 980 if (tmp && (norun || regtry(prog, s)))
bbce6d69 981 goto got_it;
982 else
983 tmp = doevery;
984 }
985 else
986 tmp = 1;
987 s++;
988 }
989 break;
a0ed51b3
LW
990 case NALNUMUTF8:
991 while (s < strend) {
dfe13c55 992 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 993 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
994 goto got_it;
995 else
996 tmp = doevery;
997 }
998 else
999 tmp = 1;
1000 s += UTF8SKIP(s);
1001 }
1002 break;
bbce6d69 1003 case NALNUML:
3280af22 1004 PL_reg_flags |= RF_tainted;
bbce6d69 1005 while (s < strend) {
1006 if (!isALNUM_LC(*s)) {
6eb5f6b9 1007 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1008 goto got_it;
a687059c 1009 else
a0d0e21e 1010 tmp = doevery;
a687059c 1011 }
a0d0e21e
LW
1012 else
1013 tmp = 1;
1014 s++;
1015 }
1016 break;
a0ed51b3
LW
1017 case NALNUMLUTF8:
1018 PL_reg_flags |= RF_tainted;
1019 while (s < strend) {
dfe13c55 1020 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1021 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1022 goto got_it;
1023 else
1024 tmp = doevery;
1025 }
1026 else
1027 tmp = 1;
1028 s += UTF8SKIP(s);
1029 }
1030 break;
a0d0e21e
LW
1031 case SPACE:
1032 while (s < strend) {
1033 if (isSPACE(*s)) {
6eb5f6b9 1034 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1035 goto got_it;
1036 else
1037 tmp = doevery;
2304df62 1038 }
a0d0e21e
LW
1039 else
1040 tmp = 1;
1041 s++;
1042 }
1043 break;
a0ed51b3
LW
1044 case SPACEUTF8:
1045 while (s < strend) {
dfe13c55 1046 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1047 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1048 goto got_it;
1049 else
1050 tmp = doevery;
1051 }
1052 else
1053 tmp = 1;
1054 s += UTF8SKIP(s);
1055 }
1056 break;
bbce6d69 1057 case SPACEL:
3280af22 1058 PL_reg_flags |= RF_tainted;
bbce6d69 1059 while (s < strend) {
1060 if (isSPACE_LC(*s)) {
6eb5f6b9 1061 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1062 goto got_it;
1063 else
1064 tmp = doevery;
1065 }
1066 else
1067 tmp = 1;
1068 s++;
1069 }
1070 break;
a0ed51b3
LW
1071 case SPACELUTF8:
1072 PL_reg_flags |= RF_tainted;
1073 while (s < strend) {
dfe13c55 1074 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1075 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1076 goto got_it;
1077 else
1078 tmp = doevery;
1079 }
1080 else
1081 tmp = 1;
1082 s += UTF8SKIP(s);
1083 }
1084 break;
a0d0e21e
LW
1085 case NSPACE:
1086 while (s < strend) {
1087 if (!isSPACE(*s)) {
6eb5f6b9 1088 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1089 goto got_it;
1090 else
1091 tmp = doevery;
a687059c 1092 }
a0d0e21e
LW
1093 else
1094 tmp = 1;
1095 s++;
1096 }
1097 break;
a0ed51b3
LW
1098 case NSPACEUTF8:
1099 while (s < strend) {
dfe13c55 1100 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1101 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1102 goto got_it;
1103 else
1104 tmp = doevery;
1105 }
1106 else
1107 tmp = 1;
1108 s += UTF8SKIP(s);
1109 }
1110 break;
bbce6d69 1111 case NSPACEL:
3280af22 1112 PL_reg_flags |= RF_tainted;
bbce6d69 1113 while (s < strend) {
1114 if (!isSPACE_LC(*s)) {
6eb5f6b9 1115 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1116 goto got_it;
1117 else
1118 tmp = doevery;
1119 }
1120 else
1121 tmp = 1;
1122 s++;
1123 }
1124 break;
a0ed51b3
LW
1125 case NSPACELUTF8:
1126 PL_reg_flags |= RF_tainted;
1127 while (s < strend) {
dfe13c55 1128 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1129 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1130 goto got_it;
1131 else
1132 tmp = doevery;
1133 }
1134 else
1135 tmp = 1;
1136 s += UTF8SKIP(s);
1137 }
1138 break;
a0d0e21e
LW
1139 case DIGIT:
1140 while (s < strend) {
1141 if (isDIGIT(*s)) {
6eb5f6b9 1142 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1143 goto got_it;
1144 else
1145 tmp = doevery;
2b69d0c2 1146 }
a0d0e21e
LW
1147 else
1148 tmp = 1;
1149 s++;
1150 }
1151 break;
a0ed51b3
LW
1152 case DIGITUTF8:
1153 while (s < strend) {
dfe13c55 1154 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1155 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1156 goto got_it;
1157 else
1158 tmp = doevery;
1159 }
1160 else
1161 tmp = 1;
1162 s += UTF8SKIP(s);
1163 }
1164 break;
b8c5462f
JH
1165 case DIGITL:
1166 PL_reg_flags |= RF_tainted;
1167 while (s < strend) {
1168 if (isDIGIT_LC(*s)) {
6eb5f6b9 1169 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1170 goto got_it;
1171 else
1172 tmp = doevery;
1173 }
1174 else
1175 tmp = 1;
1176 s++;
1177 }
1178 break;
1179 case DIGITLUTF8:
1180 PL_reg_flags |= RF_tainted;
1181 while (s < strend) {
1182 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1183 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1184 goto got_it;
1185 else
1186 tmp = doevery;
1187 }
1188 else
1189 tmp = 1;
1190 s += UTF8SKIP(s);
1191 }
1192 break;
a0d0e21e
LW
1193 case NDIGIT:
1194 while (s < strend) {
1195 if (!isDIGIT(*s)) {
6eb5f6b9 1196 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1197 goto got_it;
1198 else
1199 tmp = doevery;
a687059c 1200 }
a0d0e21e
LW
1201 else
1202 tmp = 1;
1203 s++;
1204 }
1205 break;
a0ed51b3
LW
1206 case NDIGITUTF8:
1207 while (s < strend) {
dfe13c55 1208 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1209 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1210 goto got_it;
1211 else
1212 tmp = doevery;
1213 }
1214 else
1215 tmp = 1;
1216 s += UTF8SKIP(s);
1217 }
1218 break;
b8c5462f
JH
1219 case NDIGITL:
1220 PL_reg_flags |= RF_tainted;
1221 while (s < strend) {
1222 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1223 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1224 goto got_it;
1225 else
1226 tmp = doevery;
1227 }
1228 else
1229 tmp = 1;
1230 s++;
a0ed51b3 1231 }
b8c5462f
JH
1232 break;
1233 case NDIGITLUTF8:
1234 PL_reg_flags |= RF_tainted;
1235 while (s < strend) {
1236 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1237 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1238 goto got_it;
cf93c79d 1239 else
b8c5462f
JH
1240 tmp = doevery;
1241 }
1242 else
1243 tmp = 1;
1244 s += UTF8SKIP(s);
1245 }
1246 break;
b3c9acc1 1247 default:
3c3eec57
GS
1248 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1249 break;
d6a28714 1250 }
6eb5f6b9
JH
1251 return 0;
1252 got_it:
1253 return s;
1254}
1255
1256/*
1257 - regexec_flags - match a regexp against a string
1258 */
1259I32
1260Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1261 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1262/* strend: pointer to null at end of string */
1263/* strbeg: real beginning of string */
1264/* minend: end of match must be >=minend after stringarg. */
1265/* data: May be used for some additional optimizations. */
1266/* nosave: For optimizations. */
1267{
1268 dTHR;
1269 register char *s;
1270 register regnode *c;
1271 register char *startpos = stringarg;
1272 register I32 tmp;
1273 I32 minlen; /* must match at least this many chars */
1274 I32 dontbother = 0; /* how many characters not to try at end */
1275 I32 start_shift = 0; /* Offset of the start to find
1276 constant substr. */ /* CC */
1277 I32 end_shift = 0; /* Same for the end. */ /* CC */
1278 I32 scream_pos = -1; /* Internal iterator of scream. */
1279 char *scream_olds;
1280 SV* oreplsv = GvSV(PL_replgv);
1281
1282 PL_regcc = 0;
1283
1284 cache_re(prog);
1285#ifdef DEBUGGING
1286 PL_regnarrate = PL_debug & 512;
1287#endif
1288
1289 /* Be paranoid... */
1290 if (prog == NULL || startpos == NULL) {
1291 Perl_croak(aTHX_ "NULL regexp parameter");
1292 return 0;
1293 }
1294
1295 minlen = prog->minlen;
1296 if (strend - startpos < minlen) goto phooey;
1297
1298 if (startpos == strbeg) /* is ^ valid at stringarg? */
1299 PL_regprev = '\n';
1300 else {
1301 PL_regprev = (U32)stringarg[-1];
1302 if (!PL_multiline && PL_regprev == '\n')
1303 PL_regprev = '\0'; /* force ^ to NOT match */
1304 }
1305
1306 /* Check validity of program. */
1307 if (UCHARAT(prog->program) != REG_MAGIC) {
1308 Perl_croak(aTHX_ "corrupted regexp program");
1309 }
1310
1311 PL_reg_flags = 0;
1312 PL_reg_eval_set = 0;
1313 PL_reg_maxiter = 0;
1314
1315 if (prog->reganch & ROPT_UTF8)
1316 PL_reg_flags |= RF_utf8;
1317
1318 /* Mark beginning of line for ^ and lookbehind. */
1319 PL_regbol = startpos;
1320 PL_bostr = strbeg;
1321 PL_reg_sv = sv;
1322
1323 /* Mark end of line for $ (and such) */
1324 PL_regeol = strend;
1325
1326 /* see how far we have to get to not match where we matched before */
1327 PL_regtill = startpos+minend;
1328
1329 /* We start without call_cc context. */
1330 PL_reg_call_cc = 0;
1331
1332 /* If there is a "must appear" string, look for it. */
1333 s = startpos;
1334
1335 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1336 MAGIC *mg;
1337
1338 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1339 PL_reg_ganch = startpos;
1340 else if (sv && SvTYPE(sv) >= SVt_PVMG
1341 && SvMAGIC(sv)
1342 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1343 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1344 if (prog->reganch & ROPT_ANCH_GPOS) {
1345 if (s > PL_reg_ganch)
1346 goto phooey;
1347 s = PL_reg_ganch;
1348 }
1349 }
1350 else /* pos() not defined */
1351 PL_reg_ganch = strbeg;
1352 }
1353
1354 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1355 re_scream_pos_data d;
1356
1357 d.scream_olds = &scream_olds;
1358 d.scream_pos = &scream_pos;
1359 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1360 if (!s)
1361 goto phooey; /* not present */
1362 }
1363
1364 DEBUG_r( if (!PL_colorset) reginitcolors() );
1365 DEBUG_r(PerlIO_printf(Perl_debug_log,
1366 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1367 PL_colors[4],PL_colors[5],PL_colors[0],
1368 prog->precomp,
1369 PL_colors[1],
1370 (strlen(prog->precomp) > 60 ? "..." : ""),
1371 PL_colors[0],
1372 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1373 startpos, PL_colors[1],
1374 (strend - startpos > 60 ? "..." : ""))
1375 );
1376
1377 /* Simplest case: anchored match need be tried only once. */
1378 /* [unless only anchor is BOL and multiline is set] */
1379 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1380 if (s == startpos && regtry(prog, startpos))
1381 goto got_it;
1382 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1383 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1384 {
1385 char *end;
1386
1387 if (minlen)
1388 dontbother = minlen - 1;
1389 end = HOPc(strend, -dontbother) - 1;
1390 /* for multiline we only have to try after newlines */
1391 if (prog->check_substr) {
1392 if (s == startpos)
1393 goto after_try;
1394 while (1) {
1395 if (regtry(prog, s))
1396 goto got_it;
1397 after_try:
1398 if (s >= end)
1399 goto phooey;
1400 if (prog->reganch & RE_USE_INTUIT) {
1401 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1402 if (!s)
1403 goto phooey;
1404 }
1405 else
1406 s++;
1407 }
1408 } else {
1409 if (s > startpos)
1410 s--;
1411 while (s < end) {
1412 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1413 if (regtry(prog, s))
1414 goto got_it;
1415 }
1416 }
1417 }
1418 }
1419 goto phooey;
1420 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1421 if (regtry(prog, PL_reg_ganch))
1422 goto got_it;
1423 goto phooey;
1424 }
1425
1426 /* Messy cases: unanchored match. */
1427 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1428 /* we have /x+whatever/ */
1429 /* it must be a one character string (XXXX Except UTF?) */
1430 char ch = SvPVX(prog->anchored_substr)[0];
1431 if (UTF) {
1432 while (s < strend) {
1433 if (*s == ch) {
1434 if (regtry(prog, s)) goto got_it;
1435 s += UTF8SKIP(s);
1436 while (s < strend && *s == ch)
1437 s += UTF8SKIP(s);
1438 }
1439 s += UTF8SKIP(s);
1440 }
1441 }
1442 else {
1443 while (s < strend) {
1444 if (*s == ch) {
1445 if (regtry(prog, s)) goto got_it;
1446 s++;
1447 while (s < strend && *s == ch)
1448 s++;
1449 }
1450 s++;
1451 }
1452 }
1453 }
1454 /*SUPPRESS 560*/
1455 else if (prog->anchored_substr != Nullsv
1456 || (prog->float_substr != Nullsv
1457 && prog->float_max_offset < strend - s)) {
1458 SV *must = prog->anchored_substr
1459 ? prog->anchored_substr : prog->float_substr;
1460 I32 back_max =
1461 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1462 I32 back_min =
1463 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1464 I32 delta = back_max - back_min;
1465 char *last = HOPc(strend, /* Cannot start after this */
1466 -(I32)(CHR_SVLEN(must)
1467 - (SvTAIL(must) != 0) + back_min));
1468 char *last1; /* Last position checked before */
1469
1470 if (s > PL_bostr)
1471 last1 = HOPc(s, -1);
1472 else
1473 last1 = s - 1; /* bogus */
1474
1475 /* XXXX check_substr already used to find `s', can optimize if
1476 check_substr==must. */
1477 scream_pos = -1;
1478 dontbother = end_shift;
1479 strend = HOPc(strend, -dontbother);
1480 while ( (s <= last) &&
1481 ((flags & REXEC_SCREAM)
1482 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1483 end_shift, &scream_pos, 0))
1484 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1485 (unsigned char*)strend, must,
1486 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1487 if (HOPc(s, -back_max) > last1) {
1488 last1 = HOPc(s, -back_min);
1489 s = HOPc(s, -back_max);
1490 }
1491 else {
1492 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1493
1494 last1 = HOPc(s, -back_min);
1495 s = t;
1496 }
1497 if (UTF) {
1498 while (s <= last1) {
1499 if (regtry(prog, s))
1500 goto got_it;
1501 s += UTF8SKIP(s);
1502 }
1503 }
1504 else {
1505 while (s <= last1) {
1506 if (regtry(prog, s))
1507 goto got_it;
1508 s++;
1509 }
1510 }
1511 }
1512 goto phooey;
1513 }
1514 else if (c = prog->regstclass) {
66e933ab
GS
1515 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1516 /* don't bother with what can't match */
6eb5f6b9
JH
1517 strend = HOPc(strend, -(minlen - 1));
1518 if (find_byclass(prog, c, s, strend, startpos, 0))
1519 goto got_it;
d6a28714
JH
1520 }
1521 else {
1522 dontbother = 0;
1523 if (prog->float_substr != Nullsv) { /* Trim the end. */
1524 char *last;
1525 I32 oldpos = scream_pos;
1526
1527 if (flags & REXEC_SCREAM) {
1528 last = screaminstr(sv, prog->float_substr, s - strbeg,
1529 end_shift, &scream_pos, 1); /* last one */
1530 if (!last)
1531 last = scream_olds; /* Only one occurence. */
b8c5462f 1532 }
d6a28714
JH
1533 else {
1534 STRLEN len;
1535 char *little = SvPV(prog->float_substr, len);
1536
1537 if (SvTAIL(prog->float_substr)) {
1538 if (memEQ(strend - len + 1, little, len - 1))
1539 last = strend - len + 1;
1540 else if (!PL_multiline)
1541 last = memEQ(strend - len, little, len)
1542 ? strend - len : Nullch;
b8c5462f 1543 else
d6a28714
JH
1544 goto find_last;
1545 } else {
1546 find_last:
1547 if (len)
1548 last = rninstr(s, strend, little, little + len);
b8c5462f 1549 else
d6a28714 1550 last = strend; /* matching `$' */
b8c5462f 1551 }
b8c5462f 1552 }
d6a28714
JH
1553 if (last == NULL) goto phooey; /* Should not happen! */
1554 dontbother = strend - last + prog->float_min_offset;
1555 }
1556 if (minlen && (dontbother < minlen))
1557 dontbother = minlen - 1;
1558 strend -= dontbother; /* this one's always in bytes! */
1559 /* We don't know much -- general case. */
1560 if (UTF) {
1561 for (;;) {
1562 if (regtry(prog, s))
1563 goto got_it;
1564 if (s >= strend)
1565 break;
b8c5462f 1566 s += UTF8SKIP(s);
d6a28714
JH
1567 };
1568 }
1569 else {
1570 do {
1571 if (regtry(prog, s))
1572 goto got_it;
1573 } while (s++ < strend);
1574 }
1575 }
1576
1577 /* Failure. */
1578 goto phooey;
1579
1580got_it:
1581 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1582
1583 if (PL_reg_eval_set) {
1584 /* Preserve the current value of $^R */
1585 if (oreplsv != GvSV(PL_replgv))
1586 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1587 restored, the value remains
1588 the same. */
1589 restore_pos(aTHXo_ 0);
1590 }
1591
1592 /* make sure $`, $&, $', and $digit will work later */
1593 if ( !(flags & REXEC_NOT_FIRST) ) {
1594 if (RX_MATCH_COPIED(prog)) {
1595 Safefree(prog->subbeg);
1596 RX_MATCH_COPIED_off(prog);
1597 }
1598 if (flags & REXEC_COPY_STR) {
1599 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1600
1601 s = savepvn(strbeg, i);
1602 prog->subbeg = s;
1603 prog->sublen = i;
1604 RX_MATCH_COPIED_on(prog);
1605 }
1606 else {
1607 prog->subbeg = strbeg;
1608 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1609 }
1610 }
1611
1612 return 1;
1613
1614phooey:
1615 if (PL_reg_eval_set)
1616 restore_pos(aTHXo_ 0);
1617 return 0;
1618}
1619
1620/*
1621 - regtry - try match at specific point
1622 */
1623STATIC I32 /* 0 failure, 1 success */
1624S_regtry(pTHX_ regexp *prog, char *startpos)
1625{
1626 dTHR;
1627 register I32 i;
1628 register I32 *sp;
1629 register I32 *ep;
1630 CHECKPOINT lastcp;
1631
1632 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1633 MAGIC *mg;
1634
1635 PL_reg_eval_set = RS_init;
1636 DEBUG_r(DEBUG_s(
b900a521
JH
1637 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1638 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1639 ));
e8347627 1640 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1641 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1642 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1643 SAVETMPS;
1644 /* Apparently this is not needed, judging by wantarray. */
e8347627 1645 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1646 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1647
1648 if (PL_reg_sv) {
1649 /* Make $_ available to executed code. */
1650 if (PL_reg_sv != DEFSV) {
1651 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1652 SAVESPTR(DEFSV);
1653 DEFSV = PL_reg_sv;
b8c5462f 1654 }
d6a28714
JH
1655
1656 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1657 && (mg = mg_find(PL_reg_sv, 'g')))) {
1658 /* prepare for quick setting of pos */
1659 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1660 mg = mg_find(PL_reg_sv, 'g');
1661 mg->mg_len = -1;
b8c5462f 1662 }
d6a28714
JH
1663 PL_reg_magic = mg;
1664 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1665 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1666 }
1667 if (!PL_reg_curpm)
1668 New(22,PL_reg_curpm, 1, PMOP);
1669 PL_reg_curpm->op_pmregexp = prog;
1670 PL_reg_oldcurpm = PL_curpm;
1671 PL_curpm = PL_reg_curpm;
1672 if (RX_MATCH_COPIED(prog)) {
1673 /* Here is a serious problem: we cannot rewrite subbeg,
1674 since it may be needed if this match fails. Thus
1675 $` inside (?{}) could fail... */
1676 PL_reg_oldsaved = prog->subbeg;
1677 PL_reg_oldsavedlen = prog->sublen;
1678 RX_MATCH_COPIED_off(prog);
1679 }
1680 else
1681 PL_reg_oldsaved = Nullch;
1682 prog->subbeg = PL_bostr;
1683 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1684 }
1685 prog->startp[0] = startpos - PL_bostr;
1686 PL_reginput = startpos;
1687 PL_regstartp = prog->startp;
1688 PL_regendp = prog->endp;
1689 PL_reglastparen = &prog->lastparen;
1690 prog->lastparen = 0;
1691 PL_regsize = 0;
1692 DEBUG_r(PL_reg_starttry = startpos);
1693 if (PL_reg_start_tmpl <= prog->nparens) {
1694 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1695 if(PL_reg_start_tmp)
1696 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1697 else
1698 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1699 }
1700
1701 /* XXXX What this code is doing here?!!! There should be no need
1702 to do this again and again, PL_reglastparen should take care of
1703 this! */
1704 sp = prog->startp;
1705 ep = prog->endp;
1706 if (prog->nparens) {
1707 for (i = prog->nparens; i >= 1; i--) {
1708 *++sp = -1;
1709 *++ep = -1;
1710 }
1711 }
1712 REGCP_SET;
1713 if (regmatch(prog->program + 1)) {
1714 prog->endp[0] = PL_reginput - PL_bostr;
1715 return 1;
1716 }
1717 REGCP_UNWIND;
1718 return 0;
1719}
1720
1721/*
1722 - regmatch - main matching routine
1723 *
1724 * Conceptually the strategy is simple: check to see whether the current
1725 * node matches, call self recursively to see whether the rest matches,
1726 * and then act accordingly. In practice we make some effort to avoid
1727 * recursion, in particular by going through "ordinary" nodes (that don't
1728 * need to know whether the rest of the match failed) by a loop instead of
1729 * by recursion.
1730 */
1731/* [lwall] I've hoisted the register declarations to the outer block in order to
1732 * maybe save a little bit of pushing and popping on the stack. It also takes
1733 * advantage of machines that use a register save mask on subroutine entry.
1734 */
1735STATIC I32 /* 0 failure, 1 success */
1736S_regmatch(pTHX_ regnode *prog)
1737{
1738 dTHR;
1739 register regnode *scan; /* Current node. */
1740 regnode *next; /* Next node. */
1741 regnode *inner; /* Next node in internal branch. */
1742 register I32 nextchr; /* renamed nextchr - nextchar colides with
1743 function of same name */
1744 register I32 n; /* no or next */
1745 register I32 ln; /* len or last */
1746 register char *s; /* operand or save */
1747 register char *locinput = PL_reginput;
1748 register I32 c1, c2, paren; /* case fold search, parenth */
1749 int minmod = 0, sw = 0, logical = 0;
1750#ifdef DEBUGGING
1751 PL_regindent++;
1752#endif
1753
1754 /* Note that nextchr is a byte even in UTF */
1755 nextchr = UCHARAT(locinput);
1756 scan = prog;
1757 while (scan != NULL) {
1758#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1759#ifdef DEBUGGING
1760# define sayYES goto yes
1761# define sayNO goto no
7821416a
IZ
1762# define sayYES_FINAL goto yes_final
1763# define sayYES_LOUD goto yes_loud
1764# define sayNO_FINAL goto no_final
1765# define sayNO_SILENT goto do_no
d6a28714
JH
1766# define saySAME(x) if (x) goto yes; else goto no
1767# define REPORT_CODE_OFF 24
1768#else
1769# define sayYES return 1
1770# define sayNO return 0
7821416a
IZ
1771# define sayYES_FINAL return 1
1772# define sayYES_LOUD return 1
1773# define sayNO_FINAL return 0
1774# define sayNO_SILENT return 0
d6a28714
JH
1775# define saySAME(x) return x
1776#endif
1777 DEBUG_r( {
1778 SV *prop = sv_newmortal();
1779 int docolor = *PL_colors[0];
1780 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1781 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1782 /* The part of the string before starttry has one color
1783 (pref0_len chars), between starttry and current
1784 position another one (pref_len - pref0_len chars),
1785 after the current position the third one.
1786 We assume that pref0_len <= pref_len, otherwise we
1787 decrease pref0_len. */
1788 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1789 ? (5 + taill) - l : locinput - PL_bostr);
1790 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1791
1792 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1793 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1794 ? (5 + taill) - pref_len : PL_regeol - locinput);
1795 if (pref0_len < 0)
1796 pref0_len = 0;
1797 if (pref0_len > pref_len)
1798 pref0_len = pref_len;
1799 regprop(prop, scan);
1800 PerlIO_printf(Perl_debug_log,
b900a521
JH
1801 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1802 (IV)(locinput - PL_bostr),
d6a28714
JH
1803 PL_colors[4], pref0_len,
1804 locinput - pref_len, PL_colors[5],
1805 PL_colors[2], pref_len - pref0_len,
1806 locinput - pref_len + pref0_len, PL_colors[3],
1807 (docolor ? "" : "> <"),
1808 PL_colors[0], l, locinput, PL_colors[1],
1809 15 - l - pref_len + 1,
1810 "",
b900a521 1811 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1812 SvPVX(prop));
1813 } );
1814
1815 next = scan + NEXT_OFF(scan);
1816 if (next == scan)
1817 next = NULL;
1818
1819 switch (OP(scan)) {
1820 case BOL:
1821 if (locinput == PL_bostr
1822 ? PL_regprev == '\n'
1823 : (PL_multiline &&
1824 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1825 {
1826 /* regtill = regbol; */
b8c5462f
JH
1827 break;
1828 }
d6a28714
JH
1829 sayNO;
1830 case MBOL:
1831 if (locinput == PL_bostr
1832 ? PL_regprev == '\n'
1833 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1834 {
b8c5462f
JH
1835 break;
1836 }
d6a28714
JH
1837 sayNO;
1838 case SBOL:
1839 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1840 break;
d6a28714
JH
1841 sayNO;
1842 case GPOS:
1843 if (locinput == PL_reg_ganch)
1844 break;
1845 sayNO;
1846 case EOL:
1847 if (PL_multiline)
1848 goto meol;
1849 else
1850 goto seol;
1851 case MEOL:
1852 meol:
1853 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1854 sayNO;
b8c5462f 1855 break;
d6a28714
JH
1856 case SEOL:
1857 seol:
1858 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1859 sayNO;
d6a28714 1860 if (PL_regeol - locinput > 1)
b8c5462f 1861 sayNO;
b8c5462f 1862 break;
d6a28714
JH
1863 case EOS:
1864 if (PL_regeol != locinput)
b8c5462f 1865 sayNO;
d6a28714
JH
1866 break;
1867 case SANYUTF8:
b8c5462f 1868 if (nextchr & 0x80) {
b8c5462f 1869 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1870 if (locinput > PL_regeol)
1871 sayNO;
b8c5462f
JH
1872 nextchr = UCHARAT(locinput);
1873 break;
1874 }
d6a28714 1875 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1876 sayNO;
b8c5462f 1877 nextchr = UCHARAT(++locinput);
a0d0e21e 1878 break;
d6a28714
JH
1879 case SANY:
1880 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1881 sayNO;
1882 nextchr = UCHARAT(++locinput);
b85d18e9 1883 break;
d6a28714 1884 case ANYUTF8:
a0ed51b3 1885 if (nextchr & 0x80) {
b8c5462f 1886 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1887 if (locinput > PL_regeol)
1888 sayNO;
a0ed51b3
LW
1889 nextchr = UCHARAT(locinput);
1890 break;
1891 }
d6a28714 1892 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3
LW
1893 sayNO;
1894 nextchr = UCHARAT(++locinput);
1895 break;
d6a28714
JH
1896 case REG_ANY:
1897 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1898 sayNO;
76e3520e 1899 nextchr = UCHARAT(++locinput);
a0d0e21e 1900 break;
d6a28714 1901 case EXACT:
cd439c50
IZ
1902 s = STRING(scan);
1903 ln = STR_LEN(scan);
d6a28714
JH
1904 /* Inline the first character, for speed. */
1905 if (UCHARAT(s) != nextchr)
1906 sayNO;
1907 if (PL_regeol - locinput < ln)
1908 sayNO;
1909 if (ln > 1 && memNE(s, locinput, ln))
1910 sayNO;
1911 locinput += ln;
1912 nextchr = UCHARAT(locinput);
1913 break;
1914 case EXACTFL:
b8c5462f
JH
1915 PL_reg_flags |= RF_tainted;
1916 /* FALL THROUGH */
d6a28714 1917 case EXACTF:
cd439c50
IZ
1918 s = STRING(scan);
1919 ln = STR_LEN(scan);
d6a28714
JH
1920
1921 if (UTF) {
1922 char *l = locinput;
1923 char *e = s + ln;
1924 c1 = OP(scan) == EXACTF;
1925 while (s < e) {
1926 if (l >= PL_regeol)
1927 sayNO;
1928 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1929 toLOWER_utf8((U8*)l) :
1930 toLOWER_LC_utf8((U8*)l)))
1931 {
1932 sayNO;
1933 }
1934 s += UTF8SKIP(s);
1935 l += UTF8SKIP(l);
b8c5462f 1936 }
d6a28714 1937 locinput = l;
a0ed51b3
LW
1938 nextchr = UCHARAT(locinput);
1939 break;
1940 }
d6a28714
JH
1941
1942 /* Inline the first character, for speed. */
1943 if (UCHARAT(s) != nextchr &&
1944 UCHARAT(s) != ((OP(scan) == EXACTF)
1945 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1946 sayNO;
d6a28714 1947 if (PL_regeol - locinput < ln)
b8c5462f 1948 sayNO;
d6a28714
JH
1949 if (ln > 1 && (OP(scan) == EXACTF
1950 ? ibcmp(s, locinput, ln)
1951 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1952 sayNO;
d6a28714
JH
1953 locinput += ln;
1954 nextchr = UCHARAT(locinput);
a0d0e21e 1955 break;
d6a28714 1956 case ANYOFUTF8:
d6a28714 1957 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1958 sayNO;
d6a28714
JH
1959 if (locinput >= PL_regeol)
1960 sayNO;
1961 locinput += PL_utf8skip[nextchr];
1962 nextchr = UCHARAT(locinput);
1963 break;
1964 case ANYOF:
d6a28714 1965 if (nextchr < 0)
b8c5462f 1966 nextchr = UCHARAT(locinput);
936ed897 1967 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
1968 sayNO;
1969 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1970 sayNO;
b8c5462f
JH
1971 nextchr = UCHARAT(++locinput);
1972 break;
d6a28714 1973 case ALNUML:
b8c5462f
JH
1974 PL_reg_flags |= RF_tainted;
1975 /* FALL THROUGH */
d6a28714 1976 case ALNUM:
b8c5462f 1977 if (!nextchr)
4633a7c4 1978 sayNO;
d6a28714
JH
1979 if (!(OP(scan) == ALNUM
1980 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1981 sayNO;
1982 nextchr = UCHARAT(++locinput);
bbce6d69 1983 break;
d6a28714 1984 case ALNUMLUTF8:
3280af22 1985 PL_reg_flags |= RF_tainted;
bbce6d69 1986 /* FALL THROUGH */
d6a28714 1987 case ALNUMUTF8:
b8c5462f
JH
1988 if (!nextchr)
1989 sayNO;
1990 if (nextchr & 0x80) {
d6a28714
JH
1991 if (!(OP(scan) == ALNUMUTF8
1992 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1993 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1994 {
1995 sayNO;
a0ed51b3 1996 }
b8c5462f 1997 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1998 nextchr = UCHARAT(locinput);
1999 break;
2000 }
d6a28714
JH
2001 if (!(OP(scan) == ALNUMUTF8
2002 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2003 sayNO;
b8c5462f 2004 nextchr = UCHARAT(++locinput);
a0d0e21e 2005 break;
d6a28714 2006 case NALNUML:
b8c5462f
JH
2007 PL_reg_flags |= RF_tainted;
2008 /* FALL THROUGH */
d6a28714
JH
2009 case NALNUM:
2010 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2011 sayNO;
d6a28714
JH
2012 if (OP(scan) == NALNUM
2013 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2014 sayNO;
b8c5462f 2015 nextchr = UCHARAT(++locinput);
a0ed51b3 2016 break;
d6a28714 2017 case NALNUMLUTF8:
b8c5462f
JH
2018 PL_reg_flags |= RF_tainted;
2019 /* FALL THROUGH */
d6a28714 2020 case NALNUMUTF8:
3280af22 2021 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2022 sayNO;
b8c5462f 2023 if (nextchr & 0x80) {
d6a28714
JH
2024 if (OP(scan) == NALNUMUTF8
2025 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2026 : isALNUM_LC_utf8((U8*)locinput))
2027 {
b8c5462f 2028 sayNO;
d6a28714 2029 }
b8c5462f
JH
2030 locinput += PL_utf8skip[nextchr];
2031 nextchr = UCHARAT(locinput);
2032 break;
2033 }
d6a28714
JH
2034 if (OP(scan) == NALNUMUTF8
2035 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2036 sayNO;
76e3520e 2037 nextchr = UCHARAT(++locinput);
a0d0e21e 2038 break;
d6a28714
JH
2039 case BOUNDL:
2040 case NBOUNDL:
3280af22 2041 PL_reg_flags |= RF_tainted;
bbce6d69 2042 /* FALL THROUGH */
d6a28714
JH
2043 case BOUND:
2044 case NBOUND:
2045 /* was last char in word? */
2046 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2047 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2048 ln = isALNUM(ln);
2049 n = isALNUM(nextchr);
2050 }
2051 else {
2052 ln = isALNUM_LC(ln);
2053 n = isALNUM_LC(nextchr);
2054 }
2055 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2056 sayNO;
a0d0e21e 2057 break;
d6a28714
JH
2058 case BOUNDLUTF8:
2059 case NBOUNDLUTF8:
a0ed51b3
LW
2060 PL_reg_flags |= RF_tainted;
2061 /* FALL THROUGH */
d6a28714
JH
2062 case BOUNDUTF8:
2063 case NBOUNDUTF8:
2064 /* was last char in word? */
2065 ln = (locinput != PL_regbol)
2066 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2067 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2068 ln = isALNUM_uni(ln);
2069 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2070 }
d6a28714
JH
2071 else {
2072 ln = isALNUM_LC_uni(ln);
2073 n = isALNUM_LC_utf8((U8*)locinput);
2074 }
2075 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2076 sayNO;
a0ed51b3 2077 break;
d6a28714 2078 case SPACEL:
3280af22 2079 PL_reg_flags |= RF_tainted;
bbce6d69 2080 /* FALL THROUGH */
d6a28714
JH
2081 case SPACE:
2082 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2083 sayNO;
d6a28714
JH
2084 if (!(OP(scan) == SPACE
2085 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2086 sayNO;
76e3520e 2087 nextchr = UCHARAT(++locinput);
a0d0e21e 2088 break;
d6a28714 2089 case SPACELUTF8:
a0ed51b3
LW
2090 PL_reg_flags |= RF_tainted;
2091 /* FALL THROUGH */
d6a28714 2092 case SPACEUTF8:
a0ed51b3
LW
2093 if (!nextchr && locinput >= PL_regeol)
2094 sayNO;
2095 if (nextchr & 0x80) {
d6a28714
JH
2096 if (!(OP(scan) == SPACEUTF8
2097 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2098 : isSPACE_LC_utf8((U8*)locinput)))
2099 {
a0ed51b3 2100 sayNO;
d6a28714 2101 }
6f06b55f 2102 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2103 nextchr = UCHARAT(locinput);
2104 break;
2105 }
d6a28714
JH
2106 if (!(OP(scan) == SPACEUTF8
2107 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2108 sayNO;
2109 nextchr = UCHARAT(++locinput);
2110 break;
d6a28714 2111 case NSPACEL:
3280af22 2112 PL_reg_flags |= RF_tainted;
bbce6d69 2113 /* FALL THROUGH */
d6a28714 2114 case NSPACE:
b8c5462f
JH
2115 if (!nextchr)
2116 sayNO;
d6a28714
JH
2117 if (OP(scan) == SPACE
2118 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2119 sayNO;
b8c5462f 2120 nextchr = UCHARAT(++locinput);
a0d0e21e 2121 break;
d6a28714 2122 case NSPACELUTF8:
a0ed51b3
LW
2123 PL_reg_flags |= RF_tainted;
2124 /* FALL THROUGH */
d6a28714 2125 case NSPACEUTF8:
b8c5462f
JH
2126 if (!nextchr)
2127 sayNO;
2128 if (nextchr & 0x80) {
d6a28714
JH
2129 if (OP(scan) == NSPACEUTF8
2130 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2131 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2132 {
2133 sayNO;
2134 }
2135 locinput += PL_utf8skip[nextchr];
2136 nextchr = UCHARAT(locinput);
2137 break;
a0ed51b3 2138 }
d6a28714
JH
2139 if (OP(scan) == NSPACEUTF8
2140 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2141 sayNO;
76e3520e 2142 nextchr = UCHARAT(++locinput);
a0d0e21e 2143 break;
d6a28714 2144 case DIGITL:
a0ed51b3
LW
2145 PL_reg_flags |= RF_tainted;
2146 /* FALL THROUGH */
d6a28714 2147 case DIGIT:
a0ed51b3
LW
2148 if (!nextchr && locinput >= PL_regeol)
2149 sayNO;
d6a28714
JH
2150 if (!(OP(scan) == DIGIT
2151 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2152 sayNO;
76e3520e 2153 nextchr = UCHARAT(++locinput);
a0d0e21e 2154 break;
d6a28714 2155 case DIGITLUTF8:
a0ed51b3
LW
2156 PL_reg_flags |= RF_tainted;
2157 /* FALL THROUGH */
d6a28714 2158 case DIGITUTF8:
a0ed51b3
LW
2159 if (!nextchr)
2160 sayNO;
2161 if (nextchr & 0x80) {
d6a28714
JH
2162 if (OP(scan) == NDIGITUTF8
2163 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2164 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 2165 {
a0ed51b3 2166 sayNO;
dfe13c55 2167 }
6f06b55f 2168 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2169 nextchr = UCHARAT(locinput);
2170 break;
2171 }
d6a28714 2172 if (!isDIGIT(nextchr))
a0ed51b3
LW
2173 sayNO;
2174 nextchr = UCHARAT(++locinput);
2175 break;
d6a28714 2176 case NDIGITL:
b8c5462f
JH
2177 PL_reg_flags |= RF_tainted;
2178 /* FALL THROUGH */
d6a28714 2179 case NDIGIT:
b8c5462f
JH
2180 if (!nextchr)
2181 sayNO;
d6a28714
JH
2182 if (OP(scan) == DIGIT
2183 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2184 sayNO;
76e3520e 2185 nextchr = UCHARAT(++locinput);
a0d0e21e 2186 break;
d6a28714 2187 case NDIGITLUTF8:
b8c5462f
JH
2188 PL_reg_flags |= RF_tainted;
2189 /* FALL THROUGH */
d6a28714 2190 case NDIGITUTF8:
b8c5462f
JH
2191 if (!nextchr && locinput >= PL_regeol)
2192 sayNO;
a0ed51b3 2193 if (nextchr & 0x80) {
d6a28714 2194 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 2195 sayNO;
6f06b55f 2196 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2197 nextchr = UCHARAT(locinput);
2198 break;
2199 }
d6a28714 2200 if (isDIGIT(nextchr))
a0ed51b3
LW
2201 sayNO;
2202 nextchr = UCHARAT(++locinput);
2203 break;
2204 case CLUMP:
dfe13c55 2205 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2206 sayNO;
6f06b55f 2207 locinput += PL_utf8skip[nextchr];
dfe13c55 2208 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2209 locinput += UTF8SKIP(locinput);
2210 if (locinput > PL_regeol)
2211 sayNO;
2212 nextchr = UCHARAT(locinput);
2213 break;
c8756f30 2214 case REFFL:
3280af22 2215 PL_reg_flags |= RF_tainted;
c8756f30 2216 /* FALL THROUGH */
c277df42 2217 case REF:
c8756f30 2218 case REFF:
c277df42 2219 n = ARG(scan); /* which paren pair */
cf93c79d 2220 ln = PL_regstartp[n];
2c2d71f5 2221 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2222 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2223 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2224 if (ln == PL_regendp[n])
a0d0e21e 2225 break;
a0ed51b3 2226
cf93c79d 2227 s = PL_bostr + ln;
a0ed51b3
LW
2228 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2229 char *l = locinput;
cf93c79d 2230 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2231 /*
2232 * Note that we can't do the "other character" lookup trick as
2233 * in the 8-bit case (no pun intended) because in Unicode we
2234 * have to map both upper and title case to lower case.
2235 */
2236 if (OP(scan) == REFF) {
2237 while (s < e) {
2238 if (l >= PL_regeol)
2239 sayNO;
dfe13c55 2240 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2241 sayNO;
2242 s += UTF8SKIP(s);
2243 l += UTF8SKIP(l);
2244 }
2245 }
2246 else {
2247 while (s < e) {
2248 if (l >= PL_regeol)
2249 sayNO;
dfe13c55 2250 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2251 sayNO;
2252 s += UTF8SKIP(s);
2253 l += UTF8SKIP(l);
2254 }
2255 }
2256 locinput = l;
2257 nextchr = UCHARAT(locinput);
2258 break;
2259 }
2260
a0d0e21e 2261 /* Inline the first character, for speed. */
76e3520e 2262 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2263 (OP(scan) == REF ||
2264 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2265 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2266 sayNO;
cf93c79d 2267 ln = PL_regendp[n] - ln;
3280af22 2268 if (locinput + ln > PL_regeol)
4633a7c4 2269 sayNO;
c8756f30
AK
2270 if (ln > 1 && (OP(scan) == REF
2271 ? memNE(s, locinput, ln)
2272 : (OP(scan) == REFF
2273 ? ibcmp(s, locinput, ln)
2274 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2275 sayNO;
a0d0e21e 2276 locinput += ln;
76e3520e 2277 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2278 break;
2279
2280 case NOTHING:
c277df42 2281 case TAIL:
a0d0e21e
LW
2282 break;
2283 case BACK:
2284 break;
c277df42
IZ
2285 case EVAL:
2286 {
2287 dSP;
533c011a 2288 OP_4tree *oop = PL_op;
3280af22
NIS
2289 COP *ocurcop = PL_curcop;
2290 SV **ocurpad = PL_curpad;
c277df42
IZ
2291 SV *ret;
2292
2293 n = ARG(scan);
533c011a 2294 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2295 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2296 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2297 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2298
cea2e8a9 2299 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2300 SPAGAIN;
2301 ret = POPs;
2302 PUTBACK;
2303
0f5d15d6
IZ
2304 PL_op = oop;
2305 PL_curpad = ocurpad;
2306 PL_curcop = ocurcop;
c277df42 2307 if (logical) {
0f5d15d6
IZ
2308 if (logical == 2) { /* Postponed subexpression. */
2309 regexp *re;
22c35a8c 2310 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2311 re_cc_state state;
0f5d15d6
IZ
2312 CHECKPOINT cp, lastcp;
2313
2314 if(SvROK(ret) || SvRMAGICAL(ret)) {
2315 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2316
2317 if(SvMAGICAL(sv))
2318 mg = mg_find(sv, 'r');
2319 }
2320 if (mg) {
2321 re = (regexp *)mg->mg_obj;
df0003d4 2322 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2323 }
2324 else {
2325 STRLEN len;
2326 char *t = SvPV(ret, len);
2327 PMOP pm;
2328 char *oprecomp = PL_regprecomp;
2329 I32 osize = PL_regsize;
2330 I32 onpar = PL_regnpar;
2331
2332 pm.op_pmflags = 0;
cea2e8a9 2333 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2334 if (!(SvFLAGS(ret)
2335 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2336 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2337 PL_regprecomp = oprecomp;
2338 PL_regsize = osize;
2339 PL_regnpar = onpar;
2340 }
2341 DEBUG_r(
2342 PerlIO_printf(Perl_debug_log,
2343 "Entering embedded `%s%.60s%s%s'\n",
2344 PL_colors[0],
2345 re->precomp,
2346 PL_colors[1],
2347 (strlen(re->precomp) > 60 ? "..." : ""))
2348 );
2349 state.node = next;
2350 state.prev = PL_reg_call_cc;
2351 state.cc = PL_regcc;
2352 state.re = PL_reg_re;
2353
2ab05381 2354 PL_regcc = 0;
0f5d15d6
IZ
2355
2356 cp = regcppush(0); /* Save *all* the positions. */
2357 REGCP_SET;
2358 cache_re(re);
2359 state.ss = PL_savestack_ix;
2360 *PL_reglastparen = 0;
2361 PL_reg_call_cc = &state;
2362 PL_reginput = locinput;
2c2d71f5
JH
2363
2364 /* XXXX This is too dramatic a measure... */
2365 PL_reg_maxiter = 0;
2366
0f5d15d6 2367 if (regmatch(re->program + 1)) {
2c914db6
IZ
2368 /* Even though we succeeded, we need to restore
2369 global variables, since we may be wrapped inside
2370 SUSPEND, thus the match may be not finished yet. */
2371
2372 /* XXXX Do this only if SUSPENDed? */
2373 PL_reg_call_cc = state.prev;
2374 PL_regcc = state.cc;
2375 PL_reg_re = state.re;
2376 cache_re(PL_reg_re);
2377
2378 /* XXXX This is too dramatic a measure... */
2379 PL_reg_maxiter = 0;
2380
2381 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2382 ReREFCNT_dec(re);
2383 regcpblow(cp);
2384 sayYES;
2385 }
0f5d15d6
IZ
2386 ReREFCNT_dec(re);
2387 REGCP_UNWIND;
2388 regcppop();
2389 PL_reg_call_cc = state.prev;
2390 PL_regcc = state.cc;
2391 PL_reg_re = state.re;
d3790889 2392 cache_re(PL_reg_re);
2c2d71f5
JH
2393
2394 /* XXXX This is too dramatic a measure... */
2395 PL_reg_maxiter = 0;
2396
0f5d15d6
IZ
2397 sayNO;
2398 }
c277df42 2399 sw = SvTRUE(ret);
0f5d15d6 2400 logical = 0;
a0ed51b3
LW
2401 }
2402 else
3280af22 2403 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2404 break;
2405 }
a0d0e21e 2406 case OPEN:
c277df42 2407 n = ARG(scan); /* which paren pair */
3280af22
NIS
2408 PL_reg_start_tmp[n] = locinput;
2409 if (n > PL_regsize)
2410 PL_regsize = n;
a0d0e21e
LW
2411 break;
2412 case CLOSE:
c277df42 2413 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2414 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2415 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2416 if (n > *PL_reglastparen)
2417 *PL_reglastparen = n;
a0d0e21e 2418 break;
c277df42
IZ
2419 case GROUPP:
2420 n = ARG(scan); /* which paren pair */
cf93c79d 2421 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2422 break;
2423 case IFTHEN:
2c2d71f5 2424 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2425 if (sw)
2426 next = NEXTOPER(NEXTOPER(scan));
2427 else {
2428 next = scan + ARG(scan);
2429 if (OP(next) == IFTHEN) /* Fake one. */
2430 next = NEXTOPER(NEXTOPER(next));
2431 }
2432 break;
2433 case LOGICAL:
0f5d15d6 2434 logical = scan->flags;
c277df42 2435 break;
2ab05381
IZ
2436/*******************************************************************
2437 PL_regcc contains infoblock about the innermost (...)* loop, and
2438 a pointer to the next outer infoblock.
2439
2440 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2441
2442 1) After matching X, regnode for CURLYX is processed;
2443
2444 2) This regnode creates infoblock on the stack, and calls
2445 regmatch() recursively with the starting point at WHILEM node;
2446
2447 3) Each hit of WHILEM node tries to match A and Z (in the order
2448 depending on the current iteration, min/max of {min,max} and
2449 greediness). The information about where are nodes for "A"
2450 and "Z" is read from the infoblock, as is info on how many times "A"
2451 was already matched, and greediness.
2452
2453 4) After A matches, the same WHILEM node is hit again.
2454
2455 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2456 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2457 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2458 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2459 of the external loop.
2460
2461 Currently present infoblocks form a tree with a stem formed by PL_curcc
2462 and whatever it mentions via ->next, and additional attached trees
2463 corresponding to temporarily unset infoblocks as in "5" above.
2464
2465 In the following picture infoblocks for outer loop of
2466 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2467 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2468 infoblocks are drawn below the "reset" infoblock.
2469
2470 In fact in the picture below we do not show failed matches for Z and T
2471 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2472 more obvious *why* one needs to *temporary* unset infoblocks.]
2473
2474 Matched REx position InfoBlocks Comment
2475 (Y(A)*?Z)*?T x
2476 Y(A)*?Z)*?T x <- O
2477 Y (A)*?Z)*?T x <- O
2478 Y A)*?Z)*?T x <- O <- I
2479 YA )*?Z)*?T x <- O <- I
2480 YA A)*?Z)*?T x <- O <- I
2481 YAA )*?Z)*?T x <- O <- I
2482 YAA Z)*?T x <- O # Temporary unset I
2483 I
2484
2485 YAAZ Y(A)*?Z)*?T x <- O
2486 I
2487
2488 YAAZY (A)*?Z)*?T x <- O
2489 I
2490
2491 YAAZY A)*?Z)*?T x <- O <- I
2492 I
2493
2494 YAAZYA )*?Z)*?T x <- O <- I
2495 I
2496
2497 YAAZYA Z)*?T x <- O # Temporary unset I
2498 I,I
2499
2500 YAAZYAZ )*?T x <- O
2501 I,I
2502
2503 YAAZYAZ T x # Temporary unset O
2504 O
2505 I,I
2506
2507 YAAZYAZT x
2508 O
2509 I,I
2510 *******************************************************************/
a0d0e21e
LW
2511 case CURLYX: {
2512 CURCUR cc;
3280af22 2513 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2514
2515 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2516 next += ARG(next);
3280af22
NIS
2517 cc.oldcc = PL_regcc;
2518 PL_regcc = &cc;
2519 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2520 cc.cur = -1;
2521 cc.min = ARG1(scan);
2522 cc.max = ARG2(scan);
c277df42 2523 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2524 cc.next = next;
2525 cc.minmod = minmod;
2526 cc.lastloc = 0;
3280af22 2527 PL_reginput = locinput;
a0d0e21e
LW
2528 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2529 regcpblow(cp);
3280af22 2530 PL_regcc = cc.oldcc;
4633a7c4 2531 saySAME(n);
a0d0e21e
LW
2532 }
2533 /* NOT REACHED */
2534 case WHILEM: {
2535 /*
2536 * This is really hard to understand, because after we match
2537 * what we're trying to match, we must make sure the rest of
2c2d71f5 2538 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2539 * to go back UP the parse tree by recursing ever deeper. And
2540 * if it fails, we have to reset our parent's current state
2541 * that we can try again after backing off.
2542 */
2543
c277df42 2544 CHECKPOINT cp, lastcp;
3280af22 2545 CURCUR* cc = PL_regcc;
c277df42
IZ
2546 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2547
4633a7c4 2548 n = cc->cur + 1; /* how many we know we matched */
3280af22 2549 PL_reginput = locinput;
a0d0e21e 2550
c277df42
IZ
2551 DEBUG_r(
2552 PerlIO_printf(Perl_debug_log,
2553 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2554 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2555 (long)n, (long)cc->min,
2556 (long)cc->max, (long)cc)
2557 );
4633a7c4 2558
a0d0e21e
LW
2559 /* If degenerate scan matches "", assume scan done. */
2560
579cf2c3 2561 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2562 PL_regcc = cc->oldcc;
2ab05381
IZ
2563 if (PL_regcc)
2564 ln = PL_regcc->cur;
c277df42 2565 DEBUG_r(
c3464db5
DD
2566 PerlIO_printf(Perl_debug_log,
2567 "%*s empty match detected, try continuation...\n",
3280af22 2568 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2569 );
a0d0e21e 2570 if (regmatch(cc->next))
4633a7c4 2571 sayYES;
2ab05381
IZ
2572 if (PL_regcc)
2573 PL_regcc->cur = ln;
3280af22 2574 PL_regcc = cc;
4633a7c4 2575 sayNO;
a0d0e21e
LW
2576 }
2577
2578 /* First just match a string of min scans. */
2579
2580 if (n < cc->min) {
2581 cc->cur = n;
2582 cc->lastloc = locinput;
4633a7c4
LW
2583 if (regmatch(cc->scan))
2584 sayYES;
2585 cc->cur = n - 1;
c277df42 2586 cc->lastloc = lastloc;
4633a7c4 2587 sayNO;
a0d0e21e
LW
2588 }
2589
2c2d71f5
JH
2590 if (scan->flags) {
2591 /* Check whether we already were at this position.
2592 Postpone detection until we know the match is not
2593 *that* much linear. */
2594 if (!PL_reg_maxiter) {
2595 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2596 PL_reg_leftiter = PL_reg_maxiter;
2597 }
2598 if (PL_reg_leftiter-- == 0) {
2599 I32 size = (PL_reg_maxiter + 7)/8;
2600 if (PL_reg_poscache) {
2601 if (PL_reg_poscache_size < size) {
2602 Renew(PL_reg_poscache, size, char);
2603 PL_reg_poscache_size = size;
2604 }
2605 Zero(PL_reg_poscache, size, char);
2606 }
2607 else {
2608 PL_reg_poscache_size = size;
2609 Newz(29, PL_reg_poscache, size, char);
2610 }
2611 DEBUG_r(
2612 PerlIO_printf(Perl_debug_log,
2613 "%sDetected a super-linear match, switching on caching%s...\n",
2614 PL_colors[4], PL_colors[5])
2615 );
2616 }
2617 if (PL_reg_leftiter < 0) {
2618 I32 o = locinput - PL_bostr, b;
2619
2620 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2621 b = o % 8;
2622 o /= 8;
2623 if (PL_reg_poscache[o] & (1<<b)) {
2624 DEBUG_r(
2625 PerlIO_printf(Perl_debug_log,
2626 "%*s already tried at this position...\n",
2627 REPORT_CODE_OFF+PL_regindent*2, "")
2628 );
7821416a 2629 sayNO_SILENT;
2c2d71f5
JH
2630 }
2631 PL_reg_poscache[o] |= (1<<b);
2632 }
2633 }
2634
a0d0e21e
LW
2635 /* Prefer next over scan for minimal matching. */
2636
2637 if (cc->minmod) {
3280af22 2638 PL_regcc = cc->oldcc;
2ab05381
IZ
2639 if (PL_regcc)
2640 ln = PL_regcc->cur;
5f05dabc 2641 cp = regcppush(cc->parenfloor);
c277df42 2642 REGCP_SET;
5f05dabc 2643 if (regmatch(cc->next)) {
c277df42 2644 regcpblow(cp);
4633a7c4 2645 sayYES; /* All done. */
5f05dabc 2646 }
c277df42 2647 REGCP_UNWIND;
5f05dabc 2648 regcppop();
2ab05381
IZ
2649 if (PL_regcc)
2650 PL_regcc->cur = ln;
3280af22 2651 PL_regcc = cc;
a0d0e21e 2652
c277df42 2653 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2654 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
2655 && !(PL_reg_flags & RF_warned)) {
2656 PL_reg_flags |= RF_warned;
cea2e8a9 2657 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
2658 "Complex regular subexpression recursion",
2659 REG_INFTY - 1);
c277df42 2660 }
4633a7c4 2661 sayNO;
c277df42 2662 }
a687059c 2663
c277df42 2664 DEBUG_r(
c3464db5
DD
2665 PerlIO_printf(Perl_debug_log,
2666 "%*s trying longer...\n",
3280af22 2667 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2668 );
a0d0e21e 2669 /* Try scanning more and see if it helps. */
3280af22 2670 PL_reginput = locinput;
a0d0e21e
LW
2671 cc->cur = n;
2672 cc->lastloc = locinput;
5f05dabc 2673 cp = regcppush(cc->parenfloor);
c277df42 2674 REGCP_SET;
5f05dabc 2675 if (regmatch(cc->scan)) {
c277df42 2676 regcpblow(cp);
4633a7c4 2677 sayYES;
5f05dabc 2678 }
c277df42 2679 REGCP_UNWIND;
5f05dabc 2680 regcppop();
4633a7c4 2681 cc->cur = n - 1;
c277df42 2682 cc->lastloc = lastloc;
4633a7c4 2683 sayNO;
a0d0e21e
LW
2684 }
2685
2686 /* Prefer scan over next for maximal matching. */
2687
2688 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2689 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2690 cc->cur = n;
2691 cc->lastloc = locinput;
c277df42 2692 REGCP_SET;
5f05dabc 2693 if (regmatch(cc->scan)) {
c277df42 2694 regcpblow(cp);
4633a7c4 2695 sayYES;
5f05dabc 2696 }
c277df42 2697 REGCP_UNWIND;
a0d0e21e 2698 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2699 PL_reginput = locinput;
c277df42 2700 DEBUG_r(
c3464db5
DD
2701 PerlIO_printf(Perl_debug_log,
2702 "%*s failed, try continuation...\n",
3280af22 2703 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2704 );
2705 }
599cee73
PM
2706 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2707 && !(PL_reg_flags & RF_warned)) {
3280af22 2708 PL_reg_flags |= RF_warned;
cea2e8a9 2709 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
2710 "Complex regular subexpression recursion",
2711 REG_INFTY - 1);
a0d0e21e
LW
2712 }
2713
2714 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2715 PL_regcc = cc->oldcc;
2ab05381
IZ
2716 if (PL_regcc)
2717 ln = PL_regcc->cur;
a0d0e21e 2718 if (regmatch(cc->next))
4633a7c4 2719 sayYES;
2ab05381
IZ
2720 if (PL_regcc)
2721 PL_regcc->cur = ln;
3280af22 2722 PL_regcc = cc;
4633a7c4 2723 cc->cur = n - 1;
c277df42 2724 cc->lastloc = lastloc;
4633a7c4 2725 sayNO;
a0d0e21e
LW
2726 }
2727 /* NOT REACHED */
c277df42
IZ
2728 case BRANCHJ:
2729 next = scan + ARG(scan);
2730 if (next == scan)
2731 next = NULL;
2732 inner = NEXTOPER(NEXTOPER(scan));
2733 goto do_branch;
2734 case BRANCH:
2735 inner = NEXTOPER(scan);
2736 do_branch:
2737 {
2738 CHECKPOINT lastcp;
2739 c1 = OP(scan);
2740 if (OP(next) != c1) /* No choice. */
2741 next = inner; /* Avoid recursion. */
a0d0e21e 2742 else {
3280af22 2743 int lastparen = *PL_reglastparen;
c277df42
IZ
2744
2745 REGCP_SET;
a0d0e21e 2746 do {
3280af22 2747 PL_reginput = locinput;
c277df42 2748 if (regmatch(inner))
4633a7c4 2749 sayYES;
c277df42 2750 REGCP_UNWIND;
3280af22 2751 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2752 PL_regendp[n] = -1;
3280af22 2753 *PL_reglastparen = n;
c277df42 2754 scan = next;
a0d0e21e 2755 /*SUPPRESS 560*/
c277df42
IZ
2756 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2757 next += n;
a0d0e21e 2758 else
c277df42 2759 next = NULL;
c277df42
IZ
2760 inner = NEXTOPER(scan);
2761 if (c1 == BRANCHJ) {
2762 inner = NEXTOPER(inner);
2763 }
2764 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2765 sayNO;
a0d0e21e 2766 /* NOTREACHED */
a687059c 2767 }
a0d0e21e
LW
2768 }
2769 break;
2770 case MINMOD:
2771 minmod = 1;
2772 break;
c277df42
IZ
2773 case CURLYM:
2774 {
00db4c45 2775 I32 l = 0;
c277df42
IZ
2776 CHECKPOINT lastcp;
2777
2778 /* We suppose that the next guy does not need
2779 backtracking: in particular, it is of constant length,
2780 and has no parenths to influence future backrefs. */
2781 ln = ARG1(scan); /* min to match */
2782 n = ARG2(scan); /* max to match */
c277df42
IZ
2783 paren = scan->flags;
2784 if (paren) {
3280af22
NIS
2785 if (paren > PL_regsize)
2786 PL_regsize = paren;
2787 if (paren > *PL_reglastparen)
2788 *PL_reglastparen = paren;
c277df42 2789 }
dc45a647 2790 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2791 if (paren)
2792 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2793 PL_reginput = locinput;
c277df42
IZ
2794 if (minmod) {
2795 minmod = 0;
2796 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2797 sayNO;
5f4b28b2 2798 if (ln && l == 0 && n >= ln
c277df42
IZ
2799 /* In fact, this is tricky. If paren, then the
2800 fact that we did/didnot match may influence
2801 future execution. */
2802 && !(paren && ln == 0))
2803 ln = n;
3280af22 2804 locinput = PL_reginput;
22c35a8c 2805 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2806 c1 = (U8)*STRING(next);
c277df42 2807 if (OP(next) == EXACTF)
22c35a8c 2808 c2 = PL_fold[c1];
c277df42 2809 else if (OP(next) == EXACTFL)
22c35a8c 2810 c2 = PL_fold_locale[c1];
c277df42
IZ
2811 else
2812 c2 = c1;
a0ed51b3
LW
2813 }
2814 else
c277df42
IZ
2815 c1 = c2 = -1000;
2816 REGCP_SET;
5f4b28b2 2817 /* This may be improved if l == 0. */
c277df42
IZ
2818 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2819 /* If it could work, try it. */
2820 if (c1 == -1000 ||
3280af22
NIS
2821 UCHARAT(PL_reginput) == c1 ||
2822 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2823 {
2824 if (paren) {
2825 if (n) {
cf93c79d
IZ
2826 PL_regstartp[paren] =
2827 HOPc(PL_reginput, -l) - PL_bostr;
2828 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2829 }
2830 else
cf93c79d 2831 PL_regendp[paren] = -1;
c277df42
IZ
2832 }
2833 if (regmatch(next))
2834 sayYES;
2835 REGCP_UNWIND;
2836 }
2837 /* Couldn't or didn't -- move forward. */
3280af22 2838 PL_reginput = locinput;
c277df42
IZ
2839 if (regrepeat_hard(scan, 1, &l)) {
2840 ln++;
3280af22 2841 locinput = PL_reginput;
c277df42
IZ
2842 }
2843 else
2844 sayNO;
2845 }
a0ed51b3
LW
2846 }
2847 else {
c277df42
IZ
2848 n = regrepeat_hard(scan, n, &l);
2849 if (n != 0 && l == 0
2850 /* In fact, this is tricky. If paren, then the
2851 fact that we did/didnot match may influence
2852 future execution. */
2853 && !(paren && ln == 0))
2854 ln = n;
3280af22 2855 locinput = PL_reginput;
c277df42 2856 DEBUG_r(
5c0ca799 2857 PerlIO_printf(Perl_debug_log,
faccc32b 2858 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2859 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2860 (IV) n, (IV)l)
c277df42
IZ
2861 );
2862 if (n >= ln) {
22c35a8c 2863 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2864 c1 = (U8)*STRING(next);
c277df42 2865 if (OP(next) == EXACTF)
22c35a8c 2866 c2 = PL_fold[c1];
c277df42 2867 else if (OP(next) == EXACTFL)
22c35a8c 2868 c2 = PL_fold_locale[c1];
c277df42
IZ
2869 else
2870 c2 = c1;
a0ed51b3
LW
2871 }
2872 else
c277df42
IZ
2873 c1 = c2 = -1000;
2874 }
2875 REGCP_SET;
2876 while (n >= ln) {
2877 /* If it could work, try it. */
2878 if (c1 == -1000 ||
3280af22
NIS
2879 UCHARAT(PL_reginput) == c1 ||
2880 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2881 {
2882 DEBUG_r(
c3464db5 2883 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2884 "%*s trying tail with n=%"IVdf"...\n",
2885 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2886 );
2887 if (paren) {
2888 if (n) {
cf93c79d
IZ
2889 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2890 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2891 }
a0ed51b3 2892 else
cf93c79d 2893 PL_regendp[paren] = -1;
c277df42 2894 }
a0ed51b3
LW
2895 if (regmatch(next))
2896 sayYES;
2897 REGCP_UNWIND;
2898 }
c277df42
IZ
2899 /* Couldn't or didn't -- back up. */
2900 n--;
dfe13c55 2901 locinput = HOPc(locinput, -l);
3280af22 2902 PL_reginput = locinput;
c277df42
IZ
2903 }
2904 }
2905 sayNO;
2906 break;
2907 }
2908 case CURLYN:
2909 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2910 if (paren > PL_regsize)
2911 PL_regsize = paren;
2912 if (paren > *PL_reglastparen)
2913 *PL_reglastparen = paren;
c277df42
IZ
2914 ln = ARG1(scan); /* min to match */
2915 n = ARG2(scan); /* max to match */
dc45a647 2916 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2917 goto repeat;
a0d0e21e 2918 case CURLY:
c277df42 2919 paren = 0;
a0d0e21e
LW
2920 ln = ARG1(scan); /* min to match */
2921 n = ARG2(scan); /* max to match */
dc45a647 2922 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2923 goto repeat;
2924 case STAR:
2925 ln = 0;
c277df42 2926 n = REG_INFTY;
a0d0e21e 2927 scan = NEXTOPER(scan);
c277df42 2928 paren = 0;
a0d0e21e
LW
2929 goto repeat;
2930 case PLUS:
c277df42
IZ
2931 ln = 1;
2932 n = REG_INFTY;
2933 scan = NEXTOPER(scan);
2934 paren = 0;
2935 repeat:
a0d0e21e
LW
2936 /*
2937 * Lookahead to avoid useless match attempts
2938 * when we know what character comes next.
2939 */
22c35a8c 2940 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2941 c1 = (U8)*STRING(next);
bbce6d69 2942 if (OP(next) == EXACTF)
22c35a8c 2943 c2 = PL_fold[c1];
bbce6d69 2944 else if (OP(next) == EXACTFL)
22c35a8c 2945 c2 = PL_fold_locale[c1];
bbce6d69 2946 else
2947 c2 = c1;
2948 }
a0d0e21e 2949 else
bbce6d69 2950 c1 = c2 = -1000;
3280af22 2951 PL_reginput = locinput;
a0d0e21e 2952 if (minmod) {
c277df42 2953 CHECKPOINT lastcp;
a0d0e21e
LW
2954 minmod = 0;
2955 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2956 sayNO;
a0ed51b3 2957 locinput = PL_reginput;
c277df42 2958 REGCP_SET;
0fe9bf95
IZ
2959 if (c1 != -1000) {
2960 char *e = locinput + n - ln; /* Should not check after this */
2961 char *old = locinput;
2962
2963 if (e >= PL_regeol || (n == REG_INFTY))
2964 e = PL_regeol - 1;
2965 while (1) {
2966 /* Find place 'next' could work */
2967 if (c1 == c2) {
2968 while (locinput <= e && *locinput != c1)
2969 locinput++;
2970 } else {
2971 while (locinput <= e
2972 && *locinput != c1
2973 && *locinput != c2)
2974 locinput++;
2975 }
2976 if (locinput > e)
2977 sayNO;
2978 /* PL_reginput == old now */
2979 if (locinput != old) {
2980 ln = 1; /* Did some */
2981 if (regrepeat(scan, locinput - old) <
2982 locinput - old)
2983 sayNO;
2984 }
2985 /* PL_reginput == locinput now */
2986 if (paren) {
2987 if (ln) {
cf93c79d
IZ
2988 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2989 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2990 }
2991 else
cf93c79d 2992 PL_regendp[paren] = -1;
0fe9bf95
IZ
2993 }
2994 if (regmatch(next))
2995 sayYES;
2996 PL_reginput = locinput; /* Could be reset... */
2997 REGCP_UNWIND;
2998 /* Couldn't or didn't -- move forward. */
2999 old = locinput++;
3000 }
3001 }
3002 else
c277df42 3003 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3004 /* If it could work, try it. */
bbce6d69 3005 if (c1 == -1000 ||
3280af22
NIS
3006 UCHARAT(PL_reginput) == c1 ||
3007 UCHARAT(PL_reginput) == c2)
bbce6d69 3008 {
c277df42
IZ
3009 if (paren) {
3010 if (n) {
cf93c79d
IZ
3011 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3012 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3013 }
3014 else
cf93c79d 3015 PL_regendp[paren] = -1;
c277df42 3016 }
a0d0e21e 3017 if (regmatch(next))
4633a7c4 3018 sayYES;
c277df42 3019 REGCP_UNWIND;
bbce6d69 3020 }
c277df42 3021 /* Couldn't or didn't -- move forward. */
a0ed51b3 3022 PL_reginput = locinput;
a0d0e21e
LW
3023 if (regrepeat(scan, 1)) {
3024 ln++;
a0ed51b3
LW
3025 locinput = PL_reginput;
3026 }
3027 else
4633a7c4 3028 sayNO;
a0d0e21e
LW
3029 }
3030 }
3031 else {
c277df42 3032 CHECKPOINT lastcp;
a0d0e21e 3033 n = regrepeat(scan, n);
a0ed51b3 3034 locinput = PL_reginput;
22c35a8c 3035 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 3036 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 3037 ln = n; /* why back off? */
c277df42
IZ
3038 REGCP_SET;
3039 if (paren) {
3040 while (n >= ln) {
3041 /* If it could work, try it. */
3042 if (c1 == -1000 ||
3280af22
NIS
3043 UCHARAT(PL_reginput) == c1 ||
3044 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3045 {
3046 if (paren && n) {
3047 if (n) {
cf93c79d
IZ
3048 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3049 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3050 }
3051 else
cf93c79d 3052 PL_regendp[paren] = -1;
c277df42
IZ
3053 }
3054 if (regmatch(next))
3055 sayYES;
3056 REGCP_UNWIND;
3057 }
3058 /* Couldn't or didn't -- back up. */
3059 n--;
dfe13c55 3060 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3061 }
a0ed51b3
LW
3062 }
3063 else {
c277df42
IZ
3064 while (n >= ln) {
3065 /* If it could work, try it. */
3066 if (c1 == -1000 ||
3280af22
NIS
3067 UCHARAT(PL_reginput) == c1 ||
3068 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3069 {
3070 if (regmatch(next))
3071 sayYES;
3072 REGCP_UNWIND;
3073 }
3074 /* Couldn't or didn't -- back up. */
3075 n--;
dfe13c55 3076 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3077 }
a0d0e21e
LW
3078 }
3079 }
4633a7c4 3080 sayNO;
c277df42 3081 break;
a0d0e21e 3082 case END:
0f5d15d6
IZ
3083 if (PL_reg_call_cc) {
3084 re_cc_state *cur_call_cc = PL_reg_call_cc;
3085 CURCUR *cctmp = PL_regcc;
3086 regexp *re = PL_reg_re;
3087 CHECKPOINT cp, lastcp;
3088
3089 cp = regcppush(0); /* Save *all* the positions. */
3090 REGCP_SET;
3091 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3092 the caller. */
3093 PL_reginput = locinput; /* Make position available to
3094 the callcc. */
3095 cache_re(PL_reg_call_cc->re);
3096 PL_regcc = PL_reg_call_cc->cc;
3097 PL_reg_call_cc = PL_reg_call_cc->prev;
3098 if (regmatch(cur_call_cc->node)) {
3099 PL_reg_call_cc = cur_call_cc;
3100 regcpblow(cp);
3101 sayYES;
3102 }
3103 REGCP_UNWIND;
3104 regcppop();
3105 PL_reg_call_cc = cur_call_cc;
3106 PL_regcc = cctmp;
3107 PL_reg_re = re;
3108 cache_re(re);
3109
3110 DEBUG_r(
3111 PerlIO_printf(Perl_debug_log,
3112 "%*s continuation failed...\n",
3113 REPORT_CODE_OFF+PL_regindent*2, "")
3114 );
7821416a 3115 sayNO_SILENT;
0f5d15d6 3116 }
7821416a
IZ
3117 if (locinput < PL_regtill) {
3118 DEBUG_r(PerlIO_printf(Perl_debug_log,
3119 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3120 PL_colors[4],
3121 (long)(locinput - PL_reg_starttry),
3122 (long)(PL_regtill - PL_reg_starttry),
3123 PL_colors[5]));
3124 sayNO_FINAL; /* Cannot match: too short. */
3125 }
3126 PL_reginput = locinput; /* put where regtry can find it */
3127 sayYES_FINAL; /* Success! */
7e5428c5 3128 case SUCCEED:
3280af22 3129 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3130 sayYES_LOUD; /* Success! */
c277df42
IZ
3131 case SUSPEND:
3132 n = 1;
9fe1d20c 3133 PL_reginput = locinput;
c277df42 3134 goto do_ifmatch;
a0d0e21e 3135 case UNLESSM:
c277df42 3136 n = 0;
a0ed51b3 3137 if (scan->flags) {
0fe9bf95
IZ
3138 if (UTF) { /* XXXX This is absolutely
3139 broken, we read before
3140 start of string. */
3141 s = HOPMAYBEc(locinput, -scan->flags);
3142 if (!s)
3143 goto say_yes;
3144 PL_reginput = s;
3145 }
3146 else {
3147 if (locinput < PL_bostr + scan->flags)
3148 goto say_yes;
3149 PL_reginput = locinput - scan->flags;
3150 goto do_ifmatch;
3151 }
a0ed51b3
LW
3152 }
3153 else
3154 PL_reginput = locinput;
c277df42
IZ
3155 goto do_ifmatch;
3156 case IFMATCH:
3157 n = 1;
a0ed51b3 3158 if (scan->flags) {
0fe9bf95
IZ
3159 if (UTF) { /* XXXX This is absolutely
3160 broken, we read before
3161 start of string. */
3162 s = HOPMAYBEc(locinput, -scan->flags);
3163 if (!s || s < PL_bostr)
3164 goto say_no;
3165 PL_reginput = s;
3166 }
3167 else {
3168 if (locinput < PL_bostr + scan->flags)
3169 goto say_no;
3170 PL_reginput = locinput - scan->flags;
3171 goto do_ifmatch;
3172 }
a0ed51b3
LW
3173 }
3174 else
3175 PL_reginput = locinput;
3176
c277df42 3177 do_ifmatch:
c277df42
IZ
3178 inner = NEXTOPER(NEXTOPER(scan));
3179 if (regmatch(inner) != n) {
3180 say_no:
3181 if (logical) {
3182 logical = 0;
3183 sw = 0;
3184 goto do_longjump;
a0ed51b3
LW
3185 }
3186 else
c277df42
IZ
3187 sayNO;
3188 }
3189 say_yes:
3190 if (logical) {
3191 logical = 0;
3192 sw = 1;
3193 }
fe44a5e8 3194 if (OP(scan) == SUSPEND) {
3280af22 3195 locinput = PL_reginput;
565764a8 3196 nextchr = UCHARAT(locinput);
fe44a5e8 3197 }
c277df42
IZ
3198 /* FALL THROUGH. */
3199 case LONGJMP:
3200 do_longjump:
3201 next = scan + ARG(scan);
3202 if (next == scan)
3203 next = NULL;
a0d0e21e
LW
3204 break;
3205 default:
b900a521 3206 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3207 PTR2UV(scan), OP(scan));
cea2e8a9 3208 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3209 }
a0d0e21e
LW
3210 scan = next;
3211 }
a687059c 3212
a0d0e21e
LW
3213 /*
3214 * We get here only if there's trouble -- normally "case END" is
3215 * the terminating point.
3216 */
cea2e8a9 3217 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3218 /*NOTREACHED*/
4633a7c4
LW
3219 sayNO;
3220
7821416a
IZ
3221yes_loud:
3222 DEBUG_r(
3223 PerlIO_printf(Perl_debug_log,
3224 "%*s %scould match...%s\n",
3225 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3226 );
3227 goto yes;
3228yes_final:
3229 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3230 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3231yes:
3232#ifdef DEBUGGING
3280af22 3233 PL_regindent--;
4633a7c4
LW
3234#endif
3235 return 1;
3236
3237no:
7821416a
IZ
3238 DEBUG_r(
3239 PerlIO_printf(Perl_debug_log,
3240 "%*s %sfailed...%s\n",
3241 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3242 );
3243 goto do_no;
3244no_final:
3245do_no:
4633a7c4 3246#ifdef DEBUGGING
3280af22 3247 PL_regindent--;
4633a7c4 3248#endif
a0d0e21e 3249 return 0;
a687059c
LW
3250}
3251
3252/*
3253 - regrepeat - repeatedly match something simple, report how many
3254 */
3255/*
3256 * [This routine now assumes that it will only match on things of length 1.
3257 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3258 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3259 */
76e3520e 3260STATIC I32
cea2e8a9 3261S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3262{
5c0ca799 3263 dTHR;
a0d0e21e 3264 register char *scan;
a0d0e21e 3265 register I32 c;
3280af22 3266 register char *loceol = PL_regeol;
a0ed51b3 3267 register I32 hardcount = 0;
a0d0e21e 3268
3280af22 3269 scan = PL_reginput;
c277df42 3270 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3271 loceol = scan + max;
a0d0e21e 3272 switch (OP(p)) {
22c35a8c 3273 case REG_ANY:
a0d0e21e
LW
3274 while (scan < loceol && *scan != '\n')
3275 scan++;
3276 break;
3277 case SANY:
3278 scan = loceol;
3279 break;
a0ed51b3
LW
3280 case ANYUTF8:
3281 loceol = PL_regeol;
3282 while (scan < loceol && *scan != '\n') {
3283 scan += UTF8SKIP(scan);
3284 hardcount++;
3285 }
3286 break;
3287 case SANYUTF8:
3288 loceol = PL_regeol;
3289 while (scan < loceol) {
3290 scan += UTF8SKIP(scan);
3291 hardcount++;
3292 }
3293 break;
bbce6d69 3294 case EXACT: /* length of string is 1 */
cd439c50 3295 c = (U8)*STRING(p);
bbce6d69 3296 while (scan < loceol && UCHARAT(scan) == c)
3297 scan++;
3298 break;
3299 case EXACTF: /* length of string is 1 */
cd439c50 3300 c = (U8)*STRING(p);
bbce6d69 3301 while (scan < loceol &&
22c35a8c 3302 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3303 scan++;
3304 break;
3305 case EXACTFL: /* length of string is 1 */
3280af22 3306 PL_reg_flags |= RF_tainted;
cd439c50 3307 c = (U8)*STRING(p);
bbce6d69 3308 while (scan < loceol &&
22c35a8c 3309 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3310 scan++;
3311 break;
a0ed51b3
LW
3312 case ANYOFUTF8:
3313 loceol = PL_regeol;
3314 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3315 scan += UTF8SKIP(scan);
3316 hardcount++;
3317 }
3318 break;
a0d0e21e 3319 case ANYOF:
936ed897 3320 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3321 scan++;
a0d0e21e
LW
3322 break;
3323 case ALNUM:
3324 while (scan < loceol && isALNUM(*scan))
3325 scan++;
3326 break;
a0ed51b3
LW
3327 case ALNUMUTF8:
3328 loceol = PL_regeol;
dfe13c55 3329 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3330 scan += UTF8SKIP(scan);
3331 hardcount++;
3332 }
3333 break;
bbce6d69 3334 case ALNUML:
3280af22 3335 PL_reg_flags |= RF_tainted;
bbce6d69 3336 while (scan < loceol && isALNUM_LC(*scan))
3337 scan++;
3338 break;
a0ed51b3
LW
3339 case ALNUMLUTF8:
3340 PL_reg_flags |= RF_tainted;
3341 loceol = PL_regeol;
dfe13c55 3342 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3343 scan += UTF8SKIP(scan);
3344 hardcount++;
3345 }
3346 break;
3347 break;
a0d0e21e
LW
3348 case NALNUM:
3349 while (scan < loceol && !isALNUM(*scan))
3350 scan++;
3351 break;
a0ed51b3
LW
3352 case NALNUMUTF8:
3353 loceol = PL_regeol;
dfe13c55 3354 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3355 scan += UTF8SKIP(scan);
3356 hardcount++;
3357 }
3358 break;
bbce6d69 3359 case NALNUML:
3280af22 3360 PL_reg_flags |= RF_tainted;
bbce6d69 3361 while (scan < loceol && !isALNUM_LC(*scan))
3362 scan++;
3363 break;
a0ed51b3
LW
3364 case NALNUMLUTF8:
3365 PL_reg_flags |= RF_tainted;
3366 loceol = PL_regeol;
dfe13c55 3367 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3368 scan += UTF8SKIP(scan);
3369 hardcount++;
3370 }
3371 break;
a0d0e21e
LW
3372 case SPACE:
3373 while (scan < loceol && isSPACE(*scan))
3374 scan++;
3375 break;
a0ed51b3
LW
3376 case SPACEUTF8:
3377 loceol = PL_regeol;
dfe13c55 3378 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3379 scan += UTF8SKIP(scan);
3380 hardcount++;
3381 }
3382 break;
bbce6d69 3383 case SPACEL:
3280af22 3384 PL_reg_flags |= RF_tainted;
bbce6d69 3385 while (scan < loceol && isSPACE_LC(*scan))
3386 scan++;
3387 break;
a0ed51b3
LW
3388 case SPACELUTF8:
3389 PL_reg_flags |= RF_tainted;
3390 loceol = PL_regeol;
dfe13c55 3391 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3392 scan += UTF8SKIP(scan);
3393 hardcount++;
3394 }
3395 break;
a0d0e21e
LW
3396 case NSPACE:
3397 while (scan < loceol && !isSPACE(*scan))
3398 scan++;
3399 break;
a0ed51b3
LW
3400 case NSPACEUTF8:
3401 loceol = PL_regeol;
dfe13c55 3402 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3403 scan += UTF8SKIP(scan);
3404 hardcount++;
3405 }
3406 break;
bbce6d69 3407 case NSPACEL:
3280af22 3408 PL_reg_flags |= RF_tainted;
bbce6d69 3409 while (scan < loceol && !isSPACE_LC(*scan))
3410 scan++;
3411 break;
a0ed51b3
LW
3412 case NSPACELUTF8:
3413 PL_reg_flags |= RF_tainted;
3414 loceol = PL_regeol;
dfe13c55 3415 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3416 scan += UTF8SKIP(scan);
3417 hardcount++;
3418 }
3419 break;
a0d0e21e
LW
3420 case DIGIT:
3421 while (scan < loceol && isDIGIT(*scan))
3422 scan++;
3423 break;
a0ed51b3
LW
3424 case DIGITUTF8:
3425 loceol = PL_regeol;
dfe13c55 3426 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3427 scan += UTF8SKIP(scan);
3428 hardcount++;
3429 }
3430 break;
3431 break;
a0d0e21e
LW
3432 case NDIGIT:
3433 while (scan < loceol && !isDIGIT(*scan))
3434 scan++;
3435 break;
a0ed51b3
LW
3436 case NDIGITUTF8:
3437 loceol = PL_regeol;
dfe13c55 3438 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3439 scan += UTF8SKIP(scan);
3440 hardcount++;
3441 }
3442 break;
a0d0e21e
LW
3443 default: /* Called on something of 0 width. */
3444 break; /* So match right here or not at all. */
3445 }
a687059c 3446
a0ed51b3
LW
3447 if (hardcount)
3448 c = hardcount;
3449 else
3450 c = scan - PL_reginput;
3280af22 3451 PL_reginput = scan;
a687059c 3452
c277df42
IZ
3453 DEBUG_r(
3454 {
3455 SV *prop = sv_newmortal();
3456
3457 regprop(prop, p);
3458 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3459 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3460 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3461 });
3462
a0d0e21e 3463 return(c);
a687059c
LW
3464}
3465
3466/*
c277df42
IZ
3467 - regrepeat_hard - repeatedly match something, report total lenth and length
3468 *
3469 * The repeater is supposed to have constant length.
3470 */
3471
76e3520e 3472STATIC I32
cea2e8a9 3473S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3474{
5c0ca799 3475 dTHR;
c277df42
IZ
3476 register char *scan;
3477 register char *start;
3280af22 3478 register char *loceol = PL_regeol;
a0ed51b3 3479 I32 l = 0;
708e3b05 3480 I32 count = 0, res = 1;
a0ed51b3
LW
3481
3482 if (!max)
3483 return 0;
c277df42 3484
3280af22 3485 start = PL_reginput;
a0ed51b3 3486 if (UTF) {
708e3b05 3487 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3488 if (!count++) {
3489 l = 0;
3490 while (start < PL_reginput) {
3491 l++;
3492 start += UTF8SKIP(start);
3493 }
3494 *lp = l;
3495 if (l == 0)
3496 return max;
3497 }
3498 if (count == max)
3499 return count;
3500 }
3501 }
3502 else {
708e3b05 3503 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3504 if (!count++) {
3505 *lp = l = PL_reginput - start;
3506 if (max != REG_INFTY && l*max < loceol - scan)
3507 loceol = scan + l*max;
3508 if (l == 0)
3509 return max;
c277df42
IZ
3510 }
3511 }
3512 }
708e3b05 3513 if (!res)
3280af22 3514 PL_reginput = scan;
c277df42 3515
a0ed51b3 3516 return count;
c277df42
IZ
3517}
3518
3519/*
cb8d8820 3520 - reginclass - determine if a character falls into a character class
bbce6d69 3521 */
3522
76e3520e 3523STATIC bool
936ed897 3524S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3525{
5c0ca799 3526 dTHR;
b8c5462f 3527 char flags = ANYOF_FLAGS(p);
bbce6d69 3528 bool match = FALSE;
3529
3530 c &= 0xFF;
b8c5462f 3531 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3532 match = TRUE;
3533 else if (flags & ANYOF_FOLD) {
3534 I32 cf;
3535 if (flags & ANYOF_LOCALE) {
3280af22 3536 PL_reg_flags |= RF_tainted;
22c35a8c 3537 cf = PL_fold_locale[c];
bbce6d69 3538 }
3539 else
22c35a8c 3540 cf = PL_fold[c];
b8c5462f 3541 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3542 match = TRUE;
3543 }
3544
b8c5462f 3545 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3546 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3547 if (
3548 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3549 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3550 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3551 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3552 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3553 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3554 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3555 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3556 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3557 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3558 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3559 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3560 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3561 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3562 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3563 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3564 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3565 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3566 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3567 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3568 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3569 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3570 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3571 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3572 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3573 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3574 ) /* How's that for a conditional? */
bbce6d69 3575 {
3576 match = TRUE;
3577 }
3578 }
3579
ae5c130c 3580 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3581}
3582
a0ed51b3 3583STATIC bool
cea2e8a9 3584S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3585{
3586 dTHR;
a0ed51b3
LW
3587 char flags = ARG1(f);
3588 bool match = FALSE;
3589 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3590
3591 if (swash_fetch(sv, p))
3592 match = TRUE;
3593 else if (flags & ANYOF_FOLD) {
3594 I32 cf;
dfe13c55 3595 U8 tmpbuf[10];
a0ed51b3
LW
3596 if (flags & ANYOF_LOCALE) {
3597 PL_reg_flags |= RF_tainted;
3598 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3599 }
3600 else
3601 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3602 if (swash_fetch(sv, tmpbuf))
3603 match = TRUE;
3604 }
3605
b8c5462f 3606 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3607
3608 return (flags & ANYOF_INVERT) ? !match : match;
3609}
161b471a 3610
dfe13c55 3611STATIC U8 *
cea2e8a9 3612S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3613{
3614 dTHR;
a0ed51b3
LW
3615 if (off >= 0) {
3616 while (off-- && s < (U8*)PL_regeol)
3617 s += UTF8SKIP(s);
3618 }
3619 else {
3620 while (off++) {
3621 if (s > (U8*)PL_bostr) {
3622 s--;
3623 if (*s & 0x80) {
3624 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3625 s--;
3626 } /* XXX could check well-formedness here */
3627 }
3628 }
3629 }
3630 return s;
3631}
161b471a 3632
dfe13c55 3633STATIC U8 *
cea2e8a9 3634S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3635{
c485e607 3636 dTHR;
a0ed51b3
LW
3637 if (off >= 0) {
3638 while (off-- && s < (U8*)PL_regeol)
3639 s += UTF8SKIP(s);
3640 if (off >= 0)
3641 return 0;
3642 }
3643 else {
3644 while (off++) {
3645 if (s > (U8*)PL_bostr) {
3646 s--;
3647 if (*s & 0x80) {
3648 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3649 s--;
3650 } /* XXX could check well-formedness here */
3651 }
3652 else
3653 break;
3654 }
3655 if (off <= 0)
3656 return 0;
3657 }
3658 return s;
3659}
51371543
GS
3660
3661#ifdef PERL_OBJECT
51371543
GS
3662#include "XSUB.h"
3663#endif
3664
3665static void
3666restore_pos(pTHXo_ void *arg)
3667{
3668 dTHR;
3669 if (PL_reg_eval_set) {
3670 if (PL_reg_oldsaved) {
3671 PL_reg_re->subbeg = PL_reg_oldsaved;
3672 PL_reg_re->sublen = PL_reg_oldsavedlen;
3673 RX_MATCH_COPIED_on(PL_reg_re);
3674 }
3675 PL_reg_magic->mg_len = PL_reg_oldpos;
3676 PL_reg_eval_set = 0;
3677 PL_curpm = PL_reg_oldcurpm;
3678 }
3679}
3680