This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make certain MacOS Classic has NO_ENVIRON_ARRAY.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
29d1e993
HS
224#define TRYPAREN(paren, n, input) { \
225 if (paren) { \
226 if (n) { \
227 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
228 PL_regendp[paren] = input - PL_bostr; \
229 } \
230 else \
231 PL_regendp[paren] = -1; \
232 } \
233 if (regmatch(next)) \
234 sayYES; \
235 if (paren && n) \
236 PL_regendp[paren] = -1; \
237}
238
239
a687059c 240/*
e50aee73 241 * pregexec and friends
a687059c
LW
242 */
243
244/*
c277df42 245 - pregexec - match a regexp against a string
a687059c 246 */
c277df42 247I32
864dbfa3 248Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 249 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
250/* strend: pointer to null at end of string */
251/* strbeg: real beginning of string */
252/* minend: end of match must be >=minend after stringarg. */
253/* nosave: For optimizations. */
254{
255 return
256 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
257 nosave ? 0 : REXEC_COPY_STR);
258}
0f5d15d6
IZ
259
260STATIC void
cea2e8a9 261S_cache_re(pTHX_ regexp *prog)
0f5d15d6 262{
46124e9e 263 dTHR;
0f5d15d6
IZ
264 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
265#ifdef DEBUGGING
266 PL_regprogram = prog->program;
267#endif
268 PL_regnpar = prog->nparens;
269 PL_regdata = prog->data;
270 PL_reg_re = prog;
271}
22e551b9 272
cad2e5aa
JH
273/*
274 * Need to implement the following flags for reg_anch:
275 *
276 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
277 * USE_INTUIT_ML
278 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
279 * INTUIT_AUTORITATIVE_ML
280 * INTUIT_ONCE_NOML - Intuit can match in one location only.
281 * INTUIT_ONCE_ML
282 *
283 * Another flag for this function: SECOND_TIME (so that float substrs
284 * with giant delta may be not rechecked).
285 */
286
287/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
288
2c2d71f5 289/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
290 Otherwise, only SvCUR(sv) is used to get strbeg. */
291
292/* XXXX We assume that strpos is strbeg unless sv. */
293
6eb5f6b9
JH
294/* XXXX Some places assume that there is a fixed substring.
295 An update may be needed if optimizer marks as "INTUITable"
296 RExen without fixed substrings. Similarly, it is assumed that
297 lengths of all the strings are no more than minlen, thus they
298 cannot come from lookahead.
299 (Or minlen should take into account lookahead.) */
300
2c2d71f5
JH
301/* A failure to find a constant substring means that there is no need to make
302 an expensive call to REx engine, thus we celebrate a failure. Similarly,
303 finding a substring too deep into the string means that less calls to
30944b6d
IZ
304 regtry() should be needed.
305
306 REx compiler's optimizer found 4 possible hints:
307 a) Anchored substring;
308 b) Fixed substring;
309 c) Whether we are anchored (beginning-of-line or \G);
310 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 311 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
312 string which does not contradict any of them.
313 */
2c2d71f5 314
6eb5f6b9
JH
315/* Most of decisions we do here should have been done at compile time.
316 The nodes of the REx which we used for the search should have been
317 deleted from the finite automaton. */
318
cad2e5aa
JH
319char *
320Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
321 char *strend, U32 flags, re_scream_pos_data *data)
322{
2c2d71f5 323 register I32 start_shift;
cad2e5aa 324 /* Should be nonnegative! */
2c2d71f5
JH
325 register I32 end_shift;
326 register char *s;
327 register SV *check;
a1933d95 328 char *strbeg;
cad2e5aa
JH
329 char *t;
330 I32 ml_anch;
2c2d71f5 331 char *tmp;
6eb5f6b9
JH
332 register char *other_last = Nullch; /* other substr checked before this */
333 char *check_at; /* check substr found at this pos */
30944b6d
IZ
334#ifdef DEBUGGING
335 char *i_strpos = strpos;
336#endif
cad2e5aa
JH
337
338 DEBUG_r( if (!PL_colorset) reginitcolors() );
339 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 340 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
341 PL_colors[4],PL_colors[5],PL_colors[0],
342 prog->precomp,
343 PL_colors[1],
344 (strlen(prog->precomp) > 60 ? "..." : ""),
345 PL_colors[0],
b900a521 346 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
347 strpos, PL_colors[1],
348 (strend - strpos > 60 ? "..." : ""))
349 );
350
2c2d71f5
JH
351 if (prog->minlen > strend - strpos) {
352 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 353 goto fail;
2c2d71f5 354 }
a1933d95 355 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
653099ff 356 check = prog->check_substr;
2c2d71f5 357 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
358 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
359 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 360 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 361
5675c1a6
IZ
362 if (!ml_anch) {
363 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
364 /* SvCUR is not set on references: SvRV and SvPVX overlap */
365 && sv && !SvROK(sv)
a1933d95 366 && (strpos != strbeg)) {
5675c1a6
IZ
367 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
368 goto fail;
369 }
370 if (prog->check_offset_min == prog->check_offset_max) {
2c2d71f5 371 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
372 I32 slen;
373
adac82c7 374 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 375 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
376 if (SvTAIL(check)) {
377 slen = SvCUR(check); /* >= 1 */
cad2e5aa 378
2c2d71f5
JH
379 if ( strend - s > slen || strend - s < slen - 1
380 || (strend - s == slen && strend[-1] != '\n')) {
381 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
382 goto fail_finish;
cad2e5aa
JH
383 }
384 /* Now should match s[0..slen-2] */
385 slen--;
653099ff 386 if (slen && (*SvPVX(check) != *s
cad2e5aa 387 || (slen > 1
653099ff 388 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
389 report_neq:
390 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
391 goto fail_finish;
392 }
cad2e5aa 393 }
653099ff
GS
394 else if (*SvPVX(check) != *s
395 || ((slen = SvCUR(check)) > 1
396 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
397 goto report_neq;
398 goto success_at_start;
5675c1a6 399 }
cad2e5aa 400 }
2c2d71f5 401 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 402 s = strpos;
2c2d71f5 403 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 404 end_shift = prog->minlen - start_shift -
653099ff 405 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 406 if (!ml_anch) {
653099ff
GS
407 I32 end = prog->check_offset_max + CHR_SVLEN(check)
408 - (SvTAIL(check) != 0);
2c2d71f5
JH
409 I32 eshift = strend - s - end;
410
411 if (end_shift < eshift)
412 end_shift = eshift;
413 }
cad2e5aa 414 }
2c2d71f5 415 else { /* Can match at random position */
cad2e5aa
JH
416 ml_anch = 0;
417 s = strpos;
2c2d71f5
JH
418 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
419 /* Should be nonnegative! */
420 end_shift = prog->minlen - start_shift -
653099ff 421 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
422 }
423
2c2d71f5 424#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 425 if (end_shift < 0)
6bbae5e6 426 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
427#endif
428
2c2d71f5
JH
429 restart:
430 /* Find a possible match in the region s..strend by looking for
431 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 432 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
433 I32 p = -1; /* Internal iterator of scream. */
434 I32 *pp = data ? data->scream_pos : &p;
435
2c2d71f5
JH
436 if (PL_screamfirst[BmRARE(check)] >= 0
437 || ( BmRARE(check) == '\n'
438 && (BmPREVIOUS(check) == SvCUR(check) - 1)
439 && SvTAIL(check) ))
440 s = screaminstr(sv, check,
441 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 442 else
2c2d71f5 443 goto fail_finish;
cad2e5aa
JH
444 if (data)
445 *data->scream_olds = s;
446 }
447 else
448 s = fbm_instr((unsigned char*)s + start_shift,
449 (unsigned char*)strend - end_shift,
2c2d71f5 450 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
451
452 /* Update the count-of-usability, remove useless subpatterns,
453 unshift s. */
2c2d71f5
JH
454
455 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
456 (s ? "Found" : "Did not find"),
457 ((check == prog->anchored_substr) ? "anchored" : "floating"),
458 PL_colors[0],
7b0972df
JH
459 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
460 SvPVX(check),
2c2d71f5
JH
461 PL_colors[1], (SvTAIL(check) ? "$" : ""),
462 (s ? " at offset " : "...\n") ) );
463
464 if (!s)
465 goto fail_finish;
466
6eb5f6b9
JH
467 check_at = s;
468
2c2d71f5 469 /* Finish the diagnostic message */
30944b6d 470 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
471
472 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
473 Start with the other substr.
474 XXXX no SCREAM optimization yet - and a very coarse implementation
475 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
476 *always* match. Probably should be marked during compile...
477 Probably it is right to do no SCREAM here...
478 */
479
480 if (prog->float_substr && prog->anchored_substr) {
30944b6d 481 /* Take into account the "other" substring. */
2c2d71f5
JH
482 /* XXXX May be hopelessly wrong for UTF... */
483 if (!other_last)
6eb5f6b9 484 other_last = strpos;
2c2d71f5 485 if (check == prog->float_substr) {
30944b6d
IZ
486 do_other_anchored:
487 {
2c2d71f5
JH
488 char *last = s - start_shift, *last1, *last2;
489 char *s1 = s;
490
491 tmp = PL_bostr;
492 t = s - prog->check_offset_max;
493 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
494 && (!(prog->reganch & ROPT_UTF8)
495 || (PL_bostr = strpos, /* Used in regcopmaybe() */
496 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
497 && t > strpos)))
30944b6d 498 /* EMPTY */;
2c2d71f5
JH
499 else
500 t = strpos;
501 t += prog->anchored_offset;
6eb5f6b9
JH
502 if (t < other_last) /* These positions already checked */
503 t = other_last;
2c2d71f5
JH
504 PL_bostr = tmp;
505 last2 = last1 = strend - prog->minlen;
506 if (last < last1)
507 last1 = last;
508 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
509 /* On end-of-str: see comment below. */
510 s = fbm_instr((unsigned char*)t,
511 (unsigned char*)last1 + prog->anchored_offset
512 + SvCUR(prog->anchored_substr)
513 - (SvTAIL(prog->anchored_substr)!=0),
514 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
515 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
516 (s ? "Found" : "Contradicts"),
517 PL_colors[0],
7b0972df
JH
518 (int)(SvCUR(prog->anchored_substr)
519 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
520 SvPVX(prog->anchored_substr),
521 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
522 if (!s) {
523 if (last1 >= last2) {
524 DEBUG_r(PerlIO_printf(Perl_debug_log,
525 ", giving up...\n"));
526 goto fail_finish;
527 }
528 DEBUG_r(PerlIO_printf(Perl_debug_log,
529 ", trying floating at offset %ld...\n",
30944b6d 530 (long)(s1 + 1 - i_strpos)));
2c2d71f5 531 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 532 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
533 s = HOPc(last, 1);
534 goto restart;
535 }
536 else {
537 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 538 (long)(s - i_strpos)));
2c2d71f5 539 t = s - prog->anchored_offset;
6eb5f6b9 540 other_last = s + 1;
30944b6d 541 s = s1;
2c2d71f5
JH
542 if (t == strpos)
543 goto try_at_start;
2c2d71f5
JH
544 goto try_at_offset;
545 }
30944b6d 546 }
2c2d71f5
JH
547 }
548 else { /* Take into account the floating substring. */
549 char *last, *last1;
550 char *s1 = s;
551
552 t = s - start_shift;
553 last1 = last = strend - prog->minlen + prog->float_min_offset;
554 if (last - t > prog->float_max_offset)
555 last = t + prog->float_max_offset;
556 s = t + prog->float_min_offset;
6eb5f6b9
JH
557 if (s < other_last)
558 s = other_last;
2c2d71f5
JH
559 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
560 /* fbm_instr() takes into account exact value of end-of-str
561 if the check is SvTAIL(ed). Since false positives are OK,
562 and end-of-str is not later than strend we are OK. */
563 s = fbm_instr((unsigned char*)s,
564 (unsigned char*)last + SvCUR(prog->float_substr)
565 - (SvTAIL(prog->float_substr)!=0),
566 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
567 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
568 (s ? "Found" : "Contradicts"),
569 PL_colors[0],
7b0972df
JH
570 (int)(SvCUR(prog->float_substr)
571 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
572 SvPVX(prog->float_substr),
573 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
574 if (!s) {
575 if (last1 == last) {
576 DEBUG_r(PerlIO_printf(Perl_debug_log,
577 ", giving up...\n"));
578 goto fail_finish;
579 }
580 DEBUG_r(PerlIO_printf(Perl_debug_log,
581 ", trying anchored starting at offset %ld...\n",
30944b6d 582 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 583 other_last = last + 1;
2c2d71f5
JH
584 PL_regeol = strend; /* Used in HOP() */
585 s = HOPc(t, 1);
586 goto restart;
587 }
588 else {
589 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 590 (long)(s - i_strpos)));
6eb5f6b9 591 other_last = s + 1;
30944b6d 592 s = s1;
2c2d71f5
JH
593 if (t == strpos)
594 goto try_at_start;
2c2d71f5
JH
595 goto try_at_offset;
596 }
597 }
cad2e5aa 598 }
2c2d71f5
JH
599
600 t = s - prog->check_offset_max;
601 tmp = PL_bostr;
602 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
603 && (!(prog->reganch & ROPT_UTF8)
604 || (PL_bostr = strpos, /* Used in regcopmaybe() */
605 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
606 && t > strpos)))) {
607 PL_bostr = tmp;
608 /* Fixed substring is found far enough so that the match
609 cannot start at strpos. */
610 try_at_offset:
cad2e5aa 611 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
612 /* Eventually fbm_*() should handle this, but often
613 anchored_offset is not 0, so this check will not be wasted. */
614 /* XXXX In the code below we prefer to look for "^" even in
615 presence of anchored substrings. And we search even
616 beyond the found float position. These pessimizations
617 are historical artefacts only. */
618 find_anchor:
2c2d71f5 619 while (t < strend - prog->minlen) {
cad2e5aa 620 if (*t == '\n') {
4ee3650e 621 if (t < check_at - prog->check_offset_min) {
30944b6d 622 if (prog->anchored_substr) {
4ee3650e
GS
623 /* Since we moved from the found position,
624 we definitely contradict the found anchored
30944b6d
IZ
625 substr. Due to the above check we do not
626 contradict "check" substr.
627 Thus we can arrive here only if check substr
628 is float. Redo checking for "other"=="fixed".
629 */
630 strpos = t + 1;
631 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
632 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
633 goto do_other_anchored;
634 }
4ee3650e
GS
635 /* We don't contradict the found floating substring. */
636 /* XXXX Why not check for STCLASS? */
cad2e5aa 637 s = t + 1;
2c2d71f5 638 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 639 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
640 goto set_useful;
641 }
4ee3650e
GS
642 /* Position contradicts check-string */
643 /* XXXX probably better to look for check-string
644 than for "\n", so one should lower the limit for t? */
645 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 646 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 647 other_last = strpos = s = t + 1;
cad2e5aa
JH
648 goto restart;
649 }
650 t++;
651 }
2c2d71f5
JH
652 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
653 PL_colors[0],PL_colors[1]));
654 goto fail_finish;
cad2e5aa 655 }
f5952150
GS
656 else {
657 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
658 PL_colors[0],PL_colors[1]));
659 }
cad2e5aa
JH
660 s = t;
661 set_useful:
2c2d71f5 662 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
663 }
664 else {
2c2d71f5 665 PL_bostr = tmp;
f5952150 666 /* The found string does not prohibit matching at strpos,
2c2d71f5 667 - no optimization of calling REx engine can be performed,
f5952150
GS
668 unless it was an MBOL and we are not after MBOL,
669 or a future STCLASS check will fail this. */
2c2d71f5
JH
670 try_at_start:
671 /* Even in this situation we may use MBOL flag if strpos is offset
672 wrt the start of the string. */
05b4157f 673 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 674 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
675 /* May be due to an implicit anchor of m{.*foo} */
676 && !(prog->reganch & ROPT_IMPLICIT))
677 {
cad2e5aa
JH
678 t = strpos;
679 goto find_anchor;
680 }
30944b6d 681 DEBUG_r( if (ml_anch)
f5952150
GS
682 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
683 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 684 );
2c2d71f5 685 success_at_start:
30944b6d 686 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 687 && prog->check_substr /* Could be deleted already */
cad2e5aa 688 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
689 && prog->check_substr == prog->float_substr)
690 {
cad2e5aa 691 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 692 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
693 SvREFCNT_dec(prog->check_substr);
694 prog->check_substr = Nullsv; /* disable */
695 prog->float_substr = Nullsv; /* clear */
5e39e1e5 696 check = Nullsv; /* abort */
cad2e5aa 697 s = strpos;
3cf5c195
IZ
698 /* XXXX This is a remnant of the old implementation. It
699 looks wasteful, since now INTUIT can use many
6eb5f6b9 700 other heuristics. */
cad2e5aa
JH
701 prog->reganch &= ~RE_USE_INTUIT;
702 }
703 else
704 s = strpos;
705 }
706
6eb5f6b9
JH
707 /* Last resort... */
708 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
709 if (prog->regstclass) {
710 /* minlen == 0 is possible if regstclass is \b or \B,
711 and the fixed substr is ''$.
712 Since minlen is already taken into account, s+1 is before strend;
713 accidentally, minlen >= 1 guaranties no false positives at s + 1
714 even for \b or \B. But (minlen? 1 : 0) below assumes that
715 regstclass does not come from lookahead... */
716 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
717 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
718 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
719 ? STR_LEN(prog->regstclass)
720 : 1);
6eb5f6b9 721 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
722 ? s + (prog->minlen? cl_l : 0)
723 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 724 : strend) ;
a1933d95 725 char *startpos = strbeg;
6eb5f6b9
JH
726
727 t = s;
76384e4a
GS
728 if (prog->reganch & ROPT_UTF8) {
729 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
730 PL_bostr = startpos;
731 }
6eb5f6b9
JH
732 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
733 if (!s) {
734#ifdef DEBUGGING
735 char *what;
736#endif
737 if (endpos == strend) {
738 DEBUG_r( PerlIO_printf(Perl_debug_log,
739 "Could not match STCLASS...\n") );
740 goto fail;
741 }
66e933ab
GS
742 DEBUG_r( PerlIO_printf(Perl_debug_log,
743 "This position contradicts STCLASS...\n") );
653099ff
GS
744 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
745 goto fail;
6eb5f6b9
JH
746 /* Contradict one of substrings */
747 if (prog->anchored_substr) {
6eb5f6b9
JH
748 if (prog->anchored_substr == check) {
749 DEBUG_r( what = "anchored" );
750 hop_and_restart:
751 PL_regeol = strend; /* Used in HOP() */
752 s = HOPc(t, 1);
66e933ab
GS
753 if (s + start_shift + end_shift > strend) {
754 /* XXXX Should be taken into account earlier? */
755 DEBUG_r( PerlIO_printf(Perl_debug_log,
756 "Could not match STCLASS...\n") );
757 goto fail;
758 }
5e39e1e5
HS
759 if (!check)
760 goto giveup;
6eb5f6b9 761 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 762 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
763 what, (long)(s + start_shift - i_strpos)) );
764 goto restart;
765 }
66e933ab 766 /* Have both, check_string is floating */
6eb5f6b9
JH
767 if (t + start_shift >= check_at) /* Contradicts floating=check */
768 goto retry_floating_check;
769 /* Recheck anchored substring, but not floating... */
770 s = check_at;
5e39e1e5
HS
771 if (!check)
772 goto giveup;
6eb5f6b9 773 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 774 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
775 (long)(other_last - i_strpos)) );
776 goto do_other_anchored;
777 }
60e71179
GS
778 /* Another way we could have checked stclass at the
779 current position only: */
780 if (ml_anch) {
781 s = t = t + 1;
5e39e1e5
HS
782 if (!check)
783 goto giveup;
60e71179 784 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
785 "Looking for /%s^%s/m starting at offset %ld...\n",
786 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 787 goto try_at_offset;
66e933ab 788 }
60e71179
GS
789 if (!prog->float_substr) /* Could have been deleted */
790 goto fail;
6eb5f6b9
JH
791 /* Check is floating subtring. */
792 retry_floating_check:
793 t = check_at - start_shift;
794 DEBUG_r( what = "floating" );
795 goto hop_and_restart;
796 }
797 DEBUG_r( if (t != s)
798 PerlIO_printf(Perl_debug_log,
799 "By STCLASS: moving %ld --> %ld\n",
800 (long)(t - i_strpos), (long)(s - i_strpos));
801 else
802 PerlIO_printf(Perl_debug_log,
803 "Does not contradict STCLASS...\n") );
804 }
5e39e1e5
HS
805 giveup:
806 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
807 PL_colors[4], (check ? "Guessed" : "Giving up"),
808 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 809 return s;
2c2d71f5
JH
810
811 fail_finish: /* Substring not found */
66e933ab
GS
812 if (prog->check_substr) /* could be removed already */
813 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 814 fail:
2c2d71f5 815 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
816 PL_colors[4],PL_colors[5]));
817 return Nullch;
818}
9661b544 819
6eb5f6b9 820/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
821STATIC char *
822S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 823{
6eb5f6b9
JH
824 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
825 char *m;
d8093b23
G
826 STRLEN ln;
827 unsigned int c1;
828 unsigned int c2;
6eb5f6b9
JH
829 char *e;
830 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 831
6eb5f6b9
JH
832 /* We know what class it must start with. */
833 switch (OP(c)) {
834 case ANYOFUTF8:
835 while (s < strend) {
836 if (REGINCLASSUTF8(c, (U8*)s)) {
837 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 838 goto got_it;
3cf5c195 839 else
6eb5f6b9 840 tmp = doevery;
a0ed51b3 841 }
6eb5f6b9
JH
842 else
843 tmp = 1;
a0ed51b3
LW
844 s += UTF8SKIP(s);
845 }
6eb5f6b9
JH
846 break;
847 case ANYOF:
a0ed51b3 848 while (s < strend) {
d8093b23 849 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
850 if (tmp && (norun || regtry(prog, s)))
851 goto got_it;
852 else
853 tmp = doevery;
a0ed51b3 854 }
6eb5f6b9
JH
855 else
856 tmp = 1;
a0ed51b3 857 s++;
a0d0e21e 858 }
6eb5f6b9
JH
859 break;
860 case EXACTF:
861 m = STRING(c);
862 ln = STR_LEN(c);
d8093b23 863 c1 = *(U8*)m;
6eb5f6b9
JH
864 c2 = PL_fold[c1];
865 goto do_exactf;
866 case EXACTFL:
867 m = STRING(c);
868 ln = STR_LEN(c);
d8093b23 869 c1 = *(U8*)m;
6eb5f6b9
JH
870 c2 = PL_fold_locale[c1];
871 do_exactf:
872 e = strend - ln;
b3c9acc1 873
6eb5f6b9
JH
874 if (norun && e < s)
875 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
876 /* Here it is NOT UTF! */
877 if (c1 == c2) {
878 while (s <= e) {
d8093b23 879 if ( *(U8*)s == c1
66e933ab
GS
880 && (ln == 1 || !(OP(c) == EXACTF
881 ? ibcmp(s, m, ln)
882 : ibcmp_locale(s, m, ln)))
6eb5f6b9 883 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
884 goto got_it;
885 s++;
886 }
887 } else {
888 while (s <= e) {
d8093b23 889 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
890 && (ln == 1 || !(OP(c) == EXACTF
891 ? ibcmp(s, m, ln)
892 : ibcmp_locale(s, m, ln)))
6eb5f6b9 893 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
894 goto got_it;
895 s++;
896 }
897 }
898 break;
bbce6d69 899 case BOUNDL:
3280af22 900 PL_reg_flags |= RF_tainted;
bbce6d69 901 /* FALL THROUGH */
a0d0e21e 902 case BOUND:
6eb5f6b9 903 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 904 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 905 while (s < strend) {
95bac841 906 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 907 tmp = !tmp;
6eb5f6b9 908 if ((norun || regtry(prog, s)))
a0d0e21e 909 goto got_it;
a687059c 910 }
a0d0e21e
LW
911 s++;
912 }
6eb5f6b9 913 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
914 goto got_it;
915 break;
a0ed51b3
LW
916 case BOUNDLUTF8:
917 PL_reg_flags |= RF_tainted;
918 /* FALL THROUGH */
919 case BOUNDUTF8:
dcad2880 920 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1),
ba210ebe
JH
921 strend - s,
922 0, 0) : '\n';
76384e4a 923 tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 924 while (s < strend) {
76384e4a 925 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
926 swash_fetch(PL_utf8_alnum, (U8*)s) :
927 isALNUM_LC_utf8((U8*)s)))
928 {
a0ed51b3 929 tmp = !tmp;
6eb5f6b9 930 if ((norun || regtry(prog, s)))
a0ed51b3
LW
931 goto got_it;
932 }
933 s += UTF8SKIP(s);
934 }
6eb5f6b9 935 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
936 goto got_it;
937 break;
bbce6d69 938 case NBOUNDL:
3280af22 939 PL_reg_flags |= RF_tainted;
bbce6d69 940 /* FALL THROUGH */
a0d0e21e 941 case NBOUND:
6eb5f6b9 942 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 943 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 944 while (s < strend) {
95bac841 945 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 946 tmp = !tmp;
6eb5f6b9 947 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
948 goto got_it;
949 s++;
950 }
6eb5f6b9 951 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
952 goto got_it;
953 break;
a0ed51b3
LW
954 case NBOUNDLUTF8:
955 PL_reg_flags |= RF_tainted;
956 /* FALL THROUGH */
957 case NBOUNDUTF8:
dcad2880 958 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1),
ba210ebe
JH
959 strend - s,
960 0, 0) : '\n';
76384e4a 961 tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 962 while (s < strend) {
76384e4a 963 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
964 swash_fetch(PL_utf8_alnum, (U8*)s) :
965 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 966 tmp = !tmp;
6eb5f6b9 967 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
968 goto got_it;
969 s += UTF8SKIP(s);
970 }
6eb5f6b9 971 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
972 goto got_it;
973 break;
a0d0e21e
LW
974 case ALNUM:
975 while (s < strend) {
bbce6d69 976 if (isALNUM(*s)) {
6eb5f6b9 977 if (tmp && (norun || regtry(prog, s)))
bbce6d69 978 goto got_it;
979 else
980 tmp = doevery;
981 }
982 else
983 tmp = 1;
984 s++;
985 }
986 break;
a0ed51b3
LW
987 case ALNUMUTF8:
988 while (s < strend) {
dfe13c55 989 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 990 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
991 goto got_it;
992 else
993 tmp = doevery;
994 }
995 else
996 tmp = 1;
997 s += UTF8SKIP(s);
998 }
999 break;
bbce6d69 1000 case ALNUML:
3280af22 1001 PL_reg_flags |= RF_tainted;
bbce6d69 1002 while (s < strend) {
1003 if (isALNUM_LC(*s)) {
6eb5f6b9 1004 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1005 goto got_it;
a687059c 1006 else
a0d0e21e
LW
1007 tmp = doevery;
1008 }
1009 else
1010 tmp = 1;
1011 s++;
1012 }
1013 break;
a0ed51b3
LW
1014 case ALNUMLUTF8:
1015 PL_reg_flags |= RF_tainted;
1016 while (s < strend) {
dfe13c55 1017 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1018 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1019 goto got_it;
1020 else
1021 tmp = doevery;
1022 }
1023 else
1024 tmp = 1;
1025 s += UTF8SKIP(s);
1026 }
1027 break;
a0d0e21e
LW
1028 case NALNUM:
1029 while (s < strend) {
bbce6d69 1030 if (!isALNUM(*s)) {
6eb5f6b9 1031 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1032 goto got_it;
1033 else
1034 tmp = doevery;
1035 }
1036 else
1037 tmp = 1;
1038 s++;
1039 }
1040 break;
a0ed51b3
LW
1041 case NALNUMUTF8:
1042 while (s < strend) {
dfe13c55 1043 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1044 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1045 goto got_it;
1046 else
1047 tmp = doevery;
1048 }
1049 else
1050 tmp = 1;
1051 s += UTF8SKIP(s);
1052 }
1053 break;
bbce6d69 1054 case NALNUML:
3280af22 1055 PL_reg_flags |= RF_tainted;
bbce6d69 1056 while (s < strend) {
1057 if (!isALNUM_LC(*s)) {
6eb5f6b9 1058 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1059 goto got_it;
a687059c 1060 else
a0d0e21e 1061 tmp = doevery;
a687059c 1062 }
a0d0e21e
LW
1063 else
1064 tmp = 1;
1065 s++;
1066 }
1067 break;
a0ed51b3
LW
1068 case NALNUMLUTF8:
1069 PL_reg_flags |= RF_tainted;
1070 while (s < strend) {
dfe13c55 1071 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1072 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1073 goto got_it;
1074 else
1075 tmp = doevery;
1076 }
1077 else
1078 tmp = 1;
1079 s += UTF8SKIP(s);
1080 }
1081 break;
a0d0e21e
LW
1082 case SPACE:
1083 while (s < strend) {
1084 if (isSPACE(*s)) {
6eb5f6b9 1085 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1086 goto got_it;
1087 else
1088 tmp = doevery;
2304df62 1089 }
a0d0e21e
LW
1090 else
1091 tmp = 1;
1092 s++;
1093 }
1094 break;
a0ed51b3
LW
1095 case SPACEUTF8:
1096 while (s < strend) {
dfe13c55 1097 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1098 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1099 goto got_it;
1100 else
1101 tmp = doevery;
1102 }
1103 else
1104 tmp = 1;
1105 s += UTF8SKIP(s);
1106 }
1107 break;
bbce6d69 1108 case SPACEL:
3280af22 1109 PL_reg_flags |= RF_tainted;
bbce6d69 1110 while (s < strend) {
1111 if (isSPACE_LC(*s)) {
6eb5f6b9 1112 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1113 goto got_it;
1114 else
1115 tmp = doevery;
1116 }
1117 else
1118 tmp = 1;
1119 s++;
1120 }
1121 break;
a0ed51b3
LW
1122 case SPACELUTF8:
1123 PL_reg_flags |= RF_tainted;
1124 while (s < strend) {
dfe13c55 1125 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1126 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1127 goto got_it;
1128 else
1129 tmp = doevery;
1130 }
1131 else
1132 tmp = 1;
1133 s += UTF8SKIP(s);
1134 }
1135 break;
a0d0e21e
LW
1136 case NSPACE:
1137 while (s < strend) {
1138 if (!isSPACE(*s)) {
6eb5f6b9 1139 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1140 goto got_it;
1141 else
1142 tmp = doevery;
a687059c 1143 }
a0d0e21e
LW
1144 else
1145 tmp = 1;
1146 s++;
1147 }
1148 break;
a0ed51b3
LW
1149 case NSPACEUTF8:
1150 while (s < strend) {
dfe13c55 1151 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1152 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1153 goto got_it;
1154 else
1155 tmp = doevery;
1156 }
1157 else
1158 tmp = 1;
1159 s += UTF8SKIP(s);
1160 }
1161 break;
bbce6d69 1162 case NSPACEL:
3280af22 1163 PL_reg_flags |= RF_tainted;
bbce6d69 1164 while (s < strend) {
1165 if (!isSPACE_LC(*s)) {
6eb5f6b9 1166 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1167 goto got_it;
1168 else
1169 tmp = doevery;
1170 }
1171 else
1172 tmp = 1;
1173 s++;
1174 }
1175 break;
a0ed51b3
LW
1176 case NSPACELUTF8:
1177 PL_reg_flags |= RF_tainted;
1178 while (s < strend) {
dfe13c55 1179 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1180 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1181 goto got_it;
1182 else
1183 tmp = doevery;
1184 }
1185 else
1186 tmp = 1;
1187 s += UTF8SKIP(s);
1188 }
1189 break;
a0d0e21e
LW
1190 case DIGIT:
1191 while (s < strend) {
1192 if (isDIGIT(*s)) {
6eb5f6b9 1193 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1194 goto got_it;
1195 else
1196 tmp = doevery;
2b69d0c2 1197 }
a0d0e21e
LW
1198 else
1199 tmp = 1;
1200 s++;
1201 }
1202 break;
a0ed51b3
LW
1203 case DIGITUTF8:
1204 while (s < strend) {
dfe13c55 1205 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1206 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1207 goto got_it;
1208 else
1209 tmp = doevery;
1210 }
1211 else
1212 tmp = 1;
1213 s += UTF8SKIP(s);
1214 }
1215 break;
b8c5462f
JH
1216 case DIGITL:
1217 PL_reg_flags |= RF_tainted;
1218 while (s < strend) {
1219 if (isDIGIT_LC(*s)) {
6eb5f6b9 1220 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1221 goto got_it;
1222 else
1223 tmp = doevery;
1224 }
1225 else
1226 tmp = 1;
1227 s++;
1228 }
1229 break;
1230 case DIGITLUTF8:
1231 PL_reg_flags |= RF_tainted;
1232 while (s < strend) {
1233 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1234 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1235 goto got_it;
1236 else
1237 tmp = doevery;
1238 }
1239 else
1240 tmp = 1;
1241 s += UTF8SKIP(s);
1242 }
1243 break;
a0d0e21e
LW
1244 case NDIGIT:
1245 while (s < strend) {
1246 if (!isDIGIT(*s)) {
6eb5f6b9 1247 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1248 goto got_it;
1249 else
1250 tmp = doevery;
a687059c 1251 }
a0d0e21e
LW
1252 else
1253 tmp = 1;
1254 s++;
1255 }
1256 break;
a0ed51b3
LW
1257 case NDIGITUTF8:
1258 while (s < strend) {
dfe13c55 1259 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1260 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1261 goto got_it;
1262 else
1263 tmp = doevery;
1264 }
1265 else
1266 tmp = 1;
1267 s += UTF8SKIP(s);
1268 }
1269 break;
b8c5462f
JH
1270 case NDIGITL:
1271 PL_reg_flags |= RF_tainted;
1272 while (s < strend) {
1273 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1274 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1275 goto got_it;
1276 else
1277 tmp = doevery;
1278 }
1279 else
1280 tmp = 1;
1281 s++;
a0ed51b3 1282 }
b8c5462f
JH
1283 break;
1284 case NDIGITLUTF8:
1285 PL_reg_flags |= RF_tainted;
1286 while (s < strend) {
1287 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1288 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1289 goto got_it;
cf93c79d 1290 else
b8c5462f
JH
1291 tmp = doevery;
1292 }
1293 else
1294 tmp = 1;
1295 s += UTF8SKIP(s);
1296 }
1297 break;
b3c9acc1 1298 default:
3c3eec57
GS
1299 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1300 break;
d6a28714 1301 }
6eb5f6b9
JH
1302 return 0;
1303 got_it:
1304 return s;
1305}
1306
1307/*
1308 - regexec_flags - match a regexp against a string
1309 */
1310I32
1311Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1312 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1313/* strend: pointer to null at end of string */
1314/* strbeg: real beginning of string */
1315/* minend: end of match must be >=minend after stringarg. */
1316/* data: May be used for some additional optimizations. */
1317/* nosave: For optimizations. */
1318{
1319 dTHR;
1320 register char *s;
1321 register regnode *c;
1322 register char *startpos = stringarg;
6eb5f6b9
JH
1323 I32 minlen; /* must match at least this many chars */
1324 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1325 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1326 constant substr. */ /* CC */
1327 I32 end_shift = 0; /* Same for the end. */ /* CC */
1328 I32 scream_pos = -1; /* Internal iterator of scream. */
1329 char *scream_olds;
1330 SV* oreplsv = GvSV(PL_replgv);
1331
1332 PL_regcc = 0;
1333
1334 cache_re(prog);
1335#ifdef DEBUGGING
1336 PL_regnarrate = PL_debug & 512;
1337#endif
1338
1339 /* Be paranoid... */
1340 if (prog == NULL || startpos == NULL) {
1341 Perl_croak(aTHX_ "NULL regexp parameter");
1342 return 0;
1343 }
1344
1345 minlen = prog->minlen;
1346 if (strend - startpos < minlen) goto phooey;
1347
1348 if (startpos == strbeg) /* is ^ valid at stringarg? */
1349 PL_regprev = '\n';
1350 else {
1351 PL_regprev = (U32)stringarg[-1];
1352 if (!PL_multiline && PL_regprev == '\n')
1353 PL_regprev = '\0'; /* force ^ to NOT match */
1354 }
1355
1356 /* Check validity of program. */
1357 if (UCHARAT(prog->program) != REG_MAGIC) {
1358 Perl_croak(aTHX_ "corrupted regexp program");
1359 }
1360
1361 PL_reg_flags = 0;
1362 PL_reg_eval_set = 0;
1363 PL_reg_maxiter = 0;
1364
1365 if (prog->reganch & ROPT_UTF8)
1366 PL_reg_flags |= RF_utf8;
1367
1368 /* Mark beginning of line for ^ and lookbehind. */
1369 PL_regbol = startpos;
1370 PL_bostr = strbeg;
1371 PL_reg_sv = sv;
1372
1373 /* Mark end of line for $ (and such) */
1374 PL_regeol = strend;
1375
1376 /* see how far we have to get to not match where we matched before */
1377 PL_regtill = startpos+minend;
1378
1379 /* We start without call_cc context. */
1380 PL_reg_call_cc = 0;
1381
1382 /* If there is a "must appear" string, look for it. */
1383 s = startpos;
1384
1385 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1386 MAGIC *mg;
1387
1388 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1389 PL_reg_ganch = startpos;
1390 else if (sv && SvTYPE(sv) >= SVt_PVMG
1391 && SvMAGIC(sv)
1392 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1393 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1394 if (prog->reganch & ROPT_ANCH_GPOS) {
1395 if (s > PL_reg_ganch)
1396 goto phooey;
1397 s = PL_reg_ganch;
1398 }
1399 }
1400 else /* pos() not defined */
1401 PL_reg_ganch = strbeg;
1402 }
1403
1404 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1405 re_scream_pos_data d;
1406
1407 d.scream_olds = &scream_olds;
1408 d.scream_pos = &scream_pos;
1409 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1410 if (!s)
1411 goto phooey; /* not present */
1412 }
1413
1414 DEBUG_r( if (!PL_colorset) reginitcolors() );
1415 DEBUG_r(PerlIO_printf(Perl_debug_log,
1416 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1417 PL_colors[4],PL_colors[5],PL_colors[0],
1418 prog->precomp,
1419 PL_colors[1],
1420 (strlen(prog->precomp) > 60 ? "..." : ""),
1421 PL_colors[0],
1422 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1423 startpos, PL_colors[1],
1424 (strend - startpos > 60 ? "..." : ""))
1425 );
1426
1427 /* Simplest case: anchored match need be tried only once. */
1428 /* [unless only anchor is BOL and multiline is set] */
1429 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1430 if (s == startpos && regtry(prog, startpos))
1431 goto got_it;
1432 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1433 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1434 {
1435 char *end;
1436
1437 if (minlen)
1438 dontbother = minlen - 1;
1439 end = HOPc(strend, -dontbother) - 1;
1440 /* for multiline we only have to try after newlines */
1441 if (prog->check_substr) {
1442 if (s == startpos)
1443 goto after_try;
1444 while (1) {
1445 if (regtry(prog, s))
1446 goto got_it;
1447 after_try:
1448 if (s >= end)
1449 goto phooey;
1450 if (prog->reganch & RE_USE_INTUIT) {
1451 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1452 if (!s)
1453 goto phooey;
1454 }
1455 else
1456 s++;
1457 }
1458 } else {
1459 if (s > startpos)
1460 s--;
1461 while (s < end) {
1462 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1463 if (regtry(prog, s))
1464 goto got_it;
1465 }
1466 }
1467 }
1468 }
1469 goto phooey;
1470 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1471 if (regtry(prog, PL_reg_ganch))
1472 goto got_it;
1473 goto phooey;
1474 }
1475
1476 /* Messy cases: unanchored match. */
1477 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1478 /* we have /x+whatever/ */
1479 /* it must be a one character string (XXXX Except UTF?) */
1480 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1481#ifdef DEBUGGING
1482 int did_match = 0;
1483#endif
1484
6eb5f6b9
JH
1485 if (UTF) {
1486 while (s < strend) {
1487 if (*s == ch) {
bf93d4cc 1488 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1489 if (regtry(prog, s)) goto got_it;
1490 s += UTF8SKIP(s);
1491 while (s < strend && *s == ch)
1492 s += UTF8SKIP(s);
1493 }
1494 s += UTF8SKIP(s);
1495 }
1496 }
1497 else {
1498 while (s < strend) {
1499 if (*s == ch) {
bf93d4cc 1500 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1501 if (regtry(prog, s)) goto got_it;
1502 s++;
1503 while (s < strend && *s == ch)
1504 s++;
1505 }
1506 s++;
1507 }
1508 }
bf93d4cc
GS
1509 DEBUG_r(did_match ||
1510 PerlIO_printf(Perl_debug_log,
1511 "Did not find anchored character...\n"));
6eb5f6b9
JH
1512 }
1513 /*SUPPRESS 560*/
1514 else if (prog->anchored_substr != Nullsv
1515 || (prog->float_substr != Nullsv
1516 && prog->float_max_offset < strend - s)) {
1517 SV *must = prog->anchored_substr
1518 ? prog->anchored_substr : prog->float_substr;
1519 I32 back_max =
1520 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1521 I32 back_min =
1522 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1523 char *last = HOPc(strend, /* Cannot start after this */
1524 -(I32)(CHR_SVLEN(must)
1525 - (SvTAIL(must) != 0) + back_min));
1526 char *last1; /* Last position checked before */
bf93d4cc
GS
1527#ifdef DEBUGGING
1528 int did_match = 0;
1529#endif
6eb5f6b9
JH
1530
1531 if (s > PL_bostr)
1532 last1 = HOPc(s, -1);
1533 else
1534 last1 = s - 1; /* bogus */
1535
1536 /* XXXX check_substr already used to find `s', can optimize if
1537 check_substr==must. */
1538 scream_pos = -1;
1539 dontbother = end_shift;
1540 strend = HOPc(strend, -dontbother);
1541 while ( (s <= last) &&
1542 ((flags & REXEC_SCREAM)
1543 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1544 end_shift, &scream_pos, 0))
1545 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1546 (unsigned char*)strend, must,
1547 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1548 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1549 if (HOPc(s, -back_max) > last1) {
1550 last1 = HOPc(s, -back_min);
1551 s = HOPc(s, -back_max);
1552 }
1553 else {
1554 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1555
1556 last1 = HOPc(s, -back_min);
1557 s = t;
1558 }
1559 if (UTF) {
1560 while (s <= last1) {
1561 if (regtry(prog, s))
1562 goto got_it;
1563 s += UTF8SKIP(s);
1564 }
1565 }
1566 else {
1567 while (s <= last1) {
1568 if (regtry(prog, s))
1569 goto got_it;
1570 s++;
1571 }
1572 }
1573 }
bf93d4cc
GS
1574 DEBUG_r(did_match ||
1575 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1576 ((must == prog->anchored_substr)
1577 ? "anchored" : "floating"),
1578 PL_colors[0],
1579 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1580 SvPVX(must),
1581 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1582 goto phooey;
1583 }
155aba94 1584 else if ((c = prog->regstclass)) {
66e933ab
GS
1585 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1586 /* don't bother with what can't match */
6eb5f6b9
JH
1587 strend = HOPc(strend, -(minlen - 1));
1588 if (find_byclass(prog, c, s, strend, startpos, 0))
1589 goto got_it;
bf93d4cc 1590 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1591 }
1592 else {
1593 dontbother = 0;
1594 if (prog->float_substr != Nullsv) { /* Trim the end. */
1595 char *last;
d6a28714
JH
1596
1597 if (flags & REXEC_SCREAM) {
1598 last = screaminstr(sv, prog->float_substr, s - strbeg,
1599 end_shift, &scream_pos, 1); /* last one */
1600 if (!last)
1601 last = scream_olds; /* Only one occurence. */
b8c5462f 1602 }
d6a28714
JH
1603 else {
1604 STRLEN len;
1605 char *little = SvPV(prog->float_substr, len);
1606
1607 if (SvTAIL(prog->float_substr)) {
1608 if (memEQ(strend - len + 1, little, len - 1))
1609 last = strend - len + 1;
1610 else if (!PL_multiline)
1611 last = memEQ(strend - len, little, len)
1612 ? strend - len : Nullch;
b8c5462f 1613 else
d6a28714
JH
1614 goto find_last;
1615 } else {
1616 find_last:
1617 if (len)
1618 last = rninstr(s, strend, little, little + len);
b8c5462f 1619 else
d6a28714 1620 last = strend; /* matching `$' */
b8c5462f 1621 }
b8c5462f 1622 }
bf93d4cc
GS
1623 if (last == NULL) {
1624 DEBUG_r(PerlIO_printf(Perl_debug_log,
1625 "%sCan't trim the tail, match fails (should not happen)%s\n",
1626 PL_colors[4],PL_colors[5]));
1627 goto phooey; /* Should not happen! */
1628 }
d6a28714
JH
1629 dontbother = strend - last + prog->float_min_offset;
1630 }
1631 if (minlen && (dontbother < minlen))
1632 dontbother = minlen - 1;
1633 strend -= dontbother; /* this one's always in bytes! */
1634 /* We don't know much -- general case. */
1635 if (UTF) {
1636 for (;;) {
1637 if (regtry(prog, s))
1638 goto got_it;
1639 if (s >= strend)
1640 break;
b8c5462f 1641 s += UTF8SKIP(s);
d6a28714
JH
1642 };
1643 }
1644 else {
1645 do {
1646 if (regtry(prog, s))
1647 goto got_it;
1648 } while (s++ < strend);
1649 }
1650 }
1651
1652 /* Failure. */
1653 goto phooey;
1654
1655got_it:
1656 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1657
1658 if (PL_reg_eval_set) {
1659 /* Preserve the current value of $^R */
1660 if (oreplsv != GvSV(PL_replgv))
1661 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1662 restored, the value remains
1663 the same. */
1664 restore_pos(aTHXo_ 0);
1665 }
1666
1667 /* make sure $`, $&, $', and $digit will work later */
1668 if ( !(flags & REXEC_NOT_FIRST) ) {
1669 if (RX_MATCH_COPIED(prog)) {
1670 Safefree(prog->subbeg);
1671 RX_MATCH_COPIED_off(prog);
1672 }
1673 if (flags & REXEC_COPY_STR) {
1674 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1675
1676 s = savepvn(strbeg, i);
1677 prog->subbeg = s;
1678 prog->sublen = i;
1679 RX_MATCH_COPIED_on(prog);
1680 }
1681 else {
1682 prog->subbeg = strbeg;
1683 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1684 }
1685 }
1686
1687 return 1;
1688
1689phooey:
bf93d4cc
GS
1690 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1691 PL_colors[4],PL_colors[5]));
d6a28714
JH
1692 if (PL_reg_eval_set)
1693 restore_pos(aTHXo_ 0);
1694 return 0;
1695}
1696
1697/*
1698 - regtry - try match at specific point
1699 */
1700STATIC I32 /* 0 failure, 1 success */
1701S_regtry(pTHX_ regexp *prog, char *startpos)
1702{
1703 dTHR;
1704 register I32 i;
1705 register I32 *sp;
1706 register I32 *ep;
1707 CHECKPOINT lastcp;
1708
1709 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1710 MAGIC *mg;
1711
1712 PL_reg_eval_set = RS_init;
1713 DEBUG_r(DEBUG_s(
b900a521
JH
1714 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1715 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1716 ));
e8347627 1717 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1718 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1719 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1720 SAVETMPS;
1721 /* Apparently this is not needed, judging by wantarray. */
e8347627 1722 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1723 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1724
1725 if (PL_reg_sv) {
1726 /* Make $_ available to executed code. */
1727 if (PL_reg_sv != DEFSV) {
1728 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1729 SAVESPTR(DEFSV);
1730 DEFSV = PL_reg_sv;
b8c5462f 1731 }
d6a28714
JH
1732
1733 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1734 && (mg = mg_find(PL_reg_sv, 'g')))) {
1735 /* prepare for quick setting of pos */
1736 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1737 mg = mg_find(PL_reg_sv, 'g');
1738 mg->mg_len = -1;
b8c5462f 1739 }
d6a28714
JH
1740 PL_reg_magic = mg;
1741 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1742 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1743 }
1744 if (!PL_reg_curpm)
0f79a09d 1745 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1746 PL_reg_curpm->op_pmregexp = prog;
1747 PL_reg_oldcurpm = PL_curpm;
1748 PL_curpm = PL_reg_curpm;
1749 if (RX_MATCH_COPIED(prog)) {
1750 /* Here is a serious problem: we cannot rewrite subbeg,
1751 since it may be needed if this match fails. Thus
1752 $` inside (?{}) could fail... */
1753 PL_reg_oldsaved = prog->subbeg;
1754 PL_reg_oldsavedlen = prog->sublen;
1755 RX_MATCH_COPIED_off(prog);
1756 }
1757 else
1758 PL_reg_oldsaved = Nullch;
1759 prog->subbeg = PL_bostr;
1760 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1761 }
1762 prog->startp[0] = startpos - PL_bostr;
1763 PL_reginput = startpos;
1764 PL_regstartp = prog->startp;
1765 PL_regendp = prog->endp;
1766 PL_reglastparen = &prog->lastparen;
1767 prog->lastparen = 0;
1768 PL_regsize = 0;
1769 DEBUG_r(PL_reg_starttry = startpos);
1770 if (PL_reg_start_tmpl <= prog->nparens) {
1771 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1772 if(PL_reg_start_tmp)
1773 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1774 else
1775 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1776 }
1777
1778 /* XXXX What this code is doing here?!!! There should be no need
1779 to do this again and again, PL_reglastparen should take care of
1780 this! */
1781 sp = prog->startp;
1782 ep = prog->endp;
1783 if (prog->nparens) {
1784 for (i = prog->nparens; i >= 1; i--) {
1785 *++sp = -1;
1786 *++ep = -1;
1787 }
1788 }
1789 REGCP_SET;
1790 if (regmatch(prog->program + 1)) {
1791 prog->endp[0] = PL_reginput - PL_bostr;
1792 return 1;
1793 }
1794 REGCP_UNWIND;
1795 return 0;
1796}
1797
1798/*
1799 - regmatch - main matching routine
1800 *
1801 * Conceptually the strategy is simple: check to see whether the current
1802 * node matches, call self recursively to see whether the rest matches,
1803 * and then act accordingly. In practice we make some effort to avoid
1804 * recursion, in particular by going through "ordinary" nodes (that don't
1805 * need to know whether the rest of the match failed) by a loop instead of
1806 * by recursion.
1807 */
1808/* [lwall] I've hoisted the register declarations to the outer block in order to
1809 * maybe save a little bit of pushing and popping on the stack. It also takes
1810 * advantage of machines that use a register save mask on subroutine entry.
1811 */
1812STATIC I32 /* 0 failure, 1 success */
1813S_regmatch(pTHX_ regnode *prog)
1814{
1815 dTHR;
1816 register regnode *scan; /* Current node. */
1817 regnode *next; /* Next node. */
1818 regnode *inner; /* Next node in internal branch. */
1819 register I32 nextchr; /* renamed nextchr - nextchar colides with
1820 function of same name */
1821 register I32 n; /* no or next */
1822 register I32 ln; /* len or last */
1823 register char *s; /* operand or save */
1824 register char *locinput = PL_reginput;
1825 register I32 c1, c2, paren; /* case fold search, parenth */
1826 int minmod = 0, sw = 0, logical = 0;
1827#ifdef DEBUGGING
1828 PL_regindent++;
1829#endif
1830
1831 /* Note that nextchr is a byte even in UTF */
1832 nextchr = UCHARAT(locinput);
1833 scan = prog;
1834 while (scan != NULL) {
1835#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1836#ifdef DEBUGGING
1837# define sayYES goto yes
1838# define sayNO goto no
7821416a
IZ
1839# define sayYES_FINAL goto yes_final
1840# define sayYES_LOUD goto yes_loud
1841# define sayNO_FINAL goto no_final
1842# define sayNO_SILENT goto do_no
d6a28714
JH
1843# define saySAME(x) if (x) goto yes; else goto no
1844# define REPORT_CODE_OFF 24
1845#else
1846# define sayYES return 1
1847# define sayNO return 0
7821416a
IZ
1848# define sayYES_FINAL return 1
1849# define sayYES_LOUD return 1
1850# define sayNO_FINAL return 0
1851# define sayNO_SILENT return 0
d6a28714
JH
1852# define saySAME(x) return x
1853#endif
1854 DEBUG_r( {
1855 SV *prop = sv_newmortal();
1856 int docolor = *PL_colors[0];
1857 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1858 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1859 /* The part of the string before starttry has one color
1860 (pref0_len chars), between starttry and current
1861 position another one (pref_len - pref0_len chars),
1862 after the current position the third one.
1863 We assume that pref0_len <= pref_len, otherwise we
1864 decrease pref0_len. */
1865 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1866 ? (5 + taill) - l : locinput - PL_bostr);
1867 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1868
1869 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1870 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1871 ? (5 + taill) - pref_len : PL_regeol - locinput);
1872 if (pref0_len < 0)
1873 pref0_len = 0;
1874 if (pref0_len > pref_len)
1875 pref0_len = pref_len;
1876 regprop(prop, scan);
1877 PerlIO_printf(Perl_debug_log,
b900a521
JH
1878 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1879 (IV)(locinput - PL_bostr),
d6a28714
JH
1880 PL_colors[4], pref0_len,
1881 locinput - pref_len, PL_colors[5],
1882 PL_colors[2], pref_len - pref0_len,
1883 locinput - pref_len + pref0_len, PL_colors[3],
1884 (docolor ? "" : "> <"),
1885 PL_colors[0], l, locinput, PL_colors[1],
1886 15 - l - pref_len + 1,
1887 "",
b900a521 1888 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1889 SvPVX(prop));
1890 } );
1891
1892 next = scan + NEXT_OFF(scan);
1893 if (next == scan)
1894 next = NULL;
1895
1896 switch (OP(scan)) {
1897 case BOL:
1898 if (locinput == PL_bostr
1899 ? PL_regprev == '\n'
1900 : (PL_multiline &&
1901 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1902 {
1903 /* regtill = regbol; */
b8c5462f
JH
1904 break;
1905 }
d6a28714
JH
1906 sayNO;
1907 case MBOL:
1908 if (locinput == PL_bostr
1909 ? PL_regprev == '\n'
1910 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1911 {
b8c5462f
JH
1912 break;
1913 }
d6a28714
JH
1914 sayNO;
1915 case SBOL:
c2a73568 1916 if (locinput == PL_bostr)
b8c5462f 1917 break;
d6a28714
JH
1918 sayNO;
1919 case GPOS:
1920 if (locinput == PL_reg_ganch)
1921 break;
1922 sayNO;
1923 case EOL:
1924 if (PL_multiline)
1925 goto meol;
1926 else
1927 goto seol;
1928 case MEOL:
1929 meol:
1930 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1931 sayNO;
b8c5462f 1932 break;
d6a28714
JH
1933 case SEOL:
1934 seol:
1935 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1936 sayNO;
d6a28714 1937 if (PL_regeol - locinput > 1)
b8c5462f 1938 sayNO;
b8c5462f 1939 break;
d6a28714
JH
1940 case EOS:
1941 if (PL_regeol != locinput)
b8c5462f 1942 sayNO;
d6a28714
JH
1943 break;
1944 case SANYUTF8:
b8c5462f 1945 if (nextchr & 0x80) {
b8c5462f 1946 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1947 if (locinput > PL_regeol)
1948 sayNO;
b8c5462f
JH
1949 nextchr = UCHARAT(locinput);
1950 break;
1951 }
d6a28714 1952 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1953 sayNO;
b8c5462f 1954 nextchr = UCHARAT(++locinput);
a0d0e21e 1955 break;
d6a28714
JH
1956 case SANY:
1957 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1958 sayNO;
1959 nextchr = UCHARAT(++locinput);
b85d18e9 1960 break;
d6a28714 1961 case ANYUTF8:
a0ed51b3 1962 if (nextchr & 0x80) {
b8c5462f 1963 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1964 if (locinput > PL_regeol)
1965 sayNO;
a0ed51b3
LW
1966 nextchr = UCHARAT(locinput);
1967 break;
1968 }
155aba94 1969 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
1970 sayNO;
1971 nextchr = UCHARAT(++locinput);
1972 break;
d6a28714 1973 case REG_ANY:
155aba94 1974 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 1975 sayNO;
76e3520e 1976 nextchr = UCHARAT(++locinput);
a0d0e21e 1977 break;
d6a28714 1978 case EXACT:
cd439c50
IZ
1979 s = STRING(scan);
1980 ln = STR_LEN(scan);
d6a28714
JH
1981 /* Inline the first character, for speed. */
1982 if (UCHARAT(s) != nextchr)
1983 sayNO;
1984 if (PL_regeol - locinput < ln)
1985 sayNO;
1986 if (ln > 1 && memNE(s, locinput, ln))
1987 sayNO;
1988 locinput += ln;
1989 nextchr = UCHARAT(locinput);
1990 break;
1991 case EXACTFL:
b8c5462f
JH
1992 PL_reg_flags |= RF_tainted;
1993 /* FALL THROUGH */
d6a28714 1994 case EXACTF:
cd439c50
IZ
1995 s = STRING(scan);
1996 ln = STR_LEN(scan);
d6a28714
JH
1997
1998 if (UTF) {
1999 char *l = locinput;
2000 char *e = s + ln;
2001 c1 = OP(scan) == EXACTF;
2002 while (s < e) {
2003 if (l >= PL_regeol)
2004 sayNO;
dcad2880 2005 if (utf8_to_uv((U8*)s, e - s, 0, 0) != (c1 ?
d6a28714
JH
2006 toLOWER_utf8((U8*)l) :
2007 toLOWER_LC_utf8((U8*)l)))
2008 {
2009 sayNO;
2010 }
2011 s += UTF8SKIP(s);
2012 l += UTF8SKIP(l);
b8c5462f 2013 }
d6a28714 2014 locinput = l;
a0ed51b3
LW
2015 nextchr = UCHARAT(locinput);
2016 break;
2017 }
d6a28714
JH
2018
2019 /* Inline the first character, for speed. */
2020 if (UCHARAT(s) != nextchr &&
2021 UCHARAT(s) != ((OP(scan) == EXACTF)
2022 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2023 sayNO;
d6a28714 2024 if (PL_regeol - locinput < ln)
b8c5462f 2025 sayNO;
d6a28714
JH
2026 if (ln > 1 && (OP(scan) == EXACTF
2027 ? ibcmp(s, locinput, ln)
2028 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2029 sayNO;
d6a28714
JH
2030 locinput += ln;
2031 nextchr = UCHARAT(locinput);
a0d0e21e 2032 break;
d6a28714 2033 case ANYOFUTF8:
d6a28714 2034 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 2035 sayNO;
d6a28714
JH
2036 if (locinput >= PL_regeol)
2037 sayNO;
2038 locinput += PL_utf8skip[nextchr];
2039 nextchr = UCHARAT(locinput);
2040 break;
2041 case ANYOF:
d6a28714 2042 if (nextchr < 0)
b8c5462f 2043 nextchr = UCHARAT(locinput);
936ed897 2044 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
2045 sayNO;
2046 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2047 sayNO;
b8c5462f
JH
2048 nextchr = UCHARAT(++locinput);
2049 break;
d6a28714 2050 case ALNUML:
b8c5462f
JH
2051 PL_reg_flags |= RF_tainted;
2052 /* FALL THROUGH */
d6a28714 2053 case ALNUM:
b8c5462f 2054 if (!nextchr)
4633a7c4 2055 sayNO;
d6a28714
JH
2056 if (!(OP(scan) == ALNUM
2057 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
2058 sayNO;
2059 nextchr = UCHARAT(++locinput);
bbce6d69 2060 break;
d6a28714 2061 case ALNUMLUTF8:
3280af22 2062 PL_reg_flags |= RF_tainted;
bbce6d69 2063 /* FALL THROUGH */
d6a28714 2064 case ALNUMUTF8:
b8c5462f
JH
2065 if (!nextchr)
2066 sayNO;
2067 if (nextchr & 0x80) {
d6a28714
JH
2068 if (!(OP(scan) == ALNUMUTF8
2069 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2070 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2071 {
2072 sayNO;
a0ed51b3 2073 }
b8c5462f 2074 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2075 nextchr = UCHARAT(locinput);
2076 break;
2077 }
d6a28714
JH
2078 if (!(OP(scan) == ALNUMUTF8
2079 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2080 sayNO;
b8c5462f 2081 nextchr = UCHARAT(++locinput);
a0d0e21e 2082 break;
d6a28714 2083 case NALNUML:
b8c5462f
JH
2084 PL_reg_flags |= RF_tainted;
2085 /* FALL THROUGH */
d6a28714
JH
2086 case NALNUM:
2087 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2088 sayNO;
d6a28714
JH
2089 if (OP(scan) == NALNUM
2090 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2091 sayNO;
b8c5462f 2092 nextchr = UCHARAT(++locinput);
a0ed51b3 2093 break;
d6a28714 2094 case NALNUMLUTF8:
b8c5462f
JH
2095 PL_reg_flags |= RF_tainted;
2096 /* FALL THROUGH */
d6a28714 2097 case NALNUMUTF8:
3280af22 2098 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2099 sayNO;
b8c5462f 2100 if (nextchr & 0x80) {
d6a28714
JH
2101 if (OP(scan) == NALNUMUTF8
2102 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2103 : isALNUM_LC_utf8((U8*)locinput))
2104 {
b8c5462f 2105 sayNO;
d6a28714 2106 }
b8c5462f
JH
2107 locinput += PL_utf8skip[nextchr];
2108 nextchr = UCHARAT(locinput);
2109 break;
2110 }
d6a28714
JH
2111 if (OP(scan) == NALNUMUTF8
2112 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2113 sayNO;
76e3520e 2114 nextchr = UCHARAT(++locinput);
a0d0e21e 2115 break;
d6a28714
JH
2116 case BOUNDL:
2117 case NBOUNDL:
3280af22 2118 PL_reg_flags |= RF_tainted;
bbce6d69 2119 /* FALL THROUGH */
d6a28714
JH
2120 case BOUND:
2121 case NBOUND:
2122 /* was last char in word? */
2123 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2124 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2125 ln = isALNUM(ln);
2126 n = isALNUM(nextchr);
2127 }
2128 else {
2129 ln = isALNUM_LC(ln);
2130 n = isALNUM_LC(nextchr);
2131 }
2132 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2133 sayNO;
a0d0e21e 2134 break;
d6a28714
JH
2135 case BOUNDLUTF8:
2136 case NBOUNDLUTF8:
a0ed51b3
LW
2137 PL_reg_flags |= RF_tainted;
2138 /* FALL THROUGH */
d6a28714
JH
2139 case BOUNDUTF8:
2140 case NBOUNDUTF8:
2141 /* was last char in word? */
2142 ln = (locinput != PL_regbol)
dcad2880 2143 ? utf8_to_uv(reghop((U8*)locinput, -1),
ba210ebe 2144 PL_regeol - locinput, 0, 0) : PL_regprev;
d6a28714
JH
2145 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2146 ln = isALNUM_uni(ln);
2147 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2148 }
d6a28714
JH
2149 else {
2150 ln = isALNUM_LC_uni(ln);
2151 n = isALNUM_LC_utf8((U8*)locinput);
2152 }
2153 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2154 sayNO;
a0ed51b3 2155 break;
d6a28714 2156 case SPACEL:
3280af22 2157 PL_reg_flags |= RF_tainted;
bbce6d69 2158 /* FALL THROUGH */
d6a28714 2159 case SPACE:
9442cb0e 2160 if (!nextchr)
4633a7c4 2161 sayNO;
d6a28714
JH
2162 if (!(OP(scan) == SPACE
2163 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2164 sayNO;
76e3520e 2165 nextchr = UCHARAT(++locinput);
a0d0e21e 2166 break;
d6a28714 2167 case SPACELUTF8:
a0ed51b3
LW
2168 PL_reg_flags |= RF_tainted;
2169 /* FALL THROUGH */
d6a28714 2170 case SPACEUTF8:
9442cb0e 2171 if (!nextchr)
a0ed51b3
LW
2172 sayNO;
2173 if (nextchr & 0x80) {
d6a28714 2174 if (!(OP(scan) == SPACEUTF8
9442cb0e 2175 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2176 : isSPACE_LC_utf8((U8*)locinput)))
2177 {
a0ed51b3 2178 sayNO;
d6a28714 2179 }
6f06b55f 2180 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2181 nextchr = UCHARAT(locinput);
2182 break;
2183 }
d6a28714
JH
2184 if (!(OP(scan) == SPACEUTF8
2185 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2186 sayNO;
2187 nextchr = UCHARAT(++locinput);
2188 break;
d6a28714 2189 case NSPACEL:
3280af22 2190 PL_reg_flags |= RF_tainted;
bbce6d69 2191 /* FALL THROUGH */
d6a28714 2192 case NSPACE:
9442cb0e 2193 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2194 sayNO;
9442cb0e 2195 if (OP(scan) == NSPACE
d6a28714 2196 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2197 sayNO;
b8c5462f 2198 nextchr = UCHARAT(++locinput);
a0d0e21e 2199 break;
d6a28714 2200 case NSPACELUTF8:
a0ed51b3
LW
2201 PL_reg_flags |= RF_tainted;
2202 /* FALL THROUGH */
d6a28714 2203 case NSPACEUTF8:
9442cb0e 2204 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2205 sayNO;
2206 if (nextchr & 0x80) {
d6a28714 2207 if (OP(scan) == NSPACEUTF8
9442cb0e 2208 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2209 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2210 {
2211 sayNO;
2212 }
2213 locinput += PL_utf8skip[nextchr];
2214 nextchr = UCHARAT(locinput);
2215 break;
a0ed51b3 2216 }
d6a28714
JH
2217 if (OP(scan) == NSPACEUTF8
2218 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2219 sayNO;
76e3520e 2220 nextchr = UCHARAT(++locinput);
a0d0e21e 2221 break;
d6a28714 2222 case DIGITL:
a0ed51b3
LW
2223 PL_reg_flags |= RF_tainted;
2224 /* FALL THROUGH */
d6a28714 2225 case DIGIT:
9442cb0e 2226 if (!nextchr)
a0ed51b3 2227 sayNO;
d6a28714
JH
2228 if (!(OP(scan) == DIGIT
2229 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2230 sayNO;
76e3520e 2231 nextchr = UCHARAT(++locinput);
a0d0e21e 2232 break;
d6a28714 2233 case DIGITLUTF8:
a0ed51b3
LW
2234 PL_reg_flags |= RF_tainted;
2235 /* FALL THROUGH */
d6a28714 2236 case DIGITUTF8:
a0ed51b3
LW
2237 if (!nextchr)
2238 sayNO;
2239 if (nextchr & 0x80) {
9442cb0e
GS
2240 if (!(OP(scan) == DIGITUTF8
2241 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2242 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2243 {
a0ed51b3 2244 sayNO;
dfe13c55 2245 }
6f06b55f 2246 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2247 nextchr = UCHARAT(locinput);
2248 break;
2249 }
9442cb0e
GS
2250 if (!(OP(scan) == DIGITUTF8
2251 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2252 sayNO;
2253 nextchr = UCHARAT(++locinput);
2254 break;
d6a28714 2255 case NDIGITL:
b8c5462f
JH
2256 PL_reg_flags |= RF_tainted;
2257 /* FALL THROUGH */
d6a28714 2258 case NDIGIT:
9442cb0e 2259 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2260 sayNO;
9442cb0e 2261 if (OP(scan) == NDIGIT
d6a28714 2262 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2263 sayNO;
76e3520e 2264 nextchr = UCHARAT(++locinput);
a0d0e21e 2265 break;
d6a28714 2266 case NDIGITLUTF8:
b8c5462f
JH
2267 PL_reg_flags |= RF_tainted;
2268 /* FALL THROUGH */
d6a28714 2269 case NDIGITUTF8:
b8c5462f
JH
2270 if (!nextchr && locinput >= PL_regeol)
2271 sayNO;
a0ed51b3 2272 if (nextchr & 0x80) {
9442cb0e
GS
2273 if (OP(scan) == NDIGITUTF8
2274 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2275 : isDIGIT_LC_utf8((U8*)locinput))
2276 {
a0ed51b3 2277 sayNO;
9442cb0e 2278 }
6f06b55f 2279 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2280 nextchr = UCHARAT(locinput);
2281 break;
2282 }
9442cb0e
GS
2283 if (OP(scan) == NDIGITUTF8
2284 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2285 sayNO;
2286 nextchr = UCHARAT(++locinput);
2287 break;
2288 case CLUMP:
dfe13c55 2289 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2290 sayNO;
6f06b55f 2291 locinput += PL_utf8skip[nextchr];
dfe13c55 2292 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2293 locinput += UTF8SKIP(locinput);
2294 if (locinput > PL_regeol)
2295 sayNO;
2296 nextchr = UCHARAT(locinput);
2297 break;
c8756f30 2298 case REFFL:
3280af22 2299 PL_reg_flags |= RF_tainted;
c8756f30 2300 /* FALL THROUGH */
c277df42 2301 case REF:
c8756f30 2302 case REFF:
c277df42 2303 n = ARG(scan); /* which paren pair */
cf93c79d 2304 ln = PL_regstartp[n];
2c2d71f5 2305 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2306 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2307 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2308 if (ln == PL_regendp[n])
a0d0e21e 2309 break;
a0ed51b3 2310
cf93c79d 2311 s = PL_bostr + ln;
a0ed51b3
LW
2312 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2313 char *l = locinput;
cf93c79d 2314 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2315 /*
2316 * Note that we can't do the "other character" lookup trick as
2317 * in the 8-bit case (no pun intended) because in Unicode we
2318 * have to map both upper and title case to lower case.
2319 */
2320 if (OP(scan) == REFF) {
2321 while (s < e) {
2322 if (l >= PL_regeol)
2323 sayNO;
dfe13c55 2324 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2325 sayNO;
2326 s += UTF8SKIP(s);
2327 l += UTF8SKIP(l);
2328 }
2329 }
2330 else {
2331 while (s < e) {
2332 if (l >= PL_regeol)
2333 sayNO;
dfe13c55 2334 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2335 sayNO;
2336 s += UTF8SKIP(s);
2337 l += UTF8SKIP(l);
2338 }
2339 }
2340 locinput = l;
2341 nextchr = UCHARAT(locinput);
2342 break;
2343 }
2344
a0d0e21e 2345 /* Inline the first character, for speed. */
76e3520e 2346 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2347 (OP(scan) == REF ||
2348 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2349 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2350 sayNO;
cf93c79d 2351 ln = PL_regendp[n] - ln;
3280af22 2352 if (locinput + ln > PL_regeol)
4633a7c4 2353 sayNO;
c8756f30
AK
2354 if (ln > 1 && (OP(scan) == REF
2355 ? memNE(s, locinput, ln)
2356 : (OP(scan) == REFF
2357 ? ibcmp(s, locinput, ln)
2358 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2359 sayNO;
a0d0e21e 2360 locinput += ln;
76e3520e 2361 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2362 break;
2363
2364 case NOTHING:
c277df42 2365 case TAIL:
a0d0e21e
LW
2366 break;
2367 case BACK:
2368 break;
c277df42
IZ
2369 case EVAL:
2370 {
2371 dSP;
533c011a 2372 OP_4tree *oop = PL_op;
3280af22
NIS
2373 COP *ocurcop = PL_curcop;
2374 SV **ocurpad = PL_curpad;
c277df42
IZ
2375 SV *ret;
2376
2377 n = ARG(scan);
533c011a 2378 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2379 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2380 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2381 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2382
cea2e8a9 2383 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2384 SPAGAIN;
2385 ret = POPs;
2386 PUTBACK;
2387
0f5d15d6
IZ
2388 PL_op = oop;
2389 PL_curpad = ocurpad;
2390 PL_curcop = ocurcop;
c277df42 2391 if (logical) {
0f5d15d6
IZ
2392 if (logical == 2) { /* Postponed subexpression. */
2393 regexp *re;
22c35a8c 2394 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2395 re_cc_state state;
0f5d15d6
IZ
2396 CHECKPOINT cp, lastcp;
2397
2398 if(SvROK(ret) || SvRMAGICAL(ret)) {
2399 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2400
2401 if(SvMAGICAL(sv))
2402 mg = mg_find(sv, 'r');
2403 }
2404 if (mg) {
2405 re = (regexp *)mg->mg_obj;
df0003d4 2406 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2407 }
2408 else {
2409 STRLEN len;
2410 char *t = SvPV(ret, len);
2411 PMOP pm;
2412 char *oprecomp = PL_regprecomp;
2413 I32 osize = PL_regsize;
2414 I32 onpar = PL_regnpar;
2415
2416 pm.op_pmflags = 0;
393fec97 2417 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2418 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2419 if (!(SvFLAGS(ret)
2420 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2421 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2422 PL_regprecomp = oprecomp;
2423 PL_regsize = osize;
2424 PL_regnpar = onpar;
2425 }
2426 DEBUG_r(
2427 PerlIO_printf(Perl_debug_log,
2428 "Entering embedded `%s%.60s%s%s'\n",
2429 PL_colors[0],
2430 re->precomp,
2431 PL_colors[1],
2432 (strlen(re->precomp) > 60 ? "..." : ""))
2433 );
2434 state.node = next;
2435 state.prev = PL_reg_call_cc;
2436 state.cc = PL_regcc;
2437 state.re = PL_reg_re;
2438
2ab05381 2439 PL_regcc = 0;
0f5d15d6
IZ
2440
2441 cp = regcppush(0); /* Save *all* the positions. */
2442 REGCP_SET;
2443 cache_re(re);
2444 state.ss = PL_savestack_ix;
2445 *PL_reglastparen = 0;
2446 PL_reg_call_cc = &state;
2447 PL_reginput = locinput;
2c2d71f5
JH
2448
2449 /* XXXX This is too dramatic a measure... */
2450 PL_reg_maxiter = 0;
2451
0f5d15d6 2452 if (regmatch(re->program + 1)) {
2c914db6
IZ
2453 /* Even though we succeeded, we need to restore
2454 global variables, since we may be wrapped inside
2455 SUSPEND, thus the match may be not finished yet. */
2456
2457 /* XXXX Do this only if SUSPENDed? */
2458 PL_reg_call_cc = state.prev;
2459 PL_regcc = state.cc;
2460 PL_reg_re = state.re;
2461 cache_re(PL_reg_re);
2462
2463 /* XXXX This is too dramatic a measure... */
2464 PL_reg_maxiter = 0;
2465
2466 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2467 ReREFCNT_dec(re);
2468 regcpblow(cp);
2469 sayYES;
2470 }
0f5d15d6
IZ
2471 ReREFCNT_dec(re);
2472 REGCP_UNWIND;
2473 regcppop();
2474 PL_reg_call_cc = state.prev;
2475 PL_regcc = state.cc;
2476 PL_reg_re = state.re;
d3790889 2477 cache_re(PL_reg_re);
2c2d71f5
JH
2478
2479 /* XXXX This is too dramatic a measure... */
2480 PL_reg_maxiter = 0;
2481
0f5d15d6
IZ
2482 sayNO;
2483 }
c277df42 2484 sw = SvTRUE(ret);
0f5d15d6 2485 logical = 0;
a0ed51b3
LW
2486 }
2487 else
3280af22 2488 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2489 break;
2490 }
a0d0e21e 2491 case OPEN:
c277df42 2492 n = ARG(scan); /* which paren pair */
3280af22
NIS
2493 PL_reg_start_tmp[n] = locinput;
2494 if (n > PL_regsize)
2495 PL_regsize = n;
a0d0e21e
LW
2496 break;
2497 case CLOSE:
c277df42 2498 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2499 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2500 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2501 if (n > *PL_reglastparen)
2502 *PL_reglastparen = n;
a0d0e21e 2503 break;
c277df42
IZ
2504 case GROUPP:
2505 n = ARG(scan); /* which paren pair */
cf93c79d 2506 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2507 break;
2508 case IFTHEN:
2c2d71f5 2509 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2510 if (sw)
2511 next = NEXTOPER(NEXTOPER(scan));
2512 else {
2513 next = scan + ARG(scan);
2514 if (OP(next) == IFTHEN) /* Fake one. */
2515 next = NEXTOPER(NEXTOPER(next));
2516 }
2517 break;
2518 case LOGICAL:
0f5d15d6 2519 logical = scan->flags;
c277df42 2520 break;
2ab05381
IZ
2521/*******************************************************************
2522 PL_regcc contains infoblock about the innermost (...)* loop, and
2523 a pointer to the next outer infoblock.
2524
2525 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2526
2527 1) After matching X, regnode for CURLYX is processed;
2528
2529 2) This regnode creates infoblock on the stack, and calls
2530 regmatch() recursively with the starting point at WHILEM node;
2531
2532 3) Each hit of WHILEM node tries to match A and Z (in the order
2533 depending on the current iteration, min/max of {min,max} and
2534 greediness). The information about where are nodes for "A"
2535 and "Z" is read from the infoblock, as is info on how many times "A"
2536 was already matched, and greediness.
2537
2538 4) After A matches, the same WHILEM node is hit again.
2539
2540 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2541 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2542 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2543 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2544 of the external loop.
2545
2546 Currently present infoblocks form a tree with a stem formed by PL_curcc
2547 and whatever it mentions via ->next, and additional attached trees
2548 corresponding to temporarily unset infoblocks as in "5" above.
2549
2550 In the following picture infoblocks for outer loop of
2551 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2552 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2553 infoblocks are drawn below the "reset" infoblock.
2554
2555 In fact in the picture below we do not show failed matches for Z and T
2556 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2557 more obvious *why* one needs to *temporary* unset infoblocks.]
2558
2559 Matched REx position InfoBlocks Comment
2560 (Y(A)*?Z)*?T x
2561 Y(A)*?Z)*?T x <- O
2562 Y (A)*?Z)*?T x <- O
2563 Y A)*?Z)*?T x <- O <- I
2564 YA )*?Z)*?T x <- O <- I
2565 YA A)*?Z)*?T x <- O <- I
2566 YAA )*?Z)*?T x <- O <- I
2567 YAA Z)*?T x <- O # Temporary unset I
2568 I
2569
2570 YAAZ Y(A)*?Z)*?T x <- O
2571 I
2572
2573 YAAZY (A)*?Z)*?T x <- O
2574 I
2575
2576 YAAZY A)*?Z)*?T x <- O <- I
2577 I
2578
2579 YAAZYA )*?Z)*?T x <- O <- I
2580 I
2581
2582 YAAZYA Z)*?T x <- O # Temporary unset I
2583 I,I
2584
2585 YAAZYAZ )*?T x <- O
2586 I,I
2587
2588 YAAZYAZ T x # Temporary unset O
2589 O
2590 I,I
2591
2592 YAAZYAZT x
2593 O
2594 I,I
2595 *******************************************************************/
a0d0e21e
LW
2596 case CURLYX: {
2597 CURCUR cc;
3280af22 2598 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2599 /* No need to save/restore up to this paren */
2600 I32 parenfloor = scan->flags;
c277df42
IZ
2601
2602 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2603 next += ARG(next);
3280af22
NIS
2604 cc.oldcc = PL_regcc;
2605 PL_regcc = &cc;
cb434fcc
IZ
2606 /* XXXX Probably it is better to teach regpush to support
2607 parenfloor > PL_regsize... */
2608 if (parenfloor > *PL_reglastparen)
2609 parenfloor = *PL_reglastparen; /* Pessimization... */
2610 cc.parenfloor = parenfloor;
a0d0e21e
LW
2611 cc.cur = -1;
2612 cc.min = ARG1(scan);
2613 cc.max = ARG2(scan);
c277df42 2614 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2615 cc.next = next;
2616 cc.minmod = minmod;
2617 cc.lastloc = 0;
3280af22 2618 PL_reginput = locinput;
a0d0e21e
LW
2619 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2620 regcpblow(cp);
3280af22 2621 PL_regcc = cc.oldcc;
4633a7c4 2622 saySAME(n);
a0d0e21e
LW
2623 }
2624 /* NOT REACHED */
2625 case WHILEM: {
2626 /*
2627 * This is really hard to understand, because after we match
2628 * what we're trying to match, we must make sure the rest of
2c2d71f5 2629 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2630 * to go back UP the parse tree by recursing ever deeper. And
2631 * if it fails, we have to reset our parent's current state
2632 * that we can try again after backing off.
2633 */
2634
c277df42 2635 CHECKPOINT cp, lastcp;
3280af22 2636 CURCUR* cc = PL_regcc;
c277df42
IZ
2637 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2638
4633a7c4 2639 n = cc->cur + 1; /* how many we know we matched */
3280af22 2640 PL_reginput = locinput;
a0d0e21e 2641
c277df42
IZ
2642 DEBUG_r(
2643 PerlIO_printf(Perl_debug_log,
2644 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2645 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2646 (long)n, (long)cc->min,
2647 (long)cc->max, (long)cc)
2648 );
4633a7c4 2649
a0d0e21e
LW
2650 /* If degenerate scan matches "", assume scan done. */
2651
579cf2c3 2652 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2653 PL_regcc = cc->oldcc;
2ab05381
IZ
2654 if (PL_regcc)
2655 ln = PL_regcc->cur;
c277df42 2656 DEBUG_r(
c3464db5
DD
2657 PerlIO_printf(Perl_debug_log,
2658 "%*s empty match detected, try continuation...\n",
3280af22 2659 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2660 );
a0d0e21e 2661 if (regmatch(cc->next))
4633a7c4 2662 sayYES;
2ab05381
IZ
2663 if (PL_regcc)
2664 PL_regcc->cur = ln;
3280af22 2665 PL_regcc = cc;
4633a7c4 2666 sayNO;
a0d0e21e
LW
2667 }
2668
2669 /* First just match a string of min scans. */
2670
2671 if (n < cc->min) {
2672 cc->cur = n;
2673 cc->lastloc = locinput;
4633a7c4
LW
2674 if (regmatch(cc->scan))
2675 sayYES;
2676 cc->cur = n - 1;
c277df42 2677 cc->lastloc = lastloc;
4633a7c4 2678 sayNO;
a0d0e21e
LW
2679 }
2680
2c2d71f5
JH
2681 if (scan->flags) {
2682 /* Check whether we already were at this position.
2683 Postpone detection until we know the match is not
2684 *that* much linear. */
2685 if (!PL_reg_maxiter) {
2686 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2687 PL_reg_leftiter = PL_reg_maxiter;
2688 }
2689 if (PL_reg_leftiter-- == 0) {
2690 I32 size = (PL_reg_maxiter + 7)/8;
2691 if (PL_reg_poscache) {
2692 if (PL_reg_poscache_size < size) {
2693 Renew(PL_reg_poscache, size, char);
2694 PL_reg_poscache_size = size;
2695 }
2696 Zero(PL_reg_poscache, size, char);
2697 }
2698 else {
2699 PL_reg_poscache_size = size;
2700 Newz(29, PL_reg_poscache, size, char);
2701 }
2702 DEBUG_r(
2703 PerlIO_printf(Perl_debug_log,
2704 "%sDetected a super-linear match, switching on caching%s...\n",
2705 PL_colors[4], PL_colors[5])
2706 );
2707 }
2708 if (PL_reg_leftiter < 0) {
2709 I32 o = locinput - PL_bostr, b;
2710
2711 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2712 b = o % 8;
2713 o /= 8;
2714 if (PL_reg_poscache[o] & (1<<b)) {
2715 DEBUG_r(
2716 PerlIO_printf(Perl_debug_log,
2717 "%*s already tried at this position...\n",
2718 REPORT_CODE_OFF+PL_regindent*2, "")
2719 );
7821416a 2720 sayNO_SILENT;
2c2d71f5
JH
2721 }
2722 PL_reg_poscache[o] |= (1<<b);
2723 }
2724 }
2725
a0d0e21e
LW
2726 /* Prefer next over scan for minimal matching. */
2727
2728 if (cc->minmod) {
3280af22 2729 PL_regcc = cc->oldcc;
2ab05381
IZ
2730 if (PL_regcc)
2731 ln = PL_regcc->cur;
5f05dabc 2732 cp = regcppush(cc->parenfloor);
c277df42 2733 REGCP_SET;
5f05dabc 2734 if (regmatch(cc->next)) {
c277df42 2735 regcpblow(cp);
4633a7c4 2736 sayYES; /* All done. */
5f05dabc 2737 }
c277df42 2738 REGCP_UNWIND;
5f05dabc 2739 regcppop();
2ab05381
IZ
2740 if (PL_regcc)
2741 PL_regcc->cur = ln;
3280af22 2742 PL_regcc = cc;
a0d0e21e 2743
c277df42 2744 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2745 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2746 && !(PL_reg_flags & RF_warned)) {
2747 PL_reg_flags |= RF_warned;
e476b1b5 2748 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2749 "Complex regular subexpression recursion",
2750 REG_INFTY - 1);
c277df42 2751 }
4633a7c4 2752 sayNO;
c277df42 2753 }
a687059c 2754
c277df42 2755 DEBUG_r(
c3464db5
DD
2756 PerlIO_printf(Perl_debug_log,
2757 "%*s trying longer...\n",
3280af22 2758 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2759 );
a0d0e21e 2760 /* Try scanning more and see if it helps. */
3280af22 2761 PL_reginput = locinput;
a0d0e21e
LW
2762 cc->cur = n;
2763 cc->lastloc = locinput;
5f05dabc 2764 cp = regcppush(cc->parenfloor);
c277df42 2765 REGCP_SET;
5f05dabc 2766 if (regmatch(cc->scan)) {
c277df42 2767 regcpblow(cp);
4633a7c4 2768 sayYES;
5f05dabc 2769 }
c277df42 2770 REGCP_UNWIND;
5f05dabc 2771 regcppop();
4633a7c4 2772 cc->cur = n - 1;
c277df42 2773 cc->lastloc = lastloc;
4633a7c4 2774 sayNO;
a0d0e21e
LW
2775 }
2776
2777 /* Prefer scan over next for maximal matching. */
2778
2779 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2780 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2781 cc->cur = n;
2782 cc->lastloc = locinput;
c277df42 2783 REGCP_SET;
5f05dabc 2784 if (regmatch(cc->scan)) {
c277df42 2785 regcpblow(cp);
4633a7c4 2786 sayYES;
5f05dabc 2787 }
c277df42 2788 REGCP_UNWIND;
a0d0e21e 2789 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2790 PL_reginput = locinput;
c277df42 2791 DEBUG_r(
c3464db5
DD
2792 PerlIO_printf(Perl_debug_log,
2793 "%*s failed, try continuation...\n",
3280af22 2794 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2795 );
2796 }
e476b1b5 2797 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2798 && !(PL_reg_flags & RF_warned)) {
3280af22 2799 PL_reg_flags |= RF_warned;
e476b1b5 2800 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2801 "Complex regular subexpression recursion",
2802 REG_INFTY - 1);
a0d0e21e
LW
2803 }
2804
2805 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2806 PL_regcc = cc->oldcc;
2ab05381
IZ
2807 if (PL_regcc)
2808 ln = PL_regcc->cur;
a0d0e21e 2809 if (regmatch(cc->next))
4633a7c4 2810 sayYES;
2ab05381
IZ
2811 if (PL_regcc)
2812 PL_regcc->cur = ln;
3280af22 2813 PL_regcc = cc;
4633a7c4 2814 cc->cur = n - 1;
c277df42 2815 cc->lastloc = lastloc;
4633a7c4 2816 sayNO;
a0d0e21e
LW
2817 }
2818 /* NOT REACHED */
c277df42
IZ
2819 case BRANCHJ:
2820 next = scan + ARG(scan);
2821 if (next == scan)
2822 next = NULL;
2823 inner = NEXTOPER(NEXTOPER(scan));
2824 goto do_branch;
2825 case BRANCH:
2826 inner = NEXTOPER(scan);
2827 do_branch:
2828 {
2829 CHECKPOINT lastcp;
2830 c1 = OP(scan);
2831 if (OP(next) != c1) /* No choice. */
2832 next = inner; /* Avoid recursion. */
a0d0e21e 2833 else {
3280af22 2834 int lastparen = *PL_reglastparen;
c277df42
IZ
2835
2836 REGCP_SET;
a0d0e21e 2837 do {
3280af22 2838 PL_reginput = locinput;
c277df42 2839 if (regmatch(inner))
4633a7c4 2840 sayYES;
c277df42 2841 REGCP_UNWIND;
3280af22 2842 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2843 PL_regendp[n] = -1;
3280af22 2844 *PL_reglastparen = n;
c277df42 2845 scan = next;
a0d0e21e 2846 /*SUPPRESS 560*/
155aba94 2847 if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))))
c277df42 2848 next += n;
a0d0e21e 2849 else
c277df42 2850 next = NULL;
c277df42
IZ
2851 inner = NEXTOPER(scan);
2852 if (c1 == BRANCHJ) {
2853 inner = NEXTOPER(inner);
2854 }
2855 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2856 sayNO;
a0d0e21e 2857 /* NOTREACHED */
a687059c 2858 }
a0d0e21e
LW
2859 }
2860 break;
2861 case MINMOD:
2862 minmod = 1;
2863 break;
c277df42
IZ
2864 case CURLYM:
2865 {
00db4c45 2866 I32 l = 0;
c277df42
IZ
2867 CHECKPOINT lastcp;
2868
2869 /* We suppose that the next guy does not need
2870 backtracking: in particular, it is of constant length,
2871 and has no parenths to influence future backrefs. */
2872 ln = ARG1(scan); /* min to match */
2873 n = ARG2(scan); /* max to match */
c277df42
IZ
2874 paren = scan->flags;
2875 if (paren) {
3280af22
NIS
2876 if (paren > PL_regsize)
2877 PL_regsize = paren;
2878 if (paren > *PL_reglastparen)
2879 *PL_reglastparen = paren;
c277df42 2880 }
dc45a647 2881 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2882 if (paren)
2883 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2884 PL_reginput = locinput;
c277df42
IZ
2885 if (minmod) {
2886 minmod = 0;
2887 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2888 sayNO;
5f4b28b2 2889 if (ln && l == 0 && n >= ln
c277df42
IZ
2890 /* In fact, this is tricky. If paren, then the
2891 fact that we did/didnot match may influence
2892 future execution. */
2893 && !(paren && ln == 0))
2894 ln = n;
3280af22 2895 locinput = PL_reginput;
22c35a8c 2896 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2897 c1 = (U8)*STRING(next);
c277df42 2898 if (OP(next) == EXACTF)
22c35a8c 2899 c2 = PL_fold[c1];
c277df42 2900 else if (OP(next) == EXACTFL)
22c35a8c 2901 c2 = PL_fold_locale[c1];
c277df42
IZ
2902 else
2903 c2 = c1;
a0ed51b3
LW
2904 }
2905 else
c277df42
IZ
2906 c1 = c2 = -1000;
2907 REGCP_SET;
5f4b28b2 2908 /* This may be improved if l == 0. */
c277df42
IZ
2909 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2910 /* If it could work, try it. */
2911 if (c1 == -1000 ||
3280af22
NIS
2912 UCHARAT(PL_reginput) == c1 ||
2913 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2914 {
2915 if (paren) {
2916 if (n) {
cf93c79d
IZ
2917 PL_regstartp[paren] =
2918 HOPc(PL_reginput, -l) - PL_bostr;
2919 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2920 }
2921 else
cf93c79d 2922 PL_regendp[paren] = -1;
c277df42
IZ
2923 }
2924 if (regmatch(next))
2925 sayYES;
2926 REGCP_UNWIND;
2927 }
2928 /* Couldn't or didn't -- move forward. */
3280af22 2929 PL_reginput = locinput;
c277df42
IZ
2930 if (regrepeat_hard(scan, 1, &l)) {
2931 ln++;
3280af22 2932 locinput = PL_reginput;
c277df42
IZ
2933 }
2934 else
2935 sayNO;
2936 }
a0ed51b3
LW
2937 }
2938 else {
c277df42
IZ
2939 n = regrepeat_hard(scan, n, &l);
2940 if (n != 0 && l == 0
2941 /* In fact, this is tricky. If paren, then the
2942 fact that we did/didnot match may influence
2943 future execution. */
2944 && !(paren && ln == 0))
2945 ln = n;
3280af22 2946 locinput = PL_reginput;
c277df42 2947 DEBUG_r(
5c0ca799 2948 PerlIO_printf(Perl_debug_log,
faccc32b 2949 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2950 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2951 (IV) n, (IV)l)
c277df42
IZ
2952 );
2953 if (n >= ln) {
22c35a8c 2954 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2955 c1 = (U8)*STRING(next);
c277df42 2956 if (OP(next) == EXACTF)
22c35a8c 2957 c2 = PL_fold[c1];
c277df42 2958 else if (OP(next) == EXACTFL)
22c35a8c 2959 c2 = PL_fold_locale[c1];
c277df42
IZ
2960 else
2961 c2 = c1;
a0ed51b3
LW
2962 }
2963 else
c277df42
IZ
2964 c1 = c2 = -1000;
2965 }
2966 REGCP_SET;
2967 while (n >= ln) {
2968 /* If it could work, try it. */
2969 if (c1 == -1000 ||
3280af22
NIS
2970 UCHARAT(PL_reginput) == c1 ||
2971 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2972 {
2973 DEBUG_r(
c3464db5 2974 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2975 "%*s trying tail with n=%"IVdf"...\n",
2976 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2977 );
2978 if (paren) {
2979 if (n) {
cf93c79d
IZ
2980 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2981 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2982 }
a0ed51b3 2983 else
cf93c79d 2984 PL_regendp[paren] = -1;
c277df42 2985 }
a0ed51b3
LW
2986 if (regmatch(next))
2987 sayYES;
2988 REGCP_UNWIND;
2989 }
c277df42
IZ
2990 /* Couldn't or didn't -- back up. */
2991 n--;
dfe13c55 2992 locinput = HOPc(locinput, -l);
3280af22 2993 PL_reginput = locinput;
c277df42
IZ
2994 }
2995 }
2996 sayNO;
2997 break;
2998 }
2999 case CURLYN:
3000 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3001 if (paren > PL_regsize)
3002 PL_regsize = paren;
3003 if (paren > *PL_reglastparen)
3004 *PL_reglastparen = paren;
c277df42
IZ
3005 ln = ARG1(scan); /* min to match */
3006 n = ARG2(scan); /* max to match */
dc45a647 3007 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3008 goto repeat;
a0d0e21e 3009 case CURLY:
c277df42 3010 paren = 0;
a0d0e21e
LW
3011 ln = ARG1(scan); /* min to match */
3012 n = ARG2(scan); /* max to match */
dc45a647 3013 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3014 goto repeat;
3015 case STAR:
3016 ln = 0;
c277df42 3017 n = REG_INFTY;
a0d0e21e 3018 scan = NEXTOPER(scan);
c277df42 3019 paren = 0;
a0d0e21e
LW
3020 goto repeat;
3021 case PLUS:
c277df42
IZ
3022 ln = 1;
3023 n = REG_INFTY;
3024 scan = NEXTOPER(scan);
3025 paren = 0;
3026 repeat:
a0d0e21e
LW
3027 /*
3028 * Lookahead to avoid useless match attempts
3029 * when we know what character comes next.
3030 */
22c35a8c 3031 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3032 c1 = (U8)*STRING(next);
bbce6d69 3033 if (OP(next) == EXACTF)
22c35a8c 3034 c2 = PL_fold[c1];
bbce6d69 3035 else if (OP(next) == EXACTFL)
22c35a8c 3036 c2 = PL_fold_locale[c1];
bbce6d69 3037 else
3038 c2 = c1;
3039 }
a0d0e21e 3040 else
bbce6d69 3041 c1 = c2 = -1000;
3280af22 3042 PL_reginput = locinput;
a0d0e21e 3043 if (minmod) {
c277df42 3044 CHECKPOINT lastcp;
a0d0e21e
LW
3045 minmod = 0;
3046 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3047 sayNO;
a0ed51b3 3048 locinput = PL_reginput;
c277df42 3049 REGCP_SET;
0fe9bf95
IZ
3050 if (c1 != -1000) {
3051 char *e = locinput + n - ln; /* Should not check after this */
3052 char *old = locinput;
3053
3054 if (e >= PL_regeol || (n == REG_INFTY))
3055 e = PL_regeol - 1;
3056 while (1) {
3057 /* Find place 'next' could work */
3058 if (c1 == c2) {
3059 while (locinput <= e && *locinput != c1)
3060 locinput++;
3061 } else {
3062 while (locinput <= e
3063 && *locinput != c1
3064 && *locinput != c2)
3065 locinput++;
3066 }
3067 if (locinput > e)
3068 sayNO;
3069 /* PL_reginput == old now */
3070 if (locinput != old) {
3071 ln = 1; /* Did some */
3072 if (regrepeat(scan, locinput - old) <
3073 locinput - old)
3074 sayNO;
3075 }
3076 /* PL_reginput == locinput now */
29d1e993 3077 TRYPAREN(paren, ln, locinput);
0fe9bf95
IZ
3078 PL_reginput = locinput; /* Could be reset... */
3079 REGCP_UNWIND;
3080 /* Couldn't or didn't -- move forward. */
3081 old = locinput++;
3082 }
3083 }
3084 else
c277df42 3085 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3086 /* If it could work, try it. */
bbce6d69 3087 if (c1 == -1000 ||
3280af22
NIS
3088 UCHARAT(PL_reginput) == c1 ||
3089 UCHARAT(PL_reginput) == c2)
bbce6d69 3090 {
29d1e993 3091 TRYPAREN(paren, n, PL_reginput);
c277df42 3092 REGCP_UNWIND;
bbce6d69 3093 }
c277df42 3094 /* Couldn't or didn't -- move forward. */
a0ed51b3 3095 PL_reginput = locinput;
a0d0e21e
LW
3096 if (regrepeat(scan, 1)) {
3097 ln++;
a0ed51b3
LW
3098 locinput = PL_reginput;
3099 }
3100 else
4633a7c4 3101 sayNO;
a0d0e21e
LW
3102 }
3103 }
3104 else {
c277df42 3105 CHECKPOINT lastcp;
a0d0e21e 3106 n = regrepeat(scan, n);
a0ed51b3 3107 locinput = PL_reginput;
22c35a8c 3108 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3109 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3110 ln = n; /* why back off? */
1aeab75a
GS
3111 /* ...because $ and \Z can match before *and* after
3112 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3113 We should back off by one in this case. */
3114 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3115 ln--;
3116 }
c277df42
IZ
3117 REGCP_SET;
3118 if (paren) {
3119 while (n >= ln) {
3120 /* If it could work, try it. */
3121 if (c1 == -1000 ||
3280af22
NIS
3122 UCHARAT(PL_reginput) == c1 ||
3123 UCHARAT(PL_reginput) == c2)
c277df42 3124 {
29d1e993 3125 TRYPAREN(paren, n, PL_reginput);
c277df42
IZ
3126 REGCP_UNWIND;
3127 }
3128 /* Couldn't or didn't -- back up. */
3129 n--;
dfe13c55 3130 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3131 }
a0ed51b3
LW
3132 }
3133 else {
c277df42
IZ
3134 while (n >= ln) {
3135 /* If it could work, try it. */
3136 if (c1 == -1000 ||
3280af22
NIS
3137 UCHARAT(PL_reginput) == c1 ||
3138 UCHARAT(PL_reginput) == c2)
c277df42 3139 {
29d1e993 3140 TRYPAREN(paren, n, PL_reginput);
c277df42
IZ
3141 REGCP_UNWIND;
3142 }
3143 /* Couldn't or didn't -- back up. */
3144 n--;
dfe13c55 3145 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3146 }
a0d0e21e
LW
3147 }
3148 }
4633a7c4 3149 sayNO;
c277df42 3150 break;
a0d0e21e 3151 case END:
0f5d15d6
IZ
3152 if (PL_reg_call_cc) {
3153 re_cc_state *cur_call_cc = PL_reg_call_cc;
3154 CURCUR *cctmp = PL_regcc;
3155 regexp *re = PL_reg_re;
3156 CHECKPOINT cp, lastcp;
3157
3158 cp = regcppush(0); /* Save *all* the positions. */
3159 REGCP_SET;
3160 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3161 the caller. */
3162 PL_reginput = locinput; /* Make position available to
3163 the callcc. */
3164 cache_re(PL_reg_call_cc->re);
3165 PL_regcc = PL_reg_call_cc->cc;
3166 PL_reg_call_cc = PL_reg_call_cc->prev;
3167 if (regmatch(cur_call_cc->node)) {
3168 PL_reg_call_cc = cur_call_cc;
3169 regcpblow(cp);
3170 sayYES;
3171 }
3172 REGCP_UNWIND;
3173 regcppop();
3174 PL_reg_call_cc = cur_call_cc;
3175 PL_regcc = cctmp;
3176 PL_reg_re = re;
3177 cache_re(re);
3178
3179 DEBUG_r(
3180 PerlIO_printf(Perl_debug_log,
3181 "%*s continuation failed...\n",
3182 REPORT_CODE_OFF+PL_regindent*2, "")
3183 );
7821416a 3184 sayNO_SILENT;
0f5d15d6 3185 }
7821416a
IZ
3186 if (locinput < PL_regtill) {
3187 DEBUG_r(PerlIO_printf(Perl_debug_log,
3188 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3189 PL_colors[4],
3190 (long)(locinput - PL_reg_starttry),
3191 (long)(PL_regtill - PL_reg_starttry),
3192 PL_colors[5]));
3193 sayNO_FINAL; /* Cannot match: too short. */
3194 }
3195 PL_reginput = locinput; /* put where regtry can find it */
3196 sayYES_FINAL; /* Success! */
7e5428c5 3197 case SUCCEED:
3280af22 3198 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3199 sayYES_LOUD; /* Success! */
c277df42
IZ
3200 case SUSPEND:
3201 n = 1;
9fe1d20c 3202 PL_reginput = locinput;
c277df42 3203 goto do_ifmatch;
a0d0e21e 3204 case UNLESSM:
c277df42 3205 n = 0;
a0ed51b3 3206 if (scan->flags) {
0fe9bf95
IZ
3207 if (UTF) { /* XXXX This is absolutely
3208 broken, we read before
3209 start of string. */
3210 s = HOPMAYBEc(locinput, -scan->flags);
3211 if (!s)
3212 goto say_yes;
3213 PL_reginput = s;
3214 }
3215 else {
3216 if (locinput < PL_bostr + scan->flags)
3217 goto say_yes;
3218 PL_reginput = locinput - scan->flags;
3219 goto do_ifmatch;
3220 }
a0ed51b3
LW
3221 }
3222 else
3223 PL_reginput = locinput;
c277df42
IZ
3224 goto do_ifmatch;
3225 case IFMATCH:
3226 n = 1;
a0ed51b3 3227 if (scan->flags) {
0fe9bf95
IZ
3228 if (UTF) { /* XXXX This is absolutely
3229 broken, we read before
3230 start of string. */
3231 s = HOPMAYBEc(locinput, -scan->flags);
3232 if (!s || s < PL_bostr)
3233 goto say_no;
3234 PL_reginput = s;
3235 }
3236 else {
3237 if (locinput < PL_bostr + scan->flags)
3238 goto say_no;
3239 PL_reginput = locinput - scan->flags;
3240 goto do_ifmatch;
3241 }
a0ed51b3
LW
3242 }
3243 else
3244 PL_reginput = locinput;
3245
c277df42 3246 do_ifmatch:
c277df42
IZ
3247 inner = NEXTOPER(NEXTOPER(scan));
3248 if (regmatch(inner) != n) {
3249 say_no:
3250 if (logical) {
3251 logical = 0;
3252 sw = 0;
3253 goto do_longjump;
a0ed51b3
LW
3254 }
3255 else
c277df42
IZ
3256 sayNO;
3257 }
3258 say_yes:
3259 if (logical) {
3260 logical = 0;
3261 sw = 1;
3262 }
fe44a5e8 3263 if (OP(scan) == SUSPEND) {
3280af22 3264 locinput = PL_reginput;
565764a8 3265 nextchr = UCHARAT(locinput);
fe44a5e8 3266 }
c277df42
IZ
3267 /* FALL THROUGH. */
3268 case LONGJMP:
3269 do_longjump:
3270 next = scan + ARG(scan);
3271 if (next == scan)
3272 next = NULL;
a0d0e21e
LW
3273 break;
3274 default:
b900a521 3275 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3276 PTR2UV(scan), OP(scan));
cea2e8a9 3277 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3278 }
a0d0e21e
LW
3279 scan = next;
3280 }
a687059c 3281
a0d0e21e
LW
3282 /*
3283 * We get here only if there's trouble -- normally "case END" is
3284 * the terminating point.
3285 */
cea2e8a9 3286 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3287 /*NOTREACHED*/
4633a7c4
LW
3288 sayNO;
3289
7821416a
IZ
3290yes_loud:
3291 DEBUG_r(
3292 PerlIO_printf(Perl_debug_log,
3293 "%*s %scould match...%s\n",
3294 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3295 );
3296 goto yes;
3297yes_final:
3298 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3299 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3300yes:
3301#ifdef DEBUGGING
3280af22 3302 PL_regindent--;
4633a7c4
LW
3303#endif
3304 return 1;
3305
3306no:
7821416a
IZ
3307 DEBUG_r(
3308 PerlIO_printf(Perl_debug_log,
3309 "%*s %sfailed...%s\n",
3310 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3311 );
3312 goto do_no;
3313no_final:
3314do_no:
4633a7c4 3315#ifdef DEBUGGING
3280af22 3316 PL_regindent--;
4633a7c4 3317#endif
a0d0e21e 3318 return 0;
a687059c
LW
3319}
3320
3321/*
3322 - regrepeat - repeatedly match something simple, report how many
3323 */
3324/*
3325 * [This routine now assumes that it will only match on things of length 1.
3326 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3327 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3328 */
76e3520e 3329STATIC I32
cea2e8a9 3330S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3331{
5c0ca799 3332 dTHR;
a0d0e21e 3333 register char *scan;
a0d0e21e 3334 register I32 c;
3280af22 3335 register char *loceol = PL_regeol;
a0ed51b3 3336 register I32 hardcount = 0;
a0d0e21e 3337
3280af22 3338 scan = PL_reginput;
c277df42 3339 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3340 loceol = scan + max;
a0d0e21e 3341 switch (OP(p)) {
22c35a8c 3342 case REG_ANY:
a0d0e21e
LW
3343 while (scan < loceol && *scan != '\n')
3344 scan++;
3345 break;
3346 case SANY:
3347 scan = loceol;
3348 break;
a0ed51b3
LW
3349 case ANYUTF8:
3350 loceol = PL_regeol;
3351 while (scan < loceol && *scan != '\n') {
3352 scan += UTF8SKIP(scan);
3353 hardcount++;
3354 }
3355 break;
3356 case SANYUTF8:
3357 loceol = PL_regeol;
3358 while (scan < loceol) {
3359 scan += UTF8SKIP(scan);
3360 hardcount++;
3361 }
3362 break;
bbce6d69 3363 case EXACT: /* length of string is 1 */
cd439c50 3364 c = (U8)*STRING(p);
bbce6d69 3365 while (scan < loceol && UCHARAT(scan) == c)
3366 scan++;
3367 break;
3368 case EXACTF: /* length of string is 1 */
cd439c50 3369 c = (U8)*STRING(p);
bbce6d69 3370 while (scan < loceol &&
22c35a8c 3371 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3372 scan++;
3373 break;
3374 case EXACTFL: /* length of string is 1 */
3280af22 3375 PL_reg_flags |= RF_tainted;
cd439c50 3376 c = (U8)*STRING(p);
bbce6d69 3377 while (scan < loceol &&
22c35a8c 3378 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3379 scan++;
3380 break;
a0ed51b3
LW
3381 case ANYOFUTF8:
3382 loceol = PL_regeol;
3383 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3384 scan += UTF8SKIP(scan);
3385 hardcount++;
3386 }
3387 break;
a0d0e21e 3388 case ANYOF:
936ed897 3389 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3390 scan++;
a0d0e21e
LW
3391 break;
3392 case ALNUM:
3393 while (scan < loceol && isALNUM(*scan))
3394 scan++;
3395 break;
a0ed51b3
LW
3396 case ALNUMUTF8:
3397 loceol = PL_regeol;
dfe13c55 3398 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3399 scan += UTF8SKIP(scan);
3400 hardcount++;
3401 }
3402 break;
bbce6d69 3403 case ALNUML:
3280af22 3404 PL_reg_flags |= RF_tainted;
bbce6d69 3405 while (scan < loceol && isALNUM_LC(*scan))
3406 scan++;
3407 break;
a0ed51b3
LW
3408 case ALNUMLUTF8:
3409 PL_reg_flags |= RF_tainted;
3410 loceol = PL_regeol;
dfe13c55 3411 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3412 scan += UTF8SKIP(scan);
3413 hardcount++;
3414 }
3415 break;
3416 break;
a0d0e21e
LW
3417 case NALNUM:
3418 while (scan < loceol && !isALNUM(*scan))
3419 scan++;
3420 break;
a0ed51b3
LW
3421 case NALNUMUTF8:
3422 loceol = PL_regeol;
dfe13c55 3423 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3424 scan += UTF8SKIP(scan);
3425 hardcount++;
3426 }
3427 break;
bbce6d69 3428 case NALNUML:
3280af22 3429 PL_reg_flags |= RF_tainted;
bbce6d69 3430 while (scan < loceol && !isALNUM_LC(*scan))
3431 scan++;
3432 break;
a0ed51b3
LW
3433 case NALNUMLUTF8:
3434 PL_reg_flags |= RF_tainted;
3435 loceol = PL_regeol;
dfe13c55 3436 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3437 scan += UTF8SKIP(scan);
3438 hardcount++;
3439 }
3440 break;
a0d0e21e
LW
3441 case SPACE:
3442 while (scan < loceol && isSPACE(*scan))
3443 scan++;
3444 break;
a0ed51b3
LW
3445 case SPACEUTF8:
3446 loceol = PL_regeol;
dfe13c55 3447 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3448 scan += UTF8SKIP(scan);
3449 hardcount++;
3450 }
3451 break;
bbce6d69 3452 case SPACEL:
3280af22 3453 PL_reg_flags |= RF_tainted;
bbce6d69 3454 while (scan < loceol && isSPACE_LC(*scan))
3455 scan++;
3456 break;
a0ed51b3
LW
3457 case SPACELUTF8:
3458 PL_reg_flags |= RF_tainted;
3459 loceol = PL_regeol;
dfe13c55 3460 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3461 scan += UTF8SKIP(scan);
3462 hardcount++;
3463 }
3464 break;
a0d0e21e
LW
3465 case NSPACE:
3466 while (scan < loceol && !isSPACE(*scan))
3467 scan++;
3468 break;
a0ed51b3
LW
3469 case NSPACEUTF8:
3470 loceol = PL_regeol;
dfe13c55 3471 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3472 scan += UTF8SKIP(scan);
3473 hardcount++;
3474 }
3475 break;
bbce6d69 3476 case NSPACEL:
3280af22 3477 PL_reg_flags |= RF_tainted;
bbce6d69 3478 while (scan < loceol && !isSPACE_LC(*scan))
3479 scan++;
3480 break;
a0ed51b3
LW
3481 case NSPACELUTF8:
3482 PL_reg_flags |= RF_tainted;
3483 loceol = PL_regeol;
dfe13c55 3484 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3485 scan += UTF8SKIP(scan);
3486 hardcount++;
3487 }
3488 break;
a0d0e21e
LW
3489 case DIGIT:
3490 while (scan < loceol && isDIGIT(*scan))
3491 scan++;
3492 break;
a0ed51b3
LW
3493 case DIGITUTF8:
3494 loceol = PL_regeol;
dfe13c55 3495 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3496 scan += UTF8SKIP(scan);
3497 hardcount++;
3498 }
3499 break;
3500 break;
a0d0e21e
LW
3501 case NDIGIT:
3502 while (scan < loceol && !isDIGIT(*scan))
3503 scan++;
3504 break;
a0ed51b3
LW
3505 case NDIGITUTF8:
3506 loceol = PL_regeol;
dfe13c55 3507 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3508 scan += UTF8SKIP(scan);
3509 hardcount++;
3510 }
3511 break;
a0d0e21e
LW
3512 default: /* Called on something of 0 width. */
3513 break; /* So match right here or not at all. */
3514 }
a687059c 3515
a0ed51b3
LW
3516 if (hardcount)
3517 c = hardcount;
3518 else
3519 c = scan - PL_reginput;
3280af22 3520 PL_reginput = scan;
a687059c 3521
c277df42
IZ
3522 DEBUG_r(
3523 {
3524 SV *prop = sv_newmortal();
3525
3526 regprop(prop, p);
3527 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3528 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3529 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3530 });
3531
a0d0e21e 3532 return(c);
a687059c
LW
3533}
3534
3535/*
c277df42
IZ
3536 - regrepeat_hard - repeatedly match something, report total lenth and length
3537 *
3538 * The repeater is supposed to have constant length.
3539 */
3540
76e3520e 3541STATIC I32
cea2e8a9 3542S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3543{
5c0ca799 3544 dTHR;
c277df42
IZ
3545 register char *scan;
3546 register char *start;
3280af22 3547 register char *loceol = PL_regeol;
a0ed51b3 3548 I32 l = 0;
708e3b05 3549 I32 count = 0, res = 1;
a0ed51b3
LW
3550
3551 if (!max)
3552 return 0;
c277df42 3553
3280af22 3554 start = PL_reginput;
a0ed51b3 3555 if (UTF) {
708e3b05 3556 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3557 if (!count++) {
3558 l = 0;
3559 while (start < PL_reginput) {
3560 l++;
3561 start += UTF8SKIP(start);
3562 }
3563 *lp = l;
3564 if (l == 0)
3565 return max;
3566 }
3567 if (count == max)
3568 return count;
3569 }
3570 }
3571 else {
708e3b05 3572 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3573 if (!count++) {
3574 *lp = l = PL_reginput - start;
3575 if (max != REG_INFTY && l*max < loceol - scan)
3576 loceol = scan + l*max;
3577 if (l == 0)
3578 return max;
c277df42
IZ
3579 }
3580 }
3581 }
708e3b05 3582 if (!res)
3280af22 3583 PL_reginput = scan;
c277df42 3584
a0ed51b3 3585 return count;
c277df42
IZ
3586}
3587
3588/*
cb8d8820 3589 - reginclass - determine if a character falls into a character class
bbce6d69 3590 */
3591
76e3520e 3592STATIC bool
936ed897 3593S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3594{
5c0ca799 3595 dTHR;
b8c5462f 3596 char flags = ANYOF_FLAGS(p);
bbce6d69 3597 bool match = FALSE;
3598
3599 c &= 0xFF;
b8c5462f 3600 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3601 match = TRUE;
3602 else if (flags & ANYOF_FOLD) {
3603 I32 cf;
3604 if (flags & ANYOF_LOCALE) {
3280af22 3605 PL_reg_flags |= RF_tainted;
22c35a8c 3606 cf = PL_fold_locale[c];
bbce6d69 3607 }
3608 else
22c35a8c 3609 cf = PL_fold[c];
b8c5462f 3610 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3611 match = TRUE;
3612 }
3613
b8c5462f 3614 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3615 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3616 if (
3617 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3618 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3619 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3620 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3621 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3622 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3623 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3624 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3625 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3626 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3627 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3628 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3629 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3630 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3631 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3632 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3633 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3634 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3635 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3636 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3637 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3638 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3639 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3640 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3641 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
aaa51d5e
JF
3642 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3643 (ANYOF_CLASS_TEST(p, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3644 (ANYOF_CLASS_TEST(p, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3645 (ANYOF_CLASS_TEST(p, ANYOF_BLANK) && isBLANK(c)) ||
3646 (ANYOF_CLASS_TEST(p, ANYOF_NBLANK) && !isBLANK(c))
b8c5462f 3647 ) /* How's that for a conditional? */
bbce6d69 3648 {
3649 match = TRUE;
3650 }
3651 }
3652
ae5c130c 3653 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3654}
3655
a0ed51b3 3656STATIC bool
cea2e8a9 3657S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3658{
3659 dTHR;
a0ed51b3
LW
3660 char flags = ARG1(f);
3661 bool match = FALSE;
3662 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3663
3664 if (swash_fetch(sv, p))
3665 match = TRUE;
3666 else if (flags & ANYOF_FOLD) {
806e7201 3667 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3668 if (flags & ANYOF_LOCALE) {
3669 PL_reg_flags |= RF_tainted;
3670 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3671 }
3672 else
3673 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3674 if (swash_fetch(sv, tmpbuf))
3675 match = TRUE;
3676 }
3677
b8c5462f 3678 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3679
3680 return (flags & ANYOF_INVERT) ? !match : match;
3681}
161b471a 3682
dfe13c55 3683STATIC U8 *
cea2e8a9 3684S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3685{
3686 dTHR;
a0ed51b3
LW
3687 if (off >= 0) {
3688 while (off-- && s < (U8*)PL_regeol)
3689 s += UTF8SKIP(s);
3690 }
3691 else {
3692 while (off++) {
3693 if (s > (U8*)PL_bostr) {
3694 s--;
3695 if (*s & 0x80) {
3696 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3697 s--;
3698 } /* XXX could check well-formedness here */
3699 }
3700 }
3701 }
3702 return s;
3703}
161b471a 3704
dfe13c55 3705STATIC U8 *
cea2e8a9 3706S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3707{
c485e607 3708 dTHR;
a0ed51b3
LW
3709 if (off >= 0) {
3710 while (off-- && s < (U8*)PL_regeol)
3711 s += UTF8SKIP(s);
3712 if (off >= 0)
3713 return 0;
3714 }
3715 else {
3716 while (off++) {
3717 if (s > (U8*)PL_bostr) {
3718 s--;
3719 if (*s & 0x80) {
3720 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3721 s--;
3722 } /* XXX could check well-formedness here */
3723 }
3724 else
3725 break;
3726 }
3727 if (off <= 0)
3728 return 0;
3729 }
3730 return s;
3731}
51371543
GS
3732
3733#ifdef PERL_OBJECT
51371543
GS
3734#include "XSUB.h"
3735#endif
3736
3737static void
3738restore_pos(pTHXo_ void *arg)
3739{
3740 dTHR;
3741 if (PL_reg_eval_set) {
3742 if (PL_reg_oldsaved) {
3743 PL_reg_re->subbeg = PL_reg_oldsaved;
3744 PL_reg_re->sublen = PL_reg_oldsavedlen;
3745 RX_MATCH_COPIED_on(PL_reg_re);
3746 }
3747 PL_reg_magic->mg_len = PL_reg_oldpos;
3748 PL_reg_eval_set = 0;
3749 PL_curpm = PL_reg_oldcurpm;
3750 }
3751}