This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
NULL-terminate PERL_INC_VERSION_LIST
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
6eb5f6b9
JH
278/* XXXX Some places assume that there is a fixed substring.
279 An update may be needed if optimizer marks as "INTUITable"
280 RExen without fixed substrings. Similarly, it is assumed that
281 lengths of all the strings are no more than minlen, thus they
282 cannot come from lookahead.
283 (Or minlen should take into account lookahead.) */
284
2c2d71f5
JH
285/* A failure to find a constant substring means that there is no need to make
286 an expensive call to REx engine, thus we celebrate a failure. Similarly,
287 finding a substring too deep into the string means that less calls to
30944b6d
IZ
288 regtry() should be needed.
289
290 REx compiler's optimizer found 4 possible hints:
291 a) Anchored substring;
292 b) Fixed substring;
293 c) Whether we are anchored (beginning-of-line or \G);
294 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 295 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
296 string which does not contradict any of them.
297 */
2c2d71f5 298
6eb5f6b9
JH
299/* Most of decisions we do here should have been done at compile time.
300 The nodes of the REx which we used for the search should have been
301 deleted from the finite automaton. */
302
cad2e5aa
JH
303char *
304Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
305 char *strend, U32 flags, re_scream_pos_data *data)
306{
2c2d71f5 307 register I32 start_shift;
cad2e5aa 308 /* Should be nonnegative! */
2c2d71f5
JH
309 register I32 end_shift;
310 register char *s;
311 register SV *check;
cad2e5aa
JH
312 char *t;
313 I32 ml_anch;
2c2d71f5 314 char *tmp;
6eb5f6b9
JH
315 register char *other_last = Nullch; /* other substr checked before this */
316 char *check_at; /* check substr found at this pos */
30944b6d
IZ
317#ifdef DEBUGGING
318 char *i_strpos = strpos;
319#endif
cad2e5aa
JH
320
321 DEBUG_r( if (!PL_colorset) reginitcolors() );
322 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 323 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
324 PL_colors[4],PL_colors[5],PL_colors[0],
325 prog->precomp,
326 PL_colors[1],
327 (strlen(prog->precomp) > 60 ? "..." : ""),
328 PL_colors[0],
b900a521 329 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
330 strpos, PL_colors[1],
331 (strend - strpos > 60 ? "..." : ""))
332 );
333
2c2d71f5
JH
334 if (prog->minlen > strend - strpos) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 336 goto fail;
2c2d71f5 337 }
653099ff 338 check = prog->check_substr;
2c2d71f5 339 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
340 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
341 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 342 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
343
344 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 345 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
346 I32 slen;
347
348 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
349 && (sv && (strpos + SvCUR(sv) != strend)) ) {
350 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 351 goto fail;
2c2d71f5 352 }
adac82c7 353 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 354 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
355 if (SvTAIL(check)) {
356 slen = SvCUR(check); /* >= 1 */
cad2e5aa 357
2c2d71f5
JH
358 if ( strend - s > slen || strend - s < slen - 1
359 || (strend - s == slen && strend[-1] != '\n')) {
360 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
361 goto fail_finish;
cad2e5aa
JH
362 }
363 /* Now should match s[0..slen-2] */
364 slen--;
653099ff 365 if (slen && (*SvPVX(check) != *s
cad2e5aa 366 || (slen > 1
653099ff 367 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
368 report_neq:
369 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
370 goto fail_finish;
371 }
cad2e5aa 372 }
653099ff
GS
373 else if (*SvPVX(check) != *s
374 || ((slen = SvCUR(check)) > 1
375 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
376 goto report_neq;
377 goto success_at_start;
cad2e5aa 378 }
2c2d71f5 379 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 380 s = strpos;
2c2d71f5 381 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 382 end_shift = prog->minlen - start_shift -
653099ff 383 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 384 if (!ml_anch) {
653099ff
GS
385 I32 end = prog->check_offset_max + CHR_SVLEN(check)
386 - (SvTAIL(check) != 0);
2c2d71f5
JH
387 I32 eshift = strend - s - end;
388
389 if (end_shift < eshift)
390 end_shift = eshift;
391 }
cad2e5aa 392 }
2c2d71f5 393 else { /* Can match at random position */
cad2e5aa
JH
394 ml_anch = 0;
395 s = strpos;
2c2d71f5
JH
396 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
397 /* Should be nonnegative! */
398 end_shift = prog->minlen - start_shift -
653099ff 399 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
400 }
401
2c2d71f5 402#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 403 if (end_shift < 0)
6bbae5e6 404 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
405#endif
406
2c2d71f5
JH
407 restart:
408 /* Find a possible match in the region s..strend by looking for
409 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 410 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
411 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
412 I32 p = -1; /* Internal iterator of scream. */
413 I32 *pp = data ? data->scream_pos : &p;
414
2c2d71f5
JH
415 if (PL_screamfirst[BmRARE(check)] >= 0
416 || ( BmRARE(check) == '\n'
417 && (BmPREVIOUS(check) == SvCUR(check) - 1)
418 && SvTAIL(check) ))
419 s = screaminstr(sv, check,
420 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 421 else
2c2d71f5 422 goto fail_finish;
cad2e5aa
JH
423 if (data)
424 *data->scream_olds = s;
425 }
426 else
427 s = fbm_instr((unsigned char*)s + start_shift,
428 (unsigned char*)strend - end_shift,
2c2d71f5 429 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
430
431 /* Update the count-of-usability, remove useless subpatterns,
432 unshift s. */
2c2d71f5
JH
433
434 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
435 (s ? "Found" : "Did not find"),
436 ((check == prog->anchored_substr) ? "anchored" : "floating"),
437 PL_colors[0],
7b0972df
JH
438 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
439 SvPVX(check),
2c2d71f5
JH
440 PL_colors[1], (SvTAIL(check) ? "$" : ""),
441 (s ? " at offset " : "...\n") ) );
442
443 if (!s)
444 goto fail_finish;
445
6eb5f6b9
JH
446 check_at = s;
447
2c2d71f5 448 /* Finish the diagnostic message */
30944b6d 449 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
450
451 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
452 Start with the other substr.
453 XXXX no SCREAM optimization yet - and a very coarse implementation
454 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
455 *always* match. Probably should be marked during compile...
456 Probably it is right to do no SCREAM here...
457 */
458
459 if (prog->float_substr && prog->anchored_substr) {
30944b6d 460 /* Take into account the "other" substring. */
2c2d71f5
JH
461 /* XXXX May be hopelessly wrong for UTF... */
462 if (!other_last)
6eb5f6b9 463 other_last = strpos;
2c2d71f5 464 if (check == prog->float_substr) {
30944b6d
IZ
465 do_other_anchored:
466 {
2c2d71f5
JH
467 char *last = s - start_shift, *last1, *last2;
468 char *s1 = s;
469
470 tmp = PL_bostr;
471 t = s - prog->check_offset_max;
472 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
473 && (!(prog->reganch & ROPT_UTF8)
474 || (PL_bostr = strpos, /* Used in regcopmaybe() */
475 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
476 && t > strpos)))
30944b6d 477 /* EMPTY */;
2c2d71f5
JH
478 else
479 t = strpos;
480 t += prog->anchored_offset;
6eb5f6b9
JH
481 if (t < other_last) /* These positions already checked */
482 t = other_last;
2c2d71f5
JH
483 PL_bostr = tmp;
484 last2 = last1 = strend - prog->minlen;
485 if (last < last1)
486 last1 = last;
487 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
488 /* On end-of-str: see comment below. */
489 s = fbm_instr((unsigned char*)t,
490 (unsigned char*)last1 + prog->anchored_offset
491 + SvCUR(prog->anchored_substr)
492 - (SvTAIL(prog->anchored_substr)!=0),
493 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
494 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
495 (s ? "Found" : "Contradicts"),
496 PL_colors[0],
7b0972df
JH
497 (int)(SvCUR(prog->anchored_substr)
498 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
499 SvPVX(prog->anchored_substr),
500 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
501 if (!s) {
502 if (last1 >= last2) {
503 DEBUG_r(PerlIO_printf(Perl_debug_log,
504 ", giving up...\n"));
505 goto fail_finish;
506 }
507 DEBUG_r(PerlIO_printf(Perl_debug_log,
508 ", trying floating at offset %ld...\n",
30944b6d 509 (long)(s1 + 1 - i_strpos)));
2c2d71f5 510 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 511 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
512 s = HOPc(last, 1);
513 goto restart;
514 }
515 else {
516 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 517 (long)(s - i_strpos)));
2c2d71f5 518 t = s - prog->anchored_offset;
6eb5f6b9 519 other_last = s + 1;
30944b6d 520 s = s1;
2c2d71f5
JH
521 if (t == strpos)
522 goto try_at_start;
2c2d71f5
JH
523 goto try_at_offset;
524 }
30944b6d 525 }
2c2d71f5
JH
526 }
527 else { /* Take into account the floating substring. */
528 char *last, *last1;
529 char *s1 = s;
530
531 t = s - start_shift;
532 last1 = last = strend - prog->minlen + prog->float_min_offset;
533 if (last - t > prog->float_max_offset)
534 last = t + prog->float_max_offset;
535 s = t + prog->float_min_offset;
6eb5f6b9
JH
536 if (s < other_last)
537 s = other_last;
2c2d71f5
JH
538 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
539 /* fbm_instr() takes into account exact value of end-of-str
540 if the check is SvTAIL(ed). Since false positives are OK,
541 and end-of-str is not later than strend we are OK. */
542 s = fbm_instr((unsigned char*)s,
543 (unsigned char*)last + SvCUR(prog->float_substr)
544 - (SvTAIL(prog->float_substr)!=0),
545 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
546 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
547 (s ? "Found" : "Contradicts"),
548 PL_colors[0],
7b0972df
JH
549 (int)(SvCUR(prog->float_substr)
550 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
551 SvPVX(prog->float_substr),
552 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
553 if (!s) {
554 if (last1 == last) {
555 DEBUG_r(PerlIO_printf(Perl_debug_log,
556 ", giving up...\n"));
557 goto fail_finish;
558 }
559 DEBUG_r(PerlIO_printf(Perl_debug_log,
560 ", trying anchored starting at offset %ld...\n",
30944b6d 561 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 562 other_last = last + 1;
2c2d71f5
JH
563 PL_regeol = strend; /* Used in HOP() */
564 s = HOPc(t, 1);
565 goto restart;
566 }
567 else {
568 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 569 (long)(s - i_strpos)));
6eb5f6b9 570 other_last = s + 1;
30944b6d 571 s = s1;
2c2d71f5
JH
572 if (t == strpos)
573 goto try_at_start;
2c2d71f5
JH
574 goto try_at_offset;
575 }
576 }
cad2e5aa 577 }
2c2d71f5
JH
578
579 t = s - prog->check_offset_max;
580 tmp = PL_bostr;
581 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
582 && (!(prog->reganch & ROPT_UTF8)
583 || (PL_bostr = strpos, /* Used in regcopmaybe() */
584 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
585 && t > strpos)))) {
586 PL_bostr = tmp;
587 /* Fixed substring is found far enough so that the match
588 cannot start at strpos. */
589 try_at_offset:
cad2e5aa 590 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
591 /* Eventually fbm_*() should handle this, but often
592 anchored_offset is not 0, so this check will not be wasted. */
593 /* XXXX In the code below we prefer to look for "^" even in
594 presence of anchored substrings. And we search even
595 beyond the found float position. These pessimizations
596 are historical artefacts only. */
597 find_anchor:
2c2d71f5 598 while (t < strend - prog->minlen) {
cad2e5aa
JH
599 if (*t == '\n') {
600 if (t < s - prog->check_offset_min) {
30944b6d
IZ
601 if (prog->anchored_substr) {
602 /* We definitely contradict the found anchored
603 substr. Due to the above check we do not
604 contradict "check" substr.
605 Thus we can arrive here only if check substr
606 is float. Redo checking for "other"=="fixed".
607 */
608 strpos = t + 1;
609 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
610 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
611 goto do_other_anchored;
612 }
cad2e5aa 613 s = t + 1;
2c2d71f5 614 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 615 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
616 goto set_useful;
617 }
2c2d71f5 618 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
619 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
620 strpos = s = t + 1;
cad2e5aa
JH
621 goto restart;
622 }
623 t++;
624 }
2c2d71f5
JH
625 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
626 PL_colors[0],PL_colors[1]));
627 goto fail_finish;
cad2e5aa
JH
628 }
629 s = t;
630 set_useful:
2c2d71f5 631 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
632 }
633 else {
2c2d71f5
JH
634 PL_bostr = tmp;
635 /* The found string does not prohibit matching at beg-of-str
636 - no optimization of calling REx engine can be performed,
637 unless it was an MBOL and we are not after MBOL. */
638 try_at_start:
639 /* Even in this situation we may use MBOL flag if strpos is offset
640 wrt the start of the string. */
641 if (ml_anch && sv
d506a20d
IZ
642 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n'
643 /* May be due to an implicit anchor of m{.*foo} */
644 && !(prog->reganch & ROPT_IMPLICIT))
645 {
cad2e5aa
JH
646 t = strpos;
647 goto find_anchor;
648 }
30944b6d
IZ
649 DEBUG_r( if (ml_anch)
650 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
651 PL_colors[0],PL_colors[1]);
652 );
2c2d71f5 653 success_at_start:
30944b6d 654 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 655 && prog->check_substr /* Could be deleted already */
cad2e5aa 656 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
657 && prog->check_substr == prog->float_substr)
658 {
cad2e5aa
JH
659 /* If flags & SOMETHING - do not do it many times on the same match */
660 SvREFCNT_dec(prog->check_substr);
661 prog->check_substr = Nullsv; /* disable */
662 prog->float_substr = Nullsv; /* clear */
663 s = strpos;
3cf5c195
IZ
664 /* XXXX This is a remnant of the old implementation. It
665 looks wasteful, since now INTUIT can use many
6eb5f6b9 666 other heuristics. */
cad2e5aa
JH
667 prog->reganch &= ~RE_USE_INTUIT;
668 }
669 else
670 s = strpos;
671 }
672
6eb5f6b9
JH
673 /* Last resort... */
674 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
675 if (prog->regstclass) {
676 /* minlen == 0 is possible if regstclass is \b or \B,
677 and the fixed substr is ''$.
678 Since minlen is already taken into account, s+1 is before strend;
679 accidentally, minlen >= 1 guaranties no false positives at s + 1
680 even for \b or \B. But (minlen? 1 : 0) below assumes that
681 regstclass does not come from lookahead... */
682 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
683 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
684 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
685 ? STR_LEN(prog->regstclass)
686 : 1);
6eb5f6b9 687 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
688 ? s + (prog->minlen? cl_l : 0)
689 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9
JH
690 : strend) ;
691 char *startpos = sv ? strend - SvCUR(sv) : s;
692
693 t = s;
76384e4a
GS
694 if (prog->reganch & ROPT_UTF8) {
695 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
696 PL_bostr = startpos;
697 }
6eb5f6b9
JH
698 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
699 if (!s) {
700#ifdef DEBUGGING
701 char *what;
702#endif
703 if (endpos == strend) {
704 DEBUG_r( PerlIO_printf(Perl_debug_log,
705 "Could not match STCLASS...\n") );
706 goto fail;
707 }
66e933ab
GS
708 DEBUG_r( PerlIO_printf(Perl_debug_log,
709 "This position contradicts STCLASS...\n") );
653099ff
GS
710 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
711 goto fail;
6eb5f6b9
JH
712 /* Contradict one of substrings */
713 if (prog->anchored_substr) {
6eb5f6b9
JH
714 if (prog->anchored_substr == check) {
715 DEBUG_r( what = "anchored" );
716 hop_and_restart:
717 PL_regeol = strend; /* Used in HOP() */
718 s = HOPc(t, 1);
66e933ab
GS
719 if (s + start_shift + end_shift > strend) {
720 /* XXXX Should be taken into account earlier? */
721 DEBUG_r( PerlIO_printf(Perl_debug_log,
722 "Could not match STCLASS...\n") );
723 goto fail;
724 }
6eb5f6b9 725 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 726 "Trying %s substr starting at offset %ld...\n",
6eb5f6b9
JH
727 what, (long)(s + start_shift - i_strpos)) );
728 goto restart;
729 }
66e933ab 730 /* Have both, check_string is floating */
6eb5f6b9
JH
731 if (t + start_shift >= check_at) /* Contradicts floating=check */
732 goto retry_floating_check;
733 /* Recheck anchored substring, but not floating... */
734 s = check_at;
735 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 736 "Trying anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
737 (long)(other_last - i_strpos)) );
738 goto do_other_anchored;
739 }
60e71179
GS
740 /* Another way we could have checked stclass at the
741 current position only: */
742 if (ml_anch) {
743 s = t = t + 1;
744 DEBUG_r( PerlIO_printf(Perl_debug_log,
745 "Trying /^/m starting at offset %ld...\n",
746 (long)(t - i_strpos)) );
747 goto try_at_offset;
66e933ab 748 }
60e71179
GS
749 if (!prog->float_substr) /* Could have been deleted */
750 goto fail;
6eb5f6b9
JH
751 /* Check is floating subtring. */
752 retry_floating_check:
753 t = check_at - start_shift;
754 DEBUG_r( what = "floating" );
755 goto hop_and_restart;
756 }
757 DEBUG_r( if (t != s)
758 PerlIO_printf(Perl_debug_log,
759 "By STCLASS: moving %ld --> %ld\n",
760 (long)(t - i_strpos), (long)(s - i_strpos));
761 else
762 PerlIO_printf(Perl_debug_log,
763 "Does not contradict STCLASS...\n") );
764 }
2c2d71f5 765 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 766 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 767 return s;
2c2d71f5
JH
768
769 fail_finish: /* Substring not found */
66e933ab
GS
770 if (prog->check_substr) /* could be removed already */
771 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 772 fail:
2c2d71f5 773 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
774 PL_colors[4],PL_colors[5]));
775 return Nullch;
776}
9661b544 777
6eb5f6b9 778/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
779STATIC char *
780S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 781{
6eb5f6b9
JH
782 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
783 char *m;
784 int ln;
785 int c1;
786 int c2;
787 char *e;
788 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 789
6eb5f6b9
JH
790 /* We know what class it must start with. */
791 switch (OP(c)) {
792 case ANYOFUTF8:
793 while (s < strend) {
794 if (REGINCLASSUTF8(c, (U8*)s)) {
795 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 796 goto got_it;
3cf5c195 797 else
6eb5f6b9 798 tmp = doevery;
a0ed51b3 799 }
6eb5f6b9
JH
800 else
801 tmp = 1;
a0ed51b3
LW
802 s += UTF8SKIP(s);
803 }
6eb5f6b9
JH
804 break;
805 case ANYOF:
a0ed51b3 806 while (s < strend) {
6eb5f6b9
JH
807 if (REGINCLASS(c, *s)) {
808 if (tmp && (norun || regtry(prog, s)))
809 goto got_it;
810 else
811 tmp = doevery;
a0ed51b3 812 }
6eb5f6b9
JH
813 else
814 tmp = 1;
a0ed51b3 815 s++;
a0d0e21e 816 }
6eb5f6b9
JH
817 break;
818 case EXACTF:
819 m = STRING(c);
820 ln = STR_LEN(c);
821 c1 = *m;
822 c2 = PL_fold[c1];
823 goto do_exactf;
824 case EXACTFL:
825 m = STRING(c);
826 ln = STR_LEN(c);
827 c1 = *m;
828 c2 = PL_fold_locale[c1];
829 do_exactf:
830 e = strend - ln;
b3c9acc1 831
6eb5f6b9
JH
832 if (norun && e < s)
833 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
834 /* Here it is NOT UTF! */
835 if (c1 == c2) {
836 while (s <= e) {
837 if ( *s == c1
66e933ab
GS
838 && (ln == 1 || !(OP(c) == EXACTF
839 ? ibcmp(s, m, ln)
840 : ibcmp_locale(s, m, ln)))
6eb5f6b9 841 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
842 goto got_it;
843 s++;
844 }
845 } else {
846 while (s <= e) {
847 if ( (*s == c1 || *s == c2)
66e933ab
GS
848 && (ln == 1 || !(OP(c) == EXACTF
849 ? ibcmp(s, m, ln)
850 : ibcmp_locale(s, m, ln)))
6eb5f6b9 851 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
852 goto got_it;
853 s++;
854 }
855 }
856 break;
bbce6d69 857 case BOUNDL:
3280af22 858 PL_reg_flags |= RF_tainted;
bbce6d69 859 /* FALL THROUGH */
a0d0e21e 860 case BOUND:
6eb5f6b9 861 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 862 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 863 while (s < strend) {
95bac841 864 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 865 tmp = !tmp;
6eb5f6b9 866 if ((norun || regtry(prog, s)))
a0d0e21e 867 goto got_it;
a687059c 868 }
a0d0e21e
LW
869 s++;
870 }
6eb5f6b9 871 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
872 goto got_it;
873 break;
a0ed51b3
LW
874 case BOUNDLUTF8:
875 PL_reg_flags |= RF_tainted;
876 /* FALL THROUGH */
877 case BOUNDUTF8:
6eb5f6b9 878 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 879 tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 880 while (s < strend) {
76384e4a 881 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
882 swash_fetch(PL_utf8_alnum, (U8*)s) :
883 isALNUM_LC_utf8((U8*)s)))
884 {
a0ed51b3 885 tmp = !tmp;
6eb5f6b9 886 if ((norun || regtry(prog, s)))
a0ed51b3
LW
887 goto got_it;
888 }
889 s += UTF8SKIP(s);
890 }
6eb5f6b9 891 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
892 goto got_it;
893 break;
bbce6d69 894 case NBOUNDL:
3280af22 895 PL_reg_flags |= RF_tainted;
bbce6d69 896 /* FALL THROUGH */
a0d0e21e 897 case NBOUND:
6eb5f6b9 898 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 899 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 900 while (s < strend) {
95bac841 901 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 902 tmp = !tmp;
6eb5f6b9 903 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
904 goto got_it;
905 s++;
906 }
6eb5f6b9 907 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
908 goto got_it;
909 break;
a0ed51b3
LW
910 case NBOUNDLUTF8:
911 PL_reg_flags |= RF_tainted;
912 /* FALL THROUGH */
913 case NBOUNDUTF8:
6eb5f6b9 914 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 915 tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 916 while (s < strend) {
76384e4a 917 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
918 swash_fetch(PL_utf8_alnum, (U8*)s) :
919 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 920 tmp = !tmp;
6eb5f6b9 921 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
922 goto got_it;
923 s += UTF8SKIP(s);
924 }
6eb5f6b9 925 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
926 goto got_it;
927 break;
a0d0e21e
LW
928 case ALNUM:
929 while (s < strend) {
bbce6d69 930 if (isALNUM(*s)) {
6eb5f6b9 931 if (tmp && (norun || regtry(prog, s)))
bbce6d69 932 goto got_it;
933 else
934 tmp = doevery;
935 }
936 else
937 tmp = 1;
938 s++;
939 }
940 break;
a0ed51b3
LW
941 case ALNUMUTF8:
942 while (s < strend) {
dfe13c55 943 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 944 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
945 goto got_it;
946 else
947 tmp = doevery;
948 }
949 else
950 tmp = 1;
951 s += UTF8SKIP(s);
952 }
953 break;
bbce6d69 954 case ALNUML:
3280af22 955 PL_reg_flags |= RF_tainted;
bbce6d69 956 while (s < strend) {
957 if (isALNUM_LC(*s)) {
6eb5f6b9 958 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 959 goto got_it;
a687059c 960 else
a0d0e21e
LW
961 tmp = doevery;
962 }
963 else
964 tmp = 1;
965 s++;
966 }
967 break;
a0ed51b3
LW
968 case ALNUMLUTF8:
969 PL_reg_flags |= RF_tainted;
970 while (s < strend) {
dfe13c55 971 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 972 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
973 goto got_it;
974 else
975 tmp = doevery;
976 }
977 else
978 tmp = 1;
979 s += UTF8SKIP(s);
980 }
981 break;
a0d0e21e
LW
982 case NALNUM:
983 while (s < strend) {
bbce6d69 984 if (!isALNUM(*s)) {
6eb5f6b9 985 if (tmp && (norun || regtry(prog, s)))
bbce6d69 986 goto got_it;
987 else
988 tmp = doevery;
989 }
990 else
991 tmp = 1;
992 s++;
993 }
994 break;
a0ed51b3
LW
995 case NALNUMUTF8:
996 while (s < strend) {
dfe13c55 997 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 998 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
999 goto got_it;
1000 else
1001 tmp = doevery;
1002 }
1003 else
1004 tmp = 1;
1005 s += UTF8SKIP(s);
1006 }
1007 break;
bbce6d69 1008 case NALNUML:
3280af22 1009 PL_reg_flags |= RF_tainted;
bbce6d69 1010 while (s < strend) {
1011 if (!isALNUM_LC(*s)) {
6eb5f6b9 1012 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1013 goto got_it;
a687059c 1014 else
a0d0e21e 1015 tmp = doevery;
a687059c 1016 }
a0d0e21e
LW
1017 else
1018 tmp = 1;
1019 s++;
1020 }
1021 break;
a0ed51b3
LW
1022 case NALNUMLUTF8:
1023 PL_reg_flags |= RF_tainted;
1024 while (s < strend) {
dfe13c55 1025 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1026 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1027 goto got_it;
1028 else
1029 tmp = doevery;
1030 }
1031 else
1032 tmp = 1;
1033 s += UTF8SKIP(s);
1034 }
1035 break;
a0d0e21e
LW
1036 case SPACE:
1037 while (s < strend) {
1038 if (isSPACE(*s)) {
6eb5f6b9 1039 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1040 goto got_it;
1041 else
1042 tmp = doevery;
2304df62 1043 }
a0d0e21e
LW
1044 else
1045 tmp = 1;
1046 s++;
1047 }
1048 break;
a0ed51b3
LW
1049 case SPACEUTF8:
1050 while (s < strend) {
dfe13c55 1051 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1052 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1053 goto got_it;
1054 else
1055 tmp = doevery;
1056 }
1057 else
1058 tmp = 1;
1059 s += UTF8SKIP(s);
1060 }
1061 break;
bbce6d69 1062 case SPACEL:
3280af22 1063 PL_reg_flags |= RF_tainted;
bbce6d69 1064 while (s < strend) {
1065 if (isSPACE_LC(*s)) {
6eb5f6b9 1066 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1067 goto got_it;
1068 else
1069 tmp = doevery;
1070 }
1071 else
1072 tmp = 1;
1073 s++;
1074 }
1075 break;
a0ed51b3
LW
1076 case SPACELUTF8:
1077 PL_reg_flags |= RF_tainted;
1078 while (s < strend) {
dfe13c55 1079 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1080 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1081 goto got_it;
1082 else
1083 tmp = doevery;
1084 }
1085 else
1086 tmp = 1;
1087 s += UTF8SKIP(s);
1088 }
1089 break;
a0d0e21e
LW
1090 case NSPACE:
1091 while (s < strend) {
1092 if (!isSPACE(*s)) {
6eb5f6b9 1093 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1094 goto got_it;
1095 else
1096 tmp = doevery;
a687059c 1097 }
a0d0e21e
LW
1098 else
1099 tmp = 1;
1100 s++;
1101 }
1102 break;
a0ed51b3
LW
1103 case NSPACEUTF8:
1104 while (s < strend) {
dfe13c55 1105 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1106 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1107 goto got_it;
1108 else
1109 tmp = doevery;
1110 }
1111 else
1112 tmp = 1;
1113 s += UTF8SKIP(s);
1114 }
1115 break;
bbce6d69 1116 case NSPACEL:
3280af22 1117 PL_reg_flags |= RF_tainted;
bbce6d69 1118 while (s < strend) {
1119 if (!isSPACE_LC(*s)) {
6eb5f6b9 1120 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1121 goto got_it;
1122 else
1123 tmp = doevery;
1124 }
1125 else
1126 tmp = 1;
1127 s++;
1128 }
1129 break;
a0ed51b3
LW
1130 case NSPACELUTF8:
1131 PL_reg_flags |= RF_tainted;
1132 while (s < strend) {
dfe13c55 1133 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1134 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1135 goto got_it;
1136 else
1137 tmp = doevery;
1138 }
1139 else
1140 tmp = 1;
1141 s += UTF8SKIP(s);
1142 }
1143 break;
a0d0e21e
LW
1144 case DIGIT:
1145 while (s < strend) {
1146 if (isDIGIT(*s)) {
6eb5f6b9 1147 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1148 goto got_it;
1149 else
1150 tmp = doevery;
2b69d0c2 1151 }
a0d0e21e
LW
1152 else
1153 tmp = 1;
1154 s++;
1155 }
1156 break;
a0ed51b3
LW
1157 case DIGITUTF8:
1158 while (s < strend) {
dfe13c55 1159 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1160 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1161 goto got_it;
1162 else
1163 tmp = doevery;
1164 }
1165 else
1166 tmp = 1;
1167 s += UTF8SKIP(s);
1168 }
1169 break;
b8c5462f
JH
1170 case DIGITL:
1171 PL_reg_flags |= RF_tainted;
1172 while (s < strend) {
1173 if (isDIGIT_LC(*s)) {
6eb5f6b9 1174 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1175 goto got_it;
1176 else
1177 tmp = doevery;
1178 }
1179 else
1180 tmp = 1;
1181 s++;
1182 }
1183 break;
1184 case DIGITLUTF8:
1185 PL_reg_flags |= RF_tainted;
1186 while (s < strend) {
1187 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1188 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1189 goto got_it;
1190 else
1191 tmp = doevery;
1192 }
1193 else
1194 tmp = 1;
1195 s += UTF8SKIP(s);
1196 }
1197 break;
a0d0e21e
LW
1198 case NDIGIT:
1199 while (s < strend) {
1200 if (!isDIGIT(*s)) {
6eb5f6b9 1201 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1202 goto got_it;
1203 else
1204 tmp = doevery;
a687059c 1205 }
a0d0e21e
LW
1206 else
1207 tmp = 1;
1208 s++;
1209 }
1210 break;
a0ed51b3
LW
1211 case NDIGITUTF8:
1212 while (s < strend) {
dfe13c55 1213 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1214 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1215 goto got_it;
1216 else
1217 tmp = doevery;
1218 }
1219 else
1220 tmp = 1;
1221 s += UTF8SKIP(s);
1222 }
1223 break;
b8c5462f
JH
1224 case NDIGITL:
1225 PL_reg_flags |= RF_tainted;
1226 while (s < strend) {
1227 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1228 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1229 goto got_it;
1230 else
1231 tmp = doevery;
1232 }
1233 else
1234 tmp = 1;
1235 s++;
a0ed51b3 1236 }
b8c5462f
JH
1237 break;
1238 case NDIGITLUTF8:
1239 PL_reg_flags |= RF_tainted;
1240 while (s < strend) {
1241 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1242 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1243 goto got_it;
cf93c79d 1244 else
b8c5462f
JH
1245 tmp = doevery;
1246 }
1247 else
1248 tmp = 1;
1249 s += UTF8SKIP(s);
1250 }
1251 break;
b3c9acc1 1252 default:
3c3eec57
GS
1253 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1254 break;
d6a28714 1255 }
6eb5f6b9
JH
1256 return 0;
1257 got_it:
1258 return s;
1259}
1260
1261/*
1262 - regexec_flags - match a regexp against a string
1263 */
1264I32
1265Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1266 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1267/* strend: pointer to null at end of string */
1268/* strbeg: real beginning of string */
1269/* minend: end of match must be >=minend after stringarg. */
1270/* data: May be used for some additional optimizations. */
1271/* nosave: For optimizations. */
1272{
1273 dTHR;
1274 register char *s;
1275 register regnode *c;
1276 register char *startpos = stringarg;
1277 register I32 tmp;
1278 I32 minlen; /* must match at least this many chars */
1279 I32 dontbother = 0; /* how many characters not to try at end */
1280 I32 start_shift = 0; /* Offset of the start to find
1281 constant substr. */ /* CC */
1282 I32 end_shift = 0; /* Same for the end. */ /* CC */
1283 I32 scream_pos = -1; /* Internal iterator of scream. */
1284 char *scream_olds;
1285 SV* oreplsv = GvSV(PL_replgv);
1286
1287 PL_regcc = 0;
1288
1289 cache_re(prog);
1290#ifdef DEBUGGING
1291 PL_regnarrate = PL_debug & 512;
1292#endif
1293
1294 /* Be paranoid... */
1295 if (prog == NULL || startpos == NULL) {
1296 Perl_croak(aTHX_ "NULL regexp parameter");
1297 return 0;
1298 }
1299
1300 minlen = prog->minlen;
1301 if (strend - startpos < minlen) goto phooey;
1302
1303 if (startpos == strbeg) /* is ^ valid at stringarg? */
1304 PL_regprev = '\n';
1305 else {
1306 PL_regprev = (U32)stringarg[-1];
1307 if (!PL_multiline && PL_regprev == '\n')
1308 PL_regprev = '\0'; /* force ^ to NOT match */
1309 }
1310
1311 /* Check validity of program. */
1312 if (UCHARAT(prog->program) != REG_MAGIC) {
1313 Perl_croak(aTHX_ "corrupted regexp program");
1314 }
1315
1316 PL_reg_flags = 0;
1317 PL_reg_eval_set = 0;
1318 PL_reg_maxiter = 0;
1319
1320 if (prog->reganch & ROPT_UTF8)
1321 PL_reg_flags |= RF_utf8;
1322
1323 /* Mark beginning of line for ^ and lookbehind. */
1324 PL_regbol = startpos;
1325 PL_bostr = strbeg;
1326 PL_reg_sv = sv;
1327
1328 /* Mark end of line for $ (and such) */
1329 PL_regeol = strend;
1330
1331 /* see how far we have to get to not match where we matched before */
1332 PL_regtill = startpos+minend;
1333
1334 /* We start without call_cc context. */
1335 PL_reg_call_cc = 0;
1336
1337 /* If there is a "must appear" string, look for it. */
1338 s = startpos;
1339
1340 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1341 MAGIC *mg;
1342
1343 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1344 PL_reg_ganch = startpos;
1345 else if (sv && SvTYPE(sv) >= SVt_PVMG
1346 && SvMAGIC(sv)
1347 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1348 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1349 if (prog->reganch & ROPT_ANCH_GPOS) {
1350 if (s > PL_reg_ganch)
1351 goto phooey;
1352 s = PL_reg_ganch;
1353 }
1354 }
1355 else /* pos() not defined */
1356 PL_reg_ganch = strbeg;
1357 }
1358
1359 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1360 re_scream_pos_data d;
1361
1362 d.scream_olds = &scream_olds;
1363 d.scream_pos = &scream_pos;
1364 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1365 if (!s)
1366 goto phooey; /* not present */
1367 }
1368
1369 DEBUG_r( if (!PL_colorset) reginitcolors() );
1370 DEBUG_r(PerlIO_printf(Perl_debug_log,
1371 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1372 PL_colors[4],PL_colors[5],PL_colors[0],
1373 prog->precomp,
1374 PL_colors[1],
1375 (strlen(prog->precomp) > 60 ? "..." : ""),
1376 PL_colors[0],
1377 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1378 startpos, PL_colors[1],
1379 (strend - startpos > 60 ? "..." : ""))
1380 );
1381
1382 /* Simplest case: anchored match need be tried only once. */
1383 /* [unless only anchor is BOL and multiline is set] */
1384 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1385 if (s == startpos && regtry(prog, startpos))
1386 goto got_it;
1387 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1388 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1389 {
1390 char *end;
1391
1392 if (minlen)
1393 dontbother = minlen - 1;
1394 end = HOPc(strend, -dontbother) - 1;
1395 /* for multiline we only have to try after newlines */
1396 if (prog->check_substr) {
1397 if (s == startpos)
1398 goto after_try;
1399 while (1) {
1400 if (regtry(prog, s))
1401 goto got_it;
1402 after_try:
1403 if (s >= end)
1404 goto phooey;
1405 if (prog->reganch & RE_USE_INTUIT) {
1406 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1407 if (!s)
1408 goto phooey;
1409 }
1410 else
1411 s++;
1412 }
1413 } else {
1414 if (s > startpos)
1415 s--;
1416 while (s < end) {
1417 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1418 if (regtry(prog, s))
1419 goto got_it;
1420 }
1421 }
1422 }
1423 }
1424 goto phooey;
1425 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1426 if (regtry(prog, PL_reg_ganch))
1427 goto got_it;
1428 goto phooey;
1429 }
1430
1431 /* Messy cases: unanchored match. */
1432 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1433 /* we have /x+whatever/ */
1434 /* it must be a one character string (XXXX Except UTF?) */
1435 char ch = SvPVX(prog->anchored_substr)[0];
1436 if (UTF) {
1437 while (s < strend) {
1438 if (*s == ch) {
1439 if (regtry(prog, s)) goto got_it;
1440 s += UTF8SKIP(s);
1441 while (s < strend && *s == ch)
1442 s += UTF8SKIP(s);
1443 }
1444 s += UTF8SKIP(s);
1445 }
1446 }
1447 else {
1448 while (s < strend) {
1449 if (*s == ch) {
1450 if (regtry(prog, s)) goto got_it;
1451 s++;
1452 while (s < strend && *s == ch)
1453 s++;
1454 }
1455 s++;
1456 }
1457 }
1458 }
1459 /*SUPPRESS 560*/
1460 else if (prog->anchored_substr != Nullsv
1461 || (prog->float_substr != Nullsv
1462 && prog->float_max_offset < strend - s)) {
1463 SV *must = prog->anchored_substr
1464 ? prog->anchored_substr : prog->float_substr;
1465 I32 back_max =
1466 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1467 I32 back_min =
1468 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1469 I32 delta = back_max - back_min;
1470 char *last = HOPc(strend, /* Cannot start after this */
1471 -(I32)(CHR_SVLEN(must)
1472 - (SvTAIL(must) != 0) + back_min));
1473 char *last1; /* Last position checked before */
1474
1475 if (s > PL_bostr)
1476 last1 = HOPc(s, -1);
1477 else
1478 last1 = s - 1; /* bogus */
1479
1480 /* XXXX check_substr already used to find `s', can optimize if
1481 check_substr==must. */
1482 scream_pos = -1;
1483 dontbother = end_shift;
1484 strend = HOPc(strend, -dontbother);
1485 while ( (s <= last) &&
1486 ((flags & REXEC_SCREAM)
1487 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1488 end_shift, &scream_pos, 0))
1489 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1490 (unsigned char*)strend, must,
1491 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1492 if (HOPc(s, -back_max) > last1) {
1493 last1 = HOPc(s, -back_min);
1494 s = HOPc(s, -back_max);
1495 }
1496 else {
1497 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1498
1499 last1 = HOPc(s, -back_min);
1500 s = t;
1501 }
1502 if (UTF) {
1503 while (s <= last1) {
1504 if (regtry(prog, s))
1505 goto got_it;
1506 s += UTF8SKIP(s);
1507 }
1508 }
1509 else {
1510 while (s <= last1) {
1511 if (regtry(prog, s))
1512 goto got_it;
1513 s++;
1514 }
1515 }
1516 }
1517 goto phooey;
1518 }
1519 else if (c = prog->regstclass) {
66e933ab
GS
1520 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1521 /* don't bother with what can't match */
6eb5f6b9
JH
1522 strend = HOPc(strend, -(minlen - 1));
1523 if (find_byclass(prog, c, s, strend, startpos, 0))
1524 goto got_it;
d6a28714
JH
1525 }
1526 else {
1527 dontbother = 0;
1528 if (prog->float_substr != Nullsv) { /* Trim the end. */
1529 char *last;
1530 I32 oldpos = scream_pos;
1531
1532 if (flags & REXEC_SCREAM) {
1533 last = screaminstr(sv, prog->float_substr, s - strbeg,
1534 end_shift, &scream_pos, 1); /* last one */
1535 if (!last)
1536 last = scream_olds; /* Only one occurence. */
b8c5462f 1537 }
d6a28714
JH
1538 else {
1539 STRLEN len;
1540 char *little = SvPV(prog->float_substr, len);
1541
1542 if (SvTAIL(prog->float_substr)) {
1543 if (memEQ(strend - len + 1, little, len - 1))
1544 last = strend - len + 1;
1545 else if (!PL_multiline)
1546 last = memEQ(strend - len, little, len)
1547 ? strend - len : Nullch;
b8c5462f 1548 else
d6a28714
JH
1549 goto find_last;
1550 } else {
1551 find_last:
1552 if (len)
1553 last = rninstr(s, strend, little, little + len);
b8c5462f 1554 else
d6a28714 1555 last = strend; /* matching `$' */
b8c5462f 1556 }
b8c5462f 1557 }
d6a28714
JH
1558 if (last == NULL) goto phooey; /* Should not happen! */
1559 dontbother = strend - last + prog->float_min_offset;
1560 }
1561 if (minlen && (dontbother < minlen))
1562 dontbother = minlen - 1;
1563 strend -= dontbother; /* this one's always in bytes! */
1564 /* We don't know much -- general case. */
1565 if (UTF) {
1566 for (;;) {
1567 if (regtry(prog, s))
1568 goto got_it;
1569 if (s >= strend)
1570 break;
b8c5462f 1571 s += UTF8SKIP(s);
d6a28714
JH
1572 };
1573 }
1574 else {
1575 do {
1576 if (regtry(prog, s))
1577 goto got_it;
1578 } while (s++ < strend);
1579 }
1580 }
1581
1582 /* Failure. */
1583 goto phooey;
1584
1585got_it:
1586 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1587
1588 if (PL_reg_eval_set) {
1589 /* Preserve the current value of $^R */
1590 if (oreplsv != GvSV(PL_replgv))
1591 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1592 restored, the value remains
1593 the same. */
1594 restore_pos(aTHXo_ 0);
1595 }
1596
1597 /* make sure $`, $&, $', and $digit will work later */
1598 if ( !(flags & REXEC_NOT_FIRST) ) {
1599 if (RX_MATCH_COPIED(prog)) {
1600 Safefree(prog->subbeg);
1601 RX_MATCH_COPIED_off(prog);
1602 }
1603 if (flags & REXEC_COPY_STR) {
1604 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1605
1606 s = savepvn(strbeg, i);
1607 prog->subbeg = s;
1608 prog->sublen = i;
1609 RX_MATCH_COPIED_on(prog);
1610 }
1611 else {
1612 prog->subbeg = strbeg;
1613 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1614 }
1615 }
1616
1617 return 1;
1618
1619phooey:
1620 if (PL_reg_eval_set)
1621 restore_pos(aTHXo_ 0);
1622 return 0;
1623}
1624
1625/*
1626 - regtry - try match at specific point
1627 */
1628STATIC I32 /* 0 failure, 1 success */
1629S_regtry(pTHX_ regexp *prog, char *startpos)
1630{
1631 dTHR;
1632 register I32 i;
1633 register I32 *sp;
1634 register I32 *ep;
1635 CHECKPOINT lastcp;
1636
1637 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1638 MAGIC *mg;
1639
1640 PL_reg_eval_set = RS_init;
1641 DEBUG_r(DEBUG_s(
b900a521
JH
1642 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1643 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1644 ));
e8347627 1645 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1646 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1647 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1648 SAVETMPS;
1649 /* Apparently this is not needed, judging by wantarray. */
e8347627 1650 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1651 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1652
1653 if (PL_reg_sv) {
1654 /* Make $_ available to executed code. */
1655 if (PL_reg_sv != DEFSV) {
1656 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1657 SAVESPTR(DEFSV);
1658 DEFSV = PL_reg_sv;
b8c5462f 1659 }
d6a28714
JH
1660
1661 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1662 && (mg = mg_find(PL_reg_sv, 'g')))) {
1663 /* prepare for quick setting of pos */
1664 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1665 mg = mg_find(PL_reg_sv, 'g');
1666 mg->mg_len = -1;
b8c5462f 1667 }
d6a28714
JH
1668 PL_reg_magic = mg;
1669 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1670 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1671 }
1672 if (!PL_reg_curpm)
1673 New(22,PL_reg_curpm, 1, PMOP);
1674 PL_reg_curpm->op_pmregexp = prog;
1675 PL_reg_oldcurpm = PL_curpm;
1676 PL_curpm = PL_reg_curpm;
1677 if (RX_MATCH_COPIED(prog)) {
1678 /* Here is a serious problem: we cannot rewrite subbeg,
1679 since it may be needed if this match fails. Thus
1680 $` inside (?{}) could fail... */
1681 PL_reg_oldsaved = prog->subbeg;
1682 PL_reg_oldsavedlen = prog->sublen;
1683 RX_MATCH_COPIED_off(prog);
1684 }
1685 else
1686 PL_reg_oldsaved = Nullch;
1687 prog->subbeg = PL_bostr;
1688 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1689 }
1690 prog->startp[0] = startpos - PL_bostr;
1691 PL_reginput = startpos;
1692 PL_regstartp = prog->startp;
1693 PL_regendp = prog->endp;
1694 PL_reglastparen = &prog->lastparen;
1695 prog->lastparen = 0;
1696 PL_regsize = 0;
1697 DEBUG_r(PL_reg_starttry = startpos);
1698 if (PL_reg_start_tmpl <= prog->nparens) {
1699 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1700 if(PL_reg_start_tmp)
1701 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1702 else
1703 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1704 }
1705
1706 /* XXXX What this code is doing here?!!! There should be no need
1707 to do this again and again, PL_reglastparen should take care of
1708 this! */
1709 sp = prog->startp;
1710 ep = prog->endp;
1711 if (prog->nparens) {
1712 for (i = prog->nparens; i >= 1; i--) {
1713 *++sp = -1;
1714 *++ep = -1;
1715 }
1716 }
1717 REGCP_SET;
1718 if (regmatch(prog->program + 1)) {
1719 prog->endp[0] = PL_reginput - PL_bostr;
1720 return 1;
1721 }
1722 REGCP_UNWIND;
1723 return 0;
1724}
1725
1726/*
1727 - regmatch - main matching routine
1728 *
1729 * Conceptually the strategy is simple: check to see whether the current
1730 * node matches, call self recursively to see whether the rest matches,
1731 * and then act accordingly. In practice we make some effort to avoid
1732 * recursion, in particular by going through "ordinary" nodes (that don't
1733 * need to know whether the rest of the match failed) by a loop instead of
1734 * by recursion.
1735 */
1736/* [lwall] I've hoisted the register declarations to the outer block in order to
1737 * maybe save a little bit of pushing and popping on the stack. It also takes
1738 * advantage of machines that use a register save mask on subroutine entry.
1739 */
1740STATIC I32 /* 0 failure, 1 success */
1741S_regmatch(pTHX_ regnode *prog)
1742{
1743 dTHR;
1744 register regnode *scan; /* Current node. */
1745 regnode *next; /* Next node. */
1746 regnode *inner; /* Next node in internal branch. */
1747 register I32 nextchr; /* renamed nextchr - nextchar colides with
1748 function of same name */
1749 register I32 n; /* no or next */
1750 register I32 ln; /* len or last */
1751 register char *s; /* operand or save */
1752 register char *locinput = PL_reginput;
1753 register I32 c1, c2, paren; /* case fold search, parenth */
1754 int minmod = 0, sw = 0, logical = 0;
1755#ifdef DEBUGGING
1756 PL_regindent++;
1757#endif
1758
1759 /* Note that nextchr is a byte even in UTF */
1760 nextchr = UCHARAT(locinput);
1761 scan = prog;
1762 while (scan != NULL) {
1763#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1764#ifdef DEBUGGING
1765# define sayYES goto yes
1766# define sayNO goto no
7821416a
IZ
1767# define sayYES_FINAL goto yes_final
1768# define sayYES_LOUD goto yes_loud
1769# define sayNO_FINAL goto no_final
1770# define sayNO_SILENT goto do_no
d6a28714
JH
1771# define saySAME(x) if (x) goto yes; else goto no
1772# define REPORT_CODE_OFF 24
1773#else
1774# define sayYES return 1
1775# define sayNO return 0
7821416a
IZ
1776# define sayYES_FINAL return 1
1777# define sayYES_LOUD return 1
1778# define sayNO_FINAL return 0
1779# define sayNO_SILENT return 0
d6a28714
JH
1780# define saySAME(x) return x
1781#endif
1782 DEBUG_r( {
1783 SV *prop = sv_newmortal();
1784 int docolor = *PL_colors[0];
1785 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1786 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1787 /* The part of the string before starttry has one color
1788 (pref0_len chars), between starttry and current
1789 position another one (pref_len - pref0_len chars),
1790 after the current position the third one.
1791 We assume that pref0_len <= pref_len, otherwise we
1792 decrease pref0_len. */
1793 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1794 ? (5 + taill) - l : locinput - PL_bostr);
1795 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1796
1797 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1798 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1799 ? (5 + taill) - pref_len : PL_regeol - locinput);
1800 if (pref0_len < 0)
1801 pref0_len = 0;
1802 if (pref0_len > pref_len)
1803 pref0_len = pref_len;
1804 regprop(prop, scan);
1805 PerlIO_printf(Perl_debug_log,
b900a521
JH
1806 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1807 (IV)(locinput - PL_bostr),
d6a28714
JH
1808 PL_colors[4], pref0_len,
1809 locinput - pref_len, PL_colors[5],
1810 PL_colors[2], pref_len - pref0_len,
1811 locinput - pref_len + pref0_len, PL_colors[3],
1812 (docolor ? "" : "> <"),
1813 PL_colors[0], l, locinput, PL_colors[1],
1814 15 - l - pref_len + 1,
1815 "",
b900a521 1816 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1817 SvPVX(prop));
1818 } );
1819
1820 next = scan + NEXT_OFF(scan);
1821 if (next == scan)
1822 next = NULL;
1823
1824 switch (OP(scan)) {
1825 case BOL:
1826 if (locinput == PL_bostr
1827 ? PL_regprev == '\n'
1828 : (PL_multiline &&
1829 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1830 {
1831 /* regtill = regbol; */
b8c5462f
JH
1832 break;
1833 }
d6a28714
JH
1834 sayNO;
1835 case MBOL:
1836 if (locinput == PL_bostr
1837 ? PL_regprev == '\n'
1838 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1839 {
b8c5462f
JH
1840 break;
1841 }
d6a28714
JH
1842 sayNO;
1843 case SBOL:
1844 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1845 break;
d6a28714
JH
1846 sayNO;
1847 case GPOS:
1848 if (locinput == PL_reg_ganch)
1849 break;
1850 sayNO;
1851 case EOL:
1852 if (PL_multiline)
1853 goto meol;
1854 else
1855 goto seol;
1856 case MEOL:
1857 meol:
1858 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1859 sayNO;
b8c5462f 1860 break;
d6a28714
JH
1861 case SEOL:
1862 seol:
1863 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1864 sayNO;
d6a28714 1865 if (PL_regeol - locinput > 1)
b8c5462f 1866 sayNO;
b8c5462f 1867 break;
d6a28714
JH
1868 case EOS:
1869 if (PL_regeol != locinput)
b8c5462f 1870 sayNO;
d6a28714
JH
1871 break;
1872 case SANYUTF8:
b8c5462f 1873 if (nextchr & 0x80) {
b8c5462f 1874 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1875 if (locinput > PL_regeol)
1876 sayNO;
b8c5462f
JH
1877 nextchr = UCHARAT(locinput);
1878 break;
1879 }
d6a28714 1880 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1881 sayNO;
b8c5462f 1882 nextchr = UCHARAT(++locinput);
a0d0e21e 1883 break;
d6a28714
JH
1884 case SANY:
1885 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1886 sayNO;
1887 nextchr = UCHARAT(++locinput);
b85d18e9 1888 break;
d6a28714 1889 case ANYUTF8:
a0ed51b3 1890 if (nextchr & 0x80) {
b8c5462f 1891 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1892 if (locinput > PL_regeol)
1893 sayNO;
a0ed51b3
LW
1894 nextchr = UCHARAT(locinput);
1895 break;
1896 }
d6a28714 1897 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3
LW
1898 sayNO;
1899 nextchr = UCHARAT(++locinput);
1900 break;
d6a28714
JH
1901 case REG_ANY:
1902 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1903 sayNO;
76e3520e 1904 nextchr = UCHARAT(++locinput);
a0d0e21e 1905 break;
d6a28714 1906 case EXACT:
cd439c50
IZ
1907 s = STRING(scan);
1908 ln = STR_LEN(scan);
d6a28714
JH
1909 /* Inline the first character, for speed. */
1910 if (UCHARAT(s) != nextchr)
1911 sayNO;
1912 if (PL_regeol - locinput < ln)
1913 sayNO;
1914 if (ln > 1 && memNE(s, locinput, ln))
1915 sayNO;
1916 locinput += ln;
1917 nextchr = UCHARAT(locinput);
1918 break;
1919 case EXACTFL:
b8c5462f
JH
1920 PL_reg_flags |= RF_tainted;
1921 /* FALL THROUGH */
d6a28714 1922 case EXACTF:
cd439c50
IZ
1923 s = STRING(scan);
1924 ln = STR_LEN(scan);
d6a28714
JH
1925
1926 if (UTF) {
1927 char *l = locinput;
1928 char *e = s + ln;
1929 c1 = OP(scan) == EXACTF;
1930 while (s < e) {
1931 if (l >= PL_regeol)
1932 sayNO;
1933 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1934 toLOWER_utf8((U8*)l) :
1935 toLOWER_LC_utf8((U8*)l)))
1936 {
1937 sayNO;
1938 }
1939 s += UTF8SKIP(s);
1940 l += UTF8SKIP(l);
b8c5462f 1941 }
d6a28714 1942 locinput = l;
a0ed51b3
LW
1943 nextchr = UCHARAT(locinput);
1944 break;
1945 }
d6a28714
JH
1946
1947 /* Inline the first character, for speed. */
1948 if (UCHARAT(s) != nextchr &&
1949 UCHARAT(s) != ((OP(scan) == EXACTF)
1950 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1951 sayNO;
d6a28714 1952 if (PL_regeol - locinput < ln)
b8c5462f 1953 sayNO;
d6a28714
JH
1954 if (ln > 1 && (OP(scan) == EXACTF
1955 ? ibcmp(s, locinput, ln)
1956 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1957 sayNO;
d6a28714
JH
1958 locinput += ln;
1959 nextchr = UCHARAT(locinput);
a0d0e21e 1960 break;
d6a28714 1961 case ANYOFUTF8:
d6a28714 1962 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1963 sayNO;
d6a28714
JH
1964 if (locinput >= PL_regeol)
1965 sayNO;
1966 locinput += PL_utf8skip[nextchr];
1967 nextchr = UCHARAT(locinput);
1968 break;
1969 case ANYOF:
d6a28714 1970 if (nextchr < 0)
b8c5462f 1971 nextchr = UCHARAT(locinput);
936ed897 1972 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
1973 sayNO;
1974 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1975 sayNO;
b8c5462f
JH
1976 nextchr = UCHARAT(++locinput);
1977 break;
d6a28714 1978 case ALNUML:
b8c5462f
JH
1979 PL_reg_flags |= RF_tainted;
1980 /* FALL THROUGH */
d6a28714 1981 case ALNUM:
b8c5462f 1982 if (!nextchr)
4633a7c4 1983 sayNO;
d6a28714
JH
1984 if (!(OP(scan) == ALNUM
1985 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1986 sayNO;
1987 nextchr = UCHARAT(++locinput);
bbce6d69 1988 break;
d6a28714 1989 case ALNUMLUTF8:
3280af22 1990 PL_reg_flags |= RF_tainted;
bbce6d69 1991 /* FALL THROUGH */
d6a28714 1992 case ALNUMUTF8:
b8c5462f
JH
1993 if (!nextchr)
1994 sayNO;
1995 if (nextchr & 0x80) {
d6a28714
JH
1996 if (!(OP(scan) == ALNUMUTF8
1997 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1998 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1999 {
2000 sayNO;
a0ed51b3 2001 }
b8c5462f 2002 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2003 nextchr = UCHARAT(locinput);
2004 break;
2005 }
d6a28714
JH
2006 if (!(OP(scan) == ALNUMUTF8
2007 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2008 sayNO;
b8c5462f 2009 nextchr = UCHARAT(++locinput);
a0d0e21e 2010 break;
d6a28714 2011 case NALNUML:
b8c5462f
JH
2012 PL_reg_flags |= RF_tainted;
2013 /* FALL THROUGH */
d6a28714
JH
2014 case NALNUM:
2015 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2016 sayNO;
d6a28714
JH
2017 if (OP(scan) == NALNUM
2018 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2019 sayNO;
b8c5462f 2020 nextchr = UCHARAT(++locinput);
a0ed51b3 2021 break;
d6a28714 2022 case NALNUMLUTF8:
b8c5462f
JH
2023 PL_reg_flags |= RF_tainted;
2024 /* FALL THROUGH */
d6a28714 2025 case NALNUMUTF8:
3280af22 2026 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2027 sayNO;
b8c5462f 2028 if (nextchr & 0x80) {
d6a28714
JH
2029 if (OP(scan) == NALNUMUTF8
2030 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2031 : isALNUM_LC_utf8((U8*)locinput))
2032 {
b8c5462f 2033 sayNO;
d6a28714 2034 }
b8c5462f
JH
2035 locinput += PL_utf8skip[nextchr];
2036 nextchr = UCHARAT(locinput);
2037 break;
2038 }
d6a28714
JH
2039 if (OP(scan) == NALNUMUTF8
2040 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2041 sayNO;
76e3520e 2042 nextchr = UCHARAT(++locinput);
a0d0e21e 2043 break;
d6a28714
JH
2044 case BOUNDL:
2045 case NBOUNDL:
3280af22 2046 PL_reg_flags |= RF_tainted;
bbce6d69 2047 /* FALL THROUGH */
d6a28714
JH
2048 case BOUND:
2049 case NBOUND:
2050 /* was last char in word? */
2051 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2052 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2053 ln = isALNUM(ln);
2054 n = isALNUM(nextchr);
2055 }
2056 else {
2057 ln = isALNUM_LC(ln);
2058 n = isALNUM_LC(nextchr);
2059 }
2060 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2061 sayNO;
a0d0e21e 2062 break;
d6a28714
JH
2063 case BOUNDLUTF8:
2064 case NBOUNDLUTF8:
a0ed51b3
LW
2065 PL_reg_flags |= RF_tainted;
2066 /* FALL THROUGH */
d6a28714
JH
2067 case BOUNDUTF8:
2068 case NBOUNDUTF8:
2069 /* was last char in word? */
2070 ln = (locinput != PL_regbol)
2071 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2072 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2073 ln = isALNUM_uni(ln);
2074 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2075 }
d6a28714
JH
2076 else {
2077 ln = isALNUM_LC_uni(ln);
2078 n = isALNUM_LC_utf8((U8*)locinput);
2079 }
2080 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2081 sayNO;
a0ed51b3 2082 break;
d6a28714 2083 case SPACEL:
3280af22 2084 PL_reg_flags |= RF_tainted;
bbce6d69 2085 /* FALL THROUGH */
d6a28714
JH
2086 case SPACE:
2087 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2088 sayNO;
d6a28714
JH
2089 if (!(OP(scan) == SPACE
2090 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2091 sayNO;
76e3520e 2092 nextchr = UCHARAT(++locinput);
a0d0e21e 2093 break;
d6a28714 2094 case SPACELUTF8:
a0ed51b3
LW
2095 PL_reg_flags |= RF_tainted;
2096 /* FALL THROUGH */
d6a28714 2097 case SPACEUTF8:
a0ed51b3
LW
2098 if (!nextchr && locinput >= PL_regeol)
2099 sayNO;
2100 if (nextchr & 0x80) {
d6a28714
JH
2101 if (!(OP(scan) == SPACEUTF8
2102 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2103 : isSPACE_LC_utf8((U8*)locinput)))
2104 {
a0ed51b3 2105 sayNO;
d6a28714 2106 }
6f06b55f 2107 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2108 nextchr = UCHARAT(locinput);
2109 break;
2110 }
d6a28714
JH
2111 if (!(OP(scan) == SPACEUTF8
2112 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2113 sayNO;
2114 nextchr = UCHARAT(++locinput);
2115 break;
d6a28714 2116 case NSPACEL:
3280af22 2117 PL_reg_flags |= RF_tainted;
bbce6d69 2118 /* FALL THROUGH */
d6a28714 2119 case NSPACE:
b8c5462f
JH
2120 if (!nextchr)
2121 sayNO;
d6a28714
JH
2122 if (OP(scan) == SPACE
2123 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2124 sayNO;
b8c5462f 2125 nextchr = UCHARAT(++locinput);
a0d0e21e 2126 break;
d6a28714 2127 case NSPACELUTF8:
a0ed51b3
LW
2128 PL_reg_flags |= RF_tainted;
2129 /* FALL THROUGH */
d6a28714 2130 case NSPACEUTF8:
b8c5462f
JH
2131 if (!nextchr)
2132 sayNO;
2133 if (nextchr & 0x80) {
d6a28714
JH
2134 if (OP(scan) == NSPACEUTF8
2135 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2136 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2137 {
2138 sayNO;
2139 }
2140 locinput += PL_utf8skip[nextchr];
2141 nextchr = UCHARAT(locinput);
2142 break;
a0ed51b3 2143 }
d6a28714
JH
2144 if (OP(scan) == NSPACEUTF8
2145 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2146 sayNO;
76e3520e 2147 nextchr = UCHARAT(++locinput);
a0d0e21e 2148 break;
d6a28714 2149 case DIGITL:
a0ed51b3
LW
2150 PL_reg_flags |= RF_tainted;
2151 /* FALL THROUGH */
d6a28714 2152 case DIGIT:
a0ed51b3
LW
2153 if (!nextchr && locinput >= PL_regeol)
2154 sayNO;
d6a28714
JH
2155 if (!(OP(scan) == DIGIT
2156 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2157 sayNO;
76e3520e 2158 nextchr = UCHARAT(++locinput);
a0d0e21e 2159 break;
d6a28714 2160 case DIGITLUTF8:
a0ed51b3
LW
2161 PL_reg_flags |= RF_tainted;
2162 /* FALL THROUGH */
d6a28714 2163 case DIGITUTF8:
a0ed51b3
LW
2164 if (!nextchr)
2165 sayNO;
2166 if (nextchr & 0x80) {
d6a28714
JH
2167 if (OP(scan) == NDIGITUTF8
2168 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2169 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 2170 {
a0ed51b3 2171 sayNO;
dfe13c55 2172 }
6f06b55f 2173 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2174 nextchr = UCHARAT(locinput);
2175 break;
2176 }
d6a28714 2177 if (!isDIGIT(nextchr))
a0ed51b3
LW
2178 sayNO;
2179 nextchr = UCHARAT(++locinput);
2180 break;
d6a28714 2181 case NDIGITL:
b8c5462f
JH
2182 PL_reg_flags |= RF_tainted;
2183 /* FALL THROUGH */
d6a28714 2184 case NDIGIT:
b8c5462f
JH
2185 if (!nextchr)
2186 sayNO;
d6a28714
JH
2187 if (OP(scan) == DIGIT
2188 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2189 sayNO;
76e3520e 2190 nextchr = UCHARAT(++locinput);
a0d0e21e 2191 break;
d6a28714 2192 case NDIGITLUTF8:
b8c5462f
JH
2193 PL_reg_flags |= RF_tainted;
2194 /* FALL THROUGH */
d6a28714 2195 case NDIGITUTF8:
b8c5462f
JH
2196 if (!nextchr && locinput >= PL_regeol)
2197 sayNO;
a0ed51b3 2198 if (nextchr & 0x80) {
d6a28714 2199 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 2200 sayNO;
6f06b55f 2201 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2202 nextchr = UCHARAT(locinput);
2203 break;
2204 }
d6a28714 2205 if (isDIGIT(nextchr))
a0ed51b3
LW
2206 sayNO;
2207 nextchr = UCHARAT(++locinput);
2208 break;
2209 case CLUMP:
dfe13c55 2210 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2211 sayNO;
6f06b55f 2212 locinput += PL_utf8skip[nextchr];
dfe13c55 2213 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2214 locinput += UTF8SKIP(locinput);
2215 if (locinput > PL_regeol)
2216 sayNO;
2217 nextchr = UCHARAT(locinput);
2218 break;
c8756f30 2219 case REFFL:
3280af22 2220 PL_reg_flags |= RF_tainted;
c8756f30 2221 /* FALL THROUGH */
c277df42 2222 case REF:
c8756f30 2223 case REFF:
c277df42 2224 n = ARG(scan); /* which paren pair */
cf93c79d 2225 ln = PL_regstartp[n];
2c2d71f5 2226 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2227 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2228 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2229 if (ln == PL_regendp[n])
a0d0e21e 2230 break;
a0ed51b3 2231
cf93c79d 2232 s = PL_bostr + ln;
a0ed51b3
LW
2233 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2234 char *l = locinput;
cf93c79d 2235 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2236 /*
2237 * Note that we can't do the "other character" lookup trick as
2238 * in the 8-bit case (no pun intended) because in Unicode we
2239 * have to map both upper and title case to lower case.
2240 */
2241 if (OP(scan) == REFF) {
2242 while (s < e) {
2243 if (l >= PL_regeol)
2244 sayNO;
dfe13c55 2245 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2246 sayNO;
2247 s += UTF8SKIP(s);
2248 l += UTF8SKIP(l);
2249 }
2250 }
2251 else {
2252 while (s < e) {
2253 if (l >= PL_regeol)
2254 sayNO;
dfe13c55 2255 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2256 sayNO;
2257 s += UTF8SKIP(s);
2258 l += UTF8SKIP(l);
2259 }
2260 }
2261 locinput = l;
2262 nextchr = UCHARAT(locinput);
2263 break;
2264 }
2265
a0d0e21e 2266 /* Inline the first character, for speed. */
76e3520e 2267 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2268 (OP(scan) == REF ||
2269 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2270 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2271 sayNO;
cf93c79d 2272 ln = PL_regendp[n] - ln;
3280af22 2273 if (locinput + ln > PL_regeol)
4633a7c4 2274 sayNO;
c8756f30
AK
2275 if (ln > 1 && (OP(scan) == REF
2276 ? memNE(s, locinput, ln)
2277 : (OP(scan) == REFF
2278 ? ibcmp(s, locinput, ln)
2279 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2280 sayNO;
a0d0e21e 2281 locinput += ln;
76e3520e 2282 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2283 break;
2284
2285 case NOTHING:
c277df42 2286 case TAIL:
a0d0e21e
LW
2287 break;
2288 case BACK:
2289 break;
c277df42
IZ
2290 case EVAL:
2291 {
2292 dSP;
533c011a 2293 OP_4tree *oop = PL_op;
3280af22
NIS
2294 COP *ocurcop = PL_curcop;
2295 SV **ocurpad = PL_curpad;
c277df42
IZ
2296 SV *ret;
2297
2298 n = ARG(scan);
533c011a 2299 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2300 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2301 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2302 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2303
cea2e8a9 2304 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2305 SPAGAIN;
2306 ret = POPs;
2307 PUTBACK;
2308
0f5d15d6
IZ
2309 PL_op = oop;
2310 PL_curpad = ocurpad;
2311 PL_curcop = ocurcop;
c277df42 2312 if (logical) {
0f5d15d6
IZ
2313 if (logical == 2) { /* Postponed subexpression. */
2314 regexp *re;
22c35a8c 2315 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2316 re_cc_state state;
0f5d15d6
IZ
2317 CHECKPOINT cp, lastcp;
2318
2319 if(SvROK(ret) || SvRMAGICAL(ret)) {
2320 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2321
2322 if(SvMAGICAL(sv))
2323 mg = mg_find(sv, 'r');
2324 }
2325 if (mg) {
2326 re = (regexp *)mg->mg_obj;
df0003d4 2327 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2328 }
2329 else {
2330 STRLEN len;
2331 char *t = SvPV(ret, len);
2332 PMOP pm;
2333 char *oprecomp = PL_regprecomp;
2334 I32 osize = PL_regsize;
2335 I32 onpar = PL_regnpar;
2336
2337 pm.op_pmflags = 0;
393fec97 2338 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2339 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2340 if (!(SvFLAGS(ret)
2341 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2342 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2343 PL_regprecomp = oprecomp;
2344 PL_regsize = osize;
2345 PL_regnpar = onpar;
2346 }
2347 DEBUG_r(
2348 PerlIO_printf(Perl_debug_log,
2349 "Entering embedded `%s%.60s%s%s'\n",
2350 PL_colors[0],
2351 re->precomp,
2352 PL_colors[1],
2353 (strlen(re->precomp) > 60 ? "..." : ""))
2354 );
2355 state.node = next;
2356 state.prev = PL_reg_call_cc;
2357 state.cc = PL_regcc;
2358 state.re = PL_reg_re;
2359
2ab05381 2360 PL_regcc = 0;
0f5d15d6
IZ
2361
2362 cp = regcppush(0); /* Save *all* the positions. */
2363 REGCP_SET;
2364 cache_re(re);
2365 state.ss = PL_savestack_ix;
2366 *PL_reglastparen = 0;
2367 PL_reg_call_cc = &state;
2368 PL_reginput = locinput;
2c2d71f5
JH
2369
2370 /* XXXX This is too dramatic a measure... */
2371 PL_reg_maxiter = 0;
2372
0f5d15d6 2373 if (regmatch(re->program + 1)) {
2c914db6
IZ
2374 /* Even though we succeeded, we need to restore
2375 global variables, since we may be wrapped inside
2376 SUSPEND, thus the match may be not finished yet. */
2377
2378 /* XXXX Do this only if SUSPENDed? */
2379 PL_reg_call_cc = state.prev;
2380 PL_regcc = state.cc;
2381 PL_reg_re = state.re;
2382 cache_re(PL_reg_re);
2383
2384 /* XXXX This is too dramatic a measure... */
2385 PL_reg_maxiter = 0;
2386
2387 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2388 ReREFCNT_dec(re);
2389 regcpblow(cp);
2390 sayYES;
2391 }
0f5d15d6
IZ
2392 ReREFCNT_dec(re);
2393 REGCP_UNWIND;
2394 regcppop();
2395 PL_reg_call_cc = state.prev;
2396 PL_regcc = state.cc;
2397 PL_reg_re = state.re;
d3790889 2398 cache_re(PL_reg_re);
2c2d71f5
JH
2399
2400 /* XXXX This is too dramatic a measure... */
2401 PL_reg_maxiter = 0;
2402
0f5d15d6
IZ
2403 sayNO;
2404 }
c277df42 2405 sw = SvTRUE(ret);
0f5d15d6 2406 logical = 0;
a0ed51b3
LW
2407 }
2408 else
3280af22 2409 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2410 break;
2411 }
a0d0e21e 2412 case OPEN:
c277df42 2413 n = ARG(scan); /* which paren pair */
3280af22
NIS
2414 PL_reg_start_tmp[n] = locinput;
2415 if (n > PL_regsize)
2416 PL_regsize = n;
a0d0e21e
LW
2417 break;
2418 case CLOSE:
c277df42 2419 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2420 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2421 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2422 if (n > *PL_reglastparen)
2423 *PL_reglastparen = n;
a0d0e21e 2424 break;
c277df42
IZ
2425 case GROUPP:
2426 n = ARG(scan); /* which paren pair */
cf93c79d 2427 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2428 break;
2429 case IFTHEN:
2c2d71f5 2430 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2431 if (sw)
2432 next = NEXTOPER(NEXTOPER(scan));
2433 else {
2434 next = scan + ARG(scan);
2435 if (OP(next) == IFTHEN) /* Fake one. */
2436 next = NEXTOPER(NEXTOPER(next));
2437 }
2438 break;
2439 case LOGICAL:
0f5d15d6 2440 logical = scan->flags;
c277df42 2441 break;
2ab05381
IZ
2442/*******************************************************************
2443 PL_regcc contains infoblock about the innermost (...)* loop, and
2444 a pointer to the next outer infoblock.
2445
2446 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2447
2448 1) After matching X, regnode for CURLYX is processed;
2449
2450 2) This regnode creates infoblock on the stack, and calls
2451 regmatch() recursively with the starting point at WHILEM node;
2452
2453 3) Each hit of WHILEM node tries to match A and Z (in the order
2454 depending on the current iteration, min/max of {min,max} and
2455 greediness). The information about where are nodes for "A"
2456 and "Z" is read from the infoblock, as is info on how many times "A"
2457 was already matched, and greediness.
2458
2459 4) After A matches, the same WHILEM node is hit again.
2460
2461 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2462 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2463 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2464 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2465 of the external loop.
2466
2467 Currently present infoblocks form a tree with a stem formed by PL_curcc
2468 and whatever it mentions via ->next, and additional attached trees
2469 corresponding to temporarily unset infoblocks as in "5" above.
2470
2471 In the following picture infoblocks for outer loop of
2472 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2473 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2474 infoblocks are drawn below the "reset" infoblock.
2475
2476 In fact in the picture below we do not show failed matches for Z and T
2477 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2478 more obvious *why* one needs to *temporary* unset infoblocks.]
2479
2480 Matched REx position InfoBlocks Comment
2481 (Y(A)*?Z)*?T x
2482 Y(A)*?Z)*?T x <- O
2483 Y (A)*?Z)*?T x <- O
2484 Y A)*?Z)*?T x <- O <- I
2485 YA )*?Z)*?T x <- O <- I
2486 YA A)*?Z)*?T x <- O <- I
2487 YAA )*?Z)*?T x <- O <- I
2488 YAA Z)*?T x <- O # Temporary unset I
2489 I
2490
2491 YAAZ Y(A)*?Z)*?T x <- O
2492 I
2493
2494 YAAZY (A)*?Z)*?T x <- O
2495 I
2496
2497 YAAZY A)*?Z)*?T x <- O <- I
2498 I
2499
2500 YAAZYA )*?Z)*?T x <- O <- I
2501 I
2502
2503 YAAZYA Z)*?T x <- O # Temporary unset I
2504 I,I
2505
2506 YAAZYAZ )*?T x <- O
2507 I,I
2508
2509 YAAZYAZ T x # Temporary unset O
2510 O
2511 I,I
2512
2513 YAAZYAZT x
2514 O
2515 I,I
2516 *******************************************************************/
a0d0e21e
LW
2517 case CURLYX: {
2518 CURCUR cc;
3280af22 2519 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2520
2521 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2522 next += ARG(next);
3280af22
NIS
2523 cc.oldcc = PL_regcc;
2524 PL_regcc = &cc;
2525 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2526 cc.cur = -1;
2527 cc.min = ARG1(scan);
2528 cc.max = ARG2(scan);
c277df42 2529 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2530 cc.next = next;
2531 cc.minmod = minmod;
2532 cc.lastloc = 0;
3280af22 2533 PL_reginput = locinput;
a0d0e21e
LW
2534 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2535 regcpblow(cp);
3280af22 2536 PL_regcc = cc.oldcc;
4633a7c4 2537 saySAME(n);
a0d0e21e
LW
2538 }
2539 /* NOT REACHED */
2540 case WHILEM: {
2541 /*
2542 * This is really hard to understand, because after we match
2543 * what we're trying to match, we must make sure the rest of
2c2d71f5 2544 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2545 * to go back UP the parse tree by recursing ever deeper. And
2546 * if it fails, we have to reset our parent's current state
2547 * that we can try again after backing off.
2548 */
2549
c277df42 2550 CHECKPOINT cp, lastcp;
3280af22 2551 CURCUR* cc = PL_regcc;
c277df42
IZ
2552 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2553
4633a7c4 2554 n = cc->cur + 1; /* how many we know we matched */
3280af22 2555 PL_reginput = locinput;
a0d0e21e 2556
c277df42
IZ
2557 DEBUG_r(
2558 PerlIO_printf(Perl_debug_log,
2559 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2560 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2561 (long)n, (long)cc->min,
2562 (long)cc->max, (long)cc)
2563 );
4633a7c4 2564
a0d0e21e
LW
2565 /* If degenerate scan matches "", assume scan done. */
2566
579cf2c3 2567 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2568 PL_regcc = cc->oldcc;
2ab05381
IZ
2569 if (PL_regcc)
2570 ln = PL_regcc->cur;
c277df42 2571 DEBUG_r(
c3464db5
DD
2572 PerlIO_printf(Perl_debug_log,
2573 "%*s empty match detected, try continuation...\n",
3280af22 2574 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2575 );
a0d0e21e 2576 if (regmatch(cc->next))
4633a7c4 2577 sayYES;
2ab05381
IZ
2578 if (PL_regcc)
2579 PL_regcc->cur = ln;
3280af22 2580 PL_regcc = cc;
4633a7c4 2581 sayNO;
a0d0e21e
LW
2582 }
2583
2584 /* First just match a string of min scans. */
2585
2586 if (n < cc->min) {
2587 cc->cur = n;
2588 cc->lastloc = locinput;
4633a7c4
LW
2589 if (regmatch(cc->scan))
2590 sayYES;
2591 cc->cur = n - 1;
c277df42 2592 cc->lastloc = lastloc;
4633a7c4 2593 sayNO;
a0d0e21e
LW
2594 }
2595
2c2d71f5
JH
2596 if (scan->flags) {
2597 /* Check whether we already were at this position.
2598 Postpone detection until we know the match is not
2599 *that* much linear. */
2600 if (!PL_reg_maxiter) {
2601 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2602 PL_reg_leftiter = PL_reg_maxiter;
2603 }
2604 if (PL_reg_leftiter-- == 0) {
2605 I32 size = (PL_reg_maxiter + 7)/8;
2606 if (PL_reg_poscache) {
2607 if (PL_reg_poscache_size < size) {
2608 Renew(PL_reg_poscache, size, char);
2609 PL_reg_poscache_size = size;
2610 }
2611 Zero(PL_reg_poscache, size, char);
2612 }
2613 else {
2614 PL_reg_poscache_size = size;
2615 Newz(29, PL_reg_poscache, size, char);
2616 }
2617 DEBUG_r(
2618 PerlIO_printf(Perl_debug_log,
2619 "%sDetected a super-linear match, switching on caching%s...\n",
2620 PL_colors[4], PL_colors[5])
2621 );
2622 }
2623 if (PL_reg_leftiter < 0) {
2624 I32 o = locinput - PL_bostr, b;
2625
2626 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2627 b = o % 8;
2628 o /= 8;
2629 if (PL_reg_poscache[o] & (1<<b)) {
2630 DEBUG_r(
2631 PerlIO_printf(Perl_debug_log,
2632 "%*s already tried at this position...\n",
2633 REPORT_CODE_OFF+PL_regindent*2, "")
2634 );
7821416a 2635 sayNO_SILENT;
2c2d71f5
JH
2636 }
2637 PL_reg_poscache[o] |= (1<<b);
2638 }
2639 }
2640
a0d0e21e
LW
2641 /* Prefer next over scan for minimal matching. */
2642
2643 if (cc->minmod) {
3280af22 2644 PL_regcc = cc->oldcc;
2ab05381
IZ
2645 if (PL_regcc)
2646 ln = PL_regcc->cur;
5f05dabc 2647 cp = regcppush(cc->parenfloor);
c277df42 2648 REGCP_SET;
5f05dabc 2649 if (regmatch(cc->next)) {
c277df42 2650 regcpblow(cp);
4633a7c4 2651 sayYES; /* All done. */
5f05dabc 2652 }
c277df42 2653 REGCP_UNWIND;
5f05dabc 2654 regcppop();
2ab05381
IZ
2655 if (PL_regcc)
2656 PL_regcc->cur = ln;
3280af22 2657 PL_regcc = cc;
a0d0e21e 2658
c277df42 2659 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2660 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
2661 && !(PL_reg_flags & RF_warned)) {
2662 PL_reg_flags |= RF_warned;
cea2e8a9 2663 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
2664 "Complex regular subexpression recursion",
2665 REG_INFTY - 1);
c277df42 2666 }
4633a7c4 2667 sayNO;
c277df42 2668 }
a687059c 2669
c277df42 2670 DEBUG_r(
c3464db5
DD
2671 PerlIO_printf(Perl_debug_log,
2672 "%*s trying longer...\n",
3280af22 2673 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2674 );
a0d0e21e 2675 /* Try scanning more and see if it helps. */
3280af22 2676 PL_reginput = locinput;
a0d0e21e
LW
2677 cc->cur = n;
2678 cc->lastloc = locinput;
5f05dabc 2679 cp = regcppush(cc->parenfloor);
c277df42 2680 REGCP_SET;
5f05dabc 2681 if (regmatch(cc->scan)) {
c277df42 2682 regcpblow(cp);
4633a7c4 2683 sayYES;
5f05dabc 2684 }
c277df42 2685 REGCP_UNWIND;
5f05dabc 2686 regcppop();
4633a7c4 2687 cc->cur = n - 1;
c277df42 2688 cc->lastloc = lastloc;
4633a7c4 2689 sayNO;
a0d0e21e
LW
2690 }
2691
2692 /* Prefer scan over next for maximal matching. */
2693
2694 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2695 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2696 cc->cur = n;
2697 cc->lastloc = locinput;
c277df42 2698 REGCP_SET;
5f05dabc 2699 if (regmatch(cc->scan)) {
c277df42 2700 regcpblow(cp);
4633a7c4 2701 sayYES;
5f05dabc 2702 }
c277df42 2703 REGCP_UNWIND;
a0d0e21e 2704 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2705 PL_reginput = locinput;
c277df42 2706 DEBUG_r(
c3464db5
DD
2707 PerlIO_printf(Perl_debug_log,
2708 "%*s failed, try continuation...\n",
3280af22 2709 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2710 );
2711 }
599cee73
PM
2712 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2713 && !(PL_reg_flags & RF_warned)) {
3280af22 2714 PL_reg_flags |= RF_warned;
cea2e8a9 2715 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
2716 "Complex regular subexpression recursion",
2717 REG_INFTY - 1);
a0d0e21e
LW
2718 }
2719
2720 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2721 PL_regcc = cc->oldcc;
2ab05381
IZ
2722 if (PL_regcc)
2723 ln = PL_regcc->cur;
a0d0e21e 2724 if (regmatch(cc->next))
4633a7c4 2725 sayYES;
2ab05381
IZ
2726 if (PL_regcc)
2727 PL_regcc->cur = ln;
3280af22 2728 PL_regcc = cc;
4633a7c4 2729 cc->cur = n - 1;
c277df42 2730 cc->lastloc = lastloc;
4633a7c4 2731 sayNO;
a0d0e21e
LW
2732 }
2733 /* NOT REACHED */
c277df42
IZ
2734 case BRANCHJ:
2735 next = scan + ARG(scan);
2736 if (next == scan)
2737 next = NULL;
2738 inner = NEXTOPER(NEXTOPER(scan));
2739 goto do_branch;
2740 case BRANCH:
2741 inner = NEXTOPER(scan);
2742 do_branch:
2743 {
2744 CHECKPOINT lastcp;
2745 c1 = OP(scan);
2746 if (OP(next) != c1) /* No choice. */
2747 next = inner; /* Avoid recursion. */
a0d0e21e 2748 else {
3280af22 2749 int lastparen = *PL_reglastparen;
c277df42
IZ
2750
2751 REGCP_SET;
a0d0e21e 2752 do {
3280af22 2753 PL_reginput = locinput;
c277df42 2754 if (regmatch(inner))
4633a7c4 2755 sayYES;
c277df42 2756 REGCP_UNWIND;
3280af22 2757 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2758 PL_regendp[n] = -1;
3280af22 2759 *PL_reglastparen = n;
c277df42 2760 scan = next;
a0d0e21e 2761 /*SUPPRESS 560*/
c277df42
IZ
2762 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2763 next += n;
a0d0e21e 2764 else
c277df42 2765 next = NULL;
c277df42
IZ
2766 inner = NEXTOPER(scan);
2767 if (c1 == BRANCHJ) {
2768 inner = NEXTOPER(inner);
2769 }
2770 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2771 sayNO;
a0d0e21e 2772 /* NOTREACHED */
a687059c 2773 }
a0d0e21e
LW
2774 }
2775 break;
2776 case MINMOD:
2777 minmod = 1;
2778 break;
c277df42
IZ
2779 case CURLYM:
2780 {
00db4c45 2781 I32 l = 0;
c277df42
IZ
2782 CHECKPOINT lastcp;
2783
2784 /* We suppose that the next guy does not need
2785 backtracking: in particular, it is of constant length,
2786 and has no parenths to influence future backrefs. */
2787 ln = ARG1(scan); /* min to match */
2788 n = ARG2(scan); /* max to match */
c277df42
IZ
2789 paren = scan->flags;
2790 if (paren) {
3280af22
NIS
2791 if (paren > PL_regsize)
2792 PL_regsize = paren;
2793 if (paren > *PL_reglastparen)
2794 *PL_reglastparen = paren;
c277df42 2795 }
dc45a647 2796 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2797 if (paren)
2798 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2799 PL_reginput = locinput;
c277df42
IZ
2800 if (minmod) {
2801 minmod = 0;
2802 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2803 sayNO;
5f4b28b2 2804 if (ln && l == 0 && n >= ln
c277df42
IZ
2805 /* In fact, this is tricky. If paren, then the
2806 fact that we did/didnot match may influence
2807 future execution. */
2808 && !(paren && ln == 0))
2809 ln = n;
3280af22 2810 locinput = PL_reginput;
22c35a8c 2811 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2812 c1 = (U8)*STRING(next);
c277df42 2813 if (OP(next) == EXACTF)
22c35a8c 2814 c2 = PL_fold[c1];
c277df42 2815 else if (OP(next) == EXACTFL)
22c35a8c 2816 c2 = PL_fold_locale[c1];
c277df42
IZ
2817 else
2818 c2 = c1;
a0ed51b3
LW
2819 }
2820 else
c277df42
IZ
2821 c1 = c2 = -1000;
2822 REGCP_SET;
5f4b28b2 2823 /* This may be improved if l == 0. */
c277df42
IZ
2824 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2825 /* If it could work, try it. */
2826 if (c1 == -1000 ||
3280af22
NIS
2827 UCHARAT(PL_reginput) == c1 ||
2828 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2829 {
2830 if (paren) {
2831 if (n) {
cf93c79d
IZ
2832 PL_regstartp[paren] =
2833 HOPc(PL_reginput, -l) - PL_bostr;
2834 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2835 }
2836 else
cf93c79d 2837 PL_regendp[paren] = -1;
c277df42
IZ
2838 }
2839 if (regmatch(next))
2840 sayYES;
2841 REGCP_UNWIND;
2842 }
2843 /* Couldn't or didn't -- move forward. */
3280af22 2844 PL_reginput = locinput;
c277df42
IZ
2845 if (regrepeat_hard(scan, 1, &l)) {
2846 ln++;
3280af22 2847 locinput = PL_reginput;
c277df42
IZ
2848 }
2849 else
2850 sayNO;
2851 }
a0ed51b3
LW
2852 }
2853 else {
c277df42
IZ
2854 n = regrepeat_hard(scan, n, &l);
2855 if (n != 0 && l == 0
2856 /* In fact, this is tricky. If paren, then the
2857 fact that we did/didnot match may influence
2858 future execution. */
2859 && !(paren && ln == 0))
2860 ln = n;
3280af22 2861 locinput = PL_reginput;
c277df42 2862 DEBUG_r(
5c0ca799 2863 PerlIO_printf(Perl_debug_log,
faccc32b 2864 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2865 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2866 (IV) n, (IV)l)
c277df42
IZ
2867 );
2868 if (n >= ln) {
22c35a8c 2869 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2870 c1 = (U8)*STRING(next);
c277df42 2871 if (OP(next) == EXACTF)
22c35a8c 2872 c2 = PL_fold[c1];
c277df42 2873 else if (OP(next) == EXACTFL)
22c35a8c 2874 c2 = PL_fold_locale[c1];
c277df42
IZ
2875 else
2876 c2 = c1;
a0ed51b3
LW
2877 }
2878 else
c277df42
IZ
2879 c1 = c2 = -1000;
2880 }
2881 REGCP_SET;
2882 while (n >= ln) {
2883 /* If it could work, try it. */
2884 if (c1 == -1000 ||
3280af22
NIS
2885 UCHARAT(PL_reginput) == c1 ||
2886 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2887 {
2888 DEBUG_r(
c3464db5 2889 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2890 "%*s trying tail with n=%"IVdf"...\n",
2891 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2892 );
2893 if (paren) {
2894 if (n) {
cf93c79d
IZ
2895 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2896 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2897 }
a0ed51b3 2898 else
cf93c79d 2899 PL_regendp[paren] = -1;
c277df42 2900 }
a0ed51b3
LW
2901 if (regmatch(next))
2902 sayYES;
2903 REGCP_UNWIND;
2904 }
c277df42
IZ
2905 /* Couldn't or didn't -- back up. */
2906 n--;
dfe13c55 2907 locinput = HOPc(locinput, -l);
3280af22 2908 PL_reginput = locinput;
c277df42
IZ
2909 }
2910 }
2911 sayNO;
2912 break;
2913 }
2914 case CURLYN:
2915 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2916 if (paren > PL_regsize)
2917 PL_regsize = paren;
2918 if (paren > *PL_reglastparen)
2919 *PL_reglastparen = paren;
c277df42
IZ
2920 ln = ARG1(scan); /* min to match */
2921 n = ARG2(scan); /* max to match */
dc45a647 2922 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2923 goto repeat;
a0d0e21e 2924 case CURLY:
c277df42 2925 paren = 0;
a0d0e21e
LW
2926 ln = ARG1(scan); /* min to match */
2927 n = ARG2(scan); /* max to match */
dc45a647 2928 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2929 goto repeat;
2930 case STAR:
2931 ln = 0;
c277df42 2932 n = REG_INFTY;
a0d0e21e 2933 scan = NEXTOPER(scan);
c277df42 2934 paren = 0;
a0d0e21e
LW
2935 goto repeat;
2936 case PLUS:
c277df42
IZ
2937 ln = 1;
2938 n = REG_INFTY;
2939 scan = NEXTOPER(scan);
2940 paren = 0;
2941 repeat:
a0d0e21e
LW
2942 /*
2943 * Lookahead to avoid useless match attempts
2944 * when we know what character comes next.
2945 */
22c35a8c 2946 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2947 c1 = (U8)*STRING(next);
bbce6d69 2948 if (OP(next) == EXACTF)
22c35a8c 2949 c2 = PL_fold[c1];
bbce6d69 2950 else if (OP(next) == EXACTFL)
22c35a8c 2951 c2 = PL_fold_locale[c1];
bbce6d69 2952 else
2953 c2 = c1;
2954 }
a0d0e21e 2955 else
bbce6d69 2956 c1 = c2 = -1000;
3280af22 2957 PL_reginput = locinput;
a0d0e21e 2958 if (minmod) {
c277df42 2959 CHECKPOINT lastcp;
a0d0e21e
LW
2960 minmod = 0;
2961 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2962 sayNO;
a0ed51b3 2963 locinput = PL_reginput;
c277df42 2964 REGCP_SET;
0fe9bf95
IZ
2965 if (c1 != -1000) {
2966 char *e = locinput + n - ln; /* Should not check after this */
2967 char *old = locinput;
2968
2969 if (e >= PL_regeol || (n == REG_INFTY))
2970 e = PL_regeol - 1;
2971 while (1) {
2972 /* Find place 'next' could work */
2973 if (c1 == c2) {
2974 while (locinput <= e && *locinput != c1)
2975 locinput++;
2976 } else {
2977 while (locinput <= e
2978 && *locinput != c1
2979 && *locinput != c2)
2980 locinput++;
2981 }
2982 if (locinput > e)
2983 sayNO;
2984 /* PL_reginput == old now */
2985 if (locinput != old) {
2986 ln = 1; /* Did some */
2987 if (regrepeat(scan, locinput - old) <
2988 locinput - old)
2989 sayNO;
2990 }
2991 /* PL_reginput == locinput now */
2992 if (paren) {
2993 if (ln) {
cf93c79d
IZ
2994 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2995 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2996 }
2997 else
cf93c79d 2998 PL_regendp[paren] = -1;
0fe9bf95
IZ
2999 }
3000 if (regmatch(next))
3001 sayYES;
3002 PL_reginput = locinput; /* Could be reset... */
3003 REGCP_UNWIND;
3004 /* Couldn't or didn't -- move forward. */
3005 old = locinput++;
3006 }
3007 }
3008 else
c277df42 3009 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3010 /* If it could work, try it. */
bbce6d69 3011 if (c1 == -1000 ||
3280af22
NIS
3012 UCHARAT(PL_reginput) == c1 ||
3013 UCHARAT(PL_reginput) == c2)
bbce6d69 3014 {
c277df42
IZ
3015 if (paren) {
3016 if (n) {
cf93c79d
IZ
3017 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3018 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3019 }
3020 else
cf93c79d 3021 PL_regendp[paren] = -1;
c277df42 3022 }
a0d0e21e 3023 if (regmatch(next))
4633a7c4 3024 sayYES;
c277df42 3025 REGCP_UNWIND;
bbce6d69 3026 }
c277df42 3027 /* Couldn't or didn't -- move forward. */
a0ed51b3 3028 PL_reginput = locinput;
a0d0e21e
LW
3029 if (regrepeat(scan, 1)) {
3030 ln++;
a0ed51b3
LW
3031 locinput = PL_reginput;
3032 }
3033 else
4633a7c4 3034 sayNO;
a0d0e21e
LW
3035 }
3036 }
3037 else {
c277df42 3038 CHECKPOINT lastcp;
a0d0e21e 3039 n = regrepeat(scan, n);
a0ed51b3 3040 locinput = PL_reginput;
22c35a8c 3041 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 3042 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 3043 ln = n; /* why back off? */
c277df42
IZ
3044 REGCP_SET;
3045 if (paren) {
3046 while (n >= ln) {
3047 /* If it could work, try it. */
3048 if (c1 == -1000 ||
3280af22
NIS
3049 UCHARAT(PL_reginput) == c1 ||
3050 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3051 {
3052 if (paren && n) {
3053 if (n) {
cf93c79d
IZ
3054 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3055 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3056 }
3057 else
cf93c79d 3058 PL_regendp[paren] = -1;
c277df42
IZ
3059 }
3060 if (regmatch(next))
3061 sayYES;
3062 REGCP_UNWIND;
3063 }
3064 /* Couldn't or didn't -- back up. */
3065 n--;
dfe13c55 3066 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3067 }
a0ed51b3
LW
3068 }
3069 else {
c277df42
IZ
3070 while (n >= ln) {
3071 /* If it could work, try it. */
3072 if (c1 == -1000 ||
3280af22
NIS
3073 UCHARAT(PL_reginput) == c1 ||
3074 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3075 {
3076 if (regmatch(next))
3077 sayYES;
3078 REGCP_UNWIND;
3079 }
3080 /* Couldn't or didn't -- back up. */
3081 n--;
dfe13c55 3082 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3083 }
a0d0e21e
LW
3084 }
3085 }
4633a7c4 3086 sayNO;
c277df42 3087 break;
a0d0e21e 3088 case END:
0f5d15d6
IZ
3089 if (PL_reg_call_cc) {
3090 re_cc_state *cur_call_cc = PL_reg_call_cc;
3091 CURCUR *cctmp = PL_regcc;
3092 regexp *re = PL_reg_re;
3093 CHECKPOINT cp, lastcp;
3094
3095 cp = regcppush(0); /* Save *all* the positions. */
3096 REGCP_SET;
3097 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3098 the caller. */
3099 PL_reginput = locinput; /* Make position available to
3100 the callcc. */
3101 cache_re(PL_reg_call_cc->re);
3102 PL_regcc = PL_reg_call_cc->cc;
3103 PL_reg_call_cc = PL_reg_call_cc->prev;
3104 if (regmatch(cur_call_cc->node)) {
3105 PL_reg_call_cc = cur_call_cc;
3106 regcpblow(cp);
3107 sayYES;
3108 }
3109 REGCP_UNWIND;
3110 regcppop();
3111 PL_reg_call_cc = cur_call_cc;
3112 PL_regcc = cctmp;
3113 PL_reg_re = re;
3114 cache_re(re);
3115
3116 DEBUG_r(
3117 PerlIO_printf(Perl_debug_log,
3118 "%*s continuation failed...\n",
3119 REPORT_CODE_OFF+PL_regindent*2, "")
3120 );
7821416a 3121 sayNO_SILENT;
0f5d15d6 3122 }
7821416a
IZ
3123 if (locinput < PL_regtill) {
3124 DEBUG_r(PerlIO_printf(Perl_debug_log,
3125 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3126 PL_colors[4],
3127 (long)(locinput - PL_reg_starttry),
3128 (long)(PL_regtill - PL_reg_starttry),
3129 PL_colors[5]));
3130 sayNO_FINAL; /* Cannot match: too short. */
3131 }
3132 PL_reginput = locinput; /* put where regtry can find it */
3133 sayYES_FINAL; /* Success! */
7e5428c5 3134 case SUCCEED:
3280af22 3135 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3136 sayYES_LOUD; /* Success! */
c277df42
IZ
3137 case SUSPEND:
3138 n = 1;
9fe1d20c 3139 PL_reginput = locinput;
c277df42 3140 goto do_ifmatch;
a0d0e21e 3141 case UNLESSM:
c277df42 3142 n = 0;
a0ed51b3 3143 if (scan->flags) {
0fe9bf95
IZ
3144 if (UTF) { /* XXXX This is absolutely
3145 broken, we read before
3146 start of string. */
3147 s = HOPMAYBEc(locinput, -scan->flags);
3148 if (!s)
3149 goto say_yes;
3150 PL_reginput = s;
3151 }
3152 else {
3153 if (locinput < PL_bostr + scan->flags)
3154 goto say_yes;
3155 PL_reginput = locinput - scan->flags;
3156 goto do_ifmatch;
3157 }
a0ed51b3
LW
3158 }
3159 else
3160 PL_reginput = locinput;
c277df42
IZ
3161 goto do_ifmatch;
3162 case IFMATCH:
3163 n = 1;
a0ed51b3 3164 if (scan->flags) {
0fe9bf95
IZ
3165 if (UTF) { /* XXXX This is absolutely
3166 broken, we read before
3167 start of string. */
3168 s = HOPMAYBEc(locinput, -scan->flags);
3169 if (!s || s < PL_bostr)
3170 goto say_no;
3171 PL_reginput = s;
3172 }
3173 else {
3174 if (locinput < PL_bostr + scan->flags)
3175 goto say_no;
3176 PL_reginput = locinput - scan->flags;
3177 goto do_ifmatch;
3178 }
a0ed51b3
LW
3179 }
3180 else
3181 PL_reginput = locinput;
3182
c277df42 3183 do_ifmatch:
c277df42
IZ
3184 inner = NEXTOPER(NEXTOPER(scan));
3185 if (regmatch(inner) != n) {
3186 say_no:
3187 if (logical) {
3188 logical = 0;
3189 sw = 0;
3190 goto do_longjump;
a0ed51b3
LW
3191 }
3192 else
c277df42
IZ
3193 sayNO;
3194 }
3195 say_yes:
3196 if (logical) {
3197 logical = 0;
3198 sw = 1;
3199 }
fe44a5e8 3200 if (OP(scan) == SUSPEND) {
3280af22 3201 locinput = PL_reginput;
565764a8 3202 nextchr = UCHARAT(locinput);
fe44a5e8 3203 }
c277df42
IZ
3204 /* FALL THROUGH. */
3205 case LONGJMP:
3206 do_longjump:
3207 next = scan + ARG(scan);
3208 if (next == scan)
3209 next = NULL;
a0d0e21e
LW
3210 break;
3211 default:
b900a521 3212 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3213 PTR2UV(scan), OP(scan));
cea2e8a9 3214 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3215 }
a0d0e21e
LW
3216 scan = next;
3217 }
a687059c 3218
a0d0e21e
LW
3219 /*
3220 * We get here only if there's trouble -- normally "case END" is
3221 * the terminating point.
3222 */
cea2e8a9 3223 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3224 /*NOTREACHED*/
4633a7c4
LW
3225 sayNO;
3226
7821416a
IZ
3227yes_loud:
3228 DEBUG_r(
3229 PerlIO_printf(Perl_debug_log,
3230 "%*s %scould match...%s\n",
3231 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3232 );
3233 goto yes;
3234yes_final:
3235 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3236 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3237yes:
3238#ifdef DEBUGGING
3280af22 3239 PL_regindent--;
4633a7c4
LW
3240#endif
3241 return 1;
3242
3243no:
7821416a
IZ
3244 DEBUG_r(
3245 PerlIO_printf(Perl_debug_log,
3246 "%*s %sfailed...%s\n",
3247 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3248 );
3249 goto do_no;
3250no_final:
3251do_no:
4633a7c4 3252#ifdef DEBUGGING
3280af22 3253 PL_regindent--;
4633a7c4 3254#endif
a0d0e21e 3255 return 0;
a687059c
LW
3256}
3257
3258/*
3259 - regrepeat - repeatedly match something simple, report how many
3260 */
3261/*
3262 * [This routine now assumes that it will only match on things of length 1.
3263 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3264 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3265 */
76e3520e 3266STATIC I32
cea2e8a9 3267S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3268{
5c0ca799 3269 dTHR;
a0d0e21e 3270 register char *scan;
a0d0e21e 3271 register I32 c;
3280af22 3272 register char *loceol = PL_regeol;
a0ed51b3 3273 register I32 hardcount = 0;
a0d0e21e 3274
3280af22 3275 scan = PL_reginput;
c277df42 3276 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3277 loceol = scan + max;
a0d0e21e 3278 switch (OP(p)) {
22c35a8c 3279 case REG_ANY:
a0d0e21e
LW
3280 while (scan < loceol && *scan != '\n')
3281 scan++;
3282 break;
3283 case SANY:
3284 scan = loceol;
3285 break;
a0ed51b3
LW
3286 case ANYUTF8:
3287 loceol = PL_regeol;
3288 while (scan < loceol && *scan != '\n') {
3289 scan += UTF8SKIP(scan);
3290 hardcount++;
3291 }
3292 break;
3293 case SANYUTF8:
3294 loceol = PL_regeol;
3295 while (scan < loceol) {
3296 scan += UTF8SKIP(scan);
3297 hardcount++;
3298 }
3299 break;
bbce6d69 3300 case EXACT: /* length of string is 1 */
cd439c50 3301 c = (U8)*STRING(p);
bbce6d69 3302 while (scan < loceol && UCHARAT(scan) == c)
3303 scan++;
3304 break;
3305 case EXACTF: /* length of string is 1 */
cd439c50 3306 c = (U8)*STRING(p);
bbce6d69 3307 while (scan < loceol &&
22c35a8c 3308 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3309 scan++;
3310 break;
3311 case EXACTFL: /* length of string is 1 */
3280af22 3312 PL_reg_flags |= RF_tainted;
cd439c50 3313 c = (U8)*STRING(p);
bbce6d69 3314 while (scan < loceol &&
22c35a8c 3315 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3316 scan++;
3317 break;
a0ed51b3
LW
3318 case ANYOFUTF8:
3319 loceol = PL_regeol;
3320 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3321 scan += UTF8SKIP(scan);
3322 hardcount++;
3323 }
3324 break;
a0d0e21e 3325 case ANYOF:
936ed897 3326 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3327 scan++;
a0d0e21e
LW
3328 break;
3329 case ALNUM:
3330 while (scan < loceol && isALNUM(*scan))
3331 scan++;
3332 break;
a0ed51b3
LW
3333 case ALNUMUTF8:
3334 loceol = PL_regeol;
dfe13c55 3335 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3336 scan += UTF8SKIP(scan);
3337 hardcount++;
3338 }
3339 break;
bbce6d69 3340 case ALNUML:
3280af22 3341 PL_reg_flags |= RF_tainted;
bbce6d69 3342 while (scan < loceol && isALNUM_LC(*scan))
3343 scan++;
3344 break;
a0ed51b3
LW
3345 case ALNUMLUTF8:
3346 PL_reg_flags |= RF_tainted;
3347 loceol = PL_regeol;
dfe13c55 3348 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3349 scan += UTF8SKIP(scan);
3350 hardcount++;
3351 }
3352 break;
3353 break;
a0d0e21e
LW
3354 case NALNUM:
3355 while (scan < loceol && !isALNUM(*scan))
3356 scan++;
3357 break;
a0ed51b3
LW
3358 case NALNUMUTF8:
3359 loceol = PL_regeol;
dfe13c55 3360 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3361 scan += UTF8SKIP(scan);
3362 hardcount++;
3363 }
3364 break;
bbce6d69 3365 case NALNUML:
3280af22 3366 PL_reg_flags |= RF_tainted;
bbce6d69 3367 while (scan < loceol && !isALNUM_LC(*scan))
3368 scan++;
3369 break;
a0ed51b3
LW
3370 case NALNUMLUTF8:
3371 PL_reg_flags |= RF_tainted;
3372 loceol = PL_regeol;
dfe13c55 3373 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3374 scan += UTF8SKIP(scan);
3375 hardcount++;
3376 }
3377 break;
a0d0e21e
LW
3378 case SPACE:
3379 while (scan < loceol && isSPACE(*scan))
3380 scan++;
3381 break;
a0ed51b3
LW
3382 case SPACEUTF8:
3383 loceol = PL_regeol;
dfe13c55 3384 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3385 scan += UTF8SKIP(scan);
3386 hardcount++;
3387 }
3388 break;
bbce6d69 3389 case SPACEL:
3280af22 3390 PL_reg_flags |= RF_tainted;
bbce6d69 3391 while (scan < loceol && isSPACE_LC(*scan))
3392 scan++;
3393 break;
a0ed51b3
LW
3394 case SPACELUTF8:
3395 PL_reg_flags |= RF_tainted;
3396 loceol = PL_regeol;
dfe13c55 3397 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3398 scan += UTF8SKIP(scan);
3399 hardcount++;
3400 }
3401 break;
a0d0e21e
LW
3402 case NSPACE:
3403 while (scan < loceol && !isSPACE(*scan))
3404 scan++;
3405 break;
a0ed51b3
LW
3406 case NSPACEUTF8:
3407 loceol = PL_regeol;
dfe13c55 3408 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3409 scan += UTF8SKIP(scan);
3410 hardcount++;
3411 }
3412 break;
bbce6d69 3413 case NSPACEL:
3280af22 3414 PL_reg_flags |= RF_tainted;
bbce6d69 3415 while (scan < loceol && !isSPACE_LC(*scan))
3416 scan++;
3417 break;
a0ed51b3
LW
3418 case NSPACELUTF8:
3419 PL_reg_flags |= RF_tainted;
3420 loceol = PL_regeol;
dfe13c55 3421 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3422 scan += UTF8SKIP(scan);
3423 hardcount++;
3424 }
3425 break;
a0d0e21e
LW
3426 case DIGIT:
3427 while (scan < loceol && isDIGIT(*scan))
3428 scan++;
3429 break;
a0ed51b3
LW
3430 case DIGITUTF8:
3431 loceol = PL_regeol;
dfe13c55 3432 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3433 scan += UTF8SKIP(scan);
3434 hardcount++;
3435 }
3436 break;
3437 break;
a0d0e21e
LW
3438 case NDIGIT:
3439 while (scan < loceol && !isDIGIT(*scan))
3440 scan++;
3441 break;
a0ed51b3
LW
3442 case NDIGITUTF8:
3443 loceol = PL_regeol;
dfe13c55 3444 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3445 scan += UTF8SKIP(scan);
3446 hardcount++;
3447 }
3448 break;
a0d0e21e
LW
3449 default: /* Called on something of 0 width. */
3450 break; /* So match right here or not at all. */
3451 }
a687059c 3452
a0ed51b3
LW
3453 if (hardcount)
3454 c = hardcount;
3455 else
3456 c = scan - PL_reginput;
3280af22 3457 PL_reginput = scan;
a687059c 3458
c277df42
IZ
3459 DEBUG_r(
3460 {
3461 SV *prop = sv_newmortal();
3462
3463 regprop(prop, p);
3464 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3465 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3466 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3467 });
3468
a0d0e21e 3469 return(c);
a687059c
LW
3470}
3471
3472/*
c277df42
IZ
3473 - regrepeat_hard - repeatedly match something, report total lenth and length
3474 *
3475 * The repeater is supposed to have constant length.
3476 */
3477
76e3520e 3478STATIC I32
cea2e8a9 3479S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3480{
5c0ca799 3481 dTHR;
c277df42
IZ
3482 register char *scan;
3483 register char *start;
3280af22 3484 register char *loceol = PL_regeol;
a0ed51b3 3485 I32 l = 0;
708e3b05 3486 I32 count = 0, res = 1;
a0ed51b3
LW
3487
3488 if (!max)
3489 return 0;
c277df42 3490
3280af22 3491 start = PL_reginput;
a0ed51b3 3492 if (UTF) {
708e3b05 3493 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3494 if (!count++) {
3495 l = 0;
3496 while (start < PL_reginput) {
3497 l++;
3498 start += UTF8SKIP(start);
3499 }
3500 *lp = l;
3501 if (l == 0)
3502 return max;
3503 }
3504 if (count == max)
3505 return count;
3506 }
3507 }
3508 else {
708e3b05 3509 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3510 if (!count++) {
3511 *lp = l = PL_reginput - start;
3512 if (max != REG_INFTY && l*max < loceol - scan)
3513 loceol = scan + l*max;
3514 if (l == 0)
3515 return max;
c277df42
IZ
3516 }
3517 }
3518 }
708e3b05 3519 if (!res)
3280af22 3520 PL_reginput = scan;
c277df42 3521
a0ed51b3 3522 return count;
c277df42
IZ
3523}
3524
3525/*
cb8d8820 3526 - reginclass - determine if a character falls into a character class
bbce6d69 3527 */
3528
76e3520e 3529STATIC bool
936ed897 3530S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3531{
5c0ca799 3532 dTHR;
b8c5462f 3533 char flags = ANYOF_FLAGS(p);
bbce6d69 3534 bool match = FALSE;
3535
3536 c &= 0xFF;
b8c5462f 3537 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3538 match = TRUE;
3539 else if (flags & ANYOF_FOLD) {
3540 I32 cf;
3541 if (flags & ANYOF_LOCALE) {
3280af22 3542 PL_reg_flags |= RF_tainted;
22c35a8c 3543 cf = PL_fold_locale[c];
bbce6d69 3544 }
3545 else
22c35a8c 3546 cf = PL_fold[c];
b8c5462f 3547 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3548 match = TRUE;
3549 }
3550
b8c5462f 3551 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3552 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3553 if (
3554 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3555 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3556 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3557 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3558 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3559 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3560 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3561 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3562 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3563 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3564 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3565 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3566 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3567 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3568 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3569 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3570 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3571 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3572 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3573 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3574 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3575 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3576 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3577 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3578 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3579 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3580 ) /* How's that for a conditional? */
bbce6d69 3581 {
3582 match = TRUE;
3583 }
3584 }
3585
ae5c130c 3586 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3587}
3588
a0ed51b3 3589STATIC bool
cea2e8a9 3590S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3591{
3592 dTHR;
a0ed51b3
LW
3593 char flags = ARG1(f);
3594 bool match = FALSE;
3595 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3596
3597 if (swash_fetch(sv, p))
3598 match = TRUE;
3599 else if (flags & ANYOF_FOLD) {
3600 I32 cf;
dfe13c55 3601 U8 tmpbuf[10];
a0ed51b3
LW
3602 if (flags & ANYOF_LOCALE) {
3603 PL_reg_flags |= RF_tainted;
3604 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3605 }
3606 else
3607 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3608 if (swash_fetch(sv, tmpbuf))
3609 match = TRUE;
3610 }
3611
b8c5462f 3612 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3613
3614 return (flags & ANYOF_INVERT) ? !match : match;
3615}
161b471a 3616
dfe13c55 3617STATIC U8 *
cea2e8a9 3618S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3619{
3620 dTHR;
a0ed51b3
LW
3621 if (off >= 0) {
3622 while (off-- && s < (U8*)PL_regeol)
3623 s += UTF8SKIP(s);
3624 }
3625 else {
3626 while (off++) {
3627 if (s > (U8*)PL_bostr) {
3628 s--;
3629 if (*s & 0x80) {
3630 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3631 s--;
3632 } /* XXX could check well-formedness here */
3633 }
3634 }
3635 }
3636 return s;
3637}
161b471a 3638
dfe13c55 3639STATIC U8 *
cea2e8a9 3640S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3641{
c485e607 3642 dTHR;
a0ed51b3
LW
3643 if (off >= 0) {
3644 while (off-- && s < (U8*)PL_regeol)
3645 s += UTF8SKIP(s);
3646 if (off >= 0)
3647 return 0;
3648 }
3649 else {
3650 while (off++) {
3651 if (s > (U8*)PL_bostr) {
3652 s--;
3653 if (*s & 0x80) {
3654 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3655 s--;
3656 } /* XXX could check well-formedness here */
3657 }
3658 else
3659 break;
3660 }
3661 if (off <= 0)
3662 return 0;
3663 }
3664 return s;
3665}
51371543
GS
3666
3667#ifdef PERL_OBJECT
51371543
GS
3668#include "XSUB.h"
3669#endif
3670
3671static void
3672restore_pos(pTHXo_ void *arg)
3673{
3674 dTHR;
3675 if (PL_reg_eval_set) {
3676 if (PL_reg_oldsaved) {
3677 PL_reg_re->subbeg = PL_reg_oldsaved;
3678 PL_reg_re->sublen = PL_reg_oldsavedlen;
3679 RX_MATCH_COPIED_on(PL_reg_re);
3680 }
3681 PL_reg_magic->mg_len = PL_reg_oldpos;
3682 PL_reg_eval_set = 0;
3683 PL_curpm = PL_reg_oldcurpm;
3684 }
3685}
3686