This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
patch from Larry to make (\&) prototype work; added tests for
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
3818b22b 69 **** Copyright (c) 1991-2000, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
6eb5f6b9
JH
278/* XXXX Some places assume that there is a fixed substring.
279 An update may be needed if optimizer marks as "INTUITable"
280 RExen without fixed substrings. Similarly, it is assumed that
281 lengths of all the strings are no more than minlen, thus they
282 cannot come from lookahead.
283 (Or minlen should take into account lookahead.) */
284
2c2d71f5
JH
285/* A failure to find a constant substring means that there is no need to make
286 an expensive call to REx engine, thus we celebrate a failure. Similarly,
287 finding a substring too deep into the string means that less calls to
30944b6d
IZ
288 regtry() should be needed.
289
290 REx compiler's optimizer found 4 possible hints:
291 a) Anchored substring;
292 b) Fixed substring;
293 c) Whether we are anchored (beginning-of-line or \G);
294 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 295 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
296 string which does not contradict any of them.
297 */
2c2d71f5 298
6eb5f6b9
JH
299/* Most of decisions we do here should have been done at compile time.
300 The nodes of the REx which we used for the search should have been
301 deleted from the finite automaton. */
302
cad2e5aa
JH
303char *
304Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
305 char *strend, U32 flags, re_scream_pos_data *data)
306{
2c2d71f5 307 register I32 start_shift;
cad2e5aa 308 /* Should be nonnegative! */
2c2d71f5
JH
309 register I32 end_shift;
310 register char *s;
311 register SV *check;
cad2e5aa
JH
312 char *t;
313 I32 ml_anch;
2c2d71f5 314 char *tmp;
6eb5f6b9
JH
315 register char *other_last = Nullch; /* other substr checked before this */
316 char *check_at; /* check substr found at this pos */
30944b6d
IZ
317#ifdef DEBUGGING
318 char *i_strpos = strpos;
319#endif
cad2e5aa
JH
320
321 DEBUG_r( if (!PL_colorset) reginitcolors() );
322 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 323 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
324 PL_colors[4],PL_colors[5],PL_colors[0],
325 prog->precomp,
326 PL_colors[1],
327 (strlen(prog->precomp) > 60 ? "..." : ""),
328 PL_colors[0],
b900a521 329 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
330 strpos, PL_colors[1],
331 (strend - strpos > 60 ? "..." : ""))
332 );
333
2c2d71f5
JH
334 if (prog->minlen > strend - strpos) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 336 goto fail;
2c2d71f5 337 }
653099ff 338 check = prog->check_substr;
2c2d71f5 339 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
340 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
341 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 342 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
343
344 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 345 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
346 I32 slen;
347
348 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
05b4157f
GS
349 /* SvCUR is not set on references: SvRV and SvPVX overlap */
350 && sv && !SvROK(sv)
351 && (strpos + SvCUR(sv) != strend)) {
2c2d71f5 352 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 353 goto fail;
2c2d71f5 354 }
adac82c7 355 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 356 s = HOPc(strpos, prog->check_offset_min);
653099ff
GS
357 if (SvTAIL(check)) {
358 slen = SvCUR(check); /* >= 1 */
cad2e5aa 359
2c2d71f5
JH
360 if ( strend - s > slen || strend - s < slen - 1
361 || (strend - s == slen && strend[-1] != '\n')) {
362 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
363 goto fail_finish;
cad2e5aa
JH
364 }
365 /* Now should match s[0..slen-2] */
366 slen--;
653099ff 367 if (slen && (*SvPVX(check) != *s
cad2e5aa 368 || (slen > 1
653099ff 369 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
370 report_neq:
371 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
372 goto fail_finish;
373 }
cad2e5aa 374 }
653099ff
GS
375 else if (*SvPVX(check) != *s
376 || ((slen = SvCUR(check)) > 1
377 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
378 goto report_neq;
379 goto success_at_start;
cad2e5aa 380 }
2c2d71f5 381 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 382 s = strpos;
2c2d71f5 383 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 384 end_shift = prog->minlen - start_shift -
653099ff 385 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 386 if (!ml_anch) {
653099ff
GS
387 I32 end = prog->check_offset_max + CHR_SVLEN(check)
388 - (SvTAIL(check) != 0);
2c2d71f5
JH
389 I32 eshift = strend - s - end;
390
391 if (end_shift < eshift)
392 end_shift = eshift;
393 }
cad2e5aa 394 }
2c2d71f5 395 else { /* Can match at random position */
cad2e5aa
JH
396 ml_anch = 0;
397 s = strpos;
2c2d71f5
JH
398 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
399 /* Should be nonnegative! */
400 end_shift = prog->minlen - start_shift -
653099ff 401 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
402 }
403
2c2d71f5 404#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 405 if (end_shift < 0)
6bbae5e6 406 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
407#endif
408
2c2d71f5
JH
409 restart:
410 /* Find a possible match in the region s..strend by looking for
411 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 412 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
413 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
414 I32 p = -1; /* Internal iterator of scream. */
415 I32 *pp = data ? data->scream_pos : &p;
416
2c2d71f5
JH
417 if (PL_screamfirst[BmRARE(check)] >= 0
418 || ( BmRARE(check) == '\n'
419 && (BmPREVIOUS(check) == SvCUR(check) - 1)
420 && SvTAIL(check) ))
421 s = screaminstr(sv, check,
422 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 423 else
2c2d71f5 424 goto fail_finish;
cad2e5aa
JH
425 if (data)
426 *data->scream_olds = s;
427 }
428 else
429 s = fbm_instr((unsigned char*)s + start_shift,
430 (unsigned char*)strend - end_shift,
2c2d71f5 431 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
432
433 /* Update the count-of-usability, remove useless subpatterns,
434 unshift s. */
2c2d71f5
JH
435
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
437 (s ? "Found" : "Did not find"),
438 ((check == prog->anchored_substr) ? "anchored" : "floating"),
439 PL_colors[0],
7b0972df
JH
440 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
441 SvPVX(check),
2c2d71f5
JH
442 PL_colors[1], (SvTAIL(check) ? "$" : ""),
443 (s ? " at offset " : "...\n") ) );
444
445 if (!s)
446 goto fail_finish;
447
6eb5f6b9
JH
448 check_at = s;
449
2c2d71f5 450 /* Finish the diagnostic message */
30944b6d 451 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
452
453 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
454 Start with the other substr.
455 XXXX no SCREAM optimization yet - and a very coarse implementation
456 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
457 *always* match. Probably should be marked during compile...
458 Probably it is right to do no SCREAM here...
459 */
460
461 if (prog->float_substr && prog->anchored_substr) {
30944b6d 462 /* Take into account the "other" substring. */
2c2d71f5
JH
463 /* XXXX May be hopelessly wrong for UTF... */
464 if (!other_last)
6eb5f6b9 465 other_last = strpos;
2c2d71f5 466 if (check == prog->float_substr) {
30944b6d
IZ
467 do_other_anchored:
468 {
2c2d71f5
JH
469 char *last = s - start_shift, *last1, *last2;
470 char *s1 = s;
471
472 tmp = PL_bostr;
473 t = s - prog->check_offset_max;
474 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
475 && (!(prog->reganch & ROPT_UTF8)
476 || (PL_bostr = strpos, /* Used in regcopmaybe() */
477 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
478 && t > strpos)))
30944b6d 479 /* EMPTY */;
2c2d71f5
JH
480 else
481 t = strpos;
482 t += prog->anchored_offset;
6eb5f6b9
JH
483 if (t < other_last) /* These positions already checked */
484 t = other_last;
2c2d71f5
JH
485 PL_bostr = tmp;
486 last2 = last1 = strend - prog->minlen;
487 if (last < last1)
488 last1 = last;
489 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
490 /* On end-of-str: see comment below. */
491 s = fbm_instr((unsigned char*)t,
492 (unsigned char*)last1 + prog->anchored_offset
493 + SvCUR(prog->anchored_substr)
494 - (SvTAIL(prog->anchored_substr)!=0),
495 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
496 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
497 (s ? "Found" : "Contradicts"),
498 PL_colors[0],
7b0972df
JH
499 (int)(SvCUR(prog->anchored_substr)
500 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
501 SvPVX(prog->anchored_substr),
502 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
503 if (!s) {
504 if (last1 >= last2) {
505 DEBUG_r(PerlIO_printf(Perl_debug_log,
506 ", giving up...\n"));
507 goto fail_finish;
508 }
509 DEBUG_r(PerlIO_printf(Perl_debug_log,
510 ", trying floating at offset %ld...\n",
30944b6d 511 (long)(s1 + 1 - i_strpos)));
2c2d71f5 512 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 513 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
514 s = HOPc(last, 1);
515 goto restart;
516 }
517 else {
518 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 519 (long)(s - i_strpos)));
2c2d71f5 520 t = s - prog->anchored_offset;
6eb5f6b9 521 other_last = s + 1;
30944b6d 522 s = s1;
2c2d71f5
JH
523 if (t == strpos)
524 goto try_at_start;
2c2d71f5
JH
525 goto try_at_offset;
526 }
30944b6d 527 }
2c2d71f5
JH
528 }
529 else { /* Take into account the floating substring. */
530 char *last, *last1;
531 char *s1 = s;
532
533 t = s - start_shift;
534 last1 = last = strend - prog->minlen + prog->float_min_offset;
535 if (last - t > prog->float_max_offset)
536 last = t + prog->float_max_offset;
537 s = t + prog->float_min_offset;
6eb5f6b9
JH
538 if (s < other_last)
539 s = other_last;
2c2d71f5
JH
540 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
541 /* fbm_instr() takes into account exact value of end-of-str
542 if the check is SvTAIL(ed). Since false positives are OK,
543 and end-of-str is not later than strend we are OK. */
544 s = fbm_instr((unsigned char*)s,
545 (unsigned char*)last + SvCUR(prog->float_substr)
546 - (SvTAIL(prog->float_substr)!=0),
547 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
548 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
549 (s ? "Found" : "Contradicts"),
550 PL_colors[0],
7b0972df
JH
551 (int)(SvCUR(prog->float_substr)
552 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
553 SvPVX(prog->float_substr),
554 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
555 if (!s) {
556 if (last1 == last) {
557 DEBUG_r(PerlIO_printf(Perl_debug_log,
558 ", giving up...\n"));
559 goto fail_finish;
560 }
561 DEBUG_r(PerlIO_printf(Perl_debug_log,
562 ", trying anchored starting at offset %ld...\n",
30944b6d 563 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 564 other_last = last + 1;
2c2d71f5
JH
565 PL_regeol = strend; /* Used in HOP() */
566 s = HOPc(t, 1);
567 goto restart;
568 }
569 else {
570 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 571 (long)(s - i_strpos)));
6eb5f6b9 572 other_last = s + 1;
30944b6d 573 s = s1;
2c2d71f5
JH
574 if (t == strpos)
575 goto try_at_start;
2c2d71f5
JH
576 goto try_at_offset;
577 }
578 }
cad2e5aa 579 }
2c2d71f5
JH
580
581 t = s - prog->check_offset_max;
582 tmp = PL_bostr;
583 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
584 && (!(prog->reganch & ROPT_UTF8)
585 || (PL_bostr = strpos, /* Used in regcopmaybe() */
586 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
587 && t > strpos)))) {
588 PL_bostr = tmp;
589 /* Fixed substring is found far enough so that the match
590 cannot start at strpos. */
591 try_at_offset:
cad2e5aa 592 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
593 /* Eventually fbm_*() should handle this, but often
594 anchored_offset is not 0, so this check will not be wasted. */
595 /* XXXX In the code below we prefer to look for "^" even in
596 presence of anchored substrings. And we search even
597 beyond the found float position. These pessimizations
598 are historical artefacts only. */
599 find_anchor:
2c2d71f5 600 while (t < strend - prog->minlen) {
cad2e5aa
JH
601 if (*t == '\n') {
602 if (t < s - prog->check_offset_min) {
30944b6d
IZ
603 if (prog->anchored_substr) {
604 /* We definitely contradict the found anchored
605 substr. Due to the above check we do not
606 contradict "check" substr.
607 Thus we can arrive here only if check substr
608 is float. Redo checking for "other"=="fixed".
609 */
610 strpos = t + 1;
611 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
612 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
613 goto do_other_anchored;
614 }
cad2e5aa 615 s = t + 1;
2c2d71f5 616 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 617 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
618 goto set_useful;
619 }
2c2d71f5 620 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
621 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
622 strpos = s = t + 1;
cad2e5aa
JH
623 goto restart;
624 }
625 t++;
626 }
2c2d71f5
JH
627 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
628 PL_colors[0],PL_colors[1]));
629 goto fail_finish;
cad2e5aa
JH
630 }
631 s = t;
632 set_useful:
2c2d71f5 633 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
634 }
635 else {
2c2d71f5
JH
636 PL_bostr = tmp;
637 /* The found string does not prohibit matching at beg-of-str
638 - no optimization of calling REx engine can be performed,
639 unless it was an MBOL and we are not after MBOL. */
640 try_at_start:
641 /* Even in this situation we may use MBOL flag if strpos is offset
642 wrt the start of the string. */
05b4157f 643 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
d506a20d
IZ
644 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n'
645 /* May be due to an implicit anchor of m{.*foo} */
646 && !(prog->reganch & ROPT_IMPLICIT))
647 {
cad2e5aa
JH
648 t = strpos;
649 goto find_anchor;
650 }
30944b6d
IZ
651 DEBUG_r( if (ml_anch)
652 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
653 PL_colors[0],PL_colors[1]);
654 );
2c2d71f5 655 success_at_start:
30944b6d 656 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 657 && prog->check_substr /* Could be deleted already */
cad2e5aa 658 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
659 && prog->check_substr == prog->float_substr)
660 {
cad2e5aa
JH
661 /* If flags & SOMETHING - do not do it many times on the same match */
662 SvREFCNT_dec(prog->check_substr);
663 prog->check_substr = Nullsv; /* disable */
664 prog->float_substr = Nullsv; /* clear */
665 s = strpos;
3cf5c195
IZ
666 /* XXXX This is a remnant of the old implementation. It
667 looks wasteful, since now INTUIT can use many
6eb5f6b9 668 other heuristics. */
cad2e5aa
JH
669 prog->reganch &= ~RE_USE_INTUIT;
670 }
671 else
672 s = strpos;
673 }
674
6eb5f6b9
JH
675 /* Last resort... */
676 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
677 if (prog->regstclass) {
678 /* minlen == 0 is possible if regstclass is \b or \B,
679 and the fixed substr is ''$.
680 Since minlen is already taken into account, s+1 is before strend;
681 accidentally, minlen >= 1 guaranties no false positives at s + 1
682 even for \b or \B. But (minlen? 1 : 0) below assumes that
683 regstclass does not come from lookahead... */
684 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
685 This leaves EXACTF only, which is dealt with in find_byclass(). */
66e933ab
GS
686 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
687 ? STR_LEN(prog->regstclass)
688 : 1);
6eb5f6b9 689 char *endpos = (prog->anchored_substr || ml_anch)
66e933ab
GS
690 ? s + (prog->minlen? cl_l : 0)
691 : (prog->float_substr ? check_at - start_shift + cl_l
6eb5f6b9 692 : strend) ;
8fdd422b 693 char *startpos = sv && SvPOK(sv) ? strend - SvCUR(sv) : s;
6eb5f6b9
JH
694
695 t = s;
76384e4a
GS
696 if (prog->reganch & ROPT_UTF8) {
697 PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
698 PL_bostr = startpos;
699 }
6eb5f6b9
JH
700 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
701 if (!s) {
702#ifdef DEBUGGING
703 char *what;
704#endif
705 if (endpos == strend) {
706 DEBUG_r( PerlIO_printf(Perl_debug_log,
707 "Could not match STCLASS...\n") );
708 goto fail;
709 }
66e933ab
GS
710 DEBUG_r( PerlIO_printf(Perl_debug_log,
711 "This position contradicts STCLASS...\n") );
653099ff
GS
712 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
713 goto fail;
6eb5f6b9
JH
714 /* Contradict one of substrings */
715 if (prog->anchored_substr) {
6eb5f6b9
JH
716 if (prog->anchored_substr == check) {
717 DEBUG_r( what = "anchored" );
718 hop_and_restart:
719 PL_regeol = strend; /* Used in HOP() */
720 s = HOPc(t, 1);
66e933ab
GS
721 if (s + start_shift + end_shift > strend) {
722 /* XXXX Should be taken into account earlier? */
723 DEBUG_r( PerlIO_printf(Perl_debug_log,
724 "Could not match STCLASS...\n") );
725 goto fail;
726 }
6eb5f6b9 727 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 728 "Trying %s substr starting at offset %ld...\n",
6eb5f6b9
JH
729 what, (long)(s + start_shift - i_strpos)) );
730 goto restart;
731 }
66e933ab 732 /* Have both, check_string is floating */
6eb5f6b9
JH
733 if (t + start_shift >= check_at) /* Contradicts floating=check */
734 goto retry_floating_check;
735 /* Recheck anchored substring, but not floating... */
736 s = check_at;
737 DEBUG_r( PerlIO_printf(Perl_debug_log,
66e933ab 738 "Trying anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
739 (long)(other_last - i_strpos)) );
740 goto do_other_anchored;
741 }
60e71179
GS
742 /* Another way we could have checked stclass at the
743 current position only: */
744 if (ml_anch) {
745 s = t = t + 1;
746 DEBUG_r( PerlIO_printf(Perl_debug_log,
747 "Trying /^/m starting at offset %ld...\n",
748 (long)(t - i_strpos)) );
749 goto try_at_offset;
66e933ab 750 }
60e71179
GS
751 if (!prog->float_substr) /* Could have been deleted */
752 goto fail;
6eb5f6b9
JH
753 /* Check is floating subtring. */
754 retry_floating_check:
755 t = check_at - start_shift;
756 DEBUG_r( what = "floating" );
757 goto hop_and_restart;
758 }
759 DEBUG_r( if (t != s)
760 PerlIO_printf(Perl_debug_log,
761 "By STCLASS: moving %ld --> %ld\n",
762 (long)(t - i_strpos), (long)(s - i_strpos));
763 else
764 PerlIO_printf(Perl_debug_log,
765 "Does not contradict STCLASS...\n") );
766 }
2c2d71f5 767 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 768 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 769 return s;
2c2d71f5
JH
770
771 fail_finish: /* Substring not found */
66e933ab
GS
772 if (prog->check_substr) /* could be removed already */
773 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 774 fail:
2c2d71f5 775 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
776 PL_colors[4],PL_colors[5]));
777 return Nullch;
778}
9661b544 779
6eb5f6b9 780/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
781STATIC char *
782S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 783{
6eb5f6b9
JH
784 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
785 char *m;
d8093b23
G
786 STRLEN ln;
787 unsigned int c1;
788 unsigned int c2;
6eb5f6b9
JH
789 char *e;
790 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 791
6eb5f6b9
JH
792 /* We know what class it must start with. */
793 switch (OP(c)) {
794 case ANYOFUTF8:
795 while (s < strend) {
796 if (REGINCLASSUTF8(c, (U8*)s)) {
797 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 798 goto got_it;
3cf5c195 799 else
6eb5f6b9 800 tmp = doevery;
a0ed51b3 801 }
6eb5f6b9
JH
802 else
803 tmp = 1;
a0ed51b3
LW
804 s += UTF8SKIP(s);
805 }
6eb5f6b9
JH
806 break;
807 case ANYOF:
a0ed51b3 808 while (s < strend) {
d8093b23 809 if (REGINCLASS(c, *(U8*)s)) {
6eb5f6b9
JH
810 if (tmp && (norun || regtry(prog, s)))
811 goto got_it;
812 else
813 tmp = doevery;
a0ed51b3 814 }
6eb5f6b9
JH
815 else
816 tmp = 1;
a0ed51b3 817 s++;
a0d0e21e 818 }
6eb5f6b9
JH
819 break;
820 case EXACTF:
821 m = STRING(c);
822 ln = STR_LEN(c);
d8093b23 823 c1 = *(U8*)m;
6eb5f6b9
JH
824 c2 = PL_fold[c1];
825 goto do_exactf;
826 case EXACTFL:
827 m = STRING(c);
828 ln = STR_LEN(c);
d8093b23 829 c1 = *(U8*)m;
6eb5f6b9
JH
830 c2 = PL_fold_locale[c1];
831 do_exactf:
832 e = strend - ln;
b3c9acc1 833
6eb5f6b9
JH
834 if (norun && e < s)
835 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
836 /* Here it is NOT UTF! */
837 if (c1 == c2) {
838 while (s <= e) {
d8093b23 839 if ( *(U8*)s == c1
66e933ab
GS
840 && (ln == 1 || !(OP(c) == EXACTF
841 ? ibcmp(s, m, ln)
842 : ibcmp_locale(s, m, ln)))
6eb5f6b9 843 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
844 goto got_it;
845 s++;
846 }
847 } else {
848 while (s <= e) {
d8093b23 849 if ( (*(U8*)s == c1 || *(U8*)s == c2)
66e933ab
GS
850 && (ln == 1 || !(OP(c) == EXACTF
851 ? ibcmp(s, m, ln)
852 : ibcmp_locale(s, m, ln)))
6eb5f6b9 853 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
854 goto got_it;
855 s++;
856 }
857 }
858 break;
bbce6d69 859 case BOUNDL:
3280af22 860 PL_reg_flags |= RF_tainted;
bbce6d69 861 /* FALL THROUGH */
a0d0e21e 862 case BOUND:
6eb5f6b9 863 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 864 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 865 while (s < strend) {
95bac841 866 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 867 tmp = !tmp;
6eb5f6b9 868 if ((norun || regtry(prog, s)))
a0d0e21e 869 goto got_it;
a687059c 870 }
a0d0e21e
LW
871 s++;
872 }
6eb5f6b9 873 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
874 goto got_it;
875 break;
a0ed51b3
LW
876 case BOUNDLUTF8:
877 PL_reg_flags |= RF_tainted;
878 /* FALL THROUGH */
879 case BOUNDUTF8:
6eb5f6b9 880 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 881 tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 882 while (s < strend) {
76384e4a 883 if (tmp == !(OP(c) == BOUNDUTF8 ?
dfe13c55
GS
884 swash_fetch(PL_utf8_alnum, (U8*)s) :
885 isALNUM_LC_utf8((U8*)s)))
886 {
a0ed51b3 887 tmp = !tmp;
6eb5f6b9 888 if ((norun || regtry(prog, s)))
a0ed51b3
LW
889 goto got_it;
890 }
891 s += UTF8SKIP(s);
892 }
6eb5f6b9 893 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
894 goto got_it;
895 break;
bbce6d69 896 case NBOUNDL:
3280af22 897 PL_reg_flags |= RF_tainted;
bbce6d69 898 /* FALL THROUGH */
a0d0e21e 899 case NBOUND:
6eb5f6b9 900 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 901 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 902 while (s < strend) {
95bac841 903 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 904 tmp = !tmp;
6eb5f6b9 905 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
906 goto got_it;
907 s++;
908 }
6eb5f6b9 909 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
910 goto got_it;
911 break;
a0ed51b3
LW
912 case NBOUNDLUTF8:
913 PL_reg_flags |= RF_tainted;
914 /* FALL THROUGH */
915 case NBOUNDUTF8:
6eb5f6b9 916 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
76384e4a 917 tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
a0ed51b3 918 while (s < strend) {
76384e4a 919 if (tmp == !(OP(c) == NBOUNDUTF8 ?
dfe13c55
GS
920 swash_fetch(PL_utf8_alnum, (U8*)s) :
921 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 922 tmp = !tmp;
6eb5f6b9 923 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
924 goto got_it;
925 s += UTF8SKIP(s);
926 }
6eb5f6b9 927 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
928 goto got_it;
929 break;
a0d0e21e
LW
930 case ALNUM:
931 while (s < strend) {
bbce6d69 932 if (isALNUM(*s)) {
6eb5f6b9 933 if (tmp && (norun || regtry(prog, s)))
bbce6d69 934 goto got_it;
935 else
936 tmp = doevery;
937 }
938 else
939 tmp = 1;
940 s++;
941 }
942 break;
a0ed51b3
LW
943 case ALNUMUTF8:
944 while (s < strend) {
dfe13c55 945 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 946 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
947 goto got_it;
948 else
949 tmp = doevery;
950 }
951 else
952 tmp = 1;
953 s += UTF8SKIP(s);
954 }
955 break;
bbce6d69 956 case ALNUML:
3280af22 957 PL_reg_flags |= RF_tainted;
bbce6d69 958 while (s < strend) {
959 if (isALNUM_LC(*s)) {
6eb5f6b9 960 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 961 goto got_it;
a687059c 962 else
a0d0e21e
LW
963 tmp = doevery;
964 }
965 else
966 tmp = 1;
967 s++;
968 }
969 break;
a0ed51b3
LW
970 case ALNUMLUTF8:
971 PL_reg_flags |= RF_tainted;
972 while (s < strend) {
dfe13c55 973 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 974 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
975 goto got_it;
976 else
977 tmp = doevery;
978 }
979 else
980 tmp = 1;
981 s += UTF8SKIP(s);
982 }
983 break;
a0d0e21e
LW
984 case NALNUM:
985 while (s < strend) {
bbce6d69 986 if (!isALNUM(*s)) {
6eb5f6b9 987 if (tmp && (norun || regtry(prog, s)))
bbce6d69 988 goto got_it;
989 else
990 tmp = doevery;
991 }
992 else
993 tmp = 1;
994 s++;
995 }
996 break;
a0ed51b3
LW
997 case NALNUMUTF8:
998 while (s < strend) {
dfe13c55 999 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 1000 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1001 goto got_it;
1002 else
1003 tmp = doevery;
1004 }
1005 else
1006 tmp = 1;
1007 s += UTF8SKIP(s);
1008 }
1009 break;
bbce6d69 1010 case NALNUML:
3280af22 1011 PL_reg_flags |= RF_tainted;
bbce6d69 1012 while (s < strend) {
1013 if (!isALNUM_LC(*s)) {
6eb5f6b9 1014 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 1015 goto got_it;
a687059c 1016 else
a0d0e21e 1017 tmp = doevery;
a687059c 1018 }
a0d0e21e
LW
1019 else
1020 tmp = 1;
1021 s++;
1022 }
1023 break;
a0ed51b3
LW
1024 case NALNUMLUTF8:
1025 PL_reg_flags |= RF_tainted;
1026 while (s < strend) {
dfe13c55 1027 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 1028 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1029 goto got_it;
1030 else
1031 tmp = doevery;
1032 }
1033 else
1034 tmp = 1;
1035 s += UTF8SKIP(s);
1036 }
1037 break;
a0d0e21e
LW
1038 case SPACE:
1039 while (s < strend) {
1040 if (isSPACE(*s)) {
6eb5f6b9 1041 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1042 goto got_it;
1043 else
1044 tmp = doevery;
2304df62 1045 }
a0d0e21e
LW
1046 else
1047 tmp = 1;
1048 s++;
1049 }
1050 break;
a0ed51b3
LW
1051 case SPACEUTF8:
1052 while (s < strend) {
dfe13c55 1053 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1054 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1055 goto got_it;
1056 else
1057 tmp = doevery;
1058 }
1059 else
1060 tmp = 1;
1061 s += UTF8SKIP(s);
1062 }
1063 break;
bbce6d69 1064 case SPACEL:
3280af22 1065 PL_reg_flags |= RF_tainted;
bbce6d69 1066 while (s < strend) {
1067 if (isSPACE_LC(*s)) {
6eb5f6b9 1068 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1069 goto got_it;
1070 else
1071 tmp = doevery;
1072 }
1073 else
1074 tmp = 1;
1075 s++;
1076 }
1077 break;
a0ed51b3
LW
1078 case SPACELUTF8:
1079 PL_reg_flags |= RF_tainted;
1080 while (s < strend) {
dfe13c55 1081 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1082 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1083 goto got_it;
1084 else
1085 tmp = doevery;
1086 }
1087 else
1088 tmp = 1;
1089 s += UTF8SKIP(s);
1090 }
1091 break;
a0d0e21e
LW
1092 case NSPACE:
1093 while (s < strend) {
1094 if (!isSPACE(*s)) {
6eb5f6b9 1095 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1096 goto got_it;
1097 else
1098 tmp = doevery;
a687059c 1099 }
a0d0e21e
LW
1100 else
1101 tmp = 1;
1102 s++;
1103 }
1104 break;
a0ed51b3
LW
1105 case NSPACEUTF8:
1106 while (s < strend) {
dfe13c55 1107 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1108 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1109 goto got_it;
1110 else
1111 tmp = doevery;
1112 }
1113 else
1114 tmp = 1;
1115 s += UTF8SKIP(s);
1116 }
1117 break;
bbce6d69 1118 case NSPACEL:
3280af22 1119 PL_reg_flags |= RF_tainted;
bbce6d69 1120 while (s < strend) {
1121 if (!isSPACE_LC(*s)) {
6eb5f6b9 1122 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1123 goto got_it;
1124 else
1125 tmp = doevery;
1126 }
1127 else
1128 tmp = 1;
1129 s++;
1130 }
1131 break;
a0ed51b3
LW
1132 case NSPACELUTF8:
1133 PL_reg_flags |= RF_tainted;
1134 while (s < strend) {
dfe13c55 1135 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1136 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1137 goto got_it;
1138 else
1139 tmp = doevery;
1140 }
1141 else
1142 tmp = 1;
1143 s += UTF8SKIP(s);
1144 }
1145 break;
a0d0e21e
LW
1146 case DIGIT:
1147 while (s < strend) {
1148 if (isDIGIT(*s)) {
6eb5f6b9 1149 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1150 goto got_it;
1151 else
1152 tmp = doevery;
2b69d0c2 1153 }
a0d0e21e
LW
1154 else
1155 tmp = 1;
1156 s++;
1157 }
1158 break;
a0ed51b3
LW
1159 case DIGITUTF8:
1160 while (s < strend) {
dfe13c55 1161 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1162 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1163 goto got_it;
1164 else
1165 tmp = doevery;
1166 }
1167 else
1168 tmp = 1;
1169 s += UTF8SKIP(s);
1170 }
1171 break;
b8c5462f
JH
1172 case DIGITL:
1173 PL_reg_flags |= RF_tainted;
1174 while (s < strend) {
1175 if (isDIGIT_LC(*s)) {
6eb5f6b9 1176 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1177 goto got_it;
1178 else
1179 tmp = doevery;
1180 }
1181 else
1182 tmp = 1;
1183 s++;
1184 }
1185 break;
1186 case DIGITLUTF8:
1187 PL_reg_flags |= RF_tainted;
1188 while (s < strend) {
1189 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1190 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
1195 else
1196 tmp = 1;
1197 s += UTF8SKIP(s);
1198 }
1199 break;
a0d0e21e
LW
1200 case NDIGIT:
1201 while (s < strend) {
1202 if (!isDIGIT(*s)) {
6eb5f6b9 1203 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1204 goto got_it;
1205 else
1206 tmp = doevery;
a687059c 1207 }
a0d0e21e
LW
1208 else
1209 tmp = 1;
1210 s++;
1211 }
1212 break;
a0ed51b3
LW
1213 case NDIGITUTF8:
1214 while (s < strend) {
dfe13c55 1215 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1216 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1217 goto got_it;
1218 else
1219 tmp = doevery;
1220 }
1221 else
1222 tmp = 1;
1223 s += UTF8SKIP(s);
1224 }
1225 break;
b8c5462f
JH
1226 case NDIGITL:
1227 PL_reg_flags |= RF_tainted;
1228 while (s < strend) {
1229 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1230 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1231 goto got_it;
1232 else
1233 tmp = doevery;
1234 }
1235 else
1236 tmp = 1;
1237 s++;
a0ed51b3 1238 }
b8c5462f
JH
1239 break;
1240 case NDIGITLUTF8:
1241 PL_reg_flags |= RF_tainted;
1242 while (s < strend) {
1243 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1244 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1245 goto got_it;
cf93c79d 1246 else
b8c5462f
JH
1247 tmp = doevery;
1248 }
1249 else
1250 tmp = 1;
1251 s += UTF8SKIP(s);
1252 }
1253 break;
b3c9acc1 1254 default:
3c3eec57
GS
1255 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1256 break;
d6a28714 1257 }
6eb5f6b9
JH
1258 return 0;
1259 got_it:
1260 return s;
1261}
1262
1263/*
1264 - regexec_flags - match a regexp against a string
1265 */
1266I32
1267Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1268 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1269/* strend: pointer to null at end of string */
1270/* strbeg: real beginning of string */
1271/* minend: end of match must be >=minend after stringarg. */
1272/* data: May be used for some additional optimizations. */
1273/* nosave: For optimizations. */
1274{
1275 dTHR;
1276 register char *s;
1277 register regnode *c;
1278 register char *startpos = stringarg;
6eb5f6b9
JH
1279 I32 minlen; /* must match at least this many chars */
1280 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1281 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1282 constant substr. */ /* CC */
1283 I32 end_shift = 0; /* Same for the end. */ /* CC */
1284 I32 scream_pos = -1; /* Internal iterator of scream. */
1285 char *scream_olds;
1286 SV* oreplsv = GvSV(PL_replgv);
1287
1288 PL_regcc = 0;
1289
1290 cache_re(prog);
1291#ifdef DEBUGGING
1292 PL_regnarrate = PL_debug & 512;
1293#endif
1294
1295 /* Be paranoid... */
1296 if (prog == NULL || startpos == NULL) {
1297 Perl_croak(aTHX_ "NULL regexp parameter");
1298 return 0;
1299 }
1300
1301 minlen = prog->minlen;
1302 if (strend - startpos < minlen) goto phooey;
1303
1304 if (startpos == strbeg) /* is ^ valid at stringarg? */
1305 PL_regprev = '\n';
1306 else {
1307 PL_regprev = (U32)stringarg[-1];
1308 if (!PL_multiline && PL_regprev == '\n')
1309 PL_regprev = '\0'; /* force ^ to NOT match */
1310 }
1311
1312 /* Check validity of program. */
1313 if (UCHARAT(prog->program) != REG_MAGIC) {
1314 Perl_croak(aTHX_ "corrupted regexp program");
1315 }
1316
1317 PL_reg_flags = 0;
1318 PL_reg_eval_set = 0;
1319 PL_reg_maxiter = 0;
1320
1321 if (prog->reganch & ROPT_UTF8)
1322 PL_reg_flags |= RF_utf8;
1323
1324 /* Mark beginning of line for ^ and lookbehind. */
1325 PL_regbol = startpos;
1326 PL_bostr = strbeg;
1327 PL_reg_sv = sv;
1328
1329 /* Mark end of line for $ (and such) */
1330 PL_regeol = strend;
1331
1332 /* see how far we have to get to not match where we matched before */
1333 PL_regtill = startpos+minend;
1334
1335 /* We start without call_cc context. */
1336 PL_reg_call_cc = 0;
1337
1338 /* If there is a "must appear" string, look for it. */
1339 s = startpos;
1340
1341 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1342 MAGIC *mg;
1343
1344 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1345 PL_reg_ganch = startpos;
1346 else if (sv && SvTYPE(sv) >= SVt_PVMG
1347 && SvMAGIC(sv)
1348 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1349 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1350 if (prog->reganch & ROPT_ANCH_GPOS) {
1351 if (s > PL_reg_ganch)
1352 goto phooey;
1353 s = PL_reg_ganch;
1354 }
1355 }
1356 else /* pos() not defined */
1357 PL_reg_ganch = strbeg;
1358 }
1359
1360 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1361 re_scream_pos_data d;
1362
1363 d.scream_olds = &scream_olds;
1364 d.scream_pos = &scream_pos;
1365 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1366 if (!s)
1367 goto phooey; /* not present */
1368 }
1369
1370 DEBUG_r( if (!PL_colorset) reginitcolors() );
1371 DEBUG_r(PerlIO_printf(Perl_debug_log,
1372 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1373 PL_colors[4],PL_colors[5],PL_colors[0],
1374 prog->precomp,
1375 PL_colors[1],
1376 (strlen(prog->precomp) > 60 ? "..." : ""),
1377 PL_colors[0],
1378 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1379 startpos, PL_colors[1],
1380 (strend - startpos > 60 ? "..." : ""))
1381 );
1382
1383 /* Simplest case: anchored match need be tried only once. */
1384 /* [unless only anchor is BOL and multiline is set] */
1385 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1386 if (s == startpos && regtry(prog, startpos))
1387 goto got_it;
1388 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1389 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1390 {
1391 char *end;
1392
1393 if (minlen)
1394 dontbother = minlen - 1;
1395 end = HOPc(strend, -dontbother) - 1;
1396 /* for multiline we only have to try after newlines */
1397 if (prog->check_substr) {
1398 if (s == startpos)
1399 goto after_try;
1400 while (1) {
1401 if (regtry(prog, s))
1402 goto got_it;
1403 after_try:
1404 if (s >= end)
1405 goto phooey;
1406 if (prog->reganch & RE_USE_INTUIT) {
1407 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1408 if (!s)
1409 goto phooey;
1410 }
1411 else
1412 s++;
1413 }
1414 } else {
1415 if (s > startpos)
1416 s--;
1417 while (s < end) {
1418 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1419 if (regtry(prog, s))
1420 goto got_it;
1421 }
1422 }
1423 }
1424 }
1425 goto phooey;
1426 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1427 if (regtry(prog, PL_reg_ganch))
1428 goto got_it;
1429 goto phooey;
1430 }
1431
1432 /* Messy cases: unanchored match. */
1433 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1434 /* we have /x+whatever/ */
1435 /* it must be a one character string (XXXX Except UTF?) */
1436 char ch = SvPVX(prog->anchored_substr)[0];
1437 if (UTF) {
1438 while (s < strend) {
1439 if (*s == ch) {
1440 if (regtry(prog, s)) goto got_it;
1441 s += UTF8SKIP(s);
1442 while (s < strend && *s == ch)
1443 s += UTF8SKIP(s);
1444 }
1445 s += UTF8SKIP(s);
1446 }
1447 }
1448 else {
1449 while (s < strend) {
1450 if (*s == ch) {
1451 if (regtry(prog, s)) goto got_it;
1452 s++;
1453 while (s < strend && *s == ch)
1454 s++;
1455 }
1456 s++;
1457 }
1458 }
1459 }
1460 /*SUPPRESS 560*/
1461 else if (prog->anchored_substr != Nullsv
1462 || (prog->float_substr != Nullsv
1463 && prog->float_max_offset < strend - s)) {
1464 SV *must = prog->anchored_substr
1465 ? prog->anchored_substr : prog->float_substr;
1466 I32 back_max =
1467 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1468 I32 back_min =
1469 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
6eb5f6b9
JH
1470 char *last = HOPc(strend, /* Cannot start after this */
1471 -(I32)(CHR_SVLEN(must)
1472 - (SvTAIL(must) != 0) + back_min));
1473 char *last1; /* Last position checked before */
1474
1475 if (s > PL_bostr)
1476 last1 = HOPc(s, -1);
1477 else
1478 last1 = s - 1; /* bogus */
1479
1480 /* XXXX check_substr already used to find `s', can optimize if
1481 check_substr==must. */
1482 scream_pos = -1;
1483 dontbother = end_shift;
1484 strend = HOPc(strend, -dontbother);
1485 while ( (s <= last) &&
1486 ((flags & REXEC_SCREAM)
1487 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1488 end_shift, &scream_pos, 0))
1489 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1490 (unsigned char*)strend, must,
1491 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1492 if (HOPc(s, -back_max) > last1) {
1493 last1 = HOPc(s, -back_min);
1494 s = HOPc(s, -back_max);
1495 }
1496 else {
1497 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1498
1499 last1 = HOPc(s, -back_min);
1500 s = t;
1501 }
1502 if (UTF) {
1503 while (s <= last1) {
1504 if (regtry(prog, s))
1505 goto got_it;
1506 s += UTF8SKIP(s);
1507 }
1508 }
1509 else {
1510 while (s <= last1) {
1511 if (regtry(prog, s))
1512 goto got_it;
1513 s++;
1514 }
1515 }
1516 }
1517 goto phooey;
1518 }
155aba94 1519 else if ((c = prog->regstclass)) {
66e933ab
GS
1520 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1521 /* don't bother with what can't match */
6eb5f6b9
JH
1522 strend = HOPc(strend, -(minlen - 1));
1523 if (find_byclass(prog, c, s, strend, startpos, 0))
1524 goto got_it;
d6a28714
JH
1525 }
1526 else {
1527 dontbother = 0;
1528 if (prog->float_substr != Nullsv) { /* Trim the end. */
1529 char *last;
d6a28714
JH
1530
1531 if (flags & REXEC_SCREAM) {
1532 last = screaminstr(sv, prog->float_substr, s - strbeg,
1533 end_shift, &scream_pos, 1); /* last one */
1534 if (!last)
1535 last = scream_olds; /* Only one occurence. */
b8c5462f 1536 }
d6a28714
JH
1537 else {
1538 STRLEN len;
1539 char *little = SvPV(prog->float_substr, len);
1540
1541 if (SvTAIL(prog->float_substr)) {
1542 if (memEQ(strend - len + 1, little, len - 1))
1543 last = strend - len + 1;
1544 else if (!PL_multiline)
1545 last = memEQ(strend - len, little, len)
1546 ? strend - len : Nullch;
b8c5462f 1547 else
d6a28714
JH
1548 goto find_last;
1549 } else {
1550 find_last:
1551 if (len)
1552 last = rninstr(s, strend, little, little + len);
b8c5462f 1553 else
d6a28714 1554 last = strend; /* matching `$' */
b8c5462f 1555 }
b8c5462f 1556 }
d6a28714
JH
1557 if (last == NULL) goto phooey; /* Should not happen! */
1558 dontbother = strend - last + prog->float_min_offset;
1559 }
1560 if (minlen && (dontbother < minlen))
1561 dontbother = minlen - 1;
1562 strend -= dontbother; /* this one's always in bytes! */
1563 /* We don't know much -- general case. */
1564 if (UTF) {
1565 for (;;) {
1566 if (regtry(prog, s))
1567 goto got_it;
1568 if (s >= strend)
1569 break;
b8c5462f 1570 s += UTF8SKIP(s);
d6a28714
JH
1571 };
1572 }
1573 else {
1574 do {
1575 if (regtry(prog, s))
1576 goto got_it;
1577 } while (s++ < strend);
1578 }
1579 }
1580
1581 /* Failure. */
1582 goto phooey;
1583
1584got_it:
1585 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1586
1587 if (PL_reg_eval_set) {
1588 /* Preserve the current value of $^R */
1589 if (oreplsv != GvSV(PL_replgv))
1590 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1591 restored, the value remains
1592 the same. */
1593 restore_pos(aTHXo_ 0);
1594 }
1595
1596 /* make sure $`, $&, $', and $digit will work later */
1597 if ( !(flags & REXEC_NOT_FIRST) ) {
1598 if (RX_MATCH_COPIED(prog)) {
1599 Safefree(prog->subbeg);
1600 RX_MATCH_COPIED_off(prog);
1601 }
1602 if (flags & REXEC_COPY_STR) {
1603 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1604
1605 s = savepvn(strbeg, i);
1606 prog->subbeg = s;
1607 prog->sublen = i;
1608 RX_MATCH_COPIED_on(prog);
1609 }
1610 else {
1611 prog->subbeg = strbeg;
1612 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1613 }
1614 }
1615
1616 return 1;
1617
1618phooey:
1619 if (PL_reg_eval_set)
1620 restore_pos(aTHXo_ 0);
1621 return 0;
1622}
1623
1624/*
1625 - regtry - try match at specific point
1626 */
1627STATIC I32 /* 0 failure, 1 success */
1628S_regtry(pTHX_ regexp *prog, char *startpos)
1629{
1630 dTHR;
1631 register I32 i;
1632 register I32 *sp;
1633 register I32 *ep;
1634 CHECKPOINT lastcp;
1635
1636 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1637 MAGIC *mg;
1638
1639 PL_reg_eval_set = RS_init;
1640 DEBUG_r(DEBUG_s(
b900a521
JH
1641 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1642 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1643 ));
e8347627 1644 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1645 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1646 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1647 SAVETMPS;
1648 /* Apparently this is not needed, judging by wantarray. */
e8347627 1649 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1650 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1651
1652 if (PL_reg_sv) {
1653 /* Make $_ available to executed code. */
1654 if (PL_reg_sv != DEFSV) {
1655 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1656 SAVESPTR(DEFSV);
1657 DEFSV = PL_reg_sv;
b8c5462f 1658 }
d6a28714
JH
1659
1660 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1661 && (mg = mg_find(PL_reg_sv, 'g')))) {
1662 /* prepare for quick setting of pos */
1663 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1664 mg = mg_find(PL_reg_sv, 'g');
1665 mg->mg_len = -1;
b8c5462f 1666 }
d6a28714
JH
1667 PL_reg_magic = mg;
1668 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1669 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1670 }
1671 if (!PL_reg_curpm)
0f79a09d 1672 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1673 PL_reg_curpm->op_pmregexp = prog;
1674 PL_reg_oldcurpm = PL_curpm;
1675 PL_curpm = PL_reg_curpm;
1676 if (RX_MATCH_COPIED(prog)) {
1677 /* Here is a serious problem: we cannot rewrite subbeg,
1678 since it may be needed if this match fails. Thus
1679 $` inside (?{}) could fail... */
1680 PL_reg_oldsaved = prog->subbeg;
1681 PL_reg_oldsavedlen = prog->sublen;
1682 RX_MATCH_COPIED_off(prog);
1683 }
1684 else
1685 PL_reg_oldsaved = Nullch;
1686 prog->subbeg = PL_bostr;
1687 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1688 }
1689 prog->startp[0] = startpos - PL_bostr;
1690 PL_reginput = startpos;
1691 PL_regstartp = prog->startp;
1692 PL_regendp = prog->endp;
1693 PL_reglastparen = &prog->lastparen;
1694 prog->lastparen = 0;
1695 PL_regsize = 0;
1696 DEBUG_r(PL_reg_starttry = startpos);
1697 if (PL_reg_start_tmpl <= prog->nparens) {
1698 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1699 if(PL_reg_start_tmp)
1700 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1701 else
1702 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1703 }
1704
1705 /* XXXX What this code is doing here?!!! There should be no need
1706 to do this again and again, PL_reglastparen should take care of
1707 this! */
1708 sp = prog->startp;
1709 ep = prog->endp;
1710 if (prog->nparens) {
1711 for (i = prog->nparens; i >= 1; i--) {
1712 *++sp = -1;
1713 *++ep = -1;
1714 }
1715 }
1716 REGCP_SET;
1717 if (regmatch(prog->program + 1)) {
1718 prog->endp[0] = PL_reginput - PL_bostr;
1719 return 1;
1720 }
1721 REGCP_UNWIND;
1722 return 0;
1723}
1724
1725/*
1726 - regmatch - main matching routine
1727 *
1728 * Conceptually the strategy is simple: check to see whether the current
1729 * node matches, call self recursively to see whether the rest matches,
1730 * and then act accordingly. In practice we make some effort to avoid
1731 * recursion, in particular by going through "ordinary" nodes (that don't
1732 * need to know whether the rest of the match failed) by a loop instead of
1733 * by recursion.
1734 */
1735/* [lwall] I've hoisted the register declarations to the outer block in order to
1736 * maybe save a little bit of pushing and popping on the stack. It also takes
1737 * advantage of machines that use a register save mask on subroutine entry.
1738 */
1739STATIC I32 /* 0 failure, 1 success */
1740S_regmatch(pTHX_ regnode *prog)
1741{
1742 dTHR;
1743 register regnode *scan; /* Current node. */
1744 regnode *next; /* Next node. */
1745 regnode *inner; /* Next node in internal branch. */
1746 register I32 nextchr; /* renamed nextchr - nextchar colides with
1747 function of same name */
1748 register I32 n; /* no or next */
1749 register I32 ln; /* len or last */
1750 register char *s; /* operand or save */
1751 register char *locinput = PL_reginput;
1752 register I32 c1, c2, paren; /* case fold search, parenth */
1753 int minmod = 0, sw = 0, logical = 0;
1754#ifdef DEBUGGING
1755 PL_regindent++;
1756#endif
1757
1758 /* Note that nextchr is a byte even in UTF */
1759 nextchr = UCHARAT(locinput);
1760 scan = prog;
1761 while (scan != NULL) {
1762#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1763#ifdef DEBUGGING
1764# define sayYES goto yes
1765# define sayNO goto no
7821416a
IZ
1766# define sayYES_FINAL goto yes_final
1767# define sayYES_LOUD goto yes_loud
1768# define sayNO_FINAL goto no_final
1769# define sayNO_SILENT goto do_no
d6a28714
JH
1770# define saySAME(x) if (x) goto yes; else goto no
1771# define REPORT_CODE_OFF 24
1772#else
1773# define sayYES return 1
1774# define sayNO return 0
7821416a
IZ
1775# define sayYES_FINAL return 1
1776# define sayYES_LOUD return 1
1777# define sayNO_FINAL return 0
1778# define sayNO_SILENT return 0
d6a28714
JH
1779# define saySAME(x) return x
1780#endif
1781 DEBUG_r( {
1782 SV *prop = sv_newmortal();
1783 int docolor = *PL_colors[0];
1784 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1785 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1786 /* The part of the string before starttry has one color
1787 (pref0_len chars), between starttry and current
1788 position another one (pref_len - pref0_len chars),
1789 after the current position the third one.
1790 We assume that pref0_len <= pref_len, otherwise we
1791 decrease pref0_len. */
1792 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1793 ? (5 + taill) - l : locinput - PL_bostr);
1794 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1795
1796 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1797 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1798 ? (5 + taill) - pref_len : PL_regeol - locinput);
1799 if (pref0_len < 0)
1800 pref0_len = 0;
1801 if (pref0_len > pref_len)
1802 pref0_len = pref_len;
1803 regprop(prop, scan);
1804 PerlIO_printf(Perl_debug_log,
b900a521
JH
1805 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1806 (IV)(locinput - PL_bostr),
d6a28714
JH
1807 PL_colors[4], pref0_len,
1808 locinput - pref_len, PL_colors[5],
1809 PL_colors[2], pref_len - pref0_len,
1810 locinput - pref_len + pref0_len, PL_colors[3],
1811 (docolor ? "" : "> <"),
1812 PL_colors[0], l, locinput, PL_colors[1],
1813 15 - l - pref_len + 1,
1814 "",
b900a521 1815 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1816 SvPVX(prop));
1817 } );
1818
1819 next = scan + NEXT_OFF(scan);
1820 if (next == scan)
1821 next = NULL;
1822
1823 switch (OP(scan)) {
1824 case BOL:
1825 if (locinput == PL_bostr
1826 ? PL_regprev == '\n'
1827 : (PL_multiline &&
1828 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1829 {
1830 /* regtill = regbol; */
b8c5462f
JH
1831 break;
1832 }
d6a28714
JH
1833 sayNO;
1834 case MBOL:
1835 if (locinput == PL_bostr
1836 ? PL_regprev == '\n'
1837 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1838 {
b8c5462f
JH
1839 break;
1840 }
d6a28714
JH
1841 sayNO;
1842 case SBOL:
1843 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1844 break;
d6a28714
JH
1845 sayNO;
1846 case GPOS:
1847 if (locinput == PL_reg_ganch)
1848 break;
1849 sayNO;
1850 case EOL:
1851 if (PL_multiline)
1852 goto meol;
1853 else
1854 goto seol;
1855 case MEOL:
1856 meol:
1857 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1858 sayNO;
b8c5462f 1859 break;
d6a28714
JH
1860 case SEOL:
1861 seol:
1862 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1863 sayNO;
d6a28714 1864 if (PL_regeol - locinput > 1)
b8c5462f 1865 sayNO;
b8c5462f 1866 break;
d6a28714
JH
1867 case EOS:
1868 if (PL_regeol != locinput)
b8c5462f 1869 sayNO;
d6a28714
JH
1870 break;
1871 case SANYUTF8:
b8c5462f 1872 if (nextchr & 0x80) {
b8c5462f 1873 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1874 if (locinput > PL_regeol)
1875 sayNO;
b8c5462f
JH
1876 nextchr = UCHARAT(locinput);
1877 break;
1878 }
d6a28714 1879 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1880 sayNO;
b8c5462f 1881 nextchr = UCHARAT(++locinput);
a0d0e21e 1882 break;
d6a28714
JH
1883 case SANY:
1884 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1885 sayNO;
1886 nextchr = UCHARAT(++locinput);
b85d18e9 1887 break;
d6a28714 1888 case ANYUTF8:
a0ed51b3 1889 if (nextchr & 0x80) {
b8c5462f 1890 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1891 if (locinput > PL_regeol)
1892 sayNO;
a0ed51b3
LW
1893 nextchr = UCHARAT(locinput);
1894 break;
1895 }
155aba94 1896 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
a0ed51b3
LW
1897 sayNO;
1898 nextchr = UCHARAT(++locinput);
1899 break;
d6a28714 1900 case REG_ANY:
155aba94 1901 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
4633a7c4 1902 sayNO;
76e3520e 1903 nextchr = UCHARAT(++locinput);
a0d0e21e 1904 break;
d6a28714 1905 case EXACT:
cd439c50
IZ
1906 s = STRING(scan);
1907 ln = STR_LEN(scan);
d6a28714
JH
1908 /* Inline the first character, for speed. */
1909 if (UCHARAT(s) != nextchr)
1910 sayNO;
1911 if (PL_regeol - locinput < ln)
1912 sayNO;
1913 if (ln > 1 && memNE(s, locinput, ln))
1914 sayNO;
1915 locinput += ln;
1916 nextchr = UCHARAT(locinput);
1917 break;
1918 case EXACTFL:
b8c5462f
JH
1919 PL_reg_flags |= RF_tainted;
1920 /* FALL THROUGH */
d6a28714 1921 case EXACTF:
cd439c50
IZ
1922 s = STRING(scan);
1923 ln = STR_LEN(scan);
d6a28714
JH
1924
1925 if (UTF) {
1926 char *l = locinput;
1927 char *e = s + ln;
1928 c1 = OP(scan) == EXACTF;
1929 while (s < e) {
1930 if (l >= PL_regeol)
1931 sayNO;
1932 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1933 toLOWER_utf8((U8*)l) :
1934 toLOWER_LC_utf8((U8*)l)))
1935 {
1936 sayNO;
1937 }
1938 s += UTF8SKIP(s);
1939 l += UTF8SKIP(l);
b8c5462f 1940 }
d6a28714 1941 locinput = l;
a0ed51b3
LW
1942 nextchr = UCHARAT(locinput);
1943 break;
1944 }
d6a28714
JH
1945
1946 /* Inline the first character, for speed. */
1947 if (UCHARAT(s) != nextchr &&
1948 UCHARAT(s) != ((OP(scan) == EXACTF)
1949 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1950 sayNO;
d6a28714 1951 if (PL_regeol - locinput < ln)
b8c5462f 1952 sayNO;
d6a28714
JH
1953 if (ln > 1 && (OP(scan) == EXACTF
1954 ? ibcmp(s, locinput, ln)
1955 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1956 sayNO;
d6a28714
JH
1957 locinput += ln;
1958 nextchr = UCHARAT(locinput);
a0d0e21e 1959 break;
d6a28714 1960 case ANYOFUTF8:
d6a28714 1961 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1962 sayNO;
d6a28714
JH
1963 if (locinput >= PL_regeol)
1964 sayNO;
1965 locinput += PL_utf8skip[nextchr];
1966 nextchr = UCHARAT(locinput);
1967 break;
1968 case ANYOF:
d6a28714 1969 if (nextchr < 0)
b8c5462f 1970 nextchr = UCHARAT(locinput);
936ed897 1971 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
1972 sayNO;
1973 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1974 sayNO;
b8c5462f
JH
1975 nextchr = UCHARAT(++locinput);
1976 break;
d6a28714 1977 case ALNUML:
b8c5462f
JH
1978 PL_reg_flags |= RF_tainted;
1979 /* FALL THROUGH */
d6a28714 1980 case ALNUM:
b8c5462f 1981 if (!nextchr)
4633a7c4 1982 sayNO;
d6a28714
JH
1983 if (!(OP(scan) == ALNUM
1984 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1985 sayNO;
1986 nextchr = UCHARAT(++locinput);
bbce6d69 1987 break;
d6a28714 1988 case ALNUMLUTF8:
3280af22 1989 PL_reg_flags |= RF_tainted;
bbce6d69 1990 /* FALL THROUGH */
d6a28714 1991 case ALNUMUTF8:
b8c5462f
JH
1992 if (!nextchr)
1993 sayNO;
1994 if (nextchr & 0x80) {
d6a28714
JH
1995 if (!(OP(scan) == ALNUMUTF8
1996 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1997 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1998 {
1999 sayNO;
a0ed51b3 2000 }
b8c5462f 2001 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2002 nextchr = UCHARAT(locinput);
2003 break;
2004 }
d6a28714
JH
2005 if (!(OP(scan) == ALNUMUTF8
2006 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2007 sayNO;
b8c5462f 2008 nextchr = UCHARAT(++locinput);
a0d0e21e 2009 break;
d6a28714 2010 case NALNUML:
b8c5462f
JH
2011 PL_reg_flags |= RF_tainted;
2012 /* FALL THROUGH */
d6a28714
JH
2013 case NALNUM:
2014 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2015 sayNO;
d6a28714
JH
2016 if (OP(scan) == NALNUM
2017 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 2018 sayNO;
b8c5462f 2019 nextchr = UCHARAT(++locinput);
a0ed51b3 2020 break;
d6a28714 2021 case NALNUMLUTF8:
b8c5462f
JH
2022 PL_reg_flags |= RF_tainted;
2023 /* FALL THROUGH */
d6a28714 2024 case NALNUMUTF8:
3280af22 2025 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2026 sayNO;
b8c5462f 2027 if (nextchr & 0x80) {
d6a28714
JH
2028 if (OP(scan) == NALNUMUTF8
2029 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2030 : isALNUM_LC_utf8((U8*)locinput))
2031 {
b8c5462f 2032 sayNO;
d6a28714 2033 }
b8c5462f
JH
2034 locinput += PL_utf8skip[nextchr];
2035 nextchr = UCHARAT(locinput);
2036 break;
2037 }
d6a28714
JH
2038 if (OP(scan) == NALNUMUTF8
2039 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2040 sayNO;
76e3520e 2041 nextchr = UCHARAT(++locinput);
a0d0e21e 2042 break;
d6a28714
JH
2043 case BOUNDL:
2044 case NBOUNDL:
3280af22 2045 PL_reg_flags |= RF_tainted;
bbce6d69 2046 /* FALL THROUGH */
d6a28714
JH
2047 case BOUND:
2048 case NBOUND:
2049 /* was last char in word? */
2050 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2051 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2052 ln = isALNUM(ln);
2053 n = isALNUM(nextchr);
2054 }
2055 else {
2056 ln = isALNUM_LC(ln);
2057 n = isALNUM_LC(nextchr);
2058 }
2059 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2060 sayNO;
a0d0e21e 2061 break;
d6a28714
JH
2062 case BOUNDLUTF8:
2063 case NBOUNDLUTF8:
a0ed51b3
LW
2064 PL_reg_flags |= RF_tainted;
2065 /* FALL THROUGH */
d6a28714
JH
2066 case BOUNDUTF8:
2067 case NBOUNDUTF8:
2068 /* was last char in word? */
2069 ln = (locinput != PL_regbol)
2070 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2071 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2072 ln = isALNUM_uni(ln);
2073 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2074 }
d6a28714
JH
2075 else {
2076 ln = isALNUM_LC_uni(ln);
2077 n = isALNUM_LC_utf8((U8*)locinput);
2078 }
2079 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2080 sayNO;
a0ed51b3 2081 break;
d6a28714 2082 case SPACEL:
3280af22 2083 PL_reg_flags |= RF_tainted;
bbce6d69 2084 /* FALL THROUGH */
d6a28714 2085 case SPACE:
9442cb0e 2086 if (!nextchr)
4633a7c4 2087 sayNO;
d6a28714
JH
2088 if (!(OP(scan) == SPACE
2089 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2090 sayNO;
76e3520e 2091 nextchr = UCHARAT(++locinput);
a0d0e21e 2092 break;
d6a28714 2093 case SPACELUTF8:
a0ed51b3
LW
2094 PL_reg_flags |= RF_tainted;
2095 /* FALL THROUGH */
d6a28714 2096 case SPACEUTF8:
9442cb0e 2097 if (!nextchr)
a0ed51b3
LW
2098 sayNO;
2099 if (nextchr & 0x80) {
d6a28714 2100 if (!(OP(scan) == SPACEUTF8
9442cb0e 2101 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714
JH
2102 : isSPACE_LC_utf8((U8*)locinput)))
2103 {
a0ed51b3 2104 sayNO;
d6a28714 2105 }
6f06b55f 2106 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2107 nextchr = UCHARAT(locinput);
2108 break;
2109 }
d6a28714
JH
2110 if (!(OP(scan) == SPACEUTF8
2111 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2112 sayNO;
2113 nextchr = UCHARAT(++locinput);
2114 break;
d6a28714 2115 case NSPACEL:
3280af22 2116 PL_reg_flags |= RF_tainted;
bbce6d69 2117 /* FALL THROUGH */
d6a28714 2118 case NSPACE:
9442cb0e 2119 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2120 sayNO;
9442cb0e 2121 if (OP(scan) == NSPACE
d6a28714 2122 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2123 sayNO;
b8c5462f 2124 nextchr = UCHARAT(++locinput);
a0d0e21e 2125 break;
d6a28714 2126 case NSPACELUTF8:
a0ed51b3
LW
2127 PL_reg_flags |= RF_tainted;
2128 /* FALL THROUGH */
d6a28714 2129 case NSPACEUTF8:
9442cb0e 2130 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
2131 sayNO;
2132 if (nextchr & 0x80) {
d6a28714 2133 if (OP(scan) == NSPACEUTF8
9442cb0e 2134 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2135 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2136 {
2137 sayNO;
2138 }
2139 locinput += PL_utf8skip[nextchr];
2140 nextchr = UCHARAT(locinput);
2141 break;
a0ed51b3 2142 }
d6a28714
JH
2143 if (OP(scan) == NSPACEUTF8
2144 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2145 sayNO;
76e3520e 2146 nextchr = UCHARAT(++locinput);
a0d0e21e 2147 break;
d6a28714 2148 case DIGITL:
a0ed51b3
LW
2149 PL_reg_flags |= RF_tainted;
2150 /* FALL THROUGH */
d6a28714 2151 case DIGIT:
9442cb0e 2152 if (!nextchr)
a0ed51b3 2153 sayNO;
d6a28714
JH
2154 if (!(OP(scan) == DIGIT
2155 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2156 sayNO;
76e3520e 2157 nextchr = UCHARAT(++locinput);
a0d0e21e 2158 break;
d6a28714 2159 case DIGITLUTF8:
a0ed51b3
LW
2160 PL_reg_flags |= RF_tainted;
2161 /* FALL THROUGH */
d6a28714 2162 case DIGITUTF8:
a0ed51b3
LW
2163 if (!nextchr)
2164 sayNO;
2165 if (nextchr & 0x80) {
9442cb0e
GS
2166 if (!(OP(scan) == DIGITUTF8
2167 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2168 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2169 {
a0ed51b3 2170 sayNO;
dfe13c55 2171 }
6f06b55f 2172 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2173 nextchr = UCHARAT(locinput);
2174 break;
2175 }
9442cb0e
GS
2176 if (!(OP(scan) == DIGITUTF8
2177 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2178 sayNO;
2179 nextchr = UCHARAT(++locinput);
2180 break;
d6a28714 2181 case NDIGITL:
b8c5462f
JH
2182 PL_reg_flags |= RF_tainted;
2183 /* FALL THROUGH */
d6a28714 2184 case NDIGIT:
9442cb0e 2185 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2186 sayNO;
9442cb0e 2187 if (OP(scan) == NDIGIT
d6a28714 2188 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2189 sayNO;
76e3520e 2190 nextchr = UCHARAT(++locinput);
a0d0e21e 2191 break;
d6a28714 2192 case NDIGITLUTF8:
b8c5462f
JH
2193 PL_reg_flags |= RF_tainted;
2194 /* FALL THROUGH */
d6a28714 2195 case NDIGITUTF8:
b8c5462f
JH
2196 if (!nextchr && locinput >= PL_regeol)
2197 sayNO;
a0ed51b3 2198 if (nextchr & 0x80) {
9442cb0e
GS
2199 if (OP(scan) == NDIGITUTF8
2200 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2201 : isDIGIT_LC_utf8((U8*)locinput))
2202 {
a0ed51b3 2203 sayNO;
9442cb0e 2204 }
6f06b55f 2205 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2206 nextchr = UCHARAT(locinput);
2207 break;
2208 }
9442cb0e
GS
2209 if (OP(scan) == NDIGITUTF8
2210 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2211 sayNO;
2212 nextchr = UCHARAT(++locinput);
2213 break;
2214 case CLUMP:
dfe13c55 2215 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2216 sayNO;
6f06b55f 2217 locinput += PL_utf8skip[nextchr];
dfe13c55 2218 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2219 locinput += UTF8SKIP(locinput);
2220 if (locinput > PL_regeol)
2221 sayNO;
2222 nextchr = UCHARAT(locinput);
2223 break;
c8756f30 2224 case REFFL:
3280af22 2225 PL_reg_flags |= RF_tainted;
c8756f30 2226 /* FALL THROUGH */
c277df42 2227 case REF:
c8756f30 2228 case REFF:
c277df42 2229 n = ARG(scan); /* which paren pair */
cf93c79d 2230 ln = PL_regstartp[n];
2c2d71f5 2231 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2232 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2233 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2234 if (ln == PL_regendp[n])
a0d0e21e 2235 break;
a0ed51b3 2236
cf93c79d 2237 s = PL_bostr + ln;
a0ed51b3
LW
2238 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2239 char *l = locinput;
cf93c79d 2240 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2241 /*
2242 * Note that we can't do the "other character" lookup trick as
2243 * in the 8-bit case (no pun intended) because in Unicode we
2244 * have to map both upper and title case to lower case.
2245 */
2246 if (OP(scan) == REFF) {
2247 while (s < e) {
2248 if (l >= PL_regeol)
2249 sayNO;
dfe13c55 2250 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2251 sayNO;
2252 s += UTF8SKIP(s);
2253 l += UTF8SKIP(l);
2254 }
2255 }
2256 else {
2257 while (s < e) {
2258 if (l >= PL_regeol)
2259 sayNO;
dfe13c55 2260 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2261 sayNO;
2262 s += UTF8SKIP(s);
2263 l += UTF8SKIP(l);
2264 }
2265 }
2266 locinput = l;
2267 nextchr = UCHARAT(locinput);
2268 break;
2269 }
2270
a0d0e21e 2271 /* Inline the first character, for speed. */
76e3520e 2272 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2273 (OP(scan) == REF ||
2274 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2275 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2276 sayNO;
cf93c79d 2277 ln = PL_regendp[n] - ln;
3280af22 2278 if (locinput + ln > PL_regeol)
4633a7c4 2279 sayNO;
c8756f30
AK
2280 if (ln > 1 && (OP(scan) == REF
2281 ? memNE(s, locinput, ln)
2282 : (OP(scan) == REFF
2283 ? ibcmp(s, locinput, ln)
2284 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2285 sayNO;
a0d0e21e 2286 locinput += ln;
76e3520e 2287 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2288 break;
2289
2290 case NOTHING:
c277df42 2291 case TAIL:
a0d0e21e
LW
2292 break;
2293 case BACK:
2294 break;
c277df42
IZ
2295 case EVAL:
2296 {
2297 dSP;
533c011a 2298 OP_4tree *oop = PL_op;
3280af22
NIS
2299 COP *ocurcop = PL_curcop;
2300 SV **ocurpad = PL_curpad;
c277df42
IZ
2301 SV *ret;
2302
2303 n = ARG(scan);
533c011a 2304 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2305 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2306 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2307 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2308
cea2e8a9 2309 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2310 SPAGAIN;
2311 ret = POPs;
2312 PUTBACK;
2313
0f5d15d6
IZ
2314 PL_op = oop;
2315 PL_curpad = ocurpad;
2316 PL_curcop = ocurcop;
c277df42 2317 if (logical) {
0f5d15d6
IZ
2318 if (logical == 2) { /* Postponed subexpression. */
2319 regexp *re;
22c35a8c 2320 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2321 re_cc_state state;
0f5d15d6
IZ
2322 CHECKPOINT cp, lastcp;
2323
2324 if(SvROK(ret) || SvRMAGICAL(ret)) {
2325 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2326
2327 if(SvMAGICAL(sv))
2328 mg = mg_find(sv, 'r');
2329 }
2330 if (mg) {
2331 re = (regexp *)mg->mg_obj;
df0003d4 2332 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2333 }
2334 else {
2335 STRLEN len;
2336 char *t = SvPV(ret, len);
2337 PMOP pm;
2338 char *oprecomp = PL_regprecomp;
2339 I32 osize = PL_regsize;
2340 I32 onpar = PL_regnpar;
2341
2342 pm.op_pmflags = 0;
393fec97 2343 pm.op_pmdynflags = (UTF||DO_UTF8(ret) ? PMdf_UTF8 : 0);
cea2e8a9 2344 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2345 if (!(SvFLAGS(ret)
2346 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2347 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2348 PL_regprecomp = oprecomp;
2349 PL_regsize = osize;
2350 PL_regnpar = onpar;
2351 }
2352 DEBUG_r(
2353 PerlIO_printf(Perl_debug_log,
2354 "Entering embedded `%s%.60s%s%s'\n",
2355 PL_colors[0],
2356 re->precomp,
2357 PL_colors[1],
2358 (strlen(re->precomp) > 60 ? "..." : ""))
2359 );
2360 state.node = next;
2361 state.prev = PL_reg_call_cc;
2362 state.cc = PL_regcc;
2363 state.re = PL_reg_re;
2364
2ab05381 2365 PL_regcc = 0;
0f5d15d6
IZ
2366
2367 cp = regcppush(0); /* Save *all* the positions. */
2368 REGCP_SET;
2369 cache_re(re);
2370 state.ss = PL_savestack_ix;
2371 *PL_reglastparen = 0;
2372 PL_reg_call_cc = &state;
2373 PL_reginput = locinput;
2c2d71f5
JH
2374
2375 /* XXXX This is too dramatic a measure... */
2376 PL_reg_maxiter = 0;
2377
0f5d15d6 2378 if (regmatch(re->program + 1)) {
2c914db6
IZ
2379 /* Even though we succeeded, we need to restore
2380 global variables, since we may be wrapped inside
2381 SUSPEND, thus the match may be not finished yet. */
2382
2383 /* XXXX Do this only if SUSPENDed? */
2384 PL_reg_call_cc = state.prev;
2385 PL_regcc = state.cc;
2386 PL_reg_re = state.re;
2387 cache_re(PL_reg_re);
2388
2389 /* XXXX This is too dramatic a measure... */
2390 PL_reg_maxiter = 0;
2391
2392 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2393 ReREFCNT_dec(re);
2394 regcpblow(cp);
2395 sayYES;
2396 }
0f5d15d6
IZ
2397 ReREFCNT_dec(re);
2398 REGCP_UNWIND;
2399 regcppop();
2400 PL_reg_call_cc = state.prev;
2401 PL_regcc = state.cc;
2402 PL_reg_re = state.re;
d3790889 2403 cache_re(PL_reg_re);
2c2d71f5
JH
2404
2405 /* XXXX This is too dramatic a measure... */
2406 PL_reg_maxiter = 0;
2407
0f5d15d6
IZ
2408 sayNO;
2409 }
c277df42 2410 sw = SvTRUE(ret);
0f5d15d6 2411 logical = 0;
a0ed51b3
LW
2412 }
2413 else
3280af22 2414 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2415 break;
2416 }
a0d0e21e 2417 case OPEN:
c277df42 2418 n = ARG(scan); /* which paren pair */
3280af22
NIS
2419 PL_reg_start_tmp[n] = locinput;
2420 if (n > PL_regsize)
2421 PL_regsize = n;
a0d0e21e
LW
2422 break;
2423 case CLOSE:
c277df42 2424 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2425 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2426 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2427 if (n > *PL_reglastparen)
2428 *PL_reglastparen = n;
a0d0e21e 2429 break;
c277df42
IZ
2430 case GROUPP:
2431 n = ARG(scan); /* which paren pair */
cf93c79d 2432 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2433 break;
2434 case IFTHEN:
2c2d71f5 2435 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2436 if (sw)
2437 next = NEXTOPER(NEXTOPER(scan));
2438 else {
2439 next = scan + ARG(scan);
2440 if (OP(next) == IFTHEN) /* Fake one. */
2441 next = NEXTOPER(NEXTOPER(next));
2442 }
2443 break;
2444 case LOGICAL:
0f5d15d6 2445 logical = scan->flags;
c277df42 2446 break;
2ab05381
IZ
2447/*******************************************************************
2448 PL_regcc contains infoblock about the innermost (...)* loop, and
2449 a pointer to the next outer infoblock.
2450
2451 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2452
2453 1) After matching X, regnode for CURLYX is processed;
2454
2455 2) This regnode creates infoblock on the stack, and calls
2456 regmatch() recursively with the starting point at WHILEM node;
2457
2458 3) Each hit of WHILEM node tries to match A and Z (in the order
2459 depending on the current iteration, min/max of {min,max} and
2460 greediness). The information about where are nodes for "A"
2461 and "Z" is read from the infoblock, as is info on how many times "A"
2462 was already matched, and greediness.
2463
2464 4) After A matches, the same WHILEM node is hit again.
2465
2466 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2467 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2468 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2469 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2470 of the external loop.
2471
2472 Currently present infoblocks form a tree with a stem formed by PL_curcc
2473 and whatever it mentions via ->next, and additional attached trees
2474 corresponding to temporarily unset infoblocks as in "5" above.
2475
2476 In the following picture infoblocks for outer loop of
2477 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2478 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2479 infoblocks are drawn below the "reset" infoblock.
2480
2481 In fact in the picture below we do not show failed matches for Z and T
2482 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2483 more obvious *why* one needs to *temporary* unset infoblocks.]
2484
2485 Matched REx position InfoBlocks Comment
2486 (Y(A)*?Z)*?T x
2487 Y(A)*?Z)*?T x <- O
2488 Y (A)*?Z)*?T x <- O
2489 Y A)*?Z)*?T x <- O <- I
2490 YA )*?Z)*?T x <- O <- I
2491 YA A)*?Z)*?T x <- O <- I
2492 YAA )*?Z)*?T x <- O <- I
2493 YAA Z)*?T x <- O # Temporary unset I
2494 I
2495
2496 YAAZ Y(A)*?Z)*?T x <- O
2497 I
2498
2499 YAAZY (A)*?Z)*?T x <- O
2500 I
2501
2502 YAAZY A)*?Z)*?T x <- O <- I
2503 I
2504
2505 YAAZYA )*?Z)*?T x <- O <- I
2506 I
2507
2508 YAAZYA Z)*?T x <- O # Temporary unset I
2509 I,I
2510
2511 YAAZYAZ )*?T x <- O
2512 I,I
2513
2514 YAAZYAZ T x # Temporary unset O
2515 O
2516 I,I
2517
2518 YAAZYAZT x
2519 O
2520 I,I
2521 *******************************************************************/
a0d0e21e
LW
2522 case CURLYX: {
2523 CURCUR cc;
3280af22 2524 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2525
2526 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2527 next += ARG(next);
3280af22
NIS
2528 cc.oldcc = PL_regcc;
2529 PL_regcc = &cc;
2530 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2531 cc.cur = -1;
2532 cc.min = ARG1(scan);
2533 cc.max = ARG2(scan);
c277df42 2534 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2535 cc.next = next;
2536 cc.minmod = minmod;
2537 cc.lastloc = 0;
3280af22 2538 PL_reginput = locinput;
a0d0e21e
LW
2539 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2540 regcpblow(cp);
3280af22 2541 PL_regcc = cc.oldcc;
4633a7c4 2542 saySAME(n);
a0d0e21e
LW
2543 }
2544 /* NOT REACHED */
2545 case WHILEM: {
2546 /*
2547 * This is really hard to understand, because after we match
2548 * what we're trying to match, we must make sure the rest of
2c2d71f5 2549 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2550 * to go back UP the parse tree by recursing ever deeper. And
2551 * if it fails, we have to reset our parent's current state
2552 * that we can try again after backing off.
2553 */
2554
c277df42 2555 CHECKPOINT cp, lastcp;
3280af22 2556 CURCUR* cc = PL_regcc;
c277df42
IZ
2557 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2558
4633a7c4 2559 n = cc->cur + 1; /* how many we know we matched */
3280af22 2560 PL_reginput = locinput;
a0d0e21e 2561
c277df42
IZ
2562 DEBUG_r(
2563 PerlIO_printf(Perl_debug_log,
2564 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2565 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2566 (long)n, (long)cc->min,
2567 (long)cc->max, (long)cc)
2568 );
4633a7c4 2569
a0d0e21e
LW
2570 /* If degenerate scan matches "", assume scan done. */
2571
579cf2c3 2572 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2573 PL_regcc = cc->oldcc;
2ab05381
IZ
2574 if (PL_regcc)
2575 ln = PL_regcc->cur;
c277df42 2576 DEBUG_r(
c3464db5
DD
2577 PerlIO_printf(Perl_debug_log,
2578 "%*s empty match detected, try continuation...\n",
3280af22 2579 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2580 );
a0d0e21e 2581 if (regmatch(cc->next))
4633a7c4 2582 sayYES;
2ab05381
IZ
2583 if (PL_regcc)
2584 PL_regcc->cur = ln;
3280af22 2585 PL_regcc = cc;
4633a7c4 2586 sayNO;
a0d0e21e
LW
2587 }
2588
2589 /* First just match a string of min scans. */
2590
2591 if (n < cc->min) {
2592 cc->cur = n;
2593 cc->lastloc = locinput;
4633a7c4
LW
2594 if (regmatch(cc->scan))
2595 sayYES;
2596 cc->cur = n - 1;
c277df42 2597 cc->lastloc = lastloc;
4633a7c4 2598 sayNO;
a0d0e21e
LW
2599 }
2600
2c2d71f5
JH
2601 if (scan->flags) {
2602 /* Check whether we already were at this position.
2603 Postpone detection until we know the match is not
2604 *that* much linear. */
2605 if (!PL_reg_maxiter) {
2606 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2607 PL_reg_leftiter = PL_reg_maxiter;
2608 }
2609 if (PL_reg_leftiter-- == 0) {
2610 I32 size = (PL_reg_maxiter + 7)/8;
2611 if (PL_reg_poscache) {
2612 if (PL_reg_poscache_size < size) {
2613 Renew(PL_reg_poscache, size, char);
2614 PL_reg_poscache_size = size;
2615 }
2616 Zero(PL_reg_poscache, size, char);
2617 }
2618 else {
2619 PL_reg_poscache_size = size;
2620 Newz(29, PL_reg_poscache, size, char);
2621 }
2622 DEBUG_r(
2623 PerlIO_printf(Perl_debug_log,
2624 "%sDetected a super-linear match, switching on caching%s...\n",
2625 PL_colors[4], PL_colors[5])
2626 );
2627 }
2628 if (PL_reg_leftiter < 0) {
2629 I32 o = locinput - PL_bostr, b;
2630
2631 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2632 b = o % 8;
2633 o /= 8;
2634 if (PL_reg_poscache[o] & (1<<b)) {
2635 DEBUG_r(
2636 PerlIO_printf(Perl_debug_log,
2637 "%*s already tried at this position...\n",
2638 REPORT_CODE_OFF+PL_regindent*2, "")
2639 );
7821416a 2640 sayNO_SILENT;
2c2d71f5
JH
2641 }
2642 PL_reg_poscache[o] |= (1<<b);
2643 }
2644 }
2645
a0d0e21e
LW
2646 /* Prefer next over scan for minimal matching. */
2647
2648 if (cc->minmod) {
3280af22 2649 PL_regcc = cc->oldcc;
2ab05381
IZ
2650 if (PL_regcc)
2651 ln = PL_regcc->cur;
5f05dabc 2652 cp = regcppush(cc->parenfloor);
c277df42 2653 REGCP_SET;
5f05dabc 2654 if (regmatch(cc->next)) {
c277df42 2655 regcpblow(cp);
4633a7c4 2656 sayYES; /* All done. */
5f05dabc 2657 }
c277df42 2658 REGCP_UNWIND;
5f05dabc 2659 regcppop();
2ab05381
IZ
2660 if (PL_regcc)
2661 PL_regcc->cur = ln;
3280af22 2662 PL_regcc = cc;
a0d0e21e 2663
c277df42 2664 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2665 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2666 && !(PL_reg_flags & RF_warned)) {
2667 PL_reg_flags |= RF_warned;
e476b1b5 2668 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2669 "Complex regular subexpression recursion",
2670 REG_INFTY - 1);
c277df42 2671 }
4633a7c4 2672 sayNO;
c277df42 2673 }
a687059c 2674
c277df42 2675 DEBUG_r(
c3464db5
DD
2676 PerlIO_printf(Perl_debug_log,
2677 "%*s trying longer...\n",
3280af22 2678 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2679 );
a0d0e21e 2680 /* Try scanning more and see if it helps. */
3280af22 2681 PL_reginput = locinput;
a0d0e21e
LW
2682 cc->cur = n;
2683 cc->lastloc = locinput;
5f05dabc 2684 cp = regcppush(cc->parenfloor);
c277df42 2685 REGCP_SET;
5f05dabc 2686 if (regmatch(cc->scan)) {
c277df42 2687 regcpblow(cp);
4633a7c4 2688 sayYES;
5f05dabc 2689 }
c277df42 2690 REGCP_UNWIND;
5f05dabc 2691 regcppop();
4633a7c4 2692 cc->cur = n - 1;
c277df42 2693 cc->lastloc = lastloc;
4633a7c4 2694 sayNO;
a0d0e21e
LW
2695 }
2696
2697 /* Prefer scan over next for maximal matching. */
2698
2699 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2700 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2701 cc->cur = n;
2702 cc->lastloc = locinput;
c277df42 2703 REGCP_SET;
5f05dabc 2704 if (regmatch(cc->scan)) {
c277df42 2705 regcpblow(cp);
4633a7c4 2706 sayYES;
5f05dabc 2707 }
c277df42 2708 REGCP_UNWIND;
a0d0e21e 2709 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2710 PL_reginput = locinput;
c277df42 2711 DEBUG_r(
c3464db5
DD
2712 PerlIO_printf(Perl_debug_log,
2713 "%*s failed, try continuation...\n",
3280af22 2714 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2715 );
2716 }
e476b1b5 2717 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2718 && !(PL_reg_flags & RF_warned)) {
3280af22 2719 PL_reg_flags |= RF_warned;
e476b1b5 2720 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2721 "Complex regular subexpression recursion",
2722 REG_INFTY - 1);
a0d0e21e
LW
2723 }
2724
2725 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2726 PL_regcc = cc->oldcc;
2ab05381
IZ
2727 if (PL_regcc)
2728 ln = PL_regcc->cur;
a0d0e21e 2729 if (regmatch(cc->next))
4633a7c4 2730 sayYES;
2ab05381
IZ
2731 if (PL_regcc)
2732 PL_regcc->cur = ln;
3280af22 2733 PL_regcc = cc;
4633a7c4 2734 cc->cur = n - 1;
c277df42 2735 cc->lastloc = lastloc;
4633a7c4 2736 sayNO;
a0d0e21e
LW
2737 }
2738 /* NOT REACHED */
c277df42
IZ
2739 case BRANCHJ:
2740 next = scan + ARG(scan);
2741 if (next == scan)
2742 next = NULL;
2743 inner = NEXTOPER(NEXTOPER(scan));
2744 goto do_branch;
2745 case BRANCH:
2746 inner = NEXTOPER(scan);
2747 do_branch:
2748 {
2749 CHECKPOINT lastcp;
2750 c1 = OP(scan);
2751 if (OP(next) != c1) /* No choice. */
2752 next = inner; /* Avoid recursion. */
a0d0e21e 2753 else {
3280af22 2754 int lastparen = *PL_reglastparen;
c277df42
IZ
2755
2756 REGCP_SET;
a0d0e21e 2757 do {
3280af22 2758 PL_reginput = locinput;
c277df42 2759 if (regmatch(inner))
4633a7c4 2760 sayYES;
c277df42 2761 REGCP_UNWIND;
3280af22 2762 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2763 PL_regendp[n] = -1;
3280af22 2764 *PL_reglastparen = n;
c277df42 2765 scan = next;
a0d0e21e 2766 /*SUPPRESS 560*/
155aba94 2767 if ((n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next))))
c277df42 2768 next += n;
a0d0e21e 2769 else
c277df42 2770 next = NULL;
c277df42
IZ
2771 inner = NEXTOPER(scan);
2772 if (c1 == BRANCHJ) {
2773 inner = NEXTOPER(inner);
2774 }
2775 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2776 sayNO;
a0d0e21e 2777 /* NOTREACHED */
a687059c 2778 }
a0d0e21e
LW
2779 }
2780 break;
2781 case MINMOD:
2782 minmod = 1;
2783 break;
c277df42
IZ
2784 case CURLYM:
2785 {
00db4c45 2786 I32 l = 0;
c277df42
IZ
2787 CHECKPOINT lastcp;
2788
2789 /* We suppose that the next guy does not need
2790 backtracking: in particular, it is of constant length,
2791 and has no parenths to influence future backrefs. */
2792 ln = ARG1(scan); /* min to match */
2793 n = ARG2(scan); /* max to match */
c277df42
IZ
2794 paren = scan->flags;
2795 if (paren) {
3280af22
NIS
2796 if (paren > PL_regsize)
2797 PL_regsize = paren;
2798 if (paren > *PL_reglastparen)
2799 *PL_reglastparen = paren;
c277df42 2800 }
dc45a647 2801 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2802 if (paren)
2803 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2804 PL_reginput = locinput;
c277df42
IZ
2805 if (minmod) {
2806 minmod = 0;
2807 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2808 sayNO;
5f4b28b2 2809 if (ln && l == 0 && n >= ln
c277df42
IZ
2810 /* In fact, this is tricky. If paren, then the
2811 fact that we did/didnot match may influence
2812 future execution. */
2813 && !(paren && ln == 0))
2814 ln = n;
3280af22 2815 locinput = PL_reginput;
22c35a8c 2816 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2817 c1 = (U8)*STRING(next);
c277df42 2818 if (OP(next) == EXACTF)
22c35a8c 2819 c2 = PL_fold[c1];
c277df42 2820 else if (OP(next) == EXACTFL)
22c35a8c 2821 c2 = PL_fold_locale[c1];
c277df42
IZ
2822 else
2823 c2 = c1;
a0ed51b3
LW
2824 }
2825 else
c277df42
IZ
2826 c1 = c2 = -1000;
2827 REGCP_SET;
5f4b28b2 2828 /* This may be improved if l == 0. */
c277df42
IZ
2829 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2830 /* If it could work, try it. */
2831 if (c1 == -1000 ||
3280af22
NIS
2832 UCHARAT(PL_reginput) == c1 ||
2833 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2834 {
2835 if (paren) {
2836 if (n) {
cf93c79d
IZ
2837 PL_regstartp[paren] =
2838 HOPc(PL_reginput, -l) - PL_bostr;
2839 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2840 }
2841 else
cf93c79d 2842 PL_regendp[paren] = -1;
c277df42
IZ
2843 }
2844 if (regmatch(next))
2845 sayYES;
2846 REGCP_UNWIND;
2847 }
2848 /* Couldn't or didn't -- move forward. */
3280af22 2849 PL_reginput = locinput;
c277df42
IZ
2850 if (regrepeat_hard(scan, 1, &l)) {
2851 ln++;
3280af22 2852 locinput = PL_reginput;
c277df42
IZ
2853 }
2854 else
2855 sayNO;
2856 }
a0ed51b3
LW
2857 }
2858 else {
c277df42
IZ
2859 n = regrepeat_hard(scan, n, &l);
2860 if (n != 0 && l == 0
2861 /* In fact, this is tricky. If paren, then the
2862 fact that we did/didnot match may influence
2863 future execution. */
2864 && !(paren && ln == 0))
2865 ln = n;
3280af22 2866 locinput = PL_reginput;
c277df42 2867 DEBUG_r(
5c0ca799 2868 PerlIO_printf(Perl_debug_log,
faccc32b 2869 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2870 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2871 (IV) n, (IV)l)
c277df42
IZ
2872 );
2873 if (n >= ln) {
22c35a8c 2874 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2875 c1 = (U8)*STRING(next);
c277df42 2876 if (OP(next) == EXACTF)
22c35a8c 2877 c2 = PL_fold[c1];
c277df42 2878 else if (OP(next) == EXACTFL)
22c35a8c 2879 c2 = PL_fold_locale[c1];
c277df42
IZ
2880 else
2881 c2 = c1;
a0ed51b3
LW
2882 }
2883 else
c277df42
IZ
2884 c1 = c2 = -1000;
2885 }
2886 REGCP_SET;
2887 while (n >= ln) {
2888 /* If it could work, try it. */
2889 if (c1 == -1000 ||
3280af22
NIS
2890 UCHARAT(PL_reginput) == c1 ||
2891 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2892 {
2893 DEBUG_r(
c3464db5 2894 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2895 "%*s trying tail with n=%"IVdf"...\n",
2896 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2897 );
2898 if (paren) {
2899 if (n) {
cf93c79d
IZ
2900 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2901 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2902 }
a0ed51b3 2903 else
cf93c79d 2904 PL_regendp[paren] = -1;
c277df42 2905 }
a0ed51b3
LW
2906 if (regmatch(next))
2907 sayYES;
2908 REGCP_UNWIND;
2909 }
c277df42
IZ
2910 /* Couldn't or didn't -- back up. */
2911 n--;
dfe13c55 2912 locinput = HOPc(locinput, -l);
3280af22 2913 PL_reginput = locinput;
c277df42
IZ
2914 }
2915 }
2916 sayNO;
2917 break;
2918 }
2919 case CURLYN:
2920 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2921 if (paren > PL_regsize)
2922 PL_regsize = paren;
2923 if (paren > *PL_reglastparen)
2924 *PL_reglastparen = paren;
c277df42
IZ
2925 ln = ARG1(scan); /* min to match */
2926 n = ARG2(scan); /* max to match */
dc45a647 2927 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2928 goto repeat;
a0d0e21e 2929 case CURLY:
c277df42 2930 paren = 0;
a0d0e21e
LW
2931 ln = ARG1(scan); /* min to match */
2932 n = ARG2(scan); /* max to match */
dc45a647 2933 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2934 goto repeat;
2935 case STAR:
2936 ln = 0;
c277df42 2937 n = REG_INFTY;
a0d0e21e 2938 scan = NEXTOPER(scan);
c277df42 2939 paren = 0;
a0d0e21e
LW
2940 goto repeat;
2941 case PLUS:
c277df42
IZ
2942 ln = 1;
2943 n = REG_INFTY;
2944 scan = NEXTOPER(scan);
2945 paren = 0;
2946 repeat:
a0d0e21e
LW
2947 /*
2948 * Lookahead to avoid useless match attempts
2949 * when we know what character comes next.
2950 */
22c35a8c 2951 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2952 c1 = (U8)*STRING(next);
bbce6d69 2953 if (OP(next) == EXACTF)
22c35a8c 2954 c2 = PL_fold[c1];
bbce6d69 2955 else if (OP(next) == EXACTFL)
22c35a8c 2956 c2 = PL_fold_locale[c1];
bbce6d69 2957 else
2958 c2 = c1;
2959 }
a0d0e21e 2960 else
bbce6d69 2961 c1 = c2 = -1000;
3280af22 2962 PL_reginput = locinput;
a0d0e21e 2963 if (minmod) {
c277df42 2964 CHECKPOINT lastcp;
a0d0e21e
LW
2965 minmod = 0;
2966 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2967 sayNO;
a0ed51b3 2968 locinput = PL_reginput;
c277df42 2969 REGCP_SET;
0fe9bf95
IZ
2970 if (c1 != -1000) {
2971 char *e = locinput + n - ln; /* Should not check after this */
2972 char *old = locinput;
2973
2974 if (e >= PL_regeol || (n == REG_INFTY))
2975 e = PL_regeol - 1;
2976 while (1) {
2977 /* Find place 'next' could work */
2978 if (c1 == c2) {
2979 while (locinput <= e && *locinput != c1)
2980 locinput++;
2981 } else {
2982 while (locinput <= e
2983 && *locinput != c1
2984 && *locinput != c2)
2985 locinput++;
2986 }
2987 if (locinput > e)
2988 sayNO;
2989 /* PL_reginput == old now */
2990 if (locinput != old) {
2991 ln = 1; /* Did some */
2992 if (regrepeat(scan, locinput - old) <
2993 locinput - old)
2994 sayNO;
2995 }
2996 /* PL_reginput == locinput now */
2997 if (paren) {
2998 if (ln) {
cf93c79d
IZ
2999 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
3000 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
3001 }
3002 else
cf93c79d 3003 PL_regendp[paren] = -1;
0fe9bf95
IZ
3004 }
3005 if (regmatch(next))
3006 sayYES;
3007 PL_reginput = locinput; /* Could be reset... */
3008 REGCP_UNWIND;
3009 /* Couldn't or didn't -- move forward. */
3010 old = locinput++;
3011 }
3012 }
3013 else
c277df42 3014 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 3015 /* If it could work, try it. */
bbce6d69 3016 if (c1 == -1000 ||
3280af22
NIS
3017 UCHARAT(PL_reginput) == c1 ||
3018 UCHARAT(PL_reginput) == c2)
bbce6d69 3019 {
c277df42
IZ
3020 if (paren) {
3021 if (n) {
cf93c79d
IZ
3022 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3023 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3024 }
3025 else
cf93c79d 3026 PL_regendp[paren] = -1;
c277df42 3027 }
a0d0e21e 3028 if (regmatch(next))
4633a7c4 3029 sayYES;
c277df42 3030 REGCP_UNWIND;
bbce6d69 3031 }
c277df42 3032 /* Couldn't or didn't -- move forward. */
a0ed51b3 3033 PL_reginput = locinput;
a0d0e21e
LW
3034 if (regrepeat(scan, 1)) {
3035 ln++;
a0ed51b3
LW
3036 locinput = PL_reginput;
3037 }
3038 else
4633a7c4 3039 sayNO;
a0d0e21e
LW
3040 }
3041 }
3042 else {
c277df42 3043 CHECKPOINT lastcp;
a0d0e21e 3044 n = regrepeat(scan, n);
a0ed51b3 3045 locinput = PL_reginput;
22c35a8c 3046 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3047 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3048 ln = n; /* why back off? */
1aeab75a
GS
3049 /* ...because $ and \Z can match before *and* after
3050 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3051 We should back off by one in this case. */
3052 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3053 ln--;
3054 }
c277df42
IZ
3055 REGCP_SET;
3056 if (paren) {
3057 while (n >= ln) {
3058 /* If it could work, try it. */
3059 if (c1 == -1000 ||
3280af22
NIS
3060 UCHARAT(PL_reginput) == c1 ||
3061 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3062 {
3063 if (paren && n) {
3064 if (n) {
cf93c79d
IZ
3065 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3066 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3067 }
3068 else
cf93c79d 3069 PL_regendp[paren] = -1;
c277df42
IZ
3070 }
3071 if (regmatch(next))
3072 sayYES;
3073 REGCP_UNWIND;
3074 }
3075 /* Couldn't or didn't -- back up. */
3076 n--;
dfe13c55 3077 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3078 }
a0ed51b3
LW
3079 }
3080 else {
c277df42
IZ
3081 while (n >= ln) {
3082 /* If it could work, try it. */
3083 if (c1 == -1000 ||
3280af22
NIS
3084 UCHARAT(PL_reginput) == c1 ||
3085 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3086 {
3087 if (regmatch(next))
3088 sayYES;
3089 REGCP_UNWIND;
3090 }
3091 /* Couldn't or didn't -- back up. */
3092 n--;
dfe13c55 3093 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3094 }
a0d0e21e
LW
3095 }
3096 }
4633a7c4 3097 sayNO;
c277df42 3098 break;
a0d0e21e 3099 case END:
0f5d15d6
IZ
3100 if (PL_reg_call_cc) {
3101 re_cc_state *cur_call_cc = PL_reg_call_cc;
3102 CURCUR *cctmp = PL_regcc;
3103 regexp *re = PL_reg_re;
3104 CHECKPOINT cp, lastcp;
3105
3106 cp = regcppush(0); /* Save *all* the positions. */
3107 REGCP_SET;
3108 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3109 the caller. */
3110 PL_reginput = locinput; /* Make position available to
3111 the callcc. */
3112 cache_re(PL_reg_call_cc->re);
3113 PL_regcc = PL_reg_call_cc->cc;
3114 PL_reg_call_cc = PL_reg_call_cc->prev;
3115 if (regmatch(cur_call_cc->node)) {
3116 PL_reg_call_cc = cur_call_cc;
3117 regcpblow(cp);
3118 sayYES;
3119 }
3120 REGCP_UNWIND;
3121 regcppop();
3122 PL_reg_call_cc = cur_call_cc;
3123 PL_regcc = cctmp;
3124 PL_reg_re = re;
3125 cache_re(re);
3126
3127 DEBUG_r(
3128 PerlIO_printf(Perl_debug_log,
3129 "%*s continuation failed...\n",
3130 REPORT_CODE_OFF+PL_regindent*2, "")
3131 );
7821416a 3132 sayNO_SILENT;
0f5d15d6 3133 }
7821416a
IZ
3134 if (locinput < PL_regtill) {
3135 DEBUG_r(PerlIO_printf(Perl_debug_log,
3136 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3137 PL_colors[4],
3138 (long)(locinput - PL_reg_starttry),
3139 (long)(PL_regtill - PL_reg_starttry),
3140 PL_colors[5]));
3141 sayNO_FINAL; /* Cannot match: too short. */
3142 }
3143 PL_reginput = locinput; /* put where regtry can find it */
3144 sayYES_FINAL; /* Success! */
7e5428c5 3145 case SUCCEED:
3280af22 3146 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3147 sayYES_LOUD; /* Success! */
c277df42
IZ
3148 case SUSPEND:
3149 n = 1;
9fe1d20c 3150 PL_reginput = locinput;
c277df42 3151 goto do_ifmatch;
a0d0e21e 3152 case UNLESSM:
c277df42 3153 n = 0;
a0ed51b3 3154 if (scan->flags) {
0fe9bf95
IZ
3155 if (UTF) { /* XXXX This is absolutely
3156 broken, we read before
3157 start of string. */
3158 s = HOPMAYBEc(locinput, -scan->flags);
3159 if (!s)
3160 goto say_yes;
3161 PL_reginput = s;
3162 }
3163 else {
3164 if (locinput < PL_bostr + scan->flags)
3165 goto say_yes;
3166 PL_reginput = locinput - scan->flags;
3167 goto do_ifmatch;
3168 }
a0ed51b3
LW
3169 }
3170 else
3171 PL_reginput = locinput;
c277df42
IZ
3172 goto do_ifmatch;
3173 case IFMATCH:
3174 n = 1;
a0ed51b3 3175 if (scan->flags) {
0fe9bf95
IZ
3176 if (UTF) { /* XXXX This is absolutely
3177 broken, we read before
3178 start of string. */
3179 s = HOPMAYBEc(locinput, -scan->flags);
3180 if (!s || s < PL_bostr)
3181 goto say_no;
3182 PL_reginput = s;
3183 }
3184 else {
3185 if (locinput < PL_bostr + scan->flags)
3186 goto say_no;
3187 PL_reginput = locinput - scan->flags;
3188 goto do_ifmatch;
3189 }
a0ed51b3
LW
3190 }
3191 else
3192 PL_reginput = locinput;
3193
c277df42 3194 do_ifmatch:
c277df42
IZ
3195 inner = NEXTOPER(NEXTOPER(scan));
3196 if (regmatch(inner) != n) {
3197 say_no:
3198 if (logical) {
3199 logical = 0;
3200 sw = 0;
3201 goto do_longjump;
a0ed51b3
LW
3202 }
3203 else
c277df42
IZ
3204 sayNO;
3205 }
3206 say_yes:
3207 if (logical) {
3208 logical = 0;
3209 sw = 1;
3210 }
fe44a5e8 3211 if (OP(scan) == SUSPEND) {
3280af22 3212 locinput = PL_reginput;
565764a8 3213 nextchr = UCHARAT(locinput);
fe44a5e8 3214 }
c277df42
IZ
3215 /* FALL THROUGH. */
3216 case LONGJMP:
3217 do_longjump:
3218 next = scan + ARG(scan);
3219 if (next == scan)
3220 next = NULL;
a0d0e21e
LW
3221 break;
3222 default:
b900a521 3223 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3224 PTR2UV(scan), OP(scan));
cea2e8a9 3225 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3226 }
a0d0e21e
LW
3227 scan = next;
3228 }
a687059c 3229
a0d0e21e
LW
3230 /*
3231 * We get here only if there's trouble -- normally "case END" is
3232 * the terminating point.
3233 */
cea2e8a9 3234 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3235 /*NOTREACHED*/
4633a7c4
LW
3236 sayNO;
3237
7821416a
IZ
3238yes_loud:
3239 DEBUG_r(
3240 PerlIO_printf(Perl_debug_log,
3241 "%*s %scould match...%s\n",
3242 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3243 );
3244 goto yes;
3245yes_final:
3246 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3247 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3248yes:
3249#ifdef DEBUGGING
3280af22 3250 PL_regindent--;
4633a7c4
LW
3251#endif
3252 return 1;
3253
3254no:
7821416a
IZ
3255 DEBUG_r(
3256 PerlIO_printf(Perl_debug_log,
3257 "%*s %sfailed...%s\n",
3258 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3259 );
3260 goto do_no;
3261no_final:
3262do_no:
4633a7c4 3263#ifdef DEBUGGING
3280af22 3264 PL_regindent--;
4633a7c4 3265#endif
a0d0e21e 3266 return 0;
a687059c
LW
3267}
3268
3269/*
3270 - regrepeat - repeatedly match something simple, report how many
3271 */
3272/*
3273 * [This routine now assumes that it will only match on things of length 1.
3274 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3275 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3276 */
76e3520e 3277STATIC I32
cea2e8a9 3278S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3279{
5c0ca799 3280 dTHR;
a0d0e21e 3281 register char *scan;
a0d0e21e 3282 register I32 c;
3280af22 3283 register char *loceol = PL_regeol;
a0ed51b3 3284 register I32 hardcount = 0;
a0d0e21e 3285
3280af22 3286 scan = PL_reginput;
c277df42 3287 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3288 loceol = scan + max;
a0d0e21e 3289 switch (OP(p)) {
22c35a8c 3290 case REG_ANY:
a0d0e21e
LW
3291 while (scan < loceol && *scan != '\n')
3292 scan++;
3293 break;
3294 case SANY:
3295 scan = loceol;
3296 break;
a0ed51b3
LW
3297 case ANYUTF8:
3298 loceol = PL_regeol;
3299 while (scan < loceol && *scan != '\n') {
3300 scan += UTF8SKIP(scan);
3301 hardcount++;
3302 }
3303 break;
3304 case SANYUTF8:
3305 loceol = PL_regeol;
3306 while (scan < loceol) {
3307 scan += UTF8SKIP(scan);
3308 hardcount++;
3309 }
3310 break;
bbce6d69 3311 case EXACT: /* length of string is 1 */
cd439c50 3312 c = (U8)*STRING(p);
bbce6d69 3313 while (scan < loceol && UCHARAT(scan) == c)
3314 scan++;
3315 break;
3316 case EXACTF: /* length of string is 1 */
cd439c50 3317 c = (U8)*STRING(p);
bbce6d69 3318 while (scan < loceol &&
22c35a8c 3319 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3320 scan++;
3321 break;
3322 case EXACTFL: /* length of string is 1 */
3280af22 3323 PL_reg_flags |= RF_tainted;
cd439c50 3324 c = (U8)*STRING(p);
bbce6d69 3325 while (scan < loceol &&
22c35a8c 3326 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3327 scan++;
3328 break;
a0ed51b3
LW
3329 case ANYOFUTF8:
3330 loceol = PL_regeol;
3331 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3332 scan += UTF8SKIP(scan);
3333 hardcount++;
3334 }
3335 break;
a0d0e21e 3336 case ANYOF:
936ed897 3337 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3338 scan++;
a0d0e21e
LW
3339 break;
3340 case ALNUM:
3341 while (scan < loceol && isALNUM(*scan))
3342 scan++;
3343 break;
a0ed51b3
LW
3344 case ALNUMUTF8:
3345 loceol = PL_regeol;
dfe13c55 3346 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3347 scan += UTF8SKIP(scan);
3348 hardcount++;
3349 }
3350 break;
bbce6d69 3351 case ALNUML:
3280af22 3352 PL_reg_flags |= RF_tainted;
bbce6d69 3353 while (scan < loceol && isALNUM_LC(*scan))
3354 scan++;
3355 break;
a0ed51b3
LW
3356 case ALNUMLUTF8:
3357 PL_reg_flags |= RF_tainted;
3358 loceol = PL_regeol;
dfe13c55 3359 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3360 scan += UTF8SKIP(scan);
3361 hardcount++;
3362 }
3363 break;
3364 break;
a0d0e21e
LW
3365 case NALNUM:
3366 while (scan < loceol && !isALNUM(*scan))
3367 scan++;
3368 break;
a0ed51b3
LW
3369 case NALNUMUTF8:
3370 loceol = PL_regeol;
dfe13c55 3371 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3372 scan += UTF8SKIP(scan);
3373 hardcount++;
3374 }
3375 break;
bbce6d69 3376 case NALNUML:
3280af22 3377 PL_reg_flags |= RF_tainted;
bbce6d69 3378 while (scan < loceol && !isALNUM_LC(*scan))
3379 scan++;
3380 break;
a0ed51b3
LW
3381 case NALNUMLUTF8:
3382 PL_reg_flags |= RF_tainted;
3383 loceol = PL_regeol;
dfe13c55 3384 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3385 scan += UTF8SKIP(scan);
3386 hardcount++;
3387 }
3388 break;
a0d0e21e
LW
3389 case SPACE:
3390 while (scan < loceol && isSPACE(*scan))
3391 scan++;
3392 break;
a0ed51b3
LW
3393 case SPACEUTF8:
3394 loceol = PL_regeol;
dfe13c55 3395 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3396 scan += UTF8SKIP(scan);
3397 hardcount++;
3398 }
3399 break;
bbce6d69 3400 case SPACEL:
3280af22 3401 PL_reg_flags |= RF_tainted;
bbce6d69 3402 while (scan < loceol && isSPACE_LC(*scan))
3403 scan++;
3404 break;
a0ed51b3
LW
3405 case SPACELUTF8:
3406 PL_reg_flags |= RF_tainted;
3407 loceol = PL_regeol;
dfe13c55 3408 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3409 scan += UTF8SKIP(scan);
3410 hardcount++;
3411 }
3412 break;
a0d0e21e
LW
3413 case NSPACE:
3414 while (scan < loceol && !isSPACE(*scan))
3415 scan++;
3416 break;
a0ed51b3
LW
3417 case NSPACEUTF8:
3418 loceol = PL_regeol;
dfe13c55 3419 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3420 scan += UTF8SKIP(scan);
3421 hardcount++;
3422 }
3423 break;
bbce6d69 3424 case NSPACEL:
3280af22 3425 PL_reg_flags |= RF_tainted;
bbce6d69 3426 while (scan < loceol && !isSPACE_LC(*scan))
3427 scan++;
3428 break;
a0ed51b3
LW
3429 case NSPACELUTF8:
3430 PL_reg_flags |= RF_tainted;
3431 loceol = PL_regeol;
dfe13c55 3432 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3433 scan += UTF8SKIP(scan);
3434 hardcount++;
3435 }
3436 break;
a0d0e21e
LW
3437 case DIGIT:
3438 while (scan < loceol && isDIGIT(*scan))
3439 scan++;
3440 break;
a0ed51b3
LW
3441 case DIGITUTF8:
3442 loceol = PL_regeol;
dfe13c55 3443 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3444 scan += UTF8SKIP(scan);
3445 hardcount++;
3446 }
3447 break;
3448 break;
a0d0e21e
LW
3449 case NDIGIT:
3450 while (scan < loceol && !isDIGIT(*scan))
3451 scan++;
3452 break;
a0ed51b3
LW
3453 case NDIGITUTF8:
3454 loceol = PL_regeol;
dfe13c55 3455 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3456 scan += UTF8SKIP(scan);
3457 hardcount++;
3458 }
3459 break;
a0d0e21e
LW
3460 default: /* Called on something of 0 width. */
3461 break; /* So match right here or not at all. */
3462 }
a687059c 3463
a0ed51b3
LW
3464 if (hardcount)
3465 c = hardcount;
3466 else
3467 c = scan - PL_reginput;
3280af22 3468 PL_reginput = scan;
a687059c 3469
c277df42
IZ
3470 DEBUG_r(
3471 {
3472 SV *prop = sv_newmortal();
3473
3474 regprop(prop, p);
3475 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3476 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3477 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3478 });
3479
a0d0e21e 3480 return(c);
a687059c
LW
3481}
3482
3483/*
c277df42
IZ
3484 - regrepeat_hard - repeatedly match something, report total lenth and length
3485 *
3486 * The repeater is supposed to have constant length.
3487 */
3488
76e3520e 3489STATIC I32
cea2e8a9 3490S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3491{
5c0ca799 3492 dTHR;
c277df42
IZ
3493 register char *scan;
3494 register char *start;
3280af22 3495 register char *loceol = PL_regeol;
a0ed51b3 3496 I32 l = 0;
708e3b05 3497 I32 count = 0, res = 1;
a0ed51b3
LW
3498
3499 if (!max)
3500 return 0;
c277df42 3501
3280af22 3502 start = PL_reginput;
a0ed51b3 3503 if (UTF) {
708e3b05 3504 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3505 if (!count++) {
3506 l = 0;
3507 while (start < PL_reginput) {
3508 l++;
3509 start += UTF8SKIP(start);
3510 }
3511 *lp = l;
3512 if (l == 0)
3513 return max;
3514 }
3515 if (count == max)
3516 return count;
3517 }
3518 }
3519 else {
708e3b05 3520 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3521 if (!count++) {
3522 *lp = l = PL_reginput - start;
3523 if (max != REG_INFTY && l*max < loceol - scan)
3524 loceol = scan + l*max;
3525 if (l == 0)
3526 return max;
c277df42
IZ
3527 }
3528 }
3529 }
708e3b05 3530 if (!res)
3280af22 3531 PL_reginput = scan;
c277df42 3532
a0ed51b3 3533 return count;
c277df42
IZ
3534}
3535
3536/*
cb8d8820 3537 - reginclass - determine if a character falls into a character class
bbce6d69 3538 */
3539
76e3520e 3540STATIC bool
936ed897 3541S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3542{
5c0ca799 3543 dTHR;
b8c5462f 3544 char flags = ANYOF_FLAGS(p);
bbce6d69 3545 bool match = FALSE;
3546
3547 c &= 0xFF;
b8c5462f 3548 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3549 match = TRUE;
3550 else if (flags & ANYOF_FOLD) {
3551 I32 cf;
3552 if (flags & ANYOF_LOCALE) {
3280af22 3553 PL_reg_flags |= RF_tainted;
22c35a8c 3554 cf = PL_fold_locale[c];
bbce6d69 3555 }
3556 else
22c35a8c 3557 cf = PL_fold[c];
b8c5462f 3558 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3559 match = TRUE;
3560 }
3561
b8c5462f 3562 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3563 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3564 if (
3565 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3566 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3567 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3568 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3569 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3570 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3571 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3572 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3573 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3574 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3575 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3576 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3577 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3578 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3579 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3580 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3581 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3582 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3583 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3584 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3585 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3586 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3587 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3588 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3589 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3590 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3591 ) /* How's that for a conditional? */
bbce6d69 3592 {
3593 match = TRUE;
3594 }
3595 }
3596
ae5c130c 3597 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3598}
3599
a0ed51b3 3600STATIC bool
cea2e8a9 3601S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3602{
3603 dTHR;
a0ed51b3
LW
3604 char flags = ARG1(f);
3605 bool match = FALSE;
3606 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3607
3608 if (swash_fetch(sv, p))
3609 match = TRUE;
3610 else if (flags & ANYOF_FOLD) {
806e7201 3611 U8 tmpbuf[UTF8_MAXLEN];
a0ed51b3
LW
3612 if (flags & ANYOF_LOCALE) {
3613 PL_reg_flags |= RF_tainted;
3614 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3615 }
3616 else
3617 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3618 if (swash_fetch(sv, tmpbuf))
3619 match = TRUE;
3620 }
3621
b8c5462f 3622 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3623
3624 return (flags & ANYOF_INVERT) ? !match : match;
3625}
161b471a 3626
dfe13c55 3627STATIC U8 *
cea2e8a9 3628S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3629{
3630 dTHR;
a0ed51b3
LW
3631 if (off >= 0) {
3632 while (off-- && s < (U8*)PL_regeol)
3633 s += UTF8SKIP(s);
3634 }
3635 else {
3636 while (off++) {
3637 if (s > (U8*)PL_bostr) {
3638 s--;
3639 if (*s & 0x80) {
3640 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3641 s--;
3642 } /* XXX could check well-formedness here */
3643 }
3644 }
3645 }
3646 return s;
3647}
161b471a 3648
dfe13c55 3649STATIC U8 *
cea2e8a9 3650S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3651{
c485e607 3652 dTHR;
a0ed51b3
LW
3653 if (off >= 0) {
3654 while (off-- && s < (U8*)PL_regeol)
3655 s += UTF8SKIP(s);
3656 if (off >= 0)
3657 return 0;
3658 }
3659 else {
3660 while (off++) {
3661 if (s > (U8*)PL_bostr) {
3662 s--;
3663 if (*s & 0x80) {
3664 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3665 s--;
3666 } /* XXX could check well-formedness here */
3667 }
3668 else
3669 break;
3670 }
3671 if (off <= 0)
3672 return 0;
3673 }
3674 return s;
3675}
51371543
GS
3676
3677#ifdef PERL_OBJECT
51371543
GS
3678#include "XSUB.h"
3679#endif
3680
3681static void
3682restore_pos(pTHXo_ void *arg)
3683{
3684 dTHR;
3685 if (PL_reg_eval_set) {
3686 if (PL_reg_oldsaved) {
3687 PL_reg_re->subbeg = PL_reg_oldsaved;
3688 PL_reg_re->sublen = PL_reg_oldsavedlen;
3689 RX_MATCH_COPIED_on(PL_reg_re);
3690 }
3691 PL_reg_magic->mg_len = PL_reg_oldpos;
3692 PL_reg_eval_set = 0;
3693 PL_curpm = PL_reg_oldcurpm;
3694 }
3695}
3696