This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
better CR-handling on shebang line and in formats (fixed variant of
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
28# ifndef DEBUGGING
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
d06ea78c
GS
38/* *These* symbols are masked to allow static link. */
39# define Perl_pregexec my_pregexec
56953603
IZ
40#endif
41
f0fcb552 42/*SUPPRESS 112*/
a687059c 43/*
e50aee73 44 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
45 *
46 * Copyright (c) 1986 by University of Toronto.
47 * Written by Henry Spencer. Not derived from licensed software.
48 *
49 * Permission is granted to anyone to use this software for any
50 * purpose on any computer system, and to redistribute it freely,
51 * subject to the following restrictions:
52 *
53 * 1. The author is not responsible for the consequences of use of
54 * this software, no matter how awful, even if they arise
55 * from defects in it.
56 *
57 * 2. The origin of this software must not be misrepresented, either
58 * by explicit claim or by omission.
59 *
60 * 3. Altered versions must be plainly marked as such, and must not
61 * be misrepresented as being the original software.
62 *
63 **** Alterations to Henry's code are...
64 ****
a0ed51b3 65 **** Copyright (c) 1991-1998, Larry Wall
a687059c 66 ****
9ef589d8
LW
67 **** You may distribute under the terms of either the GNU General Public
68 **** License or the Artistic License, as specified in the README file.
a687059c
LW
69 *
70 * Beware that some of this code is subtly aware of the way operator
71 * precedence is structured in regular expressions. Serious changes in
72 * regular-expression syntax might require a total rethink.
73 */
74#include "EXTERN.h"
75#include "perl.h"
76#include "regcomp.h"
77
c277df42
IZ
78#define RF_tainted 1 /* tainted information used? */
79#define RF_warned 2 /* warned about big count? */
ce862d02 80#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
81#define RF_utf8 8 /* String contains multibyte chars? */
82
83#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
84
85#define RS_init 1 /* eval environment created */
86#define RS_set 2 /* replsv value is set */
c277df42 87
a687059c
LW
88#ifndef STATIC
89#define STATIC static
90#endif
91
76e3520e 92#ifndef PERL_OBJECT
a0d0e21e
LW
93typedef I32 CHECKPOINT;
94
c277df42
IZ
95/*
96 * Forwards.
97 */
98
99static I32 regmatch _((regnode *prog));
100static I32 regrepeat _((regnode *p, I32 max));
101static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp));
102static I32 regtry _((regexp *prog, char *startpos));
ae5c130c 103
c277df42 104static bool reginclass _((char *p, I32 c));
a0ed51b3 105static bool reginclassutf8 _((regnode *f, U8* p));
55497cff 106static CHECKPOINT regcppush _((I32 parenfloor));
107static char * regcppop _((void));
76e3520e 108#endif
a0ed51b3 109
ae5c130c 110#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c))
a0ed51b3
LW
111#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
112
113#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
114#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
115
dfe13c55
GS
116#ifndef PERL_OBJECT
117static U8 * reghop _((U8 *pos, I32 off));
118static U8 * reghopmaybe _((U8 *pos, I32 off));
119#endif
120#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
121#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
122#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
123#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
124#define HOPc(pos,off) ((char*)HOP(pos,off))
125#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 126
76e3520e 127STATIC CHECKPOINT
8ac85365 128regcppush(I32 parenfloor)
a0d0e21e 129{
11343788 130 dTHR;
3280af22
NIS
131 int retval = PL_savestack_ix;
132 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
133 int p;
134
135 SSCHECK(i + 5);
3280af22
NIS
136 for (p = PL_regsize; p > parenfloor; p--) {
137 SSPUSHPTR(PL_regendp[p]);
138 SSPUSHPTR(PL_regstartp[p]);
139 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
140 SSPUSHINT(p);
141 }
3280af22
NIS
142 SSPUSHINT(PL_regsize);
143 SSPUSHINT(*PL_reglastparen);
144 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
145 SSPUSHINT(i + 3);
146 SSPUSHINT(SAVEt_REGCONTEXT);
147 return retval;
148}
149
c277df42 150/* These are needed since we do not localize EVAL nodes: */
c3464db5
DD
151# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
152 " Setting an EVAL scope, savestack=%i\n", \
3280af22 153 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 154
3280af22 155# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5
DD
156 PerlIO_printf(Perl_debug_log, \
157 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 158 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 159
76e3520e 160STATIC char *
8ac85365 161regcppop(void)
a0d0e21e 162{
11343788 163 dTHR;
a0d0e21e
LW
164 I32 i = SSPOPINT;
165 U32 paren = 0;
166 char *input;
167 char *tmps;
168 assert(i == SAVEt_REGCONTEXT);
169 i = SSPOPINT;
170 input = (char *) SSPOPPTR;
3280af22
NIS
171 *PL_reglastparen = SSPOPINT;
172 PL_regsize = SSPOPINT;
c277df42 173 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 174 paren = (U32)SSPOPINT;
3280af22
NIS
175 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
176 PL_regstartp[paren] = (char *) SSPOPPTR;
a0d0e21e 177 tmps = (char*)SSPOPPTR;
3280af22
NIS
178 if (paren <= *PL_reglastparen)
179 PL_regendp[paren] = tmps;
c277df42 180 DEBUG_r(
c3464db5
DD
181 PerlIO_printf(Perl_debug_log,
182 " restoring \\%d to %d(%d)..%d%s\n",
3280af22
NIS
183 paren, PL_regstartp[paren] - PL_regbol,
184 PL_reg_start_tmp[paren] - PL_regbol,
185 PL_regendp[paren] - PL_regbol,
186 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 187 );
a0d0e21e 188 }
c277df42 189 DEBUG_r(
3280af22 190 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5
DD
191 PerlIO_printf(Perl_debug_log,
192 " restoring \\%d..\\%d to undef\n",
3280af22 193 *PL_reglastparen + 1, PL_regnpar);
c277df42
IZ
194 }
195 );
3280af22
NIS
196 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
197 if (paren > PL_regsize)
198 PL_regstartp[paren] = Nullch;
199 PL_regendp[paren] = Nullch;
a0d0e21e
LW
200 }
201 return input;
202}
203
c277df42 204#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 205
a687059c 206/*
e50aee73 207 * pregexec and friends
a687059c
LW
208 */
209
210/*
c277df42 211 - pregexec - match a regexp against a string
a687059c 212 */
c277df42 213I32
c3464db5
DD
214pregexec(register regexp *prog, char *stringarg, register char *strend,
215 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
216/* strend: pointer to null at end of string */
217/* strbeg: real beginning of string */
218/* minend: end of match must be >=minend after stringarg. */
219/* nosave: For optimizations. */
220{
221 return
222 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
223 nosave ? 0 : REXEC_COPY_STR);
224}
225
a687059c 226/*
c277df42 227 - regexec_flags - match a regexp against a string
a687059c 228 */
79072805 229I32
c3464db5
DD
230regexec_flags(register regexp *prog, char *stringarg, register char *strend,
231 char *strbeg, I32 minend, SV *screamer, void *data, U32 flags)
c277df42
IZ
232/* strend: pointer to null at end of string */
233/* strbeg: real beginning of string */
234/* minend: end of match must be >=minend after stringarg. */
235/* data: May be used for some additional optimizations. */
236/* nosave: For optimizations. */
a687059c 237{
5c0ca799 238 dTHR;
a0d0e21e 239 register char *s;
c277df42 240 register regnode *c;
a0d0e21e
LW
241 register char *startpos = stringarg;
242 register I32 tmp;
c277df42 243 I32 minlen; /* must match at least this many chars */
a0d0e21e
LW
244 I32 dontbother = 0; /* how many characters not to try at end */
245 CURCUR cc;
c277df42 246 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
247 constant substr. */ /* CC */
248 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
249 I32 scream_pos = -1; /* Internal iterator of scream. */
250 char *scream_olds;
3280af22 251 SV* oreplsv = GvSV(PL_replgv);
a687059c 252
a0d0e21e 253 cc.cur = 0;
4633a7c4 254 cc.oldcc = 0;
3280af22 255 PL_regcc = &cc;
a0d0e21e 256
3280af22 257 PL_regprecomp = prog->precomp; /* Needed for error messages. */
a0d0e21e 258#ifdef DEBUGGING
3280af22
NIS
259 PL_regnarrate = PL_debug & 512;
260 PL_regprogram = prog->program;
a0d0e21e
LW
261#endif
262
263 /* Be paranoid... */
264 if (prog == NULL || startpos == NULL) {
265 croak("NULL regexp parameter");
266 return 0;
267 }
268
c277df42
IZ
269 minlen = prog->minlen;
270 if (strend - startpos < minlen) goto phooey;
271
a0d0e21e 272 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 273 PL_regprev = '\n';
a0d0e21e 274 else {
a0ed51b3 275 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
276 if (!PL_multiline && PL_regprev == '\n')
277 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 278 }
bbce6d69 279
a0d0e21e
LW
280 /* Check validity of program. */
281 if (UCHARAT(prog->program) != MAGIC) {
282 FAIL("corrupted regexp program");
283 }
284
3280af22
NIS
285 PL_regnpar = prog->nparens;
286 PL_reg_flags = 0;
287 PL_reg_eval_set = 0;
a0d0e21e 288
a0ed51b3
LW
289 if (prog->reganch & ROPT_UTF8)
290 PL_reg_flags |= RF_utf8;
291
292 /* Mark beginning of line for ^ and lookbehind. */
293 PL_regbol = startpos;
294 PL_bostr = strbeg;
295
296 /* Mark end of line for $ (and such) */
297 PL_regeol = strend;
298
299 /* see how far we have to get to not match where we matched before */
300 PL_regtill = startpos+minend;
301
a0d0e21e
LW
302 /* If there is a "must appear" string, look for it. */
303 s = startpos;
c277df42
IZ
304 if (!(flags & REXEC_CHECKED)
305 && prog->check_substr != Nullsv &&
774d564b 306 !(prog->reganch & ROPT_ANCH_GPOS) &&
c277df42 307 (!(prog->reganch & (ROPT_ANCH_BOL | ROPT_ANCH_MBOL))
3280af22 308 || (PL_multiline && prog->check_substr == prog->anchored_substr)) )
a0d0e21e 309 {
a0ed51b3
LW
310 char *t;
311 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
c277df42 312 /* Should be nonnegative! */
a0ed51b3 313 end_shift = minlen - start_shift - CHR_SVLEN(prog->check_substr);
c277df42 314 if (screamer) {
3280af22 315 if (PL_screamfirst[BmRARE(prog->check_substr)] >= 0)
c277df42
IZ
316 s = screaminstr(screamer, prog->check_substr,
317 start_shift + (stringarg - strbeg),
318 end_shift, &scream_pos, 0);
a0d0e21e
LW
319 else
320 s = Nullch;
c277df42 321 scream_olds = s;
0a12ae7d 322 }
a0d0e21e 323 else
c277df42
IZ
324 s = fbm_instr((unsigned char*)s + start_shift,
325 (unsigned char*)strend - end_shift,
411d5715 326 prog->check_substr, 0);
a0d0e21e 327 if (!s) {
c277df42 328 ++BmUSEFUL(prog->check_substr); /* hooray */
a0d0e21e 329 goto phooey; /* not present */
a0ed51b3
LW
330 }
331 else if (s - stringarg > prog->check_offset_max &&
332 (UTF
dfe13c55 333 ? ((t = reghopmaybe_c(s, -(prog->check_offset_max))) && t >= stringarg)
a0ed51b3
LW
334 : (t = s - prog->check_offset_max) != 0
335 )
336 )
337 {
c277df42 338 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
a0ed51b3
LW
339 s = t;
340 }
341 else if (!(prog->reganch & ROPT_NAUGHTY)
c277df42
IZ
342 && --BmUSEFUL(prog->check_substr) < 0
343 && prog->check_substr == prog->float_substr) { /* boo */
344 SvREFCNT_dec(prog->check_substr);
345 prog->check_substr = Nullsv; /* disable */
346 prog->float_substr = Nullsv; /* clear */
a0d0e21e 347 s = startpos;
a0ed51b3
LW
348 }
349 else
350 s = startpos;
a0d0e21e 351 }
a687059c 352
c277df42
IZ
353 DEBUG_r(
354 PerlIO_printf(Perl_debug_log,
355 "Matching `%.60s%s' against `%.*s%s'\n",
356 prog->precomp,
357 (strlen(prog->precomp) > 60 ? "..." : ""),
358 (strend - startpos > 60 ? 60 : strend - startpos),
359 startpos,
360 (strend - startpos > 60 ? "..." : ""))
361 );
362
a0ed51b3
LW
363 PL_regdata = prog->data;
364
a0d0e21e 365 /* Simplest case: anchored match need be tried only once. */
774d564b 366 /* [unless only anchor is BOL and multiline is set] */
a0d0e21e
LW
367 if (prog->reganch & ROPT_ANCH) {
368 if (regtry(prog, startpos))
369 goto got_it;
774d564b 370 else if (!(prog->reganch & ROPT_ANCH_GPOS) &&
3280af22 371 (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
c277df42 372 || (prog->reganch & ROPT_ANCH_MBOL)))
774d564b 373 {
a0d0e21e
LW
374 if (minlen)
375 dontbother = minlen - 1;
dfe13c55 376 strend = HOPc(strend, -dontbother);
a0d0e21e
LW
377 /* for multiline we only have to try after newlines */
378 if (s > startpos)
379 s--;
380 while (s < strend) {
a0ed51b3 381 if (*s++ == '\n') { /* don't need utf8skip here */
a0d0e21e
LW
382 if (s < strend && regtry(prog, s))
383 goto got_it;
384 }
35c8bce7 385 }
35c8bce7 386 }
a0d0e21e
LW
387 goto phooey;
388 }
35c8bce7 389
a0d0e21e 390 /* Messy cases: unanchored match. */
c277df42
IZ
391 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
392 /* we have /x+whatever/ */
393 /* it must be a one character string */
394 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
395 if (UTF) {
396 while (s < strend) {
397 if (*s == ch) {
398 if (regtry(prog, s)) goto got_it;
399 s += UTF8SKIP(s);
400 while (s < strend && *s == ch)
401 s += UTF8SKIP(s);
402 }
403 s += UTF8SKIP(s);
404 }
405 }
406 else {
407 while (s < strend) {
408 if (*s == ch) {
409 if (regtry(prog, s)) goto got_it;
c277df42 410 s++;
a0ed51b3
LW
411 while (s < strend && *s == ch)
412 s++;
413 }
414 s++;
a0d0e21e 415 }
a687059c 416 }
c277df42
IZ
417 }
418 /*SUPPRESS 560*/
419 else if (prog->anchored_substr != Nullsv
420 || (prog->float_substr != Nullsv
421 && prog->float_max_offset < strend - s)) {
422 SV *must = prog->anchored_substr
423 ? prog->anchored_substr : prog->float_substr;
424 I32 back_max =
425 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
426 I32 back_min =
427 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
428 I32 delta = back_max - back_min;
dfe13c55 429 char *last = HOPc(strend, 0-(CHR_SVLEN(must) + back_min)); /* Cannot start after this */
a0ed51b3
LW
430 char *last1; /* Last position checked before */
431
432 if (s > PL_bostr)
dfe13c55 433 last1 = HOPc(s, -1);
a0ed51b3
LW
434 else
435 last1 = s - 1; /* bogus */
c277df42
IZ
436
437 /* XXXX check_substr already used to find `s', can optimize if
438 check_substr==must. */
439 scream_pos = -1;
440 dontbother = end_shift;
dfe13c55 441 strend = HOPc(strend, -dontbother);
c277df42
IZ
442 while ( (s <= last) &&
443 (screamer
dfe13c55 444 ? (s = screaminstr(screamer, must, HOPc(s, back_min) - strbeg,
c277df42 445 end_shift, &scream_pos, 0))
a0ed51b3 446 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
411d5715 447 (unsigned char*)strend, must, 0))) ) {
dfe13c55
GS
448 if (HOPc(s, -back_max) > last1) {
449 last1 = HOPc(s, -back_min);
450 s = HOPc(s, -back_max);
a0ed51b3
LW
451 }
452 else {
dfe13c55 453 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 454
dfe13c55 455 last1 = HOPc(s, -back_min);
c277df42 456 s = t;
a0d0e21e 457 }
a0ed51b3
LW
458 if (UTF) {
459 while (s <= last1) {
460 if (regtry(prog, s))
461 goto got_it;
462 s += UTF8SKIP(s);
463 }
464 }
465 else {
466 while (s <= last1) {
467 if (regtry(prog, s))
468 goto got_it;
469 s++;
470 }
a0d0e21e
LW
471 }
472 }
473 goto phooey;
a0ed51b3
LW
474 }
475 else if (c = prog->regstclass) {
a0d0e21e 476 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 477 char *cc;
a687059c 478
a0d0e21e
LW
479 if (minlen)
480 dontbother = minlen - 1;
dfe13c55 481 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
482 tmp = 1;
483 /* We know what class it must start with. */
484 switch (OP(c)) {
a0ed51b3
LW
485 case ANYOFUTF8:
486 cc = (char *) OPERAND(c);
487 while (s < strend) {
488 if (REGINCLASSUTF8(c, (U8*)s)) {
489 if (tmp && regtry(prog, s))
490 goto got_it;
491 else
492 tmp = doevery;
493 }
494 else
495 tmp = 1;
496 s += UTF8SKIP(s);
497 }
498 break;
a0d0e21e 499 case ANYOF:
a0ed51b3 500 cc = (char *) OPERAND(c);
a0d0e21e 501 while (s < strend) {
a0ed51b3 502 if (REGINCLASS(cc, *s)) {
a0d0e21e
LW
503 if (tmp && regtry(prog, s))
504 goto got_it;
505 else
506 tmp = doevery;
a687059c 507 }
a0d0e21e
LW
508 else
509 tmp = 1;
510 s++;
511 }
512 break;
bbce6d69 513 case BOUNDL:
3280af22 514 PL_reg_flags |= RF_tainted;
bbce6d69 515 /* FALL THROUGH */
a0d0e21e 516 case BOUND:
a0ed51b3
LW
517 if (minlen) {
518 dontbother++;
519 strend -= 1;
520 }
3280af22 521 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 522 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 523 while (s < strend) {
95bac841 524 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
525 tmp = !tmp;
526 if (regtry(prog, s))
527 goto got_it;
a687059c 528 }
a0d0e21e
LW
529 s++;
530 }
531 if ((minlen || tmp) && regtry(prog,s))
532 goto got_it;
533 break;
a0ed51b3
LW
534 case BOUNDLUTF8:
535 PL_reg_flags |= RF_tainted;
536 /* FALL THROUGH */
537 case BOUNDUTF8:
538 if (minlen) {
539 dontbother++;
dfe13c55 540 strend = reghop_c(strend, -1);
a0ed51b3 541 }
dfe13c55 542 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
543 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
544 while (s < strend) {
dfe13c55
GS
545 if (tmp == !(OP(c) == BOUND ?
546 swash_fetch(PL_utf8_alnum, (U8*)s) :
547 isALNUM_LC_utf8((U8*)s)))
548 {
a0ed51b3
LW
549 tmp = !tmp;
550 if (regtry(prog, s))
551 goto got_it;
552 }
553 s += UTF8SKIP(s);
554 }
555 if ((minlen || tmp) && regtry(prog,s))
556 goto got_it;
557 break;
bbce6d69 558 case NBOUNDL:
3280af22 559 PL_reg_flags |= RF_tainted;
bbce6d69 560 /* FALL THROUGH */
a0d0e21e 561 case NBOUND:
a0ed51b3
LW
562 if (minlen) {
563 dontbother++;
564 strend -= 1;
565 }
3280af22 566 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 567 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 568 while (s < strend) {
95bac841 569 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
570 tmp = !tmp;
571 else if (regtry(prog, s))
572 goto got_it;
573 s++;
574 }
575 if ((minlen || !tmp) && regtry(prog,s))
576 goto got_it;
577 break;
a0ed51b3
LW
578 case NBOUNDLUTF8:
579 PL_reg_flags |= RF_tainted;
580 /* FALL THROUGH */
581 case NBOUNDUTF8:
582 if (minlen) {
583 dontbother++;
dfe13c55 584 strend = reghop_c(strend, -1);
a0ed51b3 585 }
dfe13c55 586 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
587 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
588 while (s < strend) {
dfe13c55
GS
589 if (tmp == !(OP(c) == NBOUND ?
590 swash_fetch(PL_utf8_alnum, (U8*)s) :
591 isALNUM_LC_utf8((U8*)s)))
a0ed51b3
LW
592 tmp = !tmp;
593 else if (regtry(prog, s))
594 goto got_it;
595 s += UTF8SKIP(s);
596 }
597 if ((minlen || !tmp) && regtry(prog,s))
598 goto got_it;
599 break;
a0d0e21e
LW
600 case ALNUM:
601 while (s < strend) {
bbce6d69 602 if (isALNUM(*s)) {
603 if (tmp && regtry(prog, s))
604 goto got_it;
605 else
606 tmp = doevery;
607 }
608 else
609 tmp = 1;
610 s++;
611 }
612 break;
a0ed51b3
LW
613 case ALNUMUTF8:
614 while (s < strend) {
dfe13c55 615 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
616 if (tmp && regtry(prog, s))
617 goto got_it;
618 else
619 tmp = doevery;
620 }
621 else
622 tmp = 1;
623 s += UTF8SKIP(s);
624 }
625 break;
bbce6d69 626 case ALNUML:
3280af22 627 PL_reg_flags |= RF_tainted;
bbce6d69 628 while (s < strend) {
629 if (isALNUM_LC(*s)) {
a0d0e21e
LW
630 if (tmp && regtry(prog, s))
631 goto got_it;
a687059c 632 else
a0d0e21e
LW
633 tmp = doevery;
634 }
635 else
636 tmp = 1;
637 s++;
638 }
639 break;
a0ed51b3
LW
640 case ALNUMLUTF8:
641 PL_reg_flags |= RF_tainted;
642 while (s < strend) {
dfe13c55 643 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
644 if (tmp && regtry(prog, s))
645 goto got_it;
646 else
647 tmp = doevery;
648 }
649 else
650 tmp = 1;
651 s += UTF8SKIP(s);
652 }
653 break;
a0d0e21e
LW
654 case NALNUM:
655 while (s < strend) {
bbce6d69 656 if (!isALNUM(*s)) {
657 if (tmp && regtry(prog, s))
658 goto got_it;
659 else
660 tmp = doevery;
661 }
662 else
663 tmp = 1;
664 s++;
665 }
666 break;
a0ed51b3
LW
667 case NALNUMUTF8:
668 while (s < strend) {
dfe13c55 669 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
670 if (tmp && regtry(prog, s))
671 goto got_it;
672 else
673 tmp = doevery;
674 }
675 else
676 tmp = 1;
677 s += UTF8SKIP(s);
678 }
679 break;
bbce6d69 680 case NALNUML:
3280af22 681 PL_reg_flags |= RF_tainted;
bbce6d69 682 while (s < strend) {
683 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
684 if (tmp && regtry(prog, s))
685 goto got_it;
a687059c 686 else
a0d0e21e 687 tmp = doevery;
a687059c 688 }
a0d0e21e
LW
689 else
690 tmp = 1;
691 s++;
692 }
693 break;
a0ed51b3
LW
694 case NALNUMLUTF8:
695 PL_reg_flags |= RF_tainted;
696 while (s < strend) {
dfe13c55 697 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
698 if (tmp && regtry(prog, s))
699 goto got_it;
700 else
701 tmp = doevery;
702 }
703 else
704 tmp = 1;
705 s += UTF8SKIP(s);
706 }
707 break;
a0d0e21e
LW
708 case SPACE:
709 while (s < strend) {
710 if (isSPACE(*s)) {
711 if (tmp && regtry(prog, s))
712 goto got_it;
713 else
714 tmp = doevery;
2304df62 715 }
a0d0e21e
LW
716 else
717 tmp = 1;
718 s++;
719 }
720 break;
a0ed51b3
LW
721 case SPACEUTF8:
722 while (s < strend) {
dfe13c55 723 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3
LW
724 if (tmp && regtry(prog, s))
725 goto got_it;
726 else
727 tmp = doevery;
728 }
729 else
730 tmp = 1;
731 s += UTF8SKIP(s);
732 }
733 break;
bbce6d69 734 case SPACEL:
3280af22 735 PL_reg_flags |= RF_tainted;
bbce6d69 736 while (s < strend) {
737 if (isSPACE_LC(*s)) {
738 if (tmp && regtry(prog, s))
739 goto got_it;
740 else
741 tmp = doevery;
742 }
743 else
744 tmp = 1;
745 s++;
746 }
747 break;
a0ed51b3
LW
748 case SPACELUTF8:
749 PL_reg_flags |= RF_tainted;
750 while (s < strend) {
dfe13c55 751 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3
LW
752 if (tmp && regtry(prog, s))
753 goto got_it;
754 else
755 tmp = doevery;
756 }
757 else
758 tmp = 1;
759 s += UTF8SKIP(s);
760 }
761 break;
a0d0e21e
LW
762 case NSPACE:
763 while (s < strend) {
764 if (!isSPACE(*s)) {
765 if (tmp && regtry(prog, s))
766 goto got_it;
767 else
768 tmp = doevery;
a687059c 769 }
a0d0e21e
LW
770 else
771 tmp = 1;
772 s++;
773 }
774 break;
a0ed51b3
LW
775 case NSPACEUTF8:
776 while (s < strend) {
dfe13c55 777 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3
LW
778 if (tmp && regtry(prog, s))
779 goto got_it;
780 else
781 tmp = doevery;
782 }
783 else
784 tmp = 1;
785 s += UTF8SKIP(s);
786 }
787 break;
bbce6d69 788 case NSPACEL:
3280af22 789 PL_reg_flags |= RF_tainted;
bbce6d69 790 while (s < strend) {
791 if (!isSPACE_LC(*s)) {
792 if (tmp && regtry(prog, s))
793 goto got_it;
794 else
795 tmp = doevery;
796 }
797 else
798 tmp = 1;
799 s++;
800 }
801 break;
a0ed51b3
LW
802 case NSPACELUTF8:
803 PL_reg_flags |= RF_tainted;
804 while (s < strend) {
dfe13c55 805 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3
LW
806 if (tmp && regtry(prog, s))
807 goto got_it;
808 else
809 tmp = doevery;
810 }
811 else
812 tmp = 1;
813 s += UTF8SKIP(s);
814 }
815 break;
a0d0e21e
LW
816 case DIGIT:
817 while (s < strend) {
818 if (isDIGIT(*s)) {
819 if (tmp && regtry(prog, s))
820 goto got_it;
821 else
822 tmp = doevery;
2b69d0c2 823 }
a0d0e21e
LW
824 else
825 tmp = 1;
826 s++;
827 }
828 break;
a0ed51b3
LW
829 case DIGITUTF8:
830 while (s < strend) {
dfe13c55 831 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
832 if (tmp && regtry(prog, s))
833 goto got_it;
834 else
835 tmp = doevery;
836 }
837 else
838 tmp = 1;
839 s += UTF8SKIP(s);
840 }
841 break;
a0d0e21e
LW
842 case NDIGIT:
843 while (s < strend) {
844 if (!isDIGIT(*s)) {
845 if (tmp && regtry(prog, s))
846 goto got_it;
847 else
848 tmp = doevery;
a687059c 849 }
a0d0e21e
LW
850 else
851 tmp = 1;
852 s++;
853 }
854 break;
a0ed51b3
LW
855 case NDIGITUTF8:
856 while (s < strend) {
dfe13c55 857 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
858 if (tmp && regtry(prog, s))
859 goto got_it;
860 else
861 tmp = doevery;
862 }
863 else
864 tmp = 1;
865 s += UTF8SKIP(s);
866 }
867 break;
a687059c 868 }
a0d0e21e
LW
869 }
870 else {
c277df42
IZ
871 dontbother = 0;
872 if (prog->float_substr != Nullsv) { /* Trim the end. */
873 char *last;
874 I32 oldpos = scream_pos;
875
876 if (screamer) {
877 last = screaminstr(screamer, prog->float_substr, s - strbeg,
878 end_shift, &scream_pos, 1); /* last one */
879 if (!last) {
880 last = scream_olds; /* Only one occurence. */
881 }
a0ed51b3
LW
882 }
883 else {
c277df42
IZ
884 STRLEN len;
885 char *little = SvPV(prog->float_substr, len);
19b4f81a
JPC
886 if (len)
887 last = rninstr(s, strend, little, little + len);
888 else
889 last = strend; /* matching `$' */
c277df42
IZ
890 }
891 if (last == NULL) goto phooey; /* Should not happen! */
19b4f81a 892 dontbother = strend - last + prog->float_min_offset;
c277df42
IZ
893 }
894 if (minlen && (dontbother < minlen))
a0d0e21e 895 dontbother = minlen - 1;
a0ed51b3 896 strend -= dontbother; /* this one's always in bytes! */
a0d0e21e 897 /* We don't know much -- general case. */
a0ed51b3
LW
898 if (UTF) {
899 for (;;) {
84df6dba 900 if (regtry(prog, s))
a0ed51b3 901 goto got_it;
a0ed51b3
LW
902 if (s >= strend)
903 break;
904 s += UTF8SKIP(s);
905 };
906 }
907 else {
908 do {
909 if (regtry(prog, s))
910 goto got_it;
911 } while (s++ < strend);
912 }
a0d0e21e
LW
913 }
914
915 /* Failure. */
916 goto phooey;
a687059c 917
a0d0e21e
LW
918got_it:
919 prog->subbeg = strbeg;
19b4f81a 920 prog->subend = PL_regeol; /* strend may have been modified */
3280af22 921 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
5f05dabc 922
923 /* make sure $`, $&, $', and $digit will work later */
c277df42
IZ
924 if (strbeg != prog->subbase) { /* second+ //g match. */
925 if (!(flags & REXEC_COPY_STR)) {
137443ea 926 if (prog->subbase) {
927 Safefree(prog->subbase);
928 prog->subbase = Nullch;
929 }
930 }
931 else {
19b4f81a 932 I32 i = PL_regeol - startpos + (stringarg - strbeg);
137443ea 933 s = savepvn(strbeg, i);
934 Safefree(prog->subbase);
935 prog->subbase = s;
936 prog->subbeg = prog->subbase;
937 prog->subend = prog->subbase + i;
938 s = prog->subbase + (stringarg - strbeg);
939 for (i = 0; i <= prog->nparens; i++) {
940 if (prog->endp[i]) {
941 prog->startp[i] = s + (prog->startp[i] - startpos);
942 prog->endp[i] = s + (prog->endp[i] - startpos);
943 }
a0d0e21e
LW
944 }
945 }
a0d0e21e 946 }
ce862d02 947 /* Preserve the current value of $^R */
3280af22
NIS
948 if (oreplsv != GvSV(PL_replgv)) {
949 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
ce862d02
IZ
950 restored, the value remains
951 the same. */
952 }
a0d0e21e
LW
953 return 1;
954
955phooey:
a0d0e21e 956 return 0;
a687059c
LW
957}
958
959/*
960 - regtry - try match at specific point
961 */
76e3520e 962STATIC I32 /* 0 failure, 1 success */
8ac85365 963regtry(regexp *prog, char *startpos)
a687059c 964{
c277df42 965 dTHR;
a0d0e21e
LW
966 register I32 i;
967 register char **sp;
968 register char **ep;
c277df42 969 CHECKPOINT lastcp;
a0d0e21e 970
3280af22
NIS
971 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
972 PL_reg_eval_set = RS_init;
ce862d02 973 DEBUG_r(DEBUG_s(
c3464db5 974 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
3280af22 975 PL_stack_sp - PL_stack_base);
ce862d02
IZ
976 ));
977 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
3280af22 978 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
ce862d02
IZ
979 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
980 SAVETMPS;
981 /* Apparently this is not needed, judging by wantarray. */
982 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
983 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
984 }
3280af22
NIS
985 PL_reginput = startpos;
986 PL_regstartp = prog->startp;
987 PL_regendp = prog->endp;
988 PL_reglastparen = &prog->lastparen;
a0d0e21e 989 prog->lastparen = 0;
3280af22
NIS
990 PL_regsize = 0;
991 if (PL_reg_start_tmpl <= prog->nparens) {
992 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
993 if(PL_reg_start_tmp)
994 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 995 else
3280af22 996 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 997 }
a0d0e21e
LW
998
999 sp = prog->startp;
1000 ep = prog->endp;
1001 if (prog->nparens) {
1002 for (i = prog->nparens; i >= 0; i--) {
1003 *sp++ = NULL;
1004 *ep++ = NULL;
a687059c 1005 }
a0d0e21e 1006 }
c277df42 1007 REGCP_SET;
7e5428c5 1008 if (regmatch(prog->program + 1)) {
a0d0e21e 1009 prog->startp[0] = startpos;
3280af22 1010 prog->endp[0] = PL_reginput;
a0d0e21e
LW
1011 return 1;
1012 }
c277df42
IZ
1013 REGCP_UNWIND;
1014 return 0;
a687059c
LW
1015}
1016
1017/*
1018 - regmatch - main matching routine
1019 *
1020 * Conceptually the strategy is simple: check to see whether the current
1021 * node matches, call self recursively to see whether the rest matches,
1022 * and then act accordingly. In practice we make some effort to avoid
1023 * recursion, in particular by going through "ordinary" nodes (that don't
1024 * need to know whether the rest of the match failed) by a loop instead of
1025 * by recursion.
1026 */
1027/* [lwall] I've hoisted the register declarations to the outer block in order to
1028 * maybe save a little bit of pushing and popping on the stack. It also takes
1029 * advantage of machines that use a register save mask on subroutine entry.
1030 */
76e3520e 1031STATIC I32 /* 0 failure, 1 success */
c277df42 1032regmatch(regnode *prog)
a687059c 1033{
c277df42
IZ
1034 dTHR;
1035 register regnode *scan; /* Current node. */
1036 regnode *next; /* Next node. */
1037 regnode *inner; /* Next node in internal branch. */
c3464db5
DD
1038 register I32 nextchr; /* renamed nextchr - nextchar colides with
1039 function of same name */
a0d0e21e
LW
1040 register I32 n; /* no or next */
1041 register I32 ln; /* len or last */
1042 register char *s; /* operand or save */
3280af22 1043 register char *locinput = PL_reginput;
c277df42
IZ
1044 register I32 c1, c2, paren; /* case fold search, parenth */
1045 int minmod = 0, sw = 0, logical = 0;
4633a7c4 1046#ifdef DEBUGGING
3280af22 1047 PL_regindent++;
4633a7c4 1048#endif
a0d0e21e 1049
a0ed51b3 1050 /* Note that nextchr is a byte even in UTF */
76e3520e 1051 nextchr = UCHARAT(locinput);
a0d0e21e
LW
1052 scan = prog;
1053 while (scan != NULL) {
c277df42 1054#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
a687059c 1055#ifdef DEBUGGING
c277df42
IZ
1056# define sayYES goto yes
1057# define sayNO goto no
1058# define saySAME(x) if (x) goto yes; else goto no
1059# define REPORT_CODE_OFF 24
4633a7c4 1060#else
c277df42
IZ
1061# define sayYES return 1
1062# define sayNO return 0
1063# define saySAME(x) return x
a687059c 1064#endif
c277df42
IZ
1065 DEBUG_r( {
1066 SV *prop = sv_newmortal();
3280af22 1067 int docolor = *PL_colors[0];
c277df42 1068 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
3280af22
NIS
1069 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1070 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1071 ? (5 + taill) - l : locinput - PL_bostr);
c277df42 1072
3280af22
NIS
1073 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1074 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1075 ? (5 + taill) - pref_len : PL_regeol - locinput);
c277df42
IZ
1076 regprop(prop, scan);
1077 PerlIO_printf(Perl_debug_log,
54dc92de 1078 "%4i <%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
3280af22
NIS
1079 locinput - PL_bostr,
1080 PL_colors[2], pref_len, locinput - pref_len, PL_colors[3],
c277df42 1081 (docolor ? "" : "> <"),
3280af22 1082 PL_colors[0], l, locinput, PL_colors[1],
c277df42
IZ
1083 15 - l - pref_len + 1,
1084 "",
3280af22 1085 scan - PL_regprogram, PL_regindent*2, "",
c277df42
IZ
1086 SvPVX(prop));
1087 } );
a687059c 1088
c277df42 1089 next = scan + NEXT_OFF(scan);
a0d0e21e
LW
1090 if (next == scan)
1091 next = NULL;
a687059c 1092
a0d0e21e
LW
1093 switch (OP(scan)) {
1094 case BOL:
3280af22
NIS
1095 if (locinput == PL_bostr
1096 ? PL_regprev == '\n'
1097 : (PL_multiline &&
1098 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e 1099 {
a0ed51b3 1100 /* regtill = regbol; */
a0d0e21e
LW
1101 break;
1102 }
4633a7c4 1103 sayNO;
a0d0e21e 1104 case MBOL:
3280af22
NIS
1105 if (locinput == PL_bostr
1106 ? PL_regprev == '\n'
1107 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e
LW
1108 {
1109 break;
1110 }
4633a7c4 1111 sayNO;
a0d0e21e 1112 case SBOL:
3280af22 1113 if (locinput == PL_regbol && PL_regprev == '\n')
a0d0e21e 1114 break;
4633a7c4 1115 sayNO;
774d564b 1116 case GPOS:
3280af22 1117 if (locinput == PL_regbol)
a0d0e21e 1118 break;
4633a7c4 1119 sayNO;
a0d0e21e 1120 case EOL:
3280af22 1121 if (PL_multiline)
a0d0e21e
LW
1122 goto meol;
1123 else
1124 goto seol;
1125 case MEOL:
1126 meol:
3280af22 1127 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1128 sayNO;
a0d0e21e
LW
1129 break;
1130 case SEOL:
1131 seol:
3280af22 1132 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1133 sayNO;
3280af22 1134 if (PL_regeol - locinput > 1)
4633a7c4 1135 sayNO;
a0d0e21e 1136 break;
b85d18e9 1137 case EOS:
3280af22 1138 if (PL_regeol != locinput)
b85d18e9
IZ
1139 sayNO;
1140 break;
a0ed51b3
LW
1141 case SANYUTF8:
1142 if (nextchr & 0x80) {
a176fa2a 1143 locinput += utf8skip[nextchr];
a0ed51b3
LW
1144 if (locinput > PL_regeol)
1145 sayNO;
1146 nextchr = UCHARAT(locinput);
1147 break;
1148 }
1149 if (!nextchr && locinput >= PL_regeol)
1150 sayNO;
1151 nextchr = UCHARAT(++locinput);
1152 break;
a0d0e21e 1153 case SANY:
3280af22 1154 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1155 sayNO;
76e3520e 1156 nextchr = UCHARAT(++locinput);
a0d0e21e 1157 break;
a0ed51b3
LW
1158 case ANYUTF8:
1159 if (nextchr & 0x80) {
a176fa2a 1160 locinput += utf8skip[nextchr];
a0ed51b3
LW
1161 if (locinput > PL_regeol)
1162 sayNO;
1163 nextchr = UCHARAT(locinput);
1164 break;
1165 }
1166 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
1167 sayNO;
1168 nextchr = UCHARAT(++locinput);
1169 break;
a0d0e21e 1170 case ANY:
3280af22 1171 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1172 sayNO;
76e3520e 1173 nextchr = UCHARAT(++locinput);
a0d0e21e 1174 break;
bbce6d69 1175 case EXACT:
161b471a 1176 s = (char *) OPERAND(scan);
c277df42 1177 ln = UCHARAT(s++);
a0d0e21e 1178 /* Inline the first character, for speed. */
76e3520e 1179 if (UCHARAT(s) != nextchr)
4633a7c4 1180 sayNO;
3280af22 1181 if (PL_regeol - locinput < ln)
4633a7c4 1182 sayNO;
36477c24 1183 if (ln > 1 && memNE(s, locinput, ln))
4633a7c4 1184 sayNO;
a0d0e21e 1185 locinput += ln;
76e3520e 1186 nextchr = UCHARAT(locinput);
bbce6d69 1187 break;
1188 case EXACTFL:
3280af22 1189 PL_reg_flags |= RF_tainted;
bbce6d69 1190 /* FALL THROUGH */
1191 case EXACTF:
161b471a 1192 s = (char *) OPERAND(scan);
c277df42 1193 ln = UCHARAT(s++);
a0ed51b3
LW
1194
1195 if (UTF) {
1196 char *l = locinput;
1197 char *e = s + ln;
1198 c1 = OP(scan) == EXACTF;
1199 while (s < e) {
1200 if (l >= PL_regeol)
1201 sayNO;
dfe13c55
GS
1202 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1203 toLOWER_utf8((U8*)l) :
1204 toLOWER_LC_utf8((U8*)l)))
1205 {
a0ed51b3 1206 sayNO;
dfe13c55 1207 }
a0ed51b3
LW
1208 s += UTF8SKIP(s);
1209 l += UTF8SKIP(l);
1210 }
1211 locinput = l;
1212 nextchr = UCHARAT(locinput);
1213 break;
1214 }
1215
bbce6d69 1216 /* Inline the first character, for speed. */
76e3520e 1217 if (UCHARAT(s) != nextchr &&
bbce6d69 1218 UCHARAT(s) != ((OP(scan) == EXACTF)
76e3520e 1219 ? fold : fold_locale)[nextchr])
bbce6d69 1220 sayNO;
3280af22 1221 if (PL_regeol - locinput < ln)
bbce6d69 1222 sayNO;
5f05dabc 1223 if (ln > 1 && (OP(scan) == EXACTF
1224 ? ibcmp(s, locinput, ln)
1225 : ibcmp_locale(s, locinput, ln)))
bbce6d69 1226 sayNO;
1227 locinput += ln;
76e3520e 1228 nextchr = UCHARAT(locinput);
a0d0e21e 1229 break;
a0ed51b3
LW
1230 case ANYOFUTF8:
1231 s = (char *) OPERAND(scan);
1232 if (!REGINCLASSUTF8(scan, (U8*)locinput))
1233 sayNO;
1234 if (locinput >= PL_regeol)
1235 sayNO;
a176fa2a 1236 locinput += utf8skip[nextchr];
a0ed51b3
LW
1237 nextchr = UCHARAT(locinput);
1238 break;
a0d0e21e 1239 case ANYOF:
161b471a 1240 s = (char *) OPERAND(scan);
76e3520e
GS
1241 if (nextchr < 0)
1242 nextchr = UCHARAT(locinput);
873ef191 1243 if (!REGINCLASS(s, nextchr))
4633a7c4 1244 sayNO;
3280af22 1245 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1246 sayNO;
76e3520e 1247 nextchr = UCHARAT(++locinput);
a0d0e21e 1248 break;
bbce6d69 1249 case ALNUML:
3280af22 1250 PL_reg_flags |= RF_tainted;
bbce6d69 1251 /* FALL THROUGH */
a0d0e21e 1252 case ALNUM:
76e3520e 1253 if (!nextchr)
4633a7c4 1254 sayNO;
bbce6d69 1255 if (!(OP(scan) == ALNUM
76e3520e 1256 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
4633a7c4 1257 sayNO;
76e3520e 1258 nextchr = UCHARAT(++locinput);
a0d0e21e 1259 break;
a0ed51b3
LW
1260 case ALNUMLUTF8:
1261 PL_reg_flags |= RF_tainted;
1262 /* FALL THROUGH */
1263 case ALNUMUTF8:
1264 if (!nextchr)
1265 sayNO;
1266 if (nextchr & 0x80) {
1267 if (!(OP(scan) == ALNUMUTF8
dfe13c55
GS
1268 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1269 : isALNUM_LC_utf8((U8*)locinput)))
1270 {
a0ed51b3 1271 sayNO;
dfe13c55 1272 }
a176fa2a 1273 locinput += utf8skip[nextchr];
a0ed51b3
LW
1274 nextchr = UCHARAT(locinput);
1275 break;
1276 }
1277 if (!(OP(scan) == ALNUMUTF8
1278 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
1279 sayNO;
1280 nextchr = UCHARAT(++locinput);
1281 break;
bbce6d69 1282 case NALNUML:
3280af22 1283 PL_reg_flags |= RF_tainted;
bbce6d69 1284 /* FALL THROUGH */
a0d0e21e 1285 case NALNUM:
3280af22 1286 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1287 sayNO;
bbce6d69 1288 if (OP(scan) == NALNUM
76e3520e 1289 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
4633a7c4 1290 sayNO;
76e3520e 1291 nextchr = UCHARAT(++locinput);
a0d0e21e 1292 break;
a0ed51b3
LW
1293 case NALNUMLUTF8:
1294 PL_reg_flags |= RF_tainted;
1295 /* FALL THROUGH */
1296 case NALNUMUTF8:
1297 if (!nextchr && locinput >= PL_regeol)
1298 sayNO;
1299 if (nextchr & 0x80) {
1300 if (OP(scan) == NALNUMUTF8
dfe13c55
GS
1301 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1302 : isALNUM_LC_utf8((U8*)locinput))
1303 {
a0ed51b3 1304 sayNO;
dfe13c55 1305 }
a176fa2a 1306 locinput += utf8skip[nextchr];
a0ed51b3
LW
1307 nextchr = UCHARAT(locinput);
1308 break;
1309 }
1310 if (OP(scan) == NALNUMUTF8
1311 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
1312 sayNO;
1313 nextchr = UCHARAT(++locinput);
1314 break;
bbce6d69 1315 case BOUNDL:
1316 case NBOUNDL:
3280af22 1317 PL_reg_flags |= RF_tainted;
bbce6d69 1318 /* FALL THROUGH */
a0d0e21e 1319 case BOUND:
bbce6d69 1320 case NBOUND:
1321 /* was last char in word? */
3280af22 1322 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
bbce6d69 1323 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1324 ln = isALNUM(ln);
76e3520e 1325 n = isALNUM(nextchr);
bbce6d69 1326 }
1327 else {
1328 ln = isALNUM_LC(ln);
76e3520e 1329 n = isALNUM_LC(nextchr);
bbce6d69 1330 }
95bac841 1331 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1332 sayNO;
a0d0e21e 1333 break;
a0ed51b3
LW
1334 case BOUNDLUTF8:
1335 case NBOUNDLUTF8:
1336 PL_reg_flags |= RF_tainted;
1337 /* FALL THROUGH */
1338 case BOUNDUTF8:
1339 case NBOUNDUTF8:
1340 /* was last char in word? */
dfe13c55
GS
1341 ln = (locinput != PL_regbol)
1342 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
a0ed51b3
LW
1343 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1344 ln = isALNUM_uni(ln);
dfe13c55 1345 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3
LW
1346 }
1347 else {
1348 ln = isALNUM_LC_uni(ln);
dfe13c55 1349 n = isALNUM_LC_utf8((U8*)locinput);
a0ed51b3
LW
1350 }
1351 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
1352 sayNO;
1353 break;
bbce6d69 1354 case SPACEL:
3280af22 1355 PL_reg_flags |= RF_tainted;
bbce6d69 1356 /* FALL THROUGH */
a0d0e21e 1357 case SPACE:
3280af22 1358 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1359 sayNO;
bbce6d69 1360 if (!(OP(scan) == SPACE
76e3520e 1361 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1362 sayNO;
76e3520e 1363 nextchr = UCHARAT(++locinput);
a0d0e21e 1364 break;
a0ed51b3
LW
1365 case SPACELUTF8:
1366 PL_reg_flags |= RF_tainted;
1367 /* FALL THROUGH */
1368 case SPACEUTF8:
1369 if (!nextchr && locinput >= PL_regeol)
1370 sayNO;
1371 if (nextchr & 0x80) {
1372 if (!(OP(scan) == SPACEUTF8
dfe13c55
GS
1373 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1374 : isSPACE_LC_utf8((U8*)locinput)))
1375 {
a0ed51b3 1376 sayNO;
dfe13c55 1377 }
a176fa2a 1378 locinput += utf8skip[nextchr];
a0ed51b3
LW
1379 nextchr = UCHARAT(locinput);
1380 break;
1381 }
1382 if (!(OP(scan) == SPACEUTF8
1383 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
1384 sayNO;
1385 nextchr = UCHARAT(++locinput);
1386 break;
bbce6d69 1387 case NSPACEL:
3280af22 1388 PL_reg_flags |= RF_tainted;
bbce6d69 1389 /* FALL THROUGH */
a0d0e21e 1390 case NSPACE:
76e3520e 1391 if (!nextchr)
4633a7c4 1392 sayNO;
bbce6d69 1393 if (OP(scan) == SPACE
76e3520e 1394 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1395 sayNO;
76e3520e 1396 nextchr = UCHARAT(++locinput);
a0d0e21e 1397 break;
a0ed51b3
LW
1398 case NSPACELUTF8:
1399 PL_reg_flags |= RF_tainted;
1400 /* FALL THROUGH */
1401 case NSPACEUTF8:
1402 if (!nextchr)
1403 sayNO;
1404 if (nextchr & 0x80) {
1405 if (OP(scan) == NSPACEUTF8
dfe13c55
GS
1406 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1407 : isSPACE_LC_utf8((U8*)locinput))
1408 {
a0ed51b3 1409 sayNO;
dfe13c55 1410 }
a176fa2a 1411 locinput += utf8skip[nextchr];
a0ed51b3
LW
1412 nextchr = UCHARAT(locinput);
1413 break;
1414 }
1415 if (OP(scan) == NSPACEUTF8
1416 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
1417 sayNO;
1418 nextchr = UCHARAT(++locinput);
1419 break;
a0d0e21e 1420 case DIGIT:
76e3520e 1421 if (!isDIGIT(nextchr))
4633a7c4 1422 sayNO;
76e3520e 1423 nextchr = UCHARAT(++locinput);
a0d0e21e 1424 break;
a0ed51b3
LW
1425 case DIGITUTF8:
1426 if (nextchr & 0x80) {
dfe13c55 1427 if (!(swash_fetch(PL_utf8_digit,(U8*)locinput)))
a0ed51b3 1428 sayNO;
a176fa2a 1429 locinput += utf8skip[nextchr];
a0ed51b3
LW
1430 nextchr = UCHARAT(locinput);
1431 break;
1432 }
1433 if (!isDIGIT(nextchr))
1434 sayNO;
1435 nextchr = UCHARAT(++locinput);
1436 break;
a0d0e21e 1437 case NDIGIT:
3280af22 1438 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1439 sayNO;
76e3520e 1440 if (isDIGIT(nextchr))
4633a7c4 1441 sayNO;
76e3520e 1442 nextchr = UCHARAT(++locinput);
a0d0e21e 1443 break;
a0ed51b3
LW
1444 case NDIGITUTF8:
1445 if (!nextchr && locinput >= PL_regeol)
1446 sayNO;
1447 if (nextchr & 0x80) {
dfe13c55 1448 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 1449 sayNO;
a176fa2a 1450 locinput += utf8skip[nextchr];
a0ed51b3
LW
1451 nextchr = UCHARAT(locinput);
1452 break;
1453 }
1454 if (isDIGIT(nextchr))
1455 sayNO;
1456 nextchr = UCHARAT(++locinput);
1457 break;
1458 case CLUMP:
dfe13c55 1459 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 1460 sayNO;
a176fa2a 1461 locinput += utf8skip[nextchr];
dfe13c55 1462 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
1463 locinput += UTF8SKIP(locinput);
1464 if (locinput > PL_regeol)
1465 sayNO;
1466 nextchr = UCHARAT(locinput);
1467 break;
c8756f30 1468 case REFFL:
3280af22 1469 PL_reg_flags |= RF_tainted;
c8756f30 1470 /* FALL THROUGH */
c277df42 1471 case REF:
c8756f30 1472 case REFF:
c277df42 1473 n = ARG(scan); /* which paren pair */
3280af22
NIS
1474 s = PL_regstartp[n];
1475 if (*PL_reglastparen < n || !s)
af3f8c16 1476 sayNO; /* Do not match unless seen CLOSEn. */
3280af22 1477 if (s == PL_regendp[n])
a0d0e21e 1478 break;
a0ed51b3
LW
1479
1480 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
1481 char *l = locinput;
1482 char *e = PL_regendp[n];
1483 /*
1484 * Note that we can't do the "other character" lookup trick as
1485 * in the 8-bit case (no pun intended) because in Unicode we
1486 * have to map both upper and title case to lower case.
1487 */
1488 if (OP(scan) == REFF) {
1489 while (s < e) {
1490 if (l >= PL_regeol)
1491 sayNO;
dfe13c55 1492 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
1493 sayNO;
1494 s += UTF8SKIP(s);
1495 l += UTF8SKIP(l);
1496 }
1497 }
1498 else {
1499 while (s < e) {
1500 if (l >= PL_regeol)
1501 sayNO;
dfe13c55 1502 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
1503 sayNO;
1504 s += UTF8SKIP(s);
1505 l += UTF8SKIP(l);
1506 }
1507 }
1508 locinput = l;
1509 nextchr = UCHARAT(locinput);
1510 break;
1511 }
1512
a0d0e21e 1513 /* Inline the first character, for speed. */
76e3520e 1514 if (UCHARAT(s) != nextchr &&
c8756f30
AK
1515 (OP(scan) == REF ||
1516 (UCHARAT(s) != ((OP(scan) == REFF
76e3520e 1517 ? fold : fold_locale)[nextchr]))))
4633a7c4 1518 sayNO;
3280af22
NIS
1519 ln = PL_regendp[n] - s;
1520 if (locinput + ln > PL_regeol)
4633a7c4 1521 sayNO;
c8756f30
AK
1522 if (ln > 1 && (OP(scan) == REF
1523 ? memNE(s, locinput, ln)
1524 : (OP(scan) == REFF
1525 ? ibcmp(s, locinput, ln)
1526 : ibcmp_locale(s, locinput, ln))))
4633a7c4 1527 sayNO;
a0d0e21e 1528 locinput += ln;
76e3520e 1529 nextchr = UCHARAT(locinput);
a0d0e21e
LW
1530 break;
1531
1532 case NOTHING:
c277df42 1533 case TAIL:
a0d0e21e
LW
1534 break;
1535 case BACK:
1536 break;
c277df42
IZ
1537 case EVAL:
1538 {
1539 dSP;
533c011a 1540 OP_4tree *oop = PL_op;
3280af22
NIS
1541 COP *ocurcop = PL_curcop;
1542 SV **ocurpad = PL_curpad;
c277df42
IZ
1543 SV *ret;
1544
1545 n = ARG(scan);
533c011a
NIS
1546 PL_op = (OP_4tree*)PL_regdata->data[n];
1547 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
3280af22 1548 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 1]);
c277df42 1549
76e3520e 1550 CALLRUNOPS(); /* Scalar context. */
c277df42
IZ
1551 SPAGAIN;
1552 ret = POPs;
1553 PUTBACK;
1554
1555 if (logical) {
1556 logical = 0;
1557 sw = SvTRUE(ret);
a0ed51b3
LW
1558 }
1559 else
3280af22 1560 sv_setsv(save_scalar(PL_replgv), ret);
533c011a 1561 PL_op = oop;
3280af22
NIS
1562 PL_curpad = ocurpad;
1563 PL_curcop = ocurcop;
c277df42
IZ
1564 break;
1565 }
a0d0e21e 1566 case OPEN:
c277df42 1567 n = ARG(scan); /* which paren pair */
3280af22
NIS
1568 PL_reg_start_tmp[n] = locinput;
1569 if (n > PL_regsize)
1570 PL_regsize = n;
a0d0e21e
LW
1571 break;
1572 case CLOSE:
c277df42 1573 n = ARG(scan); /* which paren pair */
3280af22
NIS
1574 PL_regstartp[n] = PL_reg_start_tmp[n];
1575 PL_regendp[n] = locinput;
1576 if (n > *PL_reglastparen)
1577 *PL_reglastparen = n;
a0d0e21e 1578 break;
c277df42
IZ
1579 case GROUPP:
1580 n = ARG(scan); /* which paren pair */
3280af22 1581 sw = (*PL_reglastparen >= n && PL_regendp[n] != NULL);
c277df42
IZ
1582 break;
1583 case IFTHEN:
1584 if (sw)
1585 next = NEXTOPER(NEXTOPER(scan));
1586 else {
1587 next = scan + ARG(scan);
1588 if (OP(next) == IFTHEN) /* Fake one. */
1589 next = NEXTOPER(NEXTOPER(next));
1590 }
1591 break;
1592 case LOGICAL:
1593 logical = 1;
1594 break;
a0d0e21e
LW
1595 case CURLYX: {
1596 CURCUR cc;
3280af22 1597 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
1598
1599 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
1600 next += ARG(next);
3280af22
NIS
1601 cc.oldcc = PL_regcc;
1602 PL_regcc = &cc;
1603 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
1604 cc.cur = -1;
1605 cc.min = ARG1(scan);
1606 cc.max = ARG2(scan);
c277df42 1607 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
1608 cc.next = next;
1609 cc.minmod = minmod;
1610 cc.lastloc = 0;
3280af22 1611 PL_reginput = locinput;
a0d0e21e
LW
1612 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
1613 regcpblow(cp);
3280af22 1614 PL_regcc = cc.oldcc;
4633a7c4 1615 saySAME(n);
a0d0e21e
LW
1616 }
1617 /* NOT REACHED */
1618 case WHILEM: {
1619 /*
1620 * This is really hard to understand, because after we match
1621 * what we're trying to match, we must make sure the rest of
1622 * the RE is going to match for sure, and to do that we have
1623 * to go back UP the parse tree by recursing ever deeper. And
1624 * if it fails, we have to reset our parent's current state
1625 * that we can try again after backing off.
1626 */
1627
c277df42 1628 CHECKPOINT cp, lastcp;
3280af22 1629 CURCUR* cc = PL_regcc;
c277df42
IZ
1630 char *lastloc = cc->lastloc; /* Detection of 0-len. */
1631
4633a7c4 1632 n = cc->cur + 1; /* how many we know we matched */
3280af22 1633 PL_reginput = locinput;
a0d0e21e 1634
c277df42
IZ
1635 DEBUG_r(
1636 PerlIO_printf(Perl_debug_log,
1637 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 1638 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
1639 (long)n, (long)cc->min,
1640 (long)cc->max, (long)cc)
1641 );
4633a7c4 1642
a0d0e21e
LW
1643 /* If degenerate scan matches "", assume scan done. */
1644
579cf2c3 1645 if (locinput == cc->lastloc && n >= cc->min) {
3280af22
NIS
1646 PL_regcc = cc->oldcc;
1647 ln = PL_regcc->cur;
c277df42 1648 DEBUG_r(
c3464db5
DD
1649 PerlIO_printf(Perl_debug_log,
1650 "%*s empty match detected, try continuation...\n",
3280af22 1651 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1652 );
a0d0e21e 1653 if (regmatch(cc->next))
4633a7c4 1654 sayYES;
c277df42 1655 DEBUG_r(
c3464db5
DD
1656 PerlIO_printf(Perl_debug_log,
1657 "%*s failed...\n",
3280af22 1658 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1659 );
3280af22
NIS
1660 PL_regcc->cur = ln;
1661 PL_regcc = cc;
4633a7c4 1662 sayNO;
a0d0e21e
LW
1663 }
1664
1665 /* First just match a string of min scans. */
1666
1667 if (n < cc->min) {
1668 cc->cur = n;
1669 cc->lastloc = locinput;
4633a7c4
LW
1670 if (regmatch(cc->scan))
1671 sayYES;
1672 cc->cur = n - 1;
c277df42
IZ
1673 cc->lastloc = lastloc;
1674 DEBUG_r(
c3464db5
DD
1675 PerlIO_printf(Perl_debug_log,
1676 "%*s failed...\n",
3280af22 1677 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1678 );
4633a7c4 1679 sayNO;
a0d0e21e
LW
1680 }
1681
1682 /* Prefer next over scan for minimal matching. */
1683
1684 if (cc->minmod) {
3280af22
NIS
1685 PL_regcc = cc->oldcc;
1686 ln = PL_regcc->cur;
5f05dabc 1687 cp = regcppush(cc->parenfloor);
c277df42 1688 REGCP_SET;
5f05dabc 1689 if (regmatch(cc->next)) {
c277df42 1690 regcpblow(cp);
4633a7c4 1691 sayYES; /* All done. */
5f05dabc 1692 }
c277df42 1693 REGCP_UNWIND;
5f05dabc 1694 regcppop();
3280af22
NIS
1695 PL_regcc->cur = ln;
1696 PL_regcc = cc;
a0d0e21e 1697
c277df42 1698 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 1699 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
1700 && !(PL_reg_flags & RF_warned)) {
1701 PL_reg_flags |= RF_warned;
599cee73 1702 warner(WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
1703 "Complex regular subexpression recursion",
1704 REG_INFTY - 1);
c277df42 1705 }
4633a7c4 1706 sayNO;
c277df42 1707 }
a687059c 1708
c277df42 1709 DEBUG_r(
c3464db5
DD
1710 PerlIO_printf(Perl_debug_log,
1711 "%*s trying longer...\n",
3280af22 1712 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1713 );
a0d0e21e 1714 /* Try scanning more and see if it helps. */
3280af22 1715 PL_reginput = locinput;
a0d0e21e
LW
1716 cc->cur = n;
1717 cc->lastloc = locinput;
5f05dabc 1718 cp = regcppush(cc->parenfloor);
c277df42 1719 REGCP_SET;
5f05dabc 1720 if (regmatch(cc->scan)) {
c277df42 1721 regcpblow(cp);
4633a7c4 1722 sayYES;
5f05dabc 1723 }
c277df42 1724 DEBUG_r(
c3464db5
DD
1725 PerlIO_printf(Perl_debug_log,
1726 "%*s failed...\n",
3280af22 1727 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
1728 );
1729 REGCP_UNWIND;
5f05dabc 1730 regcppop();
4633a7c4 1731 cc->cur = n - 1;
c277df42 1732 cc->lastloc = lastloc;
4633a7c4 1733 sayNO;
a0d0e21e
LW
1734 }
1735
1736 /* Prefer scan over next for maximal matching. */
1737
1738 if (n < cc->max) { /* More greed allowed? */
5f05dabc 1739 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
1740 cc->cur = n;
1741 cc->lastloc = locinput;
c277df42 1742 REGCP_SET;
5f05dabc 1743 if (regmatch(cc->scan)) {
c277df42 1744 regcpblow(cp);
4633a7c4 1745 sayYES;
5f05dabc 1746 }
c277df42 1747 REGCP_UNWIND;
a0d0e21e 1748 regcppop(); /* Restore some previous $<digit>s? */
3280af22 1749 PL_reginput = locinput;
c277df42 1750 DEBUG_r(
c3464db5
DD
1751 PerlIO_printf(Perl_debug_log,
1752 "%*s failed, try continuation...\n",
3280af22 1753 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
1754 );
1755 }
599cee73
PM
1756 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
1757 && !(PL_reg_flags & RF_warned)) {
3280af22 1758 PL_reg_flags |= RF_warned;
599cee73 1759 warner(WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
1760 "Complex regular subexpression recursion",
1761 REG_INFTY - 1);
a0d0e21e
LW
1762 }
1763
1764 /* Failed deeper matches of scan, so see if this one works. */
3280af22
NIS
1765 PL_regcc = cc->oldcc;
1766 ln = PL_regcc->cur;
a0d0e21e 1767 if (regmatch(cc->next))
4633a7c4 1768 sayYES;
c277df42 1769 DEBUG_r(
c3464db5 1770 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3280af22 1771 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1772 );
3280af22
NIS
1773 PL_regcc->cur = ln;
1774 PL_regcc = cc;
4633a7c4 1775 cc->cur = n - 1;
c277df42 1776 cc->lastloc = lastloc;
4633a7c4 1777 sayNO;
a0d0e21e
LW
1778 }
1779 /* NOT REACHED */
c277df42
IZ
1780 case BRANCHJ:
1781 next = scan + ARG(scan);
1782 if (next == scan)
1783 next = NULL;
1784 inner = NEXTOPER(NEXTOPER(scan));
1785 goto do_branch;
1786 case BRANCH:
1787 inner = NEXTOPER(scan);
1788 do_branch:
1789 {
1790 CHECKPOINT lastcp;
1791 c1 = OP(scan);
1792 if (OP(next) != c1) /* No choice. */
1793 next = inner; /* Avoid recursion. */
a0d0e21e 1794 else {
3280af22 1795 int lastparen = *PL_reglastparen;
c277df42
IZ
1796
1797 REGCP_SET;
a0d0e21e 1798 do {
3280af22 1799 PL_reginput = locinput;
c277df42 1800 if (regmatch(inner))
4633a7c4 1801 sayYES;
c277df42 1802 REGCP_UNWIND;
3280af22
NIS
1803 for (n = *PL_reglastparen; n > lastparen; n--)
1804 PL_regendp[n] = 0;
1805 *PL_reglastparen = n;
c277df42 1806 scan = next;
a0d0e21e 1807 /*SUPPRESS 560*/
c277df42
IZ
1808 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
1809 next += n;
a0d0e21e 1810 else
c277df42 1811 next = NULL;
c277df42
IZ
1812 inner = NEXTOPER(scan);
1813 if (c1 == BRANCHJ) {
1814 inner = NEXTOPER(inner);
1815 }
1816 } while (scan != NULL && OP(scan) == c1);
4633a7c4 1817 sayNO;
a0d0e21e 1818 /* NOTREACHED */
a687059c 1819 }
a0d0e21e
LW
1820 }
1821 break;
1822 case MINMOD:
1823 minmod = 1;
1824 break;
c277df42
IZ
1825 case CURLYM:
1826 {
00db4c45 1827 I32 l = 0;
c277df42
IZ
1828 CHECKPOINT lastcp;
1829
1830 /* We suppose that the next guy does not need
1831 backtracking: in particular, it is of constant length,
1832 and has no parenths to influence future backrefs. */
1833 ln = ARG1(scan); /* min to match */
1834 n = ARG2(scan); /* max to match */
c277df42
IZ
1835 paren = scan->flags;
1836 if (paren) {
3280af22
NIS
1837 if (paren > PL_regsize)
1838 PL_regsize = paren;
1839 if (paren > *PL_reglastparen)
1840 *PL_reglastparen = paren;
c277df42 1841 }
dc45a647 1842 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
1843 if (paren)
1844 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 1845 PL_reginput = locinput;
c277df42
IZ
1846 if (minmod) {
1847 minmod = 0;
1848 if (ln && regrepeat_hard(scan, ln, &l) < ln)
1849 sayNO;
5f4b28b2 1850 if (ln && l == 0 && n >= ln
c277df42
IZ
1851 /* In fact, this is tricky. If paren, then the
1852 fact that we did/didnot match may influence
1853 future execution. */
1854 && !(paren && ln == 0))
1855 ln = n;
3280af22 1856 locinput = PL_reginput;
c277df42
IZ
1857 if (regkind[(U8)OP(next)] == EXACT) {
1858 c1 = UCHARAT(OPERAND(next) + 1);
1859 if (OP(next) == EXACTF)
1860 c2 = fold[c1];
1861 else if (OP(next) == EXACTFL)
1862 c2 = fold_locale[c1];
1863 else
1864 c2 = c1;
a0ed51b3
LW
1865 }
1866 else
c277df42
IZ
1867 c1 = c2 = -1000;
1868 REGCP_SET;
5f4b28b2 1869 /* This may be improved if l == 0. */
c277df42
IZ
1870 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
1871 /* If it could work, try it. */
1872 if (c1 == -1000 ||
3280af22
NIS
1873 UCHARAT(PL_reginput) == c1 ||
1874 UCHARAT(PL_reginput) == c2)
c277df42
IZ
1875 {
1876 if (paren) {
1877 if (n) {
dfe13c55 1878 PL_regstartp[paren] = HOPc(PL_reginput, -l);
3280af22 1879 PL_regendp[paren] = PL_reginput;
a0ed51b3
LW
1880 }
1881 else
3280af22 1882 PL_regendp[paren] = NULL;
c277df42
IZ
1883 }
1884 if (regmatch(next))
1885 sayYES;
1886 REGCP_UNWIND;
1887 }
1888 /* Couldn't or didn't -- move forward. */
3280af22 1889 PL_reginput = locinput;
c277df42
IZ
1890 if (regrepeat_hard(scan, 1, &l)) {
1891 ln++;
3280af22 1892 locinput = PL_reginput;
c277df42
IZ
1893 }
1894 else
1895 sayNO;
1896 }
a0ed51b3
LW
1897 }
1898 else {
c277df42
IZ
1899 n = regrepeat_hard(scan, n, &l);
1900 if (n != 0 && l == 0
1901 /* In fact, this is tricky. If paren, then the
1902 fact that we did/didnot match may influence
1903 future execution. */
1904 && !(paren && ln == 0))
1905 ln = n;
3280af22 1906 locinput = PL_reginput;
c277df42 1907 DEBUG_r(
5c0ca799
GS
1908 PerlIO_printf(Perl_debug_log,
1909 "%*s matched %ld times, len=%ld...\n",
3280af22 1910 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42
IZ
1911 );
1912 if (n >= ln) {
1913 if (regkind[(U8)OP(next)] == EXACT) {
1914 c1 = UCHARAT(OPERAND(next) + 1);
1915 if (OP(next) == EXACTF)
1916 c2 = fold[c1];
1917 else if (OP(next) == EXACTFL)
1918 c2 = fold_locale[c1];
1919 else
1920 c2 = c1;
a0ed51b3
LW
1921 }
1922 else
c277df42
IZ
1923 c1 = c2 = -1000;
1924 }
1925 REGCP_SET;
1926 while (n >= ln) {
1927 /* If it could work, try it. */
1928 if (c1 == -1000 ||
3280af22
NIS
1929 UCHARAT(PL_reginput) == c1 ||
1930 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
1931 {
1932 DEBUG_r(
c3464db5
DD
1933 PerlIO_printf(Perl_debug_log,
1934 "%*s trying tail with n=%ld...\n",
3280af22 1935 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3
LW
1936 );
1937 if (paren) {
1938 if (n) {
dfe13c55 1939 PL_regstartp[paren] = HOPc(PL_reginput, -l);
a0ed51b3 1940 PL_regendp[paren] = PL_reginput;
c277df42 1941 }
a0ed51b3
LW
1942 else
1943 PL_regendp[paren] = NULL;
c277df42 1944 }
a0ed51b3
LW
1945 if (regmatch(next))
1946 sayYES;
1947 REGCP_UNWIND;
1948 }
c277df42
IZ
1949 /* Couldn't or didn't -- back up. */
1950 n--;
dfe13c55 1951 locinput = HOPc(locinput, -l);
3280af22 1952 PL_reginput = locinput;
c277df42
IZ
1953 }
1954 }
1955 sayNO;
1956 break;
1957 }
1958 case CURLYN:
1959 paren = scan->flags; /* Which paren to set */
3280af22
NIS
1960 if (paren > PL_regsize)
1961 PL_regsize = paren;
1962 if (paren > *PL_reglastparen)
1963 *PL_reglastparen = paren;
c277df42
IZ
1964 ln = ARG1(scan); /* min to match */
1965 n = ARG2(scan); /* max to match */
dc45a647 1966 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 1967 goto repeat;
a0d0e21e 1968 case CURLY:
c277df42 1969 paren = 0;
a0d0e21e
LW
1970 ln = ARG1(scan); /* min to match */
1971 n = ARG2(scan); /* max to match */
dc45a647 1972 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
1973 goto repeat;
1974 case STAR:
1975 ln = 0;
c277df42 1976 n = REG_INFTY;
a0d0e21e 1977 scan = NEXTOPER(scan);
c277df42 1978 paren = 0;
a0d0e21e
LW
1979 goto repeat;
1980 case PLUS:
c277df42
IZ
1981 ln = 1;
1982 n = REG_INFTY;
1983 scan = NEXTOPER(scan);
1984 paren = 0;
1985 repeat:
a0d0e21e
LW
1986 /*
1987 * Lookahead to avoid useless match attempts
1988 * when we know what character comes next.
1989 */
bbce6d69 1990 if (regkind[(U8)OP(next)] == EXACT) {
1991 c1 = UCHARAT(OPERAND(next) + 1);
1992 if (OP(next) == EXACTF)
1993 c2 = fold[c1];
1994 else if (OP(next) == EXACTFL)
1995 c2 = fold_locale[c1];
1996 else
1997 c2 = c1;
1998 }
a0d0e21e 1999 else
bbce6d69 2000 c1 = c2 = -1000;
3280af22 2001 PL_reginput = locinput;
a0d0e21e 2002 if (minmod) {
c277df42 2003 CHECKPOINT lastcp;
a0d0e21e
LW
2004 minmod = 0;
2005 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2006 sayNO;
a0ed51b3 2007 locinput = PL_reginput;
c277df42
IZ
2008 REGCP_SET;
2009 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2010 /* If it could work, try it. */
bbce6d69 2011 if (c1 == -1000 ||
3280af22
NIS
2012 UCHARAT(PL_reginput) == c1 ||
2013 UCHARAT(PL_reginput) == c2)
bbce6d69 2014 {
c277df42
IZ
2015 if (paren) {
2016 if (n) {
dfe13c55 2017 PL_regstartp[paren] = HOPc(PL_reginput, -1);
3280af22 2018 PL_regendp[paren] = PL_reginput;
a0ed51b3
LW
2019 }
2020 else
3280af22 2021 PL_regendp[paren] = NULL;
c277df42 2022 }
a0d0e21e 2023 if (regmatch(next))
4633a7c4 2024 sayYES;
c277df42 2025 REGCP_UNWIND;
bbce6d69 2026 }
c277df42 2027 /* Couldn't or didn't -- move forward. */
a0ed51b3 2028 PL_reginput = locinput;
a0d0e21e
LW
2029 if (regrepeat(scan, 1)) {
2030 ln++;
a0ed51b3
LW
2031 locinput = PL_reginput;
2032 }
2033 else
4633a7c4 2034 sayNO;
a0d0e21e
LW
2035 }
2036 }
2037 else {
c277df42 2038 CHECKPOINT lastcp;
a0d0e21e 2039 n = regrepeat(scan, n);
a0ed51b3 2040 locinput = PL_reginput;
a0d0e21e 2041 if (ln < n && regkind[(U8)OP(next)] == EOL &&
3280af22 2042 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2043 ln = n; /* why back off? */
c277df42
IZ
2044 REGCP_SET;
2045 if (paren) {
2046 while (n >= ln) {
2047 /* If it could work, try it. */
2048 if (c1 == -1000 ||
3280af22
NIS
2049 UCHARAT(PL_reginput) == c1 ||
2050 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2051 {
2052 if (paren && n) {
2053 if (n) {
dfe13c55 2054 PL_regstartp[paren] = HOPc(PL_reginput, -1);
3280af22 2055 PL_regendp[paren] = PL_reginput;
a0ed51b3
LW
2056 }
2057 else
3280af22 2058 PL_regendp[paren] = NULL;
c277df42
IZ
2059 }
2060 if (regmatch(next))
2061 sayYES;
2062 REGCP_UNWIND;
2063 }
2064 /* Couldn't or didn't -- back up. */
2065 n--;
dfe13c55 2066 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2067 }
a0ed51b3
LW
2068 }
2069 else {
c277df42
IZ
2070 while (n >= ln) {
2071 /* If it could work, try it. */
2072 if (c1 == -1000 ||
3280af22
NIS
2073 UCHARAT(PL_reginput) == c1 ||
2074 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2075 {
2076 if (regmatch(next))
2077 sayYES;
2078 REGCP_UNWIND;
2079 }
2080 /* Couldn't or didn't -- back up. */
2081 n--;
dfe13c55 2082 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2083 }
a0d0e21e
LW
2084 }
2085 }
4633a7c4 2086 sayNO;
c277df42 2087 break;
a0d0e21e 2088 case END:
3280af22 2089 if (locinput < PL_regtill)
7e5428c5
IZ
2090 sayNO; /* Cannot match: too short. */
2091 /* Fall through */
2092 case SUCCEED:
3280af22 2093 PL_reginput = locinput; /* put where regtry can find it */
4633a7c4 2094 sayYES; /* Success! */
c277df42
IZ
2095 case SUSPEND:
2096 n = 1;
2097 goto do_ifmatch;
a0d0e21e 2098 case UNLESSM:
c277df42 2099 n = 0;
a0ed51b3 2100 if (scan->flags) {
dfe13c55 2101 s = HOPMAYBEc(locinput, -scan->flags);
a0ed51b3
LW
2102 if (!s)
2103 goto say_yes;
2104 PL_reginput = s;
2105 }
2106 else
2107 PL_reginput = locinput;
c277df42
IZ
2108 goto do_ifmatch;
2109 case IFMATCH:
2110 n = 1;
a0ed51b3 2111 if (scan->flags) {
dfe13c55 2112 s = HOPMAYBEc(locinput, -scan->flags);
a0ed51b3
LW
2113 if (!s)
2114 goto say_no;
2115 PL_reginput = s;
2116 }
2117 else
2118 PL_reginput = locinput;
2119
c277df42 2120 do_ifmatch:
c277df42
IZ
2121 inner = NEXTOPER(NEXTOPER(scan));
2122 if (regmatch(inner) != n) {
2123 say_no:
2124 if (logical) {
2125 logical = 0;
2126 sw = 0;
2127 goto do_longjump;
a0ed51b3
LW
2128 }
2129 else
c277df42
IZ
2130 sayNO;
2131 }
2132 say_yes:
2133 if (logical) {
2134 logical = 0;
2135 sw = 1;
2136 }
fe44a5e8 2137 if (OP(scan) == SUSPEND) {
3280af22 2138 locinput = PL_reginput;
565764a8 2139 nextchr = UCHARAT(locinput);
fe44a5e8 2140 }
c277df42
IZ
2141 /* FALL THROUGH. */
2142 case LONGJMP:
2143 do_longjump:
2144 next = scan + ARG(scan);
2145 if (next == scan)
2146 next = NULL;
a0d0e21e
LW
2147 break;
2148 default:
c030ccd9 2149 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 2150 (unsigned long)scan, OP(scan));
a0d0e21e 2151 FAIL("regexp memory corruption");
a687059c 2152 }
a0d0e21e
LW
2153 scan = next;
2154 }
a687059c 2155
a0d0e21e
LW
2156 /*
2157 * We get here only if there's trouble -- normally "case END" is
2158 * the terminating point.
2159 */
2160 FAIL("corrupted regexp pointers");
2161 /*NOTREACHED*/
4633a7c4
LW
2162 sayNO;
2163
2164yes:
2165#ifdef DEBUGGING
3280af22 2166 PL_regindent--;
4633a7c4
LW
2167#endif
2168 return 1;
2169
2170no:
2171#ifdef DEBUGGING
3280af22 2172 PL_regindent--;
4633a7c4 2173#endif
a0d0e21e 2174 return 0;
a687059c
LW
2175}
2176
2177/*
2178 - regrepeat - repeatedly match something simple, report how many
2179 */
2180/*
2181 * [This routine now assumes that it will only match on things of length 1.
2182 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 2183 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 2184 */
76e3520e 2185STATIC I32
c277df42 2186regrepeat(regnode *p, I32 max)
a687059c 2187{
5c0ca799 2188 dTHR;
a0d0e21e
LW
2189 register char *scan;
2190 register char *opnd;
2191 register I32 c;
3280af22 2192 register char *loceol = PL_regeol;
a0ed51b3 2193 register I32 hardcount = 0;
a0d0e21e 2194
3280af22 2195 scan = PL_reginput;
c277df42 2196 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 2197 loceol = scan + max;
161b471a 2198 opnd = (char *) OPERAND(p);
a0d0e21e
LW
2199 switch (OP(p)) {
2200 case ANY:
2201 while (scan < loceol && *scan != '\n')
2202 scan++;
2203 break;
2204 case SANY:
2205 scan = loceol;
2206 break;
a0ed51b3
LW
2207 case ANYUTF8:
2208 loceol = PL_regeol;
2209 while (scan < loceol && *scan != '\n') {
2210 scan += UTF8SKIP(scan);
2211 hardcount++;
2212 }
2213 break;
2214 case SANYUTF8:
2215 loceol = PL_regeol;
2216 while (scan < loceol) {
2217 scan += UTF8SKIP(scan);
2218 hardcount++;
2219 }
2220 break;
bbce6d69 2221 case EXACT: /* length of string is 1 */
2222 c = UCHARAT(++opnd);
2223 while (scan < loceol && UCHARAT(scan) == c)
2224 scan++;
2225 break;
2226 case EXACTF: /* length of string is 1 */
2227 c = UCHARAT(++opnd);
2228 while (scan < loceol &&
2229 (UCHARAT(scan) == c || UCHARAT(scan) == fold[c]))
2230 scan++;
2231 break;
2232 case EXACTFL: /* length of string is 1 */
3280af22 2233 PL_reg_flags |= RF_tainted;
bbce6d69 2234 c = UCHARAT(++opnd);
2235 while (scan < loceol &&
2236 (UCHARAT(scan) == c || UCHARAT(scan) == fold_locale[c]))
a0d0e21e
LW
2237 scan++;
2238 break;
a0ed51b3
LW
2239 case ANYOFUTF8:
2240 loceol = PL_regeol;
2241 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
2242 scan += UTF8SKIP(scan);
2243 hardcount++;
2244 }
2245 break;
a0d0e21e 2246 case ANYOF:
ae5c130c 2247 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 2248 scan++;
a0d0e21e
LW
2249 break;
2250 case ALNUM:
2251 while (scan < loceol && isALNUM(*scan))
2252 scan++;
2253 break;
a0ed51b3
LW
2254 case ALNUMUTF8:
2255 loceol = PL_regeol;
dfe13c55 2256 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
2257 scan += UTF8SKIP(scan);
2258 hardcount++;
2259 }
2260 break;
bbce6d69 2261 case ALNUML:
3280af22 2262 PL_reg_flags |= RF_tainted;
bbce6d69 2263 while (scan < loceol && isALNUM_LC(*scan))
2264 scan++;
2265 break;
a0ed51b3
LW
2266 case ALNUMLUTF8:
2267 PL_reg_flags |= RF_tainted;
2268 loceol = PL_regeol;
dfe13c55 2269 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
2270 scan += UTF8SKIP(scan);
2271 hardcount++;
2272 }
2273 break;
2274 break;
a0d0e21e
LW
2275 case NALNUM:
2276 while (scan < loceol && !isALNUM(*scan))
2277 scan++;
2278 break;
a0ed51b3
LW
2279 case NALNUMUTF8:
2280 loceol = PL_regeol;
dfe13c55 2281 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
2282 scan += UTF8SKIP(scan);
2283 hardcount++;
2284 }
2285 break;
bbce6d69 2286 case NALNUML:
3280af22 2287 PL_reg_flags |= RF_tainted;
bbce6d69 2288 while (scan < loceol && !isALNUM_LC(*scan))
2289 scan++;
2290 break;
a0ed51b3
LW
2291 case NALNUMLUTF8:
2292 PL_reg_flags |= RF_tainted;
2293 loceol = PL_regeol;
dfe13c55 2294 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
2295 scan += UTF8SKIP(scan);
2296 hardcount++;
2297 }
2298 break;
a0d0e21e
LW
2299 case SPACE:
2300 while (scan < loceol && isSPACE(*scan))
2301 scan++;
2302 break;
a0ed51b3
LW
2303 case SPACEUTF8:
2304 loceol = PL_regeol;
dfe13c55 2305 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
2306 scan += UTF8SKIP(scan);
2307 hardcount++;
2308 }
2309 break;
bbce6d69 2310 case SPACEL:
3280af22 2311 PL_reg_flags |= RF_tainted;
bbce6d69 2312 while (scan < loceol && isSPACE_LC(*scan))
2313 scan++;
2314 break;
a0ed51b3
LW
2315 case SPACELUTF8:
2316 PL_reg_flags |= RF_tainted;
2317 loceol = PL_regeol;
dfe13c55 2318 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
2319 scan += UTF8SKIP(scan);
2320 hardcount++;
2321 }
2322 break;
a0d0e21e
LW
2323 case NSPACE:
2324 while (scan < loceol && !isSPACE(*scan))
2325 scan++;
2326 break;
a0ed51b3
LW
2327 case NSPACEUTF8:
2328 loceol = PL_regeol;
dfe13c55 2329 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
2330 scan += UTF8SKIP(scan);
2331 hardcount++;
2332 }
2333 break;
bbce6d69 2334 case NSPACEL:
3280af22 2335 PL_reg_flags |= RF_tainted;
bbce6d69 2336 while (scan < loceol && !isSPACE_LC(*scan))
2337 scan++;
2338 break;
a0ed51b3
LW
2339 case NSPACELUTF8:
2340 PL_reg_flags |= RF_tainted;
2341 loceol = PL_regeol;
dfe13c55 2342 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
2343 scan += UTF8SKIP(scan);
2344 hardcount++;
2345 }
2346 break;
a0d0e21e
LW
2347 case DIGIT:
2348 while (scan < loceol && isDIGIT(*scan))
2349 scan++;
2350 break;
a0ed51b3
LW
2351 case DIGITUTF8:
2352 loceol = PL_regeol;
dfe13c55 2353 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
2354 scan += UTF8SKIP(scan);
2355 hardcount++;
2356 }
2357 break;
2358 break;
a0d0e21e
LW
2359 case NDIGIT:
2360 while (scan < loceol && !isDIGIT(*scan))
2361 scan++;
2362 break;
a0ed51b3
LW
2363 case NDIGITUTF8:
2364 loceol = PL_regeol;
dfe13c55 2365 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
2366 scan += UTF8SKIP(scan);
2367 hardcount++;
2368 }
2369 break;
a0d0e21e
LW
2370 default: /* Called on something of 0 width. */
2371 break; /* So match right here or not at all. */
2372 }
a687059c 2373
a0ed51b3
LW
2374 if (hardcount)
2375 c = hardcount;
2376 else
2377 c = scan - PL_reginput;
3280af22 2378 PL_reginput = scan;
a687059c 2379
c277df42
IZ
2380 DEBUG_r(
2381 {
2382 SV *prop = sv_newmortal();
2383
2384 regprop(prop, p);
2385 PerlIO_printf(Perl_debug_log,
2386 "%*s %s can match %ld times out of %ld...\n",
2387 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
2388 });
2389
a0d0e21e 2390 return(c);
a687059c
LW
2391}
2392
2393/*
c277df42
IZ
2394 - regrepeat_hard - repeatedly match something, report total lenth and length
2395 *
2396 * The repeater is supposed to have constant length.
2397 */
2398
76e3520e 2399STATIC I32
c277df42
IZ
2400regrepeat_hard(regnode *p, I32 max, I32 *lp)
2401{
5c0ca799 2402 dTHR;
c277df42
IZ
2403 register char *scan;
2404 register char *start;
3280af22 2405 register char *loceol = PL_regeol;
a0ed51b3
LW
2406 I32 l = 0;
2407 I32 count = 0;
2408
2409 if (!max)
2410 return 0;
c277df42 2411
3280af22 2412 start = PL_reginput;
a0ed51b3
LW
2413 if (UTF) {
2414 while (PL_reginput < loceol && (scan = PL_reginput, regmatch(p))) {
2415 if (!count++) {
2416 l = 0;
2417 while (start < PL_reginput) {
2418 l++;
2419 start += UTF8SKIP(start);
2420 }
2421 *lp = l;
2422 if (l == 0)
2423 return max;
2424 }
2425 if (count == max)
2426 return count;
2427 }
2428 }
2429 else {
2430 while (PL_reginput < loceol && (scan = PL_reginput, regmatch(p))) {
2431 if (!count++) {
2432 *lp = l = PL_reginput - start;
2433 if (max != REG_INFTY && l*max < loceol - scan)
2434 loceol = scan + l*max;
2435 if (l == 0)
2436 return max;
c277df42
IZ
2437 }
2438 }
2439 }
3280af22
NIS
2440 if (PL_reginput < loceol)
2441 PL_reginput = scan;
c277df42 2442
a0ed51b3 2443 return count;
c277df42
IZ
2444}
2445
2446/*
bbce6d69 2447 - regclass - determine if a character falls into a character class
2448 */
2449
76e3520e 2450STATIC bool
8ac85365 2451reginclass(register char *p, register I32 c)
bbce6d69 2452{
5c0ca799 2453 dTHR;
bbce6d69 2454 char flags = *p;
2455 bool match = FALSE;
2456
2457 c &= 0xFF;
ae5c130c 2458 if (ANYOF_TEST(p, c))
bbce6d69 2459 match = TRUE;
2460 else if (flags & ANYOF_FOLD) {
2461 I32 cf;
2462 if (flags & ANYOF_LOCALE) {
3280af22 2463 PL_reg_flags |= RF_tainted;
bbce6d69 2464 cf = fold_locale[c];
2465 }
2466 else
2467 cf = fold[c];
ae5c130c 2468 if (ANYOF_TEST(p, cf))
bbce6d69 2469 match = TRUE;
2470 }
2471
2472 if (!match && (flags & ANYOF_ISA)) {
3280af22 2473 PL_reg_flags |= RF_tainted;
bbce6d69 2474
2475 if (((flags & ANYOF_ALNUML) && isALNUM_LC(c)) ||
2476 ((flags & ANYOF_NALNUML) && !isALNUM_LC(c)) ||
2477 ((flags & ANYOF_SPACEL) && isSPACE_LC(c)) ||
2478 ((flags & ANYOF_NSPACEL) && !isSPACE_LC(c)))
2479 {
2480 match = TRUE;
2481 }
2482 }
2483
ae5c130c 2484 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 2485}
2486
a0ed51b3
LW
2487STATIC bool
2488reginclassutf8(regnode *f, U8 *p)
c485e607
NIS
2489{
2490 dTHR;
a0ed51b3
LW
2491 char flags = ARG1(f);
2492 bool match = FALSE;
2493 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
2494
2495 if (swash_fetch(sv, p))
2496 match = TRUE;
2497 else if (flags & ANYOF_FOLD) {
2498 I32 cf;
dfe13c55 2499 U8 tmpbuf[10];
a0ed51b3
LW
2500 if (flags & ANYOF_LOCALE) {
2501 PL_reg_flags |= RF_tainted;
2502 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
2503 }
2504 else
2505 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
2506 if (swash_fetch(sv, tmpbuf))
2507 match = TRUE;
2508 }
2509
2510 if (!match && (flags & ANYOF_ISA)) {
2511 PL_reg_flags |= RF_tainted;
2512
2513 if (((flags & ANYOF_ALNUML) && isALNUM_LC_utf8(p)) ||
2514 ((flags & ANYOF_NALNUML) && !isALNUM_LC_utf8(p)) ||
2515 ((flags & ANYOF_SPACEL) && isSPACE_LC_utf8(p)) ||
2516 ((flags & ANYOF_NSPACEL) && !isSPACE_LC_utf8(p)))
2517 {
2518 match = TRUE;
2519 }
2520 }
2521
2522 return (flags & ANYOF_INVERT) ? !match : match;
2523}
161b471a 2524
dfe13c55
GS
2525STATIC U8 *
2526reghop(U8 *s, I32 off)
c485e607
NIS
2527{
2528 dTHR;
a0ed51b3
LW
2529 if (off >= 0) {
2530 while (off-- && s < (U8*)PL_regeol)
2531 s += UTF8SKIP(s);
2532 }
2533 else {
2534 while (off++) {
2535 if (s > (U8*)PL_bostr) {
2536 s--;
2537 if (*s & 0x80) {
2538 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
2539 s--;
2540 } /* XXX could check well-formedness here */
2541 }
2542 }
2543 }
2544 return s;
2545}
161b471a 2546
dfe13c55
GS
2547STATIC U8 *
2548reghopmaybe(U8* s, I32 off)
a0ed51b3 2549{
c485e607 2550 dTHR;
a0ed51b3
LW
2551 if (off >= 0) {
2552 while (off-- && s < (U8*)PL_regeol)
2553 s += UTF8SKIP(s);
2554 if (off >= 0)
2555 return 0;
2556 }
2557 else {
2558 while (off++) {
2559 if (s > (U8*)PL_bostr) {
2560 s--;
2561 if (*s & 0x80) {
2562 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
2563 s--;
2564 } /* XXX could check well-formedness here */
2565 }
2566 else
2567 break;
2568 }
2569 if (off <= 0)
2570 return 0;
2571 }
2572 return s;
2573}