This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix coredump with MULTIPLICITY (ckWARN() needs early curcop init)
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
28# ifndef DEBUGGING
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
d06ea78c
GS
38/* *These* symbols are masked to allow static link. */
39# define Perl_pregexec my_pregexec
56953603
IZ
40#endif
41
f0fcb552 42/*SUPPRESS 112*/
a687059c 43/*
e50aee73 44 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
45 *
46 * Copyright (c) 1986 by University of Toronto.
47 * Written by Henry Spencer. Not derived from licensed software.
48 *
49 * Permission is granted to anyone to use this software for any
50 * purpose on any computer system, and to redistribute it freely,
51 * subject to the following restrictions:
52 *
53 * 1. The author is not responsible for the consequences of use of
54 * this software, no matter how awful, even if they arise
55 * from defects in it.
56 *
57 * 2. The origin of this software must not be misrepresented, either
58 * by explicit claim or by omission.
59 *
60 * 3. Altered versions must be plainly marked as such, and must not
61 * be misrepresented as being the original software.
62 *
63 **** Alterations to Henry's code are...
64 ****
a0ed51b3 65 **** Copyright (c) 1991-1998, Larry Wall
a687059c 66 ****
9ef589d8
LW
67 **** You may distribute under the terms of either the GNU General Public
68 **** License or the Artistic License, as specified in the README file.
a687059c
LW
69 *
70 * Beware that some of this code is subtly aware of the way operator
71 * precedence is structured in regular expressions. Serious changes in
72 * regular-expression syntax might require a total rethink.
73 */
74#include "EXTERN.h"
75#include "perl.h"
76#include "regcomp.h"
77
c277df42
IZ
78#define RF_tainted 1 /* tainted information used? */
79#define RF_warned 2 /* warned about big count? */
ce862d02 80#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
81#define RF_utf8 8 /* String contains multibyte chars? */
82
83#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
84
85#define RS_init 1 /* eval environment created */
86#define RS_set 2 /* replsv value is set */
c277df42 87
a687059c
LW
88#ifndef STATIC
89#define STATIC static
90#endif
91
76e3520e 92#ifndef PERL_OBJECT
a0d0e21e
LW
93typedef I32 CHECKPOINT;
94
c277df42
IZ
95/*
96 * Forwards.
97 */
98
99static I32 regmatch _((regnode *prog));
100static I32 regrepeat _((regnode *p, I32 max));
101static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp));
102static I32 regtry _((regexp *prog, char *startpos));
ae5c130c 103
c277df42 104static bool reginclass _((char *p, I32 c));
a0ed51b3 105static bool reginclassutf8 _((regnode *f, U8* p));
55497cff 106static CHECKPOINT regcppush _((I32 parenfloor));
107static char * regcppop _((void));
76e3520e 108#endif
a0ed51b3 109
ae5c130c 110#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c))
a0ed51b3
LW
111#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
112
113#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
114#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
115
116static char * reghop _((unsigned char *pos, I32 off));
117static char * reghopmaybe _((unsigned char *pos, I32 off));
118#define HOP(pos,off) (UTF ? reghop(pos, off) : (pos + off))
119#define HOPMAYBE(pos,off) (UTF ? reghopmaybe(pos, off) : (pos + off))
a0d0e21e 120
76e3520e 121STATIC CHECKPOINT
8ac85365 122regcppush(I32 parenfloor)
a0d0e21e 123{
11343788 124 dTHR;
3280af22
NIS
125 int retval = PL_savestack_ix;
126 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
127 int p;
128
129 SSCHECK(i + 5);
3280af22
NIS
130 for (p = PL_regsize; p > parenfloor; p--) {
131 SSPUSHPTR(PL_regendp[p]);
132 SSPUSHPTR(PL_regstartp[p]);
133 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
134 SSPUSHINT(p);
135 }
3280af22
NIS
136 SSPUSHINT(PL_regsize);
137 SSPUSHINT(*PL_reglastparen);
138 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
139 SSPUSHINT(i + 3);
140 SSPUSHINT(SAVEt_REGCONTEXT);
141 return retval;
142}
143
c277df42 144/* These are needed since we do not localize EVAL nodes: */
c3464db5
DD
145# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
146 " Setting an EVAL scope, savestack=%i\n", \
3280af22 147 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 148
3280af22 149# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5
DD
150 PerlIO_printf(Perl_debug_log, \
151 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 152 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 153
76e3520e 154STATIC char *
8ac85365 155regcppop(void)
a0d0e21e 156{
11343788 157 dTHR;
a0d0e21e
LW
158 I32 i = SSPOPINT;
159 U32 paren = 0;
160 char *input;
161 char *tmps;
162 assert(i == SAVEt_REGCONTEXT);
163 i = SSPOPINT;
164 input = (char *) SSPOPPTR;
3280af22
NIS
165 *PL_reglastparen = SSPOPINT;
166 PL_regsize = SSPOPINT;
c277df42 167 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 168 paren = (U32)SSPOPINT;
3280af22
NIS
169 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
170 PL_regstartp[paren] = (char *) SSPOPPTR;
a0d0e21e 171 tmps = (char*)SSPOPPTR;
3280af22
NIS
172 if (paren <= *PL_reglastparen)
173 PL_regendp[paren] = tmps;
c277df42 174 DEBUG_r(
c3464db5
DD
175 PerlIO_printf(Perl_debug_log,
176 " restoring \\%d to %d(%d)..%d%s\n",
3280af22
NIS
177 paren, PL_regstartp[paren] - PL_regbol,
178 PL_reg_start_tmp[paren] - PL_regbol,
179 PL_regendp[paren] - PL_regbol,
180 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 181 );
a0d0e21e 182 }
c277df42 183 DEBUG_r(
3280af22 184 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5
DD
185 PerlIO_printf(Perl_debug_log,
186 " restoring \\%d..\\%d to undef\n",
3280af22 187 *PL_reglastparen + 1, PL_regnpar);
c277df42
IZ
188 }
189 );
3280af22
NIS
190 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
191 if (paren > PL_regsize)
192 PL_regstartp[paren] = Nullch;
193 PL_regendp[paren] = Nullch;
a0d0e21e
LW
194 }
195 return input;
196}
197
c277df42 198#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 199
a687059c 200/*
e50aee73 201 * pregexec and friends
a687059c
LW
202 */
203
204/*
c277df42 205 - pregexec - match a regexp against a string
a687059c 206 */
c277df42 207I32
c3464db5
DD
208pregexec(register regexp *prog, char *stringarg, register char *strend,
209 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
210/* strend: pointer to null at end of string */
211/* strbeg: real beginning of string */
212/* minend: end of match must be >=minend after stringarg. */
213/* nosave: For optimizations. */
214{
215 return
216 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
217 nosave ? 0 : REXEC_COPY_STR);
218}
219
a687059c 220/*
c277df42 221 - regexec_flags - match a regexp against a string
a687059c 222 */
79072805 223I32
c3464db5
DD
224regexec_flags(register regexp *prog, char *stringarg, register char *strend,
225 char *strbeg, I32 minend, SV *screamer, void *data, U32 flags)
c277df42
IZ
226/* strend: pointer to null at end of string */
227/* strbeg: real beginning of string */
228/* minend: end of match must be >=minend after stringarg. */
229/* data: May be used for some additional optimizations. */
230/* nosave: For optimizations. */
a687059c 231{
5c0ca799 232 dTHR;
a0d0e21e 233 register char *s;
c277df42 234 register regnode *c;
a0d0e21e
LW
235 register char *startpos = stringarg;
236 register I32 tmp;
c277df42 237 I32 minlen; /* must match at least this many chars */
a0d0e21e
LW
238 I32 dontbother = 0; /* how many characters not to try at end */
239 CURCUR cc;
c277df42 240 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
241 constant substr. */ /* CC */
242 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
243 I32 scream_pos = -1; /* Internal iterator of scream. */
244 char *scream_olds;
3280af22 245 SV* oreplsv = GvSV(PL_replgv);
a687059c 246
a0d0e21e 247 cc.cur = 0;
4633a7c4 248 cc.oldcc = 0;
3280af22 249 PL_regcc = &cc;
a0d0e21e 250
3280af22 251 PL_regprecomp = prog->precomp; /* Needed for error messages. */
a0d0e21e 252#ifdef DEBUGGING
3280af22
NIS
253 PL_regnarrate = PL_debug & 512;
254 PL_regprogram = prog->program;
a0d0e21e
LW
255#endif
256
257 /* Be paranoid... */
258 if (prog == NULL || startpos == NULL) {
259 croak("NULL regexp parameter");
260 return 0;
261 }
262
c277df42
IZ
263 minlen = prog->minlen;
264 if (strend - startpos < minlen) goto phooey;
265
a0d0e21e 266 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 267 PL_regprev = '\n';
a0d0e21e 268 else {
a0ed51b3 269 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
270 if (!PL_multiline && PL_regprev == '\n')
271 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 272 }
bbce6d69 273
a0d0e21e
LW
274 /* Check validity of program. */
275 if (UCHARAT(prog->program) != MAGIC) {
276 FAIL("corrupted regexp program");
277 }
278
3280af22
NIS
279 PL_regnpar = prog->nparens;
280 PL_reg_flags = 0;
281 PL_reg_eval_set = 0;
a0d0e21e 282
a0ed51b3
LW
283 if (prog->reganch & ROPT_UTF8)
284 PL_reg_flags |= RF_utf8;
285
286 /* Mark beginning of line for ^ and lookbehind. */
287 PL_regbol = startpos;
288 PL_bostr = strbeg;
289
290 /* Mark end of line for $ (and such) */
291 PL_regeol = strend;
292
293 /* see how far we have to get to not match where we matched before */
294 PL_regtill = startpos+minend;
295
a0d0e21e
LW
296 /* If there is a "must appear" string, look for it. */
297 s = startpos;
c277df42
IZ
298 if (!(flags & REXEC_CHECKED)
299 && prog->check_substr != Nullsv &&
774d564b 300 !(prog->reganch & ROPT_ANCH_GPOS) &&
c277df42 301 (!(prog->reganch & (ROPT_ANCH_BOL | ROPT_ANCH_MBOL))
3280af22 302 || (PL_multiline && prog->check_substr == prog->anchored_substr)) )
a0d0e21e 303 {
a0ed51b3
LW
304 char *t;
305 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
c277df42 306 /* Should be nonnegative! */
a0ed51b3 307 end_shift = minlen - start_shift - CHR_SVLEN(prog->check_substr);
c277df42 308 if (screamer) {
3280af22 309 if (PL_screamfirst[BmRARE(prog->check_substr)] >= 0)
c277df42
IZ
310 s = screaminstr(screamer, prog->check_substr,
311 start_shift + (stringarg - strbeg),
312 end_shift, &scream_pos, 0);
a0d0e21e
LW
313 else
314 s = Nullch;
c277df42 315 scream_olds = s;
0a12ae7d 316 }
a0d0e21e 317 else
c277df42
IZ
318 s = fbm_instr((unsigned char*)s + start_shift,
319 (unsigned char*)strend - end_shift,
411d5715 320 prog->check_substr, 0);
a0d0e21e 321 if (!s) {
c277df42 322 ++BmUSEFUL(prog->check_substr); /* hooray */
a0d0e21e 323 goto phooey; /* not present */
a0ed51b3
LW
324 }
325 else if (s - stringarg > prog->check_offset_max &&
326 (UTF
327 ? ((t = reghopmaybe(s, -(prog->check_offset_max))) && t >= stringarg)
328 : (t = s - prog->check_offset_max) != 0
329 )
330 )
331 {
c277df42 332 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
a0ed51b3
LW
333 s = t;
334 }
335 else if (!(prog->reganch & ROPT_NAUGHTY)
c277df42
IZ
336 && --BmUSEFUL(prog->check_substr) < 0
337 && prog->check_substr == prog->float_substr) { /* boo */
338 SvREFCNT_dec(prog->check_substr);
339 prog->check_substr = Nullsv; /* disable */
340 prog->float_substr = Nullsv; /* clear */
a0d0e21e 341 s = startpos;
a0ed51b3
LW
342 }
343 else
344 s = startpos;
a0d0e21e 345 }
a687059c 346
c277df42
IZ
347 DEBUG_r(
348 PerlIO_printf(Perl_debug_log,
349 "Matching `%.60s%s' against `%.*s%s'\n",
350 prog->precomp,
351 (strlen(prog->precomp) > 60 ? "..." : ""),
352 (strend - startpos > 60 ? 60 : strend - startpos),
353 startpos,
354 (strend - startpos > 60 ? "..." : ""))
355 );
356
a0ed51b3
LW
357 PL_regdata = prog->data;
358
a0d0e21e 359 /* Simplest case: anchored match need be tried only once. */
774d564b 360 /* [unless only anchor is BOL and multiline is set] */
a0d0e21e
LW
361 if (prog->reganch & ROPT_ANCH) {
362 if (regtry(prog, startpos))
363 goto got_it;
774d564b 364 else if (!(prog->reganch & ROPT_ANCH_GPOS) &&
3280af22 365 (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
c277df42 366 || (prog->reganch & ROPT_ANCH_MBOL)))
774d564b 367 {
a0d0e21e
LW
368 if (minlen)
369 dontbother = minlen - 1;
a0ed51b3 370 strend = HOP(strend, -dontbother);
a0d0e21e
LW
371 /* for multiline we only have to try after newlines */
372 if (s > startpos)
373 s--;
374 while (s < strend) {
a0ed51b3 375 if (*s++ == '\n') { /* don't need utf8skip here */
a0d0e21e
LW
376 if (s < strend && regtry(prog, s))
377 goto got_it;
378 }
35c8bce7 379 }
35c8bce7 380 }
a0d0e21e
LW
381 goto phooey;
382 }
35c8bce7 383
a0d0e21e 384 /* Messy cases: unanchored match. */
c277df42
IZ
385 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
386 /* we have /x+whatever/ */
387 /* it must be a one character string */
388 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
389 if (UTF) {
390 while (s < strend) {
391 if (*s == ch) {
392 if (regtry(prog, s)) goto got_it;
393 s += UTF8SKIP(s);
394 while (s < strend && *s == ch)
395 s += UTF8SKIP(s);
396 }
397 s += UTF8SKIP(s);
398 }
399 }
400 else {
401 while (s < strend) {
402 if (*s == ch) {
403 if (regtry(prog, s)) goto got_it;
c277df42 404 s++;
a0ed51b3
LW
405 while (s < strend && *s == ch)
406 s++;
407 }
408 s++;
a0d0e21e 409 }
a687059c 410 }
c277df42
IZ
411 }
412 /*SUPPRESS 560*/
413 else if (prog->anchored_substr != Nullsv
414 || (prog->float_substr != Nullsv
415 && prog->float_max_offset < strend - s)) {
416 SV *must = prog->anchored_substr
417 ? prog->anchored_substr : prog->float_substr;
418 I32 back_max =
419 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
420 I32 back_min =
421 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
422 I32 delta = back_max - back_min;
a176fa2a 423 char *last = HOP(strend, 0-(CHR_SVLEN(must) + back_min)); /* Cannot start after this */
a0ed51b3
LW
424 char *last1; /* Last position checked before */
425
426 if (s > PL_bostr)
427 last1 = HOP(s, -1);
428 else
429 last1 = s - 1; /* bogus */
c277df42
IZ
430
431 /* XXXX check_substr already used to find `s', can optimize if
432 check_substr==must. */
433 scream_pos = -1;
434 dontbother = end_shift;
a0ed51b3 435 strend = HOP(strend, -dontbother);
c277df42
IZ
436 while ( (s <= last) &&
437 (screamer
a0ed51b3 438 ? (s = screaminstr(screamer, must, HOP(s, back_min) - strbeg,
c277df42 439 end_shift, &scream_pos, 0))
a0ed51b3 440 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
411d5715 441 (unsigned char*)strend, must, 0))) ) {
a0ed51b3
LW
442 if (HOP(s, -back_max) > last1) {
443 last1 = HOP(s, -back_min);
444 s = HOP(s, -back_max);
445 }
446 else {
8e680dc3 447 char *t = (last1 >= PL_bostr) ? HOP(last1, 1) : last1 + 1;
c277df42 448
a0ed51b3 449 last1 = HOP(s, -back_min);
c277df42 450 s = t;
a0d0e21e 451 }
a0ed51b3
LW
452 if (UTF) {
453 while (s <= last1) {
454 if (regtry(prog, s))
455 goto got_it;
456 s += UTF8SKIP(s);
457 }
458 }
459 else {
460 while (s <= last1) {
461 if (regtry(prog, s))
462 goto got_it;
463 s++;
464 }
a0d0e21e
LW
465 }
466 }
467 goto phooey;
a0ed51b3
LW
468 }
469 else if (c = prog->regstclass) {
a0d0e21e 470 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 471 char *cc;
a687059c 472
a0d0e21e
LW
473 if (minlen)
474 dontbother = minlen - 1;
a0ed51b3 475 strend = HOP(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
476 tmp = 1;
477 /* We know what class it must start with. */
478 switch (OP(c)) {
a0ed51b3
LW
479 case ANYOFUTF8:
480 cc = (char *) OPERAND(c);
481 while (s < strend) {
482 if (REGINCLASSUTF8(c, (U8*)s)) {
483 if (tmp && regtry(prog, s))
484 goto got_it;
485 else
486 tmp = doevery;
487 }
488 else
489 tmp = 1;
490 s += UTF8SKIP(s);
491 }
492 break;
a0d0e21e 493 case ANYOF:
a0ed51b3 494 cc = (char *) OPERAND(c);
a0d0e21e 495 while (s < strend) {
a0ed51b3 496 if (REGINCLASS(cc, *s)) {
a0d0e21e
LW
497 if (tmp && regtry(prog, s))
498 goto got_it;
499 else
500 tmp = doevery;
a687059c 501 }
a0d0e21e
LW
502 else
503 tmp = 1;
504 s++;
505 }
506 break;
bbce6d69 507 case BOUNDL:
3280af22 508 PL_reg_flags |= RF_tainted;
bbce6d69 509 /* FALL THROUGH */
a0d0e21e 510 case BOUND:
a0ed51b3
LW
511 if (minlen) {
512 dontbother++;
513 strend -= 1;
514 }
3280af22 515 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 516 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 517 while (s < strend) {
95bac841 518 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
519 tmp = !tmp;
520 if (regtry(prog, s))
521 goto got_it;
a687059c 522 }
a0d0e21e
LW
523 s++;
524 }
525 if ((minlen || tmp) && regtry(prog,s))
526 goto got_it;
527 break;
a0ed51b3
LW
528 case BOUNDLUTF8:
529 PL_reg_flags |= RF_tainted;
530 /* FALL THROUGH */
531 case BOUNDUTF8:
532 if (minlen) {
533 dontbother++;
534 strend = reghop(strend, -1);
535 }
536 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop(s, -1), 0) : PL_regprev;
537 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
538 while (s < strend) {
539 if (tmp == !(OP(c) == BOUND ? swash_fetch(PL_utf8_alnum, s) : isALNUM_LC_utf8(s))) {
540 tmp = !tmp;
541 if (regtry(prog, s))
542 goto got_it;
543 }
544 s += UTF8SKIP(s);
545 }
546 if ((minlen || tmp) && regtry(prog,s))
547 goto got_it;
548 break;
bbce6d69 549 case NBOUNDL:
3280af22 550 PL_reg_flags |= RF_tainted;
bbce6d69 551 /* FALL THROUGH */
a0d0e21e 552 case NBOUND:
a0ed51b3
LW
553 if (minlen) {
554 dontbother++;
555 strend -= 1;
556 }
3280af22 557 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 558 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 559 while (s < strend) {
95bac841 560 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
561 tmp = !tmp;
562 else if (regtry(prog, s))
563 goto got_it;
564 s++;
565 }
566 if ((minlen || !tmp) && regtry(prog,s))
567 goto got_it;
568 break;
a0ed51b3
LW
569 case NBOUNDLUTF8:
570 PL_reg_flags |= RF_tainted;
571 /* FALL THROUGH */
572 case NBOUNDUTF8:
573 if (minlen) {
574 dontbother++;
575 strend = reghop(strend, -1);
576 }
577 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop(s, -1), 0) : PL_regprev;
578 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
579 while (s < strend) {
580 if (tmp == !(OP(c) == NBOUND ? swash_fetch(PL_utf8_alnum, s) : isALNUM_LC_utf8(s)))
581 tmp = !tmp;
582 else if (regtry(prog, s))
583 goto got_it;
584 s += UTF8SKIP(s);
585 }
586 if ((minlen || !tmp) && regtry(prog,s))
587 goto got_it;
588 break;
a0d0e21e
LW
589 case ALNUM:
590 while (s < strend) {
bbce6d69 591 if (isALNUM(*s)) {
592 if (tmp && regtry(prog, s))
593 goto got_it;
594 else
595 tmp = doevery;
596 }
597 else
598 tmp = 1;
599 s++;
600 }
601 break;
a0ed51b3
LW
602 case ALNUMUTF8:
603 while (s < strend) {
604 if (swash_fetch(PL_utf8_alnum, s)) {
605 if (tmp && regtry(prog, s))
606 goto got_it;
607 else
608 tmp = doevery;
609 }
610 else
611 tmp = 1;
612 s += UTF8SKIP(s);
613 }
614 break;
bbce6d69 615 case ALNUML:
3280af22 616 PL_reg_flags |= RF_tainted;
bbce6d69 617 while (s < strend) {
618 if (isALNUM_LC(*s)) {
a0d0e21e
LW
619 if (tmp && regtry(prog, s))
620 goto got_it;
a687059c 621 else
a0d0e21e
LW
622 tmp = doevery;
623 }
624 else
625 tmp = 1;
626 s++;
627 }
628 break;
a0ed51b3
LW
629 case ALNUMLUTF8:
630 PL_reg_flags |= RF_tainted;
631 while (s < strend) {
632 if (isALNUM_LC_utf8(s)) {
633 if (tmp && regtry(prog, s))
634 goto got_it;
635 else
636 tmp = doevery;
637 }
638 else
639 tmp = 1;
640 s += UTF8SKIP(s);
641 }
642 break;
a0d0e21e
LW
643 case NALNUM:
644 while (s < strend) {
bbce6d69 645 if (!isALNUM(*s)) {
646 if (tmp && regtry(prog, s))
647 goto got_it;
648 else
649 tmp = doevery;
650 }
651 else
652 tmp = 1;
653 s++;
654 }
655 break;
a0ed51b3
LW
656 case NALNUMUTF8:
657 while (s < strend) {
658 if (!swash_fetch(PL_utf8_alnum, s)) {
659 if (tmp && regtry(prog, s))
660 goto got_it;
661 else
662 tmp = doevery;
663 }
664 else
665 tmp = 1;
666 s += UTF8SKIP(s);
667 }
668 break;
bbce6d69 669 case NALNUML:
3280af22 670 PL_reg_flags |= RF_tainted;
bbce6d69 671 while (s < strend) {
672 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
673 if (tmp && regtry(prog, s))
674 goto got_it;
a687059c 675 else
a0d0e21e 676 tmp = doevery;
a687059c 677 }
a0d0e21e
LW
678 else
679 tmp = 1;
680 s++;
681 }
682 break;
a0ed51b3
LW
683 case NALNUMLUTF8:
684 PL_reg_flags |= RF_tainted;
685 while (s < strend) {
686 if (!isALNUM_LC_utf8(s)) {
687 if (tmp && regtry(prog, s))
688 goto got_it;
689 else
690 tmp = doevery;
691 }
692 else
693 tmp = 1;
694 s += UTF8SKIP(s);
695 }
696 break;
a0d0e21e
LW
697 case SPACE:
698 while (s < strend) {
699 if (isSPACE(*s)) {
700 if (tmp && regtry(prog, s))
701 goto got_it;
702 else
703 tmp = doevery;
2304df62 704 }
a0d0e21e
LW
705 else
706 tmp = 1;
707 s++;
708 }
709 break;
a0ed51b3
LW
710 case SPACEUTF8:
711 while (s < strend) {
712 if (*s == ' ' || swash_fetch(PL_utf8_space,s)) {
713 if (tmp && regtry(prog, s))
714 goto got_it;
715 else
716 tmp = doevery;
717 }
718 else
719 tmp = 1;
720 s += UTF8SKIP(s);
721 }
722 break;
bbce6d69 723 case SPACEL:
3280af22 724 PL_reg_flags |= RF_tainted;
bbce6d69 725 while (s < strend) {
726 if (isSPACE_LC(*s)) {
727 if (tmp && regtry(prog, s))
728 goto got_it;
729 else
730 tmp = doevery;
731 }
732 else
733 tmp = 1;
734 s++;
735 }
736 break;
a0ed51b3
LW
737 case SPACELUTF8:
738 PL_reg_flags |= RF_tainted;
739 while (s < strend) {
740 if (*s == ' ' || isSPACE_LC_utf8(s)) {
741 if (tmp && regtry(prog, s))
742 goto got_it;
743 else
744 tmp = doevery;
745 }
746 else
747 tmp = 1;
748 s += UTF8SKIP(s);
749 }
750 break;
a0d0e21e
LW
751 case NSPACE:
752 while (s < strend) {
753 if (!isSPACE(*s)) {
754 if (tmp && regtry(prog, s))
755 goto got_it;
756 else
757 tmp = doevery;
a687059c 758 }
a0d0e21e
LW
759 else
760 tmp = 1;
761 s++;
762 }
763 break;
a0ed51b3
LW
764 case NSPACEUTF8:
765 while (s < strend) {
766 if (!(*s == ' ' || swash_fetch(PL_utf8_space,s))) {
767 if (tmp && regtry(prog, s))
768 goto got_it;
769 else
770 tmp = doevery;
771 }
772 else
773 tmp = 1;
774 s += UTF8SKIP(s);
775 }
776 break;
bbce6d69 777 case NSPACEL:
3280af22 778 PL_reg_flags |= RF_tainted;
bbce6d69 779 while (s < strend) {
780 if (!isSPACE_LC(*s)) {
781 if (tmp && regtry(prog, s))
782 goto got_it;
783 else
784 tmp = doevery;
785 }
786 else
787 tmp = 1;
788 s++;
789 }
790 break;
a0ed51b3
LW
791 case NSPACELUTF8:
792 PL_reg_flags |= RF_tainted;
793 while (s < strend) {
794 if (!(*s == ' ' || isSPACE_LC_utf8(s))) {
795 if (tmp && regtry(prog, s))
796 goto got_it;
797 else
798 tmp = doevery;
799 }
800 else
801 tmp = 1;
802 s += UTF8SKIP(s);
803 }
804 break;
a0d0e21e
LW
805 case DIGIT:
806 while (s < strend) {
807 if (isDIGIT(*s)) {
808 if (tmp && regtry(prog, s))
809 goto got_it;
810 else
811 tmp = doevery;
2b69d0c2 812 }
a0d0e21e
LW
813 else
814 tmp = 1;
815 s++;
816 }
817 break;
a0ed51b3
LW
818 case DIGITUTF8:
819 while (s < strend) {
820 if (swash_fetch(PL_utf8_digit,s)) {
821 if (tmp && regtry(prog, s))
822 goto got_it;
823 else
824 tmp = doevery;
825 }
826 else
827 tmp = 1;
828 s += UTF8SKIP(s);
829 }
830 break;
a0d0e21e
LW
831 case NDIGIT:
832 while (s < strend) {
833 if (!isDIGIT(*s)) {
834 if (tmp && regtry(prog, s))
835 goto got_it;
836 else
837 tmp = doevery;
a687059c 838 }
a0d0e21e
LW
839 else
840 tmp = 1;
841 s++;
842 }
843 break;
a0ed51b3
LW
844 case NDIGITUTF8:
845 while (s < strend) {
846 if (!swash_fetch(PL_utf8_digit,s)) {
847 if (tmp && regtry(prog, s))
848 goto got_it;
849 else
850 tmp = doevery;
851 }
852 else
853 tmp = 1;
854 s += UTF8SKIP(s);
855 }
856 break;
a687059c 857 }
a0d0e21e
LW
858 }
859 else {
c277df42
IZ
860 dontbother = 0;
861 if (prog->float_substr != Nullsv) { /* Trim the end. */
862 char *last;
863 I32 oldpos = scream_pos;
864
865 if (screamer) {
866 last = screaminstr(screamer, prog->float_substr, s - strbeg,
867 end_shift, &scream_pos, 1); /* last one */
868 if (!last) {
869 last = scream_olds; /* Only one occurence. */
870 }
a0ed51b3
LW
871 }
872 else {
c277df42
IZ
873 STRLEN len;
874 char *little = SvPV(prog->float_substr, len);
19b4f81a
JPC
875 if (len)
876 last = rninstr(s, strend, little, little + len);
877 else
878 last = strend; /* matching `$' */
c277df42
IZ
879 }
880 if (last == NULL) goto phooey; /* Should not happen! */
19b4f81a 881 dontbother = strend - last + prog->float_min_offset;
c277df42
IZ
882 }
883 if (minlen && (dontbother < minlen))
a0d0e21e 884 dontbother = minlen - 1;
a0ed51b3 885 strend -= dontbother; /* this one's always in bytes! */
a0d0e21e 886 /* We don't know much -- general case. */
a0ed51b3
LW
887 if (UTF) {
888 for (;;) {
84df6dba 889 if (regtry(prog, s))
a0ed51b3 890 goto got_it;
a0ed51b3
LW
891 if (s >= strend)
892 break;
893 s += UTF8SKIP(s);
894 };
895 }
896 else {
897 do {
898 if (regtry(prog, s))
899 goto got_it;
900 } while (s++ < strend);
901 }
a0d0e21e
LW
902 }
903
904 /* Failure. */
905 goto phooey;
a687059c 906
a0d0e21e
LW
907got_it:
908 prog->subbeg = strbeg;
19b4f81a 909 prog->subend = PL_regeol; /* strend may have been modified */
3280af22 910 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
5f05dabc 911
912 /* make sure $`, $&, $', and $digit will work later */
c277df42
IZ
913 if (strbeg != prog->subbase) { /* second+ //g match. */
914 if (!(flags & REXEC_COPY_STR)) {
137443ea 915 if (prog->subbase) {
916 Safefree(prog->subbase);
917 prog->subbase = Nullch;
918 }
919 }
920 else {
19b4f81a 921 I32 i = PL_regeol - startpos + (stringarg - strbeg);
137443ea 922 s = savepvn(strbeg, i);
923 Safefree(prog->subbase);
924 prog->subbase = s;
925 prog->subbeg = prog->subbase;
926 prog->subend = prog->subbase + i;
927 s = prog->subbase + (stringarg - strbeg);
928 for (i = 0; i <= prog->nparens; i++) {
929 if (prog->endp[i]) {
930 prog->startp[i] = s + (prog->startp[i] - startpos);
931 prog->endp[i] = s + (prog->endp[i] - startpos);
932 }
a0d0e21e
LW
933 }
934 }
a0d0e21e 935 }
ce862d02 936 /* Preserve the current value of $^R */
3280af22
NIS
937 if (oreplsv != GvSV(PL_replgv)) {
938 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
ce862d02
IZ
939 restored, the value remains
940 the same. */
941 }
a0d0e21e
LW
942 return 1;
943
944phooey:
a0d0e21e 945 return 0;
a687059c
LW
946}
947
948/*
949 - regtry - try match at specific point
950 */
76e3520e 951STATIC I32 /* 0 failure, 1 success */
8ac85365 952regtry(regexp *prog, char *startpos)
a687059c 953{
c277df42 954 dTHR;
a0d0e21e
LW
955 register I32 i;
956 register char **sp;
957 register char **ep;
c277df42 958 CHECKPOINT lastcp;
a0d0e21e 959
3280af22
NIS
960 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
961 PL_reg_eval_set = RS_init;
ce862d02 962 DEBUG_r(DEBUG_s(
c3464db5 963 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
3280af22 964 PL_stack_sp - PL_stack_base);
ce862d02
IZ
965 ));
966 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
3280af22 967 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
ce862d02
IZ
968 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
969 SAVETMPS;
970 /* Apparently this is not needed, judging by wantarray. */
971 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
972 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
973 }
3280af22
NIS
974 PL_reginput = startpos;
975 PL_regstartp = prog->startp;
976 PL_regendp = prog->endp;
977 PL_reglastparen = &prog->lastparen;
a0d0e21e 978 prog->lastparen = 0;
3280af22
NIS
979 PL_regsize = 0;
980 if (PL_reg_start_tmpl <= prog->nparens) {
981 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
982 if(PL_reg_start_tmp)
983 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 984 else
3280af22 985 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 986 }
a0d0e21e
LW
987
988 sp = prog->startp;
989 ep = prog->endp;
990 if (prog->nparens) {
991 for (i = prog->nparens; i >= 0; i--) {
992 *sp++ = NULL;
993 *ep++ = NULL;
a687059c 994 }
a0d0e21e 995 }
c277df42 996 REGCP_SET;
7e5428c5 997 if (regmatch(prog->program + 1)) {
a0d0e21e 998 prog->startp[0] = startpos;
3280af22 999 prog->endp[0] = PL_reginput;
a0d0e21e
LW
1000 return 1;
1001 }
c277df42
IZ
1002 REGCP_UNWIND;
1003 return 0;
a687059c
LW
1004}
1005
1006/*
1007 - regmatch - main matching routine
1008 *
1009 * Conceptually the strategy is simple: check to see whether the current
1010 * node matches, call self recursively to see whether the rest matches,
1011 * and then act accordingly. In practice we make some effort to avoid
1012 * recursion, in particular by going through "ordinary" nodes (that don't
1013 * need to know whether the rest of the match failed) by a loop instead of
1014 * by recursion.
1015 */
1016/* [lwall] I've hoisted the register declarations to the outer block in order to
1017 * maybe save a little bit of pushing and popping on the stack. It also takes
1018 * advantage of machines that use a register save mask on subroutine entry.
1019 */
76e3520e 1020STATIC I32 /* 0 failure, 1 success */
c277df42 1021regmatch(regnode *prog)
a687059c 1022{
c277df42
IZ
1023 dTHR;
1024 register regnode *scan; /* Current node. */
1025 regnode *next; /* Next node. */
1026 regnode *inner; /* Next node in internal branch. */
c3464db5
DD
1027 register I32 nextchr; /* renamed nextchr - nextchar colides with
1028 function of same name */
a0d0e21e
LW
1029 register I32 n; /* no or next */
1030 register I32 ln; /* len or last */
1031 register char *s; /* operand or save */
3280af22 1032 register char *locinput = PL_reginput;
c277df42
IZ
1033 register I32 c1, c2, paren; /* case fold search, parenth */
1034 int minmod = 0, sw = 0, logical = 0;
4633a7c4 1035#ifdef DEBUGGING
3280af22 1036 PL_regindent++;
4633a7c4 1037#endif
a0d0e21e 1038
a0ed51b3 1039 /* Note that nextchr is a byte even in UTF */
76e3520e 1040 nextchr = UCHARAT(locinput);
a0d0e21e
LW
1041 scan = prog;
1042 while (scan != NULL) {
c277df42 1043#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
a687059c 1044#ifdef DEBUGGING
c277df42
IZ
1045# define sayYES goto yes
1046# define sayNO goto no
1047# define saySAME(x) if (x) goto yes; else goto no
1048# define REPORT_CODE_OFF 24
4633a7c4 1049#else
c277df42
IZ
1050# define sayYES return 1
1051# define sayNO return 0
1052# define saySAME(x) return x
a687059c 1053#endif
c277df42
IZ
1054 DEBUG_r( {
1055 SV *prop = sv_newmortal();
3280af22 1056 int docolor = *PL_colors[0];
c277df42 1057 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
3280af22
NIS
1058 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1059 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1060 ? (5 + taill) - l : locinput - PL_bostr);
c277df42 1061
3280af22
NIS
1062 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1063 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1064 ? (5 + taill) - pref_len : PL_regeol - locinput);
c277df42
IZ
1065 regprop(prop, scan);
1066 PerlIO_printf(Perl_debug_log,
54dc92de 1067 "%4i <%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
3280af22
NIS
1068 locinput - PL_bostr,
1069 PL_colors[2], pref_len, locinput - pref_len, PL_colors[3],
c277df42 1070 (docolor ? "" : "> <"),
3280af22 1071 PL_colors[0], l, locinput, PL_colors[1],
c277df42
IZ
1072 15 - l - pref_len + 1,
1073 "",
3280af22 1074 scan - PL_regprogram, PL_regindent*2, "",
c277df42
IZ
1075 SvPVX(prop));
1076 } );
a687059c 1077
c277df42 1078 next = scan + NEXT_OFF(scan);
a0d0e21e
LW
1079 if (next == scan)
1080 next = NULL;
a687059c 1081
a0d0e21e
LW
1082 switch (OP(scan)) {
1083 case BOL:
3280af22
NIS
1084 if (locinput == PL_bostr
1085 ? PL_regprev == '\n'
1086 : (PL_multiline &&
1087 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e 1088 {
a0ed51b3 1089 /* regtill = regbol; */
a0d0e21e
LW
1090 break;
1091 }
4633a7c4 1092 sayNO;
a0d0e21e 1093 case MBOL:
3280af22
NIS
1094 if (locinput == PL_bostr
1095 ? PL_regprev == '\n'
1096 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e
LW
1097 {
1098 break;
1099 }
4633a7c4 1100 sayNO;
a0d0e21e 1101 case SBOL:
3280af22 1102 if (locinput == PL_regbol && PL_regprev == '\n')
a0d0e21e 1103 break;
4633a7c4 1104 sayNO;
774d564b 1105 case GPOS:
3280af22 1106 if (locinput == PL_regbol)
a0d0e21e 1107 break;
4633a7c4 1108 sayNO;
a0d0e21e 1109 case EOL:
3280af22 1110 if (PL_multiline)
a0d0e21e
LW
1111 goto meol;
1112 else
1113 goto seol;
1114 case MEOL:
1115 meol:
3280af22 1116 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1117 sayNO;
a0d0e21e
LW
1118 break;
1119 case SEOL:
1120 seol:
3280af22 1121 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1122 sayNO;
3280af22 1123 if (PL_regeol - locinput > 1)
4633a7c4 1124 sayNO;
a0d0e21e 1125 break;
b85d18e9 1126 case EOS:
3280af22 1127 if (PL_regeol != locinput)
b85d18e9
IZ
1128 sayNO;
1129 break;
a0ed51b3
LW
1130 case SANYUTF8:
1131 if (nextchr & 0x80) {
a176fa2a 1132 locinput += utf8skip[nextchr];
a0ed51b3
LW
1133 if (locinput > PL_regeol)
1134 sayNO;
1135 nextchr = UCHARAT(locinput);
1136 break;
1137 }
1138 if (!nextchr && locinput >= PL_regeol)
1139 sayNO;
1140 nextchr = UCHARAT(++locinput);
1141 break;
a0d0e21e 1142 case SANY:
3280af22 1143 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1144 sayNO;
76e3520e 1145 nextchr = UCHARAT(++locinput);
a0d0e21e 1146 break;
a0ed51b3
LW
1147 case ANYUTF8:
1148 if (nextchr & 0x80) {
a176fa2a 1149 locinput += utf8skip[nextchr];
a0ed51b3
LW
1150 if (locinput > PL_regeol)
1151 sayNO;
1152 nextchr = UCHARAT(locinput);
1153 break;
1154 }
1155 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
1156 sayNO;
1157 nextchr = UCHARAT(++locinput);
1158 break;
a0d0e21e 1159 case ANY:
3280af22 1160 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1161 sayNO;
76e3520e 1162 nextchr = UCHARAT(++locinput);
a0d0e21e 1163 break;
bbce6d69 1164 case EXACT:
161b471a 1165 s = (char *) OPERAND(scan);
c277df42 1166 ln = UCHARAT(s++);
a0d0e21e 1167 /* Inline the first character, for speed. */
76e3520e 1168 if (UCHARAT(s) != nextchr)
4633a7c4 1169 sayNO;
3280af22 1170 if (PL_regeol - locinput < ln)
4633a7c4 1171 sayNO;
36477c24 1172 if (ln > 1 && memNE(s, locinput, ln))
4633a7c4 1173 sayNO;
a0d0e21e 1174 locinput += ln;
76e3520e 1175 nextchr = UCHARAT(locinput);
bbce6d69 1176 break;
1177 case EXACTFL:
3280af22 1178 PL_reg_flags |= RF_tainted;
bbce6d69 1179 /* FALL THROUGH */
1180 case EXACTF:
161b471a 1181 s = (char *) OPERAND(scan);
c277df42 1182 ln = UCHARAT(s++);
a0ed51b3
LW
1183
1184 if (UTF) {
1185 char *l = locinput;
1186 char *e = s + ln;
1187 c1 = OP(scan) == EXACTF;
1188 while (s < e) {
1189 if (l >= PL_regeol)
1190 sayNO;
1191 if (utf8_to_uv(s, 0) != (c1 ? toLOWER_utf8(l) : toLOWER_LC_utf8(l)))
1192 sayNO;
1193 s += UTF8SKIP(s);
1194 l += UTF8SKIP(l);
1195 }
1196 locinput = l;
1197 nextchr = UCHARAT(locinput);
1198 break;
1199 }
1200
bbce6d69 1201 /* Inline the first character, for speed. */
76e3520e 1202 if (UCHARAT(s) != nextchr &&
bbce6d69 1203 UCHARAT(s) != ((OP(scan) == EXACTF)
76e3520e 1204 ? fold : fold_locale)[nextchr])
bbce6d69 1205 sayNO;
3280af22 1206 if (PL_regeol - locinput < ln)
bbce6d69 1207 sayNO;
5f05dabc 1208 if (ln > 1 && (OP(scan) == EXACTF
1209 ? ibcmp(s, locinput, ln)
1210 : ibcmp_locale(s, locinput, ln)))
bbce6d69 1211 sayNO;
1212 locinput += ln;
76e3520e 1213 nextchr = UCHARAT(locinput);
a0d0e21e 1214 break;
a0ed51b3
LW
1215 case ANYOFUTF8:
1216 s = (char *) OPERAND(scan);
1217 if (!REGINCLASSUTF8(scan, (U8*)locinput))
1218 sayNO;
1219 if (locinput >= PL_regeol)
1220 sayNO;
a176fa2a 1221 locinput += utf8skip[nextchr];
a0ed51b3
LW
1222 nextchr = UCHARAT(locinput);
1223 break;
a0d0e21e 1224 case ANYOF:
161b471a 1225 s = (char *) OPERAND(scan);
76e3520e
GS
1226 if (nextchr < 0)
1227 nextchr = UCHARAT(locinput);
873ef191 1228 if (!REGINCLASS(s, nextchr))
4633a7c4 1229 sayNO;
3280af22 1230 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1231 sayNO;
76e3520e 1232 nextchr = UCHARAT(++locinput);
a0d0e21e 1233 break;
bbce6d69 1234 case ALNUML:
3280af22 1235 PL_reg_flags |= RF_tainted;
bbce6d69 1236 /* FALL THROUGH */
a0d0e21e 1237 case ALNUM:
76e3520e 1238 if (!nextchr)
4633a7c4 1239 sayNO;
bbce6d69 1240 if (!(OP(scan) == ALNUM
76e3520e 1241 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
4633a7c4 1242 sayNO;
76e3520e 1243 nextchr = UCHARAT(++locinput);
a0d0e21e 1244 break;
a0ed51b3
LW
1245 case ALNUMLUTF8:
1246 PL_reg_flags |= RF_tainted;
1247 /* FALL THROUGH */
1248 case ALNUMUTF8:
1249 if (!nextchr)
1250 sayNO;
1251 if (nextchr & 0x80) {
1252 if (!(OP(scan) == ALNUMUTF8
1253 ? swash_fetch(PL_utf8_alnum, locinput) : isALNUM_LC_utf8(locinput)))
1254 sayNO;
a176fa2a 1255 locinput += utf8skip[nextchr];
a0ed51b3
LW
1256 nextchr = UCHARAT(locinput);
1257 break;
1258 }
1259 if (!(OP(scan) == ALNUMUTF8
1260 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
1261 sayNO;
1262 nextchr = UCHARAT(++locinput);
1263 break;
bbce6d69 1264 case NALNUML:
3280af22 1265 PL_reg_flags |= RF_tainted;
bbce6d69 1266 /* FALL THROUGH */
a0d0e21e 1267 case NALNUM:
3280af22 1268 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1269 sayNO;
bbce6d69 1270 if (OP(scan) == NALNUM
76e3520e 1271 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
4633a7c4 1272 sayNO;
76e3520e 1273 nextchr = UCHARAT(++locinput);
a0d0e21e 1274 break;
a0ed51b3
LW
1275 case NALNUMLUTF8:
1276 PL_reg_flags |= RF_tainted;
1277 /* FALL THROUGH */
1278 case NALNUMUTF8:
1279 if (!nextchr && locinput >= PL_regeol)
1280 sayNO;
1281 if (nextchr & 0x80) {
1282 if (OP(scan) == NALNUMUTF8
1283 ? swash_fetch(PL_utf8_alnum, locinput) : isALNUM_LC_utf8(locinput))
1284 sayNO;
a176fa2a 1285 locinput += utf8skip[nextchr];
a0ed51b3
LW
1286 nextchr = UCHARAT(locinput);
1287 break;
1288 }
1289 if (OP(scan) == NALNUMUTF8
1290 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
1291 sayNO;
1292 nextchr = UCHARAT(++locinput);
1293 break;
bbce6d69 1294 case BOUNDL:
1295 case NBOUNDL:
3280af22 1296 PL_reg_flags |= RF_tainted;
bbce6d69 1297 /* FALL THROUGH */
a0d0e21e 1298 case BOUND:
bbce6d69 1299 case NBOUND:
1300 /* was last char in word? */
3280af22 1301 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
bbce6d69 1302 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1303 ln = isALNUM(ln);
76e3520e 1304 n = isALNUM(nextchr);
bbce6d69 1305 }
1306 else {
1307 ln = isALNUM_LC(ln);
76e3520e 1308 n = isALNUM_LC(nextchr);
bbce6d69 1309 }
95bac841 1310 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1311 sayNO;
a0d0e21e 1312 break;
a0ed51b3
LW
1313 case BOUNDLUTF8:
1314 case NBOUNDLUTF8:
1315 PL_reg_flags |= RF_tainted;
1316 /* FALL THROUGH */
1317 case BOUNDUTF8:
1318 case NBOUNDUTF8:
1319 /* was last char in word? */
1320 ln = (locinput != PL_regbol) ? utf8_to_uv(reghop(locinput, -1), 0) : PL_regprev;
1321 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1322 ln = isALNUM_uni(ln);
1323 n = swash_fetch(PL_utf8_alnum, locinput);
1324 }
1325 else {
1326 ln = isALNUM_LC_uni(ln);
1327 n = isALNUM_LC_utf8(locinput);
1328 }
1329 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
1330 sayNO;
1331 break;
bbce6d69 1332 case SPACEL:
3280af22 1333 PL_reg_flags |= RF_tainted;
bbce6d69 1334 /* FALL THROUGH */
a0d0e21e 1335 case SPACE:
3280af22 1336 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1337 sayNO;
bbce6d69 1338 if (!(OP(scan) == SPACE
76e3520e 1339 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1340 sayNO;
76e3520e 1341 nextchr = UCHARAT(++locinput);
a0d0e21e 1342 break;
a0ed51b3
LW
1343 case SPACELUTF8:
1344 PL_reg_flags |= RF_tainted;
1345 /* FALL THROUGH */
1346 case SPACEUTF8:
1347 if (!nextchr && locinput >= PL_regeol)
1348 sayNO;
1349 if (nextchr & 0x80) {
1350 if (!(OP(scan) == SPACEUTF8
1351 ? swash_fetch(PL_utf8_space,locinput) : isSPACE_LC_utf8(locinput)))
1352 sayNO;
a176fa2a 1353 locinput += utf8skip[nextchr];
a0ed51b3
LW
1354 nextchr = UCHARAT(locinput);
1355 break;
1356 }
1357 if (!(OP(scan) == SPACEUTF8
1358 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
1359 sayNO;
1360 nextchr = UCHARAT(++locinput);
1361 break;
bbce6d69 1362 case NSPACEL:
3280af22 1363 PL_reg_flags |= RF_tainted;
bbce6d69 1364 /* FALL THROUGH */
a0d0e21e 1365 case NSPACE:
76e3520e 1366 if (!nextchr)
4633a7c4 1367 sayNO;
bbce6d69 1368 if (OP(scan) == SPACE
76e3520e 1369 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1370 sayNO;
76e3520e 1371 nextchr = UCHARAT(++locinput);
a0d0e21e 1372 break;
a0ed51b3
LW
1373 case NSPACELUTF8:
1374 PL_reg_flags |= RF_tainted;
1375 /* FALL THROUGH */
1376 case NSPACEUTF8:
1377 if (!nextchr)
1378 sayNO;
1379 if (nextchr & 0x80) {
1380 if (OP(scan) == NSPACEUTF8
1381 ? swash_fetch(PL_utf8_space,locinput) : isSPACE_LC_utf8(locinput))
1382 sayNO;
a176fa2a 1383 locinput += utf8skip[nextchr];
a0ed51b3
LW
1384 nextchr = UCHARAT(locinput);
1385 break;
1386 }
1387 if (OP(scan) == NSPACEUTF8
1388 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
1389 sayNO;
1390 nextchr = UCHARAT(++locinput);
1391 break;
a0d0e21e 1392 case DIGIT:
76e3520e 1393 if (!isDIGIT(nextchr))
4633a7c4 1394 sayNO;
76e3520e 1395 nextchr = UCHARAT(++locinput);
a0d0e21e 1396 break;
a0ed51b3
LW
1397 case DIGITUTF8:
1398 if (nextchr & 0x80) {
1399 if (!(swash_fetch(PL_utf8_digit,locinput)))
1400 sayNO;
a176fa2a 1401 locinput += utf8skip[nextchr];
a0ed51b3
LW
1402 nextchr = UCHARAT(locinput);
1403 break;
1404 }
1405 if (!isDIGIT(nextchr))
1406 sayNO;
1407 nextchr = UCHARAT(++locinput);
1408 break;
a0d0e21e 1409 case NDIGIT:
3280af22 1410 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1411 sayNO;
76e3520e 1412 if (isDIGIT(nextchr))
4633a7c4 1413 sayNO;
76e3520e 1414 nextchr = UCHARAT(++locinput);
a0d0e21e 1415 break;
a0ed51b3
LW
1416 case NDIGITUTF8:
1417 if (!nextchr && locinput >= PL_regeol)
1418 sayNO;
1419 if (nextchr & 0x80) {
1420 if (swash_fetch(PL_utf8_digit,locinput))
1421 sayNO;
a176fa2a 1422 locinput += utf8skip[nextchr];
a0ed51b3
LW
1423 nextchr = UCHARAT(locinput);
1424 break;
1425 }
1426 if (isDIGIT(nextchr))
1427 sayNO;
1428 nextchr = UCHARAT(++locinput);
1429 break;
1430 case CLUMP:
1431 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark, locinput))
1432 sayNO;
a176fa2a 1433 locinput += utf8skip[nextchr];
a0ed51b3
LW
1434 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark, locinput))
1435 locinput += UTF8SKIP(locinput);
1436 if (locinput > PL_regeol)
1437 sayNO;
1438 nextchr = UCHARAT(locinput);
1439 break;
c8756f30 1440 case REFFL:
3280af22 1441 PL_reg_flags |= RF_tainted;
c8756f30 1442 /* FALL THROUGH */
c277df42 1443 case REF:
c8756f30 1444 case REFF:
c277df42 1445 n = ARG(scan); /* which paren pair */
3280af22
NIS
1446 s = PL_regstartp[n];
1447 if (*PL_reglastparen < n || !s)
af3f8c16 1448 sayNO; /* Do not match unless seen CLOSEn. */
3280af22 1449 if (s == PL_regendp[n])
a0d0e21e 1450 break;
a0ed51b3
LW
1451
1452 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
1453 char *l = locinput;
1454 char *e = PL_regendp[n];
1455 /*
1456 * Note that we can't do the "other character" lookup trick as
1457 * in the 8-bit case (no pun intended) because in Unicode we
1458 * have to map both upper and title case to lower case.
1459 */
1460 if (OP(scan) == REFF) {
1461 while (s < e) {
1462 if (l >= PL_regeol)
1463 sayNO;
1464 if (toLOWER_utf8(s) != toLOWER_utf8(l))
1465 sayNO;
1466 s += UTF8SKIP(s);
1467 l += UTF8SKIP(l);
1468 }
1469 }
1470 else {
1471 while (s < e) {
1472 if (l >= PL_regeol)
1473 sayNO;
1474 if (toLOWER_LC_utf8(s) != toLOWER_LC_utf8(l))
1475 sayNO;
1476 s += UTF8SKIP(s);
1477 l += UTF8SKIP(l);
1478 }
1479 }
1480 locinput = l;
1481 nextchr = UCHARAT(locinput);
1482 break;
1483 }
1484
a0d0e21e 1485 /* Inline the first character, for speed. */
76e3520e 1486 if (UCHARAT(s) != nextchr &&
c8756f30
AK
1487 (OP(scan) == REF ||
1488 (UCHARAT(s) != ((OP(scan) == REFF
76e3520e 1489 ? fold : fold_locale)[nextchr]))))
4633a7c4 1490 sayNO;
3280af22
NIS
1491 ln = PL_regendp[n] - s;
1492 if (locinput + ln > PL_regeol)
4633a7c4 1493 sayNO;
c8756f30
AK
1494 if (ln > 1 && (OP(scan) == REF
1495 ? memNE(s, locinput, ln)
1496 : (OP(scan) == REFF
1497 ? ibcmp(s, locinput, ln)
1498 : ibcmp_locale(s, locinput, ln))))
4633a7c4 1499 sayNO;
a0d0e21e 1500 locinput += ln;
76e3520e 1501 nextchr = UCHARAT(locinput);
a0d0e21e
LW
1502 break;
1503
1504 case NOTHING:
c277df42 1505 case TAIL:
a0d0e21e
LW
1506 break;
1507 case BACK:
1508 break;
c277df42
IZ
1509 case EVAL:
1510 {
1511 dSP;
533c011a 1512 OP_4tree *oop = PL_op;
3280af22
NIS
1513 COP *ocurcop = PL_curcop;
1514 SV **ocurpad = PL_curpad;
c277df42
IZ
1515 SV *ret;
1516
1517 n = ARG(scan);
533c011a
NIS
1518 PL_op = (OP_4tree*)PL_regdata->data[n];
1519 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
3280af22 1520 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 1]);
c277df42 1521
76e3520e 1522 CALLRUNOPS(); /* Scalar context. */
c277df42
IZ
1523 SPAGAIN;
1524 ret = POPs;
1525 PUTBACK;
1526
1527 if (logical) {
1528 logical = 0;
1529 sw = SvTRUE(ret);
a0ed51b3
LW
1530 }
1531 else
3280af22 1532 sv_setsv(save_scalar(PL_replgv), ret);
533c011a 1533 PL_op = oop;
3280af22
NIS
1534 PL_curpad = ocurpad;
1535 PL_curcop = ocurcop;
c277df42
IZ
1536 break;
1537 }
a0d0e21e 1538 case OPEN:
c277df42 1539 n = ARG(scan); /* which paren pair */
3280af22
NIS
1540 PL_reg_start_tmp[n] = locinput;
1541 if (n > PL_regsize)
1542 PL_regsize = n;
a0d0e21e
LW
1543 break;
1544 case CLOSE:
c277df42 1545 n = ARG(scan); /* which paren pair */
3280af22
NIS
1546 PL_regstartp[n] = PL_reg_start_tmp[n];
1547 PL_regendp[n] = locinput;
1548 if (n > *PL_reglastparen)
1549 *PL_reglastparen = n;
a0d0e21e 1550 break;
c277df42
IZ
1551 case GROUPP:
1552 n = ARG(scan); /* which paren pair */
3280af22 1553 sw = (*PL_reglastparen >= n && PL_regendp[n] != NULL);
c277df42
IZ
1554 break;
1555 case IFTHEN:
1556 if (sw)
1557 next = NEXTOPER(NEXTOPER(scan));
1558 else {
1559 next = scan + ARG(scan);
1560 if (OP(next) == IFTHEN) /* Fake one. */
1561 next = NEXTOPER(NEXTOPER(next));
1562 }
1563 break;
1564 case LOGICAL:
1565 logical = 1;
1566 break;
a0d0e21e
LW
1567 case CURLYX: {
1568 CURCUR cc;
3280af22 1569 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
1570
1571 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
1572 next += ARG(next);
3280af22
NIS
1573 cc.oldcc = PL_regcc;
1574 PL_regcc = &cc;
1575 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
1576 cc.cur = -1;
1577 cc.min = ARG1(scan);
1578 cc.max = ARG2(scan);
c277df42 1579 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
1580 cc.next = next;
1581 cc.minmod = minmod;
1582 cc.lastloc = 0;
3280af22 1583 PL_reginput = locinput;
a0d0e21e
LW
1584 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
1585 regcpblow(cp);
3280af22 1586 PL_regcc = cc.oldcc;
4633a7c4 1587 saySAME(n);
a0d0e21e
LW
1588 }
1589 /* NOT REACHED */
1590 case WHILEM: {
1591 /*
1592 * This is really hard to understand, because after we match
1593 * what we're trying to match, we must make sure the rest of
1594 * the RE is going to match for sure, and to do that we have
1595 * to go back UP the parse tree by recursing ever deeper. And
1596 * if it fails, we have to reset our parent's current state
1597 * that we can try again after backing off.
1598 */
1599
c277df42 1600 CHECKPOINT cp, lastcp;
3280af22 1601 CURCUR* cc = PL_regcc;
c277df42
IZ
1602 char *lastloc = cc->lastloc; /* Detection of 0-len. */
1603
4633a7c4 1604 n = cc->cur + 1; /* how many we know we matched */
3280af22 1605 PL_reginput = locinput;
a0d0e21e 1606
c277df42
IZ
1607 DEBUG_r(
1608 PerlIO_printf(Perl_debug_log,
1609 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 1610 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
1611 (long)n, (long)cc->min,
1612 (long)cc->max, (long)cc)
1613 );
4633a7c4 1614
a0d0e21e
LW
1615 /* If degenerate scan matches "", assume scan done. */
1616
579cf2c3 1617 if (locinput == cc->lastloc && n >= cc->min) {
3280af22
NIS
1618 PL_regcc = cc->oldcc;
1619 ln = PL_regcc->cur;
c277df42 1620 DEBUG_r(
c3464db5
DD
1621 PerlIO_printf(Perl_debug_log,
1622 "%*s empty match detected, try continuation...\n",
3280af22 1623 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1624 );
a0d0e21e 1625 if (regmatch(cc->next))
4633a7c4 1626 sayYES;
c277df42 1627 DEBUG_r(
c3464db5
DD
1628 PerlIO_printf(Perl_debug_log,
1629 "%*s failed...\n",
3280af22 1630 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1631 );
3280af22
NIS
1632 PL_regcc->cur = ln;
1633 PL_regcc = cc;
4633a7c4 1634 sayNO;
a0d0e21e
LW
1635 }
1636
1637 /* First just match a string of min scans. */
1638
1639 if (n < cc->min) {
1640 cc->cur = n;
1641 cc->lastloc = locinput;
4633a7c4
LW
1642 if (regmatch(cc->scan))
1643 sayYES;
1644 cc->cur = n - 1;
c277df42
IZ
1645 cc->lastloc = lastloc;
1646 DEBUG_r(
c3464db5
DD
1647 PerlIO_printf(Perl_debug_log,
1648 "%*s failed...\n",
3280af22 1649 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1650 );
4633a7c4 1651 sayNO;
a0d0e21e
LW
1652 }
1653
1654 /* Prefer next over scan for minimal matching. */
1655
1656 if (cc->minmod) {
3280af22
NIS
1657 PL_regcc = cc->oldcc;
1658 ln = PL_regcc->cur;
5f05dabc 1659 cp = regcppush(cc->parenfloor);
c277df42 1660 REGCP_SET;
5f05dabc 1661 if (regmatch(cc->next)) {
c277df42 1662 regcpblow(cp);
4633a7c4 1663 sayYES; /* All done. */
5f05dabc 1664 }
c277df42 1665 REGCP_UNWIND;
5f05dabc 1666 regcppop();
3280af22
NIS
1667 PL_regcc->cur = ln;
1668 PL_regcc = cc;
a0d0e21e 1669
c277df42 1670 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 1671 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
1672 && !(PL_reg_flags & RF_warned)) {
1673 PL_reg_flags |= RF_warned;
599cee73 1674 warner(WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
1675 "Complex regular subexpression recursion",
1676 REG_INFTY - 1);
c277df42 1677 }
4633a7c4 1678 sayNO;
c277df42 1679 }
a687059c 1680
c277df42 1681 DEBUG_r(
c3464db5
DD
1682 PerlIO_printf(Perl_debug_log,
1683 "%*s trying longer...\n",
3280af22 1684 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1685 );
a0d0e21e 1686 /* Try scanning more and see if it helps. */
3280af22 1687 PL_reginput = locinput;
a0d0e21e
LW
1688 cc->cur = n;
1689 cc->lastloc = locinput;
5f05dabc 1690 cp = regcppush(cc->parenfloor);
c277df42 1691 REGCP_SET;
5f05dabc 1692 if (regmatch(cc->scan)) {
c277df42 1693 regcpblow(cp);
4633a7c4 1694 sayYES;
5f05dabc 1695 }
c277df42 1696 DEBUG_r(
c3464db5
DD
1697 PerlIO_printf(Perl_debug_log,
1698 "%*s failed...\n",
3280af22 1699 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
1700 );
1701 REGCP_UNWIND;
5f05dabc 1702 regcppop();
4633a7c4 1703 cc->cur = n - 1;
c277df42 1704 cc->lastloc = lastloc;
4633a7c4 1705 sayNO;
a0d0e21e
LW
1706 }
1707
1708 /* Prefer scan over next for maximal matching. */
1709
1710 if (n < cc->max) { /* More greed allowed? */
5f05dabc 1711 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
1712 cc->cur = n;
1713 cc->lastloc = locinput;
c277df42 1714 REGCP_SET;
5f05dabc 1715 if (regmatch(cc->scan)) {
c277df42 1716 regcpblow(cp);
4633a7c4 1717 sayYES;
5f05dabc 1718 }
c277df42 1719 REGCP_UNWIND;
a0d0e21e 1720 regcppop(); /* Restore some previous $<digit>s? */
3280af22 1721 PL_reginput = locinput;
c277df42 1722 DEBUG_r(
c3464db5
DD
1723 PerlIO_printf(Perl_debug_log,
1724 "%*s failed, try continuation...\n",
3280af22 1725 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
1726 );
1727 }
599cee73
PM
1728 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
1729 && !(PL_reg_flags & RF_warned)) {
3280af22 1730 PL_reg_flags |= RF_warned;
599cee73 1731 warner(WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
1732 "Complex regular subexpression recursion",
1733 REG_INFTY - 1);
a0d0e21e
LW
1734 }
1735
1736 /* Failed deeper matches of scan, so see if this one works. */
3280af22
NIS
1737 PL_regcc = cc->oldcc;
1738 ln = PL_regcc->cur;
a0d0e21e 1739 if (regmatch(cc->next))
4633a7c4 1740 sayYES;
c277df42 1741 DEBUG_r(
c3464db5 1742 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3280af22 1743 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 1744 );
3280af22
NIS
1745 PL_regcc->cur = ln;
1746 PL_regcc = cc;
4633a7c4 1747 cc->cur = n - 1;
c277df42 1748 cc->lastloc = lastloc;
4633a7c4 1749 sayNO;
a0d0e21e
LW
1750 }
1751 /* NOT REACHED */
c277df42
IZ
1752 case BRANCHJ:
1753 next = scan + ARG(scan);
1754 if (next == scan)
1755 next = NULL;
1756 inner = NEXTOPER(NEXTOPER(scan));
1757 goto do_branch;
1758 case BRANCH:
1759 inner = NEXTOPER(scan);
1760 do_branch:
1761 {
1762 CHECKPOINT lastcp;
1763 c1 = OP(scan);
1764 if (OP(next) != c1) /* No choice. */
1765 next = inner; /* Avoid recursion. */
a0d0e21e 1766 else {
3280af22 1767 int lastparen = *PL_reglastparen;
c277df42
IZ
1768
1769 REGCP_SET;
a0d0e21e 1770 do {
3280af22 1771 PL_reginput = locinput;
c277df42 1772 if (regmatch(inner))
4633a7c4 1773 sayYES;
c277df42 1774 REGCP_UNWIND;
3280af22
NIS
1775 for (n = *PL_reglastparen; n > lastparen; n--)
1776 PL_regendp[n] = 0;
1777 *PL_reglastparen = n;
c277df42 1778 scan = next;
a0d0e21e 1779 /*SUPPRESS 560*/
c277df42
IZ
1780 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
1781 next += n;
a0d0e21e 1782 else
c277df42 1783 next = NULL;
c277df42
IZ
1784 inner = NEXTOPER(scan);
1785 if (c1 == BRANCHJ) {
1786 inner = NEXTOPER(inner);
1787 }
1788 } while (scan != NULL && OP(scan) == c1);
4633a7c4 1789 sayNO;
a0d0e21e 1790 /* NOTREACHED */
a687059c 1791 }
a0d0e21e
LW
1792 }
1793 break;
1794 case MINMOD:
1795 minmod = 1;
1796 break;
c277df42
IZ
1797 case CURLYM:
1798 {
00db4c45 1799 I32 l = 0;
c277df42
IZ
1800 CHECKPOINT lastcp;
1801
1802 /* We suppose that the next guy does not need
1803 backtracking: in particular, it is of constant length,
1804 and has no parenths to influence future backrefs. */
1805 ln = ARG1(scan); /* min to match */
1806 n = ARG2(scan); /* max to match */
c277df42
IZ
1807 paren = scan->flags;
1808 if (paren) {
3280af22
NIS
1809 if (paren > PL_regsize)
1810 PL_regsize = paren;
1811 if (paren > *PL_reglastparen)
1812 *PL_reglastparen = paren;
c277df42 1813 }
dc45a647 1814 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
1815 if (paren)
1816 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 1817 PL_reginput = locinput;
c277df42
IZ
1818 if (minmod) {
1819 minmod = 0;
1820 if (ln && regrepeat_hard(scan, ln, &l) < ln)
1821 sayNO;
5f4b28b2 1822 if (ln && l == 0 && n >= ln
c277df42
IZ
1823 /* In fact, this is tricky. If paren, then the
1824 fact that we did/didnot match may influence
1825 future execution. */
1826 && !(paren && ln == 0))
1827 ln = n;
3280af22 1828 locinput = PL_reginput;
c277df42
IZ
1829 if (regkind[(U8)OP(next)] == EXACT) {
1830 c1 = UCHARAT(OPERAND(next) + 1);
1831 if (OP(next) == EXACTF)
1832 c2 = fold[c1];
1833 else if (OP(next) == EXACTFL)
1834 c2 = fold_locale[c1];
1835 else
1836 c2 = c1;
a0ed51b3
LW
1837 }
1838 else
c277df42
IZ
1839 c1 = c2 = -1000;
1840 REGCP_SET;
5f4b28b2 1841 /* This may be improved if l == 0. */
c277df42
IZ
1842 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
1843 /* If it could work, try it. */
1844 if (c1 == -1000 ||
3280af22
NIS
1845 UCHARAT(PL_reginput) == c1 ||
1846 UCHARAT(PL_reginput) == c2)
c277df42
IZ
1847 {
1848 if (paren) {
1849 if (n) {
a0ed51b3 1850 PL_regstartp[paren] = HOP(PL_reginput, -l);
3280af22 1851 PL_regendp[paren] = PL_reginput;
a0ed51b3
LW
1852 }
1853 else
3280af22 1854 PL_regendp[paren] = NULL;
c277df42
IZ
1855 }
1856 if (regmatch(next))
1857 sayYES;
1858 REGCP_UNWIND;
1859 }
1860 /* Couldn't or didn't -- move forward. */
3280af22 1861 PL_reginput = locinput;
c277df42
IZ
1862 if (regrepeat_hard(scan, 1, &l)) {
1863 ln++;
3280af22 1864 locinput = PL_reginput;
c277df42
IZ
1865 }
1866 else
1867 sayNO;
1868 }
a0ed51b3
LW
1869 }
1870 else {
c277df42
IZ
1871 n = regrepeat_hard(scan, n, &l);
1872 if (n != 0 && l == 0
1873 /* In fact, this is tricky. If paren, then the
1874 fact that we did/didnot match may influence
1875 future execution. */
1876 && !(paren && ln == 0))
1877 ln = n;
3280af22 1878 locinput = PL_reginput;
c277df42 1879 DEBUG_r(
5c0ca799
GS
1880 PerlIO_printf(Perl_debug_log,
1881 "%*s matched %ld times, len=%ld...\n",
3280af22 1882 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42
IZ
1883 );
1884 if (n >= ln) {
1885 if (regkind[(U8)OP(next)] == EXACT) {
1886 c1 = UCHARAT(OPERAND(next) + 1);
1887 if (OP(next) == EXACTF)
1888 c2 = fold[c1];
1889 else if (OP(next) == EXACTFL)
1890 c2 = fold_locale[c1];
1891 else
1892 c2 = c1;
a0ed51b3
LW
1893 }
1894 else
c277df42
IZ
1895 c1 = c2 = -1000;
1896 }
1897 REGCP_SET;
1898 while (n >= ln) {
1899 /* If it could work, try it. */
1900 if (c1 == -1000 ||
3280af22
NIS
1901 UCHARAT(PL_reginput) == c1 ||
1902 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
1903 {
1904 DEBUG_r(
c3464db5
DD
1905 PerlIO_printf(Perl_debug_log,
1906 "%*s trying tail with n=%ld...\n",
3280af22 1907 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3
LW
1908 );
1909 if (paren) {
1910 if (n) {
1911 PL_regstartp[paren] = HOP(PL_reginput, -l);
1912 PL_regendp[paren] = PL_reginput;
c277df42 1913 }
a0ed51b3
LW
1914 else
1915 PL_regendp[paren] = NULL;
c277df42 1916 }
a0ed51b3
LW
1917 if (regmatch(next))
1918 sayYES;
1919 REGCP_UNWIND;
1920 }
c277df42
IZ
1921 /* Couldn't or didn't -- back up. */
1922 n--;
a0ed51b3 1923 locinput = HOP(locinput, -l);
3280af22 1924 PL_reginput = locinput;
c277df42
IZ
1925 }
1926 }
1927 sayNO;
1928 break;
1929 }
1930 case CURLYN:
1931 paren = scan->flags; /* Which paren to set */
3280af22
NIS
1932 if (paren > PL_regsize)
1933 PL_regsize = paren;
1934 if (paren > *PL_reglastparen)
1935 *PL_reglastparen = paren;
c277df42
IZ
1936 ln = ARG1(scan); /* min to match */
1937 n = ARG2(scan); /* max to match */
dc45a647 1938 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 1939 goto repeat;
a0d0e21e 1940 case CURLY:
c277df42 1941 paren = 0;
a0d0e21e
LW
1942 ln = ARG1(scan); /* min to match */
1943 n = ARG2(scan); /* max to match */
dc45a647 1944 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
1945 goto repeat;
1946 case STAR:
1947 ln = 0;
c277df42 1948 n = REG_INFTY;
a0d0e21e 1949 scan = NEXTOPER(scan);
c277df42 1950 paren = 0;
a0d0e21e
LW
1951 goto repeat;
1952 case PLUS:
c277df42
IZ
1953 ln = 1;
1954 n = REG_INFTY;
1955 scan = NEXTOPER(scan);
1956 paren = 0;
1957 repeat:
a0d0e21e
LW
1958 /*
1959 * Lookahead to avoid useless match attempts
1960 * when we know what character comes next.
1961 */
bbce6d69 1962 if (regkind[(U8)OP(next)] == EXACT) {
1963 c1 = UCHARAT(OPERAND(next) + 1);
1964 if (OP(next) == EXACTF)
1965 c2 = fold[c1];
1966 else if (OP(next) == EXACTFL)
1967 c2 = fold_locale[c1];
1968 else
1969 c2 = c1;
1970 }
a0d0e21e 1971 else
bbce6d69 1972 c1 = c2 = -1000;
3280af22 1973 PL_reginput = locinput;
a0d0e21e 1974 if (minmod) {
c277df42 1975 CHECKPOINT lastcp;
a0d0e21e
LW
1976 minmod = 0;
1977 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 1978 sayNO;
a0ed51b3 1979 locinput = PL_reginput;
c277df42
IZ
1980 REGCP_SET;
1981 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 1982 /* If it could work, try it. */
bbce6d69 1983 if (c1 == -1000 ||
3280af22
NIS
1984 UCHARAT(PL_reginput) == c1 ||
1985 UCHARAT(PL_reginput) == c2)
bbce6d69 1986 {
c277df42
IZ
1987 if (paren) {
1988 if (n) {
a0ed51b3 1989 PL_regstartp[paren] = HOP(PL_reginput, -1);
3280af22 1990 PL_regendp[paren] = PL_reginput;
a0ed51b3
LW
1991 }
1992 else
3280af22 1993 PL_regendp[paren] = NULL;
c277df42 1994 }
a0d0e21e 1995 if (regmatch(next))
4633a7c4 1996 sayYES;
c277df42 1997 REGCP_UNWIND;
bbce6d69 1998 }
c277df42 1999 /* Couldn't or didn't -- move forward. */
a0ed51b3 2000 PL_reginput = locinput;
a0d0e21e
LW
2001 if (regrepeat(scan, 1)) {
2002 ln++;
a0ed51b3
LW
2003 locinput = PL_reginput;
2004 }
2005 else
4633a7c4 2006 sayNO;
a0d0e21e
LW
2007 }
2008 }
2009 else {
c277df42 2010 CHECKPOINT lastcp;
a0d0e21e 2011 n = regrepeat(scan, n);
a0ed51b3 2012 locinput = PL_reginput;
a0d0e21e 2013 if (ln < n && regkind[(U8)OP(next)] == EOL &&
3280af22 2014 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2015 ln = n; /* why back off? */
c277df42
IZ
2016 REGCP_SET;
2017 if (paren) {
2018 while (n >= ln) {
2019 /* If it could work, try it. */
2020 if (c1 == -1000 ||
3280af22
NIS
2021 UCHARAT(PL_reginput) == c1 ||
2022 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2023 {
2024 if (paren && n) {
2025 if (n) {
a0ed51b3 2026 PL_regstartp[paren] = HOP(PL_reginput, -1);
3280af22 2027 PL_regendp[paren] = PL_reginput;
a0ed51b3
LW
2028 }
2029 else
3280af22 2030 PL_regendp[paren] = NULL;
c277df42
IZ
2031 }
2032 if (regmatch(next))
2033 sayYES;
2034 REGCP_UNWIND;
2035 }
2036 /* Couldn't or didn't -- back up. */
2037 n--;
a0ed51b3 2038 PL_reginput = locinput = HOP(locinput, -1);
c277df42 2039 }
a0ed51b3
LW
2040 }
2041 else {
c277df42
IZ
2042 while (n >= ln) {
2043 /* If it could work, try it. */
2044 if (c1 == -1000 ||
3280af22
NIS
2045 UCHARAT(PL_reginput) == c1 ||
2046 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2047 {
2048 if (regmatch(next))
2049 sayYES;
2050 REGCP_UNWIND;
2051 }
2052 /* Couldn't or didn't -- back up. */
2053 n--;
a0ed51b3 2054 PL_reginput = locinput = HOP(locinput, -1);
bbce6d69 2055 }
a0d0e21e
LW
2056 }
2057 }
4633a7c4 2058 sayNO;
c277df42 2059 break;
a0d0e21e 2060 case END:
3280af22 2061 if (locinput < PL_regtill)
7e5428c5
IZ
2062 sayNO; /* Cannot match: too short. */
2063 /* Fall through */
2064 case SUCCEED:
3280af22 2065 PL_reginput = locinput; /* put where regtry can find it */
4633a7c4 2066 sayYES; /* Success! */
c277df42
IZ
2067 case SUSPEND:
2068 n = 1;
2069 goto do_ifmatch;
a0d0e21e 2070 case UNLESSM:
c277df42 2071 n = 0;
a0ed51b3
LW
2072 if (scan->flags) {
2073 s = HOPMAYBE(locinput, -scan->flags);
2074 if (!s)
2075 goto say_yes;
2076 PL_reginput = s;
2077 }
2078 else
2079 PL_reginput = locinput;
c277df42
IZ
2080 goto do_ifmatch;
2081 case IFMATCH:
2082 n = 1;
a0ed51b3
LW
2083 if (scan->flags) {
2084 s = HOPMAYBE(locinput, -scan->flags);
2085 if (!s)
2086 goto say_no;
2087 PL_reginput = s;
2088 }
2089 else
2090 PL_reginput = locinput;
2091
c277df42 2092 do_ifmatch:
c277df42
IZ
2093 inner = NEXTOPER(NEXTOPER(scan));
2094 if (regmatch(inner) != n) {
2095 say_no:
2096 if (logical) {
2097 logical = 0;
2098 sw = 0;
2099 goto do_longjump;
a0ed51b3
LW
2100 }
2101 else
c277df42
IZ
2102 sayNO;
2103 }
2104 say_yes:
2105 if (logical) {
2106 logical = 0;
2107 sw = 1;
2108 }
fe44a5e8 2109 if (OP(scan) == SUSPEND) {
3280af22 2110 locinput = PL_reginput;
565764a8 2111 nextchr = UCHARAT(locinput);
fe44a5e8 2112 }
c277df42
IZ
2113 /* FALL THROUGH. */
2114 case LONGJMP:
2115 do_longjump:
2116 next = scan + ARG(scan);
2117 if (next == scan)
2118 next = NULL;
a0d0e21e
LW
2119 break;
2120 default:
c030ccd9 2121 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 2122 (unsigned long)scan, OP(scan));
a0d0e21e 2123 FAIL("regexp memory corruption");
a687059c 2124 }
a0d0e21e
LW
2125 scan = next;
2126 }
a687059c 2127
a0d0e21e
LW
2128 /*
2129 * We get here only if there's trouble -- normally "case END" is
2130 * the terminating point.
2131 */
2132 FAIL("corrupted regexp pointers");
2133 /*NOTREACHED*/
4633a7c4
LW
2134 sayNO;
2135
2136yes:
2137#ifdef DEBUGGING
3280af22 2138 PL_regindent--;
4633a7c4
LW
2139#endif
2140 return 1;
2141
2142no:
2143#ifdef DEBUGGING
3280af22 2144 PL_regindent--;
4633a7c4 2145#endif
a0d0e21e 2146 return 0;
a687059c
LW
2147}
2148
2149/*
2150 - regrepeat - repeatedly match something simple, report how many
2151 */
2152/*
2153 * [This routine now assumes that it will only match on things of length 1.
2154 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 2155 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 2156 */
76e3520e 2157STATIC I32
c277df42 2158regrepeat(regnode *p, I32 max)
a687059c 2159{
5c0ca799 2160 dTHR;
a0d0e21e
LW
2161 register char *scan;
2162 register char *opnd;
2163 register I32 c;
3280af22 2164 register char *loceol = PL_regeol;
a0ed51b3 2165 register I32 hardcount = 0;
a0d0e21e 2166
3280af22 2167 scan = PL_reginput;
c277df42 2168 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 2169 loceol = scan + max;
161b471a 2170 opnd = (char *) OPERAND(p);
a0d0e21e
LW
2171 switch (OP(p)) {
2172 case ANY:
2173 while (scan < loceol && *scan != '\n')
2174 scan++;
2175 break;
2176 case SANY:
2177 scan = loceol;
2178 break;
a0ed51b3
LW
2179 case ANYUTF8:
2180 loceol = PL_regeol;
2181 while (scan < loceol && *scan != '\n') {
2182 scan += UTF8SKIP(scan);
2183 hardcount++;
2184 }
2185 break;
2186 case SANYUTF8:
2187 loceol = PL_regeol;
2188 while (scan < loceol) {
2189 scan += UTF8SKIP(scan);
2190 hardcount++;
2191 }
2192 break;
bbce6d69 2193 case EXACT: /* length of string is 1 */
2194 c = UCHARAT(++opnd);
2195 while (scan < loceol && UCHARAT(scan) == c)
2196 scan++;
2197 break;
2198 case EXACTF: /* length of string is 1 */
2199 c = UCHARAT(++opnd);
2200 while (scan < loceol &&
2201 (UCHARAT(scan) == c || UCHARAT(scan) == fold[c]))
2202 scan++;
2203 break;
2204 case EXACTFL: /* length of string is 1 */
3280af22 2205 PL_reg_flags |= RF_tainted;
bbce6d69 2206 c = UCHARAT(++opnd);
2207 while (scan < loceol &&
2208 (UCHARAT(scan) == c || UCHARAT(scan) == fold_locale[c]))
a0d0e21e
LW
2209 scan++;
2210 break;
a0ed51b3
LW
2211 case ANYOFUTF8:
2212 loceol = PL_regeol;
2213 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
2214 scan += UTF8SKIP(scan);
2215 hardcount++;
2216 }
2217 break;
a0d0e21e 2218 case ANYOF:
ae5c130c 2219 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 2220 scan++;
a0d0e21e
LW
2221 break;
2222 case ALNUM:
2223 while (scan < loceol && isALNUM(*scan))
2224 scan++;
2225 break;
a0ed51b3
LW
2226 case ALNUMUTF8:
2227 loceol = PL_regeol;
2228 while (scan < loceol && swash_fetch(PL_utf8_alnum, scan)) {
2229 scan += UTF8SKIP(scan);
2230 hardcount++;
2231 }
2232 break;
bbce6d69 2233 case ALNUML:
3280af22 2234 PL_reg_flags |= RF_tainted;
bbce6d69 2235 while (scan < loceol && isALNUM_LC(*scan))
2236 scan++;
2237 break;
a0ed51b3
LW
2238 case ALNUMLUTF8:
2239 PL_reg_flags |= RF_tainted;
2240 loceol = PL_regeol;
2241 while (scan < loceol && isALNUM_LC_utf8(scan)) {
2242 scan += UTF8SKIP(scan);
2243 hardcount++;
2244 }
2245 break;
2246 break;
a0d0e21e
LW
2247 case NALNUM:
2248 while (scan < loceol && !isALNUM(*scan))
2249 scan++;
2250 break;
a0ed51b3
LW
2251 case NALNUMUTF8:
2252 loceol = PL_regeol;
2253 while (scan < loceol && !swash_fetch(PL_utf8_alnum, scan)) {
2254 scan += UTF8SKIP(scan);
2255 hardcount++;
2256 }
2257 break;
bbce6d69 2258 case NALNUML:
3280af22 2259 PL_reg_flags |= RF_tainted;
bbce6d69 2260 while (scan < loceol && !isALNUM_LC(*scan))
2261 scan++;
2262 break;
a0ed51b3
LW
2263 case NALNUMLUTF8:
2264 PL_reg_flags |= RF_tainted;
2265 loceol = PL_regeol;
2266 while (scan < loceol && !isALNUM_LC_utf8(scan)) {
2267 scan += UTF8SKIP(scan);
2268 hardcount++;
2269 }
2270 break;
a0d0e21e
LW
2271 case SPACE:
2272 while (scan < loceol && isSPACE(*scan))
2273 scan++;
2274 break;
a0ed51b3
LW
2275 case SPACEUTF8:
2276 loceol = PL_regeol;
2277 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,scan))) {
2278 scan += UTF8SKIP(scan);
2279 hardcount++;
2280 }
2281 break;
bbce6d69 2282 case SPACEL:
3280af22 2283 PL_reg_flags |= RF_tainted;
bbce6d69 2284 while (scan < loceol && isSPACE_LC(*scan))
2285 scan++;
2286 break;
a0ed51b3
LW
2287 case SPACELUTF8:
2288 PL_reg_flags |= RF_tainted;
2289 loceol = PL_regeol;
2290 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8(scan))) {
2291 scan += UTF8SKIP(scan);
2292 hardcount++;
2293 }
2294 break;
a0d0e21e
LW
2295 case NSPACE:
2296 while (scan < loceol && !isSPACE(*scan))
2297 scan++;
2298 break;
a0ed51b3
LW
2299 case NSPACEUTF8:
2300 loceol = PL_regeol;
2301 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,scan))) {
2302 scan += UTF8SKIP(scan);
2303 hardcount++;
2304 }
2305 break;
bbce6d69 2306 case NSPACEL:
3280af22 2307 PL_reg_flags |= RF_tainted;
bbce6d69 2308 while (scan < loceol && !isSPACE_LC(*scan))
2309 scan++;
2310 break;
a0ed51b3
LW
2311 case NSPACELUTF8:
2312 PL_reg_flags |= RF_tainted;
2313 loceol = PL_regeol;
2314 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8(scan))) {
2315 scan += UTF8SKIP(scan);
2316 hardcount++;
2317 }
2318 break;
a0d0e21e
LW
2319 case DIGIT:
2320 while (scan < loceol && isDIGIT(*scan))
2321 scan++;
2322 break;
a0ed51b3
LW
2323 case DIGITUTF8:
2324 loceol = PL_regeol;
2325 while (scan < loceol && swash_fetch(PL_utf8_digit,scan)) {
2326 scan += UTF8SKIP(scan);
2327 hardcount++;
2328 }
2329 break;
2330 break;
a0d0e21e
LW
2331 case NDIGIT:
2332 while (scan < loceol && !isDIGIT(*scan))
2333 scan++;
2334 break;
a0ed51b3
LW
2335 case NDIGITUTF8:
2336 loceol = PL_regeol;
2337 while (scan < loceol && !swash_fetch(PL_utf8_digit,scan)) {
2338 scan += UTF8SKIP(scan);
2339 hardcount++;
2340 }
2341 break;
a0d0e21e
LW
2342 default: /* Called on something of 0 width. */
2343 break; /* So match right here or not at all. */
2344 }
a687059c 2345
a0ed51b3
LW
2346 if (hardcount)
2347 c = hardcount;
2348 else
2349 c = scan - PL_reginput;
3280af22 2350 PL_reginput = scan;
a687059c 2351
c277df42
IZ
2352 DEBUG_r(
2353 {
2354 SV *prop = sv_newmortal();
2355
2356 regprop(prop, p);
2357 PerlIO_printf(Perl_debug_log,
2358 "%*s %s can match %ld times out of %ld...\n",
2359 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
2360 });
2361
a0d0e21e 2362 return(c);
a687059c
LW
2363}
2364
2365/*
c277df42
IZ
2366 - regrepeat_hard - repeatedly match something, report total lenth and length
2367 *
2368 * The repeater is supposed to have constant length.
2369 */
2370
76e3520e 2371STATIC I32
c277df42
IZ
2372regrepeat_hard(regnode *p, I32 max, I32 *lp)
2373{
5c0ca799 2374 dTHR;
c277df42
IZ
2375 register char *scan;
2376 register char *start;
3280af22 2377 register char *loceol = PL_regeol;
a0ed51b3
LW
2378 I32 l = 0;
2379 I32 count = 0;
2380
2381 if (!max)
2382 return 0;
c277df42 2383
3280af22 2384 start = PL_reginput;
a0ed51b3
LW
2385 if (UTF) {
2386 while (PL_reginput < loceol && (scan = PL_reginput, regmatch(p))) {
2387 if (!count++) {
2388 l = 0;
2389 while (start < PL_reginput) {
2390 l++;
2391 start += UTF8SKIP(start);
2392 }
2393 *lp = l;
2394 if (l == 0)
2395 return max;
2396 }
2397 if (count == max)
2398 return count;
2399 }
2400 }
2401 else {
2402 while (PL_reginput < loceol && (scan = PL_reginput, regmatch(p))) {
2403 if (!count++) {
2404 *lp = l = PL_reginput - start;
2405 if (max != REG_INFTY && l*max < loceol - scan)
2406 loceol = scan + l*max;
2407 if (l == 0)
2408 return max;
c277df42
IZ
2409 }
2410 }
2411 }
3280af22
NIS
2412 if (PL_reginput < loceol)
2413 PL_reginput = scan;
c277df42 2414
a0ed51b3 2415 return count;
c277df42
IZ
2416}
2417
2418/*
bbce6d69 2419 - regclass - determine if a character falls into a character class
2420 */
2421
76e3520e 2422STATIC bool
8ac85365 2423reginclass(register char *p, register I32 c)
bbce6d69 2424{
5c0ca799 2425 dTHR;
bbce6d69 2426 char flags = *p;
2427 bool match = FALSE;
2428
2429 c &= 0xFF;
ae5c130c 2430 if (ANYOF_TEST(p, c))
bbce6d69 2431 match = TRUE;
2432 else if (flags & ANYOF_FOLD) {
2433 I32 cf;
2434 if (flags & ANYOF_LOCALE) {
3280af22 2435 PL_reg_flags |= RF_tainted;
bbce6d69 2436 cf = fold_locale[c];
2437 }
2438 else
2439 cf = fold[c];
ae5c130c 2440 if (ANYOF_TEST(p, cf))
bbce6d69 2441 match = TRUE;
2442 }
2443
2444 if (!match && (flags & ANYOF_ISA)) {
3280af22 2445 PL_reg_flags |= RF_tainted;
bbce6d69 2446
2447 if (((flags & ANYOF_ALNUML) && isALNUM_LC(c)) ||
2448 ((flags & ANYOF_NALNUML) && !isALNUM_LC(c)) ||
2449 ((flags & ANYOF_SPACEL) && isSPACE_LC(c)) ||
2450 ((flags & ANYOF_NSPACEL) && !isSPACE_LC(c)))
2451 {
2452 match = TRUE;
2453 }
2454 }
2455
ae5c130c 2456 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 2457}
2458
a0ed51b3
LW
2459STATIC bool
2460reginclassutf8(regnode *f, U8 *p)
c485e607
NIS
2461{
2462 dTHR;
a0ed51b3
LW
2463 char flags = ARG1(f);
2464 bool match = FALSE;
2465 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
2466
2467 if (swash_fetch(sv, p))
2468 match = TRUE;
2469 else if (flags & ANYOF_FOLD) {
2470 I32 cf;
2471 char tmpbuf[10];
2472 if (flags & ANYOF_LOCALE) {
2473 PL_reg_flags |= RF_tainted;
2474 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
2475 }
2476 else
2477 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
2478 if (swash_fetch(sv, tmpbuf))
2479 match = TRUE;
2480 }
2481
2482 if (!match && (flags & ANYOF_ISA)) {
2483 PL_reg_flags |= RF_tainted;
2484
2485 if (((flags & ANYOF_ALNUML) && isALNUM_LC_utf8(p)) ||
2486 ((flags & ANYOF_NALNUML) && !isALNUM_LC_utf8(p)) ||
2487 ((flags & ANYOF_SPACEL) && isSPACE_LC_utf8(p)) ||
2488 ((flags & ANYOF_NSPACEL) && !isSPACE_LC_utf8(p)))
2489 {
2490 match = TRUE;
2491 }
2492 }
2493
2494 return (flags & ANYOF_INVERT) ? !match : match;
2495}
161b471a 2496
a0ed51b3
LW
2497STATIC char *
2498reghop(unsigned char *s, I32 off)
c485e607
NIS
2499{
2500 dTHR;
a0ed51b3
LW
2501 if (off >= 0) {
2502 while (off-- && s < (U8*)PL_regeol)
2503 s += UTF8SKIP(s);
2504 }
2505 else {
2506 while (off++) {
2507 if (s > (U8*)PL_bostr) {
2508 s--;
2509 if (*s & 0x80) {
2510 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
2511 s--;
2512 } /* XXX could check well-formedness here */
2513 }
2514 }
2515 }
2516 return s;
2517}
161b471a 2518
a0ed51b3
LW
2519STATIC char *
2520reghopmaybe(unsigned char *s, I32 off)
2521{
c485e607 2522 dTHR;
a0ed51b3
LW
2523 if (off >= 0) {
2524 while (off-- && s < (U8*)PL_regeol)
2525 s += UTF8SKIP(s);
2526 if (off >= 0)
2527 return 0;
2528 }
2529 else {
2530 while (off++) {
2531 if (s > (U8*)PL_bostr) {
2532 s--;
2533 if (*s & 0x80) {
2534 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
2535 s--;
2536 } /* XXX could check well-formedness here */
2537 }
2538 else
2539 break;
2540 }
2541 if (off <= 0)
2542 return 0;
2543 }
2544 return s;
2545}