This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
skip unreadable directory test when running as root
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5
DD
148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
149 " Setting an EVAL scope, savestack=%i\n", \
3280af22 150 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5
DD
153 PerlIO_printf(Perl_debug_log, \
154 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 155 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5
DD
178 PerlIO_printf(Perl_debug_log,
179 " restoring \\%d to %d(%d)..%d%s\n",
cf93c79d
IZ
180 paren, PL_regstartp[paren],
181 PL_reg_start_tmp[paren] - PL_bostr,
182 PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5
DD
188 PerlIO_printf(Perl_debug_log,
189 " restoring \\%d..\\%d to undef\n",
3280af22 190 *PL_reglastparen + 1, PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
2c2d71f5
JH
278/* A failure to find a constant substring means that there is no need to make
279 an expensive call to REx engine, thus we celebrate a failure. Similarly,
280 finding a substring too deep into the string means that less calls to
30944b6d
IZ
281 regtry() should be needed.
282
283 REx compiler's optimizer found 4 possible hints:
284 a) Anchored substring;
285 b) Fixed substring;
286 c) Whether we are anchored (beginning-of-line or \G);
287 d) First node (of those at offset 0) which may distingush positions;
288 We use 'a', 'b', multiline-part of 'c', and try to find a position in the
289 string which does not contradict any of them.
290 */
2c2d71f5 291
cad2e5aa
JH
292char *
293Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
294 char *strend, U32 flags, re_scream_pos_data *data)
295{
2c2d71f5 296 register I32 start_shift;
cad2e5aa 297 /* Should be nonnegative! */
2c2d71f5
JH
298 register I32 end_shift;
299 register char *s;
300 register SV *check;
cad2e5aa
JH
301 char *t;
302 I32 ml_anch;
2c2d71f5
JH
303 char *tmp;
304 register char *other_last = Nullch;
30944b6d
IZ
305#ifdef DEBUGGING
306 char *i_strpos = strpos;
307#endif
cad2e5aa
JH
308
309 DEBUG_r( if (!PL_colorset) reginitcolors() );
310 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 311 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
312 PL_colors[4],PL_colors[5],PL_colors[0],
313 prog->precomp,
314 PL_colors[1],
315 (strlen(prog->precomp) > 60 ? "..." : ""),
316 PL_colors[0],
317 (strend - strpos > 60 ? 60 : strend - strpos),
318 strpos, PL_colors[1],
319 (strend - strpos > 60 ? "..." : ""))
320 );
321
2c2d71f5
JH
322 if (prog->minlen > strend - strpos) {
323 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 324 goto fail;
2c2d71f5
JH
325 }
326 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
327 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
328 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 329 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
330
331 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 332 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
333 I32 slen;
334
335 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
336 && (sv && (strpos + SvCUR(sv) != strend)) ) {
337 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 338 goto fail;
2c2d71f5 339 }
adac82c7 340 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 341 s = HOPc(strpos, prog->check_offset_min);
cad2e5aa
JH
342 if (SvTAIL(prog->check_substr)) {
343 slen = SvCUR(prog->check_substr); /* >= 1 */
344
2c2d71f5
JH
345 if ( strend - s > slen || strend - s < slen - 1
346 || (strend - s == slen && strend[-1] != '\n')) {
347 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
348 goto fail_finish;
cad2e5aa
JH
349 }
350 /* Now should match s[0..slen-2] */
351 slen--;
352 if (slen && (*SvPVX(prog->check_substr) != *s
353 || (slen > 1
2c2d71f5
JH
354 && memNE(SvPVX(prog->check_substr), s, slen)))) {
355 report_neq:
356 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
357 goto fail_finish;
358 }
cad2e5aa
JH
359 }
360 else if (*SvPVX(prog->check_substr) != *s
361 || ((slen = SvCUR(prog->check_substr)) > 1
362 && memNE(SvPVX(prog->check_substr), s, slen)))
2c2d71f5
JH
363 goto report_neq;
364 goto success_at_start;
cad2e5aa 365 }
2c2d71f5 366 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 367 s = strpos;
2c2d71f5
JH
368 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
369 /* Should be nonnegative! */
370 end_shift = prog->minlen - start_shift -
371 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
372 if (!ml_anch) {
373 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
374 - (SvTAIL(prog->check_substr) != 0);
375 I32 eshift = strend - s - end;
376
377 if (end_shift < eshift)
378 end_shift = eshift;
379 }
cad2e5aa 380 }
2c2d71f5 381 else { /* Can match at random position */
cad2e5aa
JH
382 ml_anch = 0;
383 s = strpos;
2c2d71f5
JH
384 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
385 /* Should be nonnegative! */
386 end_shift = prog->minlen - start_shift -
387 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
cad2e5aa
JH
388 }
389
2c2d71f5 390#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 391 if (end_shift < 0)
6bbae5e6 392 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
393#endif
394
395 check = prog->check_substr;
396 restart:
397 /* Find a possible match in the region s..strend by looking for
398 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 399 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
400 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
401 I32 p = -1; /* Internal iterator of scream. */
402 I32 *pp = data ? data->scream_pos : &p;
403
2c2d71f5
JH
404 if (PL_screamfirst[BmRARE(check)] >= 0
405 || ( BmRARE(check) == '\n'
406 && (BmPREVIOUS(check) == SvCUR(check) - 1)
407 && SvTAIL(check) ))
408 s = screaminstr(sv, check,
409 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 410 else
2c2d71f5 411 goto fail_finish;
cad2e5aa
JH
412 if (data)
413 *data->scream_olds = s;
414 }
415 else
416 s = fbm_instr((unsigned char*)s + start_shift,
417 (unsigned char*)strend - end_shift,
2c2d71f5 418 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
419
420 /* Update the count-of-usability, remove useless subpatterns,
421 unshift s. */
2c2d71f5
JH
422
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
424 (s ? "Found" : "Did not find"),
425 ((check == prog->anchored_substr) ? "anchored" : "floating"),
426 PL_colors[0],
427 SvCUR(check) - (SvTAIL(check)!=0), SvPVX(check),
428 PL_colors[1], (SvTAIL(check) ? "$" : ""),
429 (s ? " at offset " : "...\n") ) );
430
431 if (!s)
432 goto fail_finish;
433
434 /* Finish the diagnostic message */
30944b6d 435 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
436
437 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
438 Start with the other substr.
439 XXXX no SCREAM optimization yet - and a very coarse implementation
440 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
441 *always* match. Probably should be marked during compile...
442 Probably it is right to do no SCREAM here...
443 */
444
445 if (prog->float_substr && prog->anchored_substr) {
30944b6d 446 /* Take into account the "other" substring. */
2c2d71f5
JH
447 /* XXXX May be hopelessly wrong for UTF... */
448 if (!other_last)
449 other_last = strpos - 1;
450 if (check == prog->float_substr) {
30944b6d
IZ
451 do_other_anchored:
452 {
2c2d71f5
JH
453 char *last = s - start_shift, *last1, *last2;
454 char *s1 = s;
455
456 tmp = PL_bostr;
457 t = s - prog->check_offset_max;
458 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
459 && (!(prog->reganch & ROPT_UTF8)
460 || (PL_bostr = strpos, /* Used in regcopmaybe() */
461 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
462 && t > strpos)))
30944b6d 463 /* EMPTY */;
2c2d71f5
JH
464 else
465 t = strpos;
466 t += prog->anchored_offset;
467 if (t <= other_last)
468 t = other_last + 1;
469 PL_bostr = tmp;
470 last2 = last1 = strend - prog->minlen;
471 if (last < last1)
472 last1 = last;
473 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
474 /* On end-of-str: see comment below. */
475 s = fbm_instr((unsigned char*)t,
476 (unsigned char*)last1 + prog->anchored_offset
477 + SvCUR(prog->anchored_substr)
478 - (SvTAIL(prog->anchored_substr)!=0),
479 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
480 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
481 (s ? "Found" : "Contradicts"),
482 PL_colors[0],
483 SvCUR(prog->anchored_substr)
484 - (SvTAIL(prog->anchored_substr)!=0),
485 SvPVX(prog->anchored_substr),
486 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
487 if (!s) {
488 if (last1 >= last2) {
489 DEBUG_r(PerlIO_printf(Perl_debug_log,
490 ", giving up...\n"));
491 goto fail_finish;
492 }
493 DEBUG_r(PerlIO_printf(Perl_debug_log,
494 ", trying floating at offset %ld...\n",
30944b6d 495 (long)(s1 + 1 - i_strpos)));
2c2d71f5
JH
496 PL_regeol = strend; /* Used in HOP() */
497 other_last = last1 + prog->anchored_offset;
498 s = HOPc(last, 1);
499 goto restart;
500 }
501 else {
502 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 503 (long)(s - i_strpos)));
2c2d71f5
JH
504 t = s - prog->anchored_offset;
505 other_last = s - 1;
30944b6d 506 s = s1;
2c2d71f5
JH
507 if (t == strpos)
508 goto try_at_start;
2c2d71f5
JH
509 goto try_at_offset;
510 }
30944b6d 511 }
2c2d71f5
JH
512 }
513 else { /* Take into account the floating substring. */
514 char *last, *last1;
515 char *s1 = s;
516
517 t = s - start_shift;
518 last1 = last = strend - prog->minlen + prog->float_min_offset;
519 if (last - t > prog->float_max_offset)
520 last = t + prog->float_max_offset;
521 s = t + prog->float_min_offset;
522 if (s <= other_last)
523 s = other_last + 1;
524 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
525 /* fbm_instr() takes into account exact value of end-of-str
526 if the check is SvTAIL(ed). Since false positives are OK,
527 and end-of-str is not later than strend we are OK. */
528 s = fbm_instr((unsigned char*)s,
529 (unsigned char*)last + SvCUR(prog->float_substr)
530 - (SvTAIL(prog->float_substr)!=0),
531 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
532 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
533 (s ? "Found" : "Contradicts"),
534 PL_colors[0],
535 SvCUR(prog->float_substr)
536 - (SvTAIL(prog->float_substr)!=0),
537 SvPVX(prog->float_substr),
538 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
539 if (!s) {
540 if (last1 == last) {
541 DEBUG_r(PerlIO_printf(Perl_debug_log,
542 ", giving up...\n"));
543 goto fail_finish;
544 }
545 DEBUG_r(PerlIO_printf(Perl_debug_log,
546 ", trying anchored starting at offset %ld...\n",
30944b6d 547 (long)(s1 + 1 - i_strpos)));
2c2d71f5
JH
548 other_last = last;
549 PL_regeol = strend; /* Used in HOP() */
550 s = HOPc(t, 1);
551 goto restart;
552 }
553 else {
554 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 555 (long)(s - i_strpos)));
2c2d71f5 556 other_last = s - 1;
30944b6d 557 s = s1;
2c2d71f5
JH
558 if (t == strpos)
559 goto try_at_start;
2c2d71f5
JH
560 goto try_at_offset;
561 }
562 }
cad2e5aa 563 }
2c2d71f5
JH
564
565 t = s - prog->check_offset_max;
566 tmp = PL_bostr;
567 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
568 && (!(prog->reganch & ROPT_UTF8)
569 || (PL_bostr = strpos, /* Used in regcopmaybe() */
570 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
571 && t > strpos)))) {
572 PL_bostr = tmp;
573 /* Fixed substring is found far enough so that the match
574 cannot start at strpos. */
575 try_at_offset:
cad2e5aa 576 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
577 /* Eventually fbm_*() should handle this, but often
578 anchored_offset is not 0, so this check will not be wasted. */
579 /* XXXX In the code below we prefer to look for "^" even in
580 presence of anchored substrings. And we search even
581 beyond the found float position. These pessimizations
582 are historical artefacts only. */
583 find_anchor:
2c2d71f5 584 while (t < strend - prog->minlen) {
cad2e5aa
JH
585 if (*t == '\n') {
586 if (t < s - prog->check_offset_min) {
30944b6d
IZ
587 if (prog->anchored_substr) {
588 /* We definitely contradict the found anchored
589 substr. Due to the above check we do not
590 contradict "check" substr.
591 Thus we can arrive here only if check substr
592 is float. Redo checking for "other"=="fixed".
593 */
594 strpos = t + 1;
595 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
596 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
597 goto do_other_anchored;
598 }
cad2e5aa 599 s = t + 1;
2c2d71f5 600 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 601 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
602 goto set_useful;
603 }
2c2d71f5 604 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
605 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
606 strpos = s = t + 1;
cad2e5aa
JH
607 goto restart;
608 }
609 t++;
610 }
2c2d71f5
JH
611 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
612 PL_colors[0],PL_colors[1]));
613 goto fail_finish;
cad2e5aa
JH
614 }
615 s = t;
616 set_useful:
2c2d71f5 617 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
618 }
619 else {
2c2d71f5
JH
620 PL_bostr = tmp;
621 /* The found string does not prohibit matching at beg-of-str
622 - no optimization of calling REx engine can be performed,
623 unless it was an MBOL and we are not after MBOL. */
624 try_at_start:
625 /* Even in this situation we may use MBOL flag if strpos is offset
626 wrt the start of the string. */
627 if (ml_anch && sv
cad2e5aa
JH
628 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
629 t = strpos;
630 goto find_anchor;
631 }
30944b6d
IZ
632 DEBUG_r( if (ml_anch)
633 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
634 PL_colors[0],PL_colors[1]);
635 );
2c2d71f5 636 success_at_start:
30944b6d 637 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
cad2e5aa
JH
638 && --BmUSEFUL(prog->check_substr) < 0
639 && prog->check_substr == prog->float_substr) { /* boo */
640 /* If flags & SOMETHING - do not do it many times on the same match */
641 SvREFCNT_dec(prog->check_substr);
642 prog->check_substr = Nullsv; /* disable */
643 prog->float_substr = Nullsv; /* clear */
644 s = strpos;
645 prog->reganch &= ~RE_USE_INTUIT;
646 }
647 else
648 s = strpos;
649 }
650
2c2d71f5 651 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 652 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 653 return s;
2c2d71f5
JH
654
655 fail_finish: /* Substring not found */
656 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 657 fail:
2c2d71f5 658 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
659 PL_colors[4],PL_colors[5]));
660 return Nullch;
661}
9661b544 662
a687059c 663/*
c277df42 664 - regexec_flags - match a regexp against a string
a687059c 665 */
79072805 666I32
864dbfa3 667Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
22e551b9 668 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
c277df42
IZ
669/* strend: pointer to null at end of string */
670/* strbeg: real beginning of string */
671/* minend: end of match must be >=minend after stringarg. */
672/* data: May be used for some additional optimizations. */
673/* nosave: For optimizations. */
a687059c 674{
5c0ca799 675 dTHR;
a0d0e21e 676 register char *s;
c277df42 677 register regnode *c;
a0d0e21e
LW
678 register char *startpos = stringarg;
679 register I32 tmp;
c277df42 680 I32 minlen; /* must match at least this many chars */
a0d0e21e 681 I32 dontbother = 0; /* how many characters not to try at end */
c277df42 682 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
683 constant substr. */ /* CC */
684 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
685 I32 scream_pos = -1; /* Internal iterator of scream. */
686 char *scream_olds;
3280af22 687 SV* oreplsv = GvSV(PL_replgv);
a687059c 688
2ab05381 689 PL_regcc = 0;
a0d0e21e 690
0f5d15d6 691 cache_re(prog);
a0d0e21e 692#ifdef DEBUGGING
3280af22 693 PL_regnarrate = PL_debug & 512;
a0d0e21e
LW
694#endif
695
696 /* Be paranoid... */
697 if (prog == NULL || startpos == NULL) {
cea2e8a9 698 Perl_croak(aTHX_ "NULL regexp parameter");
a0d0e21e
LW
699 return 0;
700 }
701
c277df42
IZ
702 minlen = prog->minlen;
703 if (strend - startpos < minlen) goto phooey;
704
a0d0e21e 705 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 706 PL_regprev = '\n';
a0d0e21e 707 else {
a0ed51b3 708 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
709 if (!PL_multiline && PL_regprev == '\n')
710 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 711 }
bbce6d69 712
a0d0e21e 713 /* Check validity of program. */
22c35a8c 714 if (UCHARAT(prog->program) != REG_MAGIC) {
cea2e8a9 715 Perl_croak(aTHX_ "corrupted regexp program");
a0d0e21e
LW
716 }
717
3280af22
NIS
718 PL_reg_flags = 0;
719 PL_reg_eval_set = 0;
2c2d71f5 720 PL_reg_maxiter = 0;
a0d0e21e 721
a0ed51b3
LW
722 if (prog->reganch & ROPT_UTF8)
723 PL_reg_flags |= RF_utf8;
724
725 /* Mark beginning of line for ^ and lookbehind. */
726 PL_regbol = startpos;
727 PL_bostr = strbeg;
9661b544 728 PL_reg_sv = sv;
a0ed51b3
LW
729
730 /* Mark end of line for $ (and such) */
731 PL_regeol = strend;
732
733 /* see how far we have to get to not match where we matched before */
734 PL_regtill = startpos+minend;
735
0f5d15d6
IZ
736 /* We start without call_cc context. */
737 PL_reg_call_cc = 0;
738
a0d0e21e
LW
739 /* If there is a "must appear" string, look for it. */
740 s = startpos;
cad2e5aa 741
30382c73 742 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
cad2e5aa
JH
743 MAGIC *mg;
744
30382c73 745 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
cad2e5aa 746 PL_reg_ganch = startpos;
30382c73
IZ
747 else if (sv && SvTYPE(sv) >= SVt_PVMG
748 && SvMAGIC(sv)
749 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
750 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
751 if (prog->reganch & ROPT_ANCH_GPOS) {
752 if (s > PL_reg_ganch)
753 goto phooey;
754 s = PL_reg_ganch;
755 }
a0ed51b3 756 }
30382c73
IZ
757 else /* pos() not defined */
758 PL_reg_ganch = strbeg;
a0d0e21e 759 }
a687059c 760
cad2e5aa
JH
761 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
762 re_scream_pos_data d;
763
764 d.scream_olds = &scream_olds;
765 d.scream_pos = &scream_pos;
766 s = re_intuit_start(prog, sv, s, strend, flags, &d);
767 if (!s)
768 goto phooey; /* not present */
769 }
770
771 DEBUG_r( if (!PL_colorset) reginitcolors() );
772 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 773 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
8d300b32
GS
774 PL_colors[4],PL_colors[5],PL_colors[0],
775 prog->precomp,
776 PL_colors[1],
c277df42 777 (strlen(prog->precomp) > 60 ? "..." : ""),
cad2e5aa 778 PL_colors[0],
c277df42 779 (strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 780 startpos, PL_colors[1],
c277df42
IZ
781 (strend - startpos > 60 ? "..." : ""))
782 );
783
a0d0e21e 784 /* Simplest case: anchored match need be tried only once. */
774d564b 785 /* [unless only anchor is BOL and multiline is set] */
22e551b9 786 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
cad2e5aa 787 if (s == startpos && regtry(prog, startpos))
a0d0e21e 788 goto got_it;
22e551b9
IZ
789 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
790 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
774d564b 791 {
cad2e5aa
JH
792 char *end;
793
a0d0e21e
LW
794 if (minlen)
795 dontbother = minlen - 1;
cad2e5aa 796 end = HOPc(strend, -dontbother) - 1;
a0d0e21e 797 /* for multiline we only have to try after newlines */
cad2e5aa 798 if (prog->check_substr) {
30944b6d
IZ
799 if (s == startpos)
800 goto after_try;
cad2e5aa
JH
801 while (1) {
802 if (regtry(prog, s))
a0d0e21e 803 goto got_it;
30944b6d 804 after_try:
cad2e5aa
JH
805 if (s >= end)
806 goto phooey;
807 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
808 if (!s)
809 goto phooey;
810 }
811 } else {
812 if (s > startpos)
813 s--;
814 while (s < end) {
815 if (*s++ == '\n') { /* don't need PL_utf8skip here */
816 if (regtry(prog, s))
817 goto got_it;
818 }
819 }
35c8bce7 820 }
35c8bce7 821 }
a0d0e21e 822 goto phooey;
22e551b9
IZ
823 } else if (prog->reganch & ROPT_ANCH_GPOS) {
824 if (regtry(prog, PL_reg_ganch))
825 goto got_it;
826 goto phooey;
a0d0e21e 827 }
35c8bce7 828
a0d0e21e 829 /* Messy cases: unanchored match. */
c277df42
IZ
830 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
831 /* we have /x+whatever/ */
cad2e5aa 832 /* it must be a one character string (XXXX Except UTF?) */
c277df42 833 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
834 if (UTF) {
835 while (s < strend) {
836 if (*s == ch) {
837 if (regtry(prog, s)) goto got_it;
838 s += UTF8SKIP(s);
839 while (s < strend && *s == ch)
840 s += UTF8SKIP(s);
841 }
842 s += UTF8SKIP(s);
843 }
844 }
845 else {
846 while (s < strend) {
847 if (*s == ch) {
848 if (regtry(prog, s)) goto got_it;
c277df42 849 s++;
a0ed51b3
LW
850 while (s < strend && *s == ch)
851 s++;
852 }
853 s++;
a0d0e21e 854 }
a687059c 855 }
c277df42
IZ
856 }
857 /*SUPPRESS 560*/
858 else if (prog->anchored_substr != Nullsv
859 || (prog->float_substr != Nullsv
860 && prog->float_max_offset < strend - s)) {
861 SV *must = prog->anchored_substr
862 ? prog->anchored_substr : prog->float_substr;
863 I32 back_max =
864 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
865 I32 back_min =
866 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
867 I32 delta = back_max - back_min;
cf93c79d 868 char *last = HOPc(strend, /* Cannot start after this */
73c4f7a1
GS
869 -(I32)(CHR_SVLEN(must)
870 - (SvTAIL(must) != 0) + back_min));
a0ed51b3
LW
871 char *last1; /* Last position checked before */
872
873 if (s > PL_bostr)
dfe13c55 874 last1 = HOPc(s, -1);
a0ed51b3
LW
875 else
876 last1 = s - 1; /* bogus */
c277df42
IZ
877
878 /* XXXX check_substr already used to find `s', can optimize if
879 check_substr==must. */
880 scream_pos = -1;
881 dontbother = end_shift;
dfe13c55 882 strend = HOPc(strend, -dontbother);
c277df42 883 while ( (s <= last) &&
22e551b9
IZ
884 ((flags & REXEC_SCREAM)
885 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
c277df42 886 end_shift, &scream_pos, 0))
a0ed51b3 887 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
cf93c79d
IZ
888 (unsigned char*)strend, must,
889 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
dfe13c55
GS
890 if (HOPc(s, -back_max) > last1) {
891 last1 = HOPc(s, -back_min);
892 s = HOPc(s, -back_max);
a0ed51b3
LW
893 }
894 else {
dfe13c55 895 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 896
dfe13c55 897 last1 = HOPc(s, -back_min);
c277df42 898 s = t;
a0d0e21e 899 }
a0ed51b3
LW
900 if (UTF) {
901 while (s <= last1) {
902 if (regtry(prog, s))
903 goto got_it;
904 s += UTF8SKIP(s);
905 }
906 }
907 else {
908 while (s <= last1) {
909 if (regtry(prog, s))
910 goto got_it;
911 s++;
912 }
a0d0e21e
LW
913 }
914 }
915 goto phooey;
a0ed51b3
LW
916 }
917 else if (c = prog->regstclass) {
a0d0e21e 918 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 919 char *cc;
a687059c 920
a0d0e21e
LW
921 if (minlen)
922 dontbother = minlen - 1;
dfe13c55 923 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
924 tmp = 1;
925 /* We know what class it must start with. */
926 switch (OP(c)) {
a0ed51b3 927 case ANYOFUTF8:
cd439c50 928 cc = MASK(c);
a0ed51b3
LW
929 while (s < strend) {
930 if (REGINCLASSUTF8(c, (U8*)s)) {
931 if (tmp && regtry(prog, s))
932 goto got_it;
933 else
934 tmp = doevery;
935 }
936 else
937 tmp = 1;
938 s += UTF8SKIP(s);
939 }
940 break;
a0d0e21e 941 case ANYOF:
cd439c50 942 cc = MASK(c);
a0d0e21e 943 while (s < strend) {
a0ed51b3 944 if (REGINCLASS(cc, *s)) {
a0d0e21e
LW
945 if (tmp && regtry(prog, s))
946 goto got_it;
947 else
948 tmp = doevery;
a687059c 949 }
a0d0e21e
LW
950 else
951 tmp = 1;
952 s++;
953 }
954 break;
bbce6d69 955 case BOUNDL:
3280af22 956 PL_reg_flags |= RF_tainted;
bbce6d69 957 /* FALL THROUGH */
a0d0e21e 958 case BOUND:
a0ed51b3
LW
959 if (minlen) {
960 dontbother++;
961 strend -= 1;
962 }
3280af22 963 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 964 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 965 while (s < strend) {
95bac841 966 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
967 tmp = !tmp;
968 if (regtry(prog, s))
969 goto got_it;
a687059c 970 }
a0d0e21e
LW
971 s++;
972 }
973 if ((minlen || tmp) && regtry(prog,s))
974 goto got_it;
975 break;
a0ed51b3
LW
976 case BOUNDLUTF8:
977 PL_reg_flags |= RF_tainted;
978 /* FALL THROUGH */
979 case BOUNDUTF8:
980 if (minlen) {
981 dontbother++;
dfe13c55 982 strend = reghop_c(strend, -1);
a0ed51b3 983 }
dfe13c55 984 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
985 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
986 while (s < strend) {
dfe13c55
GS
987 if (tmp == !(OP(c) == BOUND ?
988 swash_fetch(PL_utf8_alnum, (U8*)s) :
989 isALNUM_LC_utf8((U8*)s)))
990 {
a0ed51b3
LW
991 tmp = !tmp;
992 if (regtry(prog, s))
993 goto got_it;
994 }
995 s += UTF8SKIP(s);
996 }
997 if ((minlen || tmp) && regtry(prog,s))
998 goto got_it;
999 break;
bbce6d69 1000 case NBOUNDL:
3280af22 1001 PL_reg_flags |= RF_tainted;
bbce6d69 1002 /* FALL THROUGH */
a0d0e21e 1003 case NBOUND:
a0ed51b3
LW
1004 if (minlen) {
1005 dontbother++;
1006 strend -= 1;
1007 }
3280af22 1008 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 1009 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 1010 while (s < strend) {
95bac841 1011 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
1012 tmp = !tmp;
1013 else if (regtry(prog, s))
1014 goto got_it;
1015 s++;
1016 }
1017 if ((minlen || !tmp) && regtry(prog,s))
1018 goto got_it;
1019 break;
a0ed51b3
LW
1020 case NBOUNDLUTF8:
1021 PL_reg_flags |= RF_tainted;
1022 /* FALL THROUGH */
1023 case NBOUNDUTF8:
1024 if (minlen) {
1025 dontbother++;
dfe13c55 1026 strend = reghop_c(strend, -1);
a0ed51b3 1027 }
dfe13c55 1028 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
1029 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
1030 while (s < strend) {
dfe13c55
GS
1031 if (tmp == !(OP(c) == NBOUND ?
1032 swash_fetch(PL_utf8_alnum, (U8*)s) :
1033 isALNUM_LC_utf8((U8*)s)))
a0ed51b3
LW
1034 tmp = !tmp;
1035 else if (regtry(prog, s))
1036 goto got_it;
1037 s += UTF8SKIP(s);
1038 }
1039 if ((minlen || !tmp) && regtry(prog,s))
1040 goto got_it;
1041 break;
a0d0e21e
LW
1042 case ALNUM:
1043 while (s < strend) {
bbce6d69 1044 if (isALNUM(*s)) {
1045 if (tmp && regtry(prog, s))
1046 goto got_it;
1047 else
1048 tmp = doevery;
1049 }
1050 else
1051 tmp = 1;
1052 s++;
1053 }
1054 break;
a0ed51b3
LW
1055 case ALNUMUTF8:
1056 while (s < strend) {
dfe13c55 1057 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
1058 if (tmp && regtry(prog, s))
1059 goto got_it;
1060 else
1061 tmp = doevery;
1062 }
1063 else
1064 tmp = 1;
1065 s += UTF8SKIP(s);
1066 }
1067 break;
bbce6d69 1068 case ALNUML:
3280af22 1069 PL_reg_flags |= RF_tainted;
bbce6d69 1070 while (s < strend) {
1071 if (isALNUM_LC(*s)) {
a0d0e21e
LW
1072 if (tmp && regtry(prog, s))
1073 goto got_it;
a687059c 1074 else
a0d0e21e
LW
1075 tmp = doevery;
1076 }
1077 else
1078 tmp = 1;
1079 s++;
1080 }
1081 break;
a0ed51b3
LW
1082 case ALNUMLUTF8:
1083 PL_reg_flags |= RF_tainted;
1084 while (s < strend) {
dfe13c55 1085 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
1086 if (tmp && regtry(prog, s))
1087 goto got_it;
1088 else
1089 tmp = doevery;
1090 }
1091 else
1092 tmp = 1;
1093 s += UTF8SKIP(s);
1094 }
1095 break;
a0d0e21e
LW
1096 case NALNUM:
1097 while (s < strend) {
bbce6d69 1098 if (!isALNUM(*s)) {
1099 if (tmp && regtry(prog, s))
1100 goto got_it;
1101 else
1102 tmp = doevery;
1103 }
1104 else
1105 tmp = 1;
1106 s++;
1107 }
1108 break;
a0ed51b3
LW
1109 case NALNUMUTF8:
1110 while (s < strend) {
dfe13c55 1111 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
1112 if (tmp && regtry(prog, s))
1113 goto got_it;
1114 else
1115 tmp = doevery;
1116 }
1117 else
1118 tmp = 1;
1119 s += UTF8SKIP(s);
1120 }
1121 break;
bbce6d69 1122 case NALNUML:
3280af22 1123 PL_reg_flags |= RF_tainted;
bbce6d69 1124 while (s < strend) {
1125 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
1126 if (tmp && regtry(prog, s))
1127 goto got_it;
a687059c 1128 else
a0d0e21e 1129 tmp = doevery;
a687059c 1130 }
a0d0e21e
LW
1131 else
1132 tmp = 1;
1133 s++;
1134 }
1135 break;
a0ed51b3
LW
1136 case NALNUMLUTF8:
1137 PL_reg_flags |= RF_tainted;
1138 while (s < strend) {
dfe13c55 1139 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
1140 if (tmp && regtry(prog, s))
1141 goto got_it;
1142 else
1143 tmp = doevery;
1144 }
1145 else
1146 tmp = 1;
1147 s += UTF8SKIP(s);
1148 }
1149 break;
a0d0e21e
LW
1150 case SPACE:
1151 while (s < strend) {
1152 if (isSPACE(*s)) {
1153 if (tmp && regtry(prog, s))
1154 goto got_it;
1155 else
1156 tmp = doevery;
2304df62 1157 }
a0d0e21e
LW
1158 else
1159 tmp = 1;
1160 s++;
1161 }
1162 break;
a0ed51b3
LW
1163 case SPACEUTF8:
1164 while (s < strend) {
dfe13c55 1165 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3
LW
1166 if (tmp && regtry(prog, s))
1167 goto got_it;
1168 else
1169 tmp = doevery;
1170 }
1171 else
1172 tmp = 1;
1173 s += UTF8SKIP(s);
1174 }
1175 break;
bbce6d69 1176 case SPACEL:
3280af22 1177 PL_reg_flags |= RF_tainted;
bbce6d69 1178 while (s < strend) {
1179 if (isSPACE_LC(*s)) {
1180 if (tmp && regtry(prog, s))
1181 goto got_it;
1182 else
1183 tmp = doevery;
1184 }
1185 else
1186 tmp = 1;
1187 s++;
1188 }
1189 break;
a0ed51b3
LW
1190 case SPACELUTF8:
1191 PL_reg_flags |= RF_tainted;
1192 while (s < strend) {
dfe13c55 1193 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3
LW
1194 if (tmp && regtry(prog, s))
1195 goto got_it;
1196 else
1197 tmp = doevery;
1198 }
1199 else
1200 tmp = 1;
1201 s += UTF8SKIP(s);
1202 }
1203 break;
a0d0e21e
LW
1204 case NSPACE:
1205 while (s < strend) {
1206 if (!isSPACE(*s)) {
1207 if (tmp && regtry(prog, s))
1208 goto got_it;
1209 else
1210 tmp = doevery;
a687059c 1211 }
a0d0e21e
LW
1212 else
1213 tmp = 1;
1214 s++;
1215 }
1216 break;
a0ed51b3
LW
1217 case NSPACEUTF8:
1218 while (s < strend) {
dfe13c55 1219 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3
LW
1220 if (tmp && regtry(prog, s))
1221 goto got_it;
1222 else
1223 tmp = doevery;
1224 }
1225 else
1226 tmp = 1;
1227 s += UTF8SKIP(s);
1228 }
1229 break;
bbce6d69 1230 case NSPACEL:
3280af22 1231 PL_reg_flags |= RF_tainted;
bbce6d69 1232 while (s < strend) {
1233 if (!isSPACE_LC(*s)) {
1234 if (tmp && regtry(prog, s))
1235 goto got_it;
1236 else
1237 tmp = doevery;
1238 }
1239 else
1240 tmp = 1;
1241 s++;
1242 }
1243 break;
a0ed51b3
LW
1244 case NSPACELUTF8:
1245 PL_reg_flags |= RF_tainted;
1246 while (s < strend) {
dfe13c55 1247 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3
LW
1248 if (tmp && regtry(prog, s))
1249 goto got_it;
1250 else
1251 tmp = doevery;
1252 }
1253 else
1254 tmp = 1;
1255 s += UTF8SKIP(s);
1256 }
1257 break;
a0d0e21e
LW
1258 case DIGIT:
1259 while (s < strend) {
1260 if (isDIGIT(*s)) {
1261 if (tmp && regtry(prog, s))
1262 goto got_it;
1263 else
1264 tmp = doevery;
2b69d0c2 1265 }
a0d0e21e
LW
1266 else
1267 tmp = 1;
1268 s++;
1269 }
1270 break;
a0ed51b3
LW
1271 case DIGITUTF8:
1272 while (s < strend) {
dfe13c55 1273 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1274 if (tmp && regtry(prog, s))
1275 goto got_it;
1276 else
1277 tmp = doevery;
1278 }
1279 else
1280 tmp = 1;
1281 s += UTF8SKIP(s);
1282 }
1283 break;
b8c5462f
JH
1284 case DIGITL:
1285 PL_reg_flags |= RF_tainted;
1286 while (s < strend) {
1287 if (isDIGIT_LC(*s)) {
1288 if (tmp && regtry(prog, s))
1289 goto got_it;
1290 else
1291 tmp = doevery;
1292 }
1293 else
1294 tmp = 1;
1295 s++;
1296 }
1297 break;
1298 case DIGITLUTF8:
1299 PL_reg_flags |= RF_tainted;
1300 while (s < strend) {
1301 if (isDIGIT_LC_utf8((U8*)s)) {
1302 if (tmp && regtry(prog, s))
1303 goto got_it;
1304 else
1305 tmp = doevery;
1306 }
1307 else
1308 tmp = 1;
1309 s += UTF8SKIP(s);
1310 }
1311 break;
a0d0e21e
LW
1312 case NDIGIT:
1313 while (s < strend) {
1314 if (!isDIGIT(*s)) {
1315 if (tmp && regtry(prog, s))
1316 goto got_it;
1317 else
1318 tmp = doevery;
a687059c 1319 }
a0d0e21e
LW
1320 else
1321 tmp = 1;
1322 s++;
1323 }
1324 break;
a0ed51b3
LW
1325 case NDIGITUTF8:
1326 while (s < strend) {
dfe13c55 1327 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1328 if (tmp && regtry(prog, s))
1329 goto got_it;
1330 else
1331 tmp = doevery;
1332 }
1333 else
1334 tmp = 1;
1335 s += UTF8SKIP(s);
1336 }
1337 break;
b8c5462f
JH
1338 case NDIGITL:
1339 PL_reg_flags |= RF_tainted;
1340 while (s < strend) {
1341 if (!isDIGIT_LC(*s)) {
1342 if (tmp && regtry(prog, s))
1343 goto got_it;
1344 else
1345 tmp = doevery;
1346 }
1347 else
1348 tmp = 1;
1349 s++;
a0ed51b3 1350 }
b8c5462f
JH
1351 break;
1352 case NDIGITLUTF8:
1353 PL_reg_flags |= RF_tainted;
1354 while (s < strend) {
1355 if (!isDIGIT_LC_utf8((U8*)s)) {
1356 if (tmp && regtry(prog, s))
1357 goto got_it;
cf93c79d 1358 else
b8c5462f
JH
1359 tmp = doevery;
1360 }
1361 else
1362 tmp = 1;
1363 s += UTF8SKIP(s);
1364 }
1365 break;
d6a28714
JH
1366 }
1367 }
1368 else {
1369 dontbother = 0;
1370 if (prog->float_substr != Nullsv) { /* Trim the end. */
1371 char *last;
1372 I32 oldpos = scream_pos;
1373
1374 if (flags & REXEC_SCREAM) {
1375 last = screaminstr(sv, prog->float_substr, s - strbeg,
1376 end_shift, &scream_pos, 1); /* last one */
1377 if (!last)
1378 last = scream_olds; /* Only one occurence. */
b8c5462f 1379 }
d6a28714
JH
1380 else {
1381 STRLEN len;
1382 char *little = SvPV(prog->float_substr, len);
1383
1384 if (SvTAIL(prog->float_substr)) {
1385 if (memEQ(strend - len + 1, little, len - 1))
1386 last = strend - len + 1;
1387 else if (!PL_multiline)
1388 last = memEQ(strend - len, little, len)
1389 ? strend - len : Nullch;
b8c5462f 1390 else
d6a28714
JH
1391 goto find_last;
1392 } else {
1393 find_last:
1394 if (len)
1395 last = rninstr(s, strend, little, little + len);
b8c5462f 1396 else
d6a28714 1397 last = strend; /* matching `$' */
b8c5462f 1398 }
b8c5462f 1399 }
d6a28714
JH
1400 if (last == NULL) goto phooey; /* Should not happen! */
1401 dontbother = strend - last + prog->float_min_offset;
1402 }
1403 if (minlen && (dontbother < minlen))
1404 dontbother = minlen - 1;
1405 strend -= dontbother; /* this one's always in bytes! */
1406 /* We don't know much -- general case. */
1407 if (UTF) {
1408 for (;;) {
1409 if (regtry(prog, s))
1410 goto got_it;
1411 if (s >= strend)
1412 break;
b8c5462f 1413 s += UTF8SKIP(s);
d6a28714
JH
1414 };
1415 }
1416 else {
1417 do {
1418 if (regtry(prog, s))
1419 goto got_it;
1420 } while (s++ < strend);
1421 }
1422 }
1423
1424 /* Failure. */
1425 goto phooey;
1426
1427got_it:
1428 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1429
1430 if (PL_reg_eval_set) {
1431 /* Preserve the current value of $^R */
1432 if (oreplsv != GvSV(PL_replgv))
1433 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1434 restored, the value remains
1435 the same. */
1436 restore_pos(aTHXo_ 0);
1437 }
1438
1439 /* make sure $`, $&, $', and $digit will work later */
1440 if ( !(flags & REXEC_NOT_FIRST) ) {
1441 if (RX_MATCH_COPIED(prog)) {
1442 Safefree(prog->subbeg);
1443 RX_MATCH_COPIED_off(prog);
1444 }
1445 if (flags & REXEC_COPY_STR) {
1446 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1447
1448 s = savepvn(strbeg, i);
1449 prog->subbeg = s;
1450 prog->sublen = i;
1451 RX_MATCH_COPIED_on(prog);
1452 }
1453 else {
1454 prog->subbeg = strbeg;
1455 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1456 }
1457 }
1458
1459 return 1;
1460
1461phooey:
1462 if (PL_reg_eval_set)
1463 restore_pos(aTHXo_ 0);
1464 return 0;
1465}
1466
1467/*
1468 - regtry - try match at specific point
1469 */
1470STATIC I32 /* 0 failure, 1 success */
1471S_regtry(pTHX_ regexp *prog, char *startpos)
1472{
1473 dTHR;
1474 register I32 i;
1475 register I32 *sp;
1476 register I32 *ep;
1477 CHECKPOINT lastcp;
1478
1479 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1480 MAGIC *mg;
1481
1482 PL_reg_eval_set = RS_init;
1483 DEBUG_r(DEBUG_s(
1484 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
1485 PL_stack_sp - PL_stack_base);
1486 ));
1487 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1488 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1489 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1490 SAVETMPS;
1491 /* Apparently this is not needed, judging by wantarray. */
1492 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1493 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1494
1495 if (PL_reg_sv) {
1496 /* Make $_ available to executed code. */
1497 if (PL_reg_sv != DEFSV) {
1498 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1499 SAVESPTR(DEFSV);
1500 DEFSV = PL_reg_sv;
b8c5462f 1501 }
d6a28714
JH
1502
1503 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1504 && (mg = mg_find(PL_reg_sv, 'g')))) {
1505 /* prepare for quick setting of pos */
1506 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1507 mg = mg_find(PL_reg_sv, 'g');
1508 mg->mg_len = -1;
b8c5462f 1509 }
d6a28714
JH
1510 PL_reg_magic = mg;
1511 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1512 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1513 }
1514 if (!PL_reg_curpm)
1515 New(22,PL_reg_curpm, 1, PMOP);
1516 PL_reg_curpm->op_pmregexp = prog;
1517 PL_reg_oldcurpm = PL_curpm;
1518 PL_curpm = PL_reg_curpm;
1519 if (RX_MATCH_COPIED(prog)) {
1520 /* Here is a serious problem: we cannot rewrite subbeg,
1521 since it may be needed if this match fails. Thus
1522 $` inside (?{}) could fail... */
1523 PL_reg_oldsaved = prog->subbeg;
1524 PL_reg_oldsavedlen = prog->sublen;
1525 RX_MATCH_COPIED_off(prog);
1526 }
1527 else
1528 PL_reg_oldsaved = Nullch;
1529 prog->subbeg = PL_bostr;
1530 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1531 }
1532 prog->startp[0] = startpos - PL_bostr;
1533 PL_reginput = startpos;
1534 PL_regstartp = prog->startp;
1535 PL_regendp = prog->endp;
1536 PL_reglastparen = &prog->lastparen;
1537 prog->lastparen = 0;
1538 PL_regsize = 0;
1539 DEBUG_r(PL_reg_starttry = startpos);
1540 if (PL_reg_start_tmpl <= prog->nparens) {
1541 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1542 if(PL_reg_start_tmp)
1543 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1544 else
1545 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1546 }
1547
1548 /* XXXX What this code is doing here?!!! There should be no need
1549 to do this again and again, PL_reglastparen should take care of
1550 this! */
1551 sp = prog->startp;
1552 ep = prog->endp;
1553 if (prog->nparens) {
1554 for (i = prog->nparens; i >= 1; i--) {
1555 *++sp = -1;
1556 *++ep = -1;
1557 }
1558 }
1559 REGCP_SET;
1560 if (regmatch(prog->program + 1)) {
1561 prog->endp[0] = PL_reginput - PL_bostr;
1562 return 1;
1563 }
1564 REGCP_UNWIND;
1565 return 0;
1566}
1567
1568/*
1569 - regmatch - main matching routine
1570 *
1571 * Conceptually the strategy is simple: check to see whether the current
1572 * node matches, call self recursively to see whether the rest matches,
1573 * and then act accordingly. In practice we make some effort to avoid
1574 * recursion, in particular by going through "ordinary" nodes (that don't
1575 * need to know whether the rest of the match failed) by a loop instead of
1576 * by recursion.
1577 */
1578/* [lwall] I've hoisted the register declarations to the outer block in order to
1579 * maybe save a little bit of pushing and popping on the stack. It also takes
1580 * advantage of machines that use a register save mask on subroutine entry.
1581 */
1582STATIC I32 /* 0 failure, 1 success */
1583S_regmatch(pTHX_ regnode *prog)
1584{
1585 dTHR;
1586 register regnode *scan; /* Current node. */
1587 regnode *next; /* Next node. */
1588 regnode *inner; /* Next node in internal branch. */
1589 register I32 nextchr; /* renamed nextchr - nextchar colides with
1590 function of same name */
1591 register I32 n; /* no or next */
1592 register I32 ln; /* len or last */
1593 register char *s; /* operand or save */
1594 register char *locinput = PL_reginput;
1595 register I32 c1, c2, paren; /* case fold search, parenth */
1596 int minmod = 0, sw = 0, logical = 0;
1597#ifdef DEBUGGING
1598 PL_regindent++;
1599#endif
1600
1601 /* Note that nextchr is a byte even in UTF */
1602 nextchr = UCHARAT(locinput);
1603 scan = prog;
1604 while (scan != NULL) {
1605#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1606#ifdef DEBUGGING
1607# define sayYES goto yes
1608# define sayNO goto no
7821416a
IZ
1609# define sayYES_FINAL goto yes_final
1610# define sayYES_LOUD goto yes_loud
1611# define sayNO_FINAL goto no_final
1612# define sayNO_SILENT goto do_no
d6a28714
JH
1613# define saySAME(x) if (x) goto yes; else goto no
1614# define REPORT_CODE_OFF 24
1615#else
1616# define sayYES return 1
1617# define sayNO return 0
7821416a
IZ
1618# define sayYES_FINAL return 1
1619# define sayYES_LOUD return 1
1620# define sayNO_FINAL return 0
1621# define sayNO_SILENT return 0
d6a28714
JH
1622# define saySAME(x) return x
1623#endif
1624 DEBUG_r( {
1625 SV *prop = sv_newmortal();
1626 int docolor = *PL_colors[0];
1627 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1628 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1629 /* The part of the string before starttry has one color
1630 (pref0_len chars), between starttry and current
1631 position another one (pref_len - pref0_len chars),
1632 after the current position the third one.
1633 We assume that pref0_len <= pref_len, otherwise we
1634 decrease pref0_len. */
1635 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1636 ? (5 + taill) - l : locinput - PL_bostr);
1637 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1638
1639 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1640 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1641 ? (5 + taill) - pref_len : PL_regeol - locinput);
1642 if (pref0_len < 0)
1643 pref0_len = 0;
1644 if (pref0_len > pref_len)
1645 pref0_len = pref_len;
1646 regprop(prop, scan);
1647 PerlIO_printf(Perl_debug_log,
1648 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
1649 locinput - PL_bostr,
1650 PL_colors[4], pref0_len,
1651 locinput - pref_len, PL_colors[5],
1652 PL_colors[2], pref_len - pref0_len,
1653 locinput - pref_len + pref0_len, PL_colors[3],
1654 (docolor ? "" : "> <"),
1655 PL_colors[0], l, locinput, PL_colors[1],
1656 15 - l - pref_len + 1,
1657 "",
1658 scan - PL_regprogram, PL_regindent*2, "",
1659 SvPVX(prop));
1660 } );
1661
1662 next = scan + NEXT_OFF(scan);
1663 if (next == scan)
1664 next = NULL;
1665
1666 switch (OP(scan)) {
1667 case BOL:
1668 if (locinput == PL_bostr
1669 ? PL_regprev == '\n'
1670 : (PL_multiline &&
1671 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1672 {
1673 /* regtill = regbol; */
b8c5462f
JH
1674 break;
1675 }
d6a28714
JH
1676 sayNO;
1677 case MBOL:
1678 if (locinput == PL_bostr
1679 ? PL_regprev == '\n'
1680 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1681 {
b8c5462f
JH
1682 break;
1683 }
d6a28714
JH
1684 sayNO;
1685 case SBOL:
1686 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1687 break;
d6a28714
JH
1688 sayNO;
1689 case GPOS:
1690 if (locinput == PL_reg_ganch)
1691 break;
1692 sayNO;
1693 case EOL:
1694 if (PL_multiline)
1695 goto meol;
1696 else
1697 goto seol;
1698 case MEOL:
1699 meol:
1700 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1701 sayNO;
b8c5462f 1702 break;
d6a28714
JH
1703 case SEOL:
1704 seol:
1705 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1706 sayNO;
d6a28714 1707 if (PL_regeol - locinput > 1)
b8c5462f 1708 sayNO;
b8c5462f 1709 break;
d6a28714
JH
1710 case EOS:
1711 if (PL_regeol != locinput)
b8c5462f 1712 sayNO;
d6a28714
JH
1713 break;
1714 case SANYUTF8:
b8c5462f 1715 if (nextchr & 0x80) {
b8c5462f 1716 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1717 if (locinput > PL_regeol)
1718 sayNO;
b8c5462f
JH
1719 nextchr = UCHARAT(locinput);
1720 break;
1721 }
d6a28714 1722 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1723 sayNO;
b8c5462f 1724 nextchr = UCHARAT(++locinput);
a0d0e21e 1725 break;
d6a28714
JH
1726 case SANY:
1727 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1728 sayNO;
1729 nextchr = UCHARAT(++locinput);
b85d18e9 1730 break;
d6a28714 1731 case ANYUTF8:
a0ed51b3 1732 if (nextchr & 0x80) {
b8c5462f 1733 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1734 if (locinput > PL_regeol)
1735 sayNO;
a0ed51b3
LW
1736 nextchr = UCHARAT(locinput);
1737 break;
1738 }
d6a28714 1739 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3
LW
1740 sayNO;
1741 nextchr = UCHARAT(++locinput);
1742 break;
d6a28714
JH
1743 case REG_ANY:
1744 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1745 sayNO;
76e3520e 1746 nextchr = UCHARAT(++locinput);
a0d0e21e 1747 break;
d6a28714 1748 case EXACT:
cd439c50
IZ
1749 s = STRING(scan);
1750 ln = STR_LEN(scan);
d6a28714
JH
1751 /* Inline the first character, for speed. */
1752 if (UCHARAT(s) != nextchr)
1753 sayNO;
1754 if (PL_regeol - locinput < ln)
1755 sayNO;
1756 if (ln > 1 && memNE(s, locinput, ln))
1757 sayNO;
1758 locinput += ln;
1759 nextchr = UCHARAT(locinput);
1760 break;
1761 case EXACTFL:
b8c5462f
JH
1762 PL_reg_flags |= RF_tainted;
1763 /* FALL THROUGH */
d6a28714 1764 case EXACTF:
cd439c50
IZ
1765 s = STRING(scan);
1766 ln = STR_LEN(scan);
d6a28714
JH
1767
1768 if (UTF) {
1769 char *l = locinput;
1770 char *e = s + ln;
1771 c1 = OP(scan) == EXACTF;
1772 while (s < e) {
1773 if (l >= PL_regeol)
1774 sayNO;
1775 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1776 toLOWER_utf8((U8*)l) :
1777 toLOWER_LC_utf8((U8*)l)))
1778 {
1779 sayNO;
1780 }
1781 s += UTF8SKIP(s);
1782 l += UTF8SKIP(l);
b8c5462f 1783 }
d6a28714 1784 locinput = l;
a0ed51b3
LW
1785 nextchr = UCHARAT(locinput);
1786 break;
1787 }
d6a28714
JH
1788
1789 /* Inline the first character, for speed. */
1790 if (UCHARAT(s) != nextchr &&
1791 UCHARAT(s) != ((OP(scan) == EXACTF)
1792 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1793 sayNO;
d6a28714 1794 if (PL_regeol - locinput < ln)
b8c5462f 1795 sayNO;
d6a28714
JH
1796 if (ln > 1 && (OP(scan) == EXACTF
1797 ? ibcmp(s, locinput, ln)
1798 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1799 sayNO;
d6a28714
JH
1800 locinput += ln;
1801 nextchr = UCHARAT(locinput);
a0d0e21e 1802 break;
d6a28714 1803 case ANYOFUTF8:
cd439c50 1804 s = MASK(scan);
d6a28714 1805 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1806 sayNO;
d6a28714
JH
1807 if (locinput >= PL_regeol)
1808 sayNO;
1809 locinput += PL_utf8skip[nextchr];
1810 nextchr = UCHARAT(locinput);
1811 break;
1812 case ANYOF:
cd439c50 1813 s = MASK(scan);
d6a28714 1814 if (nextchr < 0)
b8c5462f 1815 nextchr = UCHARAT(locinput);
d6a28714
JH
1816 if (!REGINCLASS(s, nextchr))
1817 sayNO;
1818 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1819 sayNO;
b8c5462f
JH
1820 nextchr = UCHARAT(++locinput);
1821 break;
d6a28714 1822 case ALNUML:
b8c5462f
JH
1823 PL_reg_flags |= RF_tainted;
1824 /* FALL THROUGH */
d6a28714 1825 case ALNUM:
b8c5462f 1826 if (!nextchr)
4633a7c4 1827 sayNO;
d6a28714
JH
1828 if (!(OP(scan) == ALNUM
1829 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1830 sayNO;
1831 nextchr = UCHARAT(++locinput);
bbce6d69 1832 break;
d6a28714 1833 case ALNUMLUTF8:
3280af22 1834 PL_reg_flags |= RF_tainted;
bbce6d69 1835 /* FALL THROUGH */
d6a28714 1836 case ALNUMUTF8:
b8c5462f
JH
1837 if (!nextchr)
1838 sayNO;
1839 if (nextchr & 0x80) {
d6a28714
JH
1840 if (!(OP(scan) == ALNUMUTF8
1841 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1842 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1843 {
1844 sayNO;
a0ed51b3 1845 }
b8c5462f 1846 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1847 nextchr = UCHARAT(locinput);
1848 break;
1849 }
d6a28714
JH
1850 if (!(OP(scan) == ALNUMUTF8
1851 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 1852 sayNO;
b8c5462f 1853 nextchr = UCHARAT(++locinput);
a0d0e21e 1854 break;
d6a28714 1855 case NALNUML:
b8c5462f
JH
1856 PL_reg_flags |= RF_tainted;
1857 /* FALL THROUGH */
d6a28714
JH
1858 case NALNUM:
1859 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 1860 sayNO;
d6a28714
JH
1861 if (OP(scan) == NALNUM
1862 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 1863 sayNO;
b8c5462f 1864 nextchr = UCHARAT(++locinput);
a0ed51b3 1865 break;
d6a28714 1866 case NALNUMLUTF8:
b8c5462f
JH
1867 PL_reg_flags |= RF_tainted;
1868 /* FALL THROUGH */
d6a28714 1869 case NALNUMUTF8:
3280af22 1870 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1871 sayNO;
b8c5462f 1872 if (nextchr & 0x80) {
d6a28714
JH
1873 if (OP(scan) == NALNUMUTF8
1874 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1875 : isALNUM_LC_utf8((U8*)locinput))
1876 {
b8c5462f 1877 sayNO;
d6a28714 1878 }
b8c5462f
JH
1879 locinput += PL_utf8skip[nextchr];
1880 nextchr = UCHARAT(locinput);
1881 break;
1882 }
d6a28714
JH
1883 if (OP(scan) == NALNUMUTF8
1884 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 1885 sayNO;
76e3520e 1886 nextchr = UCHARAT(++locinput);
a0d0e21e 1887 break;
d6a28714
JH
1888 case BOUNDL:
1889 case NBOUNDL:
3280af22 1890 PL_reg_flags |= RF_tainted;
bbce6d69 1891 /* FALL THROUGH */
d6a28714
JH
1892 case BOUND:
1893 case NBOUND:
1894 /* was last char in word? */
1895 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
1896 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1897 ln = isALNUM(ln);
1898 n = isALNUM(nextchr);
1899 }
1900 else {
1901 ln = isALNUM_LC(ln);
1902 n = isALNUM_LC(nextchr);
1903 }
1904 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1905 sayNO;
a0d0e21e 1906 break;
d6a28714
JH
1907 case BOUNDLUTF8:
1908 case NBOUNDLUTF8:
a0ed51b3
LW
1909 PL_reg_flags |= RF_tainted;
1910 /* FALL THROUGH */
d6a28714
JH
1911 case BOUNDUTF8:
1912 case NBOUNDUTF8:
1913 /* was last char in word? */
1914 ln = (locinput != PL_regbol)
1915 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
1916 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1917 ln = isALNUM_uni(ln);
1918 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 1919 }
d6a28714
JH
1920 else {
1921 ln = isALNUM_LC_uni(ln);
1922 n = isALNUM_LC_utf8((U8*)locinput);
1923 }
1924 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 1925 sayNO;
a0ed51b3 1926 break;
d6a28714 1927 case SPACEL:
3280af22 1928 PL_reg_flags |= RF_tainted;
bbce6d69 1929 /* FALL THROUGH */
d6a28714
JH
1930 case SPACE:
1931 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1932 sayNO;
d6a28714
JH
1933 if (!(OP(scan) == SPACE
1934 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1935 sayNO;
76e3520e 1936 nextchr = UCHARAT(++locinput);
a0d0e21e 1937 break;
d6a28714 1938 case SPACELUTF8:
a0ed51b3
LW
1939 PL_reg_flags |= RF_tainted;
1940 /* FALL THROUGH */
d6a28714 1941 case SPACEUTF8:
a0ed51b3
LW
1942 if (!nextchr && locinput >= PL_regeol)
1943 sayNO;
1944 if (nextchr & 0x80) {
d6a28714
JH
1945 if (!(OP(scan) == SPACEUTF8
1946 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1947 : isSPACE_LC_utf8((U8*)locinput)))
1948 {
a0ed51b3 1949 sayNO;
d6a28714 1950 }
6f06b55f 1951 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1952 nextchr = UCHARAT(locinput);
1953 break;
1954 }
d6a28714
JH
1955 if (!(OP(scan) == SPACEUTF8
1956 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
1957 sayNO;
1958 nextchr = UCHARAT(++locinput);
1959 break;
d6a28714 1960 case NSPACEL:
3280af22 1961 PL_reg_flags |= RF_tainted;
bbce6d69 1962 /* FALL THROUGH */
d6a28714 1963 case NSPACE:
b8c5462f
JH
1964 if (!nextchr)
1965 sayNO;
d6a28714
JH
1966 if (OP(scan) == SPACE
1967 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1968 sayNO;
b8c5462f 1969 nextchr = UCHARAT(++locinput);
a0d0e21e 1970 break;
d6a28714 1971 case NSPACELUTF8:
a0ed51b3
LW
1972 PL_reg_flags |= RF_tainted;
1973 /* FALL THROUGH */
d6a28714 1974 case NSPACEUTF8:
b8c5462f
JH
1975 if (!nextchr)
1976 sayNO;
1977 if (nextchr & 0x80) {
d6a28714
JH
1978 if (OP(scan) == NSPACEUTF8
1979 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1980 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
1981 {
1982 sayNO;
1983 }
1984 locinput += PL_utf8skip[nextchr];
1985 nextchr = UCHARAT(locinput);
1986 break;
a0ed51b3 1987 }
d6a28714
JH
1988 if (OP(scan) == NSPACEUTF8
1989 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1990 sayNO;
76e3520e 1991 nextchr = UCHARAT(++locinput);
a0d0e21e 1992 break;
d6a28714 1993 case DIGITL:
a0ed51b3
LW
1994 PL_reg_flags |= RF_tainted;
1995 /* FALL THROUGH */
d6a28714 1996 case DIGIT:
a0ed51b3
LW
1997 if (!nextchr && locinput >= PL_regeol)
1998 sayNO;
d6a28714
JH
1999 if (!(OP(scan) == DIGIT
2000 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2001 sayNO;
76e3520e 2002 nextchr = UCHARAT(++locinput);
a0d0e21e 2003 break;
d6a28714 2004 case DIGITLUTF8:
a0ed51b3
LW
2005 PL_reg_flags |= RF_tainted;
2006 /* FALL THROUGH */
d6a28714 2007 case DIGITUTF8:
a0ed51b3
LW
2008 if (!nextchr)
2009 sayNO;
2010 if (nextchr & 0x80) {
d6a28714
JH
2011 if (OP(scan) == NDIGITUTF8
2012 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2013 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 2014 {
a0ed51b3 2015 sayNO;
dfe13c55 2016 }
6f06b55f 2017 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2018 nextchr = UCHARAT(locinput);
2019 break;
2020 }
d6a28714 2021 if (!isDIGIT(nextchr))
a0ed51b3
LW
2022 sayNO;
2023 nextchr = UCHARAT(++locinput);
2024 break;
d6a28714 2025 case NDIGITL:
b8c5462f
JH
2026 PL_reg_flags |= RF_tainted;
2027 /* FALL THROUGH */
d6a28714 2028 case NDIGIT:
b8c5462f
JH
2029 if (!nextchr)
2030 sayNO;
d6a28714
JH
2031 if (OP(scan) == DIGIT
2032 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2033 sayNO;
76e3520e 2034 nextchr = UCHARAT(++locinput);
a0d0e21e 2035 break;
d6a28714 2036 case NDIGITLUTF8:
b8c5462f
JH
2037 PL_reg_flags |= RF_tainted;
2038 /* FALL THROUGH */
d6a28714 2039 case NDIGITUTF8:
b8c5462f
JH
2040 if (!nextchr && locinput >= PL_regeol)
2041 sayNO;
a0ed51b3 2042 if (nextchr & 0x80) {
d6a28714 2043 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 2044 sayNO;
6f06b55f 2045 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2046 nextchr = UCHARAT(locinput);
2047 break;
2048 }
d6a28714 2049 if (isDIGIT(nextchr))
a0ed51b3
LW
2050 sayNO;
2051 nextchr = UCHARAT(++locinput);
2052 break;
2053 case CLUMP:
dfe13c55 2054 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2055 sayNO;
6f06b55f 2056 locinput += PL_utf8skip[nextchr];
dfe13c55 2057 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2058 locinput += UTF8SKIP(locinput);
2059 if (locinput > PL_regeol)
2060 sayNO;
2061 nextchr = UCHARAT(locinput);
2062 break;
c8756f30 2063 case REFFL:
3280af22 2064 PL_reg_flags |= RF_tainted;
c8756f30 2065 /* FALL THROUGH */
c277df42 2066 case REF:
c8756f30 2067 case REFF:
c277df42 2068 n = ARG(scan); /* which paren pair */
cf93c79d 2069 ln = PL_regstartp[n];
2c2d71f5 2070 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2071 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2072 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2073 if (ln == PL_regendp[n])
a0d0e21e 2074 break;
a0ed51b3 2075
cf93c79d 2076 s = PL_bostr + ln;
a0ed51b3
LW
2077 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2078 char *l = locinput;
cf93c79d 2079 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2080 /*
2081 * Note that we can't do the "other character" lookup trick as
2082 * in the 8-bit case (no pun intended) because in Unicode we
2083 * have to map both upper and title case to lower case.
2084 */
2085 if (OP(scan) == REFF) {
2086 while (s < e) {
2087 if (l >= PL_regeol)
2088 sayNO;
dfe13c55 2089 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2090 sayNO;
2091 s += UTF8SKIP(s);
2092 l += UTF8SKIP(l);
2093 }
2094 }
2095 else {
2096 while (s < e) {
2097 if (l >= PL_regeol)
2098 sayNO;
dfe13c55 2099 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2100 sayNO;
2101 s += UTF8SKIP(s);
2102 l += UTF8SKIP(l);
2103 }
2104 }
2105 locinput = l;
2106 nextchr = UCHARAT(locinput);
2107 break;
2108 }
2109
a0d0e21e 2110 /* Inline the first character, for speed. */
76e3520e 2111 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2112 (OP(scan) == REF ||
2113 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2114 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2115 sayNO;
cf93c79d 2116 ln = PL_regendp[n] - ln;
3280af22 2117 if (locinput + ln > PL_regeol)
4633a7c4 2118 sayNO;
c8756f30
AK
2119 if (ln > 1 && (OP(scan) == REF
2120 ? memNE(s, locinput, ln)
2121 : (OP(scan) == REFF
2122 ? ibcmp(s, locinput, ln)
2123 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2124 sayNO;
a0d0e21e 2125 locinput += ln;
76e3520e 2126 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2127 break;
2128
2129 case NOTHING:
c277df42 2130 case TAIL:
a0d0e21e
LW
2131 break;
2132 case BACK:
2133 break;
c277df42
IZ
2134 case EVAL:
2135 {
2136 dSP;
533c011a 2137 OP_4tree *oop = PL_op;
3280af22
NIS
2138 COP *ocurcop = PL_curcop;
2139 SV **ocurpad = PL_curpad;
c277df42
IZ
2140 SV *ret;
2141
2142 n = ARG(scan);
533c011a
NIS
2143 PL_op = (OP_4tree*)PL_regdata->data[n];
2144 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
dfad63ad 2145 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2146 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2147
cea2e8a9 2148 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2149 SPAGAIN;
2150 ret = POPs;
2151 PUTBACK;
2152
0f5d15d6
IZ
2153 PL_op = oop;
2154 PL_curpad = ocurpad;
2155 PL_curcop = ocurcop;
c277df42 2156 if (logical) {
0f5d15d6
IZ
2157 if (logical == 2) { /* Postponed subexpression. */
2158 regexp *re;
22c35a8c 2159 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2160 re_cc_state state;
0f5d15d6
IZ
2161 CHECKPOINT cp, lastcp;
2162
2163 if(SvROK(ret) || SvRMAGICAL(ret)) {
2164 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2165
2166 if(SvMAGICAL(sv))
2167 mg = mg_find(sv, 'r');
2168 }
2169 if (mg) {
2170 re = (regexp *)mg->mg_obj;
df0003d4 2171 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2172 }
2173 else {
2174 STRLEN len;
2175 char *t = SvPV(ret, len);
2176 PMOP pm;
2177 char *oprecomp = PL_regprecomp;
2178 I32 osize = PL_regsize;
2179 I32 onpar = PL_regnpar;
2180
2181 pm.op_pmflags = 0;
cea2e8a9 2182 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2183 if (!(SvFLAGS(ret)
2184 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2185 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2186 PL_regprecomp = oprecomp;
2187 PL_regsize = osize;
2188 PL_regnpar = onpar;
2189 }
2190 DEBUG_r(
2191 PerlIO_printf(Perl_debug_log,
2192 "Entering embedded `%s%.60s%s%s'\n",
2193 PL_colors[0],
2194 re->precomp,
2195 PL_colors[1],
2196 (strlen(re->precomp) > 60 ? "..." : ""))
2197 );
2198 state.node = next;
2199 state.prev = PL_reg_call_cc;
2200 state.cc = PL_regcc;
2201 state.re = PL_reg_re;
2202
2ab05381 2203 PL_regcc = 0;
0f5d15d6
IZ
2204
2205 cp = regcppush(0); /* Save *all* the positions. */
2206 REGCP_SET;
2207 cache_re(re);
2208 state.ss = PL_savestack_ix;
2209 *PL_reglastparen = 0;
2210 PL_reg_call_cc = &state;
2211 PL_reginput = locinput;
2c2d71f5
JH
2212
2213 /* XXXX This is too dramatic a measure... */
2214 PL_reg_maxiter = 0;
2215
0f5d15d6 2216 if (regmatch(re->program + 1)) {
2c914db6
IZ
2217 /* Even though we succeeded, we need to restore
2218 global variables, since we may be wrapped inside
2219 SUSPEND, thus the match may be not finished yet. */
2220
2221 /* XXXX Do this only if SUSPENDed? */
2222 PL_reg_call_cc = state.prev;
2223 PL_regcc = state.cc;
2224 PL_reg_re = state.re;
2225 cache_re(PL_reg_re);
2226
2227 /* XXXX This is too dramatic a measure... */
2228 PL_reg_maxiter = 0;
2229
2230 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2231 ReREFCNT_dec(re);
2232 regcpblow(cp);
2233 sayYES;
2234 }
0f5d15d6
IZ
2235 ReREFCNT_dec(re);
2236 REGCP_UNWIND;
2237 regcppop();
2238 PL_reg_call_cc = state.prev;
2239 PL_regcc = state.cc;
2240 PL_reg_re = state.re;
d3790889 2241 cache_re(PL_reg_re);
2c2d71f5
JH
2242
2243 /* XXXX This is too dramatic a measure... */
2244 PL_reg_maxiter = 0;
2245
0f5d15d6
IZ
2246 sayNO;
2247 }
c277df42 2248 sw = SvTRUE(ret);
0f5d15d6 2249 logical = 0;
a0ed51b3
LW
2250 }
2251 else
3280af22 2252 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2253 break;
2254 }
a0d0e21e 2255 case OPEN:
c277df42 2256 n = ARG(scan); /* which paren pair */
3280af22
NIS
2257 PL_reg_start_tmp[n] = locinput;
2258 if (n > PL_regsize)
2259 PL_regsize = n;
a0d0e21e
LW
2260 break;
2261 case CLOSE:
c277df42 2262 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2263 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2264 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2265 if (n > *PL_reglastparen)
2266 *PL_reglastparen = n;
a0d0e21e 2267 break;
c277df42
IZ
2268 case GROUPP:
2269 n = ARG(scan); /* which paren pair */
cf93c79d 2270 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2271 break;
2272 case IFTHEN:
2c2d71f5 2273 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2274 if (sw)
2275 next = NEXTOPER(NEXTOPER(scan));
2276 else {
2277 next = scan + ARG(scan);
2278 if (OP(next) == IFTHEN) /* Fake one. */
2279 next = NEXTOPER(NEXTOPER(next));
2280 }
2281 break;
2282 case LOGICAL:
0f5d15d6 2283 logical = scan->flags;
c277df42 2284 break;
2ab05381
IZ
2285/*******************************************************************
2286 PL_regcc contains infoblock about the innermost (...)* loop, and
2287 a pointer to the next outer infoblock.
2288
2289 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2290
2291 1) After matching X, regnode for CURLYX is processed;
2292
2293 2) This regnode creates infoblock on the stack, and calls
2294 regmatch() recursively with the starting point at WHILEM node;
2295
2296 3) Each hit of WHILEM node tries to match A and Z (in the order
2297 depending on the current iteration, min/max of {min,max} and
2298 greediness). The information about where are nodes for "A"
2299 and "Z" is read from the infoblock, as is info on how many times "A"
2300 was already matched, and greediness.
2301
2302 4) After A matches, the same WHILEM node is hit again.
2303
2304 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2305 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2306 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2307 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2308 of the external loop.
2309
2310 Currently present infoblocks form a tree with a stem formed by PL_curcc
2311 and whatever it mentions via ->next, and additional attached trees
2312 corresponding to temporarily unset infoblocks as in "5" above.
2313
2314 In the following picture infoblocks for outer loop of
2315 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2316 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2317 infoblocks are drawn below the "reset" infoblock.
2318
2319 In fact in the picture below we do not show failed matches for Z and T
2320 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2321 more obvious *why* one needs to *temporary* unset infoblocks.]
2322
2323 Matched REx position InfoBlocks Comment
2324 (Y(A)*?Z)*?T x
2325 Y(A)*?Z)*?T x <- O
2326 Y (A)*?Z)*?T x <- O
2327 Y A)*?Z)*?T x <- O <- I
2328 YA )*?Z)*?T x <- O <- I
2329 YA A)*?Z)*?T x <- O <- I
2330 YAA )*?Z)*?T x <- O <- I
2331 YAA Z)*?T x <- O # Temporary unset I
2332 I
2333
2334 YAAZ Y(A)*?Z)*?T x <- O
2335 I
2336
2337 YAAZY (A)*?Z)*?T x <- O
2338 I
2339
2340 YAAZY A)*?Z)*?T x <- O <- I
2341 I
2342
2343 YAAZYA )*?Z)*?T x <- O <- I
2344 I
2345
2346 YAAZYA Z)*?T x <- O # Temporary unset I
2347 I,I
2348
2349 YAAZYAZ )*?T x <- O
2350 I,I
2351
2352 YAAZYAZ T x # Temporary unset O
2353 O
2354 I,I
2355
2356 YAAZYAZT x
2357 O
2358 I,I
2359 *******************************************************************/
a0d0e21e
LW
2360 case CURLYX: {
2361 CURCUR cc;
3280af22 2362 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2363
2364 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2365 next += ARG(next);
3280af22
NIS
2366 cc.oldcc = PL_regcc;
2367 PL_regcc = &cc;
2368 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2369 cc.cur = -1;
2370 cc.min = ARG1(scan);
2371 cc.max = ARG2(scan);
c277df42 2372 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2373 cc.next = next;
2374 cc.minmod = minmod;
2375 cc.lastloc = 0;
3280af22 2376 PL_reginput = locinput;
a0d0e21e
LW
2377 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2378 regcpblow(cp);
3280af22 2379 PL_regcc = cc.oldcc;
4633a7c4 2380 saySAME(n);
a0d0e21e
LW
2381 }
2382 /* NOT REACHED */
2383 case WHILEM: {
2384 /*
2385 * This is really hard to understand, because after we match
2386 * what we're trying to match, we must make sure the rest of
2c2d71f5 2387 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2388 * to go back UP the parse tree by recursing ever deeper. And
2389 * if it fails, we have to reset our parent's current state
2390 * that we can try again after backing off.
2391 */
2392
c277df42 2393 CHECKPOINT cp, lastcp;
3280af22 2394 CURCUR* cc = PL_regcc;
c277df42
IZ
2395 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2396
4633a7c4 2397 n = cc->cur + 1; /* how many we know we matched */
3280af22 2398 PL_reginput = locinput;
a0d0e21e 2399
c277df42
IZ
2400 DEBUG_r(
2401 PerlIO_printf(Perl_debug_log,
2402 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2403 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2404 (long)n, (long)cc->min,
2405 (long)cc->max, (long)cc)
2406 );
4633a7c4 2407
a0d0e21e
LW
2408 /* If degenerate scan matches "", assume scan done. */
2409
579cf2c3 2410 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2411 PL_regcc = cc->oldcc;
2ab05381
IZ
2412 if (PL_regcc)
2413 ln = PL_regcc->cur;
c277df42 2414 DEBUG_r(
c3464db5
DD
2415 PerlIO_printf(Perl_debug_log,
2416 "%*s empty match detected, try continuation...\n",
3280af22 2417 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2418 );
a0d0e21e 2419 if (regmatch(cc->next))
4633a7c4 2420 sayYES;
2ab05381
IZ
2421 if (PL_regcc)
2422 PL_regcc->cur = ln;
3280af22 2423 PL_regcc = cc;
4633a7c4 2424 sayNO;
a0d0e21e
LW
2425 }
2426
2427 /* First just match a string of min scans. */
2428
2429 if (n < cc->min) {
2430 cc->cur = n;
2431 cc->lastloc = locinput;
4633a7c4
LW
2432 if (regmatch(cc->scan))
2433 sayYES;
2434 cc->cur = n - 1;
c277df42 2435 cc->lastloc = lastloc;
4633a7c4 2436 sayNO;
a0d0e21e
LW
2437 }
2438
2c2d71f5
JH
2439 if (scan->flags) {
2440 /* Check whether we already were at this position.
2441 Postpone detection until we know the match is not
2442 *that* much linear. */
2443 if (!PL_reg_maxiter) {
2444 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2445 PL_reg_leftiter = PL_reg_maxiter;
2446 }
2447 if (PL_reg_leftiter-- == 0) {
2448 I32 size = (PL_reg_maxiter + 7)/8;
2449 if (PL_reg_poscache) {
2450 if (PL_reg_poscache_size < size) {
2451 Renew(PL_reg_poscache, size, char);
2452 PL_reg_poscache_size = size;
2453 }
2454 Zero(PL_reg_poscache, size, char);
2455 }
2456 else {
2457 PL_reg_poscache_size = size;
2458 Newz(29, PL_reg_poscache, size, char);
2459 }
2460 DEBUG_r(
2461 PerlIO_printf(Perl_debug_log,
2462 "%sDetected a super-linear match, switching on caching%s...\n",
2463 PL_colors[4], PL_colors[5])
2464 );
2465 }
2466 if (PL_reg_leftiter < 0) {
2467 I32 o = locinput - PL_bostr, b;
2468
2469 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2470 b = o % 8;
2471 o /= 8;
2472 if (PL_reg_poscache[o] & (1<<b)) {
2473 DEBUG_r(
2474 PerlIO_printf(Perl_debug_log,
2475 "%*s already tried at this position...\n",
2476 REPORT_CODE_OFF+PL_regindent*2, "")
2477 );
7821416a 2478 sayNO_SILENT;
2c2d71f5
JH
2479 }
2480 PL_reg_poscache[o] |= (1<<b);
2481 }
2482 }
2483
a0d0e21e
LW
2484 /* Prefer next over scan for minimal matching. */
2485
2486 if (cc->minmod) {
3280af22 2487 PL_regcc = cc->oldcc;
2ab05381
IZ
2488 if (PL_regcc)
2489 ln = PL_regcc->cur;
5f05dabc 2490 cp = regcppush(cc->parenfloor);
c277df42 2491 REGCP_SET;
5f05dabc 2492 if (regmatch(cc->next)) {
c277df42 2493 regcpblow(cp);
4633a7c4 2494 sayYES; /* All done. */
5f05dabc 2495 }
c277df42 2496 REGCP_UNWIND;
5f05dabc 2497 regcppop();
2ab05381
IZ
2498 if (PL_regcc)
2499 PL_regcc->cur = ln;
3280af22 2500 PL_regcc = cc;
a0d0e21e 2501
c277df42 2502 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2503 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
2504 && !(PL_reg_flags & RF_warned)) {
2505 PL_reg_flags |= RF_warned;
cea2e8a9 2506 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
2507 "Complex regular subexpression recursion",
2508 REG_INFTY - 1);
c277df42 2509 }
4633a7c4 2510 sayNO;
c277df42 2511 }
a687059c 2512
c277df42 2513 DEBUG_r(
c3464db5
DD
2514 PerlIO_printf(Perl_debug_log,
2515 "%*s trying longer...\n",
3280af22 2516 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2517 );
a0d0e21e 2518 /* Try scanning more and see if it helps. */
3280af22 2519 PL_reginput = locinput;
a0d0e21e
LW
2520 cc->cur = n;
2521 cc->lastloc = locinput;
5f05dabc 2522 cp = regcppush(cc->parenfloor);
c277df42 2523 REGCP_SET;
5f05dabc 2524 if (regmatch(cc->scan)) {
c277df42 2525 regcpblow(cp);
4633a7c4 2526 sayYES;
5f05dabc 2527 }
c277df42 2528 REGCP_UNWIND;
5f05dabc 2529 regcppop();
4633a7c4 2530 cc->cur = n - 1;
c277df42 2531 cc->lastloc = lastloc;
4633a7c4 2532 sayNO;
a0d0e21e
LW
2533 }
2534
2535 /* Prefer scan over next for maximal matching. */
2536
2537 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2538 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2539 cc->cur = n;
2540 cc->lastloc = locinput;
c277df42 2541 REGCP_SET;
5f05dabc 2542 if (regmatch(cc->scan)) {
c277df42 2543 regcpblow(cp);
4633a7c4 2544 sayYES;
5f05dabc 2545 }
c277df42 2546 REGCP_UNWIND;
a0d0e21e 2547 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2548 PL_reginput = locinput;
c277df42 2549 DEBUG_r(
c3464db5
DD
2550 PerlIO_printf(Perl_debug_log,
2551 "%*s failed, try continuation...\n",
3280af22 2552 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2553 );
2554 }
599cee73
PM
2555 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2556 && !(PL_reg_flags & RF_warned)) {
3280af22 2557 PL_reg_flags |= RF_warned;
cea2e8a9 2558 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
2559 "Complex regular subexpression recursion",
2560 REG_INFTY - 1);
a0d0e21e
LW
2561 }
2562
2563 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2564 PL_regcc = cc->oldcc;
2ab05381
IZ
2565 if (PL_regcc)
2566 ln = PL_regcc->cur;
a0d0e21e 2567 if (regmatch(cc->next))
4633a7c4 2568 sayYES;
2ab05381
IZ
2569 if (PL_regcc)
2570 PL_regcc->cur = ln;
3280af22 2571 PL_regcc = cc;
4633a7c4 2572 cc->cur = n - 1;
c277df42 2573 cc->lastloc = lastloc;
4633a7c4 2574 sayNO;
a0d0e21e
LW
2575 }
2576 /* NOT REACHED */
c277df42
IZ
2577 case BRANCHJ:
2578 next = scan + ARG(scan);
2579 if (next == scan)
2580 next = NULL;
2581 inner = NEXTOPER(NEXTOPER(scan));
2582 goto do_branch;
2583 case BRANCH:
2584 inner = NEXTOPER(scan);
2585 do_branch:
2586 {
2587 CHECKPOINT lastcp;
2588 c1 = OP(scan);
2589 if (OP(next) != c1) /* No choice. */
2590 next = inner; /* Avoid recursion. */
a0d0e21e 2591 else {
3280af22 2592 int lastparen = *PL_reglastparen;
c277df42
IZ
2593
2594 REGCP_SET;
a0d0e21e 2595 do {
3280af22 2596 PL_reginput = locinput;
c277df42 2597 if (regmatch(inner))
4633a7c4 2598 sayYES;
c277df42 2599 REGCP_UNWIND;
3280af22 2600 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2601 PL_regendp[n] = -1;
3280af22 2602 *PL_reglastparen = n;
c277df42 2603 scan = next;
a0d0e21e 2604 /*SUPPRESS 560*/
c277df42
IZ
2605 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2606 next += n;
a0d0e21e 2607 else
c277df42 2608 next = NULL;
c277df42
IZ
2609 inner = NEXTOPER(scan);
2610 if (c1 == BRANCHJ) {
2611 inner = NEXTOPER(inner);
2612 }
2613 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2614 sayNO;
a0d0e21e 2615 /* NOTREACHED */
a687059c 2616 }
a0d0e21e
LW
2617 }
2618 break;
2619 case MINMOD:
2620 minmod = 1;
2621 break;
c277df42
IZ
2622 case CURLYM:
2623 {
00db4c45 2624 I32 l = 0;
c277df42
IZ
2625 CHECKPOINT lastcp;
2626
2627 /* We suppose that the next guy does not need
2628 backtracking: in particular, it is of constant length,
2629 and has no parenths to influence future backrefs. */
2630 ln = ARG1(scan); /* min to match */
2631 n = ARG2(scan); /* max to match */
c277df42
IZ
2632 paren = scan->flags;
2633 if (paren) {
3280af22
NIS
2634 if (paren > PL_regsize)
2635 PL_regsize = paren;
2636 if (paren > *PL_reglastparen)
2637 *PL_reglastparen = paren;
c277df42 2638 }
dc45a647 2639 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2640 if (paren)
2641 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2642 PL_reginput = locinput;
c277df42
IZ
2643 if (minmod) {
2644 minmod = 0;
2645 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2646 sayNO;
5f4b28b2 2647 if (ln && l == 0 && n >= ln
c277df42
IZ
2648 /* In fact, this is tricky. If paren, then the
2649 fact that we did/didnot match may influence
2650 future execution. */
2651 && !(paren && ln == 0))
2652 ln = n;
3280af22 2653 locinput = PL_reginput;
22c35a8c 2654 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2655 c1 = (U8)*STRING(next);
c277df42 2656 if (OP(next) == EXACTF)
22c35a8c 2657 c2 = PL_fold[c1];
c277df42 2658 else if (OP(next) == EXACTFL)
22c35a8c 2659 c2 = PL_fold_locale[c1];
c277df42
IZ
2660 else
2661 c2 = c1;
a0ed51b3
LW
2662 }
2663 else
c277df42
IZ
2664 c1 = c2 = -1000;
2665 REGCP_SET;
5f4b28b2 2666 /* This may be improved if l == 0. */
c277df42
IZ
2667 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2668 /* If it could work, try it. */
2669 if (c1 == -1000 ||
3280af22
NIS
2670 UCHARAT(PL_reginput) == c1 ||
2671 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2672 {
2673 if (paren) {
2674 if (n) {
cf93c79d
IZ
2675 PL_regstartp[paren] =
2676 HOPc(PL_reginput, -l) - PL_bostr;
2677 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2678 }
2679 else
cf93c79d 2680 PL_regendp[paren] = -1;
c277df42
IZ
2681 }
2682 if (regmatch(next))
2683 sayYES;
2684 REGCP_UNWIND;
2685 }
2686 /* Couldn't or didn't -- move forward. */
3280af22 2687 PL_reginput = locinput;
c277df42
IZ
2688 if (regrepeat_hard(scan, 1, &l)) {
2689 ln++;
3280af22 2690 locinput = PL_reginput;
c277df42
IZ
2691 }
2692 else
2693 sayNO;
2694 }
a0ed51b3
LW
2695 }
2696 else {
c277df42
IZ
2697 n = regrepeat_hard(scan, n, &l);
2698 if (n != 0 && l == 0
2699 /* In fact, this is tricky. If paren, then the
2700 fact that we did/didnot match may influence
2701 future execution. */
2702 && !(paren && ln == 0))
2703 ln = n;
3280af22 2704 locinput = PL_reginput;
c277df42 2705 DEBUG_r(
5c0ca799
GS
2706 PerlIO_printf(Perl_debug_log,
2707 "%*s matched %ld times, len=%ld...\n",
3280af22 2708 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42
IZ
2709 );
2710 if (n >= ln) {
22c35a8c 2711 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2712 c1 = (U8)*STRING(next);
c277df42 2713 if (OP(next) == EXACTF)
22c35a8c 2714 c2 = PL_fold[c1];
c277df42 2715 else if (OP(next) == EXACTFL)
22c35a8c 2716 c2 = PL_fold_locale[c1];
c277df42
IZ
2717 else
2718 c2 = c1;
a0ed51b3
LW
2719 }
2720 else
c277df42
IZ
2721 c1 = c2 = -1000;
2722 }
2723 REGCP_SET;
2724 while (n >= ln) {
2725 /* If it could work, try it. */
2726 if (c1 == -1000 ||
3280af22
NIS
2727 UCHARAT(PL_reginput) == c1 ||
2728 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2729 {
2730 DEBUG_r(
c3464db5
DD
2731 PerlIO_printf(Perl_debug_log,
2732 "%*s trying tail with n=%ld...\n",
3280af22 2733 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3
LW
2734 );
2735 if (paren) {
2736 if (n) {
cf93c79d
IZ
2737 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2738 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2739 }
a0ed51b3 2740 else
cf93c79d 2741 PL_regendp[paren] = -1;
c277df42 2742 }
a0ed51b3
LW
2743 if (regmatch(next))
2744 sayYES;
2745 REGCP_UNWIND;
2746 }
c277df42
IZ
2747 /* Couldn't or didn't -- back up. */
2748 n--;
dfe13c55 2749 locinput = HOPc(locinput, -l);
3280af22 2750 PL_reginput = locinput;
c277df42
IZ
2751 }
2752 }
2753 sayNO;
2754 break;
2755 }
2756 case CURLYN:
2757 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2758 if (paren > PL_regsize)
2759 PL_regsize = paren;
2760 if (paren > *PL_reglastparen)
2761 *PL_reglastparen = paren;
c277df42
IZ
2762 ln = ARG1(scan); /* min to match */
2763 n = ARG2(scan); /* max to match */
dc45a647 2764 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2765 goto repeat;
a0d0e21e 2766 case CURLY:
c277df42 2767 paren = 0;
a0d0e21e
LW
2768 ln = ARG1(scan); /* min to match */
2769 n = ARG2(scan); /* max to match */
dc45a647 2770 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2771 goto repeat;
2772 case STAR:
2773 ln = 0;
c277df42 2774 n = REG_INFTY;
a0d0e21e 2775 scan = NEXTOPER(scan);
c277df42 2776 paren = 0;
a0d0e21e
LW
2777 goto repeat;
2778 case PLUS:
c277df42
IZ
2779 ln = 1;
2780 n = REG_INFTY;
2781 scan = NEXTOPER(scan);
2782 paren = 0;
2783 repeat:
a0d0e21e
LW
2784 /*
2785 * Lookahead to avoid useless match attempts
2786 * when we know what character comes next.
2787 */
22c35a8c 2788 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2789 c1 = (U8)*STRING(next);
bbce6d69 2790 if (OP(next) == EXACTF)
22c35a8c 2791 c2 = PL_fold[c1];
bbce6d69 2792 else if (OP(next) == EXACTFL)
22c35a8c 2793 c2 = PL_fold_locale[c1];
bbce6d69 2794 else
2795 c2 = c1;
2796 }
a0d0e21e 2797 else
bbce6d69 2798 c1 = c2 = -1000;
3280af22 2799 PL_reginput = locinput;
a0d0e21e 2800 if (minmod) {
c277df42 2801 CHECKPOINT lastcp;
a0d0e21e
LW
2802 minmod = 0;
2803 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2804 sayNO;
a0ed51b3 2805 locinput = PL_reginput;
c277df42 2806 REGCP_SET;
0fe9bf95
IZ
2807 if (c1 != -1000) {
2808 char *e = locinput + n - ln; /* Should not check after this */
2809 char *old = locinput;
2810
2811 if (e >= PL_regeol || (n == REG_INFTY))
2812 e = PL_regeol - 1;
2813 while (1) {
2814 /* Find place 'next' could work */
2815 if (c1 == c2) {
2816 while (locinput <= e && *locinput != c1)
2817 locinput++;
2818 } else {
2819 while (locinput <= e
2820 && *locinput != c1
2821 && *locinput != c2)
2822 locinput++;
2823 }
2824 if (locinput > e)
2825 sayNO;
2826 /* PL_reginput == old now */
2827 if (locinput != old) {
2828 ln = 1; /* Did some */
2829 if (regrepeat(scan, locinput - old) <
2830 locinput - old)
2831 sayNO;
2832 }
2833 /* PL_reginput == locinput now */
2834 if (paren) {
2835 if (ln) {
cf93c79d
IZ
2836 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2837 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2838 }
2839 else
cf93c79d 2840 PL_regendp[paren] = -1;
0fe9bf95
IZ
2841 }
2842 if (regmatch(next))
2843 sayYES;
2844 PL_reginput = locinput; /* Could be reset... */
2845 REGCP_UNWIND;
2846 /* Couldn't or didn't -- move forward. */
2847 old = locinput++;
2848 }
2849 }
2850 else
c277df42 2851 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2852 /* If it could work, try it. */
bbce6d69 2853 if (c1 == -1000 ||
3280af22
NIS
2854 UCHARAT(PL_reginput) == c1 ||
2855 UCHARAT(PL_reginput) == c2)
bbce6d69 2856 {
c277df42
IZ
2857 if (paren) {
2858 if (n) {
cf93c79d
IZ
2859 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2860 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2861 }
2862 else
cf93c79d 2863 PL_regendp[paren] = -1;
c277df42 2864 }
a0d0e21e 2865 if (regmatch(next))
4633a7c4 2866 sayYES;
c277df42 2867 REGCP_UNWIND;
bbce6d69 2868 }
c277df42 2869 /* Couldn't or didn't -- move forward. */
a0ed51b3 2870 PL_reginput = locinput;
a0d0e21e
LW
2871 if (regrepeat(scan, 1)) {
2872 ln++;
a0ed51b3
LW
2873 locinput = PL_reginput;
2874 }
2875 else
4633a7c4 2876 sayNO;
a0d0e21e
LW
2877 }
2878 }
2879 else {
c277df42 2880 CHECKPOINT lastcp;
a0d0e21e 2881 n = regrepeat(scan, n);
a0ed51b3 2882 locinput = PL_reginput;
22c35a8c 2883 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 2884 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2885 ln = n; /* why back off? */
c277df42
IZ
2886 REGCP_SET;
2887 if (paren) {
2888 while (n >= ln) {
2889 /* If it could work, try it. */
2890 if (c1 == -1000 ||
3280af22
NIS
2891 UCHARAT(PL_reginput) == c1 ||
2892 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2893 {
2894 if (paren && n) {
2895 if (n) {
cf93c79d
IZ
2896 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2897 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2898 }
2899 else
cf93c79d 2900 PL_regendp[paren] = -1;
c277df42
IZ
2901 }
2902 if (regmatch(next))
2903 sayYES;
2904 REGCP_UNWIND;
2905 }
2906 /* Couldn't or didn't -- back up. */
2907 n--;
dfe13c55 2908 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2909 }
a0ed51b3
LW
2910 }
2911 else {
c277df42
IZ
2912 while (n >= ln) {
2913 /* If it could work, try it. */
2914 if (c1 == -1000 ||
3280af22
NIS
2915 UCHARAT(PL_reginput) == c1 ||
2916 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2917 {
2918 if (regmatch(next))
2919 sayYES;
2920 REGCP_UNWIND;
2921 }
2922 /* Couldn't or didn't -- back up. */
2923 n--;
dfe13c55 2924 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2925 }
a0d0e21e
LW
2926 }
2927 }
4633a7c4 2928 sayNO;
c277df42 2929 break;
a0d0e21e 2930 case END:
0f5d15d6
IZ
2931 if (PL_reg_call_cc) {
2932 re_cc_state *cur_call_cc = PL_reg_call_cc;
2933 CURCUR *cctmp = PL_regcc;
2934 regexp *re = PL_reg_re;
2935 CHECKPOINT cp, lastcp;
2936
2937 cp = regcppush(0); /* Save *all* the positions. */
2938 REGCP_SET;
2939 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2940 the caller. */
2941 PL_reginput = locinput; /* Make position available to
2942 the callcc. */
2943 cache_re(PL_reg_call_cc->re);
2944 PL_regcc = PL_reg_call_cc->cc;
2945 PL_reg_call_cc = PL_reg_call_cc->prev;
2946 if (regmatch(cur_call_cc->node)) {
2947 PL_reg_call_cc = cur_call_cc;
2948 regcpblow(cp);
2949 sayYES;
2950 }
2951 REGCP_UNWIND;
2952 regcppop();
2953 PL_reg_call_cc = cur_call_cc;
2954 PL_regcc = cctmp;
2955 PL_reg_re = re;
2956 cache_re(re);
2957
2958 DEBUG_r(
2959 PerlIO_printf(Perl_debug_log,
2960 "%*s continuation failed...\n",
2961 REPORT_CODE_OFF+PL_regindent*2, "")
2962 );
7821416a 2963 sayNO_SILENT;
0f5d15d6 2964 }
7821416a
IZ
2965 if (locinput < PL_regtill) {
2966 DEBUG_r(PerlIO_printf(Perl_debug_log,
2967 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
2968 PL_colors[4],
2969 (long)(locinput - PL_reg_starttry),
2970 (long)(PL_regtill - PL_reg_starttry),
2971 PL_colors[5]));
2972 sayNO_FINAL; /* Cannot match: too short. */
2973 }
2974 PL_reginput = locinput; /* put where regtry can find it */
2975 sayYES_FINAL; /* Success! */
7e5428c5 2976 case SUCCEED:
3280af22 2977 PL_reginput = locinput; /* put where regtry can find it */
7821416a 2978 sayYES_LOUD; /* Success! */
c277df42
IZ
2979 case SUSPEND:
2980 n = 1;
9fe1d20c 2981 PL_reginput = locinput;
c277df42 2982 goto do_ifmatch;
a0d0e21e 2983 case UNLESSM:
c277df42 2984 n = 0;
a0ed51b3 2985 if (scan->flags) {
0fe9bf95
IZ
2986 if (UTF) { /* XXXX This is absolutely
2987 broken, we read before
2988 start of string. */
2989 s = HOPMAYBEc(locinput, -scan->flags);
2990 if (!s)
2991 goto say_yes;
2992 PL_reginput = s;
2993 }
2994 else {
2995 if (locinput < PL_bostr + scan->flags)
2996 goto say_yes;
2997 PL_reginput = locinput - scan->flags;
2998 goto do_ifmatch;
2999 }
a0ed51b3
LW
3000 }
3001 else
3002 PL_reginput = locinput;
c277df42
IZ
3003 goto do_ifmatch;
3004 case IFMATCH:
3005 n = 1;
a0ed51b3 3006 if (scan->flags) {
0fe9bf95
IZ
3007 if (UTF) { /* XXXX This is absolutely
3008 broken, we read before
3009 start of string. */
3010 s = HOPMAYBEc(locinput, -scan->flags);
3011 if (!s || s < PL_bostr)
3012 goto say_no;
3013 PL_reginput = s;
3014 }
3015 else {
3016 if (locinput < PL_bostr + scan->flags)
3017 goto say_no;
3018 PL_reginput = locinput - scan->flags;
3019 goto do_ifmatch;
3020 }
a0ed51b3
LW
3021 }
3022 else
3023 PL_reginput = locinput;
3024
c277df42 3025 do_ifmatch:
c277df42
IZ
3026 inner = NEXTOPER(NEXTOPER(scan));
3027 if (regmatch(inner) != n) {
3028 say_no:
3029 if (logical) {
3030 logical = 0;
3031 sw = 0;
3032 goto do_longjump;
a0ed51b3
LW
3033 }
3034 else
c277df42
IZ
3035 sayNO;
3036 }
3037 say_yes:
3038 if (logical) {
3039 logical = 0;
3040 sw = 1;
3041 }
fe44a5e8 3042 if (OP(scan) == SUSPEND) {
3280af22 3043 locinput = PL_reginput;
565764a8 3044 nextchr = UCHARAT(locinput);
fe44a5e8 3045 }
c277df42
IZ
3046 /* FALL THROUGH. */
3047 case LONGJMP:
3048 do_longjump:
3049 next = scan + ARG(scan);
3050 if (next == scan)
3051 next = NULL;
a0d0e21e
LW
3052 break;
3053 default:
bf49b057 3054 PerlIO_printf(Perl_error_log, "%lx %d\n",
c277df42 3055 (unsigned long)scan, OP(scan));
cea2e8a9 3056 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3057 }
a0d0e21e
LW
3058 scan = next;
3059 }
a687059c 3060
a0d0e21e
LW
3061 /*
3062 * We get here only if there's trouble -- normally "case END" is
3063 * the terminating point.
3064 */
cea2e8a9 3065 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3066 /*NOTREACHED*/
4633a7c4
LW
3067 sayNO;
3068
7821416a
IZ
3069yes_loud:
3070 DEBUG_r(
3071 PerlIO_printf(Perl_debug_log,
3072 "%*s %scould match...%s\n",
3073 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3074 );
3075 goto yes;
3076yes_final:
3077 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3078 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3079yes:
3080#ifdef DEBUGGING
3280af22 3081 PL_regindent--;
4633a7c4
LW
3082#endif
3083 return 1;
3084
3085no:
7821416a
IZ
3086 DEBUG_r(
3087 PerlIO_printf(Perl_debug_log,
3088 "%*s %sfailed...%s\n",
3089 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3090 );
3091 goto do_no;
3092no_final:
3093do_no:
4633a7c4 3094#ifdef DEBUGGING
3280af22 3095 PL_regindent--;
4633a7c4 3096#endif
a0d0e21e 3097 return 0;
a687059c
LW
3098}
3099
3100/*
3101 - regrepeat - repeatedly match something simple, report how many
3102 */
3103/*
3104 * [This routine now assumes that it will only match on things of length 1.
3105 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3106 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3107 */
76e3520e 3108STATIC I32
cea2e8a9 3109S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3110{
5c0ca799 3111 dTHR;
a0d0e21e
LW
3112 register char *scan;
3113 register char *opnd;
3114 register I32 c;
3280af22 3115 register char *loceol = PL_regeol;
a0ed51b3 3116 register I32 hardcount = 0;
a0d0e21e 3117
3280af22 3118 scan = PL_reginput;
c277df42 3119 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3120 loceol = scan + max;
a0d0e21e 3121 switch (OP(p)) {
22c35a8c 3122 case REG_ANY:
a0d0e21e
LW
3123 while (scan < loceol && *scan != '\n')
3124 scan++;
3125 break;
3126 case SANY:
3127 scan = loceol;
3128 break;
a0ed51b3
LW
3129 case ANYUTF8:
3130 loceol = PL_regeol;
3131 while (scan < loceol && *scan != '\n') {
3132 scan += UTF8SKIP(scan);
3133 hardcount++;
3134 }
3135 break;
3136 case SANYUTF8:
3137 loceol = PL_regeol;
3138 while (scan < loceol) {
3139 scan += UTF8SKIP(scan);
3140 hardcount++;
3141 }
3142 break;
bbce6d69 3143 case EXACT: /* length of string is 1 */
cd439c50 3144 c = (U8)*STRING(p);
bbce6d69 3145 while (scan < loceol && UCHARAT(scan) == c)
3146 scan++;
3147 break;
3148 case EXACTF: /* length of string is 1 */
cd439c50 3149 c = (U8)*STRING(p);
bbce6d69 3150 while (scan < loceol &&
22c35a8c 3151 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3152 scan++;
3153 break;
3154 case EXACTFL: /* length of string is 1 */
3280af22 3155 PL_reg_flags |= RF_tainted;
cd439c50 3156 c = (U8)*STRING(p);
bbce6d69 3157 while (scan < loceol &&
22c35a8c 3158 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3159 scan++;
3160 break;
a0ed51b3
LW
3161 case ANYOFUTF8:
3162 loceol = PL_regeol;
3163 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3164 scan += UTF8SKIP(scan);
3165 hardcount++;
3166 }
3167 break;
a0d0e21e 3168 case ANYOF:
cd439c50 3169 opnd = MASK(p);
ae5c130c 3170 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 3171 scan++;
a0d0e21e
LW
3172 break;
3173 case ALNUM:
3174 while (scan < loceol && isALNUM(*scan))
3175 scan++;
3176 break;
a0ed51b3
LW
3177 case ALNUMUTF8:
3178 loceol = PL_regeol;
dfe13c55 3179 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3180 scan += UTF8SKIP(scan);
3181 hardcount++;
3182 }
3183 break;
bbce6d69 3184 case ALNUML:
3280af22 3185 PL_reg_flags |= RF_tainted;
bbce6d69 3186 while (scan < loceol && isALNUM_LC(*scan))
3187 scan++;
3188 break;
a0ed51b3
LW
3189 case ALNUMLUTF8:
3190 PL_reg_flags |= RF_tainted;
3191 loceol = PL_regeol;
dfe13c55 3192 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3193 scan += UTF8SKIP(scan);
3194 hardcount++;
3195 }
3196 break;
3197 break;
a0d0e21e
LW
3198 case NALNUM:
3199 while (scan < loceol && !isALNUM(*scan))
3200 scan++;
3201 break;
a0ed51b3
LW
3202 case NALNUMUTF8:
3203 loceol = PL_regeol;
dfe13c55 3204 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3205 scan += UTF8SKIP(scan);
3206 hardcount++;
3207 }
3208 break;
bbce6d69 3209 case NALNUML:
3280af22 3210 PL_reg_flags |= RF_tainted;
bbce6d69 3211 while (scan < loceol && !isALNUM_LC(*scan))
3212 scan++;
3213 break;
a0ed51b3
LW
3214 case NALNUMLUTF8:
3215 PL_reg_flags |= RF_tainted;
3216 loceol = PL_regeol;
dfe13c55 3217 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3218 scan += UTF8SKIP(scan);
3219 hardcount++;
3220 }
3221 break;
a0d0e21e
LW
3222 case SPACE:
3223 while (scan < loceol && isSPACE(*scan))
3224 scan++;
3225 break;
a0ed51b3
LW
3226 case SPACEUTF8:
3227 loceol = PL_regeol;
dfe13c55 3228 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3229 scan += UTF8SKIP(scan);
3230 hardcount++;
3231 }
3232 break;
bbce6d69 3233 case SPACEL:
3280af22 3234 PL_reg_flags |= RF_tainted;
bbce6d69 3235 while (scan < loceol && isSPACE_LC(*scan))
3236 scan++;
3237 break;
a0ed51b3
LW
3238 case SPACELUTF8:
3239 PL_reg_flags |= RF_tainted;
3240 loceol = PL_regeol;
dfe13c55 3241 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3242 scan += UTF8SKIP(scan);
3243 hardcount++;
3244 }
3245 break;
a0d0e21e
LW
3246 case NSPACE:
3247 while (scan < loceol && !isSPACE(*scan))
3248 scan++;
3249 break;
a0ed51b3
LW
3250 case NSPACEUTF8:
3251 loceol = PL_regeol;
dfe13c55 3252 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3253 scan += UTF8SKIP(scan);
3254 hardcount++;
3255 }
3256 break;
bbce6d69 3257 case NSPACEL:
3280af22 3258 PL_reg_flags |= RF_tainted;
bbce6d69 3259 while (scan < loceol && !isSPACE_LC(*scan))
3260 scan++;
3261 break;
a0ed51b3
LW
3262 case NSPACELUTF8:
3263 PL_reg_flags |= RF_tainted;
3264 loceol = PL_regeol;
dfe13c55 3265 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3266 scan += UTF8SKIP(scan);
3267 hardcount++;
3268 }
3269 break;
a0d0e21e
LW
3270 case DIGIT:
3271 while (scan < loceol && isDIGIT(*scan))
3272 scan++;
3273 break;
a0ed51b3
LW
3274 case DIGITUTF8:
3275 loceol = PL_regeol;
dfe13c55 3276 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3277 scan += UTF8SKIP(scan);
3278 hardcount++;
3279 }
3280 break;
3281 break;
a0d0e21e
LW
3282 case NDIGIT:
3283 while (scan < loceol && !isDIGIT(*scan))
3284 scan++;
3285 break;
a0ed51b3
LW
3286 case NDIGITUTF8:
3287 loceol = PL_regeol;
dfe13c55 3288 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3289 scan += UTF8SKIP(scan);
3290 hardcount++;
3291 }
3292 break;
a0d0e21e
LW
3293 default: /* Called on something of 0 width. */
3294 break; /* So match right here or not at all. */
3295 }
a687059c 3296
a0ed51b3
LW
3297 if (hardcount)
3298 c = hardcount;
3299 else
3300 c = scan - PL_reginput;
3280af22 3301 PL_reginput = scan;
a687059c 3302
c277df42
IZ
3303 DEBUG_r(
3304 {
3305 SV *prop = sv_newmortal();
3306
3307 regprop(prop, p);
3308 PerlIO_printf(Perl_debug_log,
3309 "%*s %s can match %ld times out of %ld...\n",
3310 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
3311 });
3312
a0d0e21e 3313 return(c);
a687059c
LW
3314}
3315
3316/*
c277df42
IZ
3317 - regrepeat_hard - repeatedly match something, report total lenth and length
3318 *
3319 * The repeater is supposed to have constant length.
3320 */
3321
76e3520e 3322STATIC I32
cea2e8a9 3323S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3324{
5c0ca799 3325 dTHR;
c277df42
IZ
3326 register char *scan;
3327 register char *start;
3280af22 3328 register char *loceol = PL_regeol;
a0ed51b3 3329 I32 l = 0;
708e3b05 3330 I32 count = 0, res = 1;
a0ed51b3
LW
3331
3332 if (!max)
3333 return 0;
c277df42 3334
3280af22 3335 start = PL_reginput;
a0ed51b3 3336 if (UTF) {
708e3b05 3337 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3338 if (!count++) {
3339 l = 0;
3340 while (start < PL_reginput) {
3341 l++;
3342 start += UTF8SKIP(start);
3343 }
3344 *lp = l;
3345 if (l == 0)
3346 return max;
3347 }
3348 if (count == max)
3349 return count;
3350 }
3351 }
3352 else {
708e3b05 3353 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3354 if (!count++) {
3355 *lp = l = PL_reginput - start;
3356 if (max != REG_INFTY && l*max < loceol - scan)
3357 loceol = scan + l*max;
3358 if (l == 0)
3359 return max;
c277df42
IZ
3360 }
3361 }
3362 }
708e3b05 3363 if (!res)
3280af22 3364 PL_reginput = scan;
c277df42 3365
a0ed51b3 3366 return count;
c277df42
IZ
3367}
3368
3369/*
cb8d8820 3370 - reginclass - determine if a character falls into a character class
bbce6d69 3371 */
3372
76e3520e 3373STATIC bool
cea2e8a9 3374S_reginclass(pTHX_ register char *p, register I32 c)
bbce6d69 3375{
5c0ca799 3376 dTHR;
b8c5462f 3377 char flags = ANYOF_FLAGS(p);
bbce6d69 3378 bool match = FALSE;
3379
3380 c &= 0xFF;
b8c5462f 3381 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3382 match = TRUE;
3383 else if (flags & ANYOF_FOLD) {
3384 I32 cf;
3385 if (flags & ANYOF_LOCALE) {
3280af22 3386 PL_reg_flags |= RF_tainted;
22c35a8c 3387 cf = PL_fold_locale[c];
bbce6d69 3388 }
3389 else
22c35a8c 3390 cf = PL_fold[c];
b8c5462f 3391 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3392 match = TRUE;
3393 }
3394
b8c5462f 3395 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3396 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3397 if (
3398 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3399 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3400 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3401 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3402 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3403 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3404 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3405 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3406 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3407 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3408 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3409 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3410 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3411 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3412 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3413 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3414 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3415 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3416 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3417 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3418 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3419 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3420 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3421 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3422 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3423 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3424 ) /* How's that for a conditional? */
bbce6d69 3425 {
3426 match = TRUE;
3427 }
3428 }
3429
ae5c130c 3430 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3431}
3432
a0ed51b3 3433STATIC bool
cea2e8a9 3434S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3435{
3436 dTHR;
a0ed51b3
LW
3437 char flags = ARG1(f);
3438 bool match = FALSE;
3439 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3440
3441 if (swash_fetch(sv, p))
3442 match = TRUE;
3443 else if (flags & ANYOF_FOLD) {
3444 I32 cf;
dfe13c55 3445 U8 tmpbuf[10];
a0ed51b3
LW
3446 if (flags & ANYOF_LOCALE) {
3447 PL_reg_flags |= RF_tainted;
3448 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3449 }
3450 else
3451 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3452 if (swash_fetch(sv, tmpbuf))
3453 match = TRUE;
3454 }
3455
b8c5462f 3456 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3457
3458 return (flags & ANYOF_INVERT) ? !match : match;
3459}
161b471a 3460
dfe13c55 3461STATIC U8 *
cea2e8a9 3462S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3463{
3464 dTHR;
a0ed51b3
LW
3465 if (off >= 0) {
3466 while (off-- && s < (U8*)PL_regeol)
3467 s += UTF8SKIP(s);
3468 }
3469 else {
3470 while (off++) {
3471 if (s > (U8*)PL_bostr) {
3472 s--;
3473 if (*s & 0x80) {
3474 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3475 s--;
3476 } /* XXX could check well-formedness here */
3477 }
3478 }
3479 }
3480 return s;
3481}
161b471a 3482
dfe13c55 3483STATIC U8 *
cea2e8a9 3484S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3485{
c485e607 3486 dTHR;
a0ed51b3
LW
3487 if (off >= 0) {
3488 while (off-- && s < (U8*)PL_regeol)
3489 s += UTF8SKIP(s);
3490 if (off >= 0)
3491 return 0;
3492 }
3493 else {
3494 while (off++) {
3495 if (s > (U8*)PL_bostr) {
3496 s--;
3497 if (*s & 0x80) {
3498 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3499 s--;
3500 } /* XXX could check well-formedness here */
3501 }
3502 else
3503 break;
3504 }
3505 if (off <= 0)
3506 return 0;
3507 }
3508 return s;
3509}
51371543
GS
3510
3511#ifdef PERL_OBJECT
3512#define NO_XSLOCKS
3513#include "XSUB.h"
3514#endif
3515
3516static void
3517restore_pos(pTHXo_ void *arg)
3518{
3519 dTHR;
3520 if (PL_reg_eval_set) {
3521 if (PL_reg_oldsaved) {
3522 PL_reg_re->subbeg = PL_reg_oldsaved;
3523 PL_reg_re->sublen = PL_reg_oldsavedlen;
3524 RX_MATCH_COPIED_on(PL_reg_re);
3525 }
3526 PL_reg_magic->mg_len = PL_reg_oldpos;
3527 PL_reg_eval_set = 0;
3528 PL_curpm = PL_reg_oldcurpm;
3529 }
3530}
3531