This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate with Sarathy.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5
DD
148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
149 " Setting an EVAL scope, savestack=%i\n", \
3280af22 150 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5
DD
153 PerlIO_printf(Perl_debug_log, \
154 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 155 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5
DD
178 PerlIO_printf(Perl_debug_log,
179 " restoring \\%d to %d(%d)..%d%s\n",
cf93c79d
IZ
180 paren, PL_regstartp[paren],
181 PL_reg_start_tmp[paren] - PL_bostr,
182 PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5
DD
188 PerlIO_printf(Perl_debug_log,
189 " restoring \\%d..\\%d to undef\n",
3280af22 190 *PL_reglastparen + 1, PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
2c2d71f5
JH
278/* A failure to find a constant substring means that there is no need to make
279 an expensive call to REx engine, thus we celebrate a failure. Similarly,
280 finding a substring too deep into the string means that less calls to
30944b6d
IZ
281 regtry() should be needed.
282
283 REx compiler's optimizer found 4 possible hints:
284 a) Anchored substring;
285 b) Fixed substring;
286 c) Whether we are anchored (beginning-of-line or \G);
287 d) First node (of those at offset 0) which may distingush positions;
288 We use 'a', 'b', multiline-part of 'c', and try to find a position in the
289 string which does not contradict any of them.
290 */
2c2d71f5 291
cad2e5aa
JH
292char *
293Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
294 char *strend, U32 flags, re_scream_pos_data *data)
295{
2c2d71f5 296 register I32 start_shift;
cad2e5aa 297 /* Should be nonnegative! */
2c2d71f5
JH
298 register I32 end_shift;
299 register char *s;
300 register SV *check;
cad2e5aa
JH
301 char *t;
302 I32 ml_anch;
2c2d71f5
JH
303 char *tmp;
304 register char *other_last = Nullch;
30944b6d
IZ
305#ifdef DEBUGGING
306 char *i_strpos = strpos;
307#endif
cad2e5aa
JH
308
309 DEBUG_r( if (!PL_colorset) reginitcolors() );
310 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 311 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
312 PL_colors[4],PL_colors[5],PL_colors[0],
313 prog->precomp,
314 PL_colors[1],
315 (strlen(prog->precomp) > 60 ? "..." : ""),
316 PL_colors[0],
317 (strend - strpos > 60 ? 60 : strend - strpos),
318 strpos, PL_colors[1],
319 (strend - strpos > 60 ? "..." : ""))
320 );
321
2c2d71f5
JH
322 if (prog->minlen > strend - strpos) {
323 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 324 goto fail;
2c2d71f5
JH
325 }
326 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
327 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
328 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 329 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
330
331 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 332 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
333 I32 slen;
334
335 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
336 && (sv && (strpos + SvCUR(sv) != strend)) ) {
337 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 338 goto fail;
2c2d71f5 339 }
adac82c7 340 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 341 s = HOPc(strpos, prog->check_offset_min);
cad2e5aa
JH
342 if (SvTAIL(prog->check_substr)) {
343 slen = SvCUR(prog->check_substr); /* >= 1 */
344
2c2d71f5
JH
345 if ( strend - s > slen || strend - s < slen - 1
346 || (strend - s == slen && strend[-1] != '\n')) {
347 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
348 goto fail_finish;
cad2e5aa
JH
349 }
350 /* Now should match s[0..slen-2] */
351 slen--;
352 if (slen && (*SvPVX(prog->check_substr) != *s
353 || (slen > 1
2c2d71f5
JH
354 && memNE(SvPVX(prog->check_substr), s, slen)))) {
355 report_neq:
356 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
357 goto fail_finish;
358 }
cad2e5aa
JH
359 }
360 else if (*SvPVX(prog->check_substr) != *s
361 || ((slen = SvCUR(prog->check_substr)) > 1
362 && memNE(SvPVX(prog->check_substr), s, slen)))
2c2d71f5
JH
363 goto report_neq;
364 goto success_at_start;
cad2e5aa 365 }
2c2d71f5 366 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 367 s = strpos;
2c2d71f5
JH
368 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
369 /* Should be nonnegative! */
370 end_shift = prog->minlen - start_shift -
371 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
372 if (!ml_anch) {
373 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
374 - (SvTAIL(prog->check_substr) != 0);
375 I32 eshift = strend - s - end;
376
377 if (end_shift < eshift)
378 end_shift = eshift;
379 }
cad2e5aa 380 }
2c2d71f5 381 else { /* Can match at random position */
cad2e5aa
JH
382 ml_anch = 0;
383 s = strpos;
2c2d71f5
JH
384 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
385 /* Should be nonnegative! */
386 end_shift = prog->minlen - start_shift -
387 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
cad2e5aa
JH
388 }
389
2c2d71f5 390#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 391 if (end_shift < 0)
6bbae5e6 392 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
393#endif
394
395 check = prog->check_substr;
396 restart:
397 /* Find a possible match in the region s..strend by looking for
398 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 399 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
400 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
401 I32 p = -1; /* Internal iterator of scream. */
402 I32 *pp = data ? data->scream_pos : &p;
403
2c2d71f5
JH
404 if (PL_screamfirst[BmRARE(check)] >= 0
405 || ( BmRARE(check) == '\n'
406 && (BmPREVIOUS(check) == SvCUR(check) - 1)
407 && SvTAIL(check) ))
408 s = screaminstr(sv, check,
409 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 410 else
2c2d71f5 411 goto fail_finish;
cad2e5aa
JH
412 if (data)
413 *data->scream_olds = s;
414 }
415 else
416 s = fbm_instr((unsigned char*)s + start_shift,
417 (unsigned char*)strend - end_shift,
2c2d71f5 418 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
419
420 /* Update the count-of-usability, remove useless subpatterns,
421 unshift s. */
2c2d71f5
JH
422
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
424 (s ? "Found" : "Did not find"),
425 ((check == prog->anchored_substr) ? "anchored" : "floating"),
426 PL_colors[0],
7b0972df
JH
427 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
428 SvPVX(check),
2c2d71f5
JH
429 PL_colors[1], (SvTAIL(check) ? "$" : ""),
430 (s ? " at offset " : "...\n") ) );
431
432 if (!s)
433 goto fail_finish;
434
435 /* Finish the diagnostic message */
30944b6d 436 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
437
438 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
439 Start with the other substr.
440 XXXX no SCREAM optimization yet - and a very coarse implementation
441 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
442 *always* match. Probably should be marked during compile...
443 Probably it is right to do no SCREAM here...
444 */
445
446 if (prog->float_substr && prog->anchored_substr) {
30944b6d 447 /* Take into account the "other" substring. */
2c2d71f5
JH
448 /* XXXX May be hopelessly wrong for UTF... */
449 if (!other_last)
450 other_last = strpos - 1;
451 if (check == prog->float_substr) {
30944b6d
IZ
452 do_other_anchored:
453 {
2c2d71f5
JH
454 char *last = s - start_shift, *last1, *last2;
455 char *s1 = s;
456
457 tmp = PL_bostr;
458 t = s - prog->check_offset_max;
459 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
460 && (!(prog->reganch & ROPT_UTF8)
461 || (PL_bostr = strpos, /* Used in regcopmaybe() */
462 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
463 && t > strpos)))
30944b6d 464 /* EMPTY */;
2c2d71f5
JH
465 else
466 t = strpos;
467 t += prog->anchored_offset;
468 if (t <= other_last)
469 t = other_last + 1;
470 PL_bostr = tmp;
471 last2 = last1 = strend - prog->minlen;
472 if (last < last1)
473 last1 = last;
474 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
475 /* On end-of-str: see comment below. */
476 s = fbm_instr((unsigned char*)t,
477 (unsigned char*)last1 + prog->anchored_offset
478 + SvCUR(prog->anchored_substr)
479 - (SvTAIL(prog->anchored_substr)!=0),
480 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
481 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
482 (s ? "Found" : "Contradicts"),
483 PL_colors[0],
7b0972df
JH
484 (int)(SvCUR(prog->anchored_substr)
485 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
486 SvPVX(prog->anchored_substr),
487 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
488 if (!s) {
489 if (last1 >= last2) {
490 DEBUG_r(PerlIO_printf(Perl_debug_log,
491 ", giving up...\n"));
492 goto fail_finish;
493 }
494 DEBUG_r(PerlIO_printf(Perl_debug_log,
495 ", trying floating at offset %ld...\n",
30944b6d 496 (long)(s1 + 1 - i_strpos)));
2c2d71f5
JH
497 PL_regeol = strend; /* Used in HOP() */
498 other_last = last1 + prog->anchored_offset;
499 s = HOPc(last, 1);
500 goto restart;
501 }
502 else {
503 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 504 (long)(s - i_strpos)));
2c2d71f5
JH
505 t = s - prog->anchored_offset;
506 other_last = s - 1;
30944b6d 507 s = s1;
2c2d71f5
JH
508 if (t == strpos)
509 goto try_at_start;
2c2d71f5
JH
510 goto try_at_offset;
511 }
30944b6d 512 }
2c2d71f5
JH
513 }
514 else { /* Take into account the floating substring. */
515 char *last, *last1;
516 char *s1 = s;
517
518 t = s - start_shift;
519 last1 = last = strend - prog->minlen + prog->float_min_offset;
520 if (last - t > prog->float_max_offset)
521 last = t + prog->float_max_offset;
522 s = t + prog->float_min_offset;
523 if (s <= other_last)
524 s = other_last + 1;
525 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
526 /* fbm_instr() takes into account exact value of end-of-str
527 if the check is SvTAIL(ed). Since false positives are OK,
528 and end-of-str is not later than strend we are OK. */
529 s = fbm_instr((unsigned char*)s,
530 (unsigned char*)last + SvCUR(prog->float_substr)
531 - (SvTAIL(prog->float_substr)!=0),
532 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
533 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
534 (s ? "Found" : "Contradicts"),
535 PL_colors[0],
7b0972df
JH
536 (int)(SvCUR(prog->float_substr)
537 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
538 SvPVX(prog->float_substr),
539 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
540 if (!s) {
541 if (last1 == last) {
542 DEBUG_r(PerlIO_printf(Perl_debug_log,
543 ", giving up...\n"));
544 goto fail_finish;
545 }
546 DEBUG_r(PerlIO_printf(Perl_debug_log,
547 ", trying anchored starting at offset %ld...\n",
30944b6d 548 (long)(s1 + 1 - i_strpos)));
2c2d71f5
JH
549 other_last = last;
550 PL_regeol = strend; /* Used in HOP() */
551 s = HOPc(t, 1);
552 goto restart;
553 }
554 else {
555 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 556 (long)(s - i_strpos)));
2c2d71f5 557 other_last = s - 1;
30944b6d 558 s = s1;
2c2d71f5
JH
559 if (t == strpos)
560 goto try_at_start;
2c2d71f5
JH
561 goto try_at_offset;
562 }
563 }
cad2e5aa 564 }
2c2d71f5
JH
565
566 t = s - prog->check_offset_max;
567 tmp = PL_bostr;
568 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
569 && (!(prog->reganch & ROPT_UTF8)
570 || (PL_bostr = strpos, /* Used in regcopmaybe() */
571 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
572 && t > strpos)))) {
573 PL_bostr = tmp;
574 /* Fixed substring is found far enough so that the match
575 cannot start at strpos. */
576 try_at_offset:
cad2e5aa 577 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
578 /* Eventually fbm_*() should handle this, but often
579 anchored_offset is not 0, so this check will not be wasted. */
580 /* XXXX In the code below we prefer to look for "^" even in
581 presence of anchored substrings. And we search even
582 beyond the found float position. These pessimizations
583 are historical artefacts only. */
584 find_anchor:
2c2d71f5 585 while (t < strend - prog->minlen) {
cad2e5aa
JH
586 if (*t == '\n') {
587 if (t < s - prog->check_offset_min) {
30944b6d
IZ
588 if (prog->anchored_substr) {
589 /* We definitely contradict the found anchored
590 substr. Due to the above check we do not
591 contradict "check" substr.
592 Thus we can arrive here only if check substr
593 is float. Redo checking for "other"=="fixed".
594 */
595 strpos = t + 1;
596 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
597 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
598 goto do_other_anchored;
599 }
cad2e5aa 600 s = t + 1;
2c2d71f5 601 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 602 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
603 goto set_useful;
604 }
2c2d71f5 605 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
606 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
607 strpos = s = t + 1;
cad2e5aa
JH
608 goto restart;
609 }
610 t++;
611 }
2c2d71f5
JH
612 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
613 PL_colors[0],PL_colors[1]));
614 goto fail_finish;
cad2e5aa
JH
615 }
616 s = t;
617 set_useful:
2c2d71f5 618 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
619 }
620 else {
2c2d71f5
JH
621 PL_bostr = tmp;
622 /* The found string does not prohibit matching at beg-of-str
623 - no optimization of calling REx engine can be performed,
624 unless it was an MBOL and we are not after MBOL. */
625 try_at_start:
626 /* Even in this situation we may use MBOL flag if strpos is offset
627 wrt the start of the string. */
628 if (ml_anch && sv
cad2e5aa
JH
629 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
630 t = strpos;
631 goto find_anchor;
632 }
30944b6d
IZ
633 DEBUG_r( if (ml_anch)
634 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
635 PL_colors[0],PL_colors[1]);
636 );
2c2d71f5 637 success_at_start:
30944b6d 638 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
cad2e5aa
JH
639 && --BmUSEFUL(prog->check_substr) < 0
640 && prog->check_substr == prog->float_substr) { /* boo */
641 /* If flags & SOMETHING - do not do it many times on the same match */
642 SvREFCNT_dec(prog->check_substr);
643 prog->check_substr = Nullsv; /* disable */
644 prog->float_substr = Nullsv; /* clear */
645 s = strpos;
646 prog->reganch &= ~RE_USE_INTUIT;
647 }
648 else
649 s = strpos;
650 }
651
2c2d71f5 652 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 653 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 654 return s;
2c2d71f5
JH
655
656 fail_finish: /* Substring not found */
657 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 658 fail:
2c2d71f5 659 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
660 PL_colors[4],PL_colors[5]));
661 return Nullch;
662}
9661b544 663
a687059c 664/*
c277df42 665 - regexec_flags - match a regexp against a string
a687059c 666 */
79072805 667I32
864dbfa3 668Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
22e551b9 669 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
c277df42
IZ
670/* strend: pointer to null at end of string */
671/* strbeg: real beginning of string */
672/* minend: end of match must be >=minend after stringarg. */
673/* data: May be used for some additional optimizations. */
674/* nosave: For optimizations. */
a687059c 675{
5c0ca799 676 dTHR;
a0d0e21e 677 register char *s;
c277df42 678 register regnode *c;
a0d0e21e
LW
679 register char *startpos = stringarg;
680 register I32 tmp;
c277df42 681 I32 minlen; /* must match at least this many chars */
a0d0e21e 682 I32 dontbother = 0; /* how many characters not to try at end */
c277df42 683 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
684 constant substr. */ /* CC */
685 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
686 I32 scream_pos = -1; /* Internal iterator of scream. */
687 char *scream_olds;
3280af22 688 SV* oreplsv = GvSV(PL_replgv);
a687059c 689
2ab05381 690 PL_regcc = 0;
a0d0e21e 691
0f5d15d6 692 cache_re(prog);
a0d0e21e 693#ifdef DEBUGGING
3280af22 694 PL_regnarrate = PL_debug & 512;
a0d0e21e
LW
695#endif
696
697 /* Be paranoid... */
698 if (prog == NULL || startpos == NULL) {
cea2e8a9 699 Perl_croak(aTHX_ "NULL regexp parameter");
a0d0e21e
LW
700 return 0;
701 }
702
c277df42
IZ
703 minlen = prog->minlen;
704 if (strend - startpos < minlen) goto phooey;
705
a0d0e21e 706 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 707 PL_regprev = '\n';
a0d0e21e 708 else {
a0ed51b3 709 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
710 if (!PL_multiline && PL_regprev == '\n')
711 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 712 }
bbce6d69 713
a0d0e21e 714 /* Check validity of program. */
22c35a8c 715 if (UCHARAT(prog->program) != REG_MAGIC) {
cea2e8a9 716 Perl_croak(aTHX_ "corrupted regexp program");
a0d0e21e
LW
717 }
718
3280af22
NIS
719 PL_reg_flags = 0;
720 PL_reg_eval_set = 0;
2c2d71f5 721 PL_reg_maxiter = 0;
a0d0e21e 722
a0ed51b3
LW
723 if (prog->reganch & ROPT_UTF8)
724 PL_reg_flags |= RF_utf8;
725
726 /* Mark beginning of line for ^ and lookbehind. */
727 PL_regbol = startpos;
728 PL_bostr = strbeg;
9661b544 729 PL_reg_sv = sv;
a0ed51b3
LW
730
731 /* Mark end of line for $ (and such) */
732 PL_regeol = strend;
733
734 /* see how far we have to get to not match where we matched before */
735 PL_regtill = startpos+minend;
736
0f5d15d6
IZ
737 /* We start without call_cc context. */
738 PL_reg_call_cc = 0;
739
a0d0e21e
LW
740 /* If there is a "must appear" string, look for it. */
741 s = startpos;
cad2e5aa 742
30382c73 743 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
cad2e5aa
JH
744 MAGIC *mg;
745
30382c73 746 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
cad2e5aa 747 PL_reg_ganch = startpos;
30382c73
IZ
748 else if (sv && SvTYPE(sv) >= SVt_PVMG
749 && SvMAGIC(sv)
750 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
751 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
752 if (prog->reganch & ROPT_ANCH_GPOS) {
753 if (s > PL_reg_ganch)
754 goto phooey;
755 s = PL_reg_ganch;
756 }
a0ed51b3 757 }
30382c73
IZ
758 else /* pos() not defined */
759 PL_reg_ganch = strbeg;
a0d0e21e 760 }
a687059c 761
cad2e5aa
JH
762 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
763 re_scream_pos_data d;
764
765 d.scream_olds = &scream_olds;
766 d.scream_pos = &scream_pos;
767 s = re_intuit_start(prog, sv, s, strend, flags, &d);
768 if (!s)
769 goto phooey; /* not present */
770 }
771
772 DEBUG_r( if (!PL_colorset) reginitcolors() );
773 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 774 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
8d300b32
GS
775 PL_colors[4],PL_colors[5],PL_colors[0],
776 prog->precomp,
777 PL_colors[1],
c277df42 778 (strlen(prog->precomp) > 60 ? "..." : ""),
cad2e5aa 779 PL_colors[0],
c277df42 780 (strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 781 startpos, PL_colors[1],
c277df42
IZ
782 (strend - startpos > 60 ? "..." : ""))
783 );
784
a0d0e21e 785 /* Simplest case: anchored match need be tried only once. */
774d564b 786 /* [unless only anchor is BOL and multiline is set] */
22e551b9 787 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
cad2e5aa 788 if (s == startpos && regtry(prog, startpos))
a0d0e21e 789 goto got_it;
22e551b9
IZ
790 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
791 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
774d564b 792 {
cad2e5aa
JH
793 char *end;
794
a0d0e21e
LW
795 if (minlen)
796 dontbother = minlen - 1;
cad2e5aa 797 end = HOPc(strend, -dontbother) - 1;
a0d0e21e 798 /* for multiline we only have to try after newlines */
cad2e5aa 799 if (prog->check_substr) {
30944b6d
IZ
800 if (s == startpos)
801 goto after_try;
cad2e5aa
JH
802 while (1) {
803 if (regtry(prog, s))
a0d0e21e 804 goto got_it;
30944b6d 805 after_try:
cad2e5aa
JH
806 if (s >= end)
807 goto phooey;
808 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
809 if (!s)
810 goto phooey;
811 }
812 } else {
813 if (s > startpos)
814 s--;
815 while (s < end) {
816 if (*s++ == '\n') { /* don't need PL_utf8skip here */
817 if (regtry(prog, s))
818 goto got_it;
819 }
820 }
35c8bce7 821 }
35c8bce7 822 }
a0d0e21e 823 goto phooey;
22e551b9
IZ
824 } else if (prog->reganch & ROPT_ANCH_GPOS) {
825 if (regtry(prog, PL_reg_ganch))
826 goto got_it;
827 goto phooey;
a0d0e21e 828 }
35c8bce7 829
a0d0e21e 830 /* Messy cases: unanchored match. */
c277df42
IZ
831 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
832 /* we have /x+whatever/ */
cad2e5aa 833 /* it must be a one character string (XXXX Except UTF?) */
c277df42 834 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
835 if (UTF) {
836 while (s < strend) {
837 if (*s == ch) {
838 if (regtry(prog, s)) goto got_it;
839 s += UTF8SKIP(s);
840 while (s < strend && *s == ch)
841 s += UTF8SKIP(s);
842 }
843 s += UTF8SKIP(s);
844 }
845 }
846 else {
847 while (s < strend) {
848 if (*s == ch) {
849 if (regtry(prog, s)) goto got_it;
c277df42 850 s++;
a0ed51b3
LW
851 while (s < strend && *s == ch)
852 s++;
853 }
854 s++;
a0d0e21e 855 }
a687059c 856 }
c277df42
IZ
857 }
858 /*SUPPRESS 560*/
859 else if (prog->anchored_substr != Nullsv
860 || (prog->float_substr != Nullsv
861 && prog->float_max_offset < strend - s)) {
862 SV *must = prog->anchored_substr
863 ? prog->anchored_substr : prog->float_substr;
864 I32 back_max =
865 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
866 I32 back_min =
867 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
868 I32 delta = back_max - back_min;
cf93c79d 869 char *last = HOPc(strend, /* Cannot start after this */
73c4f7a1
GS
870 -(I32)(CHR_SVLEN(must)
871 - (SvTAIL(must) != 0) + back_min));
a0ed51b3
LW
872 char *last1; /* Last position checked before */
873
874 if (s > PL_bostr)
dfe13c55 875 last1 = HOPc(s, -1);
a0ed51b3
LW
876 else
877 last1 = s - 1; /* bogus */
c277df42
IZ
878
879 /* XXXX check_substr already used to find `s', can optimize if
880 check_substr==must. */
881 scream_pos = -1;
882 dontbother = end_shift;
dfe13c55 883 strend = HOPc(strend, -dontbother);
c277df42 884 while ( (s <= last) &&
22e551b9
IZ
885 ((flags & REXEC_SCREAM)
886 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
c277df42 887 end_shift, &scream_pos, 0))
a0ed51b3 888 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
cf93c79d
IZ
889 (unsigned char*)strend, must,
890 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
dfe13c55
GS
891 if (HOPc(s, -back_max) > last1) {
892 last1 = HOPc(s, -back_min);
893 s = HOPc(s, -back_max);
a0ed51b3
LW
894 }
895 else {
dfe13c55 896 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 897
dfe13c55 898 last1 = HOPc(s, -back_min);
c277df42 899 s = t;
a0d0e21e 900 }
a0ed51b3
LW
901 if (UTF) {
902 while (s <= last1) {
903 if (regtry(prog, s))
904 goto got_it;
905 s += UTF8SKIP(s);
906 }
907 }
908 else {
909 while (s <= last1) {
910 if (regtry(prog, s))
911 goto got_it;
912 s++;
913 }
a0d0e21e
LW
914 }
915 }
916 goto phooey;
a0ed51b3
LW
917 }
918 else if (c = prog->regstclass) {
a0d0e21e 919 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 920 char *cc;
a687059c 921
a0d0e21e
LW
922 if (minlen)
923 dontbother = minlen - 1;
dfe13c55 924 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
925 tmp = 1;
926 /* We know what class it must start with. */
927 switch (OP(c)) {
a0ed51b3 928 case ANYOFUTF8:
cd439c50 929 cc = MASK(c);
a0ed51b3
LW
930 while (s < strend) {
931 if (REGINCLASSUTF8(c, (U8*)s)) {
932 if (tmp && regtry(prog, s))
933 goto got_it;
934 else
935 tmp = doevery;
936 }
937 else
938 tmp = 1;
939 s += UTF8SKIP(s);
940 }
941 break;
a0d0e21e 942 case ANYOF:
cd439c50 943 cc = MASK(c);
a0d0e21e 944 while (s < strend) {
a0ed51b3 945 if (REGINCLASS(cc, *s)) {
a0d0e21e
LW
946 if (tmp && regtry(prog, s))
947 goto got_it;
948 else
949 tmp = doevery;
a687059c 950 }
a0d0e21e
LW
951 else
952 tmp = 1;
953 s++;
954 }
955 break;
bbce6d69 956 case BOUNDL:
3280af22 957 PL_reg_flags |= RF_tainted;
bbce6d69 958 /* FALL THROUGH */
a0d0e21e 959 case BOUND:
a0ed51b3
LW
960 if (minlen) {
961 dontbother++;
962 strend -= 1;
963 }
3280af22 964 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 965 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 966 while (s < strend) {
95bac841 967 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
968 tmp = !tmp;
969 if (regtry(prog, s))
970 goto got_it;
a687059c 971 }
a0d0e21e
LW
972 s++;
973 }
974 if ((minlen || tmp) && regtry(prog,s))
975 goto got_it;
976 break;
a0ed51b3
LW
977 case BOUNDLUTF8:
978 PL_reg_flags |= RF_tainted;
979 /* FALL THROUGH */
980 case BOUNDUTF8:
981 if (minlen) {
982 dontbother++;
dfe13c55 983 strend = reghop_c(strend, -1);
a0ed51b3 984 }
dfe13c55 985 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
986 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
987 while (s < strend) {
dfe13c55
GS
988 if (tmp == !(OP(c) == BOUND ?
989 swash_fetch(PL_utf8_alnum, (U8*)s) :
990 isALNUM_LC_utf8((U8*)s)))
991 {
a0ed51b3
LW
992 tmp = !tmp;
993 if (regtry(prog, s))
994 goto got_it;
995 }
996 s += UTF8SKIP(s);
997 }
998 if ((minlen || tmp) && regtry(prog,s))
999 goto got_it;
1000 break;
bbce6d69 1001 case NBOUNDL:
3280af22 1002 PL_reg_flags |= RF_tainted;
bbce6d69 1003 /* FALL THROUGH */
a0d0e21e 1004 case NBOUND:
a0ed51b3
LW
1005 if (minlen) {
1006 dontbother++;
1007 strend -= 1;
1008 }
3280af22 1009 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 1010 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 1011 while (s < strend) {
95bac841 1012 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
1013 tmp = !tmp;
1014 else if (regtry(prog, s))
1015 goto got_it;
1016 s++;
1017 }
1018 if ((minlen || !tmp) && regtry(prog,s))
1019 goto got_it;
1020 break;
a0ed51b3
LW
1021 case NBOUNDLUTF8:
1022 PL_reg_flags |= RF_tainted;
1023 /* FALL THROUGH */
1024 case NBOUNDUTF8:
1025 if (minlen) {
1026 dontbother++;
dfe13c55 1027 strend = reghop_c(strend, -1);
a0ed51b3 1028 }
dfe13c55 1029 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
1030 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
1031 while (s < strend) {
dfe13c55
GS
1032 if (tmp == !(OP(c) == NBOUND ?
1033 swash_fetch(PL_utf8_alnum, (U8*)s) :
1034 isALNUM_LC_utf8((U8*)s)))
a0ed51b3
LW
1035 tmp = !tmp;
1036 else if (regtry(prog, s))
1037 goto got_it;
1038 s += UTF8SKIP(s);
1039 }
1040 if ((minlen || !tmp) && regtry(prog,s))
1041 goto got_it;
1042 break;
a0d0e21e
LW
1043 case ALNUM:
1044 while (s < strend) {
bbce6d69 1045 if (isALNUM(*s)) {
1046 if (tmp && regtry(prog, s))
1047 goto got_it;
1048 else
1049 tmp = doevery;
1050 }
1051 else
1052 tmp = 1;
1053 s++;
1054 }
1055 break;
a0ed51b3
LW
1056 case ALNUMUTF8:
1057 while (s < strend) {
dfe13c55 1058 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
1059 if (tmp && regtry(prog, s))
1060 goto got_it;
1061 else
1062 tmp = doevery;
1063 }
1064 else
1065 tmp = 1;
1066 s += UTF8SKIP(s);
1067 }
1068 break;
bbce6d69 1069 case ALNUML:
3280af22 1070 PL_reg_flags |= RF_tainted;
bbce6d69 1071 while (s < strend) {
1072 if (isALNUM_LC(*s)) {
a0d0e21e
LW
1073 if (tmp && regtry(prog, s))
1074 goto got_it;
a687059c 1075 else
a0d0e21e
LW
1076 tmp = doevery;
1077 }
1078 else
1079 tmp = 1;
1080 s++;
1081 }
1082 break;
a0ed51b3
LW
1083 case ALNUMLUTF8:
1084 PL_reg_flags |= RF_tainted;
1085 while (s < strend) {
dfe13c55 1086 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
1087 if (tmp && regtry(prog, s))
1088 goto got_it;
1089 else
1090 tmp = doevery;
1091 }
1092 else
1093 tmp = 1;
1094 s += UTF8SKIP(s);
1095 }
1096 break;
a0d0e21e
LW
1097 case NALNUM:
1098 while (s < strend) {
bbce6d69 1099 if (!isALNUM(*s)) {
1100 if (tmp && regtry(prog, s))
1101 goto got_it;
1102 else
1103 tmp = doevery;
1104 }
1105 else
1106 tmp = 1;
1107 s++;
1108 }
1109 break;
a0ed51b3
LW
1110 case NALNUMUTF8:
1111 while (s < strend) {
dfe13c55 1112 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
1113 if (tmp && regtry(prog, s))
1114 goto got_it;
1115 else
1116 tmp = doevery;
1117 }
1118 else
1119 tmp = 1;
1120 s += UTF8SKIP(s);
1121 }
1122 break;
bbce6d69 1123 case NALNUML:
3280af22 1124 PL_reg_flags |= RF_tainted;
bbce6d69 1125 while (s < strend) {
1126 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
1127 if (tmp && regtry(prog, s))
1128 goto got_it;
a687059c 1129 else
a0d0e21e 1130 tmp = doevery;
a687059c 1131 }
a0d0e21e
LW
1132 else
1133 tmp = 1;
1134 s++;
1135 }
1136 break;
a0ed51b3
LW
1137 case NALNUMLUTF8:
1138 PL_reg_flags |= RF_tainted;
1139 while (s < strend) {
dfe13c55 1140 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
1141 if (tmp && regtry(prog, s))
1142 goto got_it;
1143 else
1144 tmp = doevery;
1145 }
1146 else
1147 tmp = 1;
1148 s += UTF8SKIP(s);
1149 }
1150 break;
a0d0e21e
LW
1151 case SPACE:
1152 while (s < strend) {
1153 if (isSPACE(*s)) {
1154 if (tmp && regtry(prog, s))
1155 goto got_it;
1156 else
1157 tmp = doevery;
2304df62 1158 }
a0d0e21e
LW
1159 else
1160 tmp = 1;
1161 s++;
1162 }
1163 break;
a0ed51b3
LW
1164 case SPACEUTF8:
1165 while (s < strend) {
dfe13c55 1166 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3
LW
1167 if (tmp && regtry(prog, s))
1168 goto got_it;
1169 else
1170 tmp = doevery;
1171 }
1172 else
1173 tmp = 1;
1174 s += UTF8SKIP(s);
1175 }
1176 break;
bbce6d69 1177 case SPACEL:
3280af22 1178 PL_reg_flags |= RF_tainted;
bbce6d69 1179 while (s < strend) {
1180 if (isSPACE_LC(*s)) {
1181 if (tmp && regtry(prog, s))
1182 goto got_it;
1183 else
1184 tmp = doevery;
1185 }
1186 else
1187 tmp = 1;
1188 s++;
1189 }
1190 break;
a0ed51b3
LW
1191 case SPACELUTF8:
1192 PL_reg_flags |= RF_tainted;
1193 while (s < strend) {
dfe13c55 1194 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3
LW
1195 if (tmp && regtry(prog, s))
1196 goto got_it;
1197 else
1198 tmp = doevery;
1199 }
1200 else
1201 tmp = 1;
1202 s += UTF8SKIP(s);
1203 }
1204 break;
a0d0e21e
LW
1205 case NSPACE:
1206 while (s < strend) {
1207 if (!isSPACE(*s)) {
1208 if (tmp && regtry(prog, s))
1209 goto got_it;
1210 else
1211 tmp = doevery;
a687059c 1212 }
a0d0e21e
LW
1213 else
1214 tmp = 1;
1215 s++;
1216 }
1217 break;
a0ed51b3
LW
1218 case NSPACEUTF8:
1219 while (s < strend) {
dfe13c55 1220 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3
LW
1221 if (tmp && regtry(prog, s))
1222 goto got_it;
1223 else
1224 tmp = doevery;
1225 }
1226 else
1227 tmp = 1;
1228 s += UTF8SKIP(s);
1229 }
1230 break;
bbce6d69 1231 case NSPACEL:
3280af22 1232 PL_reg_flags |= RF_tainted;
bbce6d69 1233 while (s < strend) {
1234 if (!isSPACE_LC(*s)) {
1235 if (tmp && regtry(prog, s))
1236 goto got_it;
1237 else
1238 tmp = doevery;
1239 }
1240 else
1241 tmp = 1;
1242 s++;
1243 }
1244 break;
a0ed51b3
LW
1245 case NSPACELUTF8:
1246 PL_reg_flags |= RF_tainted;
1247 while (s < strend) {
dfe13c55 1248 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3
LW
1249 if (tmp && regtry(prog, s))
1250 goto got_it;
1251 else
1252 tmp = doevery;
1253 }
1254 else
1255 tmp = 1;
1256 s += UTF8SKIP(s);
1257 }
1258 break;
a0d0e21e
LW
1259 case DIGIT:
1260 while (s < strend) {
1261 if (isDIGIT(*s)) {
1262 if (tmp && regtry(prog, s))
1263 goto got_it;
1264 else
1265 tmp = doevery;
2b69d0c2 1266 }
a0d0e21e
LW
1267 else
1268 tmp = 1;
1269 s++;
1270 }
1271 break;
a0ed51b3
LW
1272 case DIGITUTF8:
1273 while (s < strend) {
dfe13c55 1274 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1275 if (tmp && regtry(prog, s))
1276 goto got_it;
1277 else
1278 tmp = doevery;
1279 }
1280 else
1281 tmp = 1;
1282 s += UTF8SKIP(s);
1283 }
1284 break;
b8c5462f
JH
1285 case DIGITL:
1286 PL_reg_flags |= RF_tainted;
1287 while (s < strend) {
1288 if (isDIGIT_LC(*s)) {
1289 if (tmp && regtry(prog, s))
1290 goto got_it;
1291 else
1292 tmp = doevery;
1293 }
1294 else
1295 tmp = 1;
1296 s++;
1297 }
1298 break;
1299 case DIGITLUTF8:
1300 PL_reg_flags |= RF_tainted;
1301 while (s < strend) {
1302 if (isDIGIT_LC_utf8((U8*)s)) {
1303 if (tmp && regtry(prog, s))
1304 goto got_it;
1305 else
1306 tmp = doevery;
1307 }
1308 else
1309 tmp = 1;
1310 s += UTF8SKIP(s);
1311 }
1312 break;
a0d0e21e
LW
1313 case NDIGIT:
1314 while (s < strend) {
1315 if (!isDIGIT(*s)) {
1316 if (tmp && regtry(prog, s))
1317 goto got_it;
1318 else
1319 tmp = doevery;
a687059c 1320 }
a0d0e21e
LW
1321 else
1322 tmp = 1;
1323 s++;
1324 }
1325 break;
a0ed51b3
LW
1326 case NDIGITUTF8:
1327 while (s < strend) {
dfe13c55 1328 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1329 if (tmp && regtry(prog, s))
1330 goto got_it;
1331 else
1332 tmp = doevery;
1333 }
1334 else
1335 tmp = 1;
1336 s += UTF8SKIP(s);
1337 }
1338 break;
b8c5462f
JH
1339 case NDIGITL:
1340 PL_reg_flags |= RF_tainted;
1341 while (s < strend) {
1342 if (!isDIGIT_LC(*s)) {
1343 if (tmp && regtry(prog, s))
1344 goto got_it;
1345 else
1346 tmp = doevery;
1347 }
1348 else
1349 tmp = 1;
1350 s++;
a0ed51b3 1351 }
b8c5462f
JH
1352 break;
1353 case NDIGITLUTF8:
1354 PL_reg_flags |= RF_tainted;
1355 while (s < strend) {
1356 if (!isDIGIT_LC_utf8((U8*)s)) {
1357 if (tmp && regtry(prog, s))
1358 goto got_it;
cf93c79d 1359 else
b8c5462f
JH
1360 tmp = doevery;
1361 }
1362 else
1363 tmp = 1;
1364 s += UTF8SKIP(s);
1365 }
1366 break;
d6a28714
JH
1367 }
1368 }
1369 else {
1370 dontbother = 0;
1371 if (prog->float_substr != Nullsv) { /* Trim the end. */
1372 char *last;
1373 I32 oldpos = scream_pos;
1374
1375 if (flags & REXEC_SCREAM) {
1376 last = screaminstr(sv, prog->float_substr, s - strbeg,
1377 end_shift, &scream_pos, 1); /* last one */
1378 if (!last)
1379 last = scream_olds; /* Only one occurence. */
b8c5462f 1380 }
d6a28714
JH
1381 else {
1382 STRLEN len;
1383 char *little = SvPV(prog->float_substr, len);
1384
1385 if (SvTAIL(prog->float_substr)) {
1386 if (memEQ(strend - len + 1, little, len - 1))
1387 last = strend - len + 1;
1388 else if (!PL_multiline)
1389 last = memEQ(strend - len, little, len)
1390 ? strend - len : Nullch;
b8c5462f 1391 else
d6a28714
JH
1392 goto find_last;
1393 } else {
1394 find_last:
1395 if (len)
1396 last = rninstr(s, strend, little, little + len);
b8c5462f 1397 else
d6a28714 1398 last = strend; /* matching `$' */
b8c5462f 1399 }
b8c5462f 1400 }
d6a28714
JH
1401 if (last == NULL) goto phooey; /* Should not happen! */
1402 dontbother = strend - last + prog->float_min_offset;
1403 }
1404 if (minlen && (dontbother < minlen))
1405 dontbother = minlen - 1;
1406 strend -= dontbother; /* this one's always in bytes! */
1407 /* We don't know much -- general case. */
1408 if (UTF) {
1409 for (;;) {
1410 if (regtry(prog, s))
1411 goto got_it;
1412 if (s >= strend)
1413 break;
b8c5462f 1414 s += UTF8SKIP(s);
d6a28714
JH
1415 };
1416 }
1417 else {
1418 do {
1419 if (regtry(prog, s))
1420 goto got_it;
1421 } while (s++ < strend);
1422 }
1423 }
1424
1425 /* Failure. */
1426 goto phooey;
1427
1428got_it:
1429 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1430
1431 if (PL_reg_eval_set) {
1432 /* Preserve the current value of $^R */
1433 if (oreplsv != GvSV(PL_replgv))
1434 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1435 restored, the value remains
1436 the same. */
1437 restore_pos(aTHXo_ 0);
1438 }
1439
1440 /* make sure $`, $&, $', and $digit will work later */
1441 if ( !(flags & REXEC_NOT_FIRST) ) {
1442 if (RX_MATCH_COPIED(prog)) {
1443 Safefree(prog->subbeg);
1444 RX_MATCH_COPIED_off(prog);
1445 }
1446 if (flags & REXEC_COPY_STR) {
1447 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1448
1449 s = savepvn(strbeg, i);
1450 prog->subbeg = s;
1451 prog->sublen = i;
1452 RX_MATCH_COPIED_on(prog);
1453 }
1454 else {
1455 prog->subbeg = strbeg;
1456 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1457 }
1458 }
1459
1460 return 1;
1461
1462phooey:
1463 if (PL_reg_eval_set)
1464 restore_pos(aTHXo_ 0);
1465 return 0;
1466}
1467
1468/*
1469 - regtry - try match at specific point
1470 */
1471STATIC I32 /* 0 failure, 1 success */
1472S_regtry(pTHX_ regexp *prog, char *startpos)
1473{
1474 dTHR;
1475 register I32 i;
1476 register I32 *sp;
1477 register I32 *ep;
1478 CHECKPOINT lastcp;
1479
1480 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1481 MAGIC *mg;
1482
1483 PL_reg_eval_set = RS_init;
1484 DEBUG_r(DEBUG_s(
1485 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
1486 PL_stack_sp - PL_stack_base);
1487 ));
1488 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1489 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1490 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1491 SAVETMPS;
1492 /* Apparently this is not needed, judging by wantarray. */
1493 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1494 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1495
1496 if (PL_reg_sv) {
1497 /* Make $_ available to executed code. */
1498 if (PL_reg_sv != DEFSV) {
1499 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1500 SAVESPTR(DEFSV);
1501 DEFSV = PL_reg_sv;
b8c5462f 1502 }
d6a28714
JH
1503
1504 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1505 && (mg = mg_find(PL_reg_sv, 'g')))) {
1506 /* prepare for quick setting of pos */
1507 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1508 mg = mg_find(PL_reg_sv, 'g');
1509 mg->mg_len = -1;
b8c5462f 1510 }
d6a28714
JH
1511 PL_reg_magic = mg;
1512 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1513 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1514 }
1515 if (!PL_reg_curpm)
1516 New(22,PL_reg_curpm, 1, PMOP);
1517 PL_reg_curpm->op_pmregexp = prog;
1518 PL_reg_oldcurpm = PL_curpm;
1519 PL_curpm = PL_reg_curpm;
1520 if (RX_MATCH_COPIED(prog)) {
1521 /* Here is a serious problem: we cannot rewrite subbeg,
1522 since it may be needed if this match fails. Thus
1523 $` inside (?{}) could fail... */
1524 PL_reg_oldsaved = prog->subbeg;
1525 PL_reg_oldsavedlen = prog->sublen;
1526 RX_MATCH_COPIED_off(prog);
1527 }
1528 else
1529 PL_reg_oldsaved = Nullch;
1530 prog->subbeg = PL_bostr;
1531 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1532 }
1533 prog->startp[0] = startpos - PL_bostr;
1534 PL_reginput = startpos;
1535 PL_regstartp = prog->startp;
1536 PL_regendp = prog->endp;
1537 PL_reglastparen = &prog->lastparen;
1538 prog->lastparen = 0;
1539 PL_regsize = 0;
1540 DEBUG_r(PL_reg_starttry = startpos);
1541 if (PL_reg_start_tmpl <= prog->nparens) {
1542 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1543 if(PL_reg_start_tmp)
1544 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1545 else
1546 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1547 }
1548
1549 /* XXXX What this code is doing here?!!! There should be no need
1550 to do this again and again, PL_reglastparen should take care of
1551 this! */
1552 sp = prog->startp;
1553 ep = prog->endp;
1554 if (prog->nparens) {
1555 for (i = prog->nparens; i >= 1; i--) {
1556 *++sp = -1;
1557 *++ep = -1;
1558 }
1559 }
1560 REGCP_SET;
1561 if (regmatch(prog->program + 1)) {
1562 prog->endp[0] = PL_reginput - PL_bostr;
1563 return 1;
1564 }
1565 REGCP_UNWIND;
1566 return 0;
1567}
1568
1569/*
1570 - regmatch - main matching routine
1571 *
1572 * Conceptually the strategy is simple: check to see whether the current
1573 * node matches, call self recursively to see whether the rest matches,
1574 * and then act accordingly. In practice we make some effort to avoid
1575 * recursion, in particular by going through "ordinary" nodes (that don't
1576 * need to know whether the rest of the match failed) by a loop instead of
1577 * by recursion.
1578 */
1579/* [lwall] I've hoisted the register declarations to the outer block in order to
1580 * maybe save a little bit of pushing and popping on the stack. It also takes
1581 * advantage of machines that use a register save mask on subroutine entry.
1582 */
1583STATIC I32 /* 0 failure, 1 success */
1584S_regmatch(pTHX_ regnode *prog)
1585{
1586 dTHR;
1587 register regnode *scan; /* Current node. */
1588 regnode *next; /* Next node. */
1589 regnode *inner; /* Next node in internal branch. */
1590 register I32 nextchr; /* renamed nextchr - nextchar colides with
1591 function of same name */
1592 register I32 n; /* no or next */
1593 register I32 ln; /* len or last */
1594 register char *s; /* operand or save */
1595 register char *locinput = PL_reginput;
1596 register I32 c1, c2, paren; /* case fold search, parenth */
1597 int minmod = 0, sw = 0, logical = 0;
1598#ifdef DEBUGGING
1599 PL_regindent++;
1600#endif
1601
1602 /* Note that nextchr is a byte even in UTF */
1603 nextchr = UCHARAT(locinput);
1604 scan = prog;
1605 while (scan != NULL) {
1606#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1607#ifdef DEBUGGING
1608# define sayYES goto yes
1609# define sayNO goto no
7821416a
IZ
1610# define sayYES_FINAL goto yes_final
1611# define sayYES_LOUD goto yes_loud
1612# define sayNO_FINAL goto no_final
1613# define sayNO_SILENT goto do_no
d6a28714
JH
1614# define saySAME(x) if (x) goto yes; else goto no
1615# define REPORT_CODE_OFF 24
1616#else
1617# define sayYES return 1
1618# define sayNO return 0
7821416a
IZ
1619# define sayYES_FINAL return 1
1620# define sayYES_LOUD return 1
1621# define sayNO_FINAL return 0
1622# define sayNO_SILENT return 0
d6a28714
JH
1623# define saySAME(x) return x
1624#endif
1625 DEBUG_r( {
1626 SV *prop = sv_newmortal();
1627 int docolor = *PL_colors[0];
1628 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1629 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1630 /* The part of the string before starttry has one color
1631 (pref0_len chars), between starttry and current
1632 position another one (pref_len - pref0_len chars),
1633 after the current position the third one.
1634 We assume that pref0_len <= pref_len, otherwise we
1635 decrease pref0_len. */
1636 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1637 ? (5 + taill) - l : locinput - PL_bostr);
1638 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1639
1640 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1641 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1642 ? (5 + taill) - pref_len : PL_regeol - locinput);
1643 if (pref0_len < 0)
1644 pref0_len = 0;
1645 if (pref0_len > pref_len)
1646 pref0_len = pref_len;
1647 regprop(prop, scan);
1648 PerlIO_printf(Perl_debug_log,
1649 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
1650 locinput - PL_bostr,
1651 PL_colors[4], pref0_len,
1652 locinput - pref_len, PL_colors[5],
1653 PL_colors[2], pref_len - pref0_len,
1654 locinput - pref_len + pref0_len, PL_colors[3],
1655 (docolor ? "" : "> <"),
1656 PL_colors[0], l, locinput, PL_colors[1],
1657 15 - l - pref_len + 1,
1658 "",
1659 scan - PL_regprogram, PL_regindent*2, "",
1660 SvPVX(prop));
1661 } );
1662
1663 next = scan + NEXT_OFF(scan);
1664 if (next == scan)
1665 next = NULL;
1666
1667 switch (OP(scan)) {
1668 case BOL:
1669 if (locinput == PL_bostr
1670 ? PL_regprev == '\n'
1671 : (PL_multiline &&
1672 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1673 {
1674 /* regtill = regbol; */
b8c5462f
JH
1675 break;
1676 }
d6a28714
JH
1677 sayNO;
1678 case MBOL:
1679 if (locinput == PL_bostr
1680 ? PL_regprev == '\n'
1681 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1682 {
b8c5462f
JH
1683 break;
1684 }
d6a28714
JH
1685 sayNO;
1686 case SBOL:
1687 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1688 break;
d6a28714
JH
1689 sayNO;
1690 case GPOS:
1691 if (locinput == PL_reg_ganch)
1692 break;
1693 sayNO;
1694 case EOL:
1695 if (PL_multiline)
1696 goto meol;
1697 else
1698 goto seol;
1699 case MEOL:
1700 meol:
1701 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1702 sayNO;
b8c5462f 1703 break;
d6a28714
JH
1704 case SEOL:
1705 seol:
1706 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1707 sayNO;
d6a28714 1708 if (PL_regeol - locinput > 1)
b8c5462f 1709 sayNO;
b8c5462f 1710 break;
d6a28714
JH
1711 case EOS:
1712 if (PL_regeol != locinput)
b8c5462f 1713 sayNO;
d6a28714
JH
1714 break;
1715 case SANYUTF8:
b8c5462f 1716 if (nextchr & 0x80) {
b8c5462f 1717 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1718 if (locinput > PL_regeol)
1719 sayNO;
b8c5462f
JH
1720 nextchr = UCHARAT(locinput);
1721 break;
1722 }
d6a28714 1723 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1724 sayNO;
b8c5462f 1725 nextchr = UCHARAT(++locinput);
a0d0e21e 1726 break;
d6a28714
JH
1727 case SANY:
1728 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1729 sayNO;
1730 nextchr = UCHARAT(++locinput);
b85d18e9 1731 break;
d6a28714 1732 case ANYUTF8:
a0ed51b3 1733 if (nextchr & 0x80) {
b8c5462f 1734 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1735 if (locinput > PL_regeol)
1736 sayNO;
a0ed51b3
LW
1737 nextchr = UCHARAT(locinput);
1738 break;
1739 }
d6a28714 1740 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3
LW
1741 sayNO;
1742 nextchr = UCHARAT(++locinput);
1743 break;
d6a28714
JH
1744 case REG_ANY:
1745 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1746 sayNO;
76e3520e 1747 nextchr = UCHARAT(++locinput);
a0d0e21e 1748 break;
d6a28714 1749 case EXACT:
cd439c50
IZ
1750 s = STRING(scan);
1751 ln = STR_LEN(scan);
d6a28714
JH
1752 /* Inline the first character, for speed. */
1753 if (UCHARAT(s) != nextchr)
1754 sayNO;
1755 if (PL_regeol - locinput < ln)
1756 sayNO;
1757 if (ln > 1 && memNE(s, locinput, ln))
1758 sayNO;
1759 locinput += ln;
1760 nextchr = UCHARAT(locinput);
1761 break;
1762 case EXACTFL:
b8c5462f
JH
1763 PL_reg_flags |= RF_tainted;
1764 /* FALL THROUGH */
d6a28714 1765 case EXACTF:
cd439c50
IZ
1766 s = STRING(scan);
1767 ln = STR_LEN(scan);
d6a28714
JH
1768
1769 if (UTF) {
1770 char *l = locinput;
1771 char *e = s + ln;
1772 c1 = OP(scan) == EXACTF;
1773 while (s < e) {
1774 if (l >= PL_regeol)
1775 sayNO;
1776 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1777 toLOWER_utf8((U8*)l) :
1778 toLOWER_LC_utf8((U8*)l)))
1779 {
1780 sayNO;
1781 }
1782 s += UTF8SKIP(s);
1783 l += UTF8SKIP(l);
b8c5462f 1784 }
d6a28714 1785 locinput = l;
a0ed51b3
LW
1786 nextchr = UCHARAT(locinput);
1787 break;
1788 }
d6a28714
JH
1789
1790 /* Inline the first character, for speed. */
1791 if (UCHARAT(s) != nextchr &&
1792 UCHARAT(s) != ((OP(scan) == EXACTF)
1793 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1794 sayNO;
d6a28714 1795 if (PL_regeol - locinput < ln)
b8c5462f 1796 sayNO;
d6a28714
JH
1797 if (ln > 1 && (OP(scan) == EXACTF
1798 ? ibcmp(s, locinput, ln)
1799 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1800 sayNO;
d6a28714
JH
1801 locinput += ln;
1802 nextchr = UCHARAT(locinput);
a0d0e21e 1803 break;
d6a28714 1804 case ANYOFUTF8:
cd439c50 1805 s = MASK(scan);
d6a28714 1806 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1807 sayNO;
d6a28714
JH
1808 if (locinput >= PL_regeol)
1809 sayNO;
1810 locinput += PL_utf8skip[nextchr];
1811 nextchr = UCHARAT(locinput);
1812 break;
1813 case ANYOF:
cd439c50 1814 s = MASK(scan);
d6a28714 1815 if (nextchr < 0)
b8c5462f 1816 nextchr = UCHARAT(locinput);
d6a28714
JH
1817 if (!REGINCLASS(s, nextchr))
1818 sayNO;
1819 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1820 sayNO;
b8c5462f
JH
1821 nextchr = UCHARAT(++locinput);
1822 break;
d6a28714 1823 case ALNUML:
b8c5462f
JH
1824 PL_reg_flags |= RF_tainted;
1825 /* FALL THROUGH */
d6a28714 1826 case ALNUM:
b8c5462f 1827 if (!nextchr)
4633a7c4 1828 sayNO;
d6a28714
JH
1829 if (!(OP(scan) == ALNUM
1830 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1831 sayNO;
1832 nextchr = UCHARAT(++locinput);
bbce6d69 1833 break;
d6a28714 1834 case ALNUMLUTF8:
3280af22 1835 PL_reg_flags |= RF_tainted;
bbce6d69 1836 /* FALL THROUGH */
d6a28714 1837 case ALNUMUTF8:
b8c5462f
JH
1838 if (!nextchr)
1839 sayNO;
1840 if (nextchr & 0x80) {
d6a28714
JH
1841 if (!(OP(scan) == ALNUMUTF8
1842 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1843 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1844 {
1845 sayNO;
a0ed51b3 1846 }
b8c5462f 1847 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1848 nextchr = UCHARAT(locinput);
1849 break;
1850 }
d6a28714
JH
1851 if (!(OP(scan) == ALNUMUTF8
1852 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 1853 sayNO;
b8c5462f 1854 nextchr = UCHARAT(++locinput);
a0d0e21e 1855 break;
d6a28714 1856 case NALNUML:
b8c5462f
JH
1857 PL_reg_flags |= RF_tainted;
1858 /* FALL THROUGH */
d6a28714
JH
1859 case NALNUM:
1860 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 1861 sayNO;
d6a28714
JH
1862 if (OP(scan) == NALNUM
1863 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 1864 sayNO;
b8c5462f 1865 nextchr = UCHARAT(++locinput);
a0ed51b3 1866 break;
d6a28714 1867 case NALNUMLUTF8:
b8c5462f
JH
1868 PL_reg_flags |= RF_tainted;
1869 /* FALL THROUGH */
d6a28714 1870 case NALNUMUTF8:
3280af22 1871 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1872 sayNO;
b8c5462f 1873 if (nextchr & 0x80) {
d6a28714
JH
1874 if (OP(scan) == NALNUMUTF8
1875 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1876 : isALNUM_LC_utf8((U8*)locinput))
1877 {
b8c5462f 1878 sayNO;
d6a28714 1879 }
b8c5462f
JH
1880 locinput += PL_utf8skip[nextchr];
1881 nextchr = UCHARAT(locinput);
1882 break;
1883 }
d6a28714
JH
1884 if (OP(scan) == NALNUMUTF8
1885 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 1886 sayNO;
76e3520e 1887 nextchr = UCHARAT(++locinput);
a0d0e21e 1888 break;
d6a28714
JH
1889 case BOUNDL:
1890 case NBOUNDL:
3280af22 1891 PL_reg_flags |= RF_tainted;
bbce6d69 1892 /* FALL THROUGH */
d6a28714
JH
1893 case BOUND:
1894 case NBOUND:
1895 /* was last char in word? */
1896 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
1897 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1898 ln = isALNUM(ln);
1899 n = isALNUM(nextchr);
1900 }
1901 else {
1902 ln = isALNUM_LC(ln);
1903 n = isALNUM_LC(nextchr);
1904 }
1905 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1906 sayNO;
a0d0e21e 1907 break;
d6a28714
JH
1908 case BOUNDLUTF8:
1909 case NBOUNDLUTF8:
a0ed51b3
LW
1910 PL_reg_flags |= RF_tainted;
1911 /* FALL THROUGH */
d6a28714
JH
1912 case BOUNDUTF8:
1913 case NBOUNDUTF8:
1914 /* was last char in word? */
1915 ln = (locinput != PL_regbol)
1916 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
1917 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1918 ln = isALNUM_uni(ln);
1919 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 1920 }
d6a28714
JH
1921 else {
1922 ln = isALNUM_LC_uni(ln);
1923 n = isALNUM_LC_utf8((U8*)locinput);
1924 }
1925 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 1926 sayNO;
a0ed51b3 1927 break;
d6a28714 1928 case SPACEL:
3280af22 1929 PL_reg_flags |= RF_tainted;
bbce6d69 1930 /* FALL THROUGH */
d6a28714
JH
1931 case SPACE:
1932 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1933 sayNO;
d6a28714
JH
1934 if (!(OP(scan) == SPACE
1935 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1936 sayNO;
76e3520e 1937 nextchr = UCHARAT(++locinput);
a0d0e21e 1938 break;
d6a28714 1939 case SPACELUTF8:
a0ed51b3
LW
1940 PL_reg_flags |= RF_tainted;
1941 /* FALL THROUGH */
d6a28714 1942 case SPACEUTF8:
a0ed51b3
LW
1943 if (!nextchr && locinput >= PL_regeol)
1944 sayNO;
1945 if (nextchr & 0x80) {
d6a28714
JH
1946 if (!(OP(scan) == SPACEUTF8
1947 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1948 : isSPACE_LC_utf8((U8*)locinput)))
1949 {
a0ed51b3 1950 sayNO;
d6a28714 1951 }
6f06b55f 1952 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1953 nextchr = UCHARAT(locinput);
1954 break;
1955 }
d6a28714
JH
1956 if (!(OP(scan) == SPACEUTF8
1957 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
1958 sayNO;
1959 nextchr = UCHARAT(++locinput);
1960 break;
d6a28714 1961 case NSPACEL:
3280af22 1962 PL_reg_flags |= RF_tainted;
bbce6d69 1963 /* FALL THROUGH */
d6a28714 1964 case NSPACE:
b8c5462f
JH
1965 if (!nextchr)
1966 sayNO;
d6a28714
JH
1967 if (OP(scan) == SPACE
1968 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1969 sayNO;
b8c5462f 1970 nextchr = UCHARAT(++locinput);
a0d0e21e 1971 break;
d6a28714 1972 case NSPACELUTF8:
a0ed51b3
LW
1973 PL_reg_flags |= RF_tainted;
1974 /* FALL THROUGH */
d6a28714 1975 case NSPACEUTF8:
b8c5462f
JH
1976 if (!nextchr)
1977 sayNO;
1978 if (nextchr & 0x80) {
d6a28714
JH
1979 if (OP(scan) == NSPACEUTF8
1980 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1981 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
1982 {
1983 sayNO;
1984 }
1985 locinput += PL_utf8skip[nextchr];
1986 nextchr = UCHARAT(locinput);
1987 break;
a0ed51b3 1988 }
d6a28714
JH
1989 if (OP(scan) == NSPACEUTF8
1990 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1991 sayNO;
76e3520e 1992 nextchr = UCHARAT(++locinput);
a0d0e21e 1993 break;
d6a28714 1994 case DIGITL:
a0ed51b3
LW
1995 PL_reg_flags |= RF_tainted;
1996 /* FALL THROUGH */
d6a28714 1997 case DIGIT:
a0ed51b3
LW
1998 if (!nextchr && locinput >= PL_regeol)
1999 sayNO;
d6a28714
JH
2000 if (!(OP(scan) == DIGIT
2001 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2002 sayNO;
76e3520e 2003 nextchr = UCHARAT(++locinput);
a0d0e21e 2004 break;
d6a28714 2005 case DIGITLUTF8:
a0ed51b3
LW
2006 PL_reg_flags |= RF_tainted;
2007 /* FALL THROUGH */
d6a28714 2008 case DIGITUTF8:
a0ed51b3
LW
2009 if (!nextchr)
2010 sayNO;
2011 if (nextchr & 0x80) {
d6a28714
JH
2012 if (OP(scan) == NDIGITUTF8
2013 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2014 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 2015 {
a0ed51b3 2016 sayNO;
dfe13c55 2017 }
6f06b55f 2018 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2019 nextchr = UCHARAT(locinput);
2020 break;
2021 }
d6a28714 2022 if (!isDIGIT(nextchr))
a0ed51b3
LW
2023 sayNO;
2024 nextchr = UCHARAT(++locinput);
2025 break;
d6a28714 2026 case NDIGITL:
b8c5462f
JH
2027 PL_reg_flags |= RF_tainted;
2028 /* FALL THROUGH */
d6a28714 2029 case NDIGIT:
b8c5462f
JH
2030 if (!nextchr)
2031 sayNO;
d6a28714
JH
2032 if (OP(scan) == DIGIT
2033 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2034 sayNO;
76e3520e 2035 nextchr = UCHARAT(++locinput);
a0d0e21e 2036 break;
d6a28714 2037 case NDIGITLUTF8:
b8c5462f
JH
2038 PL_reg_flags |= RF_tainted;
2039 /* FALL THROUGH */
d6a28714 2040 case NDIGITUTF8:
b8c5462f
JH
2041 if (!nextchr && locinput >= PL_regeol)
2042 sayNO;
a0ed51b3 2043 if (nextchr & 0x80) {
d6a28714 2044 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 2045 sayNO;
6f06b55f 2046 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2047 nextchr = UCHARAT(locinput);
2048 break;
2049 }
d6a28714 2050 if (isDIGIT(nextchr))
a0ed51b3
LW
2051 sayNO;
2052 nextchr = UCHARAT(++locinput);
2053 break;
2054 case CLUMP:
dfe13c55 2055 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2056 sayNO;
6f06b55f 2057 locinput += PL_utf8skip[nextchr];
dfe13c55 2058 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2059 locinput += UTF8SKIP(locinput);
2060 if (locinput > PL_regeol)
2061 sayNO;
2062 nextchr = UCHARAT(locinput);
2063 break;
c8756f30 2064 case REFFL:
3280af22 2065 PL_reg_flags |= RF_tainted;
c8756f30 2066 /* FALL THROUGH */
c277df42 2067 case REF:
c8756f30 2068 case REFF:
c277df42 2069 n = ARG(scan); /* which paren pair */
cf93c79d 2070 ln = PL_regstartp[n];
2c2d71f5 2071 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2072 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2073 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2074 if (ln == PL_regendp[n])
a0d0e21e 2075 break;
a0ed51b3 2076
cf93c79d 2077 s = PL_bostr + ln;
a0ed51b3
LW
2078 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2079 char *l = locinput;
cf93c79d 2080 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2081 /*
2082 * Note that we can't do the "other character" lookup trick as
2083 * in the 8-bit case (no pun intended) because in Unicode we
2084 * have to map both upper and title case to lower case.
2085 */
2086 if (OP(scan) == REFF) {
2087 while (s < e) {
2088 if (l >= PL_regeol)
2089 sayNO;
dfe13c55 2090 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2091 sayNO;
2092 s += UTF8SKIP(s);
2093 l += UTF8SKIP(l);
2094 }
2095 }
2096 else {
2097 while (s < e) {
2098 if (l >= PL_regeol)
2099 sayNO;
dfe13c55 2100 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2101 sayNO;
2102 s += UTF8SKIP(s);
2103 l += UTF8SKIP(l);
2104 }
2105 }
2106 locinput = l;
2107 nextchr = UCHARAT(locinput);
2108 break;
2109 }
2110
a0d0e21e 2111 /* Inline the first character, for speed. */
76e3520e 2112 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2113 (OP(scan) == REF ||
2114 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2115 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2116 sayNO;
cf93c79d 2117 ln = PL_regendp[n] - ln;
3280af22 2118 if (locinput + ln > PL_regeol)
4633a7c4 2119 sayNO;
c8756f30
AK
2120 if (ln > 1 && (OP(scan) == REF
2121 ? memNE(s, locinput, ln)
2122 : (OP(scan) == REFF
2123 ? ibcmp(s, locinput, ln)
2124 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2125 sayNO;
a0d0e21e 2126 locinput += ln;
76e3520e 2127 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2128 break;
2129
2130 case NOTHING:
c277df42 2131 case TAIL:
a0d0e21e
LW
2132 break;
2133 case BACK:
2134 break;
c277df42
IZ
2135 case EVAL:
2136 {
2137 dSP;
533c011a 2138 OP_4tree *oop = PL_op;
3280af22
NIS
2139 COP *ocurcop = PL_curcop;
2140 SV **ocurpad = PL_curpad;
c277df42
IZ
2141 SV *ret;
2142
2143 n = ARG(scan);
533c011a 2144 PL_op = (OP_4tree*)PL_regdata->data[n];
7b0972df 2145 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", (UV)PL_op) );
dfad63ad 2146 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2147 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2148
cea2e8a9 2149 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2150 SPAGAIN;
2151 ret = POPs;
2152 PUTBACK;
2153
0f5d15d6
IZ
2154 PL_op = oop;
2155 PL_curpad = ocurpad;
2156 PL_curcop = ocurcop;
c277df42 2157 if (logical) {
0f5d15d6
IZ
2158 if (logical == 2) { /* Postponed subexpression. */
2159 regexp *re;
22c35a8c 2160 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2161 re_cc_state state;
0f5d15d6
IZ
2162 CHECKPOINT cp, lastcp;
2163
2164 if(SvROK(ret) || SvRMAGICAL(ret)) {
2165 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2166
2167 if(SvMAGICAL(sv))
2168 mg = mg_find(sv, 'r');
2169 }
2170 if (mg) {
2171 re = (regexp *)mg->mg_obj;
df0003d4 2172 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2173 }
2174 else {
2175 STRLEN len;
2176 char *t = SvPV(ret, len);
2177 PMOP pm;
2178 char *oprecomp = PL_regprecomp;
2179 I32 osize = PL_regsize;
2180 I32 onpar = PL_regnpar;
2181
2182 pm.op_pmflags = 0;
cea2e8a9 2183 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2184 if (!(SvFLAGS(ret)
2185 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2186 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2187 PL_regprecomp = oprecomp;
2188 PL_regsize = osize;
2189 PL_regnpar = onpar;
2190 }
2191 DEBUG_r(
2192 PerlIO_printf(Perl_debug_log,
2193 "Entering embedded `%s%.60s%s%s'\n",
2194 PL_colors[0],
2195 re->precomp,
2196 PL_colors[1],
2197 (strlen(re->precomp) > 60 ? "..." : ""))
2198 );
2199 state.node = next;
2200 state.prev = PL_reg_call_cc;
2201 state.cc = PL_regcc;
2202 state.re = PL_reg_re;
2203
2ab05381 2204 PL_regcc = 0;
0f5d15d6
IZ
2205
2206 cp = regcppush(0); /* Save *all* the positions. */
2207 REGCP_SET;
2208 cache_re(re);
2209 state.ss = PL_savestack_ix;
2210 *PL_reglastparen = 0;
2211 PL_reg_call_cc = &state;
2212 PL_reginput = locinput;
2c2d71f5
JH
2213
2214 /* XXXX This is too dramatic a measure... */
2215 PL_reg_maxiter = 0;
2216
0f5d15d6 2217 if (regmatch(re->program + 1)) {
2c914db6
IZ
2218 /* Even though we succeeded, we need to restore
2219 global variables, since we may be wrapped inside
2220 SUSPEND, thus the match may be not finished yet. */
2221
2222 /* XXXX Do this only if SUSPENDed? */
2223 PL_reg_call_cc = state.prev;
2224 PL_regcc = state.cc;
2225 PL_reg_re = state.re;
2226 cache_re(PL_reg_re);
2227
2228 /* XXXX This is too dramatic a measure... */
2229 PL_reg_maxiter = 0;
2230
2231 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2232 ReREFCNT_dec(re);
2233 regcpblow(cp);
2234 sayYES;
2235 }
0f5d15d6
IZ
2236 ReREFCNT_dec(re);
2237 REGCP_UNWIND;
2238 regcppop();
2239 PL_reg_call_cc = state.prev;
2240 PL_regcc = state.cc;
2241 PL_reg_re = state.re;
d3790889 2242 cache_re(PL_reg_re);
2c2d71f5
JH
2243
2244 /* XXXX This is too dramatic a measure... */
2245 PL_reg_maxiter = 0;
2246
0f5d15d6
IZ
2247 sayNO;
2248 }
c277df42 2249 sw = SvTRUE(ret);
0f5d15d6 2250 logical = 0;
a0ed51b3
LW
2251 }
2252 else
3280af22 2253 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2254 break;
2255 }
a0d0e21e 2256 case OPEN:
c277df42 2257 n = ARG(scan); /* which paren pair */
3280af22
NIS
2258 PL_reg_start_tmp[n] = locinput;
2259 if (n > PL_regsize)
2260 PL_regsize = n;
a0d0e21e
LW
2261 break;
2262 case CLOSE:
c277df42 2263 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2264 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2265 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2266 if (n > *PL_reglastparen)
2267 *PL_reglastparen = n;
a0d0e21e 2268 break;
c277df42
IZ
2269 case GROUPP:
2270 n = ARG(scan); /* which paren pair */
cf93c79d 2271 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2272 break;
2273 case IFTHEN:
2c2d71f5 2274 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2275 if (sw)
2276 next = NEXTOPER(NEXTOPER(scan));
2277 else {
2278 next = scan + ARG(scan);
2279 if (OP(next) == IFTHEN) /* Fake one. */
2280 next = NEXTOPER(NEXTOPER(next));
2281 }
2282 break;
2283 case LOGICAL:
0f5d15d6 2284 logical = scan->flags;
c277df42 2285 break;
2ab05381
IZ
2286/*******************************************************************
2287 PL_regcc contains infoblock about the innermost (...)* loop, and
2288 a pointer to the next outer infoblock.
2289
2290 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2291
2292 1) After matching X, regnode for CURLYX is processed;
2293
2294 2) This regnode creates infoblock on the stack, and calls
2295 regmatch() recursively with the starting point at WHILEM node;
2296
2297 3) Each hit of WHILEM node tries to match A and Z (in the order
2298 depending on the current iteration, min/max of {min,max} and
2299 greediness). The information about where are nodes for "A"
2300 and "Z" is read from the infoblock, as is info on how many times "A"
2301 was already matched, and greediness.
2302
2303 4) After A matches, the same WHILEM node is hit again.
2304
2305 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2306 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2307 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2308 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2309 of the external loop.
2310
2311 Currently present infoblocks form a tree with a stem formed by PL_curcc
2312 and whatever it mentions via ->next, and additional attached trees
2313 corresponding to temporarily unset infoblocks as in "5" above.
2314
2315 In the following picture infoblocks for outer loop of
2316 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2317 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2318 infoblocks are drawn below the "reset" infoblock.
2319
2320 In fact in the picture below we do not show failed matches for Z and T
2321 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2322 more obvious *why* one needs to *temporary* unset infoblocks.]
2323
2324 Matched REx position InfoBlocks Comment
2325 (Y(A)*?Z)*?T x
2326 Y(A)*?Z)*?T x <- O
2327 Y (A)*?Z)*?T x <- O
2328 Y A)*?Z)*?T x <- O <- I
2329 YA )*?Z)*?T x <- O <- I
2330 YA A)*?Z)*?T x <- O <- I
2331 YAA )*?Z)*?T x <- O <- I
2332 YAA Z)*?T x <- O # Temporary unset I
2333 I
2334
2335 YAAZ Y(A)*?Z)*?T x <- O
2336 I
2337
2338 YAAZY (A)*?Z)*?T x <- O
2339 I
2340
2341 YAAZY A)*?Z)*?T x <- O <- I
2342 I
2343
2344 YAAZYA )*?Z)*?T x <- O <- I
2345 I
2346
2347 YAAZYA Z)*?T x <- O # Temporary unset I
2348 I,I
2349
2350 YAAZYAZ )*?T x <- O
2351 I,I
2352
2353 YAAZYAZ T x # Temporary unset O
2354 O
2355 I,I
2356
2357 YAAZYAZT x
2358 O
2359 I,I
2360 *******************************************************************/
a0d0e21e
LW
2361 case CURLYX: {
2362 CURCUR cc;
3280af22 2363 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2364
2365 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2366 next += ARG(next);
3280af22
NIS
2367 cc.oldcc = PL_regcc;
2368 PL_regcc = &cc;
2369 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2370 cc.cur = -1;
2371 cc.min = ARG1(scan);
2372 cc.max = ARG2(scan);
c277df42 2373 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2374 cc.next = next;
2375 cc.minmod = minmod;
2376 cc.lastloc = 0;
3280af22 2377 PL_reginput = locinput;
a0d0e21e
LW
2378 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2379 regcpblow(cp);
3280af22 2380 PL_regcc = cc.oldcc;
4633a7c4 2381 saySAME(n);
a0d0e21e
LW
2382 }
2383 /* NOT REACHED */
2384 case WHILEM: {
2385 /*
2386 * This is really hard to understand, because after we match
2387 * what we're trying to match, we must make sure the rest of
2c2d71f5 2388 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2389 * to go back UP the parse tree by recursing ever deeper. And
2390 * if it fails, we have to reset our parent's current state
2391 * that we can try again after backing off.
2392 */
2393
c277df42 2394 CHECKPOINT cp, lastcp;
3280af22 2395 CURCUR* cc = PL_regcc;
c277df42
IZ
2396 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2397
4633a7c4 2398 n = cc->cur + 1; /* how many we know we matched */
3280af22 2399 PL_reginput = locinput;
a0d0e21e 2400
c277df42
IZ
2401 DEBUG_r(
2402 PerlIO_printf(Perl_debug_log,
2403 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2404 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2405 (long)n, (long)cc->min,
2406 (long)cc->max, (long)cc)
2407 );
4633a7c4 2408
a0d0e21e
LW
2409 /* If degenerate scan matches "", assume scan done. */
2410
579cf2c3 2411 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2412 PL_regcc = cc->oldcc;
2ab05381
IZ
2413 if (PL_regcc)
2414 ln = PL_regcc->cur;
c277df42 2415 DEBUG_r(
c3464db5
DD
2416 PerlIO_printf(Perl_debug_log,
2417 "%*s empty match detected, try continuation...\n",
3280af22 2418 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2419 );
a0d0e21e 2420 if (regmatch(cc->next))
4633a7c4 2421 sayYES;
2ab05381
IZ
2422 if (PL_regcc)
2423 PL_regcc->cur = ln;
3280af22 2424 PL_regcc = cc;
4633a7c4 2425 sayNO;
a0d0e21e
LW
2426 }
2427
2428 /* First just match a string of min scans. */
2429
2430 if (n < cc->min) {
2431 cc->cur = n;
2432 cc->lastloc = locinput;
4633a7c4
LW
2433 if (regmatch(cc->scan))
2434 sayYES;
2435 cc->cur = n - 1;
c277df42 2436 cc->lastloc = lastloc;
4633a7c4 2437 sayNO;
a0d0e21e
LW
2438 }
2439
2c2d71f5
JH
2440 if (scan->flags) {
2441 /* Check whether we already were at this position.
2442 Postpone detection until we know the match is not
2443 *that* much linear. */
2444 if (!PL_reg_maxiter) {
2445 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2446 PL_reg_leftiter = PL_reg_maxiter;
2447 }
2448 if (PL_reg_leftiter-- == 0) {
2449 I32 size = (PL_reg_maxiter + 7)/8;
2450 if (PL_reg_poscache) {
2451 if (PL_reg_poscache_size < size) {
2452 Renew(PL_reg_poscache, size, char);
2453 PL_reg_poscache_size = size;
2454 }
2455 Zero(PL_reg_poscache, size, char);
2456 }
2457 else {
2458 PL_reg_poscache_size = size;
2459 Newz(29, PL_reg_poscache, size, char);
2460 }
2461 DEBUG_r(
2462 PerlIO_printf(Perl_debug_log,
2463 "%sDetected a super-linear match, switching on caching%s...\n",
2464 PL_colors[4], PL_colors[5])
2465 );
2466 }
2467 if (PL_reg_leftiter < 0) {
2468 I32 o = locinput - PL_bostr, b;
2469
2470 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2471 b = o % 8;
2472 o /= 8;
2473 if (PL_reg_poscache[o] & (1<<b)) {
2474 DEBUG_r(
2475 PerlIO_printf(Perl_debug_log,
2476 "%*s already tried at this position...\n",
2477 REPORT_CODE_OFF+PL_regindent*2, "")
2478 );
7821416a 2479 sayNO_SILENT;
2c2d71f5
JH
2480 }
2481 PL_reg_poscache[o] |= (1<<b);
2482 }
2483 }
2484
a0d0e21e
LW
2485 /* Prefer next over scan for minimal matching. */
2486
2487 if (cc->minmod) {
3280af22 2488 PL_regcc = cc->oldcc;
2ab05381
IZ
2489 if (PL_regcc)
2490 ln = PL_regcc->cur;
5f05dabc 2491 cp = regcppush(cc->parenfloor);
c277df42 2492 REGCP_SET;
5f05dabc 2493 if (regmatch(cc->next)) {
c277df42 2494 regcpblow(cp);
4633a7c4 2495 sayYES; /* All done. */
5f05dabc 2496 }
c277df42 2497 REGCP_UNWIND;
5f05dabc 2498 regcppop();
2ab05381
IZ
2499 if (PL_regcc)
2500 PL_regcc->cur = ln;
3280af22 2501 PL_regcc = cc;
a0d0e21e 2502
c277df42 2503 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2504 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
2505 && !(PL_reg_flags & RF_warned)) {
2506 PL_reg_flags |= RF_warned;
cea2e8a9 2507 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
2508 "Complex regular subexpression recursion",
2509 REG_INFTY - 1);
c277df42 2510 }
4633a7c4 2511 sayNO;
c277df42 2512 }
a687059c 2513
c277df42 2514 DEBUG_r(
c3464db5
DD
2515 PerlIO_printf(Perl_debug_log,
2516 "%*s trying longer...\n",
3280af22 2517 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2518 );
a0d0e21e 2519 /* Try scanning more and see if it helps. */
3280af22 2520 PL_reginput = locinput;
a0d0e21e
LW
2521 cc->cur = n;
2522 cc->lastloc = locinput;
5f05dabc 2523 cp = regcppush(cc->parenfloor);
c277df42 2524 REGCP_SET;
5f05dabc 2525 if (regmatch(cc->scan)) {
c277df42 2526 regcpblow(cp);
4633a7c4 2527 sayYES;
5f05dabc 2528 }
c277df42 2529 REGCP_UNWIND;
5f05dabc 2530 regcppop();
4633a7c4 2531 cc->cur = n - 1;
c277df42 2532 cc->lastloc = lastloc;
4633a7c4 2533 sayNO;
a0d0e21e
LW
2534 }
2535
2536 /* Prefer scan over next for maximal matching. */
2537
2538 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2539 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2540 cc->cur = n;
2541 cc->lastloc = locinput;
c277df42 2542 REGCP_SET;
5f05dabc 2543 if (regmatch(cc->scan)) {
c277df42 2544 regcpblow(cp);
4633a7c4 2545 sayYES;
5f05dabc 2546 }
c277df42 2547 REGCP_UNWIND;
a0d0e21e 2548 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2549 PL_reginput = locinput;
c277df42 2550 DEBUG_r(
c3464db5
DD
2551 PerlIO_printf(Perl_debug_log,
2552 "%*s failed, try continuation...\n",
3280af22 2553 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2554 );
2555 }
599cee73
PM
2556 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2557 && !(PL_reg_flags & RF_warned)) {
3280af22 2558 PL_reg_flags |= RF_warned;
cea2e8a9 2559 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
2560 "Complex regular subexpression recursion",
2561 REG_INFTY - 1);
a0d0e21e
LW
2562 }
2563
2564 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2565 PL_regcc = cc->oldcc;
2ab05381
IZ
2566 if (PL_regcc)
2567 ln = PL_regcc->cur;
a0d0e21e 2568 if (regmatch(cc->next))
4633a7c4 2569 sayYES;
2ab05381
IZ
2570 if (PL_regcc)
2571 PL_regcc->cur = ln;
3280af22 2572 PL_regcc = cc;
4633a7c4 2573 cc->cur = n - 1;
c277df42 2574 cc->lastloc = lastloc;
4633a7c4 2575 sayNO;
a0d0e21e
LW
2576 }
2577 /* NOT REACHED */
c277df42
IZ
2578 case BRANCHJ:
2579 next = scan + ARG(scan);
2580 if (next == scan)
2581 next = NULL;
2582 inner = NEXTOPER(NEXTOPER(scan));
2583 goto do_branch;
2584 case BRANCH:
2585 inner = NEXTOPER(scan);
2586 do_branch:
2587 {
2588 CHECKPOINT lastcp;
2589 c1 = OP(scan);
2590 if (OP(next) != c1) /* No choice. */
2591 next = inner; /* Avoid recursion. */
a0d0e21e 2592 else {
3280af22 2593 int lastparen = *PL_reglastparen;
c277df42
IZ
2594
2595 REGCP_SET;
a0d0e21e 2596 do {
3280af22 2597 PL_reginput = locinput;
c277df42 2598 if (regmatch(inner))
4633a7c4 2599 sayYES;
c277df42 2600 REGCP_UNWIND;
3280af22 2601 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2602 PL_regendp[n] = -1;
3280af22 2603 *PL_reglastparen = n;
c277df42 2604 scan = next;
a0d0e21e 2605 /*SUPPRESS 560*/
c277df42
IZ
2606 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2607 next += n;
a0d0e21e 2608 else
c277df42 2609 next = NULL;
c277df42
IZ
2610 inner = NEXTOPER(scan);
2611 if (c1 == BRANCHJ) {
2612 inner = NEXTOPER(inner);
2613 }
2614 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2615 sayNO;
a0d0e21e 2616 /* NOTREACHED */
a687059c 2617 }
a0d0e21e
LW
2618 }
2619 break;
2620 case MINMOD:
2621 minmod = 1;
2622 break;
c277df42
IZ
2623 case CURLYM:
2624 {
00db4c45 2625 I32 l = 0;
c277df42
IZ
2626 CHECKPOINT lastcp;
2627
2628 /* We suppose that the next guy does not need
2629 backtracking: in particular, it is of constant length,
2630 and has no parenths to influence future backrefs. */
2631 ln = ARG1(scan); /* min to match */
2632 n = ARG2(scan); /* max to match */
c277df42
IZ
2633 paren = scan->flags;
2634 if (paren) {
3280af22
NIS
2635 if (paren > PL_regsize)
2636 PL_regsize = paren;
2637 if (paren > *PL_reglastparen)
2638 *PL_reglastparen = paren;
c277df42 2639 }
dc45a647 2640 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2641 if (paren)
2642 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2643 PL_reginput = locinput;
c277df42
IZ
2644 if (minmod) {
2645 minmod = 0;
2646 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2647 sayNO;
5f4b28b2 2648 if (ln && l == 0 && n >= ln
c277df42
IZ
2649 /* In fact, this is tricky. If paren, then the
2650 fact that we did/didnot match may influence
2651 future execution. */
2652 && !(paren && ln == 0))
2653 ln = n;
3280af22 2654 locinput = PL_reginput;
22c35a8c 2655 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2656 c1 = (U8)*STRING(next);
c277df42 2657 if (OP(next) == EXACTF)
22c35a8c 2658 c2 = PL_fold[c1];
c277df42 2659 else if (OP(next) == EXACTFL)
22c35a8c 2660 c2 = PL_fold_locale[c1];
c277df42
IZ
2661 else
2662 c2 = c1;
a0ed51b3
LW
2663 }
2664 else
c277df42
IZ
2665 c1 = c2 = -1000;
2666 REGCP_SET;
5f4b28b2 2667 /* This may be improved if l == 0. */
c277df42
IZ
2668 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2669 /* If it could work, try it. */
2670 if (c1 == -1000 ||
3280af22
NIS
2671 UCHARAT(PL_reginput) == c1 ||
2672 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2673 {
2674 if (paren) {
2675 if (n) {
cf93c79d
IZ
2676 PL_regstartp[paren] =
2677 HOPc(PL_reginput, -l) - PL_bostr;
2678 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2679 }
2680 else
cf93c79d 2681 PL_regendp[paren] = -1;
c277df42
IZ
2682 }
2683 if (regmatch(next))
2684 sayYES;
2685 REGCP_UNWIND;
2686 }
2687 /* Couldn't or didn't -- move forward. */
3280af22 2688 PL_reginput = locinput;
c277df42
IZ
2689 if (regrepeat_hard(scan, 1, &l)) {
2690 ln++;
3280af22 2691 locinput = PL_reginput;
c277df42
IZ
2692 }
2693 else
2694 sayNO;
2695 }
a0ed51b3
LW
2696 }
2697 else {
c277df42
IZ
2698 n = regrepeat_hard(scan, n, &l);
2699 if (n != 0 && l == 0
2700 /* In fact, this is tricky. If paren, then the
2701 fact that we did/didnot match may influence
2702 future execution. */
2703 && !(paren && ln == 0))
2704 ln = n;
3280af22 2705 locinput = PL_reginput;
c277df42 2706 DEBUG_r(
5c0ca799 2707 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2708 "%*s matched %d times, len=%"IVdf"...\n",
2709 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
2710 n, (IV)l)
c277df42
IZ
2711 );
2712 if (n >= ln) {
22c35a8c 2713 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2714 c1 = (U8)*STRING(next);
c277df42 2715 if (OP(next) == EXACTF)
22c35a8c 2716 c2 = PL_fold[c1];
c277df42 2717 else if (OP(next) == EXACTFL)
22c35a8c 2718 c2 = PL_fold_locale[c1];
c277df42
IZ
2719 else
2720 c2 = c1;
a0ed51b3
LW
2721 }
2722 else
c277df42
IZ
2723 c1 = c2 = -1000;
2724 }
2725 REGCP_SET;
2726 while (n >= ln) {
2727 /* If it could work, try it. */
2728 if (c1 == -1000 ||
3280af22
NIS
2729 UCHARAT(PL_reginput) == c1 ||
2730 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2731 {
2732 DEBUG_r(
c3464db5 2733 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2734 "%*s trying tail with n=%"IVdf"...\n",
2735 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2736 );
2737 if (paren) {
2738 if (n) {
cf93c79d
IZ
2739 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2740 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2741 }
a0ed51b3 2742 else
cf93c79d 2743 PL_regendp[paren] = -1;
c277df42 2744 }
a0ed51b3
LW
2745 if (regmatch(next))
2746 sayYES;
2747 REGCP_UNWIND;
2748 }
c277df42
IZ
2749 /* Couldn't or didn't -- back up. */
2750 n--;
dfe13c55 2751 locinput = HOPc(locinput, -l);
3280af22 2752 PL_reginput = locinput;
c277df42
IZ
2753 }
2754 }
2755 sayNO;
2756 break;
2757 }
2758 case CURLYN:
2759 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2760 if (paren > PL_regsize)
2761 PL_regsize = paren;
2762 if (paren > *PL_reglastparen)
2763 *PL_reglastparen = paren;
c277df42
IZ
2764 ln = ARG1(scan); /* min to match */
2765 n = ARG2(scan); /* max to match */
dc45a647 2766 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2767 goto repeat;
a0d0e21e 2768 case CURLY:
c277df42 2769 paren = 0;
a0d0e21e
LW
2770 ln = ARG1(scan); /* min to match */
2771 n = ARG2(scan); /* max to match */
dc45a647 2772 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2773 goto repeat;
2774 case STAR:
2775 ln = 0;
c277df42 2776 n = REG_INFTY;
a0d0e21e 2777 scan = NEXTOPER(scan);
c277df42 2778 paren = 0;
a0d0e21e
LW
2779 goto repeat;
2780 case PLUS:
c277df42
IZ
2781 ln = 1;
2782 n = REG_INFTY;
2783 scan = NEXTOPER(scan);
2784 paren = 0;
2785 repeat:
a0d0e21e
LW
2786 /*
2787 * Lookahead to avoid useless match attempts
2788 * when we know what character comes next.
2789 */
22c35a8c 2790 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2791 c1 = (U8)*STRING(next);
bbce6d69 2792 if (OP(next) == EXACTF)
22c35a8c 2793 c2 = PL_fold[c1];
bbce6d69 2794 else if (OP(next) == EXACTFL)
22c35a8c 2795 c2 = PL_fold_locale[c1];
bbce6d69 2796 else
2797 c2 = c1;
2798 }
a0d0e21e 2799 else
bbce6d69 2800 c1 = c2 = -1000;
3280af22 2801 PL_reginput = locinput;
a0d0e21e 2802 if (minmod) {
c277df42 2803 CHECKPOINT lastcp;
a0d0e21e
LW
2804 minmod = 0;
2805 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2806 sayNO;
a0ed51b3 2807 locinput = PL_reginput;
c277df42 2808 REGCP_SET;
0fe9bf95
IZ
2809 if (c1 != -1000) {
2810 char *e = locinput + n - ln; /* Should not check after this */
2811 char *old = locinput;
2812
2813 if (e >= PL_regeol || (n == REG_INFTY))
2814 e = PL_regeol - 1;
2815 while (1) {
2816 /* Find place 'next' could work */
2817 if (c1 == c2) {
2818 while (locinput <= e && *locinput != c1)
2819 locinput++;
2820 } else {
2821 while (locinput <= e
2822 && *locinput != c1
2823 && *locinput != c2)
2824 locinput++;
2825 }
2826 if (locinput > e)
2827 sayNO;
2828 /* PL_reginput == old now */
2829 if (locinput != old) {
2830 ln = 1; /* Did some */
2831 if (regrepeat(scan, locinput - old) <
2832 locinput - old)
2833 sayNO;
2834 }
2835 /* PL_reginput == locinput now */
2836 if (paren) {
2837 if (ln) {
cf93c79d
IZ
2838 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2839 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2840 }
2841 else
cf93c79d 2842 PL_regendp[paren] = -1;
0fe9bf95
IZ
2843 }
2844 if (regmatch(next))
2845 sayYES;
2846 PL_reginput = locinput; /* Could be reset... */
2847 REGCP_UNWIND;
2848 /* Couldn't or didn't -- move forward. */
2849 old = locinput++;
2850 }
2851 }
2852 else
c277df42 2853 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2854 /* If it could work, try it. */
bbce6d69 2855 if (c1 == -1000 ||
3280af22
NIS
2856 UCHARAT(PL_reginput) == c1 ||
2857 UCHARAT(PL_reginput) == c2)
bbce6d69 2858 {
c277df42
IZ
2859 if (paren) {
2860 if (n) {
cf93c79d
IZ
2861 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2862 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2863 }
2864 else
cf93c79d 2865 PL_regendp[paren] = -1;
c277df42 2866 }
a0d0e21e 2867 if (regmatch(next))
4633a7c4 2868 sayYES;
c277df42 2869 REGCP_UNWIND;
bbce6d69 2870 }
c277df42 2871 /* Couldn't or didn't -- move forward. */
a0ed51b3 2872 PL_reginput = locinput;
a0d0e21e
LW
2873 if (regrepeat(scan, 1)) {
2874 ln++;
a0ed51b3
LW
2875 locinput = PL_reginput;
2876 }
2877 else
4633a7c4 2878 sayNO;
a0d0e21e
LW
2879 }
2880 }
2881 else {
c277df42 2882 CHECKPOINT lastcp;
a0d0e21e 2883 n = regrepeat(scan, n);
a0ed51b3 2884 locinput = PL_reginput;
22c35a8c 2885 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 2886 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2887 ln = n; /* why back off? */
c277df42
IZ
2888 REGCP_SET;
2889 if (paren) {
2890 while (n >= ln) {
2891 /* If it could work, try it. */
2892 if (c1 == -1000 ||
3280af22
NIS
2893 UCHARAT(PL_reginput) == c1 ||
2894 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2895 {
2896 if (paren && n) {
2897 if (n) {
cf93c79d
IZ
2898 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2899 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2900 }
2901 else
cf93c79d 2902 PL_regendp[paren] = -1;
c277df42
IZ
2903 }
2904 if (regmatch(next))
2905 sayYES;
2906 REGCP_UNWIND;
2907 }
2908 /* Couldn't or didn't -- back up. */
2909 n--;
dfe13c55 2910 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2911 }
a0ed51b3
LW
2912 }
2913 else {
c277df42
IZ
2914 while (n >= ln) {
2915 /* If it could work, try it. */
2916 if (c1 == -1000 ||
3280af22
NIS
2917 UCHARAT(PL_reginput) == c1 ||
2918 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2919 {
2920 if (regmatch(next))
2921 sayYES;
2922 REGCP_UNWIND;
2923 }
2924 /* Couldn't or didn't -- back up. */
2925 n--;
dfe13c55 2926 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2927 }
a0d0e21e
LW
2928 }
2929 }
4633a7c4 2930 sayNO;
c277df42 2931 break;
a0d0e21e 2932 case END:
0f5d15d6
IZ
2933 if (PL_reg_call_cc) {
2934 re_cc_state *cur_call_cc = PL_reg_call_cc;
2935 CURCUR *cctmp = PL_regcc;
2936 regexp *re = PL_reg_re;
2937 CHECKPOINT cp, lastcp;
2938
2939 cp = regcppush(0); /* Save *all* the positions. */
2940 REGCP_SET;
2941 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2942 the caller. */
2943 PL_reginput = locinput; /* Make position available to
2944 the callcc. */
2945 cache_re(PL_reg_call_cc->re);
2946 PL_regcc = PL_reg_call_cc->cc;
2947 PL_reg_call_cc = PL_reg_call_cc->prev;
2948 if (regmatch(cur_call_cc->node)) {
2949 PL_reg_call_cc = cur_call_cc;
2950 regcpblow(cp);
2951 sayYES;
2952 }
2953 REGCP_UNWIND;
2954 regcppop();
2955 PL_reg_call_cc = cur_call_cc;
2956 PL_regcc = cctmp;
2957 PL_reg_re = re;
2958 cache_re(re);
2959
2960 DEBUG_r(
2961 PerlIO_printf(Perl_debug_log,
2962 "%*s continuation failed...\n",
2963 REPORT_CODE_OFF+PL_regindent*2, "")
2964 );
7821416a 2965 sayNO_SILENT;
0f5d15d6 2966 }
7821416a
IZ
2967 if (locinput < PL_regtill) {
2968 DEBUG_r(PerlIO_printf(Perl_debug_log,
2969 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
2970 PL_colors[4],
2971 (long)(locinput - PL_reg_starttry),
2972 (long)(PL_regtill - PL_reg_starttry),
2973 PL_colors[5]));
2974 sayNO_FINAL; /* Cannot match: too short. */
2975 }
2976 PL_reginput = locinput; /* put where regtry can find it */
2977 sayYES_FINAL; /* Success! */
7e5428c5 2978 case SUCCEED:
3280af22 2979 PL_reginput = locinput; /* put where regtry can find it */
7821416a 2980 sayYES_LOUD; /* Success! */
c277df42
IZ
2981 case SUSPEND:
2982 n = 1;
9fe1d20c 2983 PL_reginput = locinput;
c277df42 2984 goto do_ifmatch;
a0d0e21e 2985 case UNLESSM:
c277df42 2986 n = 0;
a0ed51b3 2987 if (scan->flags) {
0fe9bf95
IZ
2988 if (UTF) { /* XXXX This is absolutely
2989 broken, we read before
2990 start of string. */
2991 s = HOPMAYBEc(locinput, -scan->flags);
2992 if (!s)
2993 goto say_yes;
2994 PL_reginput = s;
2995 }
2996 else {
2997 if (locinput < PL_bostr + scan->flags)
2998 goto say_yes;
2999 PL_reginput = locinput - scan->flags;
3000 goto do_ifmatch;
3001 }
a0ed51b3
LW
3002 }
3003 else
3004 PL_reginput = locinput;
c277df42
IZ
3005 goto do_ifmatch;
3006 case IFMATCH:
3007 n = 1;
a0ed51b3 3008 if (scan->flags) {
0fe9bf95
IZ
3009 if (UTF) { /* XXXX This is absolutely
3010 broken, we read before
3011 start of string. */
3012 s = HOPMAYBEc(locinput, -scan->flags);
3013 if (!s || s < PL_bostr)
3014 goto say_no;
3015 PL_reginput = s;
3016 }
3017 else {
3018 if (locinput < PL_bostr + scan->flags)
3019 goto say_no;
3020 PL_reginput = locinput - scan->flags;
3021 goto do_ifmatch;
3022 }
a0ed51b3
LW
3023 }
3024 else
3025 PL_reginput = locinput;
3026
c277df42 3027 do_ifmatch:
c277df42
IZ
3028 inner = NEXTOPER(NEXTOPER(scan));
3029 if (regmatch(inner) != n) {
3030 say_no:
3031 if (logical) {
3032 logical = 0;
3033 sw = 0;
3034 goto do_longjump;
a0ed51b3
LW
3035 }
3036 else
c277df42
IZ
3037 sayNO;
3038 }
3039 say_yes:
3040 if (logical) {
3041 logical = 0;
3042 sw = 1;
3043 }
fe44a5e8 3044 if (OP(scan) == SUSPEND) {
3280af22 3045 locinput = PL_reginput;
565764a8 3046 nextchr = UCHARAT(locinput);
fe44a5e8 3047 }
c277df42
IZ
3048 /* FALL THROUGH. */
3049 case LONGJMP:
3050 do_longjump:
3051 next = scan + ARG(scan);
3052 if (next == scan)
3053 next = NULL;
a0d0e21e
LW
3054 break;
3055 default:
bf49b057 3056 PerlIO_printf(Perl_error_log, "%lx %d\n",
c277df42 3057 (unsigned long)scan, OP(scan));
cea2e8a9 3058 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3059 }
a0d0e21e
LW
3060 scan = next;
3061 }
a687059c 3062
a0d0e21e
LW
3063 /*
3064 * We get here only if there's trouble -- normally "case END" is
3065 * the terminating point.
3066 */
cea2e8a9 3067 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3068 /*NOTREACHED*/
4633a7c4
LW
3069 sayNO;
3070
7821416a
IZ
3071yes_loud:
3072 DEBUG_r(
3073 PerlIO_printf(Perl_debug_log,
3074 "%*s %scould match...%s\n",
3075 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3076 );
3077 goto yes;
3078yes_final:
3079 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3080 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3081yes:
3082#ifdef DEBUGGING
3280af22 3083 PL_regindent--;
4633a7c4
LW
3084#endif
3085 return 1;
3086
3087no:
7821416a
IZ
3088 DEBUG_r(
3089 PerlIO_printf(Perl_debug_log,
3090 "%*s %sfailed...%s\n",
3091 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3092 );
3093 goto do_no;
3094no_final:
3095do_no:
4633a7c4 3096#ifdef DEBUGGING
3280af22 3097 PL_regindent--;
4633a7c4 3098#endif
a0d0e21e 3099 return 0;
a687059c
LW
3100}
3101
3102/*
3103 - regrepeat - repeatedly match something simple, report how many
3104 */
3105/*
3106 * [This routine now assumes that it will only match on things of length 1.
3107 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3108 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3109 */
76e3520e 3110STATIC I32
cea2e8a9 3111S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3112{
5c0ca799 3113 dTHR;
a0d0e21e
LW
3114 register char *scan;
3115 register char *opnd;
3116 register I32 c;
3280af22 3117 register char *loceol = PL_regeol;
a0ed51b3 3118 register I32 hardcount = 0;
a0d0e21e 3119
3280af22 3120 scan = PL_reginput;
c277df42 3121 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3122 loceol = scan + max;
a0d0e21e 3123 switch (OP(p)) {
22c35a8c 3124 case REG_ANY:
a0d0e21e
LW
3125 while (scan < loceol && *scan != '\n')
3126 scan++;
3127 break;
3128 case SANY:
3129 scan = loceol;
3130 break;
a0ed51b3
LW
3131 case ANYUTF8:
3132 loceol = PL_regeol;
3133 while (scan < loceol && *scan != '\n') {
3134 scan += UTF8SKIP(scan);
3135 hardcount++;
3136 }
3137 break;
3138 case SANYUTF8:
3139 loceol = PL_regeol;
3140 while (scan < loceol) {
3141 scan += UTF8SKIP(scan);
3142 hardcount++;
3143 }
3144 break;
bbce6d69 3145 case EXACT: /* length of string is 1 */
cd439c50 3146 c = (U8)*STRING(p);
bbce6d69 3147 while (scan < loceol && UCHARAT(scan) == c)
3148 scan++;
3149 break;
3150 case EXACTF: /* length of string is 1 */
cd439c50 3151 c = (U8)*STRING(p);
bbce6d69 3152 while (scan < loceol &&
22c35a8c 3153 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3154 scan++;
3155 break;
3156 case EXACTFL: /* length of string is 1 */
3280af22 3157 PL_reg_flags |= RF_tainted;
cd439c50 3158 c = (U8)*STRING(p);
bbce6d69 3159 while (scan < loceol &&
22c35a8c 3160 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3161 scan++;
3162 break;
a0ed51b3
LW
3163 case ANYOFUTF8:
3164 loceol = PL_regeol;
3165 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3166 scan += UTF8SKIP(scan);
3167 hardcount++;
3168 }
3169 break;
a0d0e21e 3170 case ANYOF:
cd439c50 3171 opnd = MASK(p);
ae5c130c 3172 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 3173 scan++;
a0d0e21e
LW
3174 break;
3175 case ALNUM:
3176 while (scan < loceol && isALNUM(*scan))
3177 scan++;
3178 break;
a0ed51b3
LW
3179 case ALNUMUTF8:
3180 loceol = PL_regeol;
dfe13c55 3181 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3182 scan += UTF8SKIP(scan);
3183 hardcount++;
3184 }
3185 break;
bbce6d69 3186 case ALNUML:
3280af22 3187 PL_reg_flags |= RF_tainted;
bbce6d69 3188 while (scan < loceol && isALNUM_LC(*scan))
3189 scan++;
3190 break;
a0ed51b3
LW
3191 case ALNUMLUTF8:
3192 PL_reg_flags |= RF_tainted;
3193 loceol = PL_regeol;
dfe13c55 3194 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3195 scan += UTF8SKIP(scan);
3196 hardcount++;
3197 }
3198 break;
3199 break;
a0d0e21e
LW
3200 case NALNUM:
3201 while (scan < loceol && !isALNUM(*scan))
3202 scan++;
3203 break;
a0ed51b3
LW
3204 case NALNUMUTF8:
3205 loceol = PL_regeol;
dfe13c55 3206 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3207 scan += UTF8SKIP(scan);
3208 hardcount++;
3209 }
3210 break;
bbce6d69 3211 case NALNUML:
3280af22 3212 PL_reg_flags |= RF_tainted;
bbce6d69 3213 while (scan < loceol && !isALNUM_LC(*scan))
3214 scan++;
3215 break;
a0ed51b3
LW
3216 case NALNUMLUTF8:
3217 PL_reg_flags |= RF_tainted;
3218 loceol = PL_regeol;
dfe13c55 3219 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3220 scan += UTF8SKIP(scan);
3221 hardcount++;
3222 }
3223 break;
a0d0e21e
LW
3224 case SPACE:
3225 while (scan < loceol && isSPACE(*scan))
3226 scan++;
3227 break;
a0ed51b3
LW
3228 case SPACEUTF8:
3229 loceol = PL_regeol;
dfe13c55 3230 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3231 scan += UTF8SKIP(scan);
3232 hardcount++;
3233 }
3234 break;
bbce6d69 3235 case SPACEL:
3280af22 3236 PL_reg_flags |= RF_tainted;
bbce6d69 3237 while (scan < loceol && isSPACE_LC(*scan))
3238 scan++;
3239 break;
a0ed51b3
LW
3240 case SPACELUTF8:
3241 PL_reg_flags |= RF_tainted;
3242 loceol = PL_regeol;
dfe13c55 3243 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3244 scan += UTF8SKIP(scan);
3245 hardcount++;
3246 }
3247 break;
a0d0e21e
LW
3248 case NSPACE:
3249 while (scan < loceol && !isSPACE(*scan))
3250 scan++;
3251 break;
a0ed51b3
LW
3252 case NSPACEUTF8:
3253 loceol = PL_regeol;
dfe13c55 3254 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3255 scan += UTF8SKIP(scan);
3256 hardcount++;
3257 }
3258 break;
bbce6d69 3259 case NSPACEL:
3280af22 3260 PL_reg_flags |= RF_tainted;
bbce6d69 3261 while (scan < loceol && !isSPACE_LC(*scan))
3262 scan++;
3263 break;
a0ed51b3
LW
3264 case NSPACELUTF8:
3265 PL_reg_flags |= RF_tainted;
3266 loceol = PL_regeol;
dfe13c55 3267 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3268 scan += UTF8SKIP(scan);
3269 hardcount++;
3270 }
3271 break;
a0d0e21e
LW
3272 case DIGIT:
3273 while (scan < loceol && isDIGIT(*scan))
3274 scan++;
3275 break;
a0ed51b3
LW
3276 case DIGITUTF8:
3277 loceol = PL_regeol;
dfe13c55 3278 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3279 scan += UTF8SKIP(scan);
3280 hardcount++;
3281 }
3282 break;
3283 break;
a0d0e21e
LW
3284 case NDIGIT:
3285 while (scan < loceol && !isDIGIT(*scan))
3286 scan++;
3287 break;
a0ed51b3
LW
3288 case NDIGITUTF8:
3289 loceol = PL_regeol;
dfe13c55 3290 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3291 scan += UTF8SKIP(scan);
3292 hardcount++;
3293 }
3294 break;
a0d0e21e
LW
3295 default: /* Called on something of 0 width. */
3296 break; /* So match right here or not at all. */
3297 }
a687059c 3298
a0ed51b3
LW
3299 if (hardcount)
3300 c = hardcount;
3301 else
3302 c = scan - PL_reginput;
3280af22 3303 PL_reginput = scan;
a687059c 3304
c277df42
IZ
3305 DEBUG_r(
3306 {
3307 SV *prop = sv_newmortal();
3308
3309 regprop(prop, p);
3310 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3311 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3312 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3313 });
3314
a0d0e21e 3315 return(c);
a687059c
LW
3316}
3317
3318/*
c277df42
IZ
3319 - regrepeat_hard - repeatedly match something, report total lenth and length
3320 *
3321 * The repeater is supposed to have constant length.
3322 */
3323
76e3520e 3324STATIC I32
cea2e8a9 3325S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3326{
5c0ca799 3327 dTHR;
c277df42
IZ
3328 register char *scan;
3329 register char *start;
3280af22 3330 register char *loceol = PL_regeol;
a0ed51b3 3331 I32 l = 0;
708e3b05 3332 I32 count = 0, res = 1;
a0ed51b3
LW
3333
3334 if (!max)
3335 return 0;
c277df42 3336
3280af22 3337 start = PL_reginput;
a0ed51b3 3338 if (UTF) {
708e3b05 3339 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3340 if (!count++) {
3341 l = 0;
3342 while (start < PL_reginput) {
3343 l++;
3344 start += UTF8SKIP(start);
3345 }
3346 *lp = l;
3347 if (l == 0)
3348 return max;
3349 }
3350 if (count == max)
3351 return count;
3352 }
3353 }
3354 else {
708e3b05 3355 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3356 if (!count++) {
3357 *lp = l = PL_reginput - start;
3358 if (max != REG_INFTY && l*max < loceol - scan)
3359 loceol = scan + l*max;
3360 if (l == 0)
3361 return max;
c277df42
IZ
3362 }
3363 }
3364 }
708e3b05 3365 if (!res)
3280af22 3366 PL_reginput = scan;
c277df42 3367
a0ed51b3 3368 return count;
c277df42
IZ
3369}
3370
3371/*
cb8d8820 3372 - reginclass - determine if a character falls into a character class
bbce6d69 3373 */
3374
76e3520e 3375STATIC bool
cea2e8a9 3376S_reginclass(pTHX_ register char *p, register I32 c)
bbce6d69 3377{
5c0ca799 3378 dTHR;
b8c5462f 3379 char flags = ANYOF_FLAGS(p);
bbce6d69 3380 bool match = FALSE;
3381
3382 c &= 0xFF;
b8c5462f 3383 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3384 match = TRUE;
3385 else if (flags & ANYOF_FOLD) {
3386 I32 cf;
3387 if (flags & ANYOF_LOCALE) {
3280af22 3388 PL_reg_flags |= RF_tainted;
22c35a8c 3389 cf = PL_fold_locale[c];
bbce6d69 3390 }
3391 else
22c35a8c 3392 cf = PL_fold[c];
b8c5462f 3393 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3394 match = TRUE;
3395 }
3396
b8c5462f 3397 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3398 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3399 if (
3400 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3401 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3402 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3403 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3404 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3405 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3406 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3407 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3408 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3409 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3410 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3411 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3412 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3413 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3414 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3415 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3416 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3417 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3418 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3419 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3420 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3421 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3422 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3423 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3424 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3425 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3426 ) /* How's that for a conditional? */
bbce6d69 3427 {
3428 match = TRUE;
3429 }
3430 }
3431
ae5c130c 3432 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3433}
3434
a0ed51b3 3435STATIC bool
cea2e8a9 3436S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3437{
3438 dTHR;
a0ed51b3
LW
3439 char flags = ARG1(f);
3440 bool match = FALSE;
3441 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3442
3443 if (swash_fetch(sv, p))
3444 match = TRUE;
3445 else if (flags & ANYOF_FOLD) {
3446 I32 cf;
dfe13c55 3447 U8 tmpbuf[10];
a0ed51b3
LW
3448 if (flags & ANYOF_LOCALE) {
3449 PL_reg_flags |= RF_tainted;
3450 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3451 }
3452 else
3453 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3454 if (swash_fetch(sv, tmpbuf))
3455 match = TRUE;
3456 }
3457
b8c5462f 3458 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3459
3460 return (flags & ANYOF_INVERT) ? !match : match;
3461}
161b471a 3462
dfe13c55 3463STATIC U8 *
cea2e8a9 3464S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3465{
3466 dTHR;
a0ed51b3
LW
3467 if (off >= 0) {
3468 while (off-- && s < (U8*)PL_regeol)
3469 s += UTF8SKIP(s);
3470 }
3471 else {
3472 while (off++) {
3473 if (s > (U8*)PL_bostr) {
3474 s--;
3475 if (*s & 0x80) {
3476 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3477 s--;
3478 } /* XXX could check well-formedness here */
3479 }
3480 }
3481 }
3482 return s;
3483}
161b471a 3484
dfe13c55 3485STATIC U8 *
cea2e8a9 3486S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3487{
c485e607 3488 dTHR;
a0ed51b3
LW
3489 if (off >= 0) {
3490 while (off-- && s < (U8*)PL_regeol)
3491 s += UTF8SKIP(s);
3492 if (off >= 0)
3493 return 0;
3494 }
3495 else {
3496 while (off++) {
3497 if (s > (U8*)PL_bostr) {
3498 s--;
3499 if (*s & 0x80) {
3500 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3501 s--;
3502 } /* XXX could check well-formedness here */
3503 }
3504 else
3505 break;
3506 }
3507 if (off <= 0)
3508 return 0;
3509 }
3510 return s;
3511}
51371543
GS
3512
3513#ifdef PERL_OBJECT
3514#define NO_XSLOCKS
3515#include "XSUB.h"
3516#endif
3517
3518static void
3519restore_pos(pTHXo_ void *arg)
3520{
3521 dTHR;
3522 if (PL_reg_eval_set) {
3523 if (PL_reg_oldsaved) {
3524 PL_reg_re->subbeg = PL_reg_oldsaved;
3525 PL_reg_re->sublen = PL_reg_oldsavedlen;
3526 RX_MATCH_COPIED_on(PL_reg_re);
3527 }
3528 PL_reg_magic->mg_len = PL_reg_oldpos;
3529 PL_reg_eval_set = 0;
3530 PL_curpm = PL_reg_oldcurpm;
3531 }
3532}
3533