This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Patch for EPOC Support
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5
DD
148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
149 " Setting an EVAL scope, savestack=%i\n", \
3280af22 150 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5
DD
153 PerlIO_printf(Perl_debug_log, \
154 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 155 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5
DD
188 PerlIO_printf(Perl_debug_log,
189 " restoring \\%d..\\%d to undef\n",
3280af22 190 *PL_reglastparen + 1, PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
2c2d71f5
JH
278/* A failure to find a constant substring means that there is no need to make
279 an expensive call to REx engine, thus we celebrate a failure. Similarly,
280 finding a substring too deep into the string means that less calls to
30944b6d
IZ
281 regtry() should be needed.
282
283 REx compiler's optimizer found 4 possible hints:
284 a) Anchored substring;
285 b) Fixed substring;
286 c) Whether we are anchored (beginning-of-line or \G);
287 d) First node (of those at offset 0) which may distingush positions;
288 We use 'a', 'b', multiline-part of 'c', and try to find a position in the
289 string which does not contradict any of them.
290 */
2c2d71f5 291
cad2e5aa
JH
292char *
293Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
294 char *strend, U32 flags, re_scream_pos_data *data)
295{
2c2d71f5 296 register I32 start_shift;
cad2e5aa 297 /* Should be nonnegative! */
2c2d71f5
JH
298 register I32 end_shift;
299 register char *s;
300 register SV *check;
cad2e5aa
JH
301 char *t;
302 I32 ml_anch;
2c2d71f5
JH
303 char *tmp;
304 register char *other_last = Nullch;
30944b6d
IZ
305#ifdef DEBUGGING
306 char *i_strpos = strpos;
307#endif
cad2e5aa
JH
308
309 DEBUG_r( if (!PL_colorset) reginitcolors() );
310 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 311 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
312 PL_colors[4],PL_colors[5],PL_colors[0],
313 prog->precomp,
314 PL_colors[1],
315 (strlen(prog->precomp) > 60 ? "..." : ""),
316 PL_colors[0],
b900a521 317 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
318 strpos, PL_colors[1],
319 (strend - strpos > 60 ? "..." : ""))
320 );
321
2c2d71f5
JH
322 if (prog->minlen > strend - strpos) {
323 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 324 goto fail;
2c2d71f5
JH
325 }
326 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
327 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
328 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 329 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
330
331 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 332 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
333 I32 slen;
334
335 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
336 && (sv && (strpos + SvCUR(sv) != strend)) ) {
337 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 338 goto fail;
2c2d71f5 339 }
adac82c7 340 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 341 s = HOPc(strpos, prog->check_offset_min);
cad2e5aa
JH
342 if (SvTAIL(prog->check_substr)) {
343 slen = SvCUR(prog->check_substr); /* >= 1 */
344
2c2d71f5
JH
345 if ( strend - s > slen || strend - s < slen - 1
346 || (strend - s == slen && strend[-1] != '\n')) {
347 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
348 goto fail_finish;
cad2e5aa
JH
349 }
350 /* Now should match s[0..slen-2] */
351 slen--;
352 if (slen && (*SvPVX(prog->check_substr) != *s
353 || (slen > 1
2c2d71f5
JH
354 && memNE(SvPVX(prog->check_substr), s, slen)))) {
355 report_neq:
356 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
357 goto fail_finish;
358 }
cad2e5aa
JH
359 }
360 else if (*SvPVX(prog->check_substr) != *s
361 || ((slen = SvCUR(prog->check_substr)) > 1
362 && memNE(SvPVX(prog->check_substr), s, slen)))
2c2d71f5
JH
363 goto report_neq;
364 goto success_at_start;
cad2e5aa 365 }
2c2d71f5 366 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 367 s = strpos;
2c2d71f5
JH
368 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
369 /* Should be nonnegative! */
370 end_shift = prog->minlen - start_shift -
371 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
372 if (!ml_anch) {
373 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
374 - (SvTAIL(prog->check_substr) != 0);
375 I32 eshift = strend - s - end;
376
377 if (end_shift < eshift)
378 end_shift = eshift;
379 }
cad2e5aa 380 }
2c2d71f5 381 else { /* Can match at random position */
cad2e5aa
JH
382 ml_anch = 0;
383 s = strpos;
2c2d71f5
JH
384 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
385 /* Should be nonnegative! */
386 end_shift = prog->minlen - start_shift -
387 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
cad2e5aa
JH
388 }
389
2c2d71f5 390#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 391 if (end_shift < 0)
6bbae5e6 392 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
393#endif
394
395 check = prog->check_substr;
396 restart:
397 /* Find a possible match in the region s..strend by looking for
398 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 399 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
400 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
401 I32 p = -1; /* Internal iterator of scream. */
402 I32 *pp = data ? data->scream_pos : &p;
403
2c2d71f5
JH
404 if (PL_screamfirst[BmRARE(check)] >= 0
405 || ( BmRARE(check) == '\n'
406 && (BmPREVIOUS(check) == SvCUR(check) - 1)
407 && SvTAIL(check) ))
408 s = screaminstr(sv, check,
409 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 410 else
2c2d71f5 411 goto fail_finish;
cad2e5aa
JH
412 if (data)
413 *data->scream_olds = s;
414 }
415 else
416 s = fbm_instr((unsigned char*)s + start_shift,
417 (unsigned char*)strend - end_shift,
2c2d71f5 418 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
419
420 /* Update the count-of-usability, remove useless subpatterns,
421 unshift s. */
2c2d71f5
JH
422
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
424 (s ? "Found" : "Did not find"),
425 ((check == prog->anchored_substr) ? "anchored" : "floating"),
426 PL_colors[0],
7b0972df
JH
427 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
428 SvPVX(check),
2c2d71f5
JH
429 PL_colors[1], (SvTAIL(check) ? "$" : ""),
430 (s ? " at offset " : "...\n") ) );
431
432 if (!s)
433 goto fail_finish;
434
435 /* Finish the diagnostic message */
30944b6d 436 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
437
438 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
439 Start with the other substr.
440 XXXX no SCREAM optimization yet - and a very coarse implementation
441 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
442 *always* match. Probably should be marked during compile...
443 Probably it is right to do no SCREAM here...
444 */
445
446 if (prog->float_substr && prog->anchored_substr) {
30944b6d 447 /* Take into account the "other" substring. */
2c2d71f5
JH
448 /* XXXX May be hopelessly wrong for UTF... */
449 if (!other_last)
450 other_last = strpos - 1;
451 if (check == prog->float_substr) {
30944b6d
IZ
452 do_other_anchored:
453 {
2c2d71f5
JH
454 char *last = s - start_shift, *last1, *last2;
455 char *s1 = s;
456
457 tmp = PL_bostr;
458 t = s - prog->check_offset_max;
459 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
460 && (!(prog->reganch & ROPT_UTF8)
461 || (PL_bostr = strpos, /* Used in regcopmaybe() */
462 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
463 && t > strpos)))
30944b6d 464 /* EMPTY */;
2c2d71f5
JH
465 else
466 t = strpos;
467 t += prog->anchored_offset;
468 if (t <= other_last)
469 t = other_last + 1;
470 PL_bostr = tmp;
471 last2 = last1 = strend - prog->minlen;
472 if (last < last1)
473 last1 = last;
474 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
475 /* On end-of-str: see comment below. */
476 s = fbm_instr((unsigned char*)t,
477 (unsigned char*)last1 + prog->anchored_offset
478 + SvCUR(prog->anchored_substr)
479 - (SvTAIL(prog->anchored_substr)!=0),
480 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
481 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
482 (s ? "Found" : "Contradicts"),
483 PL_colors[0],
7b0972df
JH
484 (int)(SvCUR(prog->anchored_substr)
485 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
486 SvPVX(prog->anchored_substr),
487 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
488 if (!s) {
489 if (last1 >= last2) {
490 DEBUG_r(PerlIO_printf(Perl_debug_log,
491 ", giving up...\n"));
492 goto fail_finish;
493 }
494 DEBUG_r(PerlIO_printf(Perl_debug_log,
495 ", trying floating at offset %ld...\n",
30944b6d 496 (long)(s1 + 1 - i_strpos)));
2c2d71f5
JH
497 PL_regeol = strend; /* Used in HOP() */
498 other_last = last1 + prog->anchored_offset;
499 s = HOPc(last, 1);
500 goto restart;
501 }
502 else {
503 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 504 (long)(s - i_strpos)));
2c2d71f5
JH
505 t = s - prog->anchored_offset;
506 other_last = s - 1;
30944b6d 507 s = s1;
2c2d71f5
JH
508 if (t == strpos)
509 goto try_at_start;
2c2d71f5
JH
510 goto try_at_offset;
511 }
30944b6d 512 }
2c2d71f5
JH
513 }
514 else { /* Take into account the floating substring. */
515 char *last, *last1;
516 char *s1 = s;
517
518 t = s - start_shift;
519 last1 = last = strend - prog->minlen + prog->float_min_offset;
520 if (last - t > prog->float_max_offset)
521 last = t + prog->float_max_offset;
522 s = t + prog->float_min_offset;
523 if (s <= other_last)
524 s = other_last + 1;
525 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
526 /* fbm_instr() takes into account exact value of end-of-str
527 if the check is SvTAIL(ed). Since false positives are OK,
528 and end-of-str is not later than strend we are OK. */
529 s = fbm_instr((unsigned char*)s,
530 (unsigned char*)last + SvCUR(prog->float_substr)
531 - (SvTAIL(prog->float_substr)!=0),
532 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
533 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
534 (s ? "Found" : "Contradicts"),
535 PL_colors[0],
7b0972df
JH
536 (int)(SvCUR(prog->float_substr)
537 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
538 SvPVX(prog->float_substr),
539 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
540 if (!s) {
541 if (last1 == last) {
542 DEBUG_r(PerlIO_printf(Perl_debug_log,
543 ", giving up...\n"));
544 goto fail_finish;
545 }
546 DEBUG_r(PerlIO_printf(Perl_debug_log,
547 ", trying anchored starting at offset %ld...\n",
30944b6d 548 (long)(s1 + 1 - i_strpos)));
2c2d71f5
JH
549 other_last = last;
550 PL_regeol = strend; /* Used in HOP() */
551 s = HOPc(t, 1);
552 goto restart;
553 }
554 else {
555 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 556 (long)(s - i_strpos)));
2c2d71f5 557 other_last = s - 1;
30944b6d 558 s = s1;
2c2d71f5
JH
559 if (t == strpos)
560 goto try_at_start;
2c2d71f5
JH
561 goto try_at_offset;
562 }
563 }
cad2e5aa 564 }
2c2d71f5
JH
565
566 t = s - prog->check_offset_max;
567 tmp = PL_bostr;
568 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
569 && (!(prog->reganch & ROPT_UTF8)
570 || (PL_bostr = strpos, /* Used in regcopmaybe() */
571 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
572 && t > strpos)))) {
573 PL_bostr = tmp;
574 /* Fixed substring is found far enough so that the match
575 cannot start at strpos. */
576 try_at_offset:
cad2e5aa 577 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
578 /* Eventually fbm_*() should handle this, but often
579 anchored_offset is not 0, so this check will not be wasted. */
580 /* XXXX In the code below we prefer to look for "^" even in
581 presence of anchored substrings. And we search even
582 beyond the found float position. These pessimizations
583 are historical artefacts only. */
584 find_anchor:
2c2d71f5 585 while (t < strend - prog->minlen) {
cad2e5aa
JH
586 if (*t == '\n') {
587 if (t < s - prog->check_offset_min) {
30944b6d
IZ
588 if (prog->anchored_substr) {
589 /* We definitely contradict the found anchored
590 substr. Due to the above check we do not
591 contradict "check" substr.
592 Thus we can arrive here only if check substr
593 is float. Redo checking for "other"=="fixed".
594 */
595 strpos = t + 1;
596 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
597 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
598 goto do_other_anchored;
599 }
cad2e5aa 600 s = t + 1;
2c2d71f5 601 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 602 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
603 goto set_useful;
604 }
2c2d71f5 605 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
606 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
607 strpos = s = t + 1;
cad2e5aa
JH
608 goto restart;
609 }
610 t++;
611 }
2c2d71f5
JH
612 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
613 PL_colors[0],PL_colors[1]));
614 goto fail_finish;
cad2e5aa
JH
615 }
616 s = t;
617 set_useful:
2c2d71f5 618 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
619 }
620 else {
2c2d71f5
JH
621 PL_bostr = tmp;
622 /* The found string does not prohibit matching at beg-of-str
623 - no optimization of calling REx engine can be performed,
624 unless it was an MBOL and we are not after MBOL. */
625 try_at_start:
626 /* Even in this situation we may use MBOL flag if strpos is offset
627 wrt the start of the string. */
628 if (ml_anch && sv
cad2e5aa
JH
629 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
630 t = strpos;
631 goto find_anchor;
632 }
30944b6d
IZ
633 DEBUG_r( if (ml_anch)
634 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
635 PL_colors[0],PL_colors[1]);
636 );
2c2d71f5 637 success_at_start:
30944b6d 638 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
cad2e5aa
JH
639 && --BmUSEFUL(prog->check_substr) < 0
640 && prog->check_substr == prog->float_substr) { /* boo */
641 /* If flags & SOMETHING - do not do it many times on the same match */
642 SvREFCNT_dec(prog->check_substr);
643 prog->check_substr = Nullsv; /* disable */
644 prog->float_substr = Nullsv; /* clear */
645 s = strpos;
646 prog->reganch &= ~RE_USE_INTUIT;
647 }
648 else
649 s = strpos;
650 }
651
2c2d71f5 652 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 653 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 654 return s;
2c2d71f5
JH
655
656 fail_finish: /* Substring not found */
657 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 658 fail:
2c2d71f5 659 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
660 PL_colors[4],PL_colors[5]));
661 return Nullch;
662}
9661b544 663
a687059c 664/*
c277df42 665 - regexec_flags - match a regexp against a string
a687059c 666 */
79072805 667I32
864dbfa3 668Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
22e551b9 669 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
c277df42
IZ
670/* strend: pointer to null at end of string */
671/* strbeg: real beginning of string */
672/* minend: end of match must be >=minend after stringarg. */
673/* data: May be used for some additional optimizations. */
674/* nosave: For optimizations. */
a687059c 675{
5c0ca799 676 dTHR;
a0d0e21e 677 register char *s;
c277df42 678 register regnode *c;
a0d0e21e
LW
679 register char *startpos = stringarg;
680 register I32 tmp;
c277df42 681 I32 minlen; /* must match at least this many chars */
a0d0e21e 682 I32 dontbother = 0; /* how many characters not to try at end */
c277df42 683 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
684 constant substr. */ /* CC */
685 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
686 I32 scream_pos = -1; /* Internal iterator of scream. */
687 char *scream_olds;
3280af22 688 SV* oreplsv = GvSV(PL_replgv);
a687059c 689
2ab05381 690 PL_regcc = 0;
a0d0e21e 691
0f5d15d6 692 cache_re(prog);
a0d0e21e 693#ifdef DEBUGGING
3280af22 694 PL_regnarrate = PL_debug & 512;
a0d0e21e
LW
695#endif
696
697 /* Be paranoid... */
698 if (prog == NULL || startpos == NULL) {
cea2e8a9 699 Perl_croak(aTHX_ "NULL regexp parameter");
a0d0e21e
LW
700 return 0;
701 }
702
c277df42
IZ
703 minlen = prog->minlen;
704 if (strend - startpos < minlen) goto phooey;
705
a0d0e21e 706 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 707 PL_regprev = '\n';
a0d0e21e 708 else {
a0ed51b3 709 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
710 if (!PL_multiline && PL_regprev == '\n')
711 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 712 }
bbce6d69 713
a0d0e21e 714 /* Check validity of program. */
22c35a8c 715 if (UCHARAT(prog->program) != REG_MAGIC) {
cea2e8a9 716 Perl_croak(aTHX_ "corrupted regexp program");
a0d0e21e
LW
717 }
718
3280af22
NIS
719 PL_reg_flags = 0;
720 PL_reg_eval_set = 0;
2c2d71f5 721 PL_reg_maxiter = 0;
a0d0e21e 722
a0ed51b3
LW
723 if (prog->reganch & ROPT_UTF8)
724 PL_reg_flags |= RF_utf8;
725
726 /* Mark beginning of line for ^ and lookbehind. */
727 PL_regbol = startpos;
728 PL_bostr = strbeg;
9661b544 729 PL_reg_sv = sv;
a0ed51b3
LW
730
731 /* Mark end of line for $ (and such) */
732 PL_regeol = strend;
733
734 /* see how far we have to get to not match where we matched before */
735 PL_regtill = startpos+minend;
736
0f5d15d6
IZ
737 /* We start without call_cc context. */
738 PL_reg_call_cc = 0;
739
a0d0e21e
LW
740 /* If there is a "must appear" string, look for it. */
741 s = startpos;
cad2e5aa 742
30382c73 743 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
cad2e5aa
JH
744 MAGIC *mg;
745
30382c73 746 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
cad2e5aa 747 PL_reg_ganch = startpos;
30382c73
IZ
748 else if (sv && SvTYPE(sv) >= SVt_PVMG
749 && SvMAGIC(sv)
750 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
751 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
752 if (prog->reganch & ROPT_ANCH_GPOS) {
753 if (s > PL_reg_ganch)
754 goto phooey;
755 s = PL_reg_ganch;
756 }
a0ed51b3 757 }
30382c73
IZ
758 else /* pos() not defined */
759 PL_reg_ganch = strbeg;
a0d0e21e 760 }
a687059c 761
cad2e5aa
JH
762 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
763 re_scream_pos_data d;
764
765 d.scream_olds = &scream_olds;
766 d.scream_pos = &scream_pos;
767 s = re_intuit_start(prog, sv, s, strend, flags, &d);
768 if (!s)
769 goto phooey; /* not present */
770 }
771
772 DEBUG_r( if (!PL_colorset) reginitcolors() );
773 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 774 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
8d300b32
GS
775 PL_colors[4],PL_colors[5],PL_colors[0],
776 prog->precomp,
777 PL_colors[1],
c277df42 778 (strlen(prog->precomp) > 60 ? "..." : ""),
cad2e5aa 779 PL_colors[0],
b900a521 780 (int)(strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 781 startpos, PL_colors[1],
c277df42
IZ
782 (strend - startpos > 60 ? "..." : ""))
783 );
784
a0d0e21e 785 /* Simplest case: anchored match need be tried only once. */
774d564b 786 /* [unless only anchor is BOL and multiline is set] */
22e551b9 787 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
cad2e5aa 788 if (s == startpos && regtry(prog, startpos))
a0d0e21e 789 goto got_it;
22e551b9
IZ
790 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
791 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
774d564b 792 {
cad2e5aa
JH
793 char *end;
794
a0d0e21e
LW
795 if (minlen)
796 dontbother = minlen - 1;
cad2e5aa 797 end = HOPc(strend, -dontbother) - 1;
a0d0e21e 798 /* for multiline we only have to try after newlines */
cad2e5aa 799 if (prog->check_substr) {
30944b6d
IZ
800 if (s == startpos)
801 goto after_try;
cad2e5aa
JH
802 while (1) {
803 if (regtry(prog, s))
a0d0e21e 804 goto got_it;
30944b6d 805 after_try:
cad2e5aa
JH
806 if (s >= end)
807 goto phooey;
808 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
809 if (!s)
810 goto phooey;
811 }
812 } else {
813 if (s > startpos)
814 s--;
815 while (s < end) {
816 if (*s++ == '\n') { /* don't need PL_utf8skip here */
817 if (regtry(prog, s))
818 goto got_it;
819 }
820 }
35c8bce7 821 }
35c8bce7 822 }
a0d0e21e 823 goto phooey;
22e551b9
IZ
824 } else if (prog->reganch & ROPT_ANCH_GPOS) {
825 if (regtry(prog, PL_reg_ganch))
826 goto got_it;
827 goto phooey;
a0d0e21e 828 }
35c8bce7 829
a0d0e21e 830 /* Messy cases: unanchored match. */
c277df42
IZ
831 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
832 /* we have /x+whatever/ */
cad2e5aa 833 /* it must be a one character string (XXXX Except UTF?) */
c277df42 834 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
835 if (UTF) {
836 while (s < strend) {
837 if (*s == ch) {
838 if (regtry(prog, s)) goto got_it;
839 s += UTF8SKIP(s);
840 while (s < strend && *s == ch)
841 s += UTF8SKIP(s);
842 }
843 s += UTF8SKIP(s);
844 }
845 }
846 else {
847 while (s < strend) {
848 if (*s == ch) {
849 if (regtry(prog, s)) goto got_it;
c277df42 850 s++;
a0ed51b3
LW
851 while (s < strend && *s == ch)
852 s++;
853 }
854 s++;
a0d0e21e 855 }
a687059c 856 }
c277df42
IZ
857 }
858 /*SUPPRESS 560*/
859 else if (prog->anchored_substr != Nullsv
860 || (prog->float_substr != Nullsv
861 && prog->float_max_offset < strend - s)) {
862 SV *must = prog->anchored_substr
863 ? prog->anchored_substr : prog->float_substr;
864 I32 back_max =
865 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
866 I32 back_min =
867 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
868 I32 delta = back_max - back_min;
cf93c79d 869 char *last = HOPc(strend, /* Cannot start after this */
73c4f7a1
GS
870 -(I32)(CHR_SVLEN(must)
871 - (SvTAIL(must) != 0) + back_min));
a0ed51b3
LW
872 char *last1; /* Last position checked before */
873
874 if (s > PL_bostr)
dfe13c55 875 last1 = HOPc(s, -1);
a0ed51b3
LW
876 else
877 last1 = s - 1; /* bogus */
c277df42
IZ
878
879 /* XXXX check_substr already used to find `s', can optimize if
880 check_substr==must. */
881 scream_pos = -1;
882 dontbother = end_shift;
dfe13c55 883 strend = HOPc(strend, -dontbother);
c277df42 884 while ( (s <= last) &&
22e551b9
IZ
885 ((flags & REXEC_SCREAM)
886 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
c277df42 887 end_shift, &scream_pos, 0))
a0ed51b3 888 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
cf93c79d
IZ
889 (unsigned char*)strend, must,
890 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
dfe13c55
GS
891 if (HOPc(s, -back_max) > last1) {
892 last1 = HOPc(s, -back_min);
893 s = HOPc(s, -back_max);
a0ed51b3
LW
894 }
895 else {
dfe13c55 896 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 897
dfe13c55 898 last1 = HOPc(s, -back_min);
c277df42 899 s = t;
a0d0e21e 900 }
a0ed51b3
LW
901 if (UTF) {
902 while (s <= last1) {
903 if (regtry(prog, s))
904 goto got_it;
905 s += UTF8SKIP(s);
906 }
907 }
908 else {
909 while (s <= last1) {
910 if (regtry(prog, s))
911 goto got_it;
912 s++;
913 }
a0d0e21e
LW
914 }
915 }
916 goto phooey;
a0ed51b3
LW
917 }
918 else if (c = prog->regstclass) {
a0d0e21e 919 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
b3c9acc1
IZ
920 char *m;
921 int ln;
922 int c1;
923 int c2;
924 char *e;
a687059c 925
a0d0e21e
LW
926 if (minlen)
927 dontbother = minlen - 1;
dfe13c55 928 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
929 tmp = 1;
930 /* We know what class it must start with. */
931 switch (OP(c)) {
a0ed51b3 932 case ANYOFUTF8:
a0ed51b3
LW
933 while (s < strend) {
934 if (REGINCLASSUTF8(c, (U8*)s)) {
935 if (tmp && regtry(prog, s))
936 goto got_it;
937 else
938 tmp = doevery;
939 }
940 else
941 tmp = 1;
942 s += UTF8SKIP(s);
943 }
944 break;
a0d0e21e 945 case ANYOF:
a0d0e21e 946 while (s < strend) {
936ed897 947 if (REGINCLASS(c, *s)) {
a0d0e21e
LW
948 if (tmp && regtry(prog, s))
949 goto got_it;
950 else
951 tmp = doevery;
a687059c 952 }
a0d0e21e
LW
953 else
954 tmp = 1;
955 s++;
956 }
957 break;
b3c9acc1
IZ
958 case EXACTF:
959 m = STRING(c);
960 ln = STR_LEN(c);
961 c1 = *m;
962 c2 = PL_fold[c1];
963 goto do_exactf;
964 case EXACTFL:
965 m = STRING(c);
966 ln = STR_LEN(c);
967 c1 = *m;
968 c2 = PL_fold_locale[c1];
969 do_exactf:
970 e = strend - ln;
971
972 /* Here it is NOT UTF! */
973 if (c1 == c2) {
974 while (s <= e) {
975 if ( *s == c1
976 && (ln == 1 || (OP(c) == EXACTF
977 ? ibcmp(s, m, ln)
978 : ibcmp_locale(s, m, ln)))
979 && regtry(prog, s) )
980 goto got_it;
981 s++;
982 }
983 } else {
984 while (s <= e) {
985 if ( (*s == c1 || *s == c2)
986 && (ln == 1 || (OP(c) == EXACTF
987 ? ibcmp(s, m, ln)
988 : ibcmp_locale(s, m, ln)))
989 && regtry(prog, s) )
990 goto got_it;
991 s++;
992 }
993 }
994 break;
bbce6d69 995 case BOUNDL:
3280af22 996 PL_reg_flags |= RF_tainted;
bbce6d69 997 /* FALL THROUGH */
a0d0e21e 998 case BOUND:
a0ed51b3
LW
999 if (minlen) {
1000 dontbother++;
1001 strend -= 1;
1002 }
3280af22 1003 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 1004 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 1005 while (s < strend) {
95bac841 1006 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
1007 tmp = !tmp;
1008 if (regtry(prog, s))
1009 goto got_it;
a687059c 1010 }
a0d0e21e
LW
1011 s++;
1012 }
1013 if ((minlen || tmp) && regtry(prog,s))
1014 goto got_it;
1015 break;
a0ed51b3
LW
1016 case BOUNDLUTF8:
1017 PL_reg_flags |= RF_tainted;
1018 /* FALL THROUGH */
1019 case BOUNDUTF8:
1020 if (minlen) {
1021 dontbother++;
dfe13c55 1022 strend = reghop_c(strend, -1);
a0ed51b3 1023 }
dfe13c55 1024 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
1025 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
1026 while (s < strend) {
dfe13c55
GS
1027 if (tmp == !(OP(c) == BOUND ?
1028 swash_fetch(PL_utf8_alnum, (U8*)s) :
1029 isALNUM_LC_utf8((U8*)s)))
1030 {
a0ed51b3
LW
1031 tmp = !tmp;
1032 if (regtry(prog, s))
1033 goto got_it;
1034 }
1035 s += UTF8SKIP(s);
1036 }
1037 if ((minlen || tmp) && regtry(prog,s))
1038 goto got_it;
1039 break;
bbce6d69 1040 case NBOUNDL:
3280af22 1041 PL_reg_flags |= RF_tainted;
bbce6d69 1042 /* FALL THROUGH */
a0d0e21e 1043 case NBOUND:
a0ed51b3
LW
1044 if (minlen) {
1045 dontbother++;
1046 strend -= 1;
1047 }
3280af22 1048 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 1049 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 1050 while (s < strend) {
95bac841 1051 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
1052 tmp = !tmp;
1053 else if (regtry(prog, s))
1054 goto got_it;
1055 s++;
1056 }
1057 if ((minlen || !tmp) && regtry(prog,s))
1058 goto got_it;
1059 break;
a0ed51b3
LW
1060 case NBOUNDLUTF8:
1061 PL_reg_flags |= RF_tainted;
1062 /* FALL THROUGH */
1063 case NBOUNDUTF8:
1064 if (minlen) {
1065 dontbother++;
dfe13c55 1066 strend = reghop_c(strend, -1);
a0ed51b3 1067 }
dfe13c55 1068 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
1069 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
1070 while (s < strend) {
dfe13c55
GS
1071 if (tmp == !(OP(c) == NBOUND ?
1072 swash_fetch(PL_utf8_alnum, (U8*)s) :
1073 isALNUM_LC_utf8((U8*)s)))
a0ed51b3
LW
1074 tmp = !tmp;
1075 else if (regtry(prog, s))
1076 goto got_it;
1077 s += UTF8SKIP(s);
1078 }
1079 if ((minlen || !tmp) && regtry(prog,s))
1080 goto got_it;
1081 break;
a0d0e21e
LW
1082 case ALNUM:
1083 while (s < strend) {
bbce6d69 1084 if (isALNUM(*s)) {
1085 if (tmp && regtry(prog, s))
1086 goto got_it;
1087 else
1088 tmp = doevery;
1089 }
1090 else
1091 tmp = 1;
1092 s++;
1093 }
1094 break;
a0ed51b3
LW
1095 case ALNUMUTF8:
1096 while (s < strend) {
dfe13c55 1097 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
1098 if (tmp && regtry(prog, s))
1099 goto got_it;
1100 else
1101 tmp = doevery;
1102 }
1103 else
1104 tmp = 1;
1105 s += UTF8SKIP(s);
1106 }
1107 break;
bbce6d69 1108 case ALNUML:
3280af22 1109 PL_reg_flags |= RF_tainted;
bbce6d69 1110 while (s < strend) {
1111 if (isALNUM_LC(*s)) {
a0d0e21e
LW
1112 if (tmp && regtry(prog, s))
1113 goto got_it;
a687059c 1114 else
a0d0e21e
LW
1115 tmp = doevery;
1116 }
1117 else
1118 tmp = 1;
1119 s++;
1120 }
1121 break;
a0ed51b3
LW
1122 case ALNUMLUTF8:
1123 PL_reg_flags |= RF_tainted;
1124 while (s < strend) {
dfe13c55 1125 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
1126 if (tmp && regtry(prog, s))
1127 goto got_it;
1128 else
1129 tmp = doevery;
1130 }
1131 else
1132 tmp = 1;
1133 s += UTF8SKIP(s);
1134 }
1135 break;
a0d0e21e
LW
1136 case NALNUM:
1137 while (s < strend) {
bbce6d69 1138 if (!isALNUM(*s)) {
1139 if (tmp && regtry(prog, s))
1140 goto got_it;
1141 else
1142 tmp = doevery;
1143 }
1144 else
1145 tmp = 1;
1146 s++;
1147 }
1148 break;
a0ed51b3
LW
1149 case NALNUMUTF8:
1150 while (s < strend) {
dfe13c55 1151 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
1152 if (tmp && regtry(prog, s))
1153 goto got_it;
1154 else
1155 tmp = doevery;
1156 }
1157 else
1158 tmp = 1;
1159 s += UTF8SKIP(s);
1160 }
1161 break;
bbce6d69 1162 case NALNUML:
3280af22 1163 PL_reg_flags |= RF_tainted;
bbce6d69 1164 while (s < strend) {
1165 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
1166 if (tmp && regtry(prog, s))
1167 goto got_it;
a687059c 1168 else
a0d0e21e 1169 tmp = doevery;
a687059c 1170 }
a0d0e21e
LW
1171 else
1172 tmp = 1;
1173 s++;
1174 }
1175 break;
a0ed51b3
LW
1176 case NALNUMLUTF8:
1177 PL_reg_flags |= RF_tainted;
1178 while (s < strend) {
dfe13c55 1179 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
1180 if (tmp && regtry(prog, s))
1181 goto got_it;
1182 else
1183 tmp = doevery;
1184 }
1185 else
1186 tmp = 1;
1187 s += UTF8SKIP(s);
1188 }
1189 break;
a0d0e21e
LW
1190 case SPACE:
1191 while (s < strend) {
1192 if (isSPACE(*s)) {
1193 if (tmp && regtry(prog, s))
1194 goto got_it;
1195 else
1196 tmp = doevery;
2304df62 1197 }
a0d0e21e
LW
1198 else
1199 tmp = 1;
1200 s++;
1201 }
1202 break;
a0ed51b3
LW
1203 case SPACEUTF8:
1204 while (s < strend) {
dfe13c55 1205 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3
LW
1206 if (tmp && regtry(prog, s))
1207 goto got_it;
1208 else
1209 tmp = doevery;
1210 }
1211 else
1212 tmp = 1;
1213 s += UTF8SKIP(s);
1214 }
1215 break;
bbce6d69 1216 case SPACEL:
3280af22 1217 PL_reg_flags |= RF_tainted;
bbce6d69 1218 while (s < strend) {
1219 if (isSPACE_LC(*s)) {
1220 if (tmp && regtry(prog, s))
1221 goto got_it;
1222 else
1223 tmp = doevery;
1224 }
1225 else
1226 tmp = 1;
1227 s++;
1228 }
1229 break;
a0ed51b3
LW
1230 case SPACELUTF8:
1231 PL_reg_flags |= RF_tainted;
1232 while (s < strend) {
dfe13c55 1233 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3
LW
1234 if (tmp && regtry(prog, s))
1235 goto got_it;
1236 else
1237 tmp = doevery;
1238 }
1239 else
1240 tmp = 1;
1241 s += UTF8SKIP(s);
1242 }
1243 break;
a0d0e21e
LW
1244 case NSPACE:
1245 while (s < strend) {
1246 if (!isSPACE(*s)) {
1247 if (tmp && regtry(prog, s))
1248 goto got_it;
1249 else
1250 tmp = doevery;
a687059c 1251 }
a0d0e21e
LW
1252 else
1253 tmp = 1;
1254 s++;
1255 }
1256 break;
a0ed51b3
LW
1257 case NSPACEUTF8:
1258 while (s < strend) {
dfe13c55 1259 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3
LW
1260 if (tmp && regtry(prog, s))
1261 goto got_it;
1262 else
1263 tmp = doevery;
1264 }
1265 else
1266 tmp = 1;
1267 s += UTF8SKIP(s);
1268 }
1269 break;
bbce6d69 1270 case NSPACEL:
3280af22 1271 PL_reg_flags |= RF_tainted;
bbce6d69 1272 while (s < strend) {
1273 if (!isSPACE_LC(*s)) {
1274 if (tmp && regtry(prog, s))
1275 goto got_it;
1276 else
1277 tmp = doevery;
1278 }
1279 else
1280 tmp = 1;
1281 s++;
1282 }
1283 break;
a0ed51b3
LW
1284 case NSPACELUTF8:
1285 PL_reg_flags |= RF_tainted;
1286 while (s < strend) {
dfe13c55 1287 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3
LW
1288 if (tmp && regtry(prog, s))
1289 goto got_it;
1290 else
1291 tmp = doevery;
1292 }
1293 else
1294 tmp = 1;
1295 s += UTF8SKIP(s);
1296 }
1297 break;
a0d0e21e
LW
1298 case DIGIT:
1299 while (s < strend) {
1300 if (isDIGIT(*s)) {
1301 if (tmp && regtry(prog, s))
1302 goto got_it;
1303 else
1304 tmp = doevery;
2b69d0c2 1305 }
a0d0e21e
LW
1306 else
1307 tmp = 1;
1308 s++;
1309 }
1310 break;
a0ed51b3
LW
1311 case DIGITUTF8:
1312 while (s < strend) {
dfe13c55 1313 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1314 if (tmp && regtry(prog, s))
1315 goto got_it;
1316 else
1317 tmp = doevery;
1318 }
1319 else
1320 tmp = 1;
1321 s += UTF8SKIP(s);
1322 }
1323 break;
b8c5462f
JH
1324 case DIGITL:
1325 PL_reg_flags |= RF_tainted;
1326 while (s < strend) {
1327 if (isDIGIT_LC(*s)) {
1328 if (tmp && regtry(prog, s))
1329 goto got_it;
1330 else
1331 tmp = doevery;
1332 }
1333 else
1334 tmp = 1;
1335 s++;
1336 }
1337 break;
1338 case DIGITLUTF8:
1339 PL_reg_flags |= RF_tainted;
1340 while (s < strend) {
1341 if (isDIGIT_LC_utf8((U8*)s)) {
1342 if (tmp && regtry(prog, s))
1343 goto got_it;
1344 else
1345 tmp = doevery;
1346 }
1347 else
1348 tmp = 1;
1349 s += UTF8SKIP(s);
1350 }
1351 break;
a0d0e21e
LW
1352 case NDIGIT:
1353 while (s < strend) {
1354 if (!isDIGIT(*s)) {
1355 if (tmp && regtry(prog, s))
1356 goto got_it;
1357 else
1358 tmp = doevery;
a687059c 1359 }
a0d0e21e
LW
1360 else
1361 tmp = 1;
1362 s++;
1363 }
1364 break;
a0ed51b3
LW
1365 case NDIGITUTF8:
1366 while (s < strend) {
dfe13c55 1367 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1368 if (tmp && regtry(prog, s))
1369 goto got_it;
1370 else
1371 tmp = doevery;
1372 }
1373 else
1374 tmp = 1;
1375 s += UTF8SKIP(s);
1376 }
1377 break;
b8c5462f
JH
1378 case NDIGITL:
1379 PL_reg_flags |= RF_tainted;
1380 while (s < strend) {
1381 if (!isDIGIT_LC(*s)) {
1382 if (tmp && regtry(prog, s))
1383 goto got_it;
1384 else
1385 tmp = doevery;
1386 }
1387 else
1388 tmp = 1;
1389 s++;
a0ed51b3 1390 }
b8c5462f
JH
1391 break;
1392 case NDIGITLUTF8:
1393 PL_reg_flags |= RF_tainted;
1394 while (s < strend) {
1395 if (!isDIGIT_LC_utf8((U8*)s)) {
1396 if (tmp && regtry(prog, s))
1397 goto got_it;
cf93c79d 1398 else
b8c5462f
JH
1399 tmp = doevery;
1400 }
1401 else
1402 tmp = 1;
1403 s += UTF8SKIP(s);
1404 }
1405 break;
b3c9acc1 1406 default:
7934575e 1407 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
b3c9acc1 1408 break;
d6a28714
JH
1409 }
1410 }
1411 else {
1412 dontbother = 0;
1413 if (prog->float_substr != Nullsv) { /* Trim the end. */
1414 char *last;
1415 I32 oldpos = scream_pos;
1416
1417 if (flags & REXEC_SCREAM) {
1418 last = screaminstr(sv, prog->float_substr, s - strbeg,
1419 end_shift, &scream_pos, 1); /* last one */
1420 if (!last)
1421 last = scream_olds; /* Only one occurence. */
b8c5462f 1422 }
d6a28714
JH
1423 else {
1424 STRLEN len;
1425 char *little = SvPV(prog->float_substr, len);
1426
1427 if (SvTAIL(prog->float_substr)) {
1428 if (memEQ(strend - len + 1, little, len - 1))
1429 last = strend - len + 1;
1430 else if (!PL_multiline)
1431 last = memEQ(strend - len, little, len)
1432 ? strend - len : Nullch;
b8c5462f 1433 else
d6a28714
JH
1434 goto find_last;
1435 } else {
1436 find_last:
1437 if (len)
1438 last = rninstr(s, strend, little, little + len);
b8c5462f 1439 else
d6a28714 1440 last = strend; /* matching `$' */
b8c5462f 1441 }
b8c5462f 1442 }
d6a28714
JH
1443 if (last == NULL) goto phooey; /* Should not happen! */
1444 dontbother = strend - last + prog->float_min_offset;
1445 }
1446 if (minlen && (dontbother < minlen))
1447 dontbother = minlen - 1;
1448 strend -= dontbother; /* this one's always in bytes! */
1449 /* We don't know much -- general case. */
1450 if (UTF) {
1451 for (;;) {
1452 if (regtry(prog, s))
1453 goto got_it;
1454 if (s >= strend)
1455 break;
b8c5462f 1456 s += UTF8SKIP(s);
d6a28714
JH
1457 };
1458 }
1459 else {
1460 do {
1461 if (regtry(prog, s))
1462 goto got_it;
1463 } while (s++ < strend);
1464 }
1465 }
1466
1467 /* Failure. */
1468 goto phooey;
1469
1470got_it:
1471 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1472
1473 if (PL_reg_eval_set) {
1474 /* Preserve the current value of $^R */
1475 if (oreplsv != GvSV(PL_replgv))
1476 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1477 restored, the value remains
1478 the same. */
1479 restore_pos(aTHXo_ 0);
1480 }
1481
1482 /* make sure $`, $&, $', and $digit will work later */
1483 if ( !(flags & REXEC_NOT_FIRST) ) {
1484 if (RX_MATCH_COPIED(prog)) {
1485 Safefree(prog->subbeg);
1486 RX_MATCH_COPIED_off(prog);
1487 }
1488 if (flags & REXEC_COPY_STR) {
1489 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1490
1491 s = savepvn(strbeg, i);
1492 prog->subbeg = s;
1493 prog->sublen = i;
1494 RX_MATCH_COPIED_on(prog);
1495 }
1496 else {
1497 prog->subbeg = strbeg;
1498 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1499 }
1500 }
1501
1502 return 1;
1503
1504phooey:
1505 if (PL_reg_eval_set)
1506 restore_pos(aTHXo_ 0);
1507 return 0;
1508}
1509
1510/*
1511 - regtry - try match at specific point
1512 */
1513STATIC I32 /* 0 failure, 1 success */
1514S_regtry(pTHX_ regexp *prog, char *startpos)
1515{
1516 dTHR;
1517 register I32 i;
1518 register I32 *sp;
1519 register I32 *ep;
1520 CHECKPOINT lastcp;
1521
1522 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1523 MAGIC *mg;
1524
1525 PL_reg_eval_set = RS_init;
1526 DEBUG_r(DEBUG_s(
b900a521
JH
1527 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1528 (IV)(PL_stack_sp - PL_stack_base));
d6a28714
JH
1529 ));
1530 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1531 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1532 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1533 SAVETMPS;
1534 /* Apparently this is not needed, judging by wantarray. */
1535 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1536 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1537
1538 if (PL_reg_sv) {
1539 /* Make $_ available to executed code. */
1540 if (PL_reg_sv != DEFSV) {
1541 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1542 SAVESPTR(DEFSV);
1543 DEFSV = PL_reg_sv;
b8c5462f 1544 }
d6a28714
JH
1545
1546 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1547 && (mg = mg_find(PL_reg_sv, 'g')))) {
1548 /* prepare for quick setting of pos */
1549 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1550 mg = mg_find(PL_reg_sv, 'g');
1551 mg->mg_len = -1;
b8c5462f 1552 }
d6a28714
JH
1553 PL_reg_magic = mg;
1554 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1555 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1556 }
1557 if (!PL_reg_curpm)
1558 New(22,PL_reg_curpm, 1, PMOP);
1559 PL_reg_curpm->op_pmregexp = prog;
1560 PL_reg_oldcurpm = PL_curpm;
1561 PL_curpm = PL_reg_curpm;
1562 if (RX_MATCH_COPIED(prog)) {
1563 /* Here is a serious problem: we cannot rewrite subbeg,
1564 since it may be needed if this match fails. Thus
1565 $` inside (?{}) could fail... */
1566 PL_reg_oldsaved = prog->subbeg;
1567 PL_reg_oldsavedlen = prog->sublen;
1568 RX_MATCH_COPIED_off(prog);
1569 }
1570 else
1571 PL_reg_oldsaved = Nullch;
1572 prog->subbeg = PL_bostr;
1573 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1574 }
1575 prog->startp[0] = startpos - PL_bostr;
1576 PL_reginput = startpos;
1577 PL_regstartp = prog->startp;
1578 PL_regendp = prog->endp;
1579 PL_reglastparen = &prog->lastparen;
1580 prog->lastparen = 0;
1581 PL_regsize = 0;
1582 DEBUG_r(PL_reg_starttry = startpos);
1583 if (PL_reg_start_tmpl <= prog->nparens) {
1584 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1585 if(PL_reg_start_tmp)
1586 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1587 else
1588 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1589 }
1590
1591 /* XXXX What this code is doing here?!!! There should be no need
1592 to do this again and again, PL_reglastparen should take care of
1593 this! */
1594 sp = prog->startp;
1595 ep = prog->endp;
1596 if (prog->nparens) {
1597 for (i = prog->nparens; i >= 1; i--) {
1598 *++sp = -1;
1599 *++ep = -1;
1600 }
1601 }
1602 REGCP_SET;
1603 if (regmatch(prog->program + 1)) {
1604 prog->endp[0] = PL_reginput - PL_bostr;
1605 return 1;
1606 }
1607 REGCP_UNWIND;
1608 return 0;
1609}
1610
1611/*
1612 - regmatch - main matching routine
1613 *
1614 * Conceptually the strategy is simple: check to see whether the current
1615 * node matches, call self recursively to see whether the rest matches,
1616 * and then act accordingly. In practice we make some effort to avoid
1617 * recursion, in particular by going through "ordinary" nodes (that don't
1618 * need to know whether the rest of the match failed) by a loop instead of
1619 * by recursion.
1620 */
1621/* [lwall] I've hoisted the register declarations to the outer block in order to
1622 * maybe save a little bit of pushing and popping on the stack. It also takes
1623 * advantage of machines that use a register save mask on subroutine entry.
1624 */
1625STATIC I32 /* 0 failure, 1 success */
1626S_regmatch(pTHX_ regnode *prog)
1627{
1628 dTHR;
1629 register regnode *scan; /* Current node. */
1630 regnode *next; /* Next node. */
1631 regnode *inner; /* Next node in internal branch. */
1632 register I32 nextchr; /* renamed nextchr - nextchar colides with
1633 function of same name */
1634 register I32 n; /* no or next */
1635 register I32 ln; /* len or last */
1636 register char *s; /* operand or save */
1637 register char *locinput = PL_reginput;
1638 register I32 c1, c2, paren; /* case fold search, parenth */
1639 int minmod = 0, sw = 0, logical = 0;
1640#ifdef DEBUGGING
1641 PL_regindent++;
1642#endif
1643
1644 /* Note that nextchr is a byte even in UTF */
1645 nextchr = UCHARAT(locinput);
1646 scan = prog;
1647 while (scan != NULL) {
1648#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1649#ifdef DEBUGGING
1650# define sayYES goto yes
1651# define sayNO goto no
7821416a
IZ
1652# define sayYES_FINAL goto yes_final
1653# define sayYES_LOUD goto yes_loud
1654# define sayNO_FINAL goto no_final
1655# define sayNO_SILENT goto do_no
d6a28714
JH
1656# define saySAME(x) if (x) goto yes; else goto no
1657# define REPORT_CODE_OFF 24
1658#else
1659# define sayYES return 1
1660# define sayNO return 0
7821416a
IZ
1661# define sayYES_FINAL return 1
1662# define sayYES_LOUD return 1
1663# define sayNO_FINAL return 0
1664# define sayNO_SILENT return 0
d6a28714
JH
1665# define saySAME(x) return x
1666#endif
1667 DEBUG_r( {
1668 SV *prop = sv_newmortal();
1669 int docolor = *PL_colors[0];
1670 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1671 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1672 /* The part of the string before starttry has one color
1673 (pref0_len chars), between starttry and current
1674 position another one (pref_len - pref0_len chars),
1675 after the current position the third one.
1676 We assume that pref0_len <= pref_len, otherwise we
1677 decrease pref0_len. */
1678 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1679 ? (5 + taill) - l : locinput - PL_bostr);
1680 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1681
1682 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1683 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1684 ? (5 + taill) - pref_len : PL_regeol - locinput);
1685 if (pref0_len < 0)
1686 pref0_len = 0;
1687 if (pref0_len > pref_len)
1688 pref0_len = pref_len;
1689 regprop(prop, scan);
1690 PerlIO_printf(Perl_debug_log,
b900a521
JH
1691 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1692 (IV)(locinput - PL_bostr),
d6a28714
JH
1693 PL_colors[4], pref0_len,
1694 locinput - pref_len, PL_colors[5],
1695 PL_colors[2], pref_len - pref0_len,
1696 locinput - pref_len + pref0_len, PL_colors[3],
1697 (docolor ? "" : "> <"),
1698 PL_colors[0], l, locinput, PL_colors[1],
1699 15 - l - pref_len + 1,
1700 "",
b900a521 1701 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1702 SvPVX(prop));
1703 } );
1704
1705 next = scan + NEXT_OFF(scan);
1706 if (next == scan)
1707 next = NULL;
1708
1709 switch (OP(scan)) {
1710 case BOL:
1711 if (locinput == PL_bostr
1712 ? PL_regprev == '\n'
1713 : (PL_multiline &&
1714 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1715 {
1716 /* regtill = regbol; */
b8c5462f
JH
1717 break;
1718 }
d6a28714
JH
1719 sayNO;
1720 case MBOL:
1721 if (locinput == PL_bostr
1722 ? PL_regprev == '\n'
1723 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1724 {
b8c5462f
JH
1725 break;
1726 }
d6a28714
JH
1727 sayNO;
1728 case SBOL:
1729 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1730 break;
d6a28714
JH
1731 sayNO;
1732 case GPOS:
1733 if (locinput == PL_reg_ganch)
1734 break;
1735 sayNO;
1736 case EOL:
1737 if (PL_multiline)
1738 goto meol;
1739 else
1740 goto seol;
1741 case MEOL:
1742 meol:
1743 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1744 sayNO;
b8c5462f 1745 break;
d6a28714
JH
1746 case SEOL:
1747 seol:
1748 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1749 sayNO;
d6a28714 1750 if (PL_regeol - locinput > 1)
b8c5462f 1751 sayNO;
b8c5462f 1752 break;
d6a28714
JH
1753 case EOS:
1754 if (PL_regeol != locinput)
b8c5462f 1755 sayNO;
d6a28714
JH
1756 break;
1757 case SANYUTF8:
b8c5462f 1758 if (nextchr & 0x80) {
b8c5462f 1759 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1760 if (locinput > PL_regeol)
1761 sayNO;
b8c5462f
JH
1762 nextchr = UCHARAT(locinput);
1763 break;
1764 }
d6a28714 1765 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1766 sayNO;
b8c5462f 1767 nextchr = UCHARAT(++locinput);
a0d0e21e 1768 break;
d6a28714
JH
1769 case SANY:
1770 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1771 sayNO;
1772 nextchr = UCHARAT(++locinput);
b85d18e9 1773 break;
d6a28714 1774 case ANYUTF8:
a0ed51b3 1775 if (nextchr & 0x80) {
b8c5462f 1776 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1777 if (locinput > PL_regeol)
1778 sayNO;
a0ed51b3
LW
1779 nextchr = UCHARAT(locinput);
1780 break;
1781 }
d6a28714 1782 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3
LW
1783 sayNO;
1784 nextchr = UCHARAT(++locinput);
1785 break;
d6a28714
JH
1786 case REG_ANY:
1787 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1788 sayNO;
76e3520e 1789 nextchr = UCHARAT(++locinput);
a0d0e21e 1790 break;
d6a28714 1791 case EXACT:
cd439c50
IZ
1792 s = STRING(scan);
1793 ln = STR_LEN(scan);
d6a28714
JH
1794 /* Inline the first character, for speed. */
1795 if (UCHARAT(s) != nextchr)
1796 sayNO;
1797 if (PL_regeol - locinput < ln)
1798 sayNO;
1799 if (ln > 1 && memNE(s, locinput, ln))
1800 sayNO;
1801 locinput += ln;
1802 nextchr = UCHARAT(locinput);
1803 break;
1804 case EXACTFL:
b8c5462f
JH
1805 PL_reg_flags |= RF_tainted;
1806 /* FALL THROUGH */
d6a28714 1807 case EXACTF:
cd439c50
IZ
1808 s = STRING(scan);
1809 ln = STR_LEN(scan);
d6a28714
JH
1810
1811 if (UTF) {
1812 char *l = locinput;
1813 char *e = s + ln;
1814 c1 = OP(scan) == EXACTF;
1815 while (s < e) {
1816 if (l >= PL_regeol)
1817 sayNO;
1818 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1819 toLOWER_utf8((U8*)l) :
1820 toLOWER_LC_utf8((U8*)l)))
1821 {
1822 sayNO;
1823 }
1824 s += UTF8SKIP(s);
1825 l += UTF8SKIP(l);
b8c5462f 1826 }
d6a28714 1827 locinput = l;
a0ed51b3
LW
1828 nextchr = UCHARAT(locinput);
1829 break;
1830 }
d6a28714
JH
1831
1832 /* Inline the first character, for speed. */
1833 if (UCHARAT(s) != nextchr &&
1834 UCHARAT(s) != ((OP(scan) == EXACTF)
1835 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1836 sayNO;
d6a28714 1837 if (PL_regeol - locinput < ln)
b8c5462f 1838 sayNO;
d6a28714
JH
1839 if (ln > 1 && (OP(scan) == EXACTF
1840 ? ibcmp(s, locinput, ln)
1841 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1842 sayNO;
d6a28714
JH
1843 locinput += ln;
1844 nextchr = UCHARAT(locinput);
a0d0e21e 1845 break;
d6a28714 1846 case ANYOFUTF8:
d6a28714 1847 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1848 sayNO;
d6a28714
JH
1849 if (locinput >= PL_regeol)
1850 sayNO;
1851 locinput += PL_utf8skip[nextchr];
1852 nextchr = UCHARAT(locinput);
1853 break;
1854 case ANYOF:
d6a28714 1855 if (nextchr < 0)
b8c5462f 1856 nextchr = UCHARAT(locinput);
936ed897 1857 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
1858 sayNO;
1859 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1860 sayNO;
b8c5462f
JH
1861 nextchr = UCHARAT(++locinput);
1862 break;
d6a28714 1863 case ALNUML:
b8c5462f
JH
1864 PL_reg_flags |= RF_tainted;
1865 /* FALL THROUGH */
d6a28714 1866 case ALNUM:
b8c5462f 1867 if (!nextchr)
4633a7c4 1868 sayNO;
d6a28714
JH
1869 if (!(OP(scan) == ALNUM
1870 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1871 sayNO;
1872 nextchr = UCHARAT(++locinput);
bbce6d69 1873 break;
d6a28714 1874 case ALNUMLUTF8:
3280af22 1875 PL_reg_flags |= RF_tainted;
bbce6d69 1876 /* FALL THROUGH */
d6a28714 1877 case ALNUMUTF8:
b8c5462f
JH
1878 if (!nextchr)
1879 sayNO;
1880 if (nextchr & 0x80) {
d6a28714
JH
1881 if (!(OP(scan) == ALNUMUTF8
1882 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1883 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1884 {
1885 sayNO;
a0ed51b3 1886 }
b8c5462f 1887 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1888 nextchr = UCHARAT(locinput);
1889 break;
1890 }
d6a28714
JH
1891 if (!(OP(scan) == ALNUMUTF8
1892 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 1893 sayNO;
b8c5462f 1894 nextchr = UCHARAT(++locinput);
a0d0e21e 1895 break;
d6a28714 1896 case NALNUML:
b8c5462f
JH
1897 PL_reg_flags |= RF_tainted;
1898 /* FALL THROUGH */
d6a28714
JH
1899 case NALNUM:
1900 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 1901 sayNO;
d6a28714
JH
1902 if (OP(scan) == NALNUM
1903 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 1904 sayNO;
b8c5462f 1905 nextchr = UCHARAT(++locinput);
a0ed51b3 1906 break;
d6a28714 1907 case NALNUMLUTF8:
b8c5462f
JH
1908 PL_reg_flags |= RF_tainted;
1909 /* FALL THROUGH */
d6a28714 1910 case NALNUMUTF8:
3280af22 1911 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1912 sayNO;
b8c5462f 1913 if (nextchr & 0x80) {
d6a28714
JH
1914 if (OP(scan) == NALNUMUTF8
1915 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1916 : isALNUM_LC_utf8((U8*)locinput))
1917 {
b8c5462f 1918 sayNO;
d6a28714 1919 }
b8c5462f
JH
1920 locinput += PL_utf8skip[nextchr];
1921 nextchr = UCHARAT(locinput);
1922 break;
1923 }
d6a28714
JH
1924 if (OP(scan) == NALNUMUTF8
1925 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 1926 sayNO;
76e3520e 1927 nextchr = UCHARAT(++locinput);
a0d0e21e 1928 break;
d6a28714
JH
1929 case BOUNDL:
1930 case NBOUNDL:
3280af22 1931 PL_reg_flags |= RF_tainted;
bbce6d69 1932 /* FALL THROUGH */
d6a28714
JH
1933 case BOUND:
1934 case NBOUND:
1935 /* was last char in word? */
1936 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
1937 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1938 ln = isALNUM(ln);
1939 n = isALNUM(nextchr);
1940 }
1941 else {
1942 ln = isALNUM_LC(ln);
1943 n = isALNUM_LC(nextchr);
1944 }
1945 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1946 sayNO;
a0d0e21e 1947 break;
d6a28714
JH
1948 case BOUNDLUTF8:
1949 case NBOUNDLUTF8:
a0ed51b3
LW
1950 PL_reg_flags |= RF_tainted;
1951 /* FALL THROUGH */
d6a28714
JH
1952 case BOUNDUTF8:
1953 case NBOUNDUTF8:
1954 /* was last char in word? */
1955 ln = (locinput != PL_regbol)
1956 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
1957 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1958 ln = isALNUM_uni(ln);
1959 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 1960 }
d6a28714
JH
1961 else {
1962 ln = isALNUM_LC_uni(ln);
1963 n = isALNUM_LC_utf8((U8*)locinput);
1964 }
1965 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 1966 sayNO;
a0ed51b3 1967 break;
d6a28714 1968 case SPACEL:
3280af22 1969 PL_reg_flags |= RF_tainted;
bbce6d69 1970 /* FALL THROUGH */
d6a28714
JH
1971 case SPACE:
1972 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1973 sayNO;
d6a28714
JH
1974 if (!(OP(scan) == SPACE
1975 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1976 sayNO;
76e3520e 1977 nextchr = UCHARAT(++locinput);
a0d0e21e 1978 break;
d6a28714 1979 case SPACELUTF8:
a0ed51b3
LW
1980 PL_reg_flags |= RF_tainted;
1981 /* FALL THROUGH */
d6a28714 1982 case SPACEUTF8:
a0ed51b3
LW
1983 if (!nextchr && locinput >= PL_regeol)
1984 sayNO;
1985 if (nextchr & 0x80) {
d6a28714
JH
1986 if (!(OP(scan) == SPACEUTF8
1987 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1988 : isSPACE_LC_utf8((U8*)locinput)))
1989 {
a0ed51b3 1990 sayNO;
d6a28714 1991 }
6f06b55f 1992 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1993 nextchr = UCHARAT(locinput);
1994 break;
1995 }
d6a28714
JH
1996 if (!(OP(scan) == SPACEUTF8
1997 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
1998 sayNO;
1999 nextchr = UCHARAT(++locinput);
2000 break;
d6a28714 2001 case NSPACEL:
3280af22 2002 PL_reg_flags |= RF_tainted;
bbce6d69 2003 /* FALL THROUGH */
d6a28714 2004 case NSPACE:
b8c5462f
JH
2005 if (!nextchr)
2006 sayNO;
d6a28714
JH
2007 if (OP(scan) == SPACE
2008 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2009 sayNO;
b8c5462f 2010 nextchr = UCHARAT(++locinput);
a0d0e21e 2011 break;
d6a28714 2012 case NSPACELUTF8:
a0ed51b3
LW
2013 PL_reg_flags |= RF_tainted;
2014 /* FALL THROUGH */
d6a28714 2015 case NSPACEUTF8:
b8c5462f
JH
2016 if (!nextchr)
2017 sayNO;
2018 if (nextchr & 0x80) {
d6a28714
JH
2019 if (OP(scan) == NSPACEUTF8
2020 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2021 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2022 {
2023 sayNO;
2024 }
2025 locinput += PL_utf8skip[nextchr];
2026 nextchr = UCHARAT(locinput);
2027 break;
a0ed51b3 2028 }
d6a28714
JH
2029 if (OP(scan) == NSPACEUTF8
2030 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2031 sayNO;
76e3520e 2032 nextchr = UCHARAT(++locinput);
a0d0e21e 2033 break;
d6a28714 2034 case DIGITL:
a0ed51b3
LW
2035 PL_reg_flags |= RF_tainted;
2036 /* FALL THROUGH */
d6a28714 2037 case DIGIT:
a0ed51b3
LW
2038 if (!nextchr && locinput >= PL_regeol)
2039 sayNO;
d6a28714
JH
2040 if (!(OP(scan) == DIGIT
2041 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2042 sayNO;
76e3520e 2043 nextchr = UCHARAT(++locinput);
a0d0e21e 2044 break;
d6a28714 2045 case DIGITLUTF8:
a0ed51b3
LW
2046 PL_reg_flags |= RF_tainted;
2047 /* FALL THROUGH */
d6a28714 2048 case DIGITUTF8:
a0ed51b3
LW
2049 if (!nextchr)
2050 sayNO;
2051 if (nextchr & 0x80) {
d6a28714
JH
2052 if (OP(scan) == NDIGITUTF8
2053 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2054 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 2055 {
a0ed51b3 2056 sayNO;
dfe13c55 2057 }
6f06b55f 2058 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2059 nextchr = UCHARAT(locinput);
2060 break;
2061 }
d6a28714 2062 if (!isDIGIT(nextchr))
a0ed51b3
LW
2063 sayNO;
2064 nextchr = UCHARAT(++locinput);
2065 break;
d6a28714 2066 case NDIGITL:
b8c5462f
JH
2067 PL_reg_flags |= RF_tainted;
2068 /* FALL THROUGH */
d6a28714 2069 case NDIGIT:
b8c5462f
JH
2070 if (!nextchr)
2071 sayNO;
d6a28714
JH
2072 if (OP(scan) == DIGIT
2073 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2074 sayNO;
76e3520e 2075 nextchr = UCHARAT(++locinput);
a0d0e21e 2076 break;
d6a28714 2077 case NDIGITLUTF8:
b8c5462f
JH
2078 PL_reg_flags |= RF_tainted;
2079 /* FALL THROUGH */
d6a28714 2080 case NDIGITUTF8:
b8c5462f
JH
2081 if (!nextchr && locinput >= PL_regeol)
2082 sayNO;
a0ed51b3 2083 if (nextchr & 0x80) {
d6a28714 2084 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 2085 sayNO;
6f06b55f 2086 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2087 nextchr = UCHARAT(locinput);
2088 break;
2089 }
d6a28714 2090 if (isDIGIT(nextchr))
a0ed51b3
LW
2091 sayNO;
2092 nextchr = UCHARAT(++locinput);
2093 break;
2094 case CLUMP:
dfe13c55 2095 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2096 sayNO;
6f06b55f 2097 locinput += PL_utf8skip[nextchr];
dfe13c55 2098 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2099 locinput += UTF8SKIP(locinput);
2100 if (locinput > PL_regeol)
2101 sayNO;
2102 nextchr = UCHARAT(locinput);
2103 break;
c8756f30 2104 case REFFL:
3280af22 2105 PL_reg_flags |= RF_tainted;
c8756f30 2106 /* FALL THROUGH */
c277df42 2107 case REF:
c8756f30 2108 case REFF:
c277df42 2109 n = ARG(scan); /* which paren pair */
cf93c79d 2110 ln = PL_regstartp[n];
2c2d71f5 2111 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2112 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2113 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2114 if (ln == PL_regendp[n])
a0d0e21e 2115 break;
a0ed51b3 2116
cf93c79d 2117 s = PL_bostr + ln;
a0ed51b3
LW
2118 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2119 char *l = locinput;
cf93c79d 2120 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2121 /*
2122 * Note that we can't do the "other character" lookup trick as
2123 * in the 8-bit case (no pun intended) because in Unicode we
2124 * have to map both upper and title case to lower case.
2125 */
2126 if (OP(scan) == REFF) {
2127 while (s < e) {
2128 if (l >= PL_regeol)
2129 sayNO;
dfe13c55 2130 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2131 sayNO;
2132 s += UTF8SKIP(s);
2133 l += UTF8SKIP(l);
2134 }
2135 }
2136 else {
2137 while (s < e) {
2138 if (l >= PL_regeol)
2139 sayNO;
dfe13c55 2140 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2141 sayNO;
2142 s += UTF8SKIP(s);
2143 l += UTF8SKIP(l);
2144 }
2145 }
2146 locinput = l;
2147 nextchr = UCHARAT(locinput);
2148 break;
2149 }
2150
a0d0e21e 2151 /* Inline the first character, for speed. */
76e3520e 2152 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2153 (OP(scan) == REF ||
2154 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2155 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2156 sayNO;
cf93c79d 2157 ln = PL_regendp[n] - ln;
3280af22 2158 if (locinput + ln > PL_regeol)
4633a7c4 2159 sayNO;
c8756f30
AK
2160 if (ln > 1 && (OP(scan) == REF
2161 ? memNE(s, locinput, ln)
2162 : (OP(scan) == REFF
2163 ? ibcmp(s, locinput, ln)
2164 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2165 sayNO;
a0d0e21e 2166 locinput += ln;
76e3520e 2167 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2168 break;
2169
2170 case NOTHING:
c277df42 2171 case TAIL:
a0d0e21e
LW
2172 break;
2173 case BACK:
2174 break;
c277df42
IZ
2175 case EVAL:
2176 {
2177 dSP;
533c011a 2178 OP_4tree *oop = PL_op;
3280af22
NIS
2179 COP *ocurcop = PL_curcop;
2180 SV **ocurpad = PL_curpad;
c277df42
IZ
2181 SV *ret;
2182
2183 n = ARG(scan);
533c011a 2184 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2185 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2186 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2187 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2188
cea2e8a9 2189 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2190 SPAGAIN;
2191 ret = POPs;
2192 PUTBACK;
2193
0f5d15d6
IZ
2194 PL_op = oop;
2195 PL_curpad = ocurpad;
2196 PL_curcop = ocurcop;
c277df42 2197 if (logical) {
0f5d15d6
IZ
2198 if (logical == 2) { /* Postponed subexpression. */
2199 regexp *re;
22c35a8c 2200 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2201 re_cc_state state;
0f5d15d6
IZ
2202 CHECKPOINT cp, lastcp;
2203
2204 if(SvROK(ret) || SvRMAGICAL(ret)) {
2205 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2206
2207 if(SvMAGICAL(sv))
2208 mg = mg_find(sv, 'r');
2209 }
2210 if (mg) {
2211 re = (regexp *)mg->mg_obj;
df0003d4 2212 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2213 }
2214 else {
2215 STRLEN len;
2216 char *t = SvPV(ret, len);
2217 PMOP pm;
2218 char *oprecomp = PL_regprecomp;
2219 I32 osize = PL_regsize;
2220 I32 onpar = PL_regnpar;
2221
2222 pm.op_pmflags = 0;
cea2e8a9 2223 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2224 if (!(SvFLAGS(ret)
2225 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2226 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2227 PL_regprecomp = oprecomp;
2228 PL_regsize = osize;
2229 PL_regnpar = onpar;
2230 }
2231 DEBUG_r(
2232 PerlIO_printf(Perl_debug_log,
2233 "Entering embedded `%s%.60s%s%s'\n",
2234 PL_colors[0],
2235 re->precomp,
2236 PL_colors[1],
2237 (strlen(re->precomp) > 60 ? "..." : ""))
2238 );
2239 state.node = next;
2240 state.prev = PL_reg_call_cc;
2241 state.cc = PL_regcc;
2242 state.re = PL_reg_re;
2243
2ab05381 2244 PL_regcc = 0;
0f5d15d6
IZ
2245
2246 cp = regcppush(0); /* Save *all* the positions. */
2247 REGCP_SET;
2248 cache_re(re);
2249 state.ss = PL_savestack_ix;
2250 *PL_reglastparen = 0;
2251 PL_reg_call_cc = &state;
2252 PL_reginput = locinput;
2c2d71f5
JH
2253
2254 /* XXXX This is too dramatic a measure... */
2255 PL_reg_maxiter = 0;
2256
0f5d15d6 2257 if (regmatch(re->program + 1)) {
2c914db6
IZ
2258 /* Even though we succeeded, we need to restore
2259 global variables, since we may be wrapped inside
2260 SUSPEND, thus the match may be not finished yet. */
2261
2262 /* XXXX Do this only if SUSPENDed? */
2263 PL_reg_call_cc = state.prev;
2264 PL_regcc = state.cc;
2265 PL_reg_re = state.re;
2266 cache_re(PL_reg_re);
2267
2268 /* XXXX This is too dramatic a measure... */
2269 PL_reg_maxiter = 0;
2270
2271 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2272 ReREFCNT_dec(re);
2273 regcpblow(cp);
2274 sayYES;
2275 }
0f5d15d6
IZ
2276 ReREFCNT_dec(re);
2277 REGCP_UNWIND;
2278 regcppop();
2279 PL_reg_call_cc = state.prev;
2280 PL_regcc = state.cc;
2281 PL_reg_re = state.re;
d3790889 2282 cache_re(PL_reg_re);
2c2d71f5
JH
2283
2284 /* XXXX This is too dramatic a measure... */
2285 PL_reg_maxiter = 0;
2286
0f5d15d6
IZ
2287 sayNO;
2288 }
c277df42 2289 sw = SvTRUE(ret);
0f5d15d6 2290 logical = 0;
a0ed51b3
LW
2291 }
2292 else
3280af22 2293 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2294 break;
2295 }
a0d0e21e 2296 case OPEN:
c277df42 2297 n = ARG(scan); /* which paren pair */
3280af22
NIS
2298 PL_reg_start_tmp[n] = locinput;
2299 if (n > PL_regsize)
2300 PL_regsize = n;
a0d0e21e
LW
2301 break;
2302 case CLOSE:
c277df42 2303 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2304 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2305 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2306 if (n > *PL_reglastparen)
2307 *PL_reglastparen = n;
a0d0e21e 2308 break;
c277df42
IZ
2309 case GROUPP:
2310 n = ARG(scan); /* which paren pair */
cf93c79d 2311 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2312 break;
2313 case IFTHEN:
2c2d71f5 2314 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2315 if (sw)
2316 next = NEXTOPER(NEXTOPER(scan));
2317 else {
2318 next = scan + ARG(scan);
2319 if (OP(next) == IFTHEN) /* Fake one. */
2320 next = NEXTOPER(NEXTOPER(next));
2321 }
2322 break;
2323 case LOGICAL:
0f5d15d6 2324 logical = scan->flags;
c277df42 2325 break;
2ab05381
IZ
2326/*******************************************************************
2327 PL_regcc contains infoblock about the innermost (...)* loop, and
2328 a pointer to the next outer infoblock.
2329
2330 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2331
2332 1) After matching X, regnode for CURLYX is processed;
2333
2334 2) This regnode creates infoblock on the stack, and calls
2335 regmatch() recursively with the starting point at WHILEM node;
2336
2337 3) Each hit of WHILEM node tries to match A and Z (in the order
2338 depending on the current iteration, min/max of {min,max} and
2339 greediness). The information about where are nodes for "A"
2340 and "Z" is read from the infoblock, as is info on how many times "A"
2341 was already matched, and greediness.
2342
2343 4) After A matches, the same WHILEM node is hit again.
2344
2345 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2346 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2347 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2348 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2349 of the external loop.
2350
2351 Currently present infoblocks form a tree with a stem formed by PL_curcc
2352 and whatever it mentions via ->next, and additional attached trees
2353 corresponding to temporarily unset infoblocks as in "5" above.
2354
2355 In the following picture infoblocks for outer loop of
2356 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2357 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2358 infoblocks are drawn below the "reset" infoblock.
2359
2360 In fact in the picture below we do not show failed matches for Z and T
2361 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2362 more obvious *why* one needs to *temporary* unset infoblocks.]
2363
2364 Matched REx position InfoBlocks Comment
2365 (Y(A)*?Z)*?T x
2366 Y(A)*?Z)*?T x <- O
2367 Y (A)*?Z)*?T x <- O
2368 Y A)*?Z)*?T x <- O <- I
2369 YA )*?Z)*?T x <- O <- I
2370 YA A)*?Z)*?T x <- O <- I
2371 YAA )*?Z)*?T x <- O <- I
2372 YAA Z)*?T x <- O # Temporary unset I
2373 I
2374
2375 YAAZ Y(A)*?Z)*?T x <- O
2376 I
2377
2378 YAAZY (A)*?Z)*?T x <- O
2379 I
2380
2381 YAAZY A)*?Z)*?T x <- O <- I
2382 I
2383
2384 YAAZYA )*?Z)*?T x <- O <- I
2385 I
2386
2387 YAAZYA Z)*?T x <- O # Temporary unset I
2388 I,I
2389
2390 YAAZYAZ )*?T x <- O
2391 I,I
2392
2393 YAAZYAZ T x # Temporary unset O
2394 O
2395 I,I
2396
2397 YAAZYAZT x
2398 O
2399 I,I
2400 *******************************************************************/
a0d0e21e
LW
2401 case CURLYX: {
2402 CURCUR cc;
3280af22 2403 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2404
2405 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2406 next += ARG(next);
3280af22
NIS
2407 cc.oldcc = PL_regcc;
2408 PL_regcc = &cc;
2409 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2410 cc.cur = -1;
2411 cc.min = ARG1(scan);
2412 cc.max = ARG2(scan);
c277df42 2413 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2414 cc.next = next;
2415 cc.minmod = minmod;
2416 cc.lastloc = 0;
3280af22 2417 PL_reginput = locinput;
a0d0e21e
LW
2418 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2419 regcpblow(cp);
3280af22 2420 PL_regcc = cc.oldcc;
4633a7c4 2421 saySAME(n);
a0d0e21e
LW
2422 }
2423 /* NOT REACHED */
2424 case WHILEM: {
2425 /*
2426 * This is really hard to understand, because after we match
2427 * what we're trying to match, we must make sure the rest of
2c2d71f5 2428 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2429 * to go back UP the parse tree by recursing ever deeper. And
2430 * if it fails, we have to reset our parent's current state
2431 * that we can try again after backing off.
2432 */
2433
c277df42 2434 CHECKPOINT cp, lastcp;
3280af22 2435 CURCUR* cc = PL_regcc;
c277df42
IZ
2436 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2437
4633a7c4 2438 n = cc->cur + 1; /* how many we know we matched */
3280af22 2439 PL_reginput = locinput;
a0d0e21e 2440
c277df42
IZ
2441 DEBUG_r(
2442 PerlIO_printf(Perl_debug_log,
2443 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2444 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2445 (long)n, (long)cc->min,
2446 (long)cc->max, (long)cc)
2447 );
4633a7c4 2448
a0d0e21e
LW
2449 /* If degenerate scan matches "", assume scan done. */
2450
579cf2c3 2451 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2452 PL_regcc = cc->oldcc;
2ab05381
IZ
2453 if (PL_regcc)
2454 ln = PL_regcc->cur;
c277df42 2455 DEBUG_r(
c3464db5
DD
2456 PerlIO_printf(Perl_debug_log,
2457 "%*s empty match detected, try continuation...\n",
3280af22 2458 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2459 );
a0d0e21e 2460 if (regmatch(cc->next))
4633a7c4 2461 sayYES;
2ab05381
IZ
2462 if (PL_regcc)
2463 PL_regcc->cur = ln;
3280af22 2464 PL_regcc = cc;
4633a7c4 2465 sayNO;
a0d0e21e
LW
2466 }
2467
2468 /* First just match a string of min scans. */
2469
2470 if (n < cc->min) {
2471 cc->cur = n;
2472 cc->lastloc = locinput;
4633a7c4
LW
2473 if (regmatch(cc->scan))
2474 sayYES;
2475 cc->cur = n - 1;
c277df42 2476 cc->lastloc = lastloc;
4633a7c4 2477 sayNO;
a0d0e21e
LW
2478 }
2479
2c2d71f5
JH
2480 if (scan->flags) {
2481 /* Check whether we already were at this position.
2482 Postpone detection until we know the match is not
2483 *that* much linear. */
2484 if (!PL_reg_maxiter) {
2485 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2486 PL_reg_leftiter = PL_reg_maxiter;
2487 }
2488 if (PL_reg_leftiter-- == 0) {
2489 I32 size = (PL_reg_maxiter + 7)/8;
2490 if (PL_reg_poscache) {
2491 if (PL_reg_poscache_size < size) {
2492 Renew(PL_reg_poscache, size, char);
2493 PL_reg_poscache_size = size;
2494 }
2495 Zero(PL_reg_poscache, size, char);
2496 }
2497 else {
2498 PL_reg_poscache_size = size;
2499 Newz(29, PL_reg_poscache, size, char);
2500 }
2501 DEBUG_r(
2502 PerlIO_printf(Perl_debug_log,
2503 "%sDetected a super-linear match, switching on caching%s...\n",
2504 PL_colors[4], PL_colors[5])
2505 );
2506 }
2507 if (PL_reg_leftiter < 0) {
2508 I32 o = locinput - PL_bostr, b;
2509
2510 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2511 b = o % 8;
2512 o /= 8;
2513 if (PL_reg_poscache[o] & (1<<b)) {
2514 DEBUG_r(
2515 PerlIO_printf(Perl_debug_log,
2516 "%*s already tried at this position...\n",
2517 REPORT_CODE_OFF+PL_regindent*2, "")
2518 );
7821416a 2519 sayNO_SILENT;
2c2d71f5
JH
2520 }
2521 PL_reg_poscache[o] |= (1<<b);
2522 }
2523 }
2524
a0d0e21e
LW
2525 /* Prefer next over scan for minimal matching. */
2526
2527 if (cc->minmod) {
3280af22 2528 PL_regcc = cc->oldcc;
2ab05381
IZ
2529 if (PL_regcc)
2530 ln = PL_regcc->cur;
5f05dabc 2531 cp = regcppush(cc->parenfloor);
c277df42 2532 REGCP_SET;
5f05dabc 2533 if (regmatch(cc->next)) {
c277df42 2534 regcpblow(cp);
4633a7c4 2535 sayYES; /* All done. */
5f05dabc 2536 }
c277df42 2537 REGCP_UNWIND;
5f05dabc 2538 regcppop();
2ab05381
IZ
2539 if (PL_regcc)
2540 PL_regcc->cur = ln;
3280af22 2541 PL_regcc = cc;
a0d0e21e 2542
c277df42 2543 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2544 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
2545 && !(PL_reg_flags & RF_warned)) {
2546 PL_reg_flags |= RF_warned;
cea2e8a9 2547 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
2548 "Complex regular subexpression recursion",
2549 REG_INFTY - 1);
c277df42 2550 }
4633a7c4 2551 sayNO;
c277df42 2552 }
a687059c 2553
c277df42 2554 DEBUG_r(
c3464db5
DD
2555 PerlIO_printf(Perl_debug_log,
2556 "%*s trying longer...\n",
3280af22 2557 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2558 );
a0d0e21e 2559 /* Try scanning more and see if it helps. */
3280af22 2560 PL_reginput = locinput;
a0d0e21e
LW
2561 cc->cur = n;
2562 cc->lastloc = locinput;
5f05dabc 2563 cp = regcppush(cc->parenfloor);
c277df42 2564 REGCP_SET;
5f05dabc 2565 if (regmatch(cc->scan)) {
c277df42 2566 regcpblow(cp);
4633a7c4 2567 sayYES;
5f05dabc 2568 }
c277df42 2569 REGCP_UNWIND;
5f05dabc 2570 regcppop();
4633a7c4 2571 cc->cur = n - 1;
c277df42 2572 cc->lastloc = lastloc;
4633a7c4 2573 sayNO;
a0d0e21e
LW
2574 }
2575
2576 /* Prefer scan over next for maximal matching. */
2577
2578 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2579 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2580 cc->cur = n;
2581 cc->lastloc = locinput;
c277df42 2582 REGCP_SET;
5f05dabc 2583 if (regmatch(cc->scan)) {
c277df42 2584 regcpblow(cp);
4633a7c4 2585 sayYES;
5f05dabc 2586 }
c277df42 2587 REGCP_UNWIND;
a0d0e21e 2588 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2589 PL_reginput = locinput;
c277df42 2590 DEBUG_r(
c3464db5
DD
2591 PerlIO_printf(Perl_debug_log,
2592 "%*s failed, try continuation...\n",
3280af22 2593 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2594 );
2595 }
599cee73
PM
2596 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2597 && !(PL_reg_flags & RF_warned)) {
3280af22 2598 PL_reg_flags |= RF_warned;
cea2e8a9 2599 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
2600 "Complex regular subexpression recursion",
2601 REG_INFTY - 1);
a0d0e21e
LW
2602 }
2603
2604 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2605 PL_regcc = cc->oldcc;
2ab05381
IZ
2606 if (PL_regcc)
2607 ln = PL_regcc->cur;
a0d0e21e 2608 if (regmatch(cc->next))
4633a7c4 2609 sayYES;
2ab05381
IZ
2610 if (PL_regcc)
2611 PL_regcc->cur = ln;
3280af22 2612 PL_regcc = cc;
4633a7c4 2613 cc->cur = n - 1;
c277df42 2614 cc->lastloc = lastloc;
4633a7c4 2615 sayNO;
a0d0e21e
LW
2616 }
2617 /* NOT REACHED */
c277df42
IZ
2618 case BRANCHJ:
2619 next = scan + ARG(scan);
2620 if (next == scan)
2621 next = NULL;
2622 inner = NEXTOPER(NEXTOPER(scan));
2623 goto do_branch;
2624 case BRANCH:
2625 inner = NEXTOPER(scan);
2626 do_branch:
2627 {
2628 CHECKPOINT lastcp;
2629 c1 = OP(scan);
2630 if (OP(next) != c1) /* No choice. */
2631 next = inner; /* Avoid recursion. */
a0d0e21e 2632 else {
3280af22 2633 int lastparen = *PL_reglastparen;
c277df42
IZ
2634
2635 REGCP_SET;
a0d0e21e 2636 do {
3280af22 2637 PL_reginput = locinput;
c277df42 2638 if (regmatch(inner))
4633a7c4 2639 sayYES;
c277df42 2640 REGCP_UNWIND;
3280af22 2641 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2642 PL_regendp[n] = -1;
3280af22 2643 *PL_reglastparen = n;
c277df42 2644 scan = next;
a0d0e21e 2645 /*SUPPRESS 560*/
c277df42
IZ
2646 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2647 next += n;
a0d0e21e 2648 else
c277df42 2649 next = NULL;
c277df42
IZ
2650 inner = NEXTOPER(scan);
2651 if (c1 == BRANCHJ) {
2652 inner = NEXTOPER(inner);
2653 }
2654 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2655 sayNO;
a0d0e21e 2656 /* NOTREACHED */
a687059c 2657 }
a0d0e21e
LW
2658 }
2659 break;
2660 case MINMOD:
2661 minmod = 1;
2662 break;
c277df42
IZ
2663 case CURLYM:
2664 {
00db4c45 2665 I32 l = 0;
c277df42
IZ
2666 CHECKPOINT lastcp;
2667
2668 /* We suppose that the next guy does not need
2669 backtracking: in particular, it is of constant length,
2670 and has no parenths to influence future backrefs. */
2671 ln = ARG1(scan); /* min to match */
2672 n = ARG2(scan); /* max to match */
c277df42
IZ
2673 paren = scan->flags;
2674 if (paren) {
3280af22
NIS
2675 if (paren > PL_regsize)
2676 PL_regsize = paren;
2677 if (paren > *PL_reglastparen)
2678 *PL_reglastparen = paren;
c277df42 2679 }
dc45a647 2680 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2681 if (paren)
2682 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2683 PL_reginput = locinput;
c277df42
IZ
2684 if (minmod) {
2685 minmod = 0;
2686 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2687 sayNO;
5f4b28b2 2688 if (ln && l == 0 && n >= ln
c277df42
IZ
2689 /* In fact, this is tricky. If paren, then the
2690 fact that we did/didnot match may influence
2691 future execution. */
2692 && !(paren && ln == 0))
2693 ln = n;
3280af22 2694 locinput = PL_reginput;
22c35a8c 2695 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2696 c1 = (U8)*STRING(next);
c277df42 2697 if (OP(next) == EXACTF)
22c35a8c 2698 c2 = PL_fold[c1];
c277df42 2699 else if (OP(next) == EXACTFL)
22c35a8c 2700 c2 = PL_fold_locale[c1];
c277df42
IZ
2701 else
2702 c2 = c1;
a0ed51b3
LW
2703 }
2704 else
c277df42
IZ
2705 c1 = c2 = -1000;
2706 REGCP_SET;
5f4b28b2 2707 /* This may be improved if l == 0. */
c277df42
IZ
2708 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2709 /* If it could work, try it. */
2710 if (c1 == -1000 ||
3280af22
NIS
2711 UCHARAT(PL_reginput) == c1 ||
2712 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2713 {
2714 if (paren) {
2715 if (n) {
cf93c79d
IZ
2716 PL_regstartp[paren] =
2717 HOPc(PL_reginput, -l) - PL_bostr;
2718 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2719 }
2720 else
cf93c79d 2721 PL_regendp[paren] = -1;
c277df42
IZ
2722 }
2723 if (regmatch(next))
2724 sayYES;
2725 REGCP_UNWIND;
2726 }
2727 /* Couldn't or didn't -- move forward. */
3280af22 2728 PL_reginput = locinput;
c277df42
IZ
2729 if (regrepeat_hard(scan, 1, &l)) {
2730 ln++;
3280af22 2731 locinput = PL_reginput;
c277df42
IZ
2732 }
2733 else
2734 sayNO;
2735 }
a0ed51b3
LW
2736 }
2737 else {
c277df42
IZ
2738 n = regrepeat_hard(scan, n, &l);
2739 if (n != 0 && l == 0
2740 /* In fact, this is tricky. If paren, then the
2741 fact that we did/didnot match may influence
2742 future execution. */
2743 && !(paren && ln == 0))
2744 ln = n;
3280af22 2745 locinput = PL_reginput;
c277df42 2746 DEBUG_r(
5c0ca799 2747 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2748 "%*s matched %d times, len=%"IVdf"...\n",
2749 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
2750 n, (IV)l)
c277df42
IZ
2751 );
2752 if (n >= ln) {
22c35a8c 2753 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2754 c1 = (U8)*STRING(next);
c277df42 2755 if (OP(next) == EXACTF)
22c35a8c 2756 c2 = PL_fold[c1];
c277df42 2757 else if (OP(next) == EXACTFL)
22c35a8c 2758 c2 = PL_fold_locale[c1];
c277df42
IZ
2759 else
2760 c2 = c1;
a0ed51b3
LW
2761 }
2762 else
c277df42
IZ
2763 c1 = c2 = -1000;
2764 }
2765 REGCP_SET;
2766 while (n >= ln) {
2767 /* If it could work, try it. */
2768 if (c1 == -1000 ||
3280af22
NIS
2769 UCHARAT(PL_reginput) == c1 ||
2770 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2771 {
2772 DEBUG_r(
c3464db5 2773 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2774 "%*s trying tail with n=%"IVdf"...\n",
2775 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2776 );
2777 if (paren) {
2778 if (n) {
cf93c79d
IZ
2779 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2780 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2781 }
a0ed51b3 2782 else
cf93c79d 2783 PL_regendp[paren] = -1;
c277df42 2784 }
a0ed51b3
LW
2785 if (regmatch(next))
2786 sayYES;
2787 REGCP_UNWIND;
2788 }
c277df42
IZ
2789 /* Couldn't or didn't -- back up. */
2790 n--;
dfe13c55 2791 locinput = HOPc(locinput, -l);
3280af22 2792 PL_reginput = locinput;
c277df42
IZ
2793 }
2794 }
2795 sayNO;
2796 break;
2797 }
2798 case CURLYN:
2799 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2800 if (paren > PL_regsize)
2801 PL_regsize = paren;
2802 if (paren > *PL_reglastparen)
2803 *PL_reglastparen = paren;
c277df42
IZ
2804 ln = ARG1(scan); /* min to match */
2805 n = ARG2(scan); /* max to match */
dc45a647 2806 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2807 goto repeat;
a0d0e21e 2808 case CURLY:
c277df42 2809 paren = 0;
a0d0e21e
LW
2810 ln = ARG1(scan); /* min to match */
2811 n = ARG2(scan); /* max to match */
dc45a647 2812 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2813 goto repeat;
2814 case STAR:
2815 ln = 0;
c277df42 2816 n = REG_INFTY;
a0d0e21e 2817 scan = NEXTOPER(scan);
c277df42 2818 paren = 0;
a0d0e21e
LW
2819 goto repeat;
2820 case PLUS:
c277df42
IZ
2821 ln = 1;
2822 n = REG_INFTY;
2823 scan = NEXTOPER(scan);
2824 paren = 0;
2825 repeat:
a0d0e21e
LW
2826 /*
2827 * Lookahead to avoid useless match attempts
2828 * when we know what character comes next.
2829 */
22c35a8c 2830 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2831 c1 = (U8)*STRING(next);
bbce6d69 2832 if (OP(next) == EXACTF)
22c35a8c 2833 c2 = PL_fold[c1];
bbce6d69 2834 else if (OP(next) == EXACTFL)
22c35a8c 2835 c2 = PL_fold_locale[c1];
bbce6d69 2836 else
2837 c2 = c1;
2838 }
a0d0e21e 2839 else
bbce6d69 2840 c1 = c2 = -1000;
3280af22 2841 PL_reginput = locinput;
a0d0e21e 2842 if (minmod) {
c277df42 2843 CHECKPOINT lastcp;
a0d0e21e
LW
2844 minmod = 0;
2845 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2846 sayNO;
a0ed51b3 2847 locinput = PL_reginput;
c277df42 2848 REGCP_SET;
0fe9bf95
IZ
2849 if (c1 != -1000) {
2850 char *e = locinput + n - ln; /* Should not check after this */
2851 char *old = locinput;
2852
2853 if (e >= PL_regeol || (n == REG_INFTY))
2854 e = PL_regeol - 1;
2855 while (1) {
2856 /* Find place 'next' could work */
2857 if (c1 == c2) {
2858 while (locinput <= e && *locinput != c1)
2859 locinput++;
2860 } else {
2861 while (locinput <= e
2862 && *locinput != c1
2863 && *locinput != c2)
2864 locinput++;
2865 }
2866 if (locinput > e)
2867 sayNO;
2868 /* PL_reginput == old now */
2869 if (locinput != old) {
2870 ln = 1; /* Did some */
2871 if (regrepeat(scan, locinput - old) <
2872 locinput - old)
2873 sayNO;
2874 }
2875 /* PL_reginput == locinput now */
2876 if (paren) {
2877 if (ln) {
cf93c79d
IZ
2878 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2879 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2880 }
2881 else
cf93c79d 2882 PL_regendp[paren] = -1;
0fe9bf95
IZ
2883 }
2884 if (regmatch(next))
2885 sayYES;
2886 PL_reginput = locinput; /* Could be reset... */
2887 REGCP_UNWIND;
2888 /* Couldn't or didn't -- move forward. */
2889 old = locinput++;
2890 }
2891 }
2892 else
c277df42 2893 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2894 /* If it could work, try it. */
bbce6d69 2895 if (c1 == -1000 ||
3280af22
NIS
2896 UCHARAT(PL_reginput) == c1 ||
2897 UCHARAT(PL_reginput) == c2)
bbce6d69 2898 {
c277df42
IZ
2899 if (paren) {
2900 if (n) {
cf93c79d
IZ
2901 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2902 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2903 }
2904 else
cf93c79d 2905 PL_regendp[paren] = -1;
c277df42 2906 }
a0d0e21e 2907 if (regmatch(next))
4633a7c4 2908 sayYES;
c277df42 2909 REGCP_UNWIND;
bbce6d69 2910 }
c277df42 2911 /* Couldn't or didn't -- move forward. */
a0ed51b3 2912 PL_reginput = locinput;
a0d0e21e
LW
2913 if (regrepeat(scan, 1)) {
2914 ln++;
a0ed51b3
LW
2915 locinput = PL_reginput;
2916 }
2917 else
4633a7c4 2918 sayNO;
a0d0e21e
LW
2919 }
2920 }
2921 else {
c277df42 2922 CHECKPOINT lastcp;
a0d0e21e 2923 n = regrepeat(scan, n);
a0ed51b3 2924 locinput = PL_reginput;
22c35a8c 2925 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 2926 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2927 ln = n; /* why back off? */
c277df42
IZ
2928 REGCP_SET;
2929 if (paren) {
2930 while (n >= ln) {
2931 /* If it could work, try it. */
2932 if (c1 == -1000 ||
3280af22
NIS
2933 UCHARAT(PL_reginput) == c1 ||
2934 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2935 {
2936 if (paren && n) {
2937 if (n) {
cf93c79d
IZ
2938 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2939 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2940 }
2941 else
cf93c79d 2942 PL_regendp[paren] = -1;
c277df42
IZ
2943 }
2944 if (regmatch(next))
2945 sayYES;
2946 REGCP_UNWIND;
2947 }
2948 /* Couldn't or didn't -- back up. */
2949 n--;
dfe13c55 2950 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2951 }
a0ed51b3
LW
2952 }
2953 else {
c277df42
IZ
2954 while (n >= ln) {
2955 /* If it could work, try it. */
2956 if (c1 == -1000 ||
3280af22
NIS
2957 UCHARAT(PL_reginput) == c1 ||
2958 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2959 {
2960 if (regmatch(next))
2961 sayYES;
2962 REGCP_UNWIND;
2963 }
2964 /* Couldn't or didn't -- back up. */
2965 n--;
dfe13c55 2966 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2967 }
a0d0e21e
LW
2968 }
2969 }
4633a7c4 2970 sayNO;
c277df42 2971 break;
a0d0e21e 2972 case END:
0f5d15d6
IZ
2973 if (PL_reg_call_cc) {
2974 re_cc_state *cur_call_cc = PL_reg_call_cc;
2975 CURCUR *cctmp = PL_regcc;
2976 regexp *re = PL_reg_re;
2977 CHECKPOINT cp, lastcp;
2978
2979 cp = regcppush(0); /* Save *all* the positions. */
2980 REGCP_SET;
2981 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2982 the caller. */
2983 PL_reginput = locinput; /* Make position available to
2984 the callcc. */
2985 cache_re(PL_reg_call_cc->re);
2986 PL_regcc = PL_reg_call_cc->cc;
2987 PL_reg_call_cc = PL_reg_call_cc->prev;
2988 if (regmatch(cur_call_cc->node)) {
2989 PL_reg_call_cc = cur_call_cc;
2990 regcpblow(cp);
2991 sayYES;
2992 }
2993 REGCP_UNWIND;
2994 regcppop();
2995 PL_reg_call_cc = cur_call_cc;
2996 PL_regcc = cctmp;
2997 PL_reg_re = re;
2998 cache_re(re);
2999
3000 DEBUG_r(
3001 PerlIO_printf(Perl_debug_log,
3002 "%*s continuation failed...\n",
3003 REPORT_CODE_OFF+PL_regindent*2, "")
3004 );
7821416a 3005 sayNO_SILENT;
0f5d15d6 3006 }
7821416a
IZ
3007 if (locinput < PL_regtill) {
3008 DEBUG_r(PerlIO_printf(Perl_debug_log,
3009 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3010 PL_colors[4],
3011 (long)(locinput - PL_reg_starttry),
3012 (long)(PL_regtill - PL_reg_starttry),
3013 PL_colors[5]));
3014 sayNO_FINAL; /* Cannot match: too short. */
3015 }
3016 PL_reginput = locinput; /* put where regtry can find it */
3017 sayYES_FINAL; /* Success! */
7e5428c5 3018 case SUCCEED:
3280af22 3019 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3020 sayYES_LOUD; /* Success! */
c277df42
IZ
3021 case SUSPEND:
3022 n = 1;
9fe1d20c 3023 PL_reginput = locinput;
c277df42 3024 goto do_ifmatch;
a0d0e21e 3025 case UNLESSM:
c277df42 3026 n = 0;
a0ed51b3 3027 if (scan->flags) {
0fe9bf95
IZ
3028 if (UTF) { /* XXXX This is absolutely
3029 broken, we read before
3030 start of string. */
3031 s = HOPMAYBEc(locinput, -scan->flags);
3032 if (!s)
3033 goto say_yes;
3034 PL_reginput = s;
3035 }
3036 else {
3037 if (locinput < PL_bostr + scan->flags)
3038 goto say_yes;
3039 PL_reginput = locinput - scan->flags;
3040 goto do_ifmatch;
3041 }
a0ed51b3
LW
3042 }
3043 else
3044 PL_reginput = locinput;
c277df42
IZ
3045 goto do_ifmatch;
3046 case IFMATCH:
3047 n = 1;
a0ed51b3 3048 if (scan->flags) {
0fe9bf95
IZ
3049 if (UTF) { /* XXXX This is absolutely
3050 broken, we read before
3051 start of string. */
3052 s = HOPMAYBEc(locinput, -scan->flags);
3053 if (!s || s < PL_bostr)
3054 goto say_no;
3055 PL_reginput = s;
3056 }
3057 else {
3058 if (locinput < PL_bostr + scan->flags)
3059 goto say_no;
3060 PL_reginput = locinput - scan->flags;
3061 goto do_ifmatch;
3062 }
a0ed51b3
LW
3063 }
3064 else
3065 PL_reginput = locinput;
3066
c277df42 3067 do_ifmatch:
c277df42
IZ
3068 inner = NEXTOPER(NEXTOPER(scan));
3069 if (regmatch(inner) != n) {
3070 say_no:
3071 if (logical) {
3072 logical = 0;
3073 sw = 0;
3074 goto do_longjump;
a0ed51b3
LW
3075 }
3076 else
c277df42
IZ
3077 sayNO;
3078 }
3079 say_yes:
3080 if (logical) {
3081 logical = 0;
3082 sw = 1;
3083 }
fe44a5e8 3084 if (OP(scan) == SUSPEND) {
3280af22 3085 locinput = PL_reginput;
565764a8 3086 nextchr = UCHARAT(locinput);
fe44a5e8 3087 }
c277df42
IZ
3088 /* FALL THROUGH. */
3089 case LONGJMP:
3090 do_longjump:
3091 next = scan + ARG(scan);
3092 if (next == scan)
3093 next = NULL;
a0d0e21e
LW
3094 break;
3095 default:
b900a521 3096 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3097 PTR2UV(scan), OP(scan));
cea2e8a9 3098 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3099 }
a0d0e21e
LW
3100 scan = next;
3101 }
a687059c 3102
a0d0e21e
LW
3103 /*
3104 * We get here only if there's trouble -- normally "case END" is
3105 * the terminating point.
3106 */
cea2e8a9 3107 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3108 /*NOTREACHED*/
4633a7c4
LW
3109 sayNO;
3110
7821416a
IZ
3111yes_loud:
3112 DEBUG_r(
3113 PerlIO_printf(Perl_debug_log,
3114 "%*s %scould match...%s\n",
3115 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3116 );
3117 goto yes;
3118yes_final:
3119 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3120 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3121yes:
3122#ifdef DEBUGGING
3280af22 3123 PL_regindent--;
4633a7c4
LW
3124#endif
3125 return 1;
3126
3127no:
7821416a
IZ
3128 DEBUG_r(
3129 PerlIO_printf(Perl_debug_log,
3130 "%*s %sfailed...%s\n",
3131 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3132 );
3133 goto do_no;
3134no_final:
3135do_no:
4633a7c4 3136#ifdef DEBUGGING
3280af22 3137 PL_regindent--;
4633a7c4 3138#endif
a0d0e21e 3139 return 0;
a687059c
LW
3140}
3141
3142/*
3143 - regrepeat - repeatedly match something simple, report how many
3144 */
3145/*
3146 * [This routine now assumes that it will only match on things of length 1.
3147 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3148 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3149 */
76e3520e 3150STATIC I32
cea2e8a9 3151S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3152{
5c0ca799 3153 dTHR;
a0d0e21e 3154 register char *scan;
a0d0e21e 3155 register I32 c;
3280af22 3156 register char *loceol = PL_regeol;
a0ed51b3 3157 register I32 hardcount = 0;
a0d0e21e 3158
3280af22 3159 scan = PL_reginput;
c277df42 3160 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3161 loceol = scan + max;
a0d0e21e 3162 switch (OP(p)) {
22c35a8c 3163 case REG_ANY:
a0d0e21e
LW
3164 while (scan < loceol && *scan != '\n')
3165 scan++;
3166 break;
3167 case SANY:
3168 scan = loceol;
3169 break;
a0ed51b3
LW
3170 case ANYUTF8:
3171 loceol = PL_regeol;
3172 while (scan < loceol && *scan != '\n') {
3173 scan += UTF8SKIP(scan);
3174 hardcount++;
3175 }
3176 break;
3177 case SANYUTF8:
3178 loceol = PL_regeol;
3179 while (scan < loceol) {
3180 scan += UTF8SKIP(scan);
3181 hardcount++;
3182 }
3183 break;
bbce6d69 3184 case EXACT: /* length of string is 1 */
cd439c50 3185 c = (U8)*STRING(p);
bbce6d69 3186 while (scan < loceol && UCHARAT(scan) == c)
3187 scan++;
3188 break;
3189 case EXACTF: /* length of string is 1 */
cd439c50 3190 c = (U8)*STRING(p);
bbce6d69 3191 while (scan < loceol &&
22c35a8c 3192 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3193 scan++;
3194 break;
3195 case EXACTFL: /* length of string is 1 */
3280af22 3196 PL_reg_flags |= RF_tainted;
cd439c50 3197 c = (U8)*STRING(p);
bbce6d69 3198 while (scan < loceol &&
22c35a8c 3199 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3200 scan++;
3201 break;
a0ed51b3
LW
3202 case ANYOFUTF8:
3203 loceol = PL_regeol;
3204 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3205 scan += UTF8SKIP(scan);
3206 hardcount++;
3207 }
3208 break;
a0d0e21e 3209 case ANYOF:
936ed897 3210 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3211 scan++;
a0d0e21e
LW
3212 break;
3213 case ALNUM:
3214 while (scan < loceol && isALNUM(*scan))
3215 scan++;
3216 break;
a0ed51b3
LW
3217 case ALNUMUTF8:
3218 loceol = PL_regeol;
dfe13c55 3219 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3220 scan += UTF8SKIP(scan);
3221 hardcount++;
3222 }
3223 break;
bbce6d69 3224 case ALNUML:
3280af22 3225 PL_reg_flags |= RF_tainted;
bbce6d69 3226 while (scan < loceol && isALNUM_LC(*scan))
3227 scan++;
3228 break;
a0ed51b3
LW
3229 case ALNUMLUTF8:
3230 PL_reg_flags |= RF_tainted;
3231 loceol = PL_regeol;
dfe13c55 3232 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3233 scan += UTF8SKIP(scan);
3234 hardcount++;
3235 }
3236 break;
3237 break;
a0d0e21e
LW
3238 case NALNUM:
3239 while (scan < loceol && !isALNUM(*scan))
3240 scan++;
3241 break;
a0ed51b3
LW
3242 case NALNUMUTF8:
3243 loceol = PL_regeol;
dfe13c55 3244 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3245 scan += UTF8SKIP(scan);
3246 hardcount++;
3247 }
3248 break;
bbce6d69 3249 case NALNUML:
3280af22 3250 PL_reg_flags |= RF_tainted;
bbce6d69 3251 while (scan < loceol && !isALNUM_LC(*scan))
3252 scan++;
3253 break;
a0ed51b3
LW
3254 case NALNUMLUTF8:
3255 PL_reg_flags |= RF_tainted;
3256 loceol = PL_regeol;
dfe13c55 3257 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3258 scan += UTF8SKIP(scan);
3259 hardcount++;
3260 }
3261 break;
a0d0e21e
LW
3262 case SPACE:
3263 while (scan < loceol && isSPACE(*scan))
3264 scan++;
3265 break;
a0ed51b3
LW
3266 case SPACEUTF8:
3267 loceol = PL_regeol;
dfe13c55 3268 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3269 scan += UTF8SKIP(scan);
3270 hardcount++;
3271 }
3272 break;
bbce6d69 3273 case SPACEL:
3280af22 3274 PL_reg_flags |= RF_tainted;
bbce6d69 3275 while (scan < loceol && isSPACE_LC(*scan))
3276 scan++;
3277 break;
a0ed51b3
LW
3278 case SPACELUTF8:
3279 PL_reg_flags |= RF_tainted;
3280 loceol = PL_regeol;
dfe13c55 3281 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3282 scan += UTF8SKIP(scan);
3283 hardcount++;
3284 }
3285 break;
a0d0e21e
LW
3286 case NSPACE:
3287 while (scan < loceol && !isSPACE(*scan))
3288 scan++;
3289 break;
a0ed51b3
LW
3290 case NSPACEUTF8:
3291 loceol = PL_regeol;
dfe13c55 3292 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3293 scan += UTF8SKIP(scan);
3294 hardcount++;
3295 }
3296 break;
bbce6d69 3297 case NSPACEL:
3280af22 3298 PL_reg_flags |= RF_tainted;
bbce6d69 3299 while (scan < loceol && !isSPACE_LC(*scan))
3300 scan++;
3301 break;
a0ed51b3
LW
3302 case NSPACELUTF8:
3303 PL_reg_flags |= RF_tainted;
3304 loceol = PL_regeol;
dfe13c55 3305 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3306 scan += UTF8SKIP(scan);
3307 hardcount++;
3308 }
3309 break;
a0d0e21e
LW
3310 case DIGIT:
3311 while (scan < loceol && isDIGIT(*scan))
3312 scan++;
3313 break;
a0ed51b3
LW
3314 case DIGITUTF8:
3315 loceol = PL_regeol;
dfe13c55 3316 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3317 scan += UTF8SKIP(scan);
3318 hardcount++;
3319 }
3320 break;
3321 break;
a0d0e21e
LW
3322 case NDIGIT:
3323 while (scan < loceol && !isDIGIT(*scan))
3324 scan++;
3325 break;
a0ed51b3
LW
3326 case NDIGITUTF8:
3327 loceol = PL_regeol;
dfe13c55 3328 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3329 scan += UTF8SKIP(scan);
3330 hardcount++;
3331 }
3332 break;
a0d0e21e
LW
3333 default: /* Called on something of 0 width. */
3334 break; /* So match right here or not at all. */
3335 }
a687059c 3336
a0ed51b3
LW
3337 if (hardcount)
3338 c = hardcount;
3339 else
3340 c = scan - PL_reginput;
3280af22 3341 PL_reginput = scan;
a687059c 3342
c277df42
IZ
3343 DEBUG_r(
3344 {
3345 SV *prop = sv_newmortal();
3346
3347 regprop(prop, p);
3348 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3349 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3350 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3351 });
3352
a0d0e21e 3353 return(c);
a687059c
LW
3354}
3355
3356/*
c277df42
IZ
3357 - regrepeat_hard - repeatedly match something, report total lenth and length
3358 *
3359 * The repeater is supposed to have constant length.
3360 */
3361
76e3520e 3362STATIC I32
cea2e8a9 3363S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3364{
5c0ca799 3365 dTHR;
c277df42
IZ
3366 register char *scan;
3367 register char *start;
3280af22 3368 register char *loceol = PL_regeol;
a0ed51b3 3369 I32 l = 0;
708e3b05 3370 I32 count = 0, res = 1;
a0ed51b3
LW
3371
3372 if (!max)
3373 return 0;
c277df42 3374
3280af22 3375 start = PL_reginput;
a0ed51b3 3376 if (UTF) {
708e3b05 3377 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3378 if (!count++) {
3379 l = 0;
3380 while (start < PL_reginput) {
3381 l++;
3382 start += UTF8SKIP(start);
3383 }
3384 *lp = l;
3385 if (l == 0)
3386 return max;
3387 }
3388 if (count == max)
3389 return count;
3390 }
3391 }
3392 else {
708e3b05 3393 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3394 if (!count++) {
3395 *lp = l = PL_reginput - start;
3396 if (max != REG_INFTY && l*max < loceol - scan)
3397 loceol = scan + l*max;
3398 if (l == 0)
3399 return max;
c277df42
IZ
3400 }
3401 }
3402 }
708e3b05 3403 if (!res)
3280af22 3404 PL_reginput = scan;
c277df42 3405
a0ed51b3 3406 return count;
c277df42
IZ
3407}
3408
3409/*
cb8d8820 3410 - reginclass - determine if a character falls into a character class
bbce6d69 3411 */
3412
76e3520e 3413STATIC bool
936ed897 3414S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3415{
5c0ca799 3416 dTHR;
b8c5462f 3417 char flags = ANYOF_FLAGS(p);
bbce6d69 3418 bool match = FALSE;
3419
3420 c &= 0xFF;
b8c5462f 3421 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3422 match = TRUE;
3423 else if (flags & ANYOF_FOLD) {
3424 I32 cf;
3425 if (flags & ANYOF_LOCALE) {
3280af22 3426 PL_reg_flags |= RF_tainted;
22c35a8c 3427 cf = PL_fold_locale[c];
bbce6d69 3428 }
3429 else
22c35a8c 3430 cf = PL_fold[c];
b8c5462f 3431 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3432 match = TRUE;
3433 }
3434
b8c5462f 3435 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3436 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3437 if (
3438 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3439 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3440 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3441 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3442 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3443 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3444 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3445 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3446 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3447 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3448 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3449 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3450 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3451 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3452 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3453 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3454 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3455 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3456 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3457 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3458 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3459 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3460 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3461 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3462 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3463 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3464 ) /* How's that for a conditional? */
bbce6d69 3465 {
3466 match = TRUE;
3467 }
3468 }
3469
ae5c130c 3470 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3471}
3472
a0ed51b3 3473STATIC bool
cea2e8a9 3474S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3475{
3476 dTHR;
a0ed51b3
LW
3477 char flags = ARG1(f);
3478 bool match = FALSE;
3479 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3480
3481 if (swash_fetch(sv, p))
3482 match = TRUE;
3483 else if (flags & ANYOF_FOLD) {
3484 I32 cf;
dfe13c55 3485 U8 tmpbuf[10];
a0ed51b3
LW
3486 if (flags & ANYOF_LOCALE) {
3487 PL_reg_flags |= RF_tainted;
3488 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3489 }
3490 else
3491 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3492 if (swash_fetch(sv, tmpbuf))
3493 match = TRUE;
3494 }
3495
b8c5462f 3496 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3497
3498 return (flags & ANYOF_INVERT) ? !match : match;
3499}
161b471a 3500
dfe13c55 3501STATIC U8 *
cea2e8a9 3502S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3503{
3504 dTHR;
a0ed51b3
LW
3505 if (off >= 0) {
3506 while (off-- && s < (U8*)PL_regeol)
3507 s += UTF8SKIP(s);
3508 }
3509 else {
3510 while (off++) {
3511 if (s > (U8*)PL_bostr) {
3512 s--;
3513 if (*s & 0x80) {
3514 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3515 s--;
3516 } /* XXX could check well-formedness here */
3517 }
3518 }
3519 }
3520 return s;
3521}
161b471a 3522
dfe13c55 3523STATIC U8 *
cea2e8a9 3524S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3525{
c485e607 3526 dTHR;
a0ed51b3
LW
3527 if (off >= 0) {
3528 while (off-- && s < (U8*)PL_regeol)
3529 s += UTF8SKIP(s);
3530 if (off >= 0)
3531 return 0;
3532 }
3533 else {
3534 while (off++) {
3535 if (s > (U8*)PL_bostr) {
3536 s--;
3537 if (*s & 0x80) {
3538 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3539 s--;
3540 } /* XXX could check well-formedness here */
3541 }
3542 else
3543 break;
3544 }
3545 if (off <= 0)
3546 return 0;
3547 }
3548 return s;
3549}
51371543
GS
3550
3551#ifdef PERL_OBJECT
3552#define NO_XSLOCKS
3553#include "XSUB.h"
3554#endif
3555
3556static void
3557restore_pos(pTHXo_ void *arg)
3558{
3559 dTHR;
3560 if (PL_reg_eval_set) {
3561 if (PL_reg_oldsaved) {
3562 PL_reg_re->subbeg = PL_reg_oldsaved;
3563 PL_reg_re->sublen = PL_reg_oldsavedlen;
3564 RX_MATCH_COPIED_on(PL_reg_re);
3565 }
3566 PL_reg_magic->mg_len = PL_reg_oldpos;
3567 PL_reg_eval_set = 0;
3568 PL_curpm = PL_reg_oldcurpm;
3569 }
3570}
3571