This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Document -Duselfs, -Duselongdouble, and -Dusemorebits.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5
DD
148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
149 " Setting an EVAL scope, savestack=%i\n", \
3280af22 150 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5
DD
153 PerlIO_printf(Perl_debug_log, \
154 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 155 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5
DD
178 PerlIO_printf(Perl_debug_log,
179 " restoring \\%d to %d(%d)..%d%s\n",
cf93c79d
IZ
180 paren, PL_regstartp[paren],
181 PL_reg_start_tmp[paren] - PL_bostr,
182 PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5
DD
188 PerlIO_printf(Perl_debug_log,
189 " restoring \\%d..\\%d to undef\n",
3280af22 190 *PL_reglastparen + 1, PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
2c2d71f5
JH
278/* A failure to find a constant substring means that there is no need to make
279 an expensive call to REx engine, thus we celebrate a failure. Similarly,
280 finding a substring too deep into the string means that less calls to
30944b6d
IZ
281 regtry() should be needed.
282
283 REx compiler's optimizer found 4 possible hints:
284 a) Anchored substring;
285 b) Fixed substring;
286 c) Whether we are anchored (beginning-of-line or \G);
287 d) First node (of those at offset 0) which may distingush positions;
288 We use 'a', 'b', multiline-part of 'c', and try to find a position in the
289 string which does not contradict any of them.
290 */
2c2d71f5 291
cad2e5aa
JH
292char *
293Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
294 char *strend, U32 flags, re_scream_pos_data *data)
295{
2c2d71f5 296 register I32 start_shift;
cad2e5aa 297 /* Should be nonnegative! */
2c2d71f5
JH
298 register I32 end_shift;
299 register char *s;
300 register SV *check;
cad2e5aa
JH
301 char *t;
302 I32 ml_anch;
2c2d71f5
JH
303 char *tmp;
304 register char *other_last = Nullch;
30944b6d
IZ
305#ifdef DEBUGGING
306 char *i_strpos = strpos;
307#endif
cad2e5aa
JH
308
309 DEBUG_r( if (!PL_colorset) reginitcolors() );
310 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 311 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
312 PL_colors[4],PL_colors[5],PL_colors[0],
313 prog->precomp,
314 PL_colors[1],
315 (strlen(prog->precomp) > 60 ? "..." : ""),
316 PL_colors[0],
317 (strend - strpos > 60 ? 60 : strend - strpos),
318 strpos, PL_colors[1],
319 (strend - strpos > 60 ? "..." : ""))
320 );
321
2c2d71f5
JH
322 if (prog->minlen > strend - strpos) {
323 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 324 goto fail;
2c2d71f5
JH
325 }
326 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
327 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
328 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 329 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
330
331 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 332 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
333 I32 slen;
334
335 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
336 && (sv && (strpos + SvCUR(sv) != strend)) ) {
337 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 338 goto fail;
2c2d71f5 339 }
adac82c7 340 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 341 s = HOPc(strpos, prog->check_offset_min);
cad2e5aa
JH
342 if (SvTAIL(prog->check_substr)) {
343 slen = SvCUR(prog->check_substr); /* >= 1 */
344
2c2d71f5
JH
345 if ( strend - s > slen || strend - s < slen - 1
346 || (strend - s == slen && strend[-1] != '\n')) {
347 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
348 goto fail_finish;
cad2e5aa
JH
349 }
350 /* Now should match s[0..slen-2] */
351 slen--;
352 if (slen && (*SvPVX(prog->check_substr) != *s
353 || (slen > 1
2c2d71f5
JH
354 && memNE(SvPVX(prog->check_substr), s, slen)))) {
355 report_neq:
356 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
357 goto fail_finish;
358 }
cad2e5aa
JH
359 }
360 else if (*SvPVX(prog->check_substr) != *s
361 || ((slen = SvCUR(prog->check_substr)) > 1
362 && memNE(SvPVX(prog->check_substr), s, slen)))
2c2d71f5
JH
363 goto report_neq;
364 goto success_at_start;
cad2e5aa 365 }
2c2d71f5 366 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 367 s = strpos;
2c2d71f5
JH
368 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
369 /* Should be nonnegative! */
370 end_shift = prog->minlen - start_shift -
371 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
372 if (!ml_anch) {
373 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
374 - (SvTAIL(prog->check_substr) != 0);
375 I32 eshift = strend - s - end;
376
377 if (end_shift < eshift)
378 end_shift = eshift;
379 }
cad2e5aa 380 }
2c2d71f5 381 else { /* Can match at random position */
cad2e5aa
JH
382 ml_anch = 0;
383 s = strpos;
2c2d71f5
JH
384 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
385 /* Should be nonnegative! */
386 end_shift = prog->minlen - start_shift -
387 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
cad2e5aa
JH
388 }
389
2c2d71f5 390#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 391 if (end_shift < 0)
6bbae5e6 392 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
393#endif
394
395 check = prog->check_substr;
396 restart:
397 /* Find a possible match in the region s..strend by looking for
398 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 399 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
400 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
401 I32 p = -1; /* Internal iterator of scream. */
402 I32 *pp = data ? data->scream_pos : &p;
403
2c2d71f5
JH
404 if (PL_screamfirst[BmRARE(check)] >= 0
405 || ( BmRARE(check) == '\n'
406 && (BmPREVIOUS(check) == SvCUR(check) - 1)
407 && SvTAIL(check) ))
408 s = screaminstr(sv, check,
409 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 410 else
2c2d71f5 411 goto fail_finish;
cad2e5aa
JH
412 if (data)
413 *data->scream_olds = s;
414 }
415 else
416 s = fbm_instr((unsigned char*)s + start_shift,
417 (unsigned char*)strend - end_shift,
2c2d71f5 418 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
419
420 /* Update the count-of-usability, remove useless subpatterns,
421 unshift s. */
2c2d71f5
JH
422
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
424 (s ? "Found" : "Did not find"),
425 ((check == prog->anchored_substr) ? "anchored" : "floating"),
426 PL_colors[0],
427 SvCUR(check) - (SvTAIL(check)!=0), SvPVX(check),
428 PL_colors[1], (SvTAIL(check) ? "$" : ""),
429 (s ? " at offset " : "...\n") ) );
430
431 if (!s)
432 goto fail_finish;
433
434 /* Finish the diagnostic message */
30944b6d 435 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
436
437 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
438 Start with the other substr.
439 XXXX no SCREAM optimization yet - and a very coarse implementation
440 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
441 *always* match. Probably should be marked during compile...
442 Probably it is right to do no SCREAM here...
443 */
444
445 if (prog->float_substr && prog->anchored_substr) {
30944b6d 446 /* Take into account the "other" substring. */
2c2d71f5
JH
447 /* XXXX May be hopelessly wrong for UTF... */
448 if (!other_last)
449 other_last = strpos - 1;
450 if (check == prog->float_substr) {
30944b6d
IZ
451 do_other_anchored:
452 {
2c2d71f5
JH
453 char *last = s - start_shift, *last1, *last2;
454 char *s1 = s;
455
456 tmp = PL_bostr;
457 t = s - prog->check_offset_max;
458 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
459 && (!(prog->reganch & ROPT_UTF8)
460 || (PL_bostr = strpos, /* Used in regcopmaybe() */
461 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
462 && t > strpos)))
30944b6d 463 /* EMPTY */;
2c2d71f5
JH
464 else
465 t = strpos;
466 t += prog->anchored_offset;
467 if (t <= other_last)
468 t = other_last + 1;
469 PL_bostr = tmp;
470 last2 = last1 = strend - prog->minlen;
471 if (last < last1)
472 last1 = last;
473 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
474 /* On end-of-str: see comment below. */
475 s = fbm_instr((unsigned char*)t,
476 (unsigned char*)last1 + prog->anchored_offset
477 + SvCUR(prog->anchored_substr)
478 - (SvTAIL(prog->anchored_substr)!=0),
479 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
480 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
481 (s ? "Found" : "Contradicts"),
482 PL_colors[0],
483 SvCUR(prog->anchored_substr)
484 - (SvTAIL(prog->anchored_substr)!=0),
485 SvPVX(prog->anchored_substr),
486 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
487 if (!s) {
488 if (last1 >= last2) {
489 DEBUG_r(PerlIO_printf(Perl_debug_log,
490 ", giving up...\n"));
491 goto fail_finish;
492 }
493 DEBUG_r(PerlIO_printf(Perl_debug_log,
494 ", trying floating at offset %ld...\n",
30944b6d 495 (long)(s1 + 1 - i_strpos)));
2c2d71f5
JH
496 PL_regeol = strend; /* Used in HOP() */
497 other_last = last1 + prog->anchored_offset;
498 s = HOPc(last, 1);
499 goto restart;
500 }
501 else {
502 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 503 (long)(s - i_strpos)));
2c2d71f5
JH
504 t = s - prog->anchored_offset;
505 other_last = s - 1;
30944b6d 506 s = s1;
2c2d71f5
JH
507 if (t == strpos)
508 goto try_at_start;
2c2d71f5
JH
509 goto try_at_offset;
510 }
30944b6d 511 }
2c2d71f5
JH
512 }
513 else { /* Take into account the floating substring. */
514 char *last, *last1;
515 char *s1 = s;
516
517 t = s - start_shift;
518 last1 = last = strend - prog->minlen + prog->float_min_offset;
519 if (last - t > prog->float_max_offset)
520 last = t + prog->float_max_offset;
521 s = t + prog->float_min_offset;
522 if (s <= other_last)
523 s = other_last + 1;
524 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
525 /* fbm_instr() takes into account exact value of end-of-str
526 if the check is SvTAIL(ed). Since false positives are OK,
527 and end-of-str is not later than strend we are OK. */
528 s = fbm_instr((unsigned char*)s,
529 (unsigned char*)last + SvCUR(prog->float_substr)
530 - (SvTAIL(prog->float_substr)!=0),
531 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
532 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
533 (s ? "Found" : "Contradicts"),
534 PL_colors[0],
535 SvCUR(prog->float_substr)
536 - (SvTAIL(prog->float_substr)!=0),
537 SvPVX(prog->float_substr),
538 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
539 if (!s) {
540 if (last1 == last) {
541 DEBUG_r(PerlIO_printf(Perl_debug_log,
542 ", giving up...\n"));
543 goto fail_finish;
544 }
545 DEBUG_r(PerlIO_printf(Perl_debug_log,
546 ", trying anchored starting at offset %ld...\n",
30944b6d 547 (long)(s1 + 1 - i_strpos)));
2c2d71f5
JH
548 other_last = last;
549 PL_regeol = strend; /* Used in HOP() */
550 s = HOPc(t, 1);
551 goto restart;
552 }
553 else {
554 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 555 (long)(s - i_strpos)));
2c2d71f5 556 other_last = s - 1;
30944b6d 557 s = s1;
2c2d71f5
JH
558 if (t == strpos)
559 goto try_at_start;
2c2d71f5
JH
560 goto try_at_offset;
561 }
562 }
cad2e5aa 563 }
2c2d71f5
JH
564
565 t = s - prog->check_offset_max;
566 tmp = PL_bostr;
567 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
568 && (!(prog->reganch & ROPT_UTF8)
569 || (PL_bostr = strpos, /* Used in regcopmaybe() */
570 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
571 && t > strpos)))) {
572 PL_bostr = tmp;
573 /* Fixed substring is found far enough so that the match
574 cannot start at strpos. */
575 try_at_offset:
cad2e5aa 576 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
577 /* Eventually fbm_*() should handle this, but often
578 anchored_offset is not 0, so this check will not be wasted. */
579 /* XXXX In the code below we prefer to look for "^" even in
580 presence of anchored substrings. And we search even
581 beyond the found float position. These pessimizations
582 are historical artefacts only. */
583 find_anchor:
2c2d71f5 584 while (t < strend - prog->minlen) {
cad2e5aa
JH
585 if (*t == '\n') {
586 if (t < s - prog->check_offset_min) {
30944b6d
IZ
587 if (prog->anchored_substr) {
588 /* We definitely contradict the found anchored
589 substr. Due to the above check we do not
590 contradict "check" substr.
591 Thus we can arrive here only if check substr
592 is float. Redo checking for "other"=="fixed".
593 */
594 strpos = t + 1;
595 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
596 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
597 goto do_other_anchored;
598 }
cad2e5aa 599 s = t + 1;
2c2d71f5 600 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 601 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
602 goto set_useful;
603 }
2c2d71f5 604 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
605 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
606 strpos = s = t + 1;
cad2e5aa
JH
607 goto restart;
608 }
609 t++;
610 }
2c2d71f5
JH
611 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
612 PL_colors[0],PL_colors[1]));
613 goto fail_finish;
cad2e5aa
JH
614 }
615 s = t;
616 set_useful:
2c2d71f5 617 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
618 }
619 else {
2c2d71f5
JH
620 PL_bostr = tmp;
621 /* The found string does not prohibit matching at beg-of-str
622 - no optimization of calling REx engine can be performed,
623 unless it was an MBOL and we are not after MBOL. */
624 try_at_start:
625 /* Even in this situation we may use MBOL flag if strpos is offset
626 wrt the start of the string. */
627 if (ml_anch && sv
cad2e5aa
JH
628 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
629 t = strpos;
630 goto find_anchor;
631 }
30944b6d
IZ
632 DEBUG_r( if (ml_anch)
633 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
634 PL_colors[0],PL_colors[1]);
635 );
2c2d71f5 636 success_at_start:
30944b6d 637 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
cad2e5aa
JH
638 && --BmUSEFUL(prog->check_substr) < 0
639 && prog->check_substr == prog->float_substr) { /* boo */
640 /* If flags & SOMETHING - do not do it many times on the same match */
641 SvREFCNT_dec(prog->check_substr);
642 prog->check_substr = Nullsv; /* disable */
643 prog->float_substr = Nullsv; /* clear */
644 s = strpos;
645 prog->reganch &= ~RE_USE_INTUIT;
646 }
647 else
648 s = strpos;
649 }
650
2c2d71f5 651 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 652 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 653 return s;
2c2d71f5
JH
654
655 fail_finish: /* Substring not found */
656 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 657 fail:
2c2d71f5 658 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
659 PL_colors[4],PL_colors[5]));
660 return Nullch;
661}
9661b544 662
a687059c 663/*
c277df42 664 - regexec_flags - match a regexp against a string
a687059c 665 */
79072805 666I32
864dbfa3 667Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
22e551b9 668 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
c277df42
IZ
669/* strend: pointer to null at end of string */
670/* strbeg: real beginning of string */
671/* minend: end of match must be >=minend after stringarg. */
672/* data: May be used for some additional optimizations. */
673/* nosave: For optimizations. */
a687059c 674{
5c0ca799 675 dTHR;
a0d0e21e 676 register char *s;
c277df42 677 register regnode *c;
a0d0e21e
LW
678 register char *startpos = stringarg;
679 register I32 tmp;
c277df42 680 I32 minlen; /* must match at least this many chars */
a0d0e21e 681 I32 dontbother = 0; /* how many characters not to try at end */
c277df42 682 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
683 constant substr. */ /* CC */
684 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
685 I32 scream_pos = -1; /* Internal iterator of scream. */
686 char *scream_olds;
3280af22 687 SV* oreplsv = GvSV(PL_replgv);
a687059c 688
2ab05381 689 PL_regcc = 0;
a0d0e21e 690
0f5d15d6 691 cache_re(prog);
a0d0e21e 692#ifdef DEBUGGING
3280af22 693 PL_regnarrate = PL_debug & 512;
a0d0e21e
LW
694#endif
695
696 /* Be paranoid... */
697 if (prog == NULL || startpos == NULL) {
cea2e8a9 698 Perl_croak(aTHX_ "NULL regexp parameter");
a0d0e21e
LW
699 return 0;
700 }
701
c277df42
IZ
702 minlen = prog->minlen;
703 if (strend - startpos < minlen) goto phooey;
704
a0d0e21e 705 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 706 PL_regprev = '\n';
a0d0e21e 707 else {
a0ed51b3 708 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
709 if (!PL_multiline && PL_regprev == '\n')
710 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 711 }
bbce6d69 712
a0d0e21e 713 /* Check validity of program. */
22c35a8c 714 if (UCHARAT(prog->program) != REG_MAGIC) {
cea2e8a9 715 Perl_croak(aTHX_ "corrupted regexp program");
a0d0e21e
LW
716 }
717
3280af22
NIS
718 PL_reg_flags = 0;
719 PL_reg_eval_set = 0;
2c2d71f5 720 PL_reg_maxiter = 0;
a0d0e21e 721
a0ed51b3
LW
722 if (prog->reganch & ROPT_UTF8)
723 PL_reg_flags |= RF_utf8;
724
725 /* Mark beginning of line for ^ and lookbehind. */
726 PL_regbol = startpos;
727 PL_bostr = strbeg;
9661b544 728 PL_reg_sv = sv;
a0ed51b3
LW
729
730 /* Mark end of line for $ (and such) */
731 PL_regeol = strend;
732
733 /* see how far we have to get to not match where we matched before */
734 PL_regtill = startpos+minend;
735
0f5d15d6
IZ
736 /* We start without call_cc context. */
737 PL_reg_call_cc = 0;
738
a0d0e21e
LW
739 /* If there is a "must appear" string, look for it. */
740 s = startpos;
cad2e5aa
JH
741
742 if (prog->reganch & ROPT_GPOS_SEEN) {
743 MAGIC *mg;
744
745 if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
746 && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
747 PL_reg_ganch = strbeg + mg->mg_len;
a0d0e21e 748 else
cad2e5aa
JH
749 PL_reg_ganch = startpos;
750 if (prog->reganch & ROPT_ANCH_GPOS) {
751 if (s > PL_reg_ganch)
752 goto phooey;
753 s = PL_reg_ganch;
a0ed51b3 754 }
a0d0e21e 755 }
a687059c 756
cad2e5aa
JH
757 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
758 re_scream_pos_data d;
759
760 d.scream_olds = &scream_olds;
761 d.scream_pos = &scream_pos;
762 s = re_intuit_start(prog, sv, s, strend, flags, &d);
763 if (!s)
764 goto phooey; /* not present */
765 }
766
767 DEBUG_r( if (!PL_colorset) reginitcolors() );
768 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 769 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
8d300b32
GS
770 PL_colors[4],PL_colors[5],PL_colors[0],
771 prog->precomp,
772 PL_colors[1],
c277df42 773 (strlen(prog->precomp) > 60 ? "..." : ""),
cad2e5aa 774 PL_colors[0],
c277df42 775 (strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 776 startpos, PL_colors[1],
c277df42
IZ
777 (strend - startpos > 60 ? "..." : ""))
778 );
779
a0d0e21e 780 /* Simplest case: anchored match need be tried only once. */
774d564b 781 /* [unless only anchor is BOL and multiline is set] */
22e551b9 782 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
cad2e5aa 783 if (s == startpos && regtry(prog, startpos))
a0d0e21e 784 goto got_it;
22e551b9
IZ
785 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
786 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
774d564b 787 {
cad2e5aa
JH
788 char *end;
789
a0d0e21e
LW
790 if (minlen)
791 dontbother = minlen - 1;
cad2e5aa 792 end = HOPc(strend, -dontbother) - 1;
a0d0e21e 793 /* for multiline we only have to try after newlines */
cad2e5aa 794 if (prog->check_substr) {
30944b6d
IZ
795 if (s == startpos)
796 goto after_try;
cad2e5aa
JH
797 while (1) {
798 if (regtry(prog, s))
a0d0e21e 799 goto got_it;
30944b6d 800 after_try:
cad2e5aa
JH
801 if (s >= end)
802 goto phooey;
803 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
804 if (!s)
805 goto phooey;
806 }
807 } else {
808 if (s > startpos)
809 s--;
810 while (s < end) {
811 if (*s++ == '\n') { /* don't need PL_utf8skip here */
812 if (regtry(prog, s))
813 goto got_it;
814 }
815 }
35c8bce7 816 }
35c8bce7 817 }
a0d0e21e 818 goto phooey;
22e551b9
IZ
819 } else if (prog->reganch & ROPT_ANCH_GPOS) {
820 if (regtry(prog, PL_reg_ganch))
821 goto got_it;
822 goto phooey;
a0d0e21e 823 }
35c8bce7 824
a0d0e21e 825 /* Messy cases: unanchored match. */
c277df42
IZ
826 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
827 /* we have /x+whatever/ */
cad2e5aa 828 /* it must be a one character string (XXXX Except UTF?) */
c277df42 829 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
830 if (UTF) {
831 while (s < strend) {
832 if (*s == ch) {
833 if (regtry(prog, s)) goto got_it;
834 s += UTF8SKIP(s);
835 while (s < strend && *s == ch)
836 s += UTF8SKIP(s);
837 }
838 s += UTF8SKIP(s);
839 }
840 }
841 else {
842 while (s < strend) {
843 if (*s == ch) {
844 if (regtry(prog, s)) goto got_it;
c277df42 845 s++;
a0ed51b3
LW
846 while (s < strend && *s == ch)
847 s++;
848 }
849 s++;
a0d0e21e 850 }
a687059c 851 }
c277df42
IZ
852 }
853 /*SUPPRESS 560*/
854 else if (prog->anchored_substr != Nullsv
855 || (prog->float_substr != Nullsv
856 && prog->float_max_offset < strend - s)) {
857 SV *must = prog->anchored_substr
858 ? prog->anchored_substr : prog->float_substr;
859 I32 back_max =
860 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
861 I32 back_min =
862 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
863 I32 delta = back_max - back_min;
cf93c79d 864 char *last = HOPc(strend, /* Cannot start after this */
73c4f7a1
GS
865 -(I32)(CHR_SVLEN(must)
866 - (SvTAIL(must) != 0) + back_min));
a0ed51b3
LW
867 char *last1; /* Last position checked before */
868
869 if (s > PL_bostr)
dfe13c55 870 last1 = HOPc(s, -1);
a0ed51b3
LW
871 else
872 last1 = s - 1; /* bogus */
c277df42
IZ
873
874 /* XXXX check_substr already used to find `s', can optimize if
875 check_substr==must. */
876 scream_pos = -1;
877 dontbother = end_shift;
dfe13c55 878 strend = HOPc(strend, -dontbother);
c277df42 879 while ( (s <= last) &&
22e551b9
IZ
880 ((flags & REXEC_SCREAM)
881 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
c277df42 882 end_shift, &scream_pos, 0))
a0ed51b3 883 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
cf93c79d
IZ
884 (unsigned char*)strend, must,
885 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
dfe13c55
GS
886 if (HOPc(s, -back_max) > last1) {
887 last1 = HOPc(s, -back_min);
888 s = HOPc(s, -back_max);
a0ed51b3
LW
889 }
890 else {
dfe13c55 891 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 892
dfe13c55 893 last1 = HOPc(s, -back_min);
c277df42 894 s = t;
a0d0e21e 895 }
a0ed51b3
LW
896 if (UTF) {
897 while (s <= last1) {
898 if (regtry(prog, s))
899 goto got_it;
900 s += UTF8SKIP(s);
901 }
902 }
903 else {
904 while (s <= last1) {
905 if (regtry(prog, s))
906 goto got_it;
907 s++;
908 }
a0d0e21e
LW
909 }
910 }
911 goto phooey;
a0ed51b3
LW
912 }
913 else if (c = prog->regstclass) {
a0d0e21e 914 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 915 char *cc;
a687059c 916
a0d0e21e
LW
917 if (minlen)
918 dontbother = minlen - 1;
dfe13c55 919 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
920 tmp = 1;
921 /* We know what class it must start with. */
922 switch (OP(c)) {
a0ed51b3 923 case ANYOFUTF8:
cd439c50 924 cc = MASK(c);
a0ed51b3
LW
925 while (s < strend) {
926 if (REGINCLASSUTF8(c, (U8*)s)) {
927 if (tmp && regtry(prog, s))
928 goto got_it;
929 else
930 tmp = doevery;
931 }
932 else
933 tmp = 1;
934 s += UTF8SKIP(s);
935 }
936 break;
a0d0e21e 937 case ANYOF:
cd439c50 938 cc = MASK(c);
a0d0e21e 939 while (s < strend) {
a0ed51b3 940 if (REGINCLASS(cc, *s)) {
a0d0e21e
LW
941 if (tmp && regtry(prog, s))
942 goto got_it;
943 else
944 tmp = doevery;
a687059c 945 }
a0d0e21e
LW
946 else
947 tmp = 1;
948 s++;
949 }
950 break;
bbce6d69 951 case BOUNDL:
3280af22 952 PL_reg_flags |= RF_tainted;
bbce6d69 953 /* FALL THROUGH */
a0d0e21e 954 case BOUND:
a0ed51b3
LW
955 if (minlen) {
956 dontbother++;
957 strend -= 1;
958 }
3280af22 959 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 960 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 961 while (s < strend) {
95bac841 962 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
963 tmp = !tmp;
964 if (regtry(prog, s))
965 goto got_it;
a687059c 966 }
a0d0e21e
LW
967 s++;
968 }
969 if ((minlen || tmp) && regtry(prog,s))
970 goto got_it;
971 break;
a0ed51b3
LW
972 case BOUNDLUTF8:
973 PL_reg_flags |= RF_tainted;
974 /* FALL THROUGH */
975 case BOUNDUTF8:
976 if (minlen) {
977 dontbother++;
dfe13c55 978 strend = reghop_c(strend, -1);
a0ed51b3 979 }
dfe13c55 980 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
981 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
982 while (s < strend) {
dfe13c55
GS
983 if (tmp == !(OP(c) == BOUND ?
984 swash_fetch(PL_utf8_alnum, (U8*)s) :
985 isALNUM_LC_utf8((U8*)s)))
986 {
a0ed51b3
LW
987 tmp = !tmp;
988 if (regtry(prog, s))
989 goto got_it;
990 }
991 s += UTF8SKIP(s);
992 }
993 if ((minlen || tmp) && regtry(prog,s))
994 goto got_it;
995 break;
bbce6d69 996 case NBOUNDL:
3280af22 997 PL_reg_flags |= RF_tainted;
bbce6d69 998 /* FALL THROUGH */
a0d0e21e 999 case NBOUND:
a0ed51b3
LW
1000 if (minlen) {
1001 dontbother++;
1002 strend -= 1;
1003 }
3280af22 1004 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 1005 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 1006 while (s < strend) {
95bac841 1007 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
1008 tmp = !tmp;
1009 else if (regtry(prog, s))
1010 goto got_it;
1011 s++;
1012 }
1013 if ((minlen || !tmp) && regtry(prog,s))
1014 goto got_it;
1015 break;
a0ed51b3
LW
1016 case NBOUNDLUTF8:
1017 PL_reg_flags |= RF_tainted;
1018 /* FALL THROUGH */
1019 case NBOUNDUTF8:
1020 if (minlen) {
1021 dontbother++;
dfe13c55 1022 strend = reghop_c(strend, -1);
a0ed51b3 1023 }
dfe13c55 1024 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
1025 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
1026 while (s < strend) {
dfe13c55
GS
1027 if (tmp == !(OP(c) == NBOUND ?
1028 swash_fetch(PL_utf8_alnum, (U8*)s) :
1029 isALNUM_LC_utf8((U8*)s)))
a0ed51b3
LW
1030 tmp = !tmp;
1031 else if (regtry(prog, s))
1032 goto got_it;
1033 s += UTF8SKIP(s);
1034 }
1035 if ((minlen || !tmp) && regtry(prog,s))
1036 goto got_it;
1037 break;
a0d0e21e
LW
1038 case ALNUM:
1039 while (s < strend) {
bbce6d69 1040 if (isALNUM(*s)) {
1041 if (tmp && regtry(prog, s))
1042 goto got_it;
1043 else
1044 tmp = doevery;
1045 }
1046 else
1047 tmp = 1;
1048 s++;
1049 }
1050 break;
a0ed51b3
LW
1051 case ALNUMUTF8:
1052 while (s < strend) {
dfe13c55 1053 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
1054 if (tmp && regtry(prog, s))
1055 goto got_it;
1056 else
1057 tmp = doevery;
1058 }
1059 else
1060 tmp = 1;
1061 s += UTF8SKIP(s);
1062 }
1063 break;
bbce6d69 1064 case ALNUML:
3280af22 1065 PL_reg_flags |= RF_tainted;
bbce6d69 1066 while (s < strend) {
1067 if (isALNUM_LC(*s)) {
a0d0e21e
LW
1068 if (tmp && regtry(prog, s))
1069 goto got_it;
a687059c 1070 else
a0d0e21e
LW
1071 tmp = doevery;
1072 }
1073 else
1074 tmp = 1;
1075 s++;
1076 }
1077 break;
a0ed51b3
LW
1078 case ALNUMLUTF8:
1079 PL_reg_flags |= RF_tainted;
1080 while (s < strend) {
dfe13c55 1081 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
1082 if (tmp && regtry(prog, s))
1083 goto got_it;
1084 else
1085 tmp = doevery;
1086 }
1087 else
1088 tmp = 1;
1089 s += UTF8SKIP(s);
1090 }
1091 break;
a0d0e21e
LW
1092 case NALNUM:
1093 while (s < strend) {
bbce6d69 1094 if (!isALNUM(*s)) {
1095 if (tmp && regtry(prog, s))
1096 goto got_it;
1097 else
1098 tmp = doevery;
1099 }
1100 else
1101 tmp = 1;
1102 s++;
1103 }
1104 break;
a0ed51b3
LW
1105 case NALNUMUTF8:
1106 while (s < strend) {
dfe13c55 1107 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
1108 if (tmp && regtry(prog, s))
1109 goto got_it;
1110 else
1111 tmp = doevery;
1112 }
1113 else
1114 tmp = 1;
1115 s += UTF8SKIP(s);
1116 }
1117 break;
bbce6d69 1118 case NALNUML:
3280af22 1119 PL_reg_flags |= RF_tainted;
bbce6d69 1120 while (s < strend) {
1121 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
1122 if (tmp && regtry(prog, s))
1123 goto got_it;
a687059c 1124 else
a0d0e21e 1125 tmp = doevery;
a687059c 1126 }
a0d0e21e
LW
1127 else
1128 tmp = 1;
1129 s++;
1130 }
1131 break;
a0ed51b3
LW
1132 case NALNUMLUTF8:
1133 PL_reg_flags |= RF_tainted;
1134 while (s < strend) {
dfe13c55 1135 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
1136 if (tmp && regtry(prog, s))
1137 goto got_it;
1138 else
1139 tmp = doevery;
1140 }
1141 else
1142 tmp = 1;
1143 s += UTF8SKIP(s);
1144 }
1145 break;
a0d0e21e
LW
1146 case SPACE:
1147 while (s < strend) {
1148 if (isSPACE(*s)) {
1149 if (tmp && regtry(prog, s))
1150 goto got_it;
1151 else
1152 tmp = doevery;
2304df62 1153 }
a0d0e21e
LW
1154 else
1155 tmp = 1;
1156 s++;
1157 }
1158 break;
a0ed51b3
LW
1159 case SPACEUTF8:
1160 while (s < strend) {
dfe13c55 1161 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3
LW
1162 if (tmp && regtry(prog, s))
1163 goto got_it;
1164 else
1165 tmp = doevery;
1166 }
1167 else
1168 tmp = 1;
1169 s += UTF8SKIP(s);
1170 }
1171 break;
bbce6d69 1172 case SPACEL:
3280af22 1173 PL_reg_flags |= RF_tainted;
bbce6d69 1174 while (s < strend) {
1175 if (isSPACE_LC(*s)) {
1176 if (tmp && regtry(prog, s))
1177 goto got_it;
1178 else
1179 tmp = doevery;
1180 }
1181 else
1182 tmp = 1;
1183 s++;
1184 }
1185 break;
a0ed51b3
LW
1186 case SPACELUTF8:
1187 PL_reg_flags |= RF_tainted;
1188 while (s < strend) {
dfe13c55 1189 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3
LW
1190 if (tmp && regtry(prog, s))
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
1195 else
1196 tmp = 1;
1197 s += UTF8SKIP(s);
1198 }
1199 break;
a0d0e21e
LW
1200 case NSPACE:
1201 while (s < strend) {
1202 if (!isSPACE(*s)) {
1203 if (tmp && regtry(prog, s))
1204 goto got_it;
1205 else
1206 tmp = doevery;
a687059c 1207 }
a0d0e21e
LW
1208 else
1209 tmp = 1;
1210 s++;
1211 }
1212 break;
a0ed51b3
LW
1213 case NSPACEUTF8:
1214 while (s < strend) {
dfe13c55 1215 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3
LW
1216 if (tmp && regtry(prog, s))
1217 goto got_it;
1218 else
1219 tmp = doevery;
1220 }
1221 else
1222 tmp = 1;
1223 s += UTF8SKIP(s);
1224 }
1225 break;
bbce6d69 1226 case NSPACEL:
3280af22 1227 PL_reg_flags |= RF_tainted;
bbce6d69 1228 while (s < strend) {
1229 if (!isSPACE_LC(*s)) {
1230 if (tmp && regtry(prog, s))
1231 goto got_it;
1232 else
1233 tmp = doevery;
1234 }
1235 else
1236 tmp = 1;
1237 s++;
1238 }
1239 break;
a0ed51b3
LW
1240 case NSPACELUTF8:
1241 PL_reg_flags |= RF_tainted;
1242 while (s < strend) {
dfe13c55 1243 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3
LW
1244 if (tmp && regtry(prog, s))
1245 goto got_it;
1246 else
1247 tmp = doevery;
1248 }
1249 else
1250 tmp = 1;
1251 s += UTF8SKIP(s);
1252 }
1253 break;
a0d0e21e
LW
1254 case DIGIT:
1255 while (s < strend) {
1256 if (isDIGIT(*s)) {
1257 if (tmp && regtry(prog, s))
1258 goto got_it;
1259 else
1260 tmp = doevery;
2b69d0c2 1261 }
a0d0e21e
LW
1262 else
1263 tmp = 1;
1264 s++;
1265 }
1266 break;
a0ed51b3
LW
1267 case DIGITUTF8:
1268 while (s < strend) {
dfe13c55 1269 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1270 if (tmp && regtry(prog, s))
1271 goto got_it;
1272 else
1273 tmp = doevery;
1274 }
1275 else
1276 tmp = 1;
1277 s += UTF8SKIP(s);
1278 }
1279 break;
b8c5462f
JH
1280 case DIGITL:
1281 PL_reg_flags |= RF_tainted;
1282 while (s < strend) {
1283 if (isDIGIT_LC(*s)) {
1284 if (tmp && regtry(prog, s))
1285 goto got_it;
1286 else
1287 tmp = doevery;
1288 }
1289 else
1290 tmp = 1;
1291 s++;
1292 }
1293 break;
1294 case DIGITLUTF8:
1295 PL_reg_flags |= RF_tainted;
1296 while (s < strend) {
1297 if (isDIGIT_LC_utf8((U8*)s)) {
1298 if (tmp && regtry(prog, s))
1299 goto got_it;
1300 else
1301 tmp = doevery;
1302 }
1303 else
1304 tmp = 1;
1305 s += UTF8SKIP(s);
1306 }
1307 break;
a0d0e21e
LW
1308 case NDIGIT:
1309 while (s < strend) {
1310 if (!isDIGIT(*s)) {
1311 if (tmp && regtry(prog, s))
1312 goto got_it;
1313 else
1314 tmp = doevery;
a687059c 1315 }
a0d0e21e
LW
1316 else
1317 tmp = 1;
1318 s++;
1319 }
1320 break;
a0ed51b3
LW
1321 case NDIGITUTF8:
1322 while (s < strend) {
dfe13c55 1323 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1324 if (tmp && regtry(prog, s))
1325 goto got_it;
1326 else
1327 tmp = doevery;
1328 }
1329 else
1330 tmp = 1;
1331 s += UTF8SKIP(s);
1332 }
1333 break;
b8c5462f
JH
1334 case NDIGITL:
1335 PL_reg_flags |= RF_tainted;
1336 while (s < strend) {
1337 if (!isDIGIT_LC(*s)) {
1338 if (tmp && regtry(prog, s))
1339 goto got_it;
1340 else
1341 tmp = doevery;
1342 }
1343 else
1344 tmp = 1;
1345 s++;
a0ed51b3 1346 }
b8c5462f
JH
1347 break;
1348 case NDIGITLUTF8:
1349 PL_reg_flags |= RF_tainted;
1350 while (s < strend) {
1351 if (!isDIGIT_LC_utf8((U8*)s)) {
1352 if (tmp && regtry(prog, s))
1353 goto got_it;
cf93c79d 1354 else
b8c5462f
JH
1355 tmp = doevery;
1356 }
1357 else
1358 tmp = 1;
1359 s += UTF8SKIP(s);
1360 }
1361 break;
d6a28714
JH
1362 }
1363 }
1364 else {
1365 dontbother = 0;
1366 if (prog->float_substr != Nullsv) { /* Trim the end. */
1367 char *last;
1368 I32 oldpos = scream_pos;
1369
1370 if (flags & REXEC_SCREAM) {
1371 last = screaminstr(sv, prog->float_substr, s - strbeg,
1372 end_shift, &scream_pos, 1); /* last one */
1373 if (!last)
1374 last = scream_olds; /* Only one occurence. */
b8c5462f 1375 }
d6a28714
JH
1376 else {
1377 STRLEN len;
1378 char *little = SvPV(prog->float_substr, len);
1379
1380 if (SvTAIL(prog->float_substr)) {
1381 if (memEQ(strend - len + 1, little, len - 1))
1382 last = strend - len + 1;
1383 else if (!PL_multiline)
1384 last = memEQ(strend - len, little, len)
1385 ? strend - len : Nullch;
b8c5462f 1386 else
d6a28714
JH
1387 goto find_last;
1388 } else {
1389 find_last:
1390 if (len)
1391 last = rninstr(s, strend, little, little + len);
b8c5462f 1392 else
d6a28714 1393 last = strend; /* matching `$' */
b8c5462f 1394 }
b8c5462f 1395 }
d6a28714
JH
1396 if (last == NULL) goto phooey; /* Should not happen! */
1397 dontbother = strend - last + prog->float_min_offset;
1398 }
1399 if (minlen && (dontbother < minlen))
1400 dontbother = minlen - 1;
1401 strend -= dontbother; /* this one's always in bytes! */
1402 /* We don't know much -- general case. */
1403 if (UTF) {
1404 for (;;) {
1405 if (regtry(prog, s))
1406 goto got_it;
1407 if (s >= strend)
1408 break;
b8c5462f 1409 s += UTF8SKIP(s);
d6a28714
JH
1410 };
1411 }
1412 else {
1413 do {
1414 if (regtry(prog, s))
1415 goto got_it;
1416 } while (s++ < strend);
1417 }
1418 }
1419
1420 /* Failure. */
1421 goto phooey;
1422
1423got_it:
1424 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1425
1426 if (PL_reg_eval_set) {
1427 /* Preserve the current value of $^R */
1428 if (oreplsv != GvSV(PL_replgv))
1429 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1430 restored, the value remains
1431 the same. */
1432 restore_pos(aTHXo_ 0);
1433 }
1434
1435 /* make sure $`, $&, $', and $digit will work later */
1436 if ( !(flags & REXEC_NOT_FIRST) ) {
1437 if (RX_MATCH_COPIED(prog)) {
1438 Safefree(prog->subbeg);
1439 RX_MATCH_COPIED_off(prog);
1440 }
1441 if (flags & REXEC_COPY_STR) {
1442 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1443
1444 s = savepvn(strbeg, i);
1445 prog->subbeg = s;
1446 prog->sublen = i;
1447 RX_MATCH_COPIED_on(prog);
1448 }
1449 else {
1450 prog->subbeg = strbeg;
1451 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1452 }
1453 }
1454
1455 return 1;
1456
1457phooey:
1458 if (PL_reg_eval_set)
1459 restore_pos(aTHXo_ 0);
1460 return 0;
1461}
1462
1463/*
1464 - regtry - try match at specific point
1465 */
1466STATIC I32 /* 0 failure, 1 success */
1467S_regtry(pTHX_ regexp *prog, char *startpos)
1468{
1469 dTHR;
1470 register I32 i;
1471 register I32 *sp;
1472 register I32 *ep;
1473 CHECKPOINT lastcp;
1474
1475 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1476 MAGIC *mg;
1477
1478 PL_reg_eval_set = RS_init;
1479 DEBUG_r(DEBUG_s(
1480 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
1481 PL_stack_sp - PL_stack_base);
1482 ));
1483 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1484 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1485 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1486 SAVETMPS;
1487 /* Apparently this is not needed, judging by wantarray. */
1488 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1489 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1490
1491 if (PL_reg_sv) {
1492 /* Make $_ available to executed code. */
1493 if (PL_reg_sv != DEFSV) {
1494 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1495 SAVESPTR(DEFSV);
1496 DEFSV = PL_reg_sv;
b8c5462f 1497 }
d6a28714
JH
1498
1499 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1500 && (mg = mg_find(PL_reg_sv, 'g')))) {
1501 /* prepare for quick setting of pos */
1502 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1503 mg = mg_find(PL_reg_sv, 'g');
1504 mg->mg_len = -1;
b8c5462f 1505 }
d6a28714
JH
1506 PL_reg_magic = mg;
1507 PL_reg_oldpos = mg->mg_len;
1508 SAVEDESTRUCTOR(restore_pos, 0);
1509 }
1510 if (!PL_reg_curpm)
1511 New(22,PL_reg_curpm, 1, PMOP);
1512 PL_reg_curpm->op_pmregexp = prog;
1513 PL_reg_oldcurpm = PL_curpm;
1514 PL_curpm = PL_reg_curpm;
1515 if (RX_MATCH_COPIED(prog)) {
1516 /* Here is a serious problem: we cannot rewrite subbeg,
1517 since it may be needed if this match fails. Thus
1518 $` inside (?{}) could fail... */
1519 PL_reg_oldsaved = prog->subbeg;
1520 PL_reg_oldsavedlen = prog->sublen;
1521 RX_MATCH_COPIED_off(prog);
1522 }
1523 else
1524 PL_reg_oldsaved = Nullch;
1525 prog->subbeg = PL_bostr;
1526 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1527 }
1528 prog->startp[0] = startpos - PL_bostr;
1529 PL_reginput = startpos;
1530 PL_regstartp = prog->startp;
1531 PL_regendp = prog->endp;
1532 PL_reglastparen = &prog->lastparen;
1533 prog->lastparen = 0;
1534 PL_regsize = 0;
1535 DEBUG_r(PL_reg_starttry = startpos);
1536 if (PL_reg_start_tmpl <= prog->nparens) {
1537 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1538 if(PL_reg_start_tmp)
1539 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1540 else
1541 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1542 }
1543
1544 /* XXXX What this code is doing here?!!! There should be no need
1545 to do this again and again, PL_reglastparen should take care of
1546 this! */
1547 sp = prog->startp;
1548 ep = prog->endp;
1549 if (prog->nparens) {
1550 for (i = prog->nparens; i >= 1; i--) {
1551 *++sp = -1;
1552 *++ep = -1;
1553 }
1554 }
1555 REGCP_SET;
1556 if (regmatch(prog->program + 1)) {
1557 prog->endp[0] = PL_reginput - PL_bostr;
1558 return 1;
1559 }
1560 REGCP_UNWIND;
1561 return 0;
1562}
1563
1564/*
1565 - regmatch - main matching routine
1566 *
1567 * Conceptually the strategy is simple: check to see whether the current
1568 * node matches, call self recursively to see whether the rest matches,
1569 * and then act accordingly. In practice we make some effort to avoid
1570 * recursion, in particular by going through "ordinary" nodes (that don't
1571 * need to know whether the rest of the match failed) by a loop instead of
1572 * by recursion.
1573 */
1574/* [lwall] I've hoisted the register declarations to the outer block in order to
1575 * maybe save a little bit of pushing and popping on the stack. It also takes
1576 * advantage of machines that use a register save mask on subroutine entry.
1577 */
1578STATIC I32 /* 0 failure, 1 success */
1579S_regmatch(pTHX_ regnode *prog)
1580{
1581 dTHR;
1582 register regnode *scan; /* Current node. */
1583 regnode *next; /* Next node. */
1584 regnode *inner; /* Next node in internal branch. */
1585 register I32 nextchr; /* renamed nextchr - nextchar colides with
1586 function of same name */
1587 register I32 n; /* no or next */
1588 register I32 ln; /* len or last */
1589 register char *s; /* operand or save */
1590 register char *locinput = PL_reginput;
1591 register I32 c1, c2, paren; /* case fold search, parenth */
1592 int minmod = 0, sw = 0, logical = 0;
1593#ifdef DEBUGGING
1594 PL_regindent++;
1595#endif
1596
1597 /* Note that nextchr is a byte even in UTF */
1598 nextchr = UCHARAT(locinput);
1599 scan = prog;
1600 while (scan != NULL) {
1601#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1602#ifdef DEBUGGING
1603# define sayYES goto yes
1604# define sayNO goto no
1605# define saySAME(x) if (x) goto yes; else goto no
1606# define REPORT_CODE_OFF 24
1607#else
1608# define sayYES return 1
1609# define sayNO return 0
1610# define saySAME(x) return x
1611#endif
1612 DEBUG_r( {
1613 SV *prop = sv_newmortal();
1614 int docolor = *PL_colors[0];
1615 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1616 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1617 /* The part of the string before starttry has one color
1618 (pref0_len chars), between starttry and current
1619 position another one (pref_len - pref0_len chars),
1620 after the current position the third one.
1621 We assume that pref0_len <= pref_len, otherwise we
1622 decrease pref0_len. */
1623 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1624 ? (5 + taill) - l : locinput - PL_bostr);
1625 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1626
1627 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1628 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1629 ? (5 + taill) - pref_len : PL_regeol - locinput);
1630 if (pref0_len < 0)
1631 pref0_len = 0;
1632 if (pref0_len > pref_len)
1633 pref0_len = pref_len;
1634 regprop(prop, scan);
1635 PerlIO_printf(Perl_debug_log,
1636 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
1637 locinput - PL_bostr,
1638 PL_colors[4], pref0_len,
1639 locinput - pref_len, PL_colors[5],
1640 PL_colors[2], pref_len - pref0_len,
1641 locinput - pref_len + pref0_len, PL_colors[3],
1642 (docolor ? "" : "> <"),
1643 PL_colors[0], l, locinput, PL_colors[1],
1644 15 - l - pref_len + 1,
1645 "",
1646 scan - PL_regprogram, PL_regindent*2, "",
1647 SvPVX(prop));
1648 } );
1649
1650 next = scan + NEXT_OFF(scan);
1651 if (next == scan)
1652 next = NULL;
1653
1654 switch (OP(scan)) {
1655 case BOL:
1656 if (locinput == PL_bostr
1657 ? PL_regprev == '\n'
1658 : (PL_multiline &&
1659 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1660 {
1661 /* regtill = regbol; */
b8c5462f
JH
1662 break;
1663 }
d6a28714
JH
1664 sayNO;
1665 case MBOL:
1666 if (locinput == PL_bostr
1667 ? PL_regprev == '\n'
1668 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1669 {
b8c5462f
JH
1670 break;
1671 }
d6a28714
JH
1672 sayNO;
1673 case SBOL:
1674 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1675 break;
d6a28714
JH
1676 sayNO;
1677 case GPOS:
1678 if (locinput == PL_reg_ganch)
1679 break;
1680 sayNO;
1681 case EOL:
1682 if (PL_multiline)
1683 goto meol;
1684 else
1685 goto seol;
1686 case MEOL:
1687 meol:
1688 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1689 sayNO;
b8c5462f 1690 break;
d6a28714
JH
1691 case SEOL:
1692 seol:
1693 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1694 sayNO;
d6a28714 1695 if (PL_regeol - locinput > 1)
b8c5462f 1696 sayNO;
b8c5462f 1697 break;
d6a28714
JH
1698 case EOS:
1699 if (PL_regeol != locinput)
b8c5462f 1700 sayNO;
d6a28714
JH
1701 break;
1702 case SANYUTF8:
b8c5462f 1703 if (nextchr & 0x80) {
b8c5462f 1704 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1705 if (locinput > PL_regeol)
1706 sayNO;
b8c5462f
JH
1707 nextchr = UCHARAT(locinput);
1708 break;
1709 }
d6a28714 1710 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1711 sayNO;
b8c5462f 1712 nextchr = UCHARAT(++locinput);
a0d0e21e 1713 break;
d6a28714
JH
1714 case SANY:
1715 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1716 sayNO;
1717 nextchr = UCHARAT(++locinput);
b85d18e9 1718 break;
d6a28714 1719 case ANYUTF8:
a0ed51b3 1720 if (nextchr & 0x80) {
b8c5462f 1721 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1722 if (locinput > PL_regeol)
1723 sayNO;
a0ed51b3
LW
1724 nextchr = UCHARAT(locinput);
1725 break;
1726 }
d6a28714 1727 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3
LW
1728 sayNO;
1729 nextchr = UCHARAT(++locinput);
1730 break;
d6a28714
JH
1731 case REG_ANY:
1732 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1733 sayNO;
76e3520e 1734 nextchr = UCHARAT(++locinput);
a0d0e21e 1735 break;
d6a28714 1736 case EXACT:
cd439c50
IZ
1737 s = STRING(scan);
1738 ln = STR_LEN(scan);
d6a28714
JH
1739 /* Inline the first character, for speed. */
1740 if (UCHARAT(s) != nextchr)
1741 sayNO;
1742 if (PL_regeol - locinput < ln)
1743 sayNO;
1744 if (ln > 1 && memNE(s, locinput, ln))
1745 sayNO;
1746 locinput += ln;
1747 nextchr = UCHARAT(locinput);
1748 break;
1749 case EXACTFL:
b8c5462f
JH
1750 PL_reg_flags |= RF_tainted;
1751 /* FALL THROUGH */
d6a28714 1752 case EXACTF:
cd439c50
IZ
1753 s = STRING(scan);
1754 ln = STR_LEN(scan);
d6a28714
JH
1755
1756 if (UTF) {
1757 char *l = locinput;
1758 char *e = s + ln;
1759 c1 = OP(scan) == EXACTF;
1760 while (s < e) {
1761 if (l >= PL_regeol)
1762 sayNO;
1763 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1764 toLOWER_utf8((U8*)l) :
1765 toLOWER_LC_utf8((U8*)l)))
1766 {
1767 sayNO;
1768 }
1769 s += UTF8SKIP(s);
1770 l += UTF8SKIP(l);
b8c5462f 1771 }
d6a28714 1772 locinput = l;
a0ed51b3
LW
1773 nextchr = UCHARAT(locinput);
1774 break;
1775 }
d6a28714
JH
1776
1777 /* Inline the first character, for speed. */
1778 if (UCHARAT(s) != nextchr &&
1779 UCHARAT(s) != ((OP(scan) == EXACTF)
1780 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1781 sayNO;
d6a28714 1782 if (PL_regeol - locinput < ln)
b8c5462f 1783 sayNO;
d6a28714
JH
1784 if (ln > 1 && (OP(scan) == EXACTF
1785 ? ibcmp(s, locinput, ln)
1786 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1787 sayNO;
d6a28714
JH
1788 locinput += ln;
1789 nextchr = UCHARAT(locinput);
a0d0e21e 1790 break;
d6a28714 1791 case ANYOFUTF8:
cd439c50 1792 s = MASK(scan);
d6a28714 1793 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1794 sayNO;
d6a28714
JH
1795 if (locinput >= PL_regeol)
1796 sayNO;
1797 locinput += PL_utf8skip[nextchr];
1798 nextchr = UCHARAT(locinput);
1799 break;
1800 case ANYOF:
cd439c50 1801 s = MASK(scan);
d6a28714 1802 if (nextchr < 0)
b8c5462f 1803 nextchr = UCHARAT(locinput);
d6a28714
JH
1804 if (!REGINCLASS(s, nextchr))
1805 sayNO;
1806 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1807 sayNO;
b8c5462f
JH
1808 nextchr = UCHARAT(++locinput);
1809 break;
d6a28714 1810 case ALNUML:
b8c5462f
JH
1811 PL_reg_flags |= RF_tainted;
1812 /* FALL THROUGH */
d6a28714 1813 case ALNUM:
b8c5462f 1814 if (!nextchr)
4633a7c4 1815 sayNO;
d6a28714
JH
1816 if (!(OP(scan) == ALNUM
1817 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1818 sayNO;
1819 nextchr = UCHARAT(++locinput);
bbce6d69 1820 break;
d6a28714 1821 case ALNUMLUTF8:
3280af22 1822 PL_reg_flags |= RF_tainted;
bbce6d69 1823 /* FALL THROUGH */
d6a28714 1824 case ALNUMUTF8:
b8c5462f
JH
1825 if (!nextchr)
1826 sayNO;
1827 if (nextchr & 0x80) {
d6a28714
JH
1828 if (!(OP(scan) == ALNUMUTF8
1829 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1830 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1831 {
1832 sayNO;
a0ed51b3 1833 }
b8c5462f 1834 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1835 nextchr = UCHARAT(locinput);
1836 break;
1837 }
d6a28714
JH
1838 if (!(OP(scan) == ALNUMUTF8
1839 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 1840 sayNO;
b8c5462f 1841 nextchr = UCHARAT(++locinput);
a0d0e21e 1842 break;
d6a28714 1843 case NALNUML:
b8c5462f
JH
1844 PL_reg_flags |= RF_tainted;
1845 /* FALL THROUGH */
d6a28714
JH
1846 case NALNUM:
1847 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 1848 sayNO;
d6a28714
JH
1849 if (OP(scan) == NALNUM
1850 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 1851 sayNO;
b8c5462f 1852 nextchr = UCHARAT(++locinput);
a0ed51b3 1853 break;
d6a28714 1854 case NALNUMLUTF8:
b8c5462f
JH
1855 PL_reg_flags |= RF_tainted;
1856 /* FALL THROUGH */
d6a28714 1857 case NALNUMUTF8:
3280af22 1858 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1859 sayNO;
b8c5462f 1860 if (nextchr & 0x80) {
d6a28714
JH
1861 if (OP(scan) == NALNUMUTF8
1862 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1863 : isALNUM_LC_utf8((U8*)locinput))
1864 {
b8c5462f 1865 sayNO;
d6a28714 1866 }
b8c5462f
JH
1867 locinput += PL_utf8skip[nextchr];
1868 nextchr = UCHARAT(locinput);
1869 break;
1870 }
d6a28714
JH
1871 if (OP(scan) == NALNUMUTF8
1872 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 1873 sayNO;
76e3520e 1874 nextchr = UCHARAT(++locinput);
a0d0e21e 1875 break;
d6a28714
JH
1876 case BOUNDL:
1877 case NBOUNDL:
3280af22 1878 PL_reg_flags |= RF_tainted;
bbce6d69 1879 /* FALL THROUGH */
d6a28714
JH
1880 case BOUND:
1881 case NBOUND:
1882 /* was last char in word? */
1883 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
1884 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1885 ln = isALNUM(ln);
1886 n = isALNUM(nextchr);
1887 }
1888 else {
1889 ln = isALNUM_LC(ln);
1890 n = isALNUM_LC(nextchr);
1891 }
1892 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1893 sayNO;
a0d0e21e 1894 break;
d6a28714
JH
1895 case BOUNDLUTF8:
1896 case NBOUNDLUTF8:
a0ed51b3
LW
1897 PL_reg_flags |= RF_tainted;
1898 /* FALL THROUGH */
d6a28714
JH
1899 case BOUNDUTF8:
1900 case NBOUNDUTF8:
1901 /* was last char in word? */
1902 ln = (locinput != PL_regbol)
1903 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
1904 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1905 ln = isALNUM_uni(ln);
1906 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 1907 }
d6a28714
JH
1908 else {
1909 ln = isALNUM_LC_uni(ln);
1910 n = isALNUM_LC_utf8((U8*)locinput);
1911 }
1912 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 1913 sayNO;
a0ed51b3 1914 break;
d6a28714 1915 case SPACEL:
3280af22 1916 PL_reg_flags |= RF_tainted;
bbce6d69 1917 /* FALL THROUGH */
d6a28714
JH
1918 case SPACE:
1919 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1920 sayNO;
d6a28714
JH
1921 if (!(OP(scan) == SPACE
1922 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1923 sayNO;
76e3520e 1924 nextchr = UCHARAT(++locinput);
a0d0e21e 1925 break;
d6a28714 1926 case SPACELUTF8:
a0ed51b3
LW
1927 PL_reg_flags |= RF_tainted;
1928 /* FALL THROUGH */
d6a28714 1929 case SPACEUTF8:
a0ed51b3
LW
1930 if (!nextchr && locinput >= PL_regeol)
1931 sayNO;
1932 if (nextchr & 0x80) {
d6a28714
JH
1933 if (!(OP(scan) == SPACEUTF8
1934 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1935 : isSPACE_LC_utf8((U8*)locinput)))
1936 {
a0ed51b3 1937 sayNO;
d6a28714 1938 }
6f06b55f 1939 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1940 nextchr = UCHARAT(locinput);
1941 break;
1942 }
d6a28714
JH
1943 if (!(OP(scan) == SPACEUTF8
1944 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
1945 sayNO;
1946 nextchr = UCHARAT(++locinput);
1947 break;
d6a28714 1948 case NSPACEL:
3280af22 1949 PL_reg_flags |= RF_tainted;
bbce6d69 1950 /* FALL THROUGH */
d6a28714 1951 case NSPACE:
b8c5462f
JH
1952 if (!nextchr)
1953 sayNO;
d6a28714
JH
1954 if (OP(scan) == SPACE
1955 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1956 sayNO;
b8c5462f 1957 nextchr = UCHARAT(++locinput);
a0d0e21e 1958 break;
d6a28714 1959 case NSPACELUTF8:
a0ed51b3
LW
1960 PL_reg_flags |= RF_tainted;
1961 /* FALL THROUGH */
d6a28714 1962 case NSPACEUTF8:
b8c5462f
JH
1963 if (!nextchr)
1964 sayNO;
1965 if (nextchr & 0x80) {
d6a28714
JH
1966 if (OP(scan) == NSPACEUTF8
1967 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1968 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
1969 {
1970 sayNO;
1971 }
1972 locinput += PL_utf8skip[nextchr];
1973 nextchr = UCHARAT(locinput);
1974 break;
a0ed51b3 1975 }
d6a28714
JH
1976 if (OP(scan) == NSPACEUTF8
1977 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1978 sayNO;
76e3520e 1979 nextchr = UCHARAT(++locinput);
a0d0e21e 1980 break;
d6a28714 1981 case DIGITL:
a0ed51b3
LW
1982 PL_reg_flags |= RF_tainted;
1983 /* FALL THROUGH */
d6a28714 1984 case DIGIT:
a0ed51b3
LW
1985 if (!nextchr && locinput >= PL_regeol)
1986 sayNO;
d6a28714
JH
1987 if (!(OP(scan) == DIGIT
1988 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 1989 sayNO;
76e3520e 1990 nextchr = UCHARAT(++locinput);
a0d0e21e 1991 break;
d6a28714 1992 case DIGITLUTF8:
a0ed51b3
LW
1993 PL_reg_flags |= RF_tainted;
1994 /* FALL THROUGH */
d6a28714 1995 case DIGITUTF8:
a0ed51b3
LW
1996 if (!nextchr)
1997 sayNO;
1998 if (nextchr & 0x80) {
d6a28714
JH
1999 if (OP(scan) == NDIGITUTF8
2000 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2001 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 2002 {
a0ed51b3 2003 sayNO;
dfe13c55 2004 }
6f06b55f 2005 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2006 nextchr = UCHARAT(locinput);
2007 break;
2008 }
d6a28714 2009 if (!isDIGIT(nextchr))
a0ed51b3
LW
2010 sayNO;
2011 nextchr = UCHARAT(++locinput);
2012 break;
d6a28714 2013 case NDIGITL:
b8c5462f
JH
2014 PL_reg_flags |= RF_tainted;
2015 /* FALL THROUGH */
d6a28714 2016 case NDIGIT:
b8c5462f
JH
2017 if (!nextchr)
2018 sayNO;
d6a28714
JH
2019 if (OP(scan) == DIGIT
2020 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2021 sayNO;
76e3520e 2022 nextchr = UCHARAT(++locinput);
a0d0e21e 2023 break;
d6a28714 2024 case NDIGITLUTF8:
b8c5462f
JH
2025 PL_reg_flags |= RF_tainted;
2026 /* FALL THROUGH */
d6a28714 2027 case NDIGITUTF8:
b8c5462f
JH
2028 if (!nextchr && locinput >= PL_regeol)
2029 sayNO;
a0ed51b3 2030 if (nextchr & 0x80) {
d6a28714 2031 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 2032 sayNO;
6f06b55f 2033 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2034 nextchr = UCHARAT(locinput);
2035 break;
2036 }
d6a28714 2037 if (isDIGIT(nextchr))
a0ed51b3
LW
2038 sayNO;
2039 nextchr = UCHARAT(++locinput);
2040 break;
2041 case CLUMP:
dfe13c55 2042 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2043 sayNO;
6f06b55f 2044 locinput += PL_utf8skip[nextchr];
dfe13c55 2045 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2046 locinput += UTF8SKIP(locinput);
2047 if (locinput > PL_regeol)
2048 sayNO;
2049 nextchr = UCHARAT(locinput);
2050 break;
c8756f30 2051 case REFFL:
3280af22 2052 PL_reg_flags |= RF_tainted;
c8756f30 2053 /* FALL THROUGH */
c277df42 2054 case REF:
c8756f30 2055 case REFF:
c277df42 2056 n = ARG(scan); /* which paren pair */
cf93c79d 2057 ln = PL_regstartp[n];
2c2d71f5 2058 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2059 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2060 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2061 if (ln == PL_regendp[n])
a0d0e21e 2062 break;
a0ed51b3 2063
cf93c79d 2064 s = PL_bostr + ln;
a0ed51b3
LW
2065 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2066 char *l = locinput;
cf93c79d 2067 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2068 /*
2069 * Note that we can't do the "other character" lookup trick as
2070 * in the 8-bit case (no pun intended) because in Unicode we
2071 * have to map both upper and title case to lower case.
2072 */
2073 if (OP(scan) == REFF) {
2074 while (s < e) {
2075 if (l >= PL_regeol)
2076 sayNO;
dfe13c55 2077 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2078 sayNO;
2079 s += UTF8SKIP(s);
2080 l += UTF8SKIP(l);
2081 }
2082 }
2083 else {
2084 while (s < e) {
2085 if (l >= PL_regeol)
2086 sayNO;
dfe13c55 2087 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2088 sayNO;
2089 s += UTF8SKIP(s);
2090 l += UTF8SKIP(l);
2091 }
2092 }
2093 locinput = l;
2094 nextchr = UCHARAT(locinput);
2095 break;
2096 }
2097
a0d0e21e 2098 /* Inline the first character, for speed. */
76e3520e 2099 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2100 (OP(scan) == REF ||
2101 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2102 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2103 sayNO;
cf93c79d 2104 ln = PL_regendp[n] - ln;
3280af22 2105 if (locinput + ln > PL_regeol)
4633a7c4 2106 sayNO;
c8756f30
AK
2107 if (ln > 1 && (OP(scan) == REF
2108 ? memNE(s, locinput, ln)
2109 : (OP(scan) == REFF
2110 ? ibcmp(s, locinput, ln)
2111 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2112 sayNO;
a0d0e21e 2113 locinput += ln;
76e3520e 2114 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2115 break;
2116
2117 case NOTHING:
c277df42 2118 case TAIL:
a0d0e21e
LW
2119 break;
2120 case BACK:
2121 break;
c277df42
IZ
2122 case EVAL:
2123 {
2124 dSP;
533c011a 2125 OP_4tree *oop = PL_op;
3280af22
NIS
2126 COP *ocurcop = PL_curcop;
2127 SV **ocurpad = PL_curpad;
c277df42
IZ
2128 SV *ret;
2129
2130 n = ARG(scan);
533c011a
NIS
2131 PL_op = (OP_4tree*)PL_regdata->data[n];
2132 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
dfad63ad 2133 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2134 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2135
cea2e8a9 2136 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2137 SPAGAIN;
2138 ret = POPs;
2139 PUTBACK;
2140
0f5d15d6
IZ
2141 PL_op = oop;
2142 PL_curpad = ocurpad;
2143 PL_curcop = ocurcop;
c277df42 2144 if (logical) {
0f5d15d6
IZ
2145 if (logical == 2) { /* Postponed subexpression. */
2146 regexp *re;
22c35a8c 2147 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2148 re_cc_state state;
0f5d15d6
IZ
2149 CHECKPOINT cp, lastcp;
2150
2151 if(SvROK(ret) || SvRMAGICAL(ret)) {
2152 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2153
2154 if(SvMAGICAL(sv))
2155 mg = mg_find(sv, 'r');
2156 }
2157 if (mg) {
2158 re = (regexp *)mg->mg_obj;
df0003d4 2159 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2160 }
2161 else {
2162 STRLEN len;
2163 char *t = SvPV(ret, len);
2164 PMOP pm;
2165 char *oprecomp = PL_regprecomp;
2166 I32 osize = PL_regsize;
2167 I32 onpar = PL_regnpar;
2168
2169 pm.op_pmflags = 0;
cea2e8a9 2170 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2171 if (!(SvFLAGS(ret)
2172 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2173 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2174 PL_regprecomp = oprecomp;
2175 PL_regsize = osize;
2176 PL_regnpar = onpar;
2177 }
2178 DEBUG_r(
2179 PerlIO_printf(Perl_debug_log,
2180 "Entering embedded `%s%.60s%s%s'\n",
2181 PL_colors[0],
2182 re->precomp,
2183 PL_colors[1],
2184 (strlen(re->precomp) > 60 ? "..." : ""))
2185 );
2186 state.node = next;
2187 state.prev = PL_reg_call_cc;
2188 state.cc = PL_regcc;
2189 state.re = PL_reg_re;
2190
2ab05381 2191 PL_regcc = 0;
0f5d15d6
IZ
2192
2193 cp = regcppush(0); /* Save *all* the positions. */
2194 REGCP_SET;
2195 cache_re(re);
2196 state.ss = PL_savestack_ix;
2197 *PL_reglastparen = 0;
2198 PL_reg_call_cc = &state;
2199 PL_reginput = locinput;
2c2d71f5
JH
2200
2201 /* XXXX This is too dramatic a measure... */
2202 PL_reg_maxiter = 0;
2203
0f5d15d6 2204 if (regmatch(re->program + 1)) {
2c914db6
IZ
2205 /* Even though we succeeded, we need to restore
2206 global variables, since we may be wrapped inside
2207 SUSPEND, thus the match may be not finished yet. */
2208
2209 /* XXXX Do this only if SUSPENDed? */
2210 PL_reg_call_cc = state.prev;
2211 PL_regcc = state.cc;
2212 PL_reg_re = state.re;
2213 cache_re(PL_reg_re);
2214
2215 /* XXXX This is too dramatic a measure... */
2216 PL_reg_maxiter = 0;
2217
2218 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2219 ReREFCNT_dec(re);
2220 regcpblow(cp);
2221 sayYES;
2222 }
2223 DEBUG_r(
2224 PerlIO_printf(Perl_debug_log,
2225 "%*s failed...\n",
2226 REPORT_CODE_OFF+PL_regindent*2, "")
2227 );
2228 ReREFCNT_dec(re);
2229 REGCP_UNWIND;
2230 regcppop();
2231 PL_reg_call_cc = state.prev;
2232 PL_regcc = state.cc;
2233 PL_reg_re = state.re;
d3790889 2234 cache_re(PL_reg_re);
2c2d71f5
JH
2235
2236 /* XXXX This is too dramatic a measure... */
2237 PL_reg_maxiter = 0;
2238
0f5d15d6
IZ
2239 sayNO;
2240 }
c277df42 2241 sw = SvTRUE(ret);
0f5d15d6 2242 logical = 0;
a0ed51b3
LW
2243 }
2244 else
3280af22 2245 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2246 break;
2247 }
a0d0e21e 2248 case OPEN:
c277df42 2249 n = ARG(scan); /* which paren pair */
3280af22
NIS
2250 PL_reg_start_tmp[n] = locinput;
2251 if (n > PL_regsize)
2252 PL_regsize = n;
a0d0e21e
LW
2253 break;
2254 case CLOSE:
c277df42 2255 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2256 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2257 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2258 if (n > *PL_reglastparen)
2259 *PL_reglastparen = n;
a0d0e21e 2260 break;
c277df42
IZ
2261 case GROUPP:
2262 n = ARG(scan); /* which paren pair */
cf93c79d 2263 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2264 break;
2265 case IFTHEN:
2c2d71f5 2266 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2267 if (sw)
2268 next = NEXTOPER(NEXTOPER(scan));
2269 else {
2270 next = scan + ARG(scan);
2271 if (OP(next) == IFTHEN) /* Fake one. */
2272 next = NEXTOPER(NEXTOPER(next));
2273 }
2274 break;
2275 case LOGICAL:
0f5d15d6 2276 logical = scan->flags;
c277df42 2277 break;
2ab05381
IZ
2278/*******************************************************************
2279 PL_regcc contains infoblock about the innermost (...)* loop, and
2280 a pointer to the next outer infoblock.
2281
2282 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2283
2284 1) After matching X, regnode for CURLYX is processed;
2285
2286 2) This regnode creates infoblock on the stack, and calls
2287 regmatch() recursively with the starting point at WHILEM node;
2288
2289 3) Each hit of WHILEM node tries to match A and Z (in the order
2290 depending on the current iteration, min/max of {min,max} and
2291 greediness). The information about where are nodes for "A"
2292 and "Z" is read from the infoblock, as is info on how many times "A"
2293 was already matched, and greediness.
2294
2295 4) After A matches, the same WHILEM node is hit again.
2296
2297 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2298 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2299 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2300 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2301 of the external loop.
2302
2303 Currently present infoblocks form a tree with a stem formed by PL_curcc
2304 and whatever it mentions via ->next, and additional attached trees
2305 corresponding to temporarily unset infoblocks as in "5" above.
2306
2307 In the following picture infoblocks for outer loop of
2308 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2309 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2310 infoblocks are drawn below the "reset" infoblock.
2311
2312 In fact in the picture below we do not show failed matches for Z and T
2313 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2314 more obvious *why* one needs to *temporary* unset infoblocks.]
2315
2316 Matched REx position InfoBlocks Comment
2317 (Y(A)*?Z)*?T x
2318 Y(A)*?Z)*?T x <- O
2319 Y (A)*?Z)*?T x <- O
2320 Y A)*?Z)*?T x <- O <- I
2321 YA )*?Z)*?T x <- O <- I
2322 YA A)*?Z)*?T x <- O <- I
2323 YAA )*?Z)*?T x <- O <- I
2324 YAA Z)*?T x <- O # Temporary unset I
2325 I
2326
2327 YAAZ Y(A)*?Z)*?T x <- O
2328 I
2329
2330 YAAZY (A)*?Z)*?T x <- O
2331 I
2332
2333 YAAZY A)*?Z)*?T x <- O <- I
2334 I
2335
2336 YAAZYA )*?Z)*?T x <- O <- I
2337 I
2338
2339 YAAZYA Z)*?T x <- O # Temporary unset I
2340 I,I
2341
2342 YAAZYAZ )*?T x <- O
2343 I,I
2344
2345 YAAZYAZ T x # Temporary unset O
2346 O
2347 I,I
2348
2349 YAAZYAZT x
2350 O
2351 I,I
2352 *******************************************************************/
a0d0e21e
LW
2353 case CURLYX: {
2354 CURCUR cc;
3280af22 2355 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2356
2357 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2358 next += ARG(next);
3280af22
NIS
2359 cc.oldcc = PL_regcc;
2360 PL_regcc = &cc;
2361 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2362 cc.cur = -1;
2363 cc.min = ARG1(scan);
2364 cc.max = ARG2(scan);
c277df42 2365 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2366 cc.next = next;
2367 cc.minmod = minmod;
2368 cc.lastloc = 0;
3280af22 2369 PL_reginput = locinput;
a0d0e21e
LW
2370 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2371 regcpblow(cp);
3280af22 2372 PL_regcc = cc.oldcc;
4633a7c4 2373 saySAME(n);
a0d0e21e
LW
2374 }
2375 /* NOT REACHED */
2376 case WHILEM: {
2377 /*
2378 * This is really hard to understand, because after we match
2379 * what we're trying to match, we must make sure the rest of
2c2d71f5 2380 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2381 * to go back UP the parse tree by recursing ever deeper. And
2382 * if it fails, we have to reset our parent's current state
2383 * that we can try again after backing off.
2384 */
2385
c277df42 2386 CHECKPOINT cp, lastcp;
3280af22 2387 CURCUR* cc = PL_regcc;
c277df42
IZ
2388 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2389
4633a7c4 2390 n = cc->cur + 1; /* how many we know we matched */
3280af22 2391 PL_reginput = locinput;
a0d0e21e 2392
c277df42
IZ
2393 DEBUG_r(
2394 PerlIO_printf(Perl_debug_log,
2395 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2396 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2397 (long)n, (long)cc->min,
2398 (long)cc->max, (long)cc)
2399 );
4633a7c4 2400
a0d0e21e
LW
2401 /* If degenerate scan matches "", assume scan done. */
2402
579cf2c3 2403 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2404 PL_regcc = cc->oldcc;
2ab05381
IZ
2405 if (PL_regcc)
2406 ln = PL_regcc->cur;
c277df42 2407 DEBUG_r(
c3464db5
DD
2408 PerlIO_printf(Perl_debug_log,
2409 "%*s empty match detected, try continuation...\n",
3280af22 2410 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2411 );
a0d0e21e 2412 if (regmatch(cc->next))
4633a7c4 2413 sayYES;
c277df42 2414 DEBUG_r(
c3464db5
DD
2415 PerlIO_printf(Perl_debug_log,
2416 "%*s failed...\n",
3280af22 2417 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2418 );
2ab05381
IZ
2419 if (PL_regcc)
2420 PL_regcc->cur = ln;
3280af22 2421 PL_regcc = cc;
4633a7c4 2422 sayNO;
a0d0e21e
LW
2423 }
2424
2425 /* First just match a string of min scans. */
2426
2427 if (n < cc->min) {
2428 cc->cur = n;
2429 cc->lastloc = locinput;
4633a7c4
LW
2430 if (regmatch(cc->scan))
2431 sayYES;
2432 cc->cur = n - 1;
c277df42
IZ
2433 cc->lastloc = lastloc;
2434 DEBUG_r(
c3464db5
DD
2435 PerlIO_printf(Perl_debug_log,
2436 "%*s failed...\n",
3280af22 2437 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2438 );
4633a7c4 2439 sayNO;
a0d0e21e
LW
2440 }
2441
2c2d71f5
JH
2442 if (scan->flags) {
2443 /* Check whether we already were at this position.
2444 Postpone detection until we know the match is not
2445 *that* much linear. */
2446 if (!PL_reg_maxiter) {
2447 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2448 PL_reg_leftiter = PL_reg_maxiter;
2449 }
2450 if (PL_reg_leftiter-- == 0) {
2451 I32 size = (PL_reg_maxiter + 7)/8;
2452 if (PL_reg_poscache) {
2453 if (PL_reg_poscache_size < size) {
2454 Renew(PL_reg_poscache, size, char);
2455 PL_reg_poscache_size = size;
2456 }
2457 Zero(PL_reg_poscache, size, char);
2458 }
2459 else {
2460 PL_reg_poscache_size = size;
2461 Newz(29, PL_reg_poscache, size, char);
2462 }
2463 DEBUG_r(
2464 PerlIO_printf(Perl_debug_log,
2465 "%sDetected a super-linear match, switching on caching%s...\n",
2466 PL_colors[4], PL_colors[5])
2467 );
2468 }
2469 if (PL_reg_leftiter < 0) {
2470 I32 o = locinput - PL_bostr, b;
2471
2472 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2473 b = o % 8;
2474 o /= 8;
2475 if (PL_reg_poscache[o] & (1<<b)) {
2476 DEBUG_r(
2477 PerlIO_printf(Perl_debug_log,
2478 "%*s already tried at this position...\n",
2479 REPORT_CODE_OFF+PL_regindent*2, "")
2480 );
2481 sayNO;
2482 }
2483 PL_reg_poscache[o] |= (1<<b);
2484 }
2485 }
2486
a0d0e21e
LW
2487 /* Prefer next over scan for minimal matching. */
2488
2489 if (cc->minmod) {
3280af22 2490 PL_regcc = cc->oldcc;
2ab05381
IZ
2491 if (PL_regcc)
2492 ln = PL_regcc->cur;
5f05dabc 2493 cp = regcppush(cc->parenfloor);
c277df42 2494 REGCP_SET;
5f05dabc 2495 if (regmatch(cc->next)) {
c277df42 2496 regcpblow(cp);
4633a7c4 2497 sayYES; /* All done. */
5f05dabc 2498 }
c277df42 2499 REGCP_UNWIND;
5f05dabc 2500 regcppop();
2ab05381
IZ
2501 if (PL_regcc)
2502 PL_regcc->cur = ln;
3280af22 2503 PL_regcc = cc;
a0d0e21e 2504
c277df42 2505 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2506 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
2507 && !(PL_reg_flags & RF_warned)) {
2508 PL_reg_flags |= RF_warned;
cea2e8a9 2509 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
2510 "Complex regular subexpression recursion",
2511 REG_INFTY - 1);
c277df42 2512 }
4633a7c4 2513 sayNO;
c277df42 2514 }
a687059c 2515
c277df42 2516 DEBUG_r(
c3464db5
DD
2517 PerlIO_printf(Perl_debug_log,
2518 "%*s trying longer...\n",
3280af22 2519 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2520 );
a0d0e21e 2521 /* Try scanning more and see if it helps. */
3280af22 2522 PL_reginput = locinput;
a0d0e21e
LW
2523 cc->cur = n;
2524 cc->lastloc = locinput;
5f05dabc 2525 cp = regcppush(cc->parenfloor);
c277df42 2526 REGCP_SET;
5f05dabc 2527 if (regmatch(cc->scan)) {
c277df42 2528 regcpblow(cp);
4633a7c4 2529 sayYES;
5f05dabc 2530 }
c277df42 2531 DEBUG_r(
c3464db5
DD
2532 PerlIO_printf(Perl_debug_log,
2533 "%*s failed...\n",
3280af22 2534 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2535 );
2536 REGCP_UNWIND;
5f05dabc 2537 regcppop();
4633a7c4 2538 cc->cur = n - 1;
c277df42 2539 cc->lastloc = lastloc;
4633a7c4 2540 sayNO;
a0d0e21e
LW
2541 }
2542
2543 /* Prefer scan over next for maximal matching. */
2544
2545 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2546 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2547 cc->cur = n;
2548 cc->lastloc = locinput;
c277df42 2549 REGCP_SET;
5f05dabc 2550 if (regmatch(cc->scan)) {
c277df42 2551 regcpblow(cp);
4633a7c4 2552 sayYES;
5f05dabc 2553 }
c277df42 2554 REGCP_UNWIND;
a0d0e21e 2555 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2556 PL_reginput = locinput;
c277df42 2557 DEBUG_r(
c3464db5
DD
2558 PerlIO_printf(Perl_debug_log,
2559 "%*s failed, try continuation...\n",
3280af22 2560 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2561 );
2562 }
599cee73
PM
2563 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2564 && !(PL_reg_flags & RF_warned)) {
3280af22 2565 PL_reg_flags |= RF_warned;
cea2e8a9 2566 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
2567 "Complex regular subexpression recursion",
2568 REG_INFTY - 1);
a0d0e21e
LW
2569 }
2570
2571 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2572 PL_regcc = cc->oldcc;
2ab05381
IZ
2573 if (PL_regcc)
2574 ln = PL_regcc->cur;
a0d0e21e 2575 if (regmatch(cc->next))
4633a7c4 2576 sayYES;
c277df42 2577 DEBUG_r(
c3464db5 2578 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3280af22 2579 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2580 );
2ab05381
IZ
2581 if (PL_regcc)
2582 PL_regcc->cur = ln;
3280af22 2583 PL_regcc = cc;
4633a7c4 2584 cc->cur = n - 1;
c277df42 2585 cc->lastloc = lastloc;
4633a7c4 2586 sayNO;
a0d0e21e
LW
2587 }
2588 /* NOT REACHED */
c277df42
IZ
2589 case BRANCHJ:
2590 next = scan + ARG(scan);
2591 if (next == scan)
2592 next = NULL;
2593 inner = NEXTOPER(NEXTOPER(scan));
2594 goto do_branch;
2595 case BRANCH:
2596 inner = NEXTOPER(scan);
2597 do_branch:
2598 {
2599 CHECKPOINT lastcp;
2600 c1 = OP(scan);
2601 if (OP(next) != c1) /* No choice. */
2602 next = inner; /* Avoid recursion. */
a0d0e21e 2603 else {
3280af22 2604 int lastparen = *PL_reglastparen;
c277df42
IZ
2605
2606 REGCP_SET;
a0d0e21e 2607 do {
3280af22 2608 PL_reginput = locinput;
c277df42 2609 if (regmatch(inner))
4633a7c4 2610 sayYES;
c277df42 2611 REGCP_UNWIND;
3280af22 2612 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2613 PL_regendp[n] = -1;
3280af22 2614 *PL_reglastparen = n;
c277df42 2615 scan = next;
a0d0e21e 2616 /*SUPPRESS 560*/
c277df42
IZ
2617 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2618 next += n;
a0d0e21e 2619 else
c277df42 2620 next = NULL;
c277df42
IZ
2621 inner = NEXTOPER(scan);
2622 if (c1 == BRANCHJ) {
2623 inner = NEXTOPER(inner);
2624 }
2625 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2626 sayNO;
a0d0e21e 2627 /* NOTREACHED */
a687059c 2628 }
a0d0e21e
LW
2629 }
2630 break;
2631 case MINMOD:
2632 minmod = 1;
2633 break;
c277df42
IZ
2634 case CURLYM:
2635 {
00db4c45 2636 I32 l = 0;
c277df42
IZ
2637 CHECKPOINT lastcp;
2638
2639 /* We suppose that the next guy does not need
2640 backtracking: in particular, it is of constant length,
2641 and has no parenths to influence future backrefs. */
2642 ln = ARG1(scan); /* min to match */
2643 n = ARG2(scan); /* max to match */
c277df42
IZ
2644 paren = scan->flags;
2645 if (paren) {
3280af22
NIS
2646 if (paren > PL_regsize)
2647 PL_regsize = paren;
2648 if (paren > *PL_reglastparen)
2649 *PL_reglastparen = paren;
c277df42 2650 }
dc45a647 2651 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2652 if (paren)
2653 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2654 PL_reginput = locinput;
c277df42
IZ
2655 if (minmod) {
2656 minmod = 0;
2657 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2658 sayNO;
5f4b28b2 2659 if (ln && l == 0 && n >= ln
c277df42
IZ
2660 /* In fact, this is tricky. If paren, then the
2661 fact that we did/didnot match may influence
2662 future execution. */
2663 && !(paren && ln == 0))
2664 ln = n;
3280af22 2665 locinput = PL_reginput;
22c35a8c 2666 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2667 c1 = (U8)*STRING(next);
c277df42 2668 if (OP(next) == EXACTF)
22c35a8c 2669 c2 = PL_fold[c1];
c277df42 2670 else if (OP(next) == EXACTFL)
22c35a8c 2671 c2 = PL_fold_locale[c1];
c277df42
IZ
2672 else
2673 c2 = c1;
a0ed51b3
LW
2674 }
2675 else
c277df42
IZ
2676 c1 = c2 = -1000;
2677 REGCP_SET;
5f4b28b2 2678 /* This may be improved if l == 0. */
c277df42
IZ
2679 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2680 /* If it could work, try it. */
2681 if (c1 == -1000 ||
3280af22
NIS
2682 UCHARAT(PL_reginput) == c1 ||
2683 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2684 {
2685 if (paren) {
2686 if (n) {
cf93c79d
IZ
2687 PL_regstartp[paren] =
2688 HOPc(PL_reginput, -l) - PL_bostr;
2689 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2690 }
2691 else
cf93c79d 2692 PL_regendp[paren] = -1;
c277df42
IZ
2693 }
2694 if (regmatch(next))
2695 sayYES;
2696 REGCP_UNWIND;
2697 }
2698 /* Couldn't or didn't -- move forward. */
3280af22 2699 PL_reginput = locinput;
c277df42
IZ
2700 if (regrepeat_hard(scan, 1, &l)) {
2701 ln++;
3280af22 2702 locinput = PL_reginput;
c277df42
IZ
2703 }
2704 else
2705 sayNO;
2706 }
a0ed51b3
LW
2707 }
2708 else {
c277df42
IZ
2709 n = regrepeat_hard(scan, n, &l);
2710 if (n != 0 && l == 0
2711 /* In fact, this is tricky. If paren, then the
2712 fact that we did/didnot match may influence
2713 future execution. */
2714 && !(paren && ln == 0))
2715 ln = n;
3280af22 2716 locinput = PL_reginput;
c277df42 2717 DEBUG_r(
5c0ca799
GS
2718 PerlIO_printf(Perl_debug_log,
2719 "%*s matched %ld times, len=%ld...\n",
3280af22 2720 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42
IZ
2721 );
2722 if (n >= ln) {
22c35a8c 2723 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2724 c1 = (U8)*STRING(next);
c277df42 2725 if (OP(next) == EXACTF)
22c35a8c 2726 c2 = PL_fold[c1];
c277df42 2727 else if (OP(next) == EXACTFL)
22c35a8c 2728 c2 = PL_fold_locale[c1];
c277df42
IZ
2729 else
2730 c2 = c1;
a0ed51b3
LW
2731 }
2732 else
c277df42
IZ
2733 c1 = c2 = -1000;
2734 }
2735 REGCP_SET;
2736 while (n >= ln) {
2737 /* If it could work, try it. */
2738 if (c1 == -1000 ||
3280af22
NIS
2739 UCHARAT(PL_reginput) == c1 ||
2740 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2741 {
2742 DEBUG_r(
c3464db5
DD
2743 PerlIO_printf(Perl_debug_log,
2744 "%*s trying tail with n=%ld...\n",
3280af22 2745 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3
LW
2746 );
2747 if (paren) {
2748 if (n) {
cf93c79d
IZ
2749 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2750 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2751 }
a0ed51b3 2752 else
cf93c79d 2753 PL_regendp[paren] = -1;
c277df42 2754 }
a0ed51b3
LW
2755 if (regmatch(next))
2756 sayYES;
2757 REGCP_UNWIND;
2758 }
c277df42
IZ
2759 /* Couldn't or didn't -- back up. */
2760 n--;
dfe13c55 2761 locinput = HOPc(locinput, -l);
3280af22 2762 PL_reginput = locinput;
c277df42
IZ
2763 }
2764 }
2765 sayNO;
2766 break;
2767 }
2768 case CURLYN:
2769 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2770 if (paren > PL_regsize)
2771 PL_regsize = paren;
2772 if (paren > *PL_reglastparen)
2773 *PL_reglastparen = paren;
c277df42
IZ
2774 ln = ARG1(scan); /* min to match */
2775 n = ARG2(scan); /* max to match */
dc45a647 2776 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2777 goto repeat;
a0d0e21e 2778 case CURLY:
c277df42 2779 paren = 0;
a0d0e21e
LW
2780 ln = ARG1(scan); /* min to match */
2781 n = ARG2(scan); /* max to match */
dc45a647 2782 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2783 goto repeat;
2784 case STAR:
2785 ln = 0;
c277df42 2786 n = REG_INFTY;
a0d0e21e 2787 scan = NEXTOPER(scan);
c277df42 2788 paren = 0;
a0d0e21e
LW
2789 goto repeat;
2790 case PLUS:
c277df42
IZ
2791 ln = 1;
2792 n = REG_INFTY;
2793 scan = NEXTOPER(scan);
2794 paren = 0;
2795 repeat:
a0d0e21e
LW
2796 /*
2797 * Lookahead to avoid useless match attempts
2798 * when we know what character comes next.
2799 */
22c35a8c 2800 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2801 c1 = (U8)*STRING(next);
bbce6d69 2802 if (OP(next) == EXACTF)
22c35a8c 2803 c2 = PL_fold[c1];
bbce6d69 2804 else if (OP(next) == EXACTFL)
22c35a8c 2805 c2 = PL_fold_locale[c1];
bbce6d69 2806 else
2807 c2 = c1;
2808 }
a0d0e21e 2809 else
bbce6d69 2810 c1 = c2 = -1000;
3280af22 2811 PL_reginput = locinput;
a0d0e21e 2812 if (minmod) {
c277df42 2813 CHECKPOINT lastcp;
a0d0e21e
LW
2814 minmod = 0;
2815 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2816 sayNO;
a0ed51b3 2817 locinput = PL_reginput;
c277df42 2818 REGCP_SET;
0fe9bf95
IZ
2819 if (c1 != -1000) {
2820 char *e = locinput + n - ln; /* Should not check after this */
2821 char *old = locinput;
2822
2823 if (e >= PL_regeol || (n == REG_INFTY))
2824 e = PL_regeol - 1;
2825 while (1) {
2826 /* Find place 'next' could work */
2827 if (c1 == c2) {
2828 while (locinput <= e && *locinput != c1)
2829 locinput++;
2830 } else {
2831 while (locinput <= e
2832 && *locinput != c1
2833 && *locinput != c2)
2834 locinput++;
2835 }
2836 if (locinput > e)
2837 sayNO;
2838 /* PL_reginput == old now */
2839 if (locinput != old) {
2840 ln = 1; /* Did some */
2841 if (regrepeat(scan, locinput - old) <
2842 locinput - old)
2843 sayNO;
2844 }
2845 /* PL_reginput == locinput now */
2846 if (paren) {
2847 if (ln) {
cf93c79d
IZ
2848 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2849 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2850 }
2851 else
cf93c79d 2852 PL_regendp[paren] = -1;
0fe9bf95
IZ
2853 }
2854 if (regmatch(next))
2855 sayYES;
2856 PL_reginput = locinput; /* Could be reset... */
2857 REGCP_UNWIND;
2858 /* Couldn't or didn't -- move forward. */
2859 old = locinput++;
2860 }
2861 }
2862 else
c277df42 2863 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2864 /* If it could work, try it. */
bbce6d69 2865 if (c1 == -1000 ||
3280af22
NIS
2866 UCHARAT(PL_reginput) == c1 ||
2867 UCHARAT(PL_reginput) == c2)
bbce6d69 2868 {
c277df42
IZ
2869 if (paren) {
2870 if (n) {
cf93c79d
IZ
2871 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2872 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2873 }
2874 else
cf93c79d 2875 PL_regendp[paren] = -1;
c277df42 2876 }
a0d0e21e 2877 if (regmatch(next))
4633a7c4 2878 sayYES;
c277df42 2879 REGCP_UNWIND;
bbce6d69 2880 }
c277df42 2881 /* Couldn't or didn't -- move forward. */
a0ed51b3 2882 PL_reginput = locinput;
a0d0e21e
LW
2883 if (regrepeat(scan, 1)) {
2884 ln++;
a0ed51b3
LW
2885 locinput = PL_reginput;
2886 }
2887 else
4633a7c4 2888 sayNO;
a0d0e21e
LW
2889 }
2890 }
2891 else {
c277df42 2892 CHECKPOINT lastcp;
a0d0e21e 2893 n = regrepeat(scan, n);
a0ed51b3 2894 locinput = PL_reginput;
22c35a8c 2895 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 2896 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2897 ln = n; /* why back off? */
c277df42
IZ
2898 REGCP_SET;
2899 if (paren) {
2900 while (n >= ln) {
2901 /* If it could work, try it. */
2902 if (c1 == -1000 ||
3280af22
NIS
2903 UCHARAT(PL_reginput) == c1 ||
2904 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2905 {
2906 if (paren && n) {
2907 if (n) {
cf93c79d
IZ
2908 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2909 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2910 }
2911 else
cf93c79d 2912 PL_regendp[paren] = -1;
c277df42
IZ
2913 }
2914 if (regmatch(next))
2915 sayYES;
2916 REGCP_UNWIND;
2917 }
2918 /* Couldn't or didn't -- back up. */
2919 n--;
dfe13c55 2920 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2921 }
a0ed51b3
LW
2922 }
2923 else {
c277df42
IZ
2924 while (n >= ln) {
2925 /* If it could work, try it. */
2926 if (c1 == -1000 ||
3280af22
NIS
2927 UCHARAT(PL_reginput) == c1 ||
2928 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2929 {
2930 if (regmatch(next))
2931 sayYES;
2932 REGCP_UNWIND;
2933 }
2934 /* Couldn't or didn't -- back up. */
2935 n--;
dfe13c55 2936 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2937 }
a0d0e21e
LW
2938 }
2939 }
4633a7c4 2940 sayNO;
c277df42 2941 break;
a0d0e21e 2942 case END:
0f5d15d6
IZ
2943 if (PL_reg_call_cc) {
2944 re_cc_state *cur_call_cc = PL_reg_call_cc;
2945 CURCUR *cctmp = PL_regcc;
2946 regexp *re = PL_reg_re;
2947 CHECKPOINT cp, lastcp;
2948
2949 cp = regcppush(0); /* Save *all* the positions. */
2950 REGCP_SET;
2951 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2952 the caller. */
2953 PL_reginput = locinput; /* Make position available to
2954 the callcc. */
2955 cache_re(PL_reg_call_cc->re);
2956 PL_regcc = PL_reg_call_cc->cc;
2957 PL_reg_call_cc = PL_reg_call_cc->prev;
2958 if (regmatch(cur_call_cc->node)) {
2959 PL_reg_call_cc = cur_call_cc;
2960 regcpblow(cp);
2961 sayYES;
2962 }
2963 REGCP_UNWIND;
2964 regcppop();
2965 PL_reg_call_cc = cur_call_cc;
2966 PL_regcc = cctmp;
2967 PL_reg_re = re;
2968 cache_re(re);
2969
2970 DEBUG_r(
2971 PerlIO_printf(Perl_debug_log,
2972 "%*s continuation failed...\n",
2973 REPORT_CODE_OFF+PL_regindent*2, "")
2974 );
2975 sayNO;
2976 }
3280af22 2977 if (locinput < PL_regtill)
7e5428c5
IZ
2978 sayNO; /* Cannot match: too short. */
2979 /* Fall through */
2980 case SUCCEED:
3280af22 2981 PL_reginput = locinput; /* put where regtry can find it */
4633a7c4 2982 sayYES; /* Success! */
c277df42
IZ
2983 case SUSPEND:
2984 n = 1;
9fe1d20c 2985 PL_reginput = locinput;
c277df42 2986 goto do_ifmatch;
a0d0e21e 2987 case UNLESSM:
c277df42 2988 n = 0;
a0ed51b3 2989 if (scan->flags) {
0fe9bf95
IZ
2990 if (UTF) { /* XXXX This is absolutely
2991 broken, we read before
2992 start of string. */
2993 s = HOPMAYBEc(locinput, -scan->flags);
2994 if (!s)
2995 goto say_yes;
2996 PL_reginput = s;
2997 }
2998 else {
2999 if (locinput < PL_bostr + scan->flags)
3000 goto say_yes;
3001 PL_reginput = locinput - scan->flags;
3002 goto do_ifmatch;
3003 }
a0ed51b3
LW
3004 }
3005 else
3006 PL_reginput = locinput;
c277df42
IZ
3007 goto do_ifmatch;
3008 case IFMATCH:
3009 n = 1;
a0ed51b3 3010 if (scan->flags) {
0fe9bf95
IZ
3011 if (UTF) { /* XXXX This is absolutely
3012 broken, we read before
3013 start of string. */
3014 s = HOPMAYBEc(locinput, -scan->flags);
3015 if (!s || s < PL_bostr)
3016 goto say_no;
3017 PL_reginput = s;
3018 }
3019 else {
3020 if (locinput < PL_bostr + scan->flags)
3021 goto say_no;
3022 PL_reginput = locinput - scan->flags;
3023 goto do_ifmatch;
3024 }
a0ed51b3
LW
3025 }
3026 else
3027 PL_reginput = locinput;
3028
c277df42 3029 do_ifmatch:
c277df42
IZ
3030 inner = NEXTOPER(NEXTOPER(scan));
3031 if (regmatch(inner) != n) {
3032 say_no:
3033 if (logical) {
3034 logical = 0;
3035 sw = 0;
3036 goto do_longjump;
a0ed51b3
LW
3037 }
3038 else
c277df42
IZ
3039 sayNO;
3040 }
3041 say_yes:
3042 if (logical) {
3043 logical = 0;
3044 sw = 1;
3045 }
fe44a5e8 3046 if (OP(scan) == SUSPEND) {
3280af22 3047 locinput = PL_reginput;
565764a8 3048 nextchr = UCHARAT(locinput);
fe44a5e8 3049 }
c277df42
IZ
3050 /* FALL THROUGH. */
3051 case LONGJMP:
3052 do_longjump:
3053 next = scan + ARG(scan);
3054 if (next == scan)
3055 next = NULL;
a0d0e21e
LW
3056 break;
3057 default:
c030ccd9 3058 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 3059 (unsigned long)scan, OP(scan));
cea2e8a9 3060 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3061 }
a0d0e21e
LW
3062 scan = next;
3063 }
a687059c 3064
a0d0e21e
LW
3065 /*
3066 * We get here only if there's trouble -- normally "case END" is
3067 * the terminating point.
3068 */
cea2e8a9 3069 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3070 /*NOTREACHED*/
4633a7c4
LW
3071 sayNO;
3072
3073yes:
3074#ifdef DEBUGGING
3280af22 3075 PL_regindent--;
4633a7c4
LW
3076#endif
3077 return 1;
3078
3079no:
3080#ifdef DEBUGGING
3280af22 3081 PL_regindent--;
4633a7c4 3082#endif
a0d0e21e 3083 return 0;
a687059c
LW
3084}
3085
3086/*
3087 - regrepeat - repeatedly match something simple, report how many
3088 */
3089/*
3090 * [This routine now assumes that it will only match on things of length 1.
3091 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3092 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3093 */
76e3520e 3094STATIC I32
cea2e8a9 3095S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3096{
5c0ca799 3097 dTHR;
a0d0e21e
LW
3098 register char *scan;
3099 register char *opnd;
3100 register I32 c;
3280af22 3101 register char *loceol = PL_regeol;
a0ed51b3 3102 register I32 hardcount = 0;
a0d0e21e 3103
3280af22 3104 scan = PL_reginput;
c277df42 3105 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3106 loceol = scan + max;
a0d0e21e 3107 switch (OP(p)) {
22c35a8c 3108 case REG_ANY:
a0d0e21e
LW
3109 while (scan < loceol && *scan != '\n')
3110 scan++;
3111 break;
3112 case SANY:
3113 scan = loceol;
3114 break;
a0ed51b3
LW
3115 case ANYUTF8:
3116 loceol = PL_regeol;
3117 while (scan < loceol && *scan != '\n') {
3118 scan += UTF8SKIP(scan);
3119 hardcount++;
3120 }
3121 break;
3122 case SANYUTF8:
3123 loceol = PL_regeol;
3124 while (scan < loceol) {
3125 scan += UTF8SKIP(scan);
3126 hardcount++;
3127 }
3128 break;
bbce6d69 3129 case EXACT: /* length of string is 1 */
cd439c50 3130 c = (U8)*STRING(p);
bbce6d69 3131 while (scan < loceol && UCHARAT(scan) == c)
3132 scan++;
3133 break;
3134 case EXACTF: /* length of string is 1 */
cd439c50 3135 c = (U8)*STRING(p);
bbce6d69 3136 while (scan < loceol &&
22c35a8c 3137 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3138 scan++;
3139 break;
3140 case EXACTFL: /* length of string is 1 */
3280af22 3141 PL_reg_flags |= RF_tainted;
cd439c50 3142 c = (U8)*STRING(p);
bbce6d69 3143 while (scan < loceol &&
22c35a8c 3144 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3145 scan++;
3146 break;
a0ed51b3
LW
3147 case ANYOFUTF8:
3148 loceol = PL_regeol;
3149 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3150 scan += UTF8SKIP(scan);
3151 hardcount++;
3152 }
3153 break;
a0d0e21e 3154 case ANYOF:
cd439c50 3155 opnd = MASK(p);
ae5c130c 3156 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 3157 scan++;
a0d0e21e
LW
3158 break;
3159 case ALNUM:
3160 while (scan < loceol && isALNUM(*scan))
3161 scan++;
3162 break;
a0ed51b3
LW
3163 case ALNUMUTF8:
3164 loceol = PL_regeol;
dfe13c55 3165 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3166 scan += UTF8SKIP(scan);
3167 hardcount++;
3168 }
3169 break;
bbce6d69 3170 case ALNUML:
3280af22 3171 PL_reg_flags |= RF_tainted;
bbce6d69 3172 while (scan < loceol && isALNUM_LC(*scan))
3173 scan++;
3174 break;
a0ed51b3
LW
3175 case ALNUMLUTF8:
3176 PL_reg_flags |= RF_tainted;
3177 loceol = PL_regeol;
dfe13c55 3178 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3179 scan += UTF8SKIP(scan);
3180 hardcount++;
3181 }
3182 break;
3183 break;
a0d0e21e
LW
3184 case NALNUM:
3185 while (scan < loceol && !isALNUM(*scan))
3186 scan++;
3187 break;
a0ed51b3
LW
3188 case NALNUMUTF8:
3189 loceol = PL_regeol;
dfe13c55 3190 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3191 scan += UTF8SKIP(scan);
3192 hardcount++;
3193 }
3194 break;
bbce6d69 3195 case NALNUML:
3280af22 3196 PL_reg_flags |= RF_tainted;
bbce6d69 3197 while (scan < loceol && !isALNUM_LC(*scan))
3198 scan++;
3199 break;
a0ed51b3
LW
3200 case NALNUMLUTF8:
3201 PL_reg_flags |= RF_tainted;
3202 loceol = PL_regeol;
dfe13c55 3203 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3204 scan += UTF8SKIP(scan);
3205 hardcount++;
3206 }
3207 break;
a0d0e21e
LW
3208 case SPACE:
3209 while (scan < loceol && isSPACE(*scan))
3210 scan++;
3211 break;
a0ed51b3
LW
3212 case SPACEUTF8:
3213 loceol = PL_regeol;
dfe13c55 3214 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3215 scan += UTF8SKIP(scan);
3216 hardcount++;
3217 }
3218 break;
bbce6d69 3219 case SPACEL:
3280af22 3220 PL_reg_flags |= RF_tainted;
bbce6d69 3221 while (scan < loceol && isSPACE_LC(*scan))
3222 scan++;
3223 break;
a0ed51b3
LW
3224 case SPACELUTF8:
3225 PL_reg_flags |= RF_tainted;
3226 loceol = PL_regeol;
dfe13c55 3227 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3228 scan += UTF8SKIP(scan);
3229 hardcount++;
3230 }
3231 break;
a0d0e21e
LW
3232 case NSPACE:
3233 while (scan < loceol && !isSPACE(*scan))
3234 scan++;
3235 break;
a0ed51b3
LW
3236 case NSPACEUTF8:
3237 loceol = PL_regeol;
dfe13c55 3238 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3239 scan += UTF8SKIP(scan);
3240 hardcount++;
3241 }
3242 break;
bbce6d69 3243 case NSPACEL:
3280af22 3244 PL_reg_flags |= RF_tainted;
bbce6d69 3245 while (scan < loceol && !isSPACE_LC(*scan))
3246 scan++;
3247 break;
a0ed51b3
LW
3248 case NSPACELUTF8:
3249 PL_reg_flags |= RF_tainted;
3250 loceol = PL_regeol;
dfe13c55 3251 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3252 scan += UTF8SKIP(scan);
3253 hardcount++;
3254 }
3255 break;
a0d0e21e
LW
3256 case DIGIT:
3257 while (scan < loceol && isDIGIT(*scan))
3258 scan++;
3259 break;
a0ed51b3
LW
3260 case DIGITUTF8:
3261 loceol = PL_regeol;
dfe13c55 3262 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3263 scan += UTF8SKIP(scan);
3264 hardcount++;
3265 }
3266 break;
3267 break;
a0d0e21e
LW
3268 case NDIGIT:
3269 while (scan < loceol && !isDIGIT(*scan))
3270 scan++;
3271 break;
a0ed51b3
LW
3272 case NDIGITUTF8:
3273 loceol = PL_regeol;
dfe13c55 3274 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3275 scan += UTF8SKIP(scan);
3276 hardcount++;
3277 }
3278 break;
a0d0e21e
LW
3279 default: /* Called on something of 0 width. */
3280 break; /* So match right here or not at all. */
3281 }
a687059c 3282
a0ed51b3
LW
3283 if (hardcount)
3284 c = hardcount;
3285 else
3286 c = scan - PL_reginput;
3280af22 3287 PL_reginput = scan;
a687059c 3288
c277df42
IZ
3289 DEBUG_r(
3290 {
3291 SV *prop = sv_newmortal();
3292
3293 regprop(prop, p);
3294 PerlIO_printf(Perl_debug_log,
3295 "%*s %s can match %ld times out of %ld...\n",
3296 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
3297 });
3298
a0d0e21e 3299 return(c);
a687059c
LW
3300}
3301
3302/*
c277df42
IZ
3303 - regrepeat_hard - repeatedly match something, report total lenth and length
3304 *
3305 * The repeater is supposed to have constant length.
3306 */
3307
76e3520e 3308STATIC I32
cea2e8a9 3309S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3310{
5c0ca799 3311 dTHR;
c277df42
IZ
3312 register char *scan;
3313 register char *start;
3280af22 3314 register char *loceol = PL_regeol;
a0ed51b3 3315 I32 l = 0;
708e3b05 3316 I32 count = 0, res = 1;
a0ed51b3
LW
3317
3318 if (!max)
3319 return 0;
c277df42 3320
3280af22 3321 start = PL_reginput;
a0ed51b3 3322 if (UTF) {
708e3b05 3323 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3324 if (!count++) {
3325 l = 0;
3326 while (start < PL_reginput) {
3327 l++;
3328 start += UTF8SKIP(start);
3329 }
3330 *lp = l;
3331 if (l == 0)
3332 return max;
3333 }
3334 if (count == max)
3335 return count;
3336 }
3337 }
3338 else {
708e3b05 3339 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3340 if (!count++) {
3341 *lp = l = PL_reginput - start;
3342 if (max != REG_INFTY && l*max < loceol - scan)
3343 loceol = scan + l*max;
3344 if (l == 0)
3345 return max;
c277df42
IZ
3346 }
3347 }
3348 }
708e3b05 3349 if (!res)
3280af22 3350 PL_reginput = scan;
c277df42 3351
a0ed51b3 3352 return count;
c277df42
IZ
3353}
3354
3355/*
cb8d8820 3356 - reginclass - determine if a character falls into a character class
bbce6d69 3357 */
3358
76e3520e 3359STATIC bool
cea2e8a9 3360S_reginclass(pTHX_ register char *p, register I32 c)
bbce6d69 3361{
5c0ca799 3362 dTHR;
b8c5462f 3363 char flags = ANYOF_FLAGS(p);
bbce6d69 3364 bool match = FALSE;
3365
3366 c &= 0xFF;
b8c5462f 3367 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3368 match = TRUE;
3369 else if (flags & ANYOF_FOLD) {
3370 I32 cf;
3371 if (flags & ANYOF_LOCALE) {
3280af22 3372 PL_reg_flags |= RF_tainted;
22c35a8c 3373 cf = PL_fold_locale[c];
bbce6d69 3374 }
3375 else
22c35a8c 3376 cf = PL_fold[c];
b8c5462f 3377 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3378 match = TRUE;
3379 }
3380
b8c5462f 3381 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3382 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3383 if (
3384 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3385 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3386 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3387 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3388 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3389 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3390 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3391 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3392 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3393 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3394 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3395 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3396 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3397 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3398 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3399 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3400 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3401 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3402 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3403 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3404 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3405 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3406 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3407 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3408 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3409 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3410 ) /* How's that for a conditional? */
bbce6d69 3411 {
3412 match = TRUE;
3413 }
3414 }
3415
ae5c130c 3416 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3417}
3418
a0ed51b3 3419STATIC bool
cea2e8a9 3420S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3421{
3422 dTHR;
a0ed51b3
LW
3423 char flags = ARG1(f);
3424 bool match = FALSE;
3425 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3426
3427 if (swash_fetch(sv, p))
3428 match = TRUE;
3429 else if (flags & ANYOF_FOLD) {
3430 I32 cf;
dfe13c55 3431 U8 tmpbuf[10];
a0ed51b3
LW
3432 if (flags & ANYOF_LOCALE) {
3433 PL_reg_flags |= RF_tainted;
3434 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3435 }
3436 else
3437 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3438 if (swash_fetch(sv, tmpbuf))
3439 match = TRUE;
3440 }
3441
b8c5462f 3442 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3443
3444 return (flags & ANYOF_INVERT) ? !match : match;
3445}
161b471a 3446
dfe13c55 3447STATIC U8 *
cea2e8a9 3448S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3449{
3450 dTHR;
a0ed51b3
LW
3451 if (off >= 0) {
3452 while (off-- && s < (U8*)PL_regeol)
3453 s += UTF8SKIP(s);
3454 }
3455 else {
3456 while (off++) {
3457 if (s > (U8*)PL_bostr) {
3458 s--;
3459 if (*s & 0x80) {
3460 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3461 s--;
3462 } /* XXX could check well-formedness here */
3463 }
3464 }
3465 }
3466 return s;
3467}
161b471a 3468
dfe13c55 3469STATIC U8 *
cea2e8a9 3470S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3471{
c485e607 3472 dTHR;
a0ed51b3
LW
3473 if (off >= 0) {
3474 while (off-- && s < (U8*)PL_regeol)
3475 s += UTF8SKIP(s);
3476 if (off >= 0)
3477 return 0;
3478 }
3479 else {
3480 while (off++) {
3481 if (s > (U8*)PL_bostr) {
3482 s--;
3483 if (*s & 0x80) {
3484 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3485 s--;
3486 } /* XXX could check well-formedness here */
3487 }
3488 else
3489 break;
3490 }
3491 if (off <= 0)
3492 return 0;
3493 }
3494 return s;
3495}
51371543
GS
3496
3497#ifdef PERL_OBJECT
3498#define NO_XSLOCKS
3499#include "XSUB.h"
3500#endif
3501
3502static void
3503restore_pos(pTHXo_ void *arg)
3504{
3505 dTHR;
3506 if (PL_reg_eval_set) {
3507 if (PL_reg_oldsaved) {
3508 PL_reg_re->subbeg = PL_reg_oldsaved;
3509 PL_reg_re->sublen = PL_reg_oldsavedlen;
3510 RX_MATCH_COPIED_on(PL_reg_re);
3511 }
3512 PL_reg_magic->mg_len = PL_reg_oldpos;
3513 PL_reg_eval_set = 0;
3514 PL_curpm = PL_reg_oldcurpm;
3515 }
3516}
3517