This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Tweak the mkdir trailing slash code some more.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
56953603
IZ
45#endif
46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
1aa99e6b
IH
119#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
120#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
121#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
122#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
123#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
124#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
125
51371543
GS
126static void restore_pos(pTHXo_ void *arg);
127
128
76e3520e 129STATIC CHECKPOINT
cea2e8a9 130S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 131{
3280af22
NIS
132 int retval = PL_savestack_ix;
133 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
134 int p;
135
136 SSCHECK(i + 5);
3280af22 137 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
138 SSPUSHINT(PL_regendp[p]);
139 SSPUSHINT(PL_regstartp[p]);
3280af22 140 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
141 SSPUSHINT(p);
142 }
3280af22
NIS
143 SSPUSHINT(PL_regsize);
144 SSPUSHINT(*PL_reglastparen);
145 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
146 SSPUSHINT(i + 3);
147 SSPUSHINT(SAVEt_REGCONTEXT);
148 return retval;
149}
150
c277df42 151/* These are needed since we do not localize EVAL nodes: */
02db2b7b 152# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 153 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 154 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 155
02db2b7b 156# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 157 PerlIO_printf(Perl_debug_log, \
faccc32b 158 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 159 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 160
76e3520e 161STATIC char *
cea2e8a9 162S_regcppop(pTHX)
a0d0e21e
LW
163{
164 I32 i = SSPOPINT;
165 U32 paren = 0;
166 char *input;
cf93c79d 167 I32 tmps;
a0d0e21e
LW
168 assert(i == SAVEt_REGCONTEXT);
169 i = SSPOPINT;
170 input = (char *) SSPOPPTR;
3280af22
NIS
171 *PL_reglastparen = SSPOPINT;
172 PL_regsize = SSPOPINT;
c277df42 173 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 174 paren = (U32)SSPOPINT;
3280af22 175 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
176 PL_regstartp[paren] = SSPOPINT;
177 tmps = SSPOPINT;
3280af22
NIS
178 if (paren <= *PL_reglastparen)
179 PL_regendp[paren] = tmps;
c277df42 180 DEBUG_r(
c3464db5 181 PerlIO_printf(Perl_debug_log,
b900a521
JH
182 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
183 (UV)paren, (IV)PL_regstartp[paren],
184 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
185 (IV)PL_regendp[paren],
3280af22 186 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 187 );
a0d0e21e 188 }
c277df42 189 DEBUG_r(
3280af22 190 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 191 PerlIO_printf(Perl_debug_log,
faccc32b
JH
192 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
193 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
194 }
195 );
daf18116 196#if 1
dafc8851
JH
197 /* It would seem that the similar code in regtry()
198 * already takes care of this, and in fact it is in
199 * a better location to since this code can #if 0-ed out
200 * but the code in regtry() is needed or otherwise tests
201 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
202 * (as of patchlevel 7877) will fail. Then again,
203 * this code seems to be necessary or otherwise
204 * building DynaLoader will fail:
205 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
206 * --jhi */
3280af22
NIS
207 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
208 if (paren > PL_regsize)
cf93c79d
IZ
209 PL_regstartp[paren] = -1;
210 PL_regendp[paren] = -1;
a0d0e21e 211 }
dafc8851 212#endif
a0d0e21e
LW
213 return input;
214}
215
0f5d15d6 216STATIC char *
cea2e8a9 217S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
218{
219 I32 tmp = PL_savestack_ix;
220
221 PL_savestack_ix = ss;
222 regcppop();
223 PL_savestack_ix = tmp;
942e002e 224 return Nullch;
0f5d15d6
IZ
225}
226
227typedef struct re_cc_state
228{
229 I32 ss;
230 regnode *node;
231 struct re_cc_state *prev;
232 CURCUR *cc;
233 regexp *re;
234} re_cc_state;
235
02db2b7b 236#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 237
29d1e993
HS
238#define TRYPAREN(paren, n, input) { \
239 if (paren) { \
240 if (n) { \
241 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
242 PL_regendp[paren] = input - PL_bostr; \
243 } \
244 else \
245 PL_regendp[paren] = -1; \
246 } \
247 if (regmatch(next)) \
248 sayYES; \
249 if (paren && n) \
250 PL_regendp[paren] = -1; \
251}
252
253
a687059c 254/*
e50aee73 255 * pregexec and friends
a687059c
LW
256 */
257
258/*
c277df42 259 - pregexec - match a regexp against a string
a687059c 260 */
c277df42 261I32
864dbfa3 262Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 263 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
264/* strend: pointer to null at end of string */
265/* strbeg: real beginning of string */
266/* minend: end of match must be >=minend after stringarg. */
267/* nosave: For optimizations. */
268{
269 return
270 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
271 nosave ? 0 : REXEC_COPY_STR);
272}
0f5d15d6
IZ
273
274STATIC void
cea2e8a9 275S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
276{
277 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
278#ifdef DEBUGGING
279 PL_regprogram = prog->program;
280#endif
281 PL_regnpar = prog->nparens;
282 PL_regdata = prog->data;
283 PL_reg_re = prog;
284}
22e551b9 285
cad2e5aa
JH
286/*
287 * Need to implement the following flags for reg_anch:
288 *
289 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
290 * USE_INTUIT_ML
291 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
292 * INTUIT_AUTORITATIVE_ML
293 * INTUIT_ONCE_NOML - Intuit can match in one location only.
294 * INTUIT_ONCE_ML
295 *
296 * Another flag for this function: SECOND_TIME (so that float substrs
297 * with giant delta may be not rechecked).
298 */
299
300/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
301
2c2d71f5 302/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
303 Otherwise, only SvCUR(sv) is used to get strbeg. */
304
305/* XXXX We assume that strpos is strbeg unless sv. */
306
6eb5f6b9
JH
307/* XXXX Some places assume that there is a fixed substring.
308 An update may be needed if optimizer marks as "INTUITable"
309 RExen without fixed substrings. Similarly, it is assumed that
310 lengths of all the strings are no more than minlen, thus they
311 cannot come from lookahead.
312 (Or minlen should take into account lookahead.) */
313
2c2d71f5
JH
314/* A failure to find a constant substring means that there is no need to make
315 an expensive call to REx engine, thus we celebrate a failure. Similarly,
316 finding a substring too deep into the string means that less calls to
30944b6d
IZ
317 regtry() should be needed.
318
319 REx compiler's optimizer found 4 possible hints:
320 a) Anchored substring;
321 b) Fixed substring;
322 c) Whether we are anchored (beginning-of-line or \G);
323 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 324 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
325 string which does not contradict any of them.
326 */
2c2d71f5 327
6eb5f6b9
JH
328/* Most of decisions we do here should have been done at compile time.
329 The nodes of the REx which we used for the search should have been
330 deleted from the finite automaton. */
331
cad2e5aa
JH
332char *
333Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
334 char *strend, U32 flags, re_scream_pos_data *data)
335{
2c2d71f5 336 register I32 start_shift;
cad2e5aa 337 /* Should be nonnegative! */
2c2d71f5
JH
338 register I32 end_shift;
339 register char *s;
340 register SV *check;
a1933d95 341 char *strbeg;
cad2e5aa
JH
342 char *t;
343 I32 ml_anch;
2c2d71f5 344 char *tmp;
6eb5f6b9
JH
345 register char *other_last = Nullch; /* other substr checked before this */
346 char *check_at; /* check substr found at this pos */
30944b6d
IZ
347#ifdef DEBUGGING
348 char *i_strpos = strpos;
349#endif
cad2e5aa
JH
350
351 DEBUG_r( if (!PL_colorset) reginitcolors() );
352 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 353 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
354 PL_colors[4],PL_colors[5],PL_colors[0],
355 prog->precomp,
356 PL_colors[1],
357 (strlen(prog->precomp) > 60 ? "..." : ""),
358 PL_colors[0],
b900a521 359 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
360 strpos, PL_colors[1],
361 (strend - strpos > 60 ? "..." : ""))
362 );
363
1aa99e6b
IH
364 if (prog->reganch & ROPT_UTF8)
365 PL_reg_flags |= RF_utf8;
366
367 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 368 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 369 goto fail;
2c2d71f5 370 }
a1933d95 371 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 372 PL_regeol = strend;
653099ff 373 check = prog->check_substr;
2c2d71f5 374 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
375 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
376 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 377 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 378
5675c1a6
IZ
379 if (!ml_anch) {
380 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
381 /* SvCUR is not set on references: SvRV and SvPVX overlap */
382 && sv && !SvROK(sv)
a1933d95 383 && (strpos != strbeg)) {
5675c1a6
IZ
384 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
385 goto fail;
386 }
387 if (prog->check_offset_min == prog->check_offset_max) {
2c2d71f5 388 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
389 I32 slen;
390
1aa99e6b 391 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
392 if (SvTAIL(check)) {
393 slen = SvCUR(check); /* >= 1 */
cad2e5aa 394
2c2d71f5
JH
395 if ( strend - s > slen || strend - s < slen - 1
396 || (strend - s == slen && strend[-1] != '\n')) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
398 goto fail_finish;
cad2e5aa
JH
399 }
400 /* Now should match s[0..slen-2] */
401 slen--;
653099ff 402 if (slen && (*SvPVX(check) != *s
cad2e5aa 403 || (slen > 1
653099ff 404 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
405 report_neq:
406 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
407 goto fail_finish;
408 }
cad2e5aa 409 }
653099ff
GS
410 else if (*SvPVX(check) != *s
411 || ((slen = SvCUR(check)) > 1
412 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
413 goto report_neq;
414 goto success_at_start;
5675c1a6 415 }
cad2e5aa 416 }
2c2d71f5 417 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 418 s = strpos;
2c2d71f5 419 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 420 end_shift = prog->minlen - start_shift -
653099ff 421 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 422 if (!ml_anch) {
653099ff
GS
423 I32 end = prog->check_offset_max + CHR_SVLEN(check)
424 - (SvTAIL(check) != 0);
1aa99e6b 425 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
426
427 if (end_shift < eshift)
428 end_shift = eshift;
429 }
cad2e5aa 430 }
2c2d71f5 431 else { /* Can match at random position */
cad2e5aa
JH
432 ml_anch = 0;
433 s = strpos;
2c2d71f5
JH
434 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
435 /* Should be nonnegative! */
436 end_shift = prog->minlen - start_shift -
653099ff 437 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
438 }
439
2c2d71f5 440#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 441 if (end_shift < 0)
6bbae5e6 442 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
443#endif
444
2c2d71f5
JH
445 restart:
446 /* Find a possible match in the region s..strend by looking for
447 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 448 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
449 I32 p = -1; /* Internal iterator of scream. */
450 I32 *pp = data ? data->scream_pos : &p;
451
2c2d71f5
JH
452 if (PL_screamfirst[BmRARE(check)] >= 0
453 || ( BmRARE(check) == '\n'
454 && (BmPREVIOUS(check) == SvCUR(check) - 1)
455 && SvTAIL(check) ))
456 s = screaminstr(sv, check,
457 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 458 else
2c2d71f5 459 goto fail_finish;
cad2e5aa
JH
460 if (data)
461 *data->scream_olds = s;
462 }
463 else
1aa99e6b
IH
464 s = fbm_instr(HOP3(s, start_shift, strend),
465 HOP3(strend, -end_shift, strbeg),
2c2d71f5 466 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
467
468 /* Update the count-of-usability, remove useless subpatterns,
469 unshift s. */
2c2d71f5
JH
470
471 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
472 (s ? "Found" : "Did not find"),
473 ((check == prog->anchored_substr) ? "anchored" : "floating"),
474 PL_colors[0],
7b0972df
JH
475 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
476 SvPVX(check),
2c2d71f5
JH
477 PL_colors[1], (SvTAIL(check) ? "$" : ""),
478 (s ? " at offset " : "...\n") ) );
479
480 if (!s)
481 goto fail_finish;
482
6eb5f6b9
JH
483 check_at = s;
484
2c2d71f5 485 /* Finish the diagnostic message */
30944b6d 486 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
487
488 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
489 Start with the other substr.
490 XXXX no SCREAM optimization yet - and a very coarse implementation
491 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
492 *always* match. Probably should be marked during compile...
493 Probably it is right to do no SCREAM here...
494 */
495
496 if (prog->float_substr && prog->anchored_substr) {
30944b6d 497 /* Take into account the "other" substring. */
2c2d71f5
JH
498 /* XXXX May be hopelessly wrong for UTF... */
499 if (!other_last)
6eb5f6b9 500 other_last = strpos;
2c2d71f5 501 if (check == prog->float_substr) {
30944b6d
IZ
502 do_other_anchored:
503 {
1aa99e6b 504 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
505 char *s1 = s;
506
2c2d71f5
JH
507 t = s - prog->check_offset_max;
508 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
509 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 510 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 511 && t > strpos)))
30944b6d 512 /* EMPTY */;
2c2d71f5
JH
513 else
514 t = strpos;
1aa99e6b 515 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
516 if (t < other_last) /* These positions already checked */
517 t = other_last;
1aa99e6b 518 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
519 if (last < last1)
520 last1 = last;
521 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
522 /* On end-of-str: see comment below. */
523 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
524 HOP3(HOP3(last1, prog->anchored_offset, strend)
525 + SvCUR(prog->anchored_substr),
526 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
527 prog->anchored_substr,
528 PL_multiline ? FBMrf_MULTILINE : 0);
529 DEBUG_r(PerlIO_printf(Perl_debug_log,
530 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
531 (s ? "Found" : "Contradicts"),
532 PL_colors[0],
7b0972df
JH
533 (int)(SvCUR(prog->anchored_substr)
534 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
535 SvPVX(prog->anchored_substr),
536 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
537 if (!s) {
538 if (last1 >= last2) {
539 DEBUG_r(PerlIO_printf(Perl_debug_log,
540 ", giving up...\n"));
541 goto fail_finish;
542 }
543 DEBUG_r(PerlIO_printf(Perl_debug_log,
544 ", trying floating at offset %ld...\n",
1aa99e6b
IH
545 (long)(HOP3c(s1, 1, strend) - i_strpos)));
546 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
547 s = HOP3c(last, 1, strend);
2c2d71f5
JH
548 goto restart;
549 }
550 else {
551 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 552 (long)(s - i_strpos)));
1aa99e6b
IH
553 t = HOP3c(s, -prog->anchored_offset, strbeg);
554 other_last = HOP3c(s, 1, strend);
30944b6d 555 s = s1;
2c2d71f5
JH
556 if (t == strpos)
557 goto try_at_start;
2c2d71f5
JH
558 goto try_at_offset;
559 }
30944b6d 560 }
2c2d71f5
JH
561 }
562 else { /* Take into account the floating substring. */
563 char *last, *last1;
564 char *s1 = s;
565
1aa99e6b
IH
566 t = HOP3c(s, -start_shift, strbeg);
567 last1 = last =
568 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
569 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
570 last = HOP3c(t, prog->float_max_offset, strend);
571 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
572 if (s < other_last)
573 s = other_last;
2c2d71f5
JH
574 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
575 /* fbm_instr() takes into account exact value of end-of-str
576 if the check is SvTAIL(ed). Since false positives are OK,
577 and end-of-str is not later than strend we are OK. */
578 s = fbm_instr((unsigned char*)s,
579 (unsigned char*)last + SvCUR(prog->float_substr)
580 - (SvTAIL(prog->float_substr)!=0),
581 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
582 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
583 (s ? "Found" : "Contradicts"),
584 PL_colors[0],
7b0972df
JH
585 (int)(SvCUR(prog->float_substr)
586 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
587 SvPVX(prog->float_substr),
588 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
589 if (!s) {
590 if (last1 == last) {
591 DEBUG_r(PerlIO_printf(Perl_debug_log,
592 ", giving up...\n"));
593 goto fail_finish;
594 }
595 DEBUG_r(PerlIO_printf(Perl_debug_log,
596 ", trying anchored starting at offset %ld...\n",
30944b6d 597 (long)(s1 + 1 - i_strpos)));
803ff556 598 other_last = last;
1aa99e6b 599 s = HOP3c(t, 1, strend);
2c2d71f5
JH
600 goto restart;
601 }
602 else {
603 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 604 (long)(s - i_strpos)));
803ff556 605 other_last = s; /* Fix this later. --Hugo */
30944b6d 606 s = s1;
2c2d71f5
JH
607 if (t == strpos)
608 goto try_at_start;
2c2d71f5
JH
609 goto try_at_offset;
610 }
611 }
cad2e5aa 612 }
2c2d71f5
JH
613
614 t = s - prog->check_offset_max;
2c2d71f5
JH
615 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
616 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
617 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
618 && t > strpos))) {
2c2d71f5
JH
619 /* Fixed substring is found far enough so that the match
620 cannot start at strpos. */
621 try_at_offset:
cad2e5aa 622 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
623 /* Eventually fbm_*() should handle this, but often
624 anchored_offset is not 0, so this check will not be wasted. */
625 /* XXXX In the code below we prefer to look for "^" even in
626 presence of anchored substrings. And we search even
627 beyond the found float position. These pessimizations
628 are historical artefacts only. */
629 find_anchor:
2c2d71f5 630 while (t < strend - prog->minlen) {
cad2e5aa 631 if (*t == '\n') {
4ee3650e 632 if (t < check_at - prog->check_offset_min) {
30944b6d 633 if (prog->anchored_substr) {
4ee3650e
GS
634 /* Since we moved from the found position,
635 we definitely contradict the found anchored
30944b6d
IZ
636 substr. Due to the above check we do not
637 contradict "check" substr.
638 Thus we can arrive here only if check substr
639 is float. Redo checking for "other"=="fixed".
640 */
641 strpos = t + 1;
642 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
643 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
644 goto do_other_anchored;
645 }
4ee3650e
GS
646 /* We don't contradict the found floating substring. */
647 /* XXXX Why not check for STCLASS? */
cad2e5aa 648 s = t + 1;
2c2d71f5 649 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 650 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
651 goto set_useful;
652 }
4ee3650e
GS
653 /* Position contradicts check-string */
654 /* XXXX probably better to look for check-string
655 than for "\n", so one should lower the limit for t? */
656 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 657 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 658 other_last = strpos = s = t + 1;
cad2e5aa
JH
659 goto restart;
660 }
661 t++;
662 }
2c2d71f5
JH
663 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
664 PL_colors[0],PL_colors[1]));
665 goto fail_finish;
cad2e5aa 666 }
f5952150
GS
667 else {
668 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
669 PL_colors[0],PL_colors[1]));
670 }
cad2e5aa
JH
671 s = t;
672 set_useful:
2c2d71f5 673 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
674 }
675 else {
f5952150 676 /* The found string does not prohibit matching at strpos,
2c2d71f5 677 - no optimization of calling REx engine can be performed,
f5952150
GS
678 unless it was an MBOL and we are not after MBOL,
679 or a future STCLASS check will fail this. */
2c2d71f5
JH
680 try_at_start:
681 /* Even in this situation we may use MBOL flag if strpos is offset
682 wrt the start of the string. */
05b4157f 683 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 684 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
685 /* May be due to an implicit anchor of m{.*foo} */
686 && !(prog->reganch & ROPT_IMPLICIT))
687 {
cad2e5aa
JH
688 t = strpos;
689 goto find_anchor;
690 }
30944b6d 691 DEBUG_r( if (ml_anch)
f5952150
GS
692 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
693 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 694 );
2c2d71f5 695 success_at_start:
30944b6d 696 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 697 && prog->check_substr /* Could be deleted already */
cad2e5aa 698 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
699 && prog->check_substr == prog->float_substr)
700 {
cad2e5aa 701 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 702 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
703 SvREFCNT_dec(prog->check_substr);
704 prog->check_substr = Nullsv; /* disable */
705 prog->float_substr = Nullsv; /* clear */
5e39e1e5 706 check = Nullsv; /* abort */
cad2e5aa 707 s = strpos;
3cf5c195
IZ
708 /* XXXX This is a remnant of the old implementation. It
709 looks wasteful, since now INTUIT can use many
6eb5f6b9 710 other heuristics. */
cad2e5aa
JH
711 prog->reganch &= ~RE_USE_INTUIT;
712 }
713 else
714 s = strpos;
715 }
716
6eb5f6b9
JH
717 /* Last resort... */
718 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
719 if (prog->regstclass) {
720 /* minlen == 0 is possible if regstclass is \b or \B,
721 and the fixed substr is ''$.
722 Since minlen is already taken into account, s+1 is before strend;
723 accidentally, minlen >= 1 guaranties no false positives at s + 1
724 even for \b or \B. But (minlen? 1 : 0) below assumes that
725 regstclass does not come from lookahead... */
726 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
727 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 728 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 729 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 730 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 731 : 1);
6eb5f6b9 732 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
733 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
734 : (prog->float_substr
735 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
736 cl_l, strend)
737 : strend);
a1933d95 738 char *startpos = strbeg;
6eb5f6b9
JH
739
740 t = s;
76384e4a 741 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 742 PL_regdata = prog->data;
76384e4a
GS
743 PL_bostr = startpos;
744 }
6eb5f6b9
JH
745 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
746 if (!s) {
747#ifdef DEBUGGING
748 char *what;
749#endif
750 if (endpos == strend) {
751 DEBUG_r( PerlIO_printf(Perl_debug_log,
752 "Could not match STCLASS...\n") );
753 goto fail;
754 }
66e933ab
GS
755 DEBUG_r( PerlIO_printf(Perl_debug_log,
756 "This position contradicts STCLASS...\n") );
653099ff
GS
757 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
758 goto fail;
6eb5f6b9
JH
759 /* Contradict one of substrings */
760 if (prog->anchored_substr) {
6eb5f6b9
JH
761 if (prog->anchored_substr == check) {
762 DEBUG_r( what = "anchored" );
763 hop_and_restart:
1aa99e6b 764 s = HOP3c(t, 1, strend);
66e933ab
GS
765 if (s + start_shift + end_shift > strend) {
766 /* XXXX Should be taken into account earlier? */
767 DEBUG_r( PerlIO_printf(Perl_debug_log,
768 "Could not match STCLASS...\n") );
769 goto fail;
770 }
5e39e1e5
HS
771 if (!check)
772 goto giveup;
6eb5f6b9 773 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 774 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
775 what, (long)(s + start_shift - i_strpos)) );
776 goto restart;
777 }
66e933ab 778 /* Have both, check_string is floating */
6eb5f6b9
JH
779 if (t + start_shift >= check_at) /* Contradicts floating=check */
780 goto retry_floating_check;
781 /* Recheck anchored substring, but not floating... */
782 s = check_at;
5e39e1e5
HS
783 if (!check)
784 goto giveup;
6eb5f6b9 785 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 786 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
787 (long)(other_last - i_strpos)) );
788 goto do_other_anchored;
789 }
60e71179
GS
790 /* Another way we could have checked stclass at the
791 current position only: */
792 if (ml_anch) {
793 s = t = t + 1;
5e39e1e5
HS
794 if (!check)
795 goto giveup;
60e71179 796 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
797 "Looking for /%s^%s/m starting at offset %ld...\n",
798 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 799 goto try_at_offset;
66e933ab 800 }
60e71179
GS
801 if (!prog->float_substr) /* Could have been deleted */
802 goto fail;
6eb5f6b9
JH
803 /* Check is floating subtring. */
804 retry_floating_check:
805 t = check_at - start_shift;
806 DEBUG_r( what = "floating" );
807 goto hop_and_restart;
808 }
809 DEBUG_r( if (t != s)
810 PerlIO_printf(Perl_debug_log,
811 "By STCLASS: moving %ld --> %ld\n",
812 (long)(t - i_strpos), (long)(s - i_strpos));
813 else
814 PerlIO_printf(Perl_debug_log,
815 "Does not contradict STCLASS...\n") );
816 }
5e39e1e5
HS
817 giveup:
818 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
819 PL_colors[4], (check ? "Guessed" : "Giving up"),
820 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 821 return s;
2c2d71f5
JH
822
823 fail_finish: /* Substring not found */
66e933ab
GS
824 if (prog->check_substr) /* could be removed already */
825 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 826 fail:
2c2d71f5 827 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
828 PL_colors[4],PL_colors[5]));
829 return Nullch;
830}
9661b544 831
6eb5f6b9 832/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
833STATIC char *
834S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 835{
6eb5f6b9
JH
836 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
837 char *m;
d8093b23
G
838 STRLEN ln;
839 unsigned int c1;
840 unsigned int c2;
6eb5f6b9
JH
841 char *e;
842 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 843 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 844
6eb5f6b9
JH
845 /* We know what class it must start with. */
846 switch (OP(c)) {
6eb5f6b9 847 case ANYOF:
a0ed51b3 848 while (s < strend) {
ffc61ed2 849 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
850 if (tmp && (norun || regtry(prog, s)))
851 goto got_it;
852 else
853 tmp = doevery;
a0ed51b3 854 }
6eb5f6b9
JH
855 else
856 tmp = 1;
ffc61ed2 857 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 858 }
6eb5f6b9
JH
859 break;
860 case EXACTF:
861 m = STRING(c);
862 ln = STR_LEN(c);
1aa99e6b
IH
863 if (UTF) {
864 c1 = to_utf8_lower((U8*)m);
865 c2 = to_utf8_upper((U8*)m);
866 }
867 else {
868 c1 = *(U8*)m;
869 c2 = PL_fold[c1];
870 }
6eb5f6b9
JH
871 goto do_exactf;
872 case EXACTFL:
873 m = STRING(c);
874 ln = STR_LEN(c);
d8093b23 875 c1 = *(U8*)m;
6eb5f6b9
JH
876 c2 = PL_fold_locale[c1];
877 do_exactf:
878 e = strend - ln;
b3c9acc1 879
6eb5f6b9
JH
880 if (norun && e < s)
881 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
882
883 if (do_utf8) {
884 STRLEN len;
885 if (c1 == c2)
886 while (s <= e) {
887 if ( utf8_to_uv_simple((U8*)s, &len) == c1
888 && regtry(prog, s) )
889 goto got_it;
890 s += len;
891 }
892 else
893 while (s <= e) {
894 UV c = utf8_to_uv_simple((U8*)s, &len);
895 if ( (c == c1 || c == c2) && regtry(prog, s) )
896 goto got_it;
897 s += len;
898 }
899 }
900 else {
901 if (c1 == c2)
902 while (s <= e) {
903 if ( *(U8*)s == c1
904 && (ln == 1 || !(OP(c) == EXACTF
905 ? ibcmp(s, m, ln)
906 : ibcmp_locale(s, m, ln)))
907 && (norun || regtry(prog, s)) )
908 goto got_it;
909 s++;
910 }
911 else
912 while (s <= e) {
913 if ( (*(U8*)s == c1 || *(U8*)s == c2)
914 && (ln == 1 || !(OP(c) == EXACTF
915 ? ibcmp(s, m, ln)
916 : ibcmp_locale(s, m, ln)))
917 && (norun || regtry(prog, s)) )
918 goto got_it;
919 s++;
920 }
b3c9acc1
IZ
921 }
922 break;
bbce6d69 923 case BOUNDL:
3280af22 924 PL_reg_flags |= RF_tainted;
bbce6d69 925 /* FALL THROUGH */
a0d0e21e 926 case BOUND:
ffc61ed2
JH
927 if (do_utf8) {
928 if (s == startpos)
929 tmp = '\n';
930 else {
1aa99e6b 931 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
ffc61ed2
JH
932
933 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
934 }
935 tmp = ((OP(c) == BOUND ?
936 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
937 while (s < strend) {
938 if (tmp == !(OP(c) == BOUND ?
939 swash_fetch(PL_utf8_alnum, (U8*)s) :
940 isALNUM_LC_utf8((U8*)s)))
941 {
942 tmp = !tmp;
943 if ((norun || regtry(prog, s)))
944 goto got_it;
945 }
946 s += UTF8SKIP(s);
a687059c 947 }
a0d0e21e 948 }
667bb95a 949 else {
ffc61ed2
JH
950 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
951 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
952 while (s < strend) {
953 if (tmp ==
954 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
955 tmp = !tmp;
956 if ((norun || regtry(prog, s)))
957 goto got_it;
958 }
959 s++;
a0ed51b3 960 }
a0ed51b3 961 }
6eb5f6b9 962 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
963 goto got_it;
964 break;
bbce6d69 965 case NBOUNDL:
3280af22 966 PL_reg_flags |= RF_tainted;
bbce6d69 967 /* FALL THROUGH */
a0d0e21e 968 case NBOUND:
ffc61ed2
JH
969 if (do_utf8) {
970 if (s == startpos)
971 tmp = '\n';
972 else {
1aa99e6b 973 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
ffc61ed2
JH
974
975 tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
976 }
977 tmp = ((OP(c) == NBOUND ?
978 isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
979 while (s < strend) {
980 if (tmp == !(OP(c) == NBOUND ?
981 swash_fetch(PL_utf8_alnum, (U8*)s) :
982 isALNUM_LC_utf8((U8*)s)))
983 tmp = !tmp;
984 else if ((norun || regtry(prog, s)))
985 goto got_it;
986 s += UTF8SKIP(s);
987 }
a0d0e21e 988 }
667bb95a 989 else {
ffc61ed2
JH
990 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
991 tmp = ((OP(c) == NBOUND ?
992 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
993 while (s < strend) {
994 if (tmp ==
995 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
996 tmp = !tmp;
997 else if ((norun || regtry(prog, s)))
998 goto got_it;
999 s++;
1000 }
a0ed51b3 1001 }
6eb5f6b9 1002 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1003 goto got_it;
1004 break;
a0d0e21e 1005 case ALNUM:
ffc61ed2
JH
1006 if (do_utf8) {
1007 while (s < strend) {
1008 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1009 if (tmp && (norun || regtry(prog, s)))
1010 goto got_it;
1011 else
1012 tmp = doevery;
1013 }
bbce6d69 1014 else
ffc61ed2
JH
1015 tmp = 1;
1016 s += UTF8SKIP(s);
bbce6d69 1017 }
bbce6d69 1018 }
ffc61ed2
JH
1019 else {
1020 while (s < strend) {
1021 if (isALNUM(*s)) {
1022 if (tmp && (norun || regtry(prog, s)))
1023 goto got_it;
1024 else
1025 tmp = doevery;
1026 }
a0ed51b3 1027 else
ffc61ed2
JH
1028 tmp = 1;
1029 s++;
a0ed51b3 1030 }
a0ed51b3
LW
1031 }
1032 break;
bbce6d69 1033 case ALNUML:
3280af22 1034 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1035 if (do_utf8) {
1036 while (s < strend) {
1037 if (isALNUM_LC_utf8((U8*)s)) {
1038 if (tmp && (norun || regtry(prog, s)))
1039 goto got_it;
1040 else
1041 tmp = doevery;
1042 }
a687059c 1043 else
ffc61ed2
JH
1044 tmp = 1;
1045 s += UTF8SKIP(s);
a0d0e21e 1046 }
a0d0e21e 1047 }
ffc61ed2
JH
1048 else {
1049 while (s < strend) {
1050 if (isALNUM_LC(*s)) {
1051 if (tmp && (norun || regtry(prog, s)))
1052 goto got_it;
1053 else
1054 tmp = doevery;
1055 }
a0ed51b3 1056 else
ffc61ed2
JH
1057 tmp = 1;
1058 s++;
a0ed51b3 1059 }
a0ed51b3
LW
1060 }
1061 break;
a0d0e21e 1062 case NALNUM:
ffc61ed2
JH
1063 if (do_utf8) {
1064 while (s < strend) {
1065 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1066 if (tmp && (norun || regtry(prog, s)))
1067 goto got_it;
1068 else
1069 tmp = doevery;
1070 }
bbce6d69 1071 else
ffc61ed2
JH
1072 tmp = 1;
1073 s += UTF8SKIP(s);
bbce6d69 1074 }
bbce6d69 1075 }
ffc61ed2
JH
1076 else {
1077 while (s < strend) {
1078 if (!isALNUM(*s)) {
1079 if (tmp && (norun || regtry(prog, s)))
1080 goto got_it;
1081 else
1082 tmp = doevery;
1083 }
a0ed51b3 1084 else
ffc61ed2
JH
1085 tmp = 1;
1086 s++;
a0ed51b3 1087 }
a0ed51b3
LW
1088 }
1089 break;
bbce6d69 1090 case NALNUML:
3280af22 1091 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1092 if (do_utf8) {
1093 while (s < strend) {
1094 if (!isALNUM_LC_utf8((U8*)s)) {
1095 if (tmp && (norun || regtry(prog, s)))
1096 goto got_it;
1097 else
1098 tmp = doevery;
1099 }
a687059c 1100 else
ffc61ed2
JH
1101 tmp = 1;
1102 s += UTF8SKIP(s);
a687059c 1103 }
a0d0e21e 1104 }
ffc61ed2
JH
1105 else {
1106 while (s < strend) {
1107 if (!isALNUM_LC(*s)) {
1108 if (tmp && (norun || regtry(prog, s)))
1109 goto got_it;
1110 else
1111 tmp = doevery;
1112 }
a0ed51b3 1113 else
ffc61ed2
JH
1114 tmp = 1;
1115 s++;
a0ed51b3 1116 }
a0ed51b3
LW
1117 }
1118 break;
a0d0e21e 1119 case SPACE:
ffc61ed2
JH
1120 if (do_utf8) {
1121 while (s < strend) {
1122 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1123 if (tmp && (norun || regtry(prog, s)))
1124 goto got_it;
1125 else
1126 tmp = doevery;
1127 }
a0d0e21e 1128 else
ffc61ed2
JH
1129 tmp = 1;
1130 s += UTF8SKIP(s);
2304df62 1131 }
a0d0e21e 1132 }
ffc61ed2
JH
1133 else {
1134 while (s < strend) {
1135 if (isSPACE(*s)) {
1136 if (tmp && (norun || regtry(prog, s)))
1137 goto got_it;
1138 else
1139 tmp = doevery;
1140 }
a0ed51b3 1141 else
ffc61ed2
JH
1142 tmp = 1;
1143 s++;
a0ed51b3 1144 }
a0ed51b3
LW
1145 }
1146 break;
bbce6d69 1147 case SPACEL:
3280af22 1148 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1149 if (do_utf8) {
1150 while (s < strend) {
1151 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1152 if (tmp && (norun || regtry(prog, s)))
1153 goto got_it;
1154 else
1155 tmp = doevery;
1156 }
bbce6d69 1157 else
ffc61ed2
JH
1158 tmp = 1;
1159 s += UTF8SKIP(s);
bbce6d69 1160 }
bbce6d69 1161 }
ffc61ed2
JH
1162 else {
1163 while (s < strend) {
1164 if (isSPACE_LC(*s)) {
1165 if (tmp && (norun || regtry(prog, s)))
1166 goto got_it;
1167 else
1168 tmp = doevery;
1169 }
a0ed51b3 1170 else
ffc61ed2
JH
1171 tmp = 1;
1172 s++;
a0ed51b3 1173 }
a0ed51b3
LW
1174 }
1175 break;
a0d0e21e 1176 case NSPACE:
ffc61ed2
JH
1177 if (do_utf8) {
1178 while (s < strend) {
1179 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1180 if (tmp && (norun || regtry(prog, s)))
1181 goto got_it;
1182 else
1183 tmp = doevery;
1184 }
a0d0e21e 1185 else
ffc61ed2
JH
1186 tmp = 1;
1187 s += UTF8SKIP(s);
a687059c 1188 }
a0d0e21e 1189 }
ffc61ed2
JH
1190 else {
1191 while (s < strend) {
1192 if (!isSPACE(*s)) {
1193 if (tmp && (norun || regtry(prog, s)))
1194 goto got_it;
1195 else
1196 tmp = doevery;
1197 }
a0ed51b3 1198 else
ffc61ed2
JH
1199 tmp = 1;
1200 s++;
a0ed51b3 1201 }
a0ed51b3
LW
1202 }
1203 break;
bbce6d69 1204 case NSPACEL:
3280af22 1205 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1206 if (do_utf8) {
1207 while (s < strend) {
1208 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1209 if (tmp && (norun || regtry(prog, s)))
1210 goto got_it;
1211 else
1212 tmp = doevery;
1213 }
bbce6d69 1214 else
ffc61ed2
JH
1215 tmp = 1;
1216 s += UTF8SKIP(s);
bbce6d69 1217 }
bbce6d69 1218 }
ffc61ed2
JH
1219 else {
1220 while (s < strend) {
1221 if (!isSPACE_LC(*s)) {
1222 if (tmp && (norun || regtry(prog, s)))
1223 goto got_it;
1224 else
1225 tmp = doevery;
1226 }
a0ed51b3 1227 else
ffc61ed2
JH
1228 tmp = 1;
1229 s++;
a0ed51b3 1230 }
a0ed51b3
LW
1231 }
1232 break;
a0d0e21e 1233 case DIGIT:
ffc61ed2
JH
1234 if (do_utf8) {
1235 while (s < strend) {
1236 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1237 if (tmp && (norun || regtry(prog, s)))
1238 goto got_it;
1239 else
1240 tmp = doevery;
1241 }
a0d0e21e 1242 else
ffc61ed2
JH
1243 tmp = 1;
1244 s += UTF8SKIP(s);
2b69d0c2 1245 }
a0d0e21e 1246 }
ffc61ed2
JH
1247 else {
1248 while (s < strend) {
1249 if (isDIGIT(*s)) {
1250 if (tmp && (norun || regtry(prog, s)))
1251 goto got_it;
1252 else
1253 tmp = doevery;
1254 }
a0ed51b3 1255 else
ffc61ed2
JH
1256 tmp = 1;
1257 s++;
a0ed51b3 1258 }
a0ed51b3
LW
1259 }
1260 break;
b8c5462f
JH
1261 case DIGITL:
1262 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1263 if (do_utf8) {
1264 while (s < strend) {
1265 if (isDIGIT_LC_utf8((U8*)s)) {
1266 if (tmp && (norun || regtry(prog, s)))
1267 goto got_it;
1268 else
1269 tmp = doevery;
1270 }
b8c5462f 1271 else
ffc61ed2
JH
1272 tmp = 1;
1273 s += UTF8SKIP(s);
b8c5462f 1274 }
b8c5462f 1275 }
ffc61ed2
JH
1276 else {
1277 while (s < strend) {
1278 if (isDIGIT_LC(*s)) {
1279 if (tmp && (norun || regtry(prog, s)))
1280 goto got_it;
1281 else
1282 tmp = doevery;
1283 }
b8c5462f 1284 else
ffc61ed2
JH
1285 tmp = 1;
1286 s++;
b8c5462f 1287 }
b8c5462f
JH
1288 }
1289 break;
a0d0e21e 1290 case NDIGIT:
ffc61ed2
JH
1291 if (do_utf8) {
1292 while (s < strend) {
1293 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1294 if (tmp && (norun || regtry(prog, s)))
1295 goto got_it;
1296 else
1297 tmp = doevery;
1298 }
a0d0e21e 1299 else
ffc61ed2
JH
1300 tmp = 1;
1301 s += UTF8SKIP(s);
a687059c 1302 }
a0d0e21e 1303 }
ffc61ed2
JH
1304 else {
1305 while (s < strend) {
1306 if (!isDIGIT(*s)) {
1307 if (tmp && (norun || regtry(prog, s)))
1308 goto got_it;
1309 else
1310 tmp = doevery;
1311 }
a0ed51b3 1312 else
ffc61ed2
JH
1313 tmp = 1;
1314 s++;
a0ed51b3 1315 }
a0ed51b3
LW
1316 }
1317 break;
b8c5462f
JH
1318 case NDIGITL:
1319 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1320 if (do_utf8) {
1321 while (s < strend) {
1322 if (!isDIGIT_LC_utf8((U8*)s)) {
1323 if (tmp && (norun || regtry(prog, s)))
1324 goto got_it;
1325 else
1326 tmp = doevery;
1327 }
b8c5462f 1328 else
ffc61ed2
JH
1329 tmp = 1;
1330 s += UTF8SKIP(s);
b8c5462f 1331 }
a0ed51b3 1332 }
ffc61ed2
JH
1333 else {
1334 while (s < strend) {
1335 if (!isDIGIT_LC(*s)) {
1336 if (tmp && (norun || regtry(prog, s)))
1337 goto got_it;
1338 else
1339 tmp = doevery;
1340 }
cf93c79d 1341 else
ffc61ed2
JH
1342 tmp = 1;
1343 s++;
b8c5462f 1344 }
b8c5462f
JH
1345 }
1346 break;
b3c9acc1 1347 default:
3c3eec57
GS
1348 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1349 break;
d6a28714 1350 }
6eb5f6b9
JH
1351 return 0;
1352 got_it:
1353 return s;
1354}
1355
1356/*
1357 - regexec_flags - match a regexp against a string
1358 */
1359I32
1360Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1361 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1362/* strend: pointer to null at end of string */
1363/* strbeg: real beginning of string */
1364/* minend: end of match must be >=minend after stringarg. */
1365/* data: May be used for some additional optimizations. */
1366/* nosave: For optimizations. */
1367{
6eb5f6b9
JH
1368 register char *s;
1369 register regnode *c;
1370 register char *startpos = stringarg;
6eb5f6b9
JH
1371 I32 minlen; /* must match at least this many chars */
1372 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1373 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1374 constant substr. */ /* CC */
1375 I32 end_shift = 0; /* Same for the end. */ /* CC */
1376 I32 scream_pos = -1; /* Internal iterator of scream. */
1377 char *scream_olds;
1378 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1379 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1380
1381 PL_regcc = 0;
1382
1383 cache_re(prog);
1384#ifdef DEBUGGING
1385 PL_regnarrate = PL_debug & 512;
1386#endif
1387
1388 /* Be paranoid... */
1389 if (prog == NULL || startpos == NULL) {
1390 Perl_croak(aTHX_ "NULL regexp parameter");
1391 return 0;
1392 }
1393
1394 minlen = prog->minlen;
1aa99e6b
IH
1395 if (do_utf8) {
1396 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1397 }
1398 else {
1399 if (strend - startpos < minlen) goto phooey;
1400 }
6eb5f6b9
JH
1401
1402 if (startpos == strbeg) /* is ^ valid at stringarg? */
1403 PL_regprev = '\n';
1404 else {
1aa99e6b
IH
1405 if (prog->reganch & ROPT_UTF8 && do_utf8) {
1406 U8 *s = reghop3((U8*)stringarg, -1, (U8*)strbeg);
1407 PL_regprev = utf8_to_uv(s, (U8*)stringarg - s, NULL, 0);
1408 }
1409 else
1410 PL_regprev = (U32)stringarg[-1];
6eb5f6b9
JH
1411 if (!PL_multiline && PL_regprev == '\n')
1412 PL_regprev = '\0'; /* force ^ to NOT match */
1413 }
1414
1415 /* Check validity of program. */
1416 if (UCHARAT(prog->program) != REG_MAGIC) {
1417 Perl_croak(aTHX_ "corrupted regexp program");
1418 }
1419
1420 PL_reg_flags = 0;
1421 PL_reg_eval_set = 0;
1422 PL_reg_maxiter = 0;
1423
1424 if (prog->reganch & ROPT_UTF8)
1425 PL_reg_flags |= RF_utf8;
1426
1427 /* Mark beginning of line for ^ and lookbehind. */
1428 PL_regbol = startpos;
1429 PL_bostr = strbeg;
1430 PL_reg_sv = sv;
1431
1432 /* Mark end of line for $ (and such) */
1433 PL_regeol = strend;
1434
1435 /* see how far we have to get to not match where we matched before */
1436 PL_regtill = startpos+minend;
1437
1438 /* We start without call_cc context. */
1439 PL_reg_call_cc = 0;
1440
1441 /* If there is a "must appear" string, look for it. */
1442 s = startpos;
1443
1444 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1445 MAGIC *mg;
1446
1447 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1448 PL_reg_ganch = startpos;
1449 else if (sv && SvTYPE(sv) >= SVt_PVMG
1450 && SvMAGIC(sv)
1451 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1452 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1453 if (prog->reganch & ROPT_ANCH_GPOS) {
1454 if (s > PL_reg_ganch)
1455 goto phooey;
1456 s = PL_reg_ganch;
1457 }
1458 }
1459 else /* pos() not defined */
1460 PL_reg_ganch = strbeg;
1461 }
1462
699c3c34
JH
1463 if (do_utf8 == (UTF!=0) &&
1464 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1465 re_scream_pos_data d;
1466
1467 d.scream_olds = &scream_olds;
1468 d.scream_pos = &scream_pos;
1469 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1470 if (!s)
1471 goto phooey; /* not present */
1472 }
1473
1474 DEBUG_r( if (!PL_colorset) reginitcolors() );
1475 DEBUG_r(PerlIO_printf(Perl_debug_log,
1476 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1477 PL_colors[4],PL_colors[5],PL_colors[0],
1478 prog->precomp,
1479 PL_colors[1],
1480 (strlen(prog->precomp) > 60 ? "..." : ""),
1481 PL_colors[0],
1482 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1483 startpos, PL_colors[1],
1484 (strend - startpos > 60 ? "..." : ""))
1485 );
1486
1487 /* Simplest case: anchored match need be tried only once. */
1488 /* [unless only anchor is BOL and multiline is set] */
1489 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1490 if (s == startpos && regtry(prog, startpos))
1491 goto got_it;
1492 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1493 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1494 {
1495 char *end;
1496
1497 if (minlen)
1498 dontbother = minlen - 1;
1aa99e6b 1499 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1500 /* for multiline we only have to try after newlines */
1501 if (prog->check_substr) {
1502 if (s == startpos)
1503 goto after_try;
1504 while (1) {
1505 if (regtry(prog, s))
1506 goto got_it;
1507 after_try:
1508 if (s >= end)
1509 goto phooey;
1510 if (prog->reganch & RE_USE_INTUIT) {
1511 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1512 if (!s)
1513 goto phooey;
1514 }
1515 else
1516 s++;
1517 }
1518 } else {
1519 if (s > startpos)
1520 s--;
1521 while (s < end) {
1522 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1523 if (regtry(prog, s))
1524 goto got_it;
1525 }
1526 }
1527 }
1528 }
1529 goto phooey;
1530 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1531 if (regtry(prog, PL_reg_ganch))
1532 goto got_it;
1533 goto phooey;
1534 }
1535
1536 /* Messy cases: unanchored match. */
1537 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1538 /* we have /x+whatever/ */
1539 /* it must be a one character string (XXXX Except UTF?) */
1540 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1541#ifdef DEBUGGING
1542 int did_match = 0;
1543#endif
1544
1aa99e6b 1545 if (do_utf8) {
6eb5f6b9
JH
1546 while (s < strend) {
1547 if (*s == ch) {
bf93d4cc 1548 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1549 if (regtry(prog, s)) goto got_it;
1550 s += UTF8SKIP(s);
1551 while (s < strend && *s == ch)
1552 s += UTF8SKIP(s);
1553 }
1554 s += UTF8SKIP(s);
1555 }
1556 }
1557 else {
1558 while (s < strend) {
1559 if (*s == ch) {
bf93d4cc 1560 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1561 if (regtry(prog, s)) goto got_it;
1562 s++;
1563 while (s < strend && *s == ch)
1564 s++;
1565 }
1566 s++;
1567 }
1568 }
bf93d4cc
GS
1569 DEBUG_r(did_match ||
1570 PerlIO_printf(Perl_debug_log,
1571 "Did not find anchored character...\n"));
6eb5f6b9
JH
1572 }
1573 /*SUPPRESS 560*/
1aa99e6b
IH
1574 else if (do_utf8 == (UTF!=0) &&
1575 (prog->anchored_substr != Nullsv
1576 || (prog->float_substr != Nullsv
1577 && prog->float_max_offset < strend - s))) {
6eb5f6b9
JH
1578 SV *must = prog->anchored_substr
1579 ? prog->anchored_substr : prog->float_substr;
1580 I32 back_max =
1581 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1582 I32 back_min =
1583 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1584 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1585 -(I32)(CHR_SVLEN(must)
1aa99e6b 1586 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1587 char *last1; /* Last position checked before */
bf93d4cc
GS
1588#ifdef DEBUGGING
1589 int did_match = 0;
1590#endif
6eb5f6b9
JH
1591
1592 if (s > PL_bostr)
1593 last1 = HOPc(s, -1);
1594 else
1595 last1 = s - 1; /* bogus */
1596
1597 /* XXXX check_substr already used to find `s', can optimize if
1598 check_substr==must. */
1599 scream_pos = -1;
1600 dontbother = end_shift;
1601 strend = HOPc(strend, -dontbother);
1602 while ( (s <= last) &&
1603 ((flags & REXEC_SCREAM)
1aa99e6b 1604 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1605 end_shift, &scream_pos, 0))
1aa99e6b 1606 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
6eb5f6b9
JH
1607 (unsigned char*)strend, must,
1608 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1609 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1610 if (HOPc(s, -back_max) > last1) {
1611 last1 = HOPc(s, -back_min);
1612 s = HOPc(s, -back_max);
1613 }
1614 else {
1615 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1616
1617 last1 = HOPc(s, -back_min);
1618 s = t;
1619 }
1aa99e6b 1620 if (do_utf8) {
6eb5f6b9
JH
1621 while (s <= last1) {
1622 if (regtry(prog, s))
1623 goto got_it;
1624 s += UTF8SKIP(s);
1625 }
1626 }
1627 else {
1628 while (s <= last1) {
1629 if (regtry(prog, s))
1630 goto got_it;
1631 s++;
1632 }
1633 }
1634 }
bf93d4cc
GS
1635 DEBUG_r(did_match ||
1636 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1637 ((must == prog->anchored_substr)
1638 ? "anchored" : "floating"),
1639 PL_colors[0],
1640 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1641 SvPVX(must),
1642 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1643 goto phooey;
1644 }
155aba94 1645 else if ((c = prog->regstclass)) {
66e933ab
GS
1646 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1647 /* don't bother with what can't match */
6eb5f6b9 1648 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1649 DEBUG_r({
1650 SV *prop = sv_newmortal();
1651 regprop(prop, c);
1652 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1653 });
6eb5f6b9
JH
1654 if (find_byclass(prog, c, s, strend, startpos, 0))
1655 goto got_it;
bf93d4cc 1656 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1657 }
1658 else {
1659 dontbother = 0;
1660 if (prog->float_substr != Nullsv) { /* Trim the end. */
1661 char *last;
d6a28714
JH
1662
1663 if (flags & REXEC_SCREAM) {
1664 last = screaminstr(sv, prog->float_substr, s - strbeg,
1665 end_shift, &scream_pos, 1); /* last one */
1666 if (!last)
ffc61ed2 1667 last = scream_olds; /* Only one occurrence. */
b8c5462f 1668 }
d6a28714
JH
1669 else {
1670 STRLEN len;
1671 char *little = SvPV(prog->float_substr, len);
1672
1673 if (SvTAIL(prog->float_substr)) {
1674 if (memEQ(strend - len + 1, little, len - 1))
1675 last = strend - len + 1;
1676 else if (!PL_multiline)
1677 last = memEQ(strend - len, little, len)
1678 ? strend - len : Nullch;
b8c5462f 1679 else
d6a28714
JH
1680 goto find_last;
1681 } else {
1682 find_last:
1683 if (len)
1684 last = rninstr(s, strend, little, little + len);
b8c5462f 1685 else
d6a28714 1686 last = strend; /* matching `$' */
b8c5462f 1687 }
b8c5462f 1688 }
bf93d4cc
GS
1689 if (last == NULL) {
1690 DEBUG_r(PerlIO_printf(Perl_debug_log,
1691 "%sCan't trim the tail, match fails (should not happen)%s\n",
1692 PL_colors[4],PL_colors[5]));
1693 goto phooey; /* Should not happen! */
1694 }
d6a28714
JH
1695 dontbother = strend - last + prog->float_min_offset;
1696 }
1697 if (minlen && (dontbother < minlen))
1698 dontbother = minlen - 1;
1699 strend -= dontbother; /* this one's always in bytes! */
1700 /* We don't know much -- general case. */
1aa99e6b 1701 if (do_utf8) {
d6a28714
JH
1702 for (;;) {
1703 if (regtry(prog, s))
1704 goto got_it;
1705 if (s >= strend)
1706 break;
b8c5462f 1707 s += UTF8SKIP(s);
d6a28714
JH
1708 };
1709 }
1710 else {
1711 do {
1712 if (regtry(prog, s))
1713 goto got_it;
1714 } while (s++ < strend);
1715 }
1716 }
1717
1718 /* Failure. */
1719 goto phooey;
1720
1721got_it:
1722 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1723
1724 if (PL_reg_eval_set) {
1725 /* Preserve the current value of $^R */
1726 if (oreplsv != GvSV(PL_replgv))
1727 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1728 restored, the value remains
1729 the same. */
1730 restore_pos(aTHXo_ 0);
1731 }
1732
1733 /* make sure $`, $&, $', and $digit will work later */
1734 if ( !(flags & REXEC_NOT_FIRST) ) {
1735 if (RX_MATCH_COPIED(prog)) {
1736 Safefree(prog->subbeg);
1737 RX_MATCH_COPIED_off(prog);
1738 }
1739 if (flags & REXEC_COPY_STR) {
1740 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1741
1742 s = savepvn(strbeg, i);
1743 prog->subbeg = s;
1744 prog->sublen = i;
1745 RX_MATCH_COPIED_on(prog);
1746 }
1747 else {
1748 prog->subbeg = strbeg;
1749 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1750 }
1751 }
1752
1753 return 1;
1754
1755phooey:
bf93d4cc
GS
1756 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1757 PL_colors[4],PL_colors[5]));
d6a28714
JH
1758 if (PL_reg_eval_set)
1759 restore_pos(aTHXo_ 0);
1760 return 0;
1761}
1762
1763/*
1764 - regtry - try match at specific point
1765 */
1766STATIC I32 /* 0 failure, 1 success */
1767S_regtry(pTHX_ regexp *prog, char *startpos)
1768{
d6a28714
JH
1769 register I32 i;
1770 register I32 *sp;
1771 register I32 *ep;
1772 CHECKPOINT lastcp;
1773
02db2b7b
IZ
1774#ifdef DEBUGGING
1775 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1776#endif
d6a28714
JH
1777 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1778 MAGIC *mg;
1779
1780 PL_reg_eval_set = RS_init;
1781 DEBUG_r(DEBUG_s(
b900a521
JH
1782 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1783 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1784 ));
e8347627 1785 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1786 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1787 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1788 SAVETMPS;
1789 /* Apparently this is not needed, judging by wantarray. */
e8347627 1790 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1791 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1792
1793 if (PL_reg_sv) {
1794 /* Make $_ available to executed code. */
1795 if (PL_reg_sv != DEFSV) {
1796 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1797 SAVESPTR(DEFSV);
1798 DEFSV = PL_reg_sv;
b8c5462f 1799 }
d6a28714
JH
1800
1801 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1802 && (mg = mg_find(PL_reg_sv, 'g')))) {
1803 /* prepare for quick setting of pos */
1804 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1805 mg = mg_find(PL_reg_sv, 'g');
1806 mg->mg_len = -1;
b8c5462f 1807 }
d6a28714
JH
1808 PL_reg_magic = mg;
1809 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1810 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1811 }
1812 if (!PL_reg_curpm)
0f79a09d 1813 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1814 PL_reg_curpm->op_pmregexp = prog;
1815 PL_reg_oldcurpm = PL_curpm;
1816 PL_curpm = PL_reg_curpm;
1817 if (RX_MATCH_COPIED(prog)) {
1818 /* Here is a serious problem: we cannot rewrite subbeg,
1819 since it may be needed if this match fails. Thus
1820 $` inside (?{}) could fail... */
1821 PL_reg_oldsaved = prog->subbeg;
1822 PL_reg_oldsavedlen = prog->sublen;
1823 RX_MATCH_COPIED_off(prog);
1824 }
1825 else
1826 PL_reg_oldsaved = Nullch;
1827 prog->subbeg = PL_bostr;
1828 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1829 }
1830 prog->startp[0] = startpos - PL_bostr;
1831 PL_reginput = startpos;
1832 PL_regstartp = prog->startp;
1833 PL_regendp = prog->endp;
1834 PL_reglastparen = &prog->lastparen;
1835 prog->lastparen = 0;
1836 PL_regsize = 0;
1837 DEBUG_r(PL_reg_starttry = startpos);
1838 if (PL_reg_start_tmpl <= prog->nparens) {
1839 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1840 if(PL_reg_start_tmp)
1841 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1842 else
1843 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1844 }
1845
1846 /* XXXX What this code is doing here?!!! There should be no need
1847 to do this again and again, PL_reglastparen should take care of
3dd2943c 1848 this! --ilya*/
dafc8851
JH
1849
1850 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1851 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1852 * PL_reglastparen), is not needed at all by the test suite
1853 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1854 * enough, for building DynaLoader, or otherwise this
1855 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1856 * will happen. Meanwhile, this code *is* needed for the
1857 * above-mentioned test suite tests to succeed. The common theme
1858 * on those tests seems to be returning null fields from matches.
1859 * --jhi */
dafc8851 1860#if 1
d6a28714
JH
1861 sp = prog->startp;
1862 ep = prog->endp;
1863 if (prog->nparens) {
09e8ae3b 1864 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1865 *++sp = -1;
1866 *++ep = -1;
1867 }
1868 }
dafc8851 1869#endif
02db2b7b 1870 REGCP_SET(lastcp);
d6a28714
JH
1871 if (regmatch(prog->program + 1)) {
1872 prog->endp[0] = PL_reginput - PL_bostr;
1873 return 1;
1874 }
02db2b7b 1875 REGCP_UNWIND(lastcp);
d6a28714
JH
1876 return 0;
1877}
1878
02db2b7b
IZ
1879#define RE_UNWIND_BRANCH 1
1880#define RE_UNWIND_BRANCHJ 2
1881
1882union re_unwind_t;
1883
1884typedef struct { /* XX: makes sense to enlarge it... */
1885 I32 type;
1886 I32 prev;
1887 CHECKPOINT lastcp;
1888} re_unwind_generic_t;
1889
1890typedef struct {
1891 I32 type;
1892 I32 prev;
1893 CHECKPOINT lastcp;
1894 I32 lastparen;
1895 regnode *next;
1896 char *locinput;
1897 I32 nextchr;
1898#ifdef DEBUGGING
1899 int regindent;
1900#endif
1901} re_unwind_branch_t;
1902
1903typedef union re_unwind_t {
1904 I32 type;
1905 re_unwind_generic_t generic;
1906 re_unwind_branch_t branch;
1907} re_unwind_t;
1908
d6a28714
JH
1909/*
1910 - regmatch - main matching routine
1911 *
1912 * Conceptually the strategy is simple: check to see whether the current
1913 * node matches, call self recursively to see whether the rest matches,
1914 * and then act accordingly. In practice we make some effort to avoid
1915 * recursion, in particular by going through "ordinary" nodes (that don't
1916 * need to know whether the rest of the match failed) by a loop instead of
1917 * by recursion.
1918 */
1919/* [lwall] I've hoisted the register declarations to the outer block in order to
1920 * maybe save a little bit of pushing and popping on the stack. It also takes
1921 * advantage of machines that use a register save mask on subroutine entry.
1922 */
1923STATIC I32 /* 0 failure, 1 success */
1924S_regmatch(pTHX_ regnode *prog)
1925{
d6a28714
JH
1926 register regnode *scan; /* Current node. */
1927 regnode *next; /* Next node. */
1928 regnode *inner; /* Next node in internal branch. */
1929 register I32 nextchr; /* renamed nextchr - nextchar colides with
1930 function of same name */
1931 register I32 n; /* no or next */
1932 register I32 ln; /* len or last */
1933 register char *s; /* operand or save */
1934 register char *locinput = PL_reginput;
1935 register I32 c1, c2, paren; /* case fold search, parenth */
1936 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1937 I32 unwind = 0;
1938 I32 firstcp = PL_savestack_ix;
ffc61ed2 1939 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1940
d6a28714
JH
1941#ifdef DEBUGGING
1942 PL_regindent++;
1943#endif
1944
1945 /* Note that nextchr is a byte even in UTF */
1946 nextchr = UCHARAT(locinput);
1947 scan = prog;
1948 while (scan != NULL) {
1949#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1950#if 1
d6a28714
JH
1951# define sayYES goto yes
1952# define sayNO goto no
7821416a
IZ
1953# define sayYES_FINAL goto yes_final
1954# define sayYES_LOUD goto yes_loud
1955# define sayNO_FINAL goto no_final
1956# define sayNO_SILENT goto do_no
d6a28714
JH
1957# define saySAME(x) if (x) goto yes; else goto no
1958# define REPORT_CODE_OFF 24
1959#else
1960# define sayYES return 1
1961# define sayNO return 0
7821416a
IZ
1962# define sayYES_FINAL return 1
1963# define sayYES_LOUD return 1
1964# define sayNO_FINAL return 0
1965# define sayNO_SILENT return 0
d6a28714
JH
1966# define saySAME(x) return x
1967#endif
1968 DEBUG_r( {
1969 SV *prop = sv_newmortal();
1970 int docolor = *PL_colors[0];
1971 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 1972 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
1973 /* The part of the string before starttry has one color
1974 (pref0_len chars), between starttry and current
1975 position another one (pref_len - pref0_len chars),
1976 after the current position the third one.
1977 We assume that pref0_len <= pref_len, otherwise we
1978 decrease pref0_len. */
1aa99e6b
IH
1979 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1980 ? (5 + taill) - l : locinput - PL_bostr;
1981 int pref0_len;
d6a28714 1982
1aa99e6b
IH
1983 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1984 pref_len++;
1985 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714
JH
1986 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1987 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1988 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
1989 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1990 l--;
d6a28714
JH
1991 if (pref0_len < 0)
1992 pref0_len = 0;
1993 if (pref0_len > pref_len)
1994 pref0_len = pref_len;
1995 regprop(prop, scan);
1996 PerlIO_printf(Perl_debug_log,
b900a521
JH
1997 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1998 (IV)(locinput - PL_bostr),
d6a28714
JH
1999 PL_colors[4], pref0_len,
2000 locinput - pref_len, PL_colors[5],
2001 PL_colors[2], pref_len - pref0_len,
2002 locinput - pref_len + pref0_len, PL_colors[3],
2003 (docolor ? "" : "> <"),
2004 PL_colors[0], l, locinput, PL_colors[1],
2005 15 - l - pref_len + 1,
2006 "",
b900a521 2007 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2008 SvPVX(prop));
2009 } );
2010
2011 next = scan + NEXT_OFF(scan);
2012 if (next == scan)
2013 next = NULL;
2014
2015 switch (OP(scan)) {
2016 case BOL:
2017 if (locinput == PL_bostr
2018 ? PL_regprev == '\n'
2019 : (PL_multiline &&
2020 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2021 {
2022 /* regtill = regbol; */
b8c5462f
JH
2023 break;
2024 }
d6a28714
JH
2025 sayNO;
2026 case MBOL:
2027 if (locinput == PL_bostr
2028 ? PL_regprev == '\n'
2029 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2030 {
b8c5462f
JH
2031 break;
2032 }
d6a28714
JH
2033 sayNO;
2034 case SBOL:
c2a73568 2035 if (locinput == PL_bostr)
b8c5462f 2036 break;
d6a28714
JH
2037 sayNO;
2038 case GPOS:
2039 if (locinput == PL_reg_ganch)
2040 break;
2041 sayNO;
2042 case EOL:
2043 if (PL_multiline)
2044 goto meol;
2045 else
2046 goto seol;
2047 case MEOL:
2048 meol:
2049 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2050 sayNO;
b8c5462f 2051 break;
d6a28714
JH
2052 case SEOL:
2053 seol:
2054 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2055 sayNO;
d6a28714 2056 if (PL_regeol - locinput > 1)
b8c5462f 2057 sayNO;
b8c5462f 2058 break;
d6a28714
JH
2059 case EOS:
2060 if (PL_regeol != locinput)
b8c5462f 2061 sayNO;
d6a28714 2062 break;
ffc61ed2 2063 case SANY:
1aa99e6b 2064 if (do_utf8) {
b8c5462f 2065 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2066 if (locinput > PL_regeol)
2067 sayNO;
b8c5462f
JH
2068 nextchr = UCHARAT(locinput);
2069 break;
2070 }
d6a28714 2071 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2072 sayNO;
b8c5462f 2073 nextchr = UCHARAT(++locinput);
a0d0e21e 2074 break;
ffc61ed2 2075 case REG_ANY:
1aa99e6b
IH
2076 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2077 sayNO;
2078 if (do_utf8) {
b8c5462f 2079 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2080 if (locinput > PL_regeol)
2081 sayNO;
a0ed51b3 2082 nextchr = UCHARAT(locinput);
a0ed51b3 2083 }
1aa99e6b
IH
2084 else
2085 nextchr = UCHARAT(++locinput);
a0ed51b3 2086 break;
d6a28714 2087 case EXACT:
cd439c50
IZ
2088 s = STRING(scan);
2089 ln = STR_LEN(scan);
1aa99e6b
IH
2090 if (do_utf8 != (UTF!=0)) {
2091 char *l = locinput;
2092 char *e = s + ln;
2093 STRLEN len;
2094 if (do_utf8)
2095 while (s < e) {
2096 if (l >= PL_regeol)
2097 sayNO;
2098 if (*((U8*)s) != utf8_to_uv_simple((U8*)l, &len))
2099 sayNO;
2100 s++;
2101 l += len;
2102 }
2103 else
2104 while (s < e) {
2105 if (l >= PL_regeol)
2106 sayNO;
2107 if (*((U8*)l) != utf8_to_uv_simple((U8*)s, &len))
2108 sayNO;
2109 s += len;
2110 l++;
2111 }
2112 locinput = l;
2113 nextchr = UCHARAT(locinput);
2114 break;
2115 }
d6a28714
JH
2116 /* Inline the first character, for speed. */
2117 if (UCHARAT(s) != nextchr)
2118 sayNO;
2119 if (PL_regeol - locinput < ln)
2120 sayNO;
2121 if (ln > 1 && memNE(s, locinput, ln))
2122 sayNO;
2123 locinput += ln;
2124 nextchr = UCHARAT(locinput);
2125 break;
2126 case EXACTFL:
b8c5462f
JH
2127 PL_reg_flags |= RF_tainted;
2128 /* FALL THROUGH */
d6a28714 2129 case EXACTF:
cd439c50
IZ
2130 s = STRING(scan);
2131 ln = STR_LEN(scan);
d6a28714 2132
1aa99e6b 2133 if (do_utf8) {
d6a28714 2134 char *l = locinput;
1aa99e6b
IH
2135 char *e;
2136 e = s + ln;
d6a28714
JH
2137 c1 = OP(scan) == EXACTF;
2138 while (s < e) {
1aa99e6b 2139 if (l >= PL_regeol) {
d6a28714
JH
2140 sayNO;
2141 }
1aa99e6b
IH
2142 if ((UTF ? utf8_to_uv((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
2143 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2144 sayNO;
2145 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2146 l += UTF8SKIP(l);
b8c5462f 2147 }
d6a28714 2148 locinput = l;
a0ed51b3
LW
2149 nextchr = UCHARAT(locinput);
2150 break;
2151 }
d6a28714
JH
2152
2153 /* Inline the first character, for speed. */
2154 if (UCHARAT(s) != nextchr &&
2155 UCHARAT(s) != ((OP(scan) == EXACTF)
2156 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2157 sayNO;
d6a28714 2158 if (PL_regeol - locinput < ln)
b8c5462f 2159 sayNO;
d6a28714
JH
2160 if (ln > 1 && (OP(scan) == EXACTF
2161 ? ibcmp(s, locinput, ln)
2162 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2163 sayNO;
d6a28714
JH
2164 locinput += ln;
2165 nextchr = UCHARAT(locinput);
a0d0e21e 2166 break;
d6a28714 2167 case ANYOF:
ffc61ed2
JH
2168 if (do_utf8) {
2169 if (!reginclass(scan, (U8*)locinput, do_utf8))
2170 sayNO;
2171 if (locinput >= PL_regeol)
2172 sayNO;
2173 locinput += PL_utf8skip[nextchr];
b8c5462f 2174 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2175 }
2176 else {
2177 if (nextchr < 0)
2178 nextchr = UCHARAT(locinput);
2179 if (!reginclass(scan, (U8*)locinput, do_utf8))
2180 sayNO;
2181 if (!nextchr && locinput >= PL_regeol)
2182 sayNO;
2183 nextchr = UCHARAT(++locinput);
2184 }
b8c5462f 2185 break;
d6a28714 2186 case ALNUML:
b8c5462f
JH
2187 PL_reg_flags |= RF_tainted;
2188 /* FALL THROUGH */
d6a28714 2189 case ALNUM:
b8c5462f 2190 if (!nextchr)
4633a7c4 2191 sayNO;
ffc61ed2
JH
2192 if (do_utf8) {
2193 if (!(OP(scan) == ALNUM
d6a28714
JH
2194 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2195 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2196 {
2197 sayNO;
a0ed51b3 2198 }
b8c5462f 2199 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2200 nextchr = UCHARAT(locinput);
2201 break;
2202 }
ffc61ed2 2203 if (!(OP(scan) == ALNUM
d6a28714 2204 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2205 sayNO;
b8c5462f 2206 nextchr = UCHARAT(++locinput);
a0d0e21e 2207 break;
d6a28714 2208 case NALNUML:
b8c5462f
JH
2209 PL_reg_flags |= RF_tainted;
2210 /* FALL THROUGH */
d6a28714
JH
2211 case NALNUM:
2212 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2213 sayNO;
ffc61ed2
JH
2214 if (do_utf8) {
2215 if (OP(scan) == NALNUM
d6a28714
JH
2216 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2217 : isALNUM_LC_utf8((U8*)locinput))
2218 {
b8c5462f 2219 sayNO;
d6a28714 2220 }
b8c5462f
JH
2221 locinput += PL_utf8skip[nextchr];
2222 nextchr = UCHARAT(locinput);
2223 break;
2224 }
ffc61ed2 2225 if (OP(scan) == NALNUM
d6a28714 2226 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2227 sayNO;
76e3520e 2228 nextchr = UCHARAT(++locinput);
a0d0e21e 2229 break;
d6a28714
JH
2230 case BOUNDL:
2231 case NBOUNDL:
3280af22 2232 PL_reg_flags |= RF_tainted;
bbce6d69 2233 /* FALL THROUGH */
d6a28714
JH
2234 case BOUND:
2235 case NBOUND:
2236 /* was last char in word? */
ffc61ed2
JH
2237 if (do_utf8) {
2238 if (locinput == PL_regbol)
2239 ln = PL_regprev;
2240 else {
2241 U8 *r = reghop((U8*)locinput, -1);
2242
2243 ln = utf8_to_uv(r, s - (char*)r, 0, 0);
2244 }
2245 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2246 ln = isALNUM_uni(ln);
2247 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2248 }
2249 else {
2250 ln = isALNUM_LC_uni(ln);
2251 n = isALNUM_LC_utf8((U8*)locinput);
2252 }
a0ed51b3 2253 }
d6a28714 2254 else {
ffc61ed2
JH
2255 ln = (locinput != PL_regbol) ?
2256 UCHARAT(locinput - 1) : PL_regprev;
2257 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2258 ln = isALNUM(ln);
2259 n = isALNUM(nextchr);
2260 }
2261 else {
2262 ln = isALNUM_LC(ln);
2263 n = isALNUM_LC(nextchr);
2264 }
d6a28714 2265 }
ffc61ed2
JH
2266 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2267 OP(scan) == BOUNDL))
2268 sayNO;
a0ed51b3 2269 break;
d6a28714 2270 case SPACEL:
3280af22 2271 PL_reg_flags |= RF_tainted;
bbce6d69 2272 /* FALL THROUGH */
d6a28714 2273 case SPACE:
9442cb0e 2274 if (!nextchr)
4633a7c4 2275 sayNO;
1aa99e6b 2276 if (do_utf8) {
fd400ab9 2277 if (UTF8_IS_CONTINUED(nextchr)) {
ffc61ed2
JH
2278 if (!(OP(scan) == SPACE
2279 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2280 : isSPACE_LC_utf8((U8*)locinput)))
2281 {
2282 sayNO;
2283 }
2284 locinput += PL_utf8skip[nextchr];
2285 nextchr = UCHARAT(locinput);
2286 break;
d6a28714 2287 }
ffc61ed2
JH
2288 if (!(OP(scan) == SPACE
2289 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2290 sayNO;
2291 nextchr = UCHARAT(++locinput);
2292 }
2293 else {
2294 if (!(OP(scan) == SPACE
2295 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2296 sayNO;
2297 nextchr = UCHARAT(++locinput);
a0ed51b3 2298 }
a0ed51b3 2299 break;
d6a28714 2300 case NSPACEL:
3280af22 2301 PL_reg_flags |= RF_tainted;
bbce6d69 2302 /* FALL THROUGH */
d6a28714 2303 case NSPACE:
9442cb0e 2304 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2305 sayNO;
1aa99e6b 2306 if (do_utf8) {
ffc61ed2 2307 if (OP(scan) == NSPACE
9442cb0e 2308 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2309 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2310 {
2311 sayNO;
2312 }
2313 locinput += PL_utf8skip[nextchr];
2314 nextchr = UCHARAT(locinput);
2315 break;
a0ed51b3 2316 }
ffc61ed2 2317 if (OP(scan) == NSPACE
d6a28714 2318 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2319 sayNO;
76e3520e 2320 nextchr = UCHARAT(++locinput);
a0d0e21e 2321 break;
d6a28714 2322 case DIGITL:
a0ed51b3
LW
2323 PL_reg_flags |= RF_tainted;
2324 /* FALL THROUGH */
d6a28714 2325 case DIGIT:
9442cb0e 2326 if (!nextchr)
a0ed51b3 2327 sayNO;
1aa99e6b 2328 if (do_utf8) {
ffc61ed2 2329 if (!(OP(scan) == DIGIT
9442cb0e
GS
2330 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2331 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2332 {
a0ed51b3 2333 sayNO;
dfe13c55 2334 }
6f06b55f 2335 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2336 nextchr = UCHARAT(locinput);
2337 break;
2338 }
ffc61ed2 2339 if (!(OP(scan) == DIGIT
9442cb0e 2340 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2341 sayNO;
2342 nextchr = UCHARAT(++locinput);
2343 break;
d6a28714 2344 case NDIGITL:
b8c5462f
JH
2345 PL_reg_flags |= RF_tainted;
2346 /* FALL THROUGH */
d6a28714 2347 case NDIGIT:
9442cb0e 2348 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2349 sayNO;
1aa99e6b 2350 if (do_utf8) {
ffc61ed2 2351 if (OP(scan) == NDIGIT
9442cb0e
GS
2352 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2353 : isDIGIT_LC_utf8((U8*)locinput))
2354 {
a0ed51b3 2355 sayNO;
9442cb0e 2356 }
6f06b55f 2357 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2358 nextchr = UCHARAT(locinput);
2359 break;
2360 }
ffc61ed2 2361 if (OP(scan) == NDIGIT
9442cb0e 2362 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2363 sayNO;
2364 nextchr = UCHARAT(++locinput);
2365 break;
2366 case CLUMP:
dfe13c55 2367 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2368 sayNO;
6f06b55f 2369 locinput += PL_utf8skip[nextchr];
dfe13c55 2370 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2371 locinput += UTF8SKIP(locinput);
2372 if (locinput > PL_regeol)
2373 sayNO;
2374 nextchr = UCHARAT(locinput);
2375 break;
c8756f30 2376 case REFFL:
3280af22 2377 PL_reg_flags |= RF_tainted;
c8756f30 2378 /* FALL THROUGH */
c277df42 2379 case REF:
c8756f30 2380 case REFF:
c277df42 2381 n = ARG(scan); /* which paren pair */
cf93c79d 2382 ln = PL_regstartp[n];
2c2d71f5 2383 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2384 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2385 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2386 if (ln == PL_regendp[n])
a0d0e21e 2387 break;
a0ed51b3 2388
cf93c79d 2389 s = PL_bostr + ln;
1aa99e6b 2390 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2391 char *l = locinput;
cf93c79d 2392 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2393 /*
2394 * Note that we can't do the "other character" lookup trick as
2395 * in the 8-bit case (no pun intended) because in Unicode we
2396 * have to map both upper and title case to lower case.
2397 */
2398 if (OP(scan) == REFF) {
2399 while (s < e) {
2400 if (l >= PL_regeol)
2401 sayNO;
dfe13c55 2402 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2403 sayNO;
2404 s += UTF8SKIP(s);
2405 l += UTF8SKIP(l);
2406 }
2407 }
2408 else {
2409 while (s < e) {
2410 if (l >= PL_regeol)
2411 sayNO;
dfe13c55 2412 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2413 sayNO;
2414 s += UTF8SKIP(s);
2415 l += UTF8SKIP(l);
2416 }
2417 }
2418 locinput = l;
2419 nextchr = UCHARAT(locinput);
2420 break;
2421 }
2422
a0d0e21e 2423 /* Inline the first character, for speed. */
76e3520e 2424 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2425 (OP(scan) == REF ||
2426 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2427 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2428 sayNO;
cf93c79d 2429 ln = PL_regendp[n] - ln;
3280af22 2430 if (locinput + ln > PL_regeol)
4633a7c4 2431 sayNO;
c8756f30
AK
2432 if (ln > 1 && (OP(scan) == REF
2433 ? memNE(s, locinput, ln)
2434 : (OP(scan) == REFF
2435 ? ibcmp(s, locinput, ln)
2436 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2437 sayNO;
a0d0e21e 2438 locinput += ln;
76e3520e 2439 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2440 break;
2441
2442 case NOTHING:
c277df42 2443 case TAIL:
a0d0e21e
LW
2444 break;
2445 case BACK:
2446 break;
c277df42
IZ
2447 case EVAL:
2448 {
2449 dSP;
533c011a 2450 OP_4tree *oop = PL_op;
3280af22
NIS
2451 COP *ocurcop = PL_curcop;
2452 SV **ocurpad = PL_curpad;
c277df42
IZ
2453 SV *ret;
2454
2455 n = ARG(scan);
533c011a 2456 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2457 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2458 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2459 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2460
cea2e8a9 2461 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2462 SPAGAIN;
2463 ret = POPs;
2464 PUTBACK;
2465
0f5d15d6
IZ
2466 PL_op = oop;
2467 PL_curpad = ocurpad;
2468 PL_curcop = ocurcop;
c277df42 2469 if (logical) {
0f5d15d6
IZ
2470 if (logical == 2) { /* Postponed subexpression. */
2471 regexp *re;
22c35a8c 2472 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2473 re_cc_state state;
0f5d15d6
IZ
2474 CHECKPOINT cp, lastcp;
2475
2476 if(SvROK(ret) || SvRMAGICAL(ret)) {
2477 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2478
2479 if(SvMAGICAL(sv))
2480 mg = mg_find(sv, 'r');
2481 }
2482 if (mg) {
2483 re = (regexp *)mg->mg_obj;
df0003d4 2484 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2485 }
2486 else {
2487 STRLEN len;
2488 char *t = SvPV(ret, len);
2489 PMOP pm;
2490 char *oprecomp = PL_regprecomp;
2491 I32 osize = PL_regsize;
2492 I32 onpar = PL_regnpar;
2493
2494 pm.op_pmflags = 0;
cea2e8a9 2495 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2496 if (!(SvFLAGS(ret)
2497 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2498 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2499 PL_regprecomp = oprecomp;
2500 PL_regsize = osize;
2501 PL_regnpar = onpar;
2502 }
2503 DEBUG_r(
2504 PerlIO_printf(Perl_debug_log,
2505 "Entering embedded `%s%.60s%s%s'\n",
2506 PL_colors[0],
2507 re->precomp,
2508 PL_colors[1],
2509 (strlen(re->precomp) > 60 ? "..." : ""))
2510 );
2511 state.node = next;
2512 state.prev = PL_reg_call_cc;
2513 state.cc = PL_regcc;
2514 state.re = PL_reg_re;
2515
2ab05381 2516 PL_regcc = 0;
0f5d15d6
IZ
2517
2518 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2519 REGCP_SET(lastcp);
0f5d15d6
IZ
2520 cache_re(re);
2521 state.ss = PL_savestack_ix;
2522 *PL_reglastparen = 0;
2523 PL_reg_call_cc = &state;
2524 PL_reginput = locinput;
2c2d71f5
JH
2525
2526 /* XXXX This is too dramatic a measure... */
2527 PL_reg_maxiter = 0;
2528
0f5d15d6 2529 if (regmatch(re->program + 1)) {
2c914db6
IZ
2530 /* Even though we succeeded, we need to restore
2531 global variables, since we may be wrapped inside
2532 SUSPEND, thus the match may be not finished yet. */
2533
2534 /* XXXX Do this only if SUSPENDed? */
2535 PL_reg_call_cc = state.prev;
2536 PL_regcc = state.cc;
2537 PL_reg_re = state.re;
2538 cache_re(PL_reg_re);
2539
2540 /* XXXX This is too dramatic a measure... */
2541 PL_reg_maxiter = 0;
2542
2543 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2544 ReREFCNT_dec(re);
2545 regcpblow(cp);
2546 sayYES;
2547 }
0f5d15d6 2548 ReREFCNT_dec(re);
02db2b7b 2549 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2550 regcppop();
2551 PL_reg_call_cc = state.prev;
2552 PL_regcc = state.cc;
2553 PL_reg_re = state.re;
d3790889 2554 cache_re(PL_reg_re);
2c2d71f5
JH
2555
2556 /* XXXX This is too dramatic a measure... */
2557 PL_reg_maxiter = 0;
2558
0f5d15d6
IZ
2559 sayNO;
2560 }
c277df42 2561 sw = SvTRUE(ret);
0f5d15d6 2562 logical = 0;
a0ed51b3
LW
2563 }
2564 else
3280af22 2565 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2566 break;
2567 }
a0d0e21e 2568 case OPEN:
c277df42 2569 n = ARG(scan); /* which paren pair */
3280af22
NIS
2570 PL_reg_start_tmp[n] = locinput;
2571 if (n > PL_regsize)
2572 PL_regsize = n;
a0d0e21e
LW
2573 break;
2574 case CLOSE:
c277df42 2575 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2576 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2577 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2578 if (n > *PL_reglastparen)
2579 *PL_reglastparen = n;
a0d0e21e 2580 break;
c277df42
IZ
2581 case GROUPP:
2582 n = ARG(scan); /* which paren pair */
cf93c79d 2583 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2584 break;
2585 case IFTHEN:
2c2d71f5 2586 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2587 if (sw)
2588 next = NEXTOPER(NEXTOPER(scan));
2589 else {
2590 next = scan + ARG(scan);
2591 if (OP(next) == IFTHEN) /* Fake one. */
2592 next = NEXTOPER(NEXTOPER(next));
2593 }
2594 break;
2595 case LOGICAL:
0f5d15d6 2596 logical = scan->flags;
c277df42 2597 break;
2ab05381
IZ
2598/*******************************************************************
2599 PL_regcc contains infoblock about the innermost (...)* loop, and
2600 a pointer to the next outer infoblock.
2601
2602 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2603
2604 1) After matching X, regnode for CURLYX is processed;
2605
2606 2) This regnode creates infoblock on the stack, and calls
2607 regmatch() recursively with the starting point at WHILEM node;
2608
2609 3) Each hit of WHILEM node tries to match A and Z (in the order
2610 depending on the current iteration, min/max of {min,max} and
2611 greediness). The information about where are nodes for "A"
2612 and "Z" is read from the infoblock, as is info on how many times "A"
2613 was already matched, and greediness.
2614
2615 4) After A matches, the same WHILEM node is hit again.
2616
2617 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2618 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2619 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2620 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2621 of the external loop.
2622
2623 Currently present infoblocks form a tree with a stem formed by PL_curcc
2624 and whatever it mentions via ->next, and additional attached trees
2625 corresponding to temporarily unset infoblocks as in "5" above.
2626
2627 In the following picture infoblocks for outer loop of
2628 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2629 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2630 infoblocks are drawn below the "reset" infoblock.
2631
2632 In fact in the picture below we do not show failed matches for Z and T
2633 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2634 more obvious *why* one needs to *temporary* unset infoblocks.]
2635
2636 Matched REx position InfoBlocks Comment
2637 (Y(A)*?Z)*?T x
2638 Y(A)*?Z)*?T x <- O
2639 Y (A)*?Z)*?T x <- O
2640 Y A)*?Z)*?T x <- O <- I
2641 YA )*?Z)*?T x <- O <- I
2642 YA A)*?Z)*?T x <- O <- I
2643 YAA )*?Z)*?T x <- O <- I
2644 YAA Z)*?T x <- O # Temporary unset I
2645 I
2646
2647 YAAZ Y(A)*?Z)*?T x <- O
2648 I
2649
2650 YAAZY (A)*?Z)*?T x <- O
2651 I
2652
2653 YAAZY A)*?Z)*?T x <- O <- I
2654 I
2655
2656 YAAZYA )*?Z)*?T x <- O <- I
2657 I
2658
2659 YAAZYA Z)*?T x <- O # Temporary unset I
2660 I,I
2661
2662 YAAZYAZ )*?T x <- O
2663 I,I
2664
2665 YAAZYAZ T x # Temporary unset O
2666 O
2667 I,I
2668
2669 YAAZYAZT x
2670 O
2671 I,I
2672 *******************************************************************/
a0d0e21e
LW
2673 case CURLYX: {
2674 CURCUR cc;
3280af22 2675 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2676 /* No need to save/restore up to this paren */
2677 I32 parenfloor = scan->flags;
c277df42
IZ
2678
2679 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2680 next += ARG(next);
3280af22
NIS
2681 cc.oldcc = PL_regcc;
2682 PL_regcc = &cc;
cb434fcc
IZ
2683 /* XXXX Probably it is better to teach regpush to support
2684 parenfloor > PL_regsize... */
2685 if (parenfloor > *PL_reglastparen)
2686 parenfloor = *PL_reglastparen; /* Pessimization... */
2687 cc.parenfloor = parenfloor;
a0d0e21e
LW
2688 cc.cur = -1;
2689 cc.min = ARG1(scan);
2690 cc.max = ARG2(scan);
c277df42 2691 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2692 cc.next = next;
2693 cc.minmod = minmod;
2694 cc.lastloc = 0;
3280af22 2695 PL_reginput = locinput;
a0d0e21e
LW
2696 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2697 regcpblow(cp);
3280af22 2698 PL_regcc = cc.oldcc;
4633a7c4 2699 saySAME(n);
a0d0e21e
LW
2700 }
2701 /* NOT REACHED */
2702 case WHILEM: {
2703 /*
2704 * This is really hard to understand, because after we match
2705 * what we're trying to match, we must make sure the rest of
2c2d71f5 2706 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2707 * to go back UP the parse tree by recursing ever deeper. And
2708 * if it fails, we have to reset our parent's current state
2709 * that we can try again after backing off.
2710 */
2711
c277df42 2712 CHECKPOINT cp, lastcp;
3280af22 2713 CURCUR* cc = PL_regcc;
c277df42
IZ
2714 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2715
4633a7c4 2716 n = cc->cur + 1; /* how many we know we matched */
3280af22 2717 PL_reginput = locinput;
a0d0e21e 2718
c277df42
IZ
2719 DEBUG_r(
2720 PerlIO_printf(Perl_debug_log,
2721 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2722 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2723 (long)n, (long)cc->min,
2724 (long)cc->max, (long)cc)
2725 );
4633a7c4 2726
a0d0e21e
LW
2727 /* If degenerate scan matches "", assume scan done. */
2728
579cf2c3 2729 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2730 PL_regcc = cc->oldcc;
2ab05381
IZ
2731 if (PL_regcc)
2732 ln = PL_regcc->cur;
c277df42 2733 DEBUG_r(
c3464db5
DD
2734 PerlIO_printf(Perl_debug_log,
2735 "%*s empty match detected, try continuation...\n",
3280af22 2736 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2737 );
a0d0e21e 2738 if (regmatch(cc->next))
4633a7c4 2739 sayYES;
2ab05381
IZ
2740 if (PL_regcc)
2741 PL_regcc->cur = ln;
3280af22 2742 PL_regcc = cc;
4633a7c4 2743 sayNO;
a0d0e21e
LW
2744 }
2745
2746 /* First just match a string of min scans. */
2747
2748 if (n < cc->min) {
2749 cc->cur = n;
2750 cc->lastloc = locinput;
4633a7c4
LW
2751 if (regmatch(cc->scan))
2752 sayYES;
2753 cc->cur = n - 1;
c277df42 2754 cc->lastloc = lastloc;
4633a7c4 2755 sayNO;
a0d0e21e
LW
2756 }
2757
2c2d71f5
JH
2758 if (scan->flags) {
2759 /* Check whether we already were at this position.
2760 Postpone detection until we know the match is not
2761 *that* much linear. */
2762 if (!PL_reg_maxiter) {
2763 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2764 PL_reg_leftiter = PL_reg_maxiter;
2765 }
2766 if (PL_reg_leftiter-- == 0) {
2767 I32 size = (PL_reg_maxiter + 7)/8;
2768 if (PL_reg_poscache) {
2769 if (PL_reg_poscache_size < size) {
2770 Renew(PL_reg_poscache, size, char);
2771 PL_reg_poscache_size = size;
2772 }
2773 Zero(PL_reg_poscache, size, char);
2774 }
2775 else {
2776 PL_reg_poscache_size = size;
2777 Newz(29, PL_reg_poscache, size, char);
2778 }
2779 DEBUG_r(
2780 PerlIO_printf(Perl_debug_log,
2781 "%sDetected a super-linear match, switching on caching%s...\n",
2782 PL_colors[4], PL_colors[5])
2783 );
2784 }
2785 if (PL_reg_leftiter < 0) {
2786 I32 o = locinput - PL_bostr, b;
2787
2788 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2789 b = o % 8;
2790 o /= 8;
2791 if (PL_reg_poscache[o] & (1<<b)) {
2792 DEBUG_r(
2793 PerlIO_printf(Perl_debug_log,
2794 "%*s already tried at this position...\n",
2795 REPORT_CODE_OFF+PL_regindent*2, "")
2796 );
7821416a 2797 sayNO_SILENT;
2c2d71f5
JH
2798 }
2799 PL_reg_poscache[o] |= (1<<b);
2800 }
2801 }
2802
a0d0e21e
LW
2803 /* Prefer next over scan for minimal matching. */
2804
2805 if (cc->minmod) {
3280af22 2806 PL_regcc = cc->oldcc;
2ab05381
IZ
2807 if (PL_regcc)
2808 ln = PL_regcc->cur;
5f05dabc 2809 cp = regcppush(cc->parenfloor);
02db2b7b 2810 REGCP_SET(lastcp);
5f05dabc 2811 if (regmatch(cc->next)) {
c277df42 2812 regcpblow(cp);
4633a7c4 2813 sayYES; /* All done. */
5f05dabc 2814 }
02db2b7b 2815 REGCP_UNWIND(lastcp);
5f05dabc 2816 regcppop();
2ab05381
IZ
2817 if (PL_regcc)
2818 PL_regcc->cur = ln;
3280af22 2819 PL_regcc = cc;
a0d0e21e 2820
c277df42 2821 if (n >= cc->max) { /* Maximum greed exceeded? */
e476b1b5 2822 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2823 && !(PL_reg_flags & RF_warned)) {
2824 PL_reg_flags |= RF_warned;
e476b1b5 2825 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2826 "Complex regular subexpression recursion",
2827 REG_INFTY - 1);
c277df42 2828 }
4633a7c4 2829 sayNO;
c277df42 2830 }
a687059c 2831
c277df42 2832 DEBUG_r(
c3464db5
DD
2833 PerlIO_printf(Perl_debug_log,
2834 "%*s trying longer...\n",
3280af22 2835 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2836 );
a0d0e21e 2837 /* Try scanning more and see if it helps. */
3280af22 2838 PL_reginput = locinput;
a0d0e21e
LW
2839 cc->cur = n;
2840 cc->lastloc = locinput;
5f05dabc 2841 cp = regcppush(cc->parenfloor);
02db2b7b 2842 REGCP_SET(lastcp);
5f05dabc 2843 if (regmatch(cc->scan)) {
c277df42 2844 regcpblow(cp);
4633a7c4 2845 sayYES;
5f05dabc 2846 }
02db2b7b 2847 REGCP_UNWIND(lastcp);
5f05dabc 2848 regcppop();
4633a7c4 2849 cc->cur = n - 1;
c277df42 2850 cc->lastloc = lastloc;
4633a7c4 2851 sayNO;
a0d0e21e
LW
2852 }
2853
2854 /* Prefer scan over next for maximal matching. */
2855
2856 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2857 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2858 cc->cur = n;
2859 cc->lastloc = locinput;
02db2b7b 2860 REGCP_SET(lastcp);
5f05dabc 2861 if (regmatch(cc->scan)) {
c277df42 2862 regcpblow(cp);
4633a7c4 2863 sayYES;
5f05dabc 2864 }
02db2b7b 2865 REGCP_UNWIND(lastcp);
a0d0e21e 2866 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2867 PL_reginput = locinput;
c277df42 2868 DEBUG_r(
c3464db5
DD
2869 PerlIO_printf(Perl_debug_log,
2870 "%*s failed, try continuation...\n",
3280af22 2871 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2872 );
2873 }
e476b1b5 2874 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2875 && !(PL_reg_flags & RF_warned)) {
3280af22 2876 PL_reg_flags |= RF_warned;
e476b1b5 2877 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2878 "Complex regular subexpression recursion",
2879 REG_INFTY - 1);
a0d0e21e
LW
2880 }
2881
2882 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2883 PL_regcc = cc->oldcc;
2ab05381
IZ
2884 if (PL_regcc)
2885 ln = PL_regcc->cur;
a0d0e21e 2886 if (regmatch(cc->next))
4633a7c4 2887 sayYES;
2ab05381
IZ
2888 if (PL_regcc)
2889 PL_regcc->cur = ln;
3280af22 2890 PL_regcc = cc;
4633a7c4 2891 cc->cur = n - 1;
c277df42 2892 cc->lastloc = lastloc;
4633a7c4 2893 sayNO;
a0d0e21e
LW
2894 }
2895 /* NOT REACHED */
c277df42
IZ
2896 case BRANCHJ:
2897 next = scan + ARG(scan);
2898 if (next == scan)
2899 next = NULL;
2900 inner = NEXTOPER(NEXTOPER(scan));
2901 goto do_branch;
2902 case BRANCH:
2903 inner = NEXTOPER(scan);
2904 do_branch:
2905 {
2906 CHECKPOINT lastcp;
2907 c1 = OP(scan);
2908 if (OP(next) != c1) /* No choice. */
2909 next = inner; /* Avoid recursion. */
a0d0e21e 2910 else {
02db2b7b
IZ
2911 I32 lastparen = *PL_reglastparen;
2912 I32 unwind1;
2913 re_unwind_branch_t *uw;
2914
2915 /* Put unwinding data on stack */
2916 unwind1 = SSNEWt(1,re_unwind_branch_t);
2917 uw = SSPTRt(unwind1,re_unwind_branch_t);
2918 uw->prev = unwind;
2919 unwind = unwind1;
2920 uw->type = ((c1 == BRANCH)
2921 ? RE_UNWIND_BRANCH
2922 : RE_UNWIND_BRANCHJ);
2923 uw->lastparen = lastparen;
2924 uw->next = next;
2925 uw->locinput = locinput;
2926 uw->nextchr = nextchr;
2927#ifdef DEBUGGING
2928 uw->regindent = ++PL_regindent;
2929#endif
c277df42 2930
02db2b7b
IZ
2931 REGCP_SET(uw->lastcp);
2932
2933 /* Now go into the first branch */
2934 next = inner;
a687059c 2935 }
a0d0e21e
LW
2936 }
2937 break;
2938 case MINMOD:
2939 minmod = 1;
2940 break;
c277df42
IZ
2941 case CURLYM:
2942 {
00db4c45 2943 I32 l = 0;
c277df42
IZ
2944 CHECKPOINT lastcp;
2945
2946 /* We suppose that the next guy does not need
2947 backtracking: in particular, it is of constant length,
2948 and has no parenths to influence future backrefs. */
2949 ln = ARG1(scan); /* min to match */
2950 n = ARG2(scan); /* max to match */
c277df42
IZ
2951 paren = scan->flags;
2952 if (paren) {
3280af22
NIS
2953 if (paren > PL_regsize)
2954 PL_regsize = paren;
2955 if (paren > *PL_reglastparen)
2956 *PL_reglastparen = paren;
c277df42 2957 }
dc45a647 2958 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2959 if (paren)
2960 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2961 PL_reginput = locinput;
c277df42
IZ
2962 if (minmod) {
2963 minmod = 0;
2964 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2965 sayNO;
5f4b28b2 2966 if (ln && l == 0 && n >= ln
c277df42
IZ
2967 /* In fact, this is tricky. If paren, then the
2968 fact that we did/didnot match may influence
2969 future execution. */
2970 && !(paren && ln == 0))
2971 ln = n;
3280af22 2972 locinput = PL_reginput;
22c35a8c 2973 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2974 c1 = (U8)*STRING(next);
c277df42 2975 if (OP(next) == EXACTF)
22c35a8c 2976 c2 = PL_fold[c1];
c277df42 2977 else if (OP(next) == EXACTFL)
22c35a8c 2978 c2 = PL_fold_locale[c1];
c277df42
IZ
2979 else
2980 c2 = c1;
a0ed51b3
LW
2981 }
2982 else
c277df42 2983 c1 = c2 = -1000;
02db2b7b 2984 REGCP_SET(lastcp);
5f4b28b2 2985 /* This may be improved if l == 0. */
c277df42
IZ
2986 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2987 /* If it could work, try it. */
2988 if (c1 == -1000 ||
3280af22
NIS
2989 UCHARAT(PL_reginput) == c1 ||
2990 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2991 {
2992 if (paren) {
2993 if (n) {
cf93c79d
IZ
2994 PL_regstartp[paren] =
2995 HOPc(PL_reginput, -l) - PL_bostr;
2996 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2997 }
2998 else
cf93c79d 2999 PL_regendp[paren] = -1;
c277df42
IZ
3000 }
3001 if (regmatch(next))
3002 sayYES;
02db2b7b 3003 REGCP_UNWIND(lastcp);
c277df42
IZ
3004 }
3005 /* Couldn't or didn't -- move forward. */
3280af22 3006 PL_reginput = locinput;
c277df42
IZ
3007 if (regrepeat_hard(scan, 1, &l)) {
3008 ln++;
3280af22 3009 locinput = PL_reginput;
c277df42
IZ
3010 }
3011 else
3012 sayNO;
3013 }
a0ed51b3
LW
3014 }
3015 else {
c277df42
IZ
3016 n = regrepeat_hard(scan, n, &l);
3017 if (n != 0 && l == 0
3018 /* In fact, this is tricky. If paren, then the
3019 fact that we did/didnot match may influence
3020 future execution. */
3021 && !(paren && ln == 0))
3022 ln = n;
3280af22 3023 locinput = PL_reginput;
c277df42 3024 DEBUG_r(
5c0ca799 3025 PerlIO_printf(Perl_debug_log,
faccc32b 3026 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3027 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3028 (IV) n, (IV)l)
c277df42
IZ
3029 );
3030 if (n >= ln) {
22c35a8c 3031 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3032 c1 = (U8)*STRING(next);
c277df42 3033 if (OP(next) == EXACTF)
22c35a8c 3034 c2 = PL_fold[c1];
c277df42 3035 else if (OP(next) == EXACTFL)
22c35a8c 3036 c2 = PL_fold_locale[c1];
c277df42
IZ
3037 else
3038 c2 = c1;
a0ed51b3
LW
3039 }
3040 else
c277df42
IZ
3041 c1 = c2 = -1000;
3042 }
02db2b7b 3043 REGCP_SET(lastcp);
c277df42
IZ
3044 while (n >= ln) {
3045 /* If it could work, try it. */
3046 if (c1 == -1000 ||
3280af22
NIS
3047 UCHARAT(PL_reginput) == c1 ||
3048 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3049 {
3050 DEBUG_r(
c3464db5 3051 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3052 "%*s trying tail with n=%"IVdf"...\n",
3053 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3054 );
3055 if (paren) {
3056 if (n) {
cf93c79d
IZ
3057 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3058 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3059 }
a0ed51b3 3060 else
cf93c79d 3061 PL_regendp[paren] = -1;
c277df42 3062 }
a0ed51b3
LW
3063 if (regmatch(next))
3064 sayYES;
02db2b7b 3065 REGCP_UNWIND(lastcp);
a0ed51b3 3066 }
c277df42
IZ
3067 /* Couldn't or didn't -- back up. */
3068 n--;
dfe13c55 3069 locinput = HOPc(locinput, -l);
3280af22 3070 PL_reginput = locinput;
c277df42
IZ
3071 }
3072 }
3073 sayNO;
3074 break;
3075 }
3076 case CURLYN:
3077 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3078 if (paren > PL_regsize)
3079 PL_regsize = paren;
3080 if (paren > *PL_reglastparen)
3081 *PL_reglastparen = paren;
c277df42
IZ
3082 ln = ARG1(scan); /* min to match */
3083 n = ARG2(scan); /* max to match */
dc45a647 3084 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3085 goto repeat;
a0d0e21e 3086 case CURLY:
c277df42 3087 paren = 0;
a0d0e21e
LW
3088 ln = ARG1(scan); /* min to match */
3089 n = ARG2(scan); /* max to match */
dc45a647 3090 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3091 goto repeat;
3092 case STAR:
3093 ln = 0;
c277df42 3094 n = REG_INFTY;
a0d0e21e 3095 scan = NEXTOPER(scan);
c277df42 3096 paren = 0;
a0d0e21e
LW
3097 goto repeat;
3098 case PLUS:
c277df42
IZ
3099 ln = 1;
3100 n = REG_INFTY;
3101 scan = NEXTOPER(scan);
3102 paren = 0;
3103 repeat:
a0d0e21e
LW
3104 /*
3105 * Lookahead to avoid useless match attempts
3106 * when we know what character comes next.
3107 */
22c35a8c 3108 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3109 U8 *s = (U8*)STRING(next);
3110 if (!UTF) {
3111 c2 = c1 = *s;
3112 if (OP(next) == EXACTF)
3113 c2 = PL_fold[c1];
3114 else if (OP(next) == EXACTFL)
3115 c2 = PL_fold_locale[c1];
3116 }
3117 else { /* UTF */
3118 if (OP(next) == EXACTF) {
3119 c1 = to_utf8_lower(s);
3120 c2 = to_utf8_upper(s);
3121 }
3122 else {
3123 c2 = c1 = utf8_to_uv_simple(s, NULL);
3124 }
3125 }
bbce6d69 3126 }
a0d0e21e 3127 else
bbce6d69 3128 c1 = c2 = -1000;
3280af22 3129 PL_reginput = locinput;
a0d0e21e 3130 if (minmod) {
c277df42 3131 CHECKPOINT lastcp;
a0d0e21e
LW
3132 minmod = 0;
3133 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3134 sayNO;
a0ed51b3 3135 locinput = PL_reginput;
02db2b7b 3136 REGCP_SET(lastcp);
0fe9bf95 3137 if (c1 != -1000) {
1aa99e6b 3138 char *e; /* Should not check after this */
0fe9bf95
IZ
3139 char *old = locinput;
3140
1aa99e6b 3141 if (n == REG_INFTY) {
0fe9bf95 3142 e = PL_regeol - 1;
1aa99e6b
IH
3143 if (do_utf8)
3144 while (UTF8_IS_CONTINUATION(*(U8*)e))
3145 e--;
3146 }
3147 else if (do_utf8) {
3148 int m = n - ln;
3149 for (e = locinput;
3150 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3151 e += UTF8SKIP(e);
3152 }
3153 else {
3154 e = locinput + n - ln;
3155 if (e >= PL_regeol)
3156 e = PL_regeol - 1;
3157 }
0fe9bf95 3158 while (1) {
1aa99e6b 3159 int count;
0fe9bf95 3160 /* Find place 'next' could work */
1aa99e6b
IH
3161 if (!do_utf8) {
3162 if (c1 == c2) {
3163 while (locinput <= e && *locinput != c1)
3164 locinput++;
3165 } else {
3166 while (locinput <= e
3167 && *locinput != c1
3168 && *locinput != c2)
3169 locinput++;
3170 }
3171 count = locinput - old;
3172 }
3173 else {
3174 STRLEN len;
3175 if (c1 == c2) {
3176 for (count = 0;
3177 locinput <= e &&
3178 utf8_to_uv_simple((U8*)locinput, &len) != c1;
3179 count++)
3180 locinput += len;
3181
3182 } else {
3183 for (count = 0; locinput <= e; count++) {
3184 UV c = utf8_to_uv_simple((U8*)locinput, &len);
3185 if (c == c1 || c == c2)
3186 break;
3187 locinput += len;
3188 }
3189 }
0fe9bf95
IZ
3190 }
3191 if (locinput > e)
3192 sayNO;
3193 /* PL_reginput == old now */
3194 if (locinput != old) {
3195 ln = 1; /* Did some */
1aa99e6b 3196 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3197 sayNO;
3198 }
3199 /* PL_reginput == locinput now */
29d1e993 3200 TRYPAREN(paren, ln, locinput);
0fe9bf95 3201 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3202 REGCP_UNWIND(lastcp);
0fe9bf95 3203 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3204 old = locinput;
3205 if (do_utf8)
3206 locinput += UTF8SKIP(locinput);
3207 else
3208 locinput++;
0fe9bf95
IZ
3209 }
3210 }
3211 else
c277df42 3212 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3213 UV c;
3214 if (c1 != -1000) {
3215 if (do_utf8)
3216 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3217 else
3218 c = UCHARAT(PL_reginput);
3219 }
a0d0e21e 3220 /* If it could work, try it. */
1aa99e6b 3221 if (c1 == -1000 || c == c1 || c == c2)
bbce6d69 3222 {
29d1e993 3223 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3224 REGCP_UNWIND(lastcp);
bbce6d69 3225 }
c277df42 3226 /* Couldn't or didn't -- move forward. */
a0ed51b3 3227 PL_reginput = locinput;
a0d0e21e
LW
3228 if (regrepeat(scan, 1)) {
3229 ln++;
a0ed51b3
LW
3230 locinput = PL_reginput;
3231 }
3232 else
4633a7c4 3233 sayNO;
a0d0e21e
LW
3234 }
3235 }
3236 else {
c277df42 3237 CHECKPOINT lastcp;
a0d0e21e 3238 n = regrepeat(scan, n);
a0ed51b3 3239 locinput = PL_reginput;
22c35a8c 3240 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3241 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3242 ln = n; /* why back off? */
1aeab75a
GS
3243 /* ...because $ and \Z can match before *and* after
3244 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3245 We should back off by one in this case. */
3246 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3247 ln--;
3248 }
02db2b7b 3249 REGCP_SET(lastcp);
c277df42 3250 if (paren) {
1aa99e6b 3251 UV c;
c277df42 3252 while (n >= ln) {
1aa99e6b
IH
3253 if (c1 != -1000) {
3254 if (do_utf8)
3255 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3256 else
3257 c = UCHARAT(PL_reginput);
3258 }
c277df42 3259 /* If it could work, try it. */
1aa99e6b 3260 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3261 {
29d1e993 3262 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3263 REGCP_UNWIND(lastcp);
c277df42
IZ
3264 }
3265 /* Couldn't or didn't -- back up. */
3266 n--;
dfe13c55 3267 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3268 }
a0ed51b3
LW
3269 }
3270 else {
1aa99e6b 3271 UV c;
c277df42 3272 while (n >= ln) {
1aa99e6b
IH
3273 if (c1 != -1000) {
3274 if (do_utf8)
3275 c = utf8_to_uv_simple((U8*)PL_reginput, NULL);
3276 else
3277 c = UCHARAT(PL_reginput);
3278 }
c277df42 3279 /* If it could work, try it. */
1aa99e6b 3280 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3281 {
29d1e993 3282 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3283 REGCP_UNWIND(lastcp);
c277df42
IZ
3284 }
3285 /* Couldn't or didn't -- back up. */
3286 n--;
dfe13c55 3287 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3288 }
a0d0e21e
LW
3289 }
3290 }
4633a7c4 3291 sayNO;
c277df42 3292 break;
a0d0e21e 3293 case END:
0f5d15d6
IZ
3294 if (PL_reg_call_cc) {
3295 re_cc_state *cur_call_cc = PL_reg_call_cc;
3296 CURCUR *cctmp = PL_regcc;
3297 regexp *re = PL_reg_re;
3298 CHECKPOINT cp, lastcp;
3299
3300 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3301 REGCP_SET(lastcp);
0f5d15d6
IZ
3302 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3303 the caller. */
3304 PL_reginput = locinput; /* Make position available to
3305 the callcc. */
3306 cache_re(PL_reg_call_cc->re);
3307 PL_regcc = PL_reg_call_cc->cc;
3308 PL_reg_call_cc = PL_reg_call_cc->prev;
3309 if (regmatch(cur_call_cc->node)) {
3310 PL_reg_call_cc = cur_call_cc;
3311 regcpblow(cp);
3312 sayYES;
3313 }
02db2b7b 3314 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3315 regcppop();
3316 PL_reg_call_cc = cur_call_cc;
3317 PL_regcc = cctmp;
3318 PL_reg_re = re;
3319 cache_re(re);
3320
3321 DEBUG_r(
3322 PerlIO_printf(Perl_debug_log,
3323 "%*s continuation failed...\n",
3324 REPORT_CODE_OFF+PL_regindent*2, "")
3325 );
7821416a 3326 sayNO_SILENT;
0f5d15d6 3327 }
7821416a
IZ
3328 if (locinput < PL_regtill) {
3329 DEBUG_r(PerlIO_printf(Perl_debug_log,
3330 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3331 PL_colors[4],
3332 (long)(locinput - PL_reg_starttry),
3333 (long)(PL_regtill - PL_reg_starttry),
3334 PL_colors[5]));
3335 sayNO_FINAL; /* Cannot match: too short. */
3336 }
3337 PL_reginput = locinput; /* put where regtry can find it */
3338 sayYES_FINAL; /* Success! */
7e5428c5 3339 case SUCCEED:
3280af22 3340 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3341 sayYES_LOUD; /* Success! */
c277df42
IZ
3342 case SUSPEND:
3343 n = 1;
9fe1d20c 3344 PL_reginput = locinput;
c277df42 3345 goto do_ifmatch;
a0d0e21e 3346 case UNLESSM:
c277df42 3347 n = 0;
a0ed51b3 3348 if (scan->flags) {
0fe9bf95
IZ
3349 if (UTF) { /* XXXX This is absolutely
3350 broken, we read before
3351 start of string. */
3352 s = HOPMAYBEc(locinput, -scan->flags);
3353 if (!s)
3354 goto say_yes;
3355 PL_reginput = s;
3356 }
3357 else {
3358 if (locinput < PL_bostr + scan->flags)
3359 goto say_yes;
3360 PL_reginput = locinput - scan->flags;
3361 goto do_ifmatch;
3362 }
a0ed51b3
LW
3363 }
3364 else
3365 PL_reginput = locinput;
c277df42
IZ
3366 goto do_ifmatch;
3367 case IFMATCH:
3368 n = 1;
a0ed51b3 3369 if (scan->flags) {
0fe9bf95
IZ
3370 if (UTF) { /* XXXX This is absolutely
3371 broken, we read before
3372 start of string. */
3373 s = HOPMAYBEc(locinput, -scan->flags);
3374 if (!s || s < PL_bostr)
3375 goto say_no;
3376 PL_reginput = s;
3377 }
3378 else {
3379 if (locinput < PL_bostr + scan->flags)
3380 goto say_no;
3381 PL_reginput = locinput - scan->flags;
3382 goto do_ifmatch;
3383 }
a0ed51b3
LW
3384 }
3385 else
3386 PL_reginput = locinput;
3387
c277df42 3388 do_ifmatch:
c277df42
IZ
3389 inner = NEXTOPER(NEXTOPER(scan));
3390 if (regmatch(inner) != n) {
3391 say_no:
3392 if (logical) {
3393 logical = 0;
3394 sw = 0;
3395 goto do_longjump;
a0ed51b3
LW
3396 }
3397 else
c277df42
IZ
3398 sayNO;
3399 }
3400 say_yes:
3401 if (logical) {
3402 logical = 0;
3403 sw = 1;
3404 }
fe44a5e8 3405 if (OP(scan) == SUSPEND) {
3280af22 3406 locinput = PL_reginput;
565764a8 3407 nextchr = UCHARAT(locinput);
fe44a5e8 3408 }
c277df42
IZ
3409 /* FALL THROUGH. */
3410 case LONGJMP:
3411 do_longjump:
3412 next = scan + ARG(scan);
3413 if (next == scan)
3414 next = NULL;
a0d0e21e
LW
3415 break;
3416 default:
b900a521 3417 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3418 PTR2UV(scan), OP(scan));
cea2e8a9 3419 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3420 }
02db2b7b 3421 reenter:
a0d0e21e
LW
3422 scan = next;
3423 }
a687059c 3424
a0d0e21e
LW
3425 /*
3426 * We get here only if there's trouble -- normally "case END" is
3427 * the terminating point.
3428 */
cea2e8a9 3429 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3430 /*NOTREACHED*/
4633a7c4
LW
3431 sayNO;
3432
7821416a
IZ
3433yes_loud:
3434 DEBUG_r(
3435 PerlIO_printf(Perl_debug_log,
3436 "%*s %scould match...%s\n",
3437 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3438 );
3439 goto yes;
3440yes_final:
3441 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3442 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3443yes:
3444#ifdef DEBUGGING
3280af22 3445 PL_regindent--;
4633a7c4 3446#endif
02db2b7b
IZ
3447
3448#if 0 /* Breaks $^R */
3449 if (unwind)
3450 regcpblow(firstcp);
3451#endif
4633a7c4
LW
3452 return 1;
3453
3454no:
7821416a
IZ
3455 DEBUG_r(
3456 PerlIO_printf(Perl_debug_log,
3457 "%*s %sfailed...%s\n",
3458 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3459 );
3460 goto do_no;
3461no_final:
3462do_no:
02db2b7b
IZ
3463 if (unwind) {
3464 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3465
3466 switch (uw->type) {
3467 case RE_UNWIND_BRANCH:
3468 case RE_UNWIND_BRANCHJ:
3469 {
3470 re_unwind_branch_t *uwb = &(uw->branch);
3471 I32 lastparen = uwb->lastparen;
3472
3473 REGCP_UNWIND(uwb->lastcp);
3474 for (n = *PL_reglastparen; n > lastparen; n--)
3475 PL_regendp[n] = -1;
3476 *PL_reglastparen = n;
3477 scan = next = uwb->next;
3478 if ( !scan ||
3479 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
3480 ? BRANCH : BRANCHJ) ) { /* Failure */
3481 unwind = uwb->prev;
3482#ifdef DEBUGGING
3483 PL_regindent--;
3484#endif
3485 goto do_no;
3486 }
3487 /* Have more choice yet. Reuse the same uwb. */
3488 /*SUPPRESS 560*/
3489 if ((n = (uwb->type == RE_UNWIND_BRANCH
3490 ? NEXT_OFF(next) : ARG(next))))
3491 next += n;
3492 else
3493 next = NULL; /* XXXX Needn't unwinding in this case... */
3494 uwb->next = next;
3495 next = NEXTOPER(scan);
3496 if (uwb->type == RE_UNWIND_BRANCHJ)
3497 next = NEXTOPER(next);
3498 locinput = uwb->locinput;
3499 nextchr = uwb->nextchr;
3500#ifdef DEBUGGING
3501 PL_regindent = uwb->regindent;
3502#endif
3503
3504 goto reenter;
3505 }
3506 /* NOT REACHED */
3507 default:
3508 Perl_croak(aTHX_ "regexp unwind memory corruption");
3509 }
3510 /* NOT REACHED */
3511 }
4633a7c4 3512#ifdef DEBUGGING
3280af22 3513 PL_regindent--;
4633a7c4 3514#endif
a0d0e21e 3515 return 0;
a687059c
LW
3516}
3517
3518/*
3519 - regrepeat - repeatedly match something simple, report how many
3520 */
3521/*
3522 * [This routine now assumes that it will only match on things of length 1.
3523 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3524 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3525 */
76e3520e 3526STATIC I32
cea2e8a9 3527S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3528{
a0d0e21e 3529 register char *scan;
a0d0e21e 3530 register I32 c;
3280af22 3531 register char *loceol = PL_regeol;
a0ed51b3 3532 register I32 hardcount = 0;
ffc61ed2 3533 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3534
3280af22 3535 scan = PL_reginput;
c277df42 3536 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3537 loceol = scan + max;
a0d0e21e 3538 switch (OP(p)) {
22c35a8c 3539 case REG_ANY:
1aa99e6b 3540 if (do_utf8) {
ffc61ed2 3541 loceol = PL_regeol;
1aa99e6b 3542 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3543 scan += UTF8SKIP(scan);
3544 hardcount++;
3545 }
3546 } else {
3547 while (scan < loceol && *scan != '\n')
3548 scan++;
a0ed51b3
LW
3549 }
3550 break;
ffc61ed2 3551 case SANY:
1aa99e6b 3552 if (do_utf8) {
ffc61ed2 3553 loceol = PL_regeol;
1aa99e6b 3554 while (hardcount < max && scan < loceol) {
ffc61ed2
JH
3555 scan += UTF8SKIP(scan);
3556 hardcount++;
3557 }
3558 } else {
3559 scan = loceol;
a0ed51b3
LW
3560 }
3561 break;
bbce6d69 3562 case EXACT: /* length of string is 1 */
cd439c50 3563 c = (U8)*STRING(p);
bbce6d69 3564 while (scan < loceol && UCHARAT(scan) == c)
3565 scan++;
3566 break;
3567 case EXACTF: /* length of string is 1 */
cd439c50 3568 c = (U8)*STRING(p);
bbce6d69 3569 while (scan < loceol &&
22c35a8c 3570 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3571 scan++;
3572 break;
3573 case EXACTFL: /* length of string is 1 */
3280af22 3574 PL_reg_flags |= RF_tainted;
cd439c50 3575 c = (U8)*STRING(p);
bbce6d69 3576 while (scan < loceol &&
22c35a8c 3577 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3578 scan++;
3579 break;
3580 case ANYOF:
ffc61ed2
JH
3581 if (do_utf8) {
3582 loceol = PL_regeol;
1aa99e6b
IH
3583 while (hardcount < max && scan < loceol &&
3584 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3585 scan += UTF8SKIP(scan);
3586 hardcount++;
3587 }
3588 } else {
3589 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3590 scan++;
3591 }
a0d0e21e
LW
3592 break;
3593 case ALNUM:
1aa99e6b 3594 if (do_utf8) {
ffc61ed2 3595 loceol = PL_regeol;
1aa99e6b
IH
3596 while (hardcount < max && scan < loceol &&
3597 swash_fetch(PL_utf8_alnum, (U8*)scan)) {
ffc61ed2
JH
3598 scan += UTF8SKIP(scan);
3599 hardcount++;
3600 }
3601 } else {
3602 while (scan < loceol && isALNUM(*scan))
3603 scan++;
a0ed51b3
LW
3604 }
3605 break;
bbce6d69 3606 case ALNUML:
3280af22 3607 PL_reg_flags |= RF_tainted;
1aa99e6b 3608 if (do_utf8) {
ffc61ed2 3609 loceol = PL_regeol;
1aa99e6b
IH
3610 while (hardcount < max && scan < loceol &&
3611 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3612 scan += UTF8SKIP(scan);
3613 hardcount++;
3614 }
3615 } else {
3616 while (scan < loceol && isALNUM_LC(*scan))
3617 scan++;
a0ed51b3
LW
3618 }
3619 break;
a0d0e21e 3620 case NALNUM:
1aa99e6b 3621 if (do_utf8) {
ffc61ed2 3622 loceol = PL_regeol;
1aa99e6b
IH
3623 while (hardcount < max && scan < loceol &&
3624 !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
ffc61ed2
JH
3625 scan += UTF8SKIP(scan);
3626 hardcount++;
3627 }
3628 } else {
3629 while (scan < loceol && !isALNUM(*scan))
3630 scan++;
a0ed51b3
LW
3631 }
3632 break;
bbce6d69 3633 case NALNUML:
3280af22 3634 PL_reg_flags |= RF_tainted;
1aa99e6b 3635 if (do_utf8) {
ffc61ed2 3636 loceol = PL_regeol;
1aa99e6b
IH
3637 while (hardcount < max && scan < loceol &&
3638 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3639 scan += UTF8SKIP(scan);
3640 hardcount++;
3641 }
3642 } else {
3643 while (scan < loceol && !isALNUM_LC(*scan))
3644 scan++;
a0ed51b3
LW
3645 }
3646 break;
a0d0e21e 3647 case SPACE:
1aa99e6b 3648 if (do_utf8) {
ffc61ed2 3649 loceol = PL_regeol;
1aa99e6b 3650 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3651 (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3652 scan += UTF8SKIP(scan);
3653 hardcount++;
3654 }
3655 } else {
3656 while (scan < loceol && isSPACE(*scan))
3657 scan++;
a0ed51b3
LW
3658 }
3659 break;
bbce6d69 3660 case SPACEL:
3280af22 3661 PL_reg_flags |= RF_tainted;
1aa99e6b 3662 if (do_utf8) {
ffc61ed2 3663 loceol = PL_regeol;
1aa99e6b 3664 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3665 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3666 scan += UTF8SKIP(scan);
3667 hardcount++;
3668 }
3669 } else {
3670 while (scan < loceol && isSPACE_LC(*scan))
3671 scan++;
a0ed51b3
LW
3672 }
3673 break;
a0d0e21e 3674 case NSPACE:
1aa99e6b 3675 if (do_utf8) {
ffc61ed2 3676 loceol = PL_regeol;
1aa99e6b 3677 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3678 !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3679 scan += UTF8SKIP(scan);
3680 hardcount++;
3681 }
3682 } else {
3683 while (scan < loceol && !isSPACE(*scan))
3684 scan++;
3685 break;
a0ed51b3 3686 }
bbce6d69 3687 case NSPACEL:
3280af22 3688 PL_reg_flags |= RF_tainted;
1aa99e6b 3689 if (do_utf8) {
ffc61ed2 3690 loceol = PL_regeol;
1aa99e6b 3691 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3692 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3693 scan += UTF8SKIP(scan);
3694 hardcount++;
3695 }
3696 } else {
3697 while (scan < loceol && !isSPACE_LC(*scan))
3698 scan++;
a0ed51b3
LW
3699 }
3700 break;
a0d0e21e 3701 case DIGIT:
1aa99e6b 3702 if (do_utf8) {
ffc61ed2 3703 loceol = PL_regeol;
1aa99e6b
IH
3704 while (hardcount < max && scan < loceol &&
3705 swash_fetch(PL_utf8_digit,(U8*)scan)) {
ffc61ed2
JH
3706 scan += UTF8SKIP(scan);
3707 hardcount++;
3708 }
3709 } else {
3710 while (scan < loceol && isDIGIT(*scan))
3711 scan++;
a0ed51b3
LW
3712 }
3713 break;
a0d0e21e 3714 case NDIGIT:
1aa99e6b 3715 if (do_utf8) {
ffc61ed2 3716 loceol = PL_regeol;
1aa99e6b
IH
3717 while (hardcount < max && scan < loceol &&
3718 !swash_fetch(PL_utf8_digit,(U8*)scan)) {
ffc61ed2
JH
3719 scan += UTF8SKIP(scan);
3720 hardcount++;
3721 }
3722 } else {
3723 while (scan < loceol && !isDIGIT(*scan))
3724 scan++;
a0ed51b3
LW
3725 }
3726 break;
a0d0e21e
LW
3727 default: /* Called on something of 0 width. */
3728 break; /* So match right here or not at all. */
3729 }
a687059c 3730
a0ed51b3
LW
3731 if (hardcount)
3732 c = hardcount;
3733 else
3734 c = scan - PL_reginput;
3280af22 3735 PL_reginput = scan;
a687059c 3736
c277df42
IZ
3737 DEBUG_r(
3738 {
3739 SV *prop = sv_newmortal();
3740
3741 regprop(prop, p);
3742 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3743 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3744 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3745 });
3746
a0d0e21e 3747 return(c);
a687059c
LW
3748}
3749
3750/*
c277df42
IZ
3751 - regrepeat_hard - repeatedly match something, report total lenth and length
3752 *
3753 * The repeater is supposed to have constant length.
3754 */
3755
76e3520e 3756STATIC I32
cea2e8a9 3757S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42
IZ
3758{
3759 register char *scan;
3760 register char *start;
3280af22 3761 register char *loceol = PL_regeol;
a0ed51b3 3762 I32 l = 0;
708e3b05 3763 I32 count = 0, res = 1;
a0ed51b3
LW
3764
3765 if (!max)
3766 return 0;
c277df42 3767
3280af22 3768 start = PL_reginput;
1aa99e6b 3769 if (DO_UTF8(PL_reg_sv)) {
708e3b05 3770 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3771 if (!count++) {
3772 l = 0;
3773 while (start < PL_reginput) {
3774 l++;
3775 start += UTF8SKIP(start);
3776 }
3777 *lp = l;
3778 if (l == 0)
3779 return max;
3780 }
3781 if (count == max)
3782 return count;
3783 }
3784 }
3785 else {
708e3b05 3786 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3787 if (!count++) {
3788 *lp = l = PL_reginput - start;
3789 if (max != REG_INFTY && l*max < loceol - scan)
3790 loceol = scan + l*max;
3791 if (l == 0)
3792 return max;
c277df42
IZ
3793 }
3794 }
3795 }
708e3b05 3796 if (!res)
3280af22 3797 PL_reginput = scan;
c277df42 3798
a0ed51b3 3799 return count;
c277df42
IZ
3800}
3801
3802/*
ffc61ed2
JH
3803- regclass_swash - prepare the utf8 swash
3804*/
3805
3806SV *
3807Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3808{
3809 SV *sw = NULL;
3810 SV *si = NULL;
3811
3812 if (PL_regdata && PL_regdata->count) {
3813 U32 n = ARG(node);
3814
3815 if (PL_regdata->what[n] == 's') {
3816 SV *rv = (SV*)PL_regdata->data[n];
3817 AV *av = (AV*)SvRV((SV*)rv);
3818 SV **a;
3819
3820 si = *av_fetch(av, 0, FALSE);
3821 a = av_fetch(av, 1, FALSE);
3822
3823 if (a)
3824 sw = *a;
3825 else if (si && doinit) {
3826 sw = swash_init("utf8", "", si, 1, 0);
3827 (void)av_store(av, 1, sw);
3828 }
3829 }
3830 }
3831
3832 if (initsvp)
3833 *initsvp = si;
3834
3835 return sw;
3836}
3837
3838/*
cb8d8820 3839 - reginclass - determine if a character falls into a character class
bbce6d69 3840 */
3841
76e3520e 3842STATIC bool
ffc61ed2 3843S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3844{
ffc61ed2 3845 char flags = ANYOF_FLAGS(n);
bbce6d69 3846 bool match = FALSE;
1aa99e6b
IH
3847 UV c;
3848 STRLEN len;
3849
3850 if (do_utf8)
3851 c = utf8_to_uv_simple(p, &len);
3852 else
3853 c = *p;
bbce6d69 3854
ffc61ed2
JH
3855 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3856 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3857 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3858 match = TRUE;
bbce6d69 3859 }
1aa99e6b
IH
3860 if (do_utf8 && flags & ANYOF_UNICODE_ALL && c >= 256)
3861 match = TRUE;
ffc61ed2
JH
3862 if (!match) {
3863 SV *sw = regclass_swash(n, TRUE, 0);
3864
3865 if (sw) {
3866 if (swash_fetch(sw, p))
3867 match = TRUE;
3868 else if (flags & ANYOF_FOLD) {
3869 U8 tmpbuf[UTF8_MAXLEN+1];
3870
3871 if (flags & ANYOF_LOCALE) {
3872 PL_reg_flags |= RF_tainted;
3873 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3874 }
1aa99e6b
IH
3875 else
3876 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
ffc61ed2
JH
3877 if (swash_fetch(sw, tmpbuf))
3878 match = TRUE;
3879 }
3880 }
bbce6d69 3881 }
3882 }
1aa99e6b 3883 if (!match && c < 256) {
ffc61ed2
JH
3884 if (ANYOF_BITMAP_TEST(n, c))
3885 match = TRUE;
3886 else if (flags & ANYOF_FOLD) {
3887 I32 f;
a0ed51b3 3888
ffc61ed2
JH
3889 if (flags & ANYOF_LOCALE) {
3890 PL_reg_flags |= RF_tainted;
3891 f = PL_fold_locale[c];
3892 }
3893 else
3894 f = PL_fold[c];
3895 if (f != c && ANYOF_BITMAP_TEST(n, f))
3896 match = TRUE;
3897 }
3898
3899 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3900 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3901 if (
3902 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3903 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3904 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3905 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3906 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3907 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3908 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3909 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3910 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3911 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3912 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3913 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3914 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3915 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3916 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3917 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3918 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3919 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3920 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3921 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3922 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3923 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3924 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3925 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3926 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3927 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3928 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3929 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3930 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3931 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3932 ) /* How's that for a conditional? */
3933 {
3934 match = TRUE;
3935 }
a0ed51b3 3936 }
a0ed51b3
LW
3937 }
3938
a0ed51b3
LW
3939 return (flags & ANYOF_INVERT) ? !match : match;
3940}
161b471a 3941
dfe13c55 3942STATIC U8 *
cea2e8a9 3943S_reghop(pTHX_ U8 *s, I32 off)
c485e607 3944{
1aa99e6b
IH
3945 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3946}
3947
3948STATIC U8 *
3949S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
3950{
a0ed51b3 3951 if (off >= 0) {
1aa99e6b 3952 while (off-- && s < lim) {
ffc61ed2 3953 /* XXX could check well-formedness here */
a0ed51b3 3954 s += UTF8SKIP(s);
ffc61ed2 3955 }
a0ed51b3
LW
3956 }
3957 else {
3958 while (off++) {
1aa99e6b 3959 if (s > lim) {
a0ed51b3 3960 s--;
ffc61ed2 3961 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3962 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3963 s--;
ffc61ed2
JH
3964 }
3965 /* XXX could check well-formedness here */
a0ed51b3
LW
3966 }
3967 }
3968 }
3969 return s;
3970}
161b471a 3971
dfe13c55 3972STATIC U8 *
1aa99e6b
IH
3973S_reghopmaybe(pTHX_ U8 *s, I32 off)
3974{
3975 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3976}
3977
3978STATIC U8 *
3979S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
3980{
3981 if (off >= 0) {
1aa99e6b 3982 while (off-- && s < lim) {
ffc61ed2 3983 /* XXX could check well-formedness here */
a0ed51b3 3984 s += UTF8SKIP(s);
ffc61ed2 3985 }
a0ed51b3
LW
3986 if (off >= 0)
3987 return 0;
3988 }
3989 else {
3990 while (off++) {
1aa99e6b 3991 if (s > lim) {
a0ed51b3 3992 s--;
ffc61ed2 3993 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3994 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3995 s--;
ffc61ed2
JH
3996 }
3997 /* XXX could check well-formedness here */
a0ed51b3
LW
3998 }
3999 else
4000 break;
4001 }
4002 if (off <= 0)
4003 return 0;
4004 }
4005 return s;
4006}
51371543
GS
4007
4008#ifdef PERL_OBJECT
51371543
GS
4009#include "XSUB.h"
4010#endif
4011
4012static void
4013restore_pos(pTHXo_ void *arg)
4014{
51371543
GS
4015 if (PL_reg_eval_set) {
4016 if (PL_reg_oldsaved) {
4017 PL_reg_re->subbeg = PL_reg_oldsaved;
4018 PL_reg_re->sublen = PL_reg_oldsavedlen;
4019 RX_MATCH_COPIED_on(PL_reg_re);
4020 }
4021 PL_reg_magic->mg_len = PL_reg_oldpos;
4022 PL_reg_eval_set = 0;
4023 PL_curpm = PL_reg_oldcurpm;
4024 }
4025}