This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
better documentation for goto &NAME (from M. J. T. Guy)
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
6eb5f6b9
JH
278/* XXXX Some places assume that there is a fixed substring.
279 An update may be needed if optimizer marks as "INTUITable"
280 RExen without fixed substrings. Similarly, it is assumed that
281 lengths of all the strings are no more than minlen, thus they
282 cannot come from lookahead.
283 (Or minlen should take into account lookahead.) */
284
2c2d71f5
JH
285/* A failure to find a constant substring means that there is no need to make
286 an expensive call to REx engine, thus we celebrate a failure. Similarly,
287 finding a substring too deep into the string means that less calls to
30944b6d
IZ
288 regtry() should be needed.
289
290 REx compiler's optimizer found 4 possible hints:
291 a) Anchored substring;
292 b) Fixed substring;
293 c) Whether we are anchored (beginning-of-line or \G);
294 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 295 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
296 string which does not contradict any of them.
297 */
2c2d71f5 298
6eb5f6b9
JH
299/* Most of decisions we do here should have been done at compile time.
300 The nodes of the REx which we used for the search should have been
301 deleted from the finite automaton. */
302
cad2e5aa
JH
303char *
304Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
305 char *strend, U32 flags, re_scream_pos_data *data)
306{
2c2d71f5 307 register I32 start_shift;
cad2e5aa 308 /* Should be nonnegative! */
2c2d71f5
JH
309 register I32 end_shift;
310 register char *s;
311 register SV *check;
cad2e5aa
JH
312 char *t;
313 I32 ml_anch;
2c2d71f5 314 char *tmp;
6eb5f6b9
JH
315 register char *other_last = Nullch; /* other substr checked before this */
316 char *check_at; /* check substr found at this pos */
30944b6d
IZ
317#ifdef DEBUGGING
318 char *i_strpos = strpos;
319#endif
cad2e5aa
JH
320
321 DEBUG_r( if (!PL_colorset) reginitcolors() );
322 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 323 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
324 PL_colors[4],PL_colors[5],PL_colors[0],
325 prog->precomp,
326 PL_colors[1],
327 (strlen(prog->precomp) > 60 ? "..." : ""),
328 PL_colors[0],
b900a521 329 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
330 strpos, PL_colors[1],
331 (strend - strpos > 60 ? "..." : ""))
332 );
333
2c2d71f5
JH
334 if (prog->minlen > strend - strpos) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 336 goto fail;
2c2d71f5
JH
337 }
338 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
339 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
340 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 341 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
342
343 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 344 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
345 I32 slen;
346
347 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
348 && (sv && (strpos + SvCUR(sv) != strend)) ) {
349 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 350 goto fail;
2c2d71f5 351 }
adac82c7 352 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 353 s = HOPc(strpos, prog->check_offset_min);
cad2e5aa
JH
354 if (SvTAIL(prog->check_substr)) {
355 slen = SvCUR(prog->check_substr); /* >= 1 */
356
2c2d71f5
JH
357 if ( strend - s > slen || strend - s < slen - 1
358 || (strend - s == slen && strend[-1] != '\n')) {
359 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
360 goto fail_finish;
cad2e5aa
JH
361 }
362 /* Now should match s[0..slen-2] */
363 slen--;
364 if (slen && (*SvPVX(prog->check_substr) != *s
365 || (slen > 1
2c2d71f5
JH
366 && memNE(SvPVX(prog->check_substr), s, slen)))) {
367 report_neq:
368 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
369 goto fail_finish;
370 }
cad2e5aa
JH
371 }
372 else if (*SvPVX(prog->check_substr) != *s
373 || ((slen = SvCUR(prog->check_substr)) > 1
374 && memNE(SvPVX(prog->check_substr), s, slen)))
2c2d71f5
JH
375 goto report_neq;
376 goto success_at_start;
cad2e5aa 377 }
2c2d71f5 378 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 379 s = strpos;
2c2d71f5
JH
380 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
381 /* Should be nonnegative! */
382 end_shift = prog->minlen - start_shift -
383 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
384 if (!ml_anch) {
385 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
386 - (SvTAIL(prog->check_substr) != 0);
387 I32 eshift = strend - s - end;
388
389 if (end_shift < eshift)
390 end_shift = eshift;
391 }
cad2e5aa 392 }
2c2d71f5 393 else { /* Can match at random position */
cad2e5aa
JH
394 ml_anch = 0;
395 s = strpos;
2c2d71f5
JH
396 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
397 /* Should be nonnegative! */
398 end_shift = prog->minlen - start_shift -
399 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
cad2e5aa
JH
400 }
401
2c2d71f5 402#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 403 if (end_shift < 0)
6bbae5e6 404 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
405#endif
406
407 check = prog->check_substr;
408 restart:
409 /* Find a possible match in the region s..strend by looking for
410 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 411 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
412 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
413 I32 p = -1; /* Internal iterator of scream. */
414 I32 *pp = data ? data->scream_pos : &p;
415
2c2d71f5
JH
416 if (PL_screamfirst[BmRARE(check)] >= 0
417 || ( BmRARE(check) == '\n'
418 && (BmPREVIOUS(check) == SvCUR(check) - 1)
419 && SvTAIL(check) ))
420 s = screaminstr(sv, check,
421 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 422 else
2c2d71f5 423 goto fail_finish;
cad2e5aa
JH
424 if (data)
425 *data->scream_olds = s;
426 }
427 else
428 s = fbm_instr((unsigned char*)s + start_shift,
429 (unsigned char*)strend - end_shift,
2c2d71f5 430 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
431
432 /* Update the count-of-usability, remove useless subpatterns,
433 unshift s. */
2c2d71f5
JH
434
435 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
436 (s ? "Found" : "Did not find"),
437 ((check == prog->anchored_substr) ? "anchored" : "floating"),
438 PL_colors[0],
7b0972df
JH
439 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
440 SvPVX(check),
2c2d71f5
JH
441 PL_colors[1], (SvTAIL(check) ? "$" : ""),
442 (s ? " at offset " : "...\n") ) );
443
444 if (!s)
445 goto fail_finish;
446
6eb5f6b9
JH
447 check_at = s;
448
2c2d71f5 449 /* Finish the diagnostic message */
30944b6d 450 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
451
452 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
453 Start with the other substr.
454 XXXX no SCREAM optimization yet - and a very coarse implementation
455 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
456 *always* match. Probably should be marked during compile...
457 Probably it is right to do no SCREAM here...
458 */
459
460 if (prog->float_substr && prog->anchored_substr) {
30944b6d 461 /* Take into account the "other" substring. */
2c2d71f5
JH
462 /* XXXX May be hopelessly wrong for UTF... */
463 if (!other_last)
6eb5f6b9 464 other_last = strpos;
2c2d71f5 465 if (check == prog->float_substr) {
30944b6d
IZ
466 do_other_anchored:
467 {
2c2d71f5
JH
468 char *last = s - start_shift, *last1, *last2;
469 char *s1 = s;
470
471 tmp = PL_bostr;
472 t = s - prog->check_offset_max;
473 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
474 && (!(prog->reganch & ROPT_UTF8)
475 || (PL_bostr = strpos, /* Used in regcopmaybe() */
476 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
477 && t > strpos)))
30944b6d 478 /* EMPTY */;
2c2d71f5
JH
479 else
480 t = strpos;
481 t += prog->anchored_offset;
6eb5f6b9
JH
482 if (t < other_last) /* These positions already checked */
483 t = other_last;
2c2d71f5
JH
484 PL_bostr = tmp;
485 last2 = last1 = strend - prog->minlen;
486 if (last < last1)
487 last1 = last;
488 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
489 /* On end-of-str: see comment below. */
490 s = fbm_instr((unsigned char*)t,
491 (unsigned char*)last1 + prog->anchored_offset
492 + SvCUR(prog->anchored_substr)
493 - (SvTAIL(prog->anchored_substr)!=0),
494 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
495 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
496 (s ? "Found" : "Contradicts"),
497 PL_colors[0],
7b0972df
JH
498 (int)(SvCUR(prog->anchored_substr)
499 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
500 SvPVX(prog->anchored_substr),
501 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
502 if (!s) {
503 if (last1 >= last2) {
504 DEBUG_r(PerlIO_printf(Perl_debug_log,
505 ", giving up...\n"));
506 goto fail_finish;
507 }
508 DEBUG_r(PerlIO_printf(Perl_debug_log,
509 ", trying floating at offset %ld...\n",
30944b6d 510 (long)(s1 + 1 - i_strpos)));
2c2d71f5 511 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 512 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
513 s = HOPc(last, 1);
514 goto restart;
515 }
516 else {
517 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 518 (long)(s - i_strpos)));
2c2d71f5 519 t = s - prog->anchored_offset;
6eb5f6b9 520 other_last = s + 1;
30944b6d 521 s = s1;
2c2d71f5
JH
522 if (t == strpos)
523 goto try_at_start;
2c2d71f5
JH
524 goto try_at_offset;
525 }
30944b6d 526 }
2c2d71f5
JH
527 }
528 else { /* Take into account the floating substring. */
529 char *last, *last1;
530 char *s1 = s;
531
532 t = s - start_shift;
533 last1 = last = strend - prog->minlen + prog->float_min_offset;
534 if (last - t > prog->float_max_offset)
535 last = t + prog->float_max_offset;
536 s = t + prog->float_min_offset;
6eb5f6b9
JH
537 if (s < other_last)
538 s = other_last;
2c2d71f5
JH
539 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
540 /* fbm_instr() takes into account exact value of end-of-str
541 if the check is SvTAIL(ed). Since false positives are OK,
542 and end-of-str is not later than strend we are OK. */
543 s = fbm_instr((unsigned char*)s,
544 (unsigned char*)last + SvCUR(prog->float_substr)
545 - (SvTAIL(prog->float_substr)!=0),
546 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
547 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
548 (s ? "Found" : "Contradicts"),
549 PL_colors[0],
7b0972df
JH
550 (int)(SvCUR(prog->float_substr)
551 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
552 SvPVX(prog->float_substr),
553 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
554 if (!s) {
555 if (last1 == last) {
556 DEBUG_r(PerlIO_printf(Perl_debug_log,
557 ", giving up...\n"));
558 goto fail_finish;
559 }
560 DEBUG_r(PerlIO_printf(Perl_debug_log,
561 ", trying anchored starting at offset %ld...\n",
30944b6d 562 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 563 other_last = last + 1;
2c2d71f5
JH
564 PL_regeol = strend; /* Used in HOP() */
565 s = HOPc(t, 1);
566 goto restart;
567 }
568 else {
569 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 570 (long)(s - i_strpos)));
6eb5f6b9 571 other_last = s + 1;
30944b6d 572 s = s1;
2c2d71f5
JH
573 if (t == strpos)
574 goto try_at_start;
2c2d71f5
JH
575 goto try_at_offset;
576 }
577 }
cad2e5aa 578 }
2c2d71f5
JH
579
580 t = s - prog->check_offset_max;
581 tmp = PL_bostr;
582 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
583 && (!(prog->reganch & ROPT_UTF8)
584 || (PL_bostr = strpos, /* Used in regcopmaybe() */
585 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
586 && t > strpos)))) {
587 PL_bostr = tmp;
588 /* Fixed substring is found far enough so that the match
589 cannot start at strpos. */
590 try_at_offset:
cad2e5aa 591 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
592 /* Eventually fbm_*() should handle this, but often
593 anchored_offset is not 0, so this check will not be wasted. */
594 /* XXXX In the code below we prefer to look for "^" even in
595 presence of anchored substrings. And we search even
596 beyond the found float position. These pessimizations
597 are historical artefacts only. */
598 find_anchor:
2c2d71f5 599 while (t < strend - prog->minlen) {
cad2e5aa
JH
600 if (*t == '\n') {
601 if (t < s - prog->check_offset_min) {
30944b6d
IZ
602 if (prog->anchored_substr) {
603 /* We definitely contradict the found anchored
604 substr. Due to the above check we do not
605 contradict "check" substr.
606 Thus we can arrive here only if check substr
607 is float. Redo checking for "other"=="fixed".
608 */
609 strpos = t + 1;
610 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
611 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
612 goto do_other_anchored;
613 }
cad2e5aa 614 s = t + 1;
2c2d71f5 615 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 616 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
617 goto set_useful;
618 }
2c2d71f5 619 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
620 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
621 strpos = s = t + 1;
cad2e5aa
JH
622 goto restart;
623 }
624 t++;
625 }
2c2d71f5
JH
626 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
627 PL_colors[0],PL_colors[1]));
628 goto fail_finish;
cad2e5aa
JH
629 }
630 s = t;
631 set_useful:
2c2d71f5 632 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
633 }
634 else {
2c2d71f5
JH
635 PL_bostr = tmp;
636 /* The found string does not prohibit matching at beg-of-str
637 - no optimization of calling REx engine can be performed,
638 unless it was an MBOL and we are not after MBOL. */
639 try_at_start:
640 /* Even in this situation we may use MBOL flag if strpos is offset
641 wrt the start of the string. */
642 if (ml_anch && sv
cad2e5aa
JH
643 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
644 t = strpos;
645 goto find_anchor;
646 }
30944b6d
IZ
647 DEBUG_r( if (ml_anch)
648 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
649 PL_colors[0],PL_colors[1]);
650 );
2c2d71f5 651 success_at_start:
30944b6d 652 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
cad2e5aa
JH
653 && --BmUSEFUL(prog->check_substr) < 0
654 && prog->check_substr == prog->float_substr) { /* boo */
655 /* If flags & SOMETHING - do not do it many times on the same match */
656 SvREFCNT_dec(prog->check_substr);
657 prog->check_substr = Nullsv; /* disable */
658 prog->float_substr = Nullsv; /* clear */
659 s = strpos;
3cf5c195
IZ
660 /* XXXX This is a remnant of the old implementation. It
661 looks wasteful, since now INTUIT can use many
6eb5f6b9 662 other heuristics. */
cad2e5aa
JH
663 prog->reganch &= ~RE_USE_INTUIT;
664 }
665 else
666 s = strpos;
667 }
668
6eb5f6b9
JH
669 /* Last resort... */
670 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
671 if (prog->regstclass) {
672 /* minlen == 0 is possible if regstclass is \b or \B,
673 and the fixed substr is ''$.
674 Since minlen is already taken into account, s+1 is before strend;
675 accidentally, minlen >= 1 guaranties no false positives at s + 1
676 even for \b or \B. But (minlen? 1 : 0) below assumes that
677 regstclass does not come from lookahead... */
678 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
679 This leaves EXACTF only, which is dealt with in find_byclass(). */
680 char *endpos = (prog->anchored_substr || ml_anch)
681 ? s + (prog->minlen? 1 : 0)
682 : (prog->float_substr ? check_at - start_shift + 1
683 : strend) ;
684 char *startpos = sv ? strend - SvCUR(sv) : s;
685
686 t = s;
687 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
688 if (!s) {
689#ifdef DEBUGGING
690 char *what;
691#endif
692 if (endpos == strend) {
693 DEBUG_r( PerlIO_printf(Perl_debug_log,
694 "Could not match STCLASS...\n") );
695 goto fail;
696 }
697 /* Contradict one of substrings */
698 if (prog->anchored_substr) {
699 DEBUG_r( PerlIO_printf(Perl_debug_log,
700 "This position contradicts STCLASS...\n") );
701 if (prog->anchored_substr == check) {
702 DEBUG_r( what = "anchored" );
703 hop_and_restart:
704 PL_regeol = strend; /* Used in HOP() */
705 s = HOPc(t, 1);
706 DEBUG_r( PerlIO_printf(Perl_debug_log,
707 "trying %s substr starting at offset %ld...\n",
708 what, (long)(s + start_shift - i_strpos)) );
709 goto restart;
710 }
711 /* Have both, check is floating */
712 if (t + start_shift >= check_at) /* Contradicts floating=check */
713 goto retry_floating_check;
714 /* Recheck anchored substring, but not floating... */
715 s = check_at;
716 DEBUG_r( PerlIO_printf(Perl_debug_log,
717 "trying anchored substr starting at offset %ld...\n",
718 (long)(other_last - i_strpos)) );
719 goto do_other_anchored;
720 }
721 /* Check is floating subtring. */
722 retry_floating_check:
723 t = check_at - start_shift;
724 DEBUG_r( what = "floating" );
725 goto hop_and_restart;
726 }
727 DEBUG_r( if (t != s)
728 PerlIO_printf(Perl_debug_log,
729 "By STCLASS: moving %ld --> %ld\n",
730 (long)(t - i_strpos), (long)(s - i_strpos));
731 else
732 PerlIO_printf(Perl_debug_log,
733 "Does not contradict STCLASS...\n") );
734 }
2c2d71f5 735 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 736 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 737 return s;
2c2d71f5
JH
738
739 fail_finish: /* Substring not found */
740 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 741 fail:
2c2d71f5 742 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
743 PL_colors[4],PL_colors[5]));
744 return Nullch;
745}
9661b544 746
6eb5f6b9
JH
747/* We know what class REx starts with. Try to find this position... */
748static char *
749find_byclass(regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 750{
6eb5f6b9
JH
751 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
752 char *m;
753 int ln;
754 int c1;
755 int c2;
756 char *e;
757 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 758
6eb5f6b9
JH
759 /* We know what class it must start with. */
760 switch (OP(c)) {
761 case ANYOFUTF8:
762 while (s < strend) {
763 if (REGINCLASSUTF8(c, (U8*)s)) {
764 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 765 goto got_it;
3cf5c195 766 else
6eb5f6b9 767 tmp = doevery;
a0ed51b3 768 }
6eb5f6b9
JH
769 else
770 tmp = 1;
a0ed51b3
LW
771 s += UTF8SKIP(s);
772 }
6eb5f6b9
JH
773 break;
774 case ANYOF:
a0ed51b3 775 while (s < strend) {
6eb5f6b9
JH
776 if (REGINCLASS(c, *s)) {
777 if (tmp && (norun || regtry(prog, s)))
778 goto got_it;
779 else
780 tmp = doevery;
a0ed51b3 781 }
6eb5f6b9
JH
782 else
783 tmp = 1;
a0ed51b3 784 s++;
a0d0e21e 785 }
6eb5f6b9
JH
786 break;
787 case EXACTF:
788 m = STRING(c);
789 ln = STR_LEN(c);
790 c1 = *m;
791 c2 = PL_fold[c1];
792 goto do_exactf;
793 case EXACTFL:
794 m = STRING(c);
795 ln = STR_LEN(c);
796 c1 = *m;
797 c2 = PL_fold_locale[c1];
798 do_exactf:
799 e = strend - ln;
b3c9acc1 800
6eb5f6b9
JH
801 if (norun && e < s)
802 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
803 /* Here it is NOT UTF! */
804 if (c1 == c2) {
805 while (s <= e) {
806 if ( *s == c1
807 && (ln == 1 || (OP(c) == EXACTF
808 ? ibcmp(s, m, ln)
809 : ibcmp_locale(s, m, ln)))
6eb5f6b9 810 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
811 goto got_it;
812 s++;
813 }
814 } else {
815 while (s <= e) {
816 if ( (*s == c1 || *s == c2)
817 && (ln == 1 || (OP(c) == EXACTF
818 ? ibcmp(s, m, ln)
819 : ibcmp_locale(s, m, ln)))
6eb5f6b9 820 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
821 goto got_it;
822 s++;
823 }
824 }
825 break;
bbce6d69 826 case BOUNDL:
3280af22 827 PL_reg_flags |= RF_tainted;
bbce6d69 828 /* FALL THROUGH */
a0d0e21e 829 case BOUND:
6eb5f6b9 830 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 831 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 832 while (s < strend) {
95bac841 833 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 834 tmp = !tmp;
6eb5f6b9 835 if ((norun || regtry(prog, s)))
a0d0e21e 836 goto got_it;
a687059c 837 }
a0d0e21e
LW
838 s++;
839 }
6eb5f6b9 840 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
841 goto got_it;
842 break;
a0ed51b3
LW
843 case BOUNDLUTF8:
844 PL_reg_flags |= RF_tainted;
845 /* FALL THROUGH */
846 case BOUNDUTF8:
6eb5f6b9 847 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
a0ed51b3
LW
848 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
849 while (s < strend) {
dfe13c55
GS
850 if (tmp == !(OP(c) == BOUND ?
851 swash_fetch(PL_utf8_alnum, (U8*)s) :
852 isALNUM_LC_utf8((U8*)s)))
853 {
a0ed51b3 854 tmp = !tmp;
6eb5f6b9 855 if ((norun || regtry(prog, s)))
a0ed51b3
LW
856 goto got_it;
857 }
858 s += UTF8SKIP(s);
859 }
6eb5f6b9 860 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
861 goto got_it;
862 break;
bbce6d69 863 case NBOUNDL:
3280af22 864 PL_reg_flags |= RF_tainted;
bbce6d69 865 /* FALL THROUGH */
a0d0e21e 866 case NBOUND:
6eb5f6b9 867 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 868 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 869 while (s < strend) {
95bac841 870 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 871 tmp = !tmp;
6eb5f6b9 872 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
873 goto got_it;
874 s++;
875 }
6eb5f6b9 876 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
877 goto got_it;
878 break;
a0ed51b3
LW
879 case NBOUNDLUTF8:
880 PL_reg_flags |= RF_tainted;
881 /* FALL THROUGH */
882 case NBOUNDUTF8:
6eb5f6b9 883 if (prog->minlen)
dfe13c55 884 strend = reghop_c(strend, -1);
6eb5f6b9 885 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
a0ed51b3
LW
886 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
887 while (s < strend) {
dfe13c55
GS
888 if (tmp == !(OP(c) == NBOUND ?
889 swash_fetch(PL_utf8_alnum, (U8*)s) :
890 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 891 tmp = !tmp;
6eb5f6b9 892 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
893 goto got_it;
894 s += UTF8SKIP(s);
895 }
6eb5f6b9 896 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
897 goto got_it;
898 break;
a0d0e21e
LW
899 case ALNUM:
900 while (s < strend) {
bbce6d69 901 if (isALNUM(*s)) {
6eb5f6b9 902 if (tmp && (norun || regtry(prog, s)))
bbce6d69 903 goto got_it;
904 else
905 tmp = doevery;
906 }
907 else
908 tmp = 1;
909 s++;
910 }
911 break;
a0ed51b3
LW
912 case ALNUMUTF8:
913 while (s < strend) {
dfe13c55 914 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 915 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
916 goto got_it;
917 else
918 tmp = doevery;
919 }
920 else
921 tmp = 1;
922 s += UTF8SKIP(s);
923 }
924 break;
bbce6d69 925 case ALNUML:
3280af22 926 PL_reg_flags |= RF_tainted;
bbce6d69 927 while (s < strend) {
928 if (isALNUM_LC(*s)) {
6eb5f6b9 929 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 930 goto got_it;
a687059c 931 else
a0d0e21e
LW
932 tmp = doevery;
933 }
934 else
935 tmp = 1;
936 s++;
937 }
938 break;
a0ed51b3
LW
939 case ALNUMLUTF8:
940 PL_reg_flags |= RF_tainted;
941 while (s < strend) {
dfe13c55 942 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 943 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
944 goto got_it;
945 else
946 tmp = doevery;
947 }
948 else
949 tmp = 1;
950 s += UTF8SKIP(s);
951 }
952 break;
a0d0e21e
LW
953 case NALNUM:
954 while (s < strend) {
bbce6d69 955 if (!isALNUM(*s)) {
6eb5f6b9 956 if (tmp && (norun || regtry(prog, s)))
bbce6d69 957 goto got_it;
958 else
959 tmp = doevery;
960 }
961 else
962 tmp = 1;
963 s++;
964 }
965 break;
a0ed51b3
LW
966 case NALNUMUTF8:
967 while (s < strend) {
dfe13c55 968 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 969 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
970 goto got_it;
971 else
972 tmp = doevery;
973 }
974 else
975 tmp = 1;
976 s += UTF8SKIP(s);
977 }
978 break;
bbce6d69 979 case NALNUML:
3280af22 980 PL_reg_flags |= RF_tainted;
bbce6d69 981 while (s < strend) {
982 if (!isALNUM_LC(*s)) {
6eb5f6b9 983 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 984 goto got_it;
a687059c 985 else
a0d0e21e 986 tmp = doevery;
a687059c 987 }
a0d0e21e
LW
988 else
989 tmp = 1;
990 s++;
991 }
992 break;
a0ed51b3
LW
993 case NALNUMLUTF8:
994 PL_reg_flags |= RF_tainted;
995 while (s < strend) {
dfe13c55 996 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 997 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
998 goto got_it;
999 else
1000 tmp = doevery;
1001 }
1002 else
1003 tmp = 1;
1004 s += UTF8SKIP(s);
1005 }
1006 break;
a0d0e21e
LW
1007 case SPACE:
1008 while (s < strend) {
1009 if (isSPACE(*s)) {
6eb5f6b9 1010 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1011 goto got_it;
1012 else
1013 tmp = doevery;
2304df62 1014 }
a0d0e21e
LW
1015 else
1016 tmp = 1;
1017 s++;
1018 }
1019 break;
a0ed51b3
LW
1020 case SPACEUTF8:
1021 while (s < strend) {
dfe13c55 1022 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1023 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1024 goto got_it;
1025 else
1026 tmp = doevery;
1027 }
1028 else
1029 tmp = 1;
1030 s += UTF8SKIP(s);
1031 }
1032 break;
bbce6d69 1033 case SPACEL:
3280af22 1034 PL_reg_flags |= RF_tainted;
bbce6d69 1035 while (s < strend) {
1036 if (isSPACE_LC(*s)) {
6eb5f6b9 1037 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1038 goto got_it;
1039 else
1040 tmp = doevery;
1041 }
1042 else
1043 tmp = 1;
1044 s++;
1045 }
1046 break;
a0ed51b3
LW
1047 case SPACELUTF8:
1048 PL_reg_flags |= RF_tainted;
1049 while (s < strend) {
dfe13c55 1050 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1051 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1052 goto got_it;
1053 else
1054 tmp = doevery;
1055 }
1056 else
1057 tmp = 1;
1058 s += UTF8SKIP(s);
1059 }
1060 break;
a0d0e21e
LW
1061 case NSPACE:
1062 while (s < strend) {
1063 if (!isSPACE(*s)) {
6eb5f6b9 1064 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1065 goto got_it;
1066 else
1067 tmp = doevery;
a687059c 1068 }
a0d0e21e
LW
1069 else
1070 tmp = 1;
1071 s++;
1072 }
1073 break;
a0ed51b3
LW
1074 case NSPACEUTF8:
1075 while (s < strend) {
dfe13c55 1076 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1077 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1078 goto got_it;
1079 else
1080 tmp = doevery;
1081 }
1082 else
1083 tmp = 1;
1084 s += UTF8SKIP(s);
1085 }
1086 break;
bbce6d69 1087 case NSPACEL:
3280af22 1088 PL_reg_flags |= RF_tainted;
bbce6d69 1089 while (s < strend) {
1090 if (!isSPACE_LC(*s)) {
6eb5f6b9 1091 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1092 goto got_it;
1093 else
1094 tmp = doevery;
1095 }
1096 else
1097 tmp = 1;
1098 s++;
1099 }
1100 break;
a0ed51b3
LW
1101 case NSPACELUTF8:
1102 PL_reg_flags |= RF_tainted;
1103 while (s < strend) {
dfe13c55 1104 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1105 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1106 goto got_it;
1107 else
1108 tmp = doevery;
1109 }
1110 else
1111 tmp = 1;
1112 s += UTF8SKIP(s);
1113 }
1114 break;
a0d0e21e
LW
1115 case DIGIT:
1116 while (s < strend) {
1117 if (isDIGIT(*s)) {
6eb5f6b9 1118 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1119 goto got_it;
1120 else
1121 tmp = doevery;
2b69d0c2 1122 }
a0d0e21e
LW
1123 else
1124 tmp = 1;
1125 s++;
1126 }
1127 break;
a0ed51b3
LW
1128 case DIGITUTF8:
1129 while (s < strend) {
dfe13c55 1130 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1131 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1132 goto got_it;
1133 else
1134 tmp = doevery;
1135 }
1136 else
1137 tmp = 1;
1138 s += UTF8SKIP(s);
1139 }
1140 break;
b8c5462f
JH
1141 case DIGITL:
1142 PL_reg_flags |= RF_tainted;
1143 while (s < strend) {
1144 if (isDIGIT_LC(*s)) {
6eb5f6b9 1145 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1146 goto got_it;
1147 else
1148 tmp = doevery;
1149 }
1150 else
1151 tmp = 1;
1152 s++;
1153 }
1154 break;
1155 case DIGITLUTF8:
1156 PL_reg_flags |= RF_tainted;
1157 while (s < strend) {
1158 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1159 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1160 goto got_it;
1161 else
1162 tmp = doevery;
1163 }
1164 else
1165 tmp = 1;
1166 s += UTF8SKIP(s);
1167 }
1168 break;
a0d0e21e
LW
1169 case NDIGIT:
1170 while (s < strend) {
1171 if (!isDIGIT(*s)) {
6eb5f6b9 1172 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1173 goto got_it;
1174 else
1175 tmp = doevery;
a687059c 1176 }
a0d0e21e
LW
1177 else
1178 tmp = 1;
1179 s++;
1180 }
1181 break;
a0ed51b3
LW
1182 case NDIGITUTF8:
1183 while (s < strend) {
dfe13c55 1184 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1185 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1186 goto got_it;
1187 else
1188 tmp = doevery;
1189 }
1190 else
1191 tmp = 1;
1192 s += UTF8SKIP(s);
1193 }
1194 break;
b8c5462f
JH
1195 case NDIGITL:
1196 PL_reg_flags |= RF_tainted;
1197 while (s < strend) {
1198 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1199 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1200 goto got_it;
1201 else
1202 tmp = doevery;
1203 }
1204 else
1205 tmp = 1;
1206 s++;
a0ed51b3 1207 }
b8c5462f
JH
1208 break;
1209 case NDIGITLUTF8:
1210 PL_reg_flags |= RF_tainted;
1211 while (s < strend) {
1212 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1213 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1214 goto got_it;
cf93c79d 1215 else
b8c5462f
JH
1216 tmp = doevery;
1217 }
1218 else
1219 tmp = 1;
1220 s += UTF8SKIP(s);
1221 }
1222 break;
b3c9acc1 1223 default:
6eb5f6b9
JH
1224 {
1225 dTHX;
1226 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1227 break;
1228 }
d6a28714 1229 }
6eb5f6b9
JH
1230 return 0;
1231 got_it:
1232 return s;
1233}
1234
1235/*
1236 - regexec_flags - match a regexp against a string
1237 */
1238I32
1239Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1240 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1241/* strend: pointer to null at end of string */
1242/* strbeg: real beginning of string */
1243/* minend: end of match must be >=minend after stringarg. */
1244/* data: May be used for some additional optimizations. */
1245/* nosave: For optimizations. */
1246{
1247 dTHR;
1248 register char *s;
1249 register regnode *c;
1250 register char *startpos = stringarg;
1251 register I32 tmp;
1252 I32 minlen; /* must match at least this many chars */
1253 I32 dontbother = 0; /* how many characters not to try at end */
1254 I32 start_shift = 0; /* Offset of the start to find
1255 constant substr. */ /* CC */
1256 I32 end_shift = 0; /* Same for the end. */ /* CC */
1257 I32 scream_pos = -1; /* Internal iterator of scream. */
1258 char *scream_olds;
1259 SV* oreplsv = GvSV(PL_replgv);
1260
1261 PL_regcc = 0;
1262
1263 cache_re(prog);
1264#ifdef DEBUGGING
1265 PL_regnarrate = PL_debug & 512;
1266#endif
1267
1268 /* Be paranoid... */
1269 if (prog == NULL || startpos == NULL) {
1270 Perl_croak(aTHX_ "NULL regexp parameter");
1271 return 0;
1272 }
1273
1274 minlen = prog->minlen;
1275 if (strend - startpos < minlen) goto phooey;
1276
1277 if (startpos == strbeg) /* is ^ valid at stringarg? */
1278 PL_regprev = '\n';
1279 else {
1280 PL_regprev = (U32)stringarg[-1];
1281 if (!PL_multiline && PL_regprev == '\n')
1282 PL_regprev = '\0'; /* force ^ to NOT match */
1283 }
1284
1285 /* Check validity of program. */
1286 if (UCHARAT(prog->program) != REG_MAGIC) {
1287 Perl_croak(aTHX_ "corrupted regexp program");
1288 }
1289
1290 PL_reg_flags = 0;
1291 PL_reg_eval_set = 0;
1292 PL_reg_maxiter = 0;
1293
1294 if (prog->reganch & ROPT_UTF8)
1295 PL_reg_flags |= RF_utf8;
1296
1297 /* Mark beginning of line for ^ and lookbehind. */
1298 PL_regbol = startpos;
1299 PL_bostr = strbeg;
1300 PL_reg_sv = sv;
1301
1302 /* Mark end of line for $ (and such) */
1303 PL_regeol = strend;
1304
1305 /* see how far we have to get to not match where we matched before */
1306 PL_regtill = startpos+minend;
1307
1308 /* We start without call_cc context. */
1309 PL_reg_call_cc = 0;
1310
1311 /* If there is a "must appear" string, look for it. */
1312 s = startpos;
1313
1314 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1315 MAGIC *mg;
1316
1317 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1318 PL_reg_ganch = startpos;
1319 else if (sv && SvTYPE(sv) >= SVt_PVMG
1320 && SvMAGIC(sv)
1321 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1322 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1323 if (prog->reganch & ROPT_ANCH_GPOS) {
1324 if (s > PL_reg_ganch)
1325 goto phooey;
1326 s = PL_reg_ganch;
1327 }
1328 }
1329 else /* pos() not defined */
1330 PL_reg_ganch = strbeg;
1331 }
1332
1333 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1334 re_scream_pos_data d;
1335
1336 d.scream_olds = &scream_olds;
1337 d.scream_pos = &scream_pos;
1338 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1339 if (!s)
1340 goto phooey; /* not present */
1341 }
1342
1343 DEBUG_r( if (!PL_colorset) reginitcolors() );
1344 DEBUG_r(PerlIO_printf(Perl_debug_log,
1345 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1346 PL_colors[4],PL_colors[5],PL_colors[0],
1347 prog->precomp,
1348 PL_colors[1],
1349 (strlen(prog->precomp) > 60 ? "..." : ""),
1350 PL_colors[0],
1351 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1352 startpos, PL_colors[1],
1353 (strend - startpos > 60 ? "..." : ""))
1354 );
1355
1356 /* Simplest case: anchored match need be tried only once. */
1357 /* [unless only anchor is BOL and multiline is set] */
1358 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1359 if (s == startpos && regtry(prog, startpos))
1360 goto got_it;
1361 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1362 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1363 {
1364 char *end;
1365
1366 if (minlen)
1367 dontbother = minlen - 1;
1368 end = HOPc(strend, -dontbother) - 1;
1369 /* for multiline we only have to try after newlines */
1370 if (prog->check_substr) {
1371 if (s == startpos)
1372 goto after_try;
1373 while (1) {
1374 if (regtry(prog, s))
1375 goto got_it;
1376 after_try:
1377 if (s >= end)
1378 goto phooey;
1379 if (prog->reganch & RE_USE_INTUIT) {
1380 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1381 if (!s)
1382 goto phooey;
1383 }
1384 else
1385 s++;
1386 }
1387 } else {
1388 if (s > startpos)
1389 s--;
1390 while (s < end) {
1391 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1392 if (regtry(prog, s))
1393 goto got_it;
1394 }
1395 }
1396 }
1397 }
1398 goto phooey;
1399 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1400 if (regtry(prog, PL_reg_ganch))
1401 goto got_it;
1402 goto phooey;
1403 }
1404
1405 /* Messy cases: unanchored match. */
1406 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1407 /* we have /x+whatever/ */
1408 /* it must be a one character string (XXXX Except UTF?) */
1409 char ch = SvPVX(prog->anchored_substr)[0];
1410 if (UTF) {
1411 while (s < strend) {
1412 if (*s == ch) {
1413 if (regtry(prog, s)) goto got_it;
1414 s += UTF8SKIP(s);
1415 while (s < strend && *s == ch)
1416 s += UTF8SKIP(s);
1417 }
1418 s += UTF8SKIP(s);
1419 }
1420 }
1421 else {
1422 while (s < strend) {
1423 if (*s == ch) {
1424 if (regtry(prog, s)) goto got_it;
1425 s++;
1426 while (s < strend && *s == ch)
1427 s++;
1428 }
1429 s++;
1430 }
1431 }
1432 }
1433 /*SUPPRESS 560*/
1434 else if (prog->anchored_substr != Nullsv
1435 || (prog->float_substr != Nullsv
1436 && prog->float_max_offset < strend - s)) {
1437 SV *must = prog->anchored_substr
1438 ? prog->anchored_substr : prog->float_substr;
1439 I32 back_max =
1440 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1441 I32 back_min =
1442 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1443 I32 delta = back_max - back_min;
1444 char *last = HOPc(strend, /* Cannot start after this */
1445 -(I32)(CHR_SVLEN(must)
1446 - (SvTAIL(must) != 0) + back_min));
1447 char *last1; /* Last position checked before */
1448
1449 if (s > PL_bostr)
1450 last1 = HOPc(s, -1);
1451 else
1452 last1 = s - 1; /* bogus */
1453
1454 /* XXXX check_substr already used to find `s', can optimize if
1455 check_substr==must. */
1456 scream_pos = -1;
1457 dontbother = end_shift;
1458 strend = HOPc(strend, -dontbother);
1459 while ( (s <= last) &&
1460 ((flags & REXEC_SCREAM)
1461 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1462 end_shift, &scream_pos, 0))
1463 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1464 (unsigned char*)strend, must,
1465 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1466 if (HOPc(s, -back_max) > last1) {
1467 last1 = HOPc(s, -back_min);
1468 s = HOPc(s, -back_max);
1469 }
1470 else {
1471 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1472
1473 last1 = HOPc(s, -back_min);
1474 s = t;
1475 }
1476 if (UTF) {
1477 while (s <= last1) {
1478 if (regtry(prog, s))
1479 goto got_it;
1480 s += UTF8SKIP(s);
1481 }
1482 }
1483 else {
1484 while (s <= last1) {
1485 if (regtry(prog, s))
1486 goto got_it;
1487 s++;
1488 }
1489 }
1490 }
1491 goto phooey;
1492 }
1493 else if (c = prog->regstclass) {
1494 if (minlen) /* don't bother with what can't match */
1495 strend = HOPc(strend, -(minlen - 1));
1496 if (find_byclass(prog, c, s, strend, startpos, 0))
1497 goto got_it;
d6a28714
JH
1498 }
1499 else {
1500 dontbother = 0;
1501 if (prog->float_substr != Nullsv) { /* Trim the end. */
1502 char *last;
1503 I32 oldpos = scream_pos;
1504
1505 if (flags & REXEC_SCREAM) {
1506 last = screaminstr(sv, prog->float_substr, s - strbeg,
1507 end_shift, &scream_pos, 1); /* last one */
1508 if (!last)
1509 last = scream_olds; /* Only one occurence. */
b8c5462f 1510 }
d6a28714
JH
1511 else {
1512 STRLEN len;
1513 char *little = SvPV(prog->float_substr, len);
1514
1515 if (SvTAIL(prog->float_substr)) {
1516 if (memEQ(strend - len + 1, little, len - 1))
1517 last = strend - len + 1;
1518 else if (!PL_multiline)
1519 last = memEQ(strend - len, little, len)
1520 ? strend - len : Nullch;
b8c5462f 1521 else
d6a28714
JH
1522 goto find_last;
1523 } else {
1524 find_last:
1525 if (len)
1526 last = rninstr(s, strend, little, little + len);
b8c5462f 1527 else
d6a28714 1528 last = strend; /* matching `$' */
b8c5462f 1529 }
b8c5462f 1530 }
d6a28714
JH
1531 if (last == NULL) goto phooey; /* Should not happen! */
1532 dontbother = strend - last + prog->float_min_offset;
1533 }
1534 if (minlen && (dontbother < minlen))
1535 dontbother = minlen - 1;
1536 strend -= dontbother; /* this one's always in bytes! */
1537 /* We don't know much -- general case. */
1538 if (UTF) {
1539 for (;;) {
1540 if (regtry(prog, s))
1541 goto got_it;
1542 if (s >= strend)
1543 break;
b8c5462f 1544 s += UTF8SKIP(s);
d6a28714
JH
1545 };
1546 }
1547 else {
1548 do {
1549 if (regtry(prog, s))
1550 goto got_it;
1551 } while (s++ < strend);
1552 }
1553 }
1554
1555 /* Failure. */
1556 goto phooey;
1557
1558got_it:
1559 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1560
1561 if (PL_reg_eval_set) {
1562 /* Preserve the current value of $^R */
1563 if (oreplsv != GvSV(PL_replgv))
1564 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1565 restored, the value remains
1566 the same. */
1567 restore_pos(aTHXo_ 0);
1568 }
1569
1570 /* make sure $`, $&, $', and $digit will work later */
1571 if ( !(flags & REXEC_NOT_FIRST) ) {
1572 if (RX_MATCH_COPIED(prog)) {
1573 Safefree(prog->subbeg);
1574 RX_MATCH_COPIED_off(prog);
1575 }
1576 if (flags & REXEC_COPY_STR) {
1577 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1578
1579 s = savepvn(strbeg, i);
1580 prog->subbeg = s;
1581 prog->sublen = i;
1582 RX_MATCH_COPIED_on(prog);
1583 }
1584 else {
1585 prog->subbeg = strbeg;
1586 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1587 }
1588 }
1589
1590 return 1;
1591
1592phooey:
1593 if (PL_reg_eval_set)
1594 restore_pos(aTHXo_ 0);
1595 return 0;
1596}
1597
1598/*
1599 - regtry - try match at specific point
1600 */
1601STATIC I32 /* 0 failure, 1 success */
1602S_regtry(pTHX_ regexp *prog, char *startpos)
1603{
1604 dTHR;
1605 register I32 i;
1606 register I32 *sp;
1607 register I32 *ep;
1608 CHECKPOINT lastcp;
1609
1610 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1611 MAGIC *mg;
1612
1613 PL_reg_eval_set = RS_init;
1614 DEBUG_r(DEBUG_s(
b900a521
JH
1615 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1616 (IV)(PL_stack_sp - PL_stack_base));
d6a28714
JH
1617 ));
1618 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1619 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1620 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1621 SAVETMPS;
1622 /* Apparently this is not needed, judging by wantarray. */
1623 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1624 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1625
1626 if (PL_reg_sv) {
1627 /* Make $_ available to executed code. */
1628 if (PL_reg_sv != DEFSV) {
1629 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1630 SAVESPTR(DEFSV);
1631 DEFSV = PL_reg_sv;
b8c5462f 1632 }
d6a28714
JH
1633
1634 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1635 && (mg = mg_find(PL_reg_sv, 'g')))) {
1636 /* prepare for quick setting of pos */
1637 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1638 mg = mg_find(PL_reg_sv, 'g');
1639 mg->mg_len = -1;
b8c5462f 1640 }
d6a28714
JH
1641 PL_reg_magic = mg;
1642 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1643 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1644 }
1645 if (!PL_reg_curpm)
1646 New(22,PL_reg_curpm, 1, PMOP);
1647 PL_reg_curpm->op_pmregexp = prog;
1648 PL_reg_oldcurpm = PL_curpm;
1649 PL_curpm = PL_reg_curpm;
1650 if (RX_MATCH_COPIED(prog)) {
1651 /* Here is a serious problem: we cannot rewrite subbeg,
1652 since it may be needed if this match fails. Thus
1653 $` inside (?{}) could fail... */
1654 PL_reg_oldsaved = prog->subbeg;
1655 PL_reg_oldsavedlen = prog->sublen;
1656 RX_MATCH_COPIED_off(prog);
1657 }
1658 else
1659 PL_reg_oldsaved = Nullch;
1660 prog->subbeg = PL_bostr;
1661 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1662 }
1663 prog->startp[0] = startpos - PL_bostr;
1664 PL_reginput = startpos;
1665 PL_regstartp = prog->startp;
1666 PL_regendp = prog->endp;
1667 PL_reglastparen = &prog->lastparen;
1668 prog->lastparen = 0;
1669 PL_regsize = 0;
1670 DEBUG_r(PL_reg_starttry = startpos);
1671 if (PL_reg_start_tmpl <= prog->nparens) {
1672 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1673 if(PL_reg_start_tmp)
1674 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1675 else
1676 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1677 }
1678
1679 /* XXXX What this code is doing here?!!! There should be no need
1680 to do this again and again, PL_reglastparen should take care of
1681 this! */
1682 sp = prog->startp;
1683 ep = prog->endp;
1684 if (prog->nparens) {
1685 for (i = prog->nparens; i >= 1; i--) {
1686 *++sp = -1;
1687 *++ep = -1;
1688 }
1689 }
1690 REGCP_SET;
1691 if (regmatch(prog->program + 1)) {
1692 prog->endp[0] = PL_reginput - PL_bostr;
1693 return 1;
1694 }
1695 REGCP_UNWIND;
1696 return 0;
1697}
1698
1699/*
1700 - regmatch - main matching routine
1701 *
1702 * Conceptually the strategy is simple: check to see whether the current
1703 * node matches, call self recursively to see whether the rest matches,
1704 * and then act accordingly. In practice we make some effort to avoid
1705 * recursion, in particular by going through "ordinary" nodes (that don't
1706 * need to know whether the rest of the match failed) by a loop instead of
1707 * by recursion.
1708 */
1709/* [lwall] I've hoisted the register declarations to the outer block in order to
1710 * maybe save a little bit of pushing and popping on the stack. It also takes
1711 * advantage of machines that use a register save mask on subroutine entry.
1712 */
1713STATIC I32 /* 0 failure, 1 success */
1714S_regmatch(pTHX_ regnode *prog)
1715{
1716 dTHR;
1717 register regnode *scan; /* Current node. */
1718 regnode *next; /* Next node. */
1719 regnode *inner; /* Next node in internal branch. */
1720 register I32 nextchr; /* renamed nextchr - nextchar colides with
1721 function of same name */
1722 register I32 n; /* no or next */
1723 register I32 ln; /* len or last */
1724 register char *s; /* operand or save */
1725 register char *locinput = PL_reginput;
1726 register I32 c1, c2, paren; /* case fold search, parenth */
1727 int minmod = 0, sw = 0, logical = 0;
1728#ifdef DEBUGGING
1729 PL_regindent++;
1730#endif
1731
1732 /* Note that nextchr is a byte even in UTF */
1733 nextchr = UCHARAT(locinput);
1734 scan = prog;
1735 while (scan != NULL) {
1736#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1737#ifdef DEBUGGING
1738# define sayYES goto yes
1739# define sayNO goto no
7821416a
IZ
1740# define sayYES_FINAL goto yes_final
1741# define sayYES_LOUD goto yes_loud
1742# define sayNO_FINAL goto no_final
1743# define sayNO_SILENT goto do_no
d6a28714
JH
1744# define saySAME(x) if (x) goto yes; else goto no
1745# define REPORT_CODE_OFF 24
1746#else
1747# define sayYES return 1
1748# define sayNO return 0
7821416a
IZ
1749# define sayYES_FINAL return 1
1750# define sayYES_LOUD return 1
1751# define sayNO_FINAL return 0
1752# define sayNO_SILENT return 0
d6a28714
JH
1753# define saySAME(x) return x
1754#endif
1755 DEBUG_r( {
1756 SV *prop = sv_newmortal();
1757 int docolor = *PL_colors[0];
1758 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1759 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1760 /* The part of the string before starttry has one color
1761 (pref0_len chars), between starttry and current
1762 position another one (pref_len - pref0_len chars),
1763 after the current position the third one.
1764 We assume that pref0_len <= pref_len, otherwise we
1765 decrease pref0_len. */
1766 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1767 ? (5 + taill) - l : locinput - PL_bostr);
1768 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1769
1770 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1771 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1772 ? (5 + taill) - pref_len : PL_regeol - locinput);
1773 if (pref0_len < 0)
1774 pref0_len = 0;
1775 if (pref0_len > pref_len)
1776 pref0_len = pref_len;
1777 regprop(prop, scan);
1778 PerlIO_printf(Perl_debug_log,
b900a521
JH
1779 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1780 (IV)(locinput - PL_bostr),
d6a28714
JH
1781 PL_colors[4], pref0_len,
1782 locinput - pref_len, PL_colors[5],
1783 PL_colors[2], pref_len - pref0_len,
1784 locinput - pref_len + pref0_len, PL_colors[3],
1785 (docolor ? "" : "> <"),
1786 PL_colors[0], l, locinput, PL_colors[1],
1787 15 - l - pref_len + 1,
1788 "",
b900a521 1789 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1790 SvPVX(prop));
1791 } );
1792
1793 next = scan + NEXT_OFF(scan);
1794 if (next == scan)
1795 next = NULL;
1796
1797 switch (OP(scan)) {
1798 case BOL:
1799 if (locinput == PL_bostr
1800 ? PL_regprev == '\n'
1801 : (PL_multiline &&
1802 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1803 {
1804 /* regtill = regbol; */
b8c5462f
JH
1805 break;
1806 }
d6a28714
JH
1807 sayNO;
1808 case MBOL:
1809 if (locinput == PL_bostr
1810 ? PL_regprev == '\n'
1811 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1812 {
b8c5462f
JH
1813 break;
1814 }
d6a28714
JH
1815 sayNO;
1816 case SBOL:
1817 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1818 break;
d6a28714
JH
1819 sayNO;
1820 case GPOS:
1821 if (locinput == PL_reg_ganch)
1822 break;
1823 sayNO;
1824 case EOL:
1825 if (PL_multiline)
1826 goto meol;
1827 else
1828 goto seol;
1829 case MEOL:
1830 meol:
1831 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1832 sayNO;
b8c5462f 1833 break;
d6a28714
JH
1834 case SEOL:
1835 seol:
1836 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1837 sayNO;
d6a28714 1838 if (PL_regeol - locinput > 1)
b8c5462f 1839 sayNO;
b8c5462f 1840 break;
d6a28714
JH
1841 case EOS:
1842 if (PL_regeol != locinput)
b8c5462f 1843 sayNO;
d6a28714
JH
1844 break;
1845 case SANYUTF8:
b8c5462f 1846 if (nextchr & 0x80) {
b8c5462f 1847 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1848 if (locinput > PL_regeol)
1849 sayNO;
b8c5462f
JH
1850 nextchr = UCHARAT(locinput);
1851 break;
1852 }
d6a28714 1853 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1854 sayNO;
b8c5462f 1855 nextchr = UCHARAT(++locinput);
a0d0e21e 1856 break;
d6a28714
JH
1857 case SANY:
1858 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1859 sayNO;
1860 nextchr = UCHARAT(++locinput);
b85d18e9 1861 break;
d6a28714 1862 case ANYUTF8:
a0ed51b3 1863 if (nextchr & 0x80) {
b8c5462f 1864 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1865 if (locinput > PL_regeol)
1866 sayNO;
a0ed51b3
LW
1867 nextchr = UCHARAT(locinput);
1868 break;
1869 }
d6a28714 1870 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3
LW
1871 sayNO;
1872 nextchr = UCHARAT(++locinput);
1873 break;
d6a28714
JH
1874 case REG_ANY:
1875 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1876 sayNO;
76e3520e 1877 nextchr = UCHARAT(++locinput);
a0d0e21e 1878 break;
d6a28714 1879 case EXACT:
cd439c50
IZ
1880 s = STRING(scan);
1881 ln = STR_LEN(scan);
d6a28714
JH
1882 /* Inline the first character, for speed. */
1883 if (UCHARAT(s) != nextchr)
1884 sayNO;
1885 if (PL_regeol - locinput < ln)
1886 sayNO;
1887 if (ln > 1 && memNE(s, locinput, ln))
1888 sayNO;
1889 locinput += ln;
1890 nextchr = UCHARAT(locinput);
1891 break;
1892 case EXACTFL:
b8c5462f
JH
1893 PL_reg_flags |= RF_tainted;
1894 /* FALL THROUGH */
d6a28714 1895 case EXACTF:
cd439c50
IZ
1896 s = STRING(scan);
1897 ln = STR_LEN(scan);
d6a28714
JH
1898
1899 if (UTF) {
1900 char *l = locinput;
1901 char *e = s + ln;
1902 c1 = OP(scan) == EXACTF;
1903 while (s < e) {
1904 if (l >= PL_regeol)
1905 sayNO;
1906 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1907 toLOWER_utf8((U8*)l) :
1908 toLOWER_LC_utf8((U8*)l)))
1909 {
1910 sayNO;
1911 }
1912 s += UTF8SKIP(s);
1913 l += UTF8SKIP(l);
b8c5462f 1914 }
d6a28714 1915 locinput = l;
a0ed51b3
LW
1916 nextchr = UCHARAT(locinput);
1917 break;
1918 }
d6a28714
JH
1919
1920 /* Inline the first character, for speed. */
1921 if (UCHARAT(s) != nextchr &&
1922 UCHARAT(s) != ((OP(scan) == EXACTF)
1923 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1924 sayNO;
d6a28714 1925 if (PL_regeol - locinput < ln)
b8c5462f 1926 sayNO;
d6a28714
JH
1927 if (ln > 1 && (OP(scan) == EXACTF
1928 ? ibcmp(s, locinput, ln)
1929 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1930 sayNO;
d6a28714
JH
1931 locinput += ln;
1932 nextchr = UCHARAT(locinput);
a0d0e21e 1933 break;
d6a28714 1934 case ANYOFUTF8:
d6a28714 1935 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1936 sayNO;
d6a28714
JH
1937 if (locinput >= PL_regeol)
1938 sayNO;
1939 locinput += PL_utf8skip[nextchr];
1940 nextchr = UCHARAT(locinput);
1941 break;
1942 case ANYOF:
d6a28714 1943 if (nextchr < 0)
b8c5462f 1944 nextchr = UCHARAT(locinput);
936ed897 1945 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
1946 sayNO;
1947 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1948 sayNO;
b8c5462f
JH
1949 nextchr = UCHARAT(++locinput);
1950 break;
d6a28714 1951 case ALNUML:
b8c5462f
JH
1952 PL_reg_flags |= RF_tainted;
1953 /* FALL THROUGH */
d6a28714 1954 case ALNUM:
b8c5462f 1955 if (!nextchr)
4633a7c4 1956 sayNO;
d6a28714
JH
1957 if (!(OP(scan) == ALNUM
1958 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1959 sayNO;
1960 nextchr = UCHARAT(++locinput);
bbce6d69 1961 break;
d6a28714 1962 case ALNUMLUTF8:
3280af22 1963 PL_reg_flags |= RF_tainted;
bbce6d69 1964 /* FALL THROUGH */
d6a28714 1965 case ALNUMUTF8:
b8c5462f
JH
1966 if (!nextchr)
1967 sayNO;
1968 if (nextchr & 0x80) {
d6a28714
JH
1969 if (!(OP(scan) == ALNUMUTF8
1970 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1971 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1972 {
1973 sayNO;
a0ed51b3 1974 }
b8c5462f 1975 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1976 nextchr = UCHARAT(locinput);
1977 break;
1978 }
d6a28714
JH
1979 if (!(OP(scan) == ALNUMUTF8
1980 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 1981 sayNO;
b8c5462f 1982 nextchr = UCHARAT(++locinput);
a0d0e21e 1983 break;
d6a28714 1984 case NALNUML:
b8c5462f
JH
1985 PL_reg_flags |= RF_tainted;
1986 /* FALL THROUGH */
d6a28714
JH
1987 case NALNUM:
1988 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 1989 sayNO;
d6a28714
JH
1990 if (OP(scan) == NALNUM
1991 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 1992 sayNO;
b8c5462f 1993 nextchr = UCHARAT(++locinput);
a0ed51b3 1994 break;
d6a28714 1995 case NALNUMLUTF8:
b8c5462f
JH
1996 PL_reg_flags |= RF_tainted;
1997 /* FALL THROUGH */
d6a28714 1998 case NALNUMUTF8:
3280af22 1999 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2000 sayNO;
b8c5462f 2001 if (nextchr & 0x80) {
d6a28714
JH
2002 if (OP(scan) == NALNUMUTF8
2003 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2004 : isALNUM_LC_utf8((U8*)locinput))
2005 {
b8c5462f 2006 sayNO;
d6a28714 2007 }
b8c5462f
JH
2008 locinput += PL_utf8skip[nextchr];
2009 nextchr = UCHARAT(locinput);
2010 break;
2011 }
d6a28714
JH
2012 if (OP(scan) == NALNUMUTF8
2013 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2014 sayNO;
76e3520e 2015 nextchr = UCHARAT(++locinput);
a0d0e21e 2016 break;
d6a28714
JH
2017 case BOUNDL:
2018 case NBOUNDL:
3280af22 2019 PL_reg_flags |= RF_tainted;
bbce6d69 2020 /* FALL THROUGH */
d6a28714
JH
2021 case BOUND:
2022 case NBOUND:
2023 /* was last char in word? */
2024 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2025 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2026 ln = isALNUM(ln);
2027 n = isALNUM(nextchr);
2028 }
2029 else {
2030 ln = isALNUM_LC(ln);
2031 n = isALNUM_LC(nextchr);
2032 }
2033 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2034 sayNO;
a0d0e21e 2035 break;
d6a28714
JH
2036 case BOUNDLUTF8:
2037 case NBOUNDLUTF8:
a0ed51b3
LW
2038 PL_reg_flags |= RF_tainted;
2039 /* FALL THROUGH */
d6a28714
JH
2040 case BOUNDUTF8:
2041 case NBOUNDUTF8:
2042 /* was last char in word? */
2043 ln = (locinput != PL_regbol)
2044 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2045 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2046 ln = isALNUM_uni(ln);
2047 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2048 }
d6a28714
JH
2049 else {
2050 ln = isALNUM_LC_uni(ln);
2051 n = isALNUM_LC_utf8((U8*)locinput);
2052 }
2053 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2054 sayNO;
a0ed51b3 2055 break;
d6a28714 2056 case SPACEL:
3280af22 2057 PL_reg_flags |= RF_tainted;
bbce6d69 2058 /* FALL THROUGH */
d6a28714
JH
2059 case SPACE:
2060 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2061 sayNO;
d6a28714
JH
2062 if (!(OP(scan) == SPACE
2063 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2064 sayNO;
76e3520e 2065 nextchr = UCHARAT(++locinput);
a0d0e21e 2066 break;
d6a28714 2067 case SPACELUTF8:
a0ed51b3
LW
2068 PL_reg_flags |= RF_tainted;
2069 /* FALL THROUGH */
d6a28714 2070 case SPACEUTF8:
a0ed51b3
LW
2071 if (!nextchr && locinput >= PL_regeol)
2072 sayNO;
2073 if (nextchr & 0x80) {
d6a28714
JH
2074 if (!(OP(scan) == SPACEUTF8
2075 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2076 : isSPACE_LC_utf8((U8*)locinput)))
2077 {
a0ed51b3 2078 sayNO;
d6a28714 2079 }
6f06b55f 2080 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2081 nextchr = UCHARAT(locinput);
2082 break;
2083 }
d6a28714
JH
2084 if (!(OP(scan) == SPACEUTF8
2085 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2086 sayNO;
2087 nextchr = UCHARAT(++locinput);
2088 break;
d6a28714 2089 case NSPACEL:
3280af22 2090 PL_reg_flags |= RF_tainted;
bbce6d69 2091 /* FALL THROUGH */
d6a28714 2092 case NSPACE:
b8c5462f
JH
2093 if (!nextchr)
2094 sayNO;
d6a28714
JH
2095 if (OP(scan) == SPACE
2096 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2097 sayNO;
b8c5462f 2098 nextchr = UCHARAT(++locinput);
a0d0e21e 2099 break;
d6a28714 2100 case NSPACELUTF8:
a0ed51b3
LW
2101 PL_reg_flags |= RF_tainted;
2102 /* FALL THROUGH */
d6a28714 2103 case NSPACEUTF8:
b8c5462f
JH
2104 if (!nextchr)
2105 sayNO;
2106 if (nextchr & 0x80) {
d6a28714
JH
2107 if (OP(scan) == NSPACEUTF8
2108 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2109 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2110 {
2111 sayNO;
2112 }
2113 locinput += PL_utf8skip[nextchr];
2114 nextchr = UCHARAT(locinput);
2115 break;
a0ed51b3 2116 }
d6a28714
JH
2117 if (OP(scan) == NSPACEUTF8
2118 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2119 sayNO;
76e3520e 2120 nextchr = UCHARAT(++locinput);
a0d0e21e 2121 break;
d6a28714 2122 case DIGITL:
a0ed51b3
LW
2123 PL_reg_flags |= RF_tainted;
2124 /* FALL THROUGH */
d6a28714 2125 case DIGIT:
a0ed51b3
LW
2126 if (!nextchr && locinput >= PL_regeol)
2127 sayNO;
d6a28714
JH
2128 if (!(OP(scan) == DIGIT
2129 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2130 sayNO;
76e3520e 2131 nextchr = UCHARAT(++locinput);
a0d0e21e 2132 break;
d6a28714 2133 case DIGITLUTF8:
a0ed51b3
LW
2134 PL_reg_flags |= RF_tainted;
2135 /* FALL THROUGH */
d6a28714 2136 case DIGITUTF8:
a0ed51b3
LW
2137 if (!nextchr)
2138 sayNO;
2139 if (nextchr & 0x80) {
d6a28714
JH
2140 if (OP(scan) == NDIGITUTF8
2141 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2142 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 2143 {
a0ed51b3 2144 sayNO;
dfe13c55 2145 }
6f06b55f 2146 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2147 nextchr = UCHARAT(locinput);
2148 break;
2149 }
d6a28714 2150 if (!isDIGIT(nextchr))
a0ed51b3
LW
2151 sayNO;
2152 nextchr = UCHARAT(++locinput);
2153 break;
d6a28714 2154 case NDIGITL:
b8c5462f
JH
2155 PL_reg_flags |= RF_tainted;
2156 /* FALL THROUGH */
d6a28714 2157 case NDIGIT:
b8c5462f
JH
2158 if (!nextchr)
2159 sayNO;
d6a28714
JH
2160 if (OP(scan) == DIGIT
2161 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2162 sayNO;
76e3520e 2163 nextchr = UCHARAT(++locinput);
a0d0e21e 2164 break;
d6a28714 2165 case NDIGITLUTF8:
b8c5462f
JH
2166 PL_reg_flags |= RF_tainted;
2167 /* FALL THROUGH */
d6a28714 2168 case NDIGITUTF8:
b8c5462f
JH
2169 if (!nextchr && locinput >= PL_regeol)
2170 sayNO;
a0ed51b3 2171 if (nextchr & 0x80) {
d6a28714 2172 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 2173 sayNO;
6f06b55f 2174 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2175 nextchr = UCHARAT(locinput);
2176 break;
2177 }
d6a28714 2178 if (isDIGIT(nextchr))
a0ed51b3
LW
2179 sayNO;
2180 nextchr = UCHARAT(++locinput);
2181 break;
2182 case CLUMP:
dfe13c55 2183 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2184 sayNO;
6f06b55f 2185 locinput += PL_utf8skip[nextchr];
dfe13c55 2186 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2187 locinput += UTF8SKIP(locinput);
2188 if (locinput > PL_regeol)
2189 sayNO;
2190 nextchr = UCHARAT(locinput);
2191 break;
c8756f30 2192 case REFFL:
3280af22 2193 PL_reg_flags |= RF_tainted;
c8756f30 2194 /* FALL THROUGH */
c277df42 2195 case REF:
c8756f30 2196 case REFF:
c277df42 2197 n = ARG(scan); /* which paren pair */
cf93c79d 2198 ln = PL_regstartp[n];
2c2d71f5 2199 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2200 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2201 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2202 if (ln == PL_regendp[n])
a0d0e21e 2203 break;
a0ed51b3 2204
cf93c79d 2205 s = PL_bostr + ln;
a0ed51b3
LW
2206 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2207 char *l = locinput;
cf93c79d 2208 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2209 /*
2210 * Note that we can't do the "other character" lookup trick as
2211 * in the 8-bit case (no pun intended) because in Unicode we
2212 * have to map both upper and title case to lower case.
2213 */
2214 if (OP(scan) == REFF) {
2215 while (s < e) {
2216 if (l >= PL_regeol)
2217 sayNO;
dfe13c55 2218 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2219 sayNO;
2220 s += UTF8SKIP(s);
2221 l += UTF8SKIP(l);
2222 }
2223 }
2224 else {
2225 while (s < e) {
2226 if (l >= PL_regeol)
2227 sayNO;
dfe13c55 2228 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2229 sayNO;
2230 s += UTF8SKIP(s);
2231 l += UTF8SKIP(l);
2232 }
2233 }
2234 locinput = l;
2235 nextchr = UCHARAT(locinput);
2236 break;
2237 }
2238
a0d0e21e 2239 /* Inline the first character, for speed. */
76e3520e 2240 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2241 (OP(scan) == REF ||
2242 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2243 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2244 sayNO;
cf93c79d 2245 ln = PL_regendp[n] - ln;
3280af22 2246 if (locinput + ln > PL_regeol)
4633a7c4 2247 sayNO;
c8756f30
AK
2248 if (ln > 1 && (OP(scan) == REF
2249 ? memNE(s, locinput, ln)
2250 : (OP(scan) == REFF
2251 ? ibcmp(s, locinput, ln)
2252 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2253 sayNO;
a0d0e21e 2254 locinput += ln;
76e3520e 2255 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2256 break;
2257
2258 case NOTHING:
c277df42 2259 case TAIL:
a0d0e21e
LW
2260 break;
2261 case BACK:
2262 break;
c277df42
IZ
2263 case EVAL:
2264 {
2265 dSP;
533c011a 2266 OP_4tree *oop = PL_op;
3280af22
NIS
2267 COP *ocurcop = PL_curcop;
2268 SV **ocurpad = PL_curpad;
c277df42
IZ
2269 SV *ret;
2270
2271 n = ARG(scan);
533c011a 2272 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2273 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2274 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2275 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2276
cea2e8a9 2277 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2278 SPAGAIN;
2279 ret = POPs;
2280 PUTBACK;
2281
0f5d15d6
IZ
2282 PL_op = oop;
2283 PL_curpad = ocurpad;
2284 PL_curcop = ocurcop;
c277df42 2285 if (logical) {
0f5d15d6
IZ
2286 if (logical == 2) { /* Postponed subexpression. */
2287 regexp *re;
22c35a8c 2288 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2289 re_cc_state state;
0f5d15d6
IZ
2290 CHECKPOINT cp, lastcp;
2291
2292 if(SvROK(ret) || SvRMAGICAL(ret)) {
2293 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2294
2295 if(SvMAGICAL(sv))
2296 mg = mg_find(sv, 'r');
2297 }
2298 if (mg) {
2299 re = (regexp *)mg->mg_obj;
df0003d4 2300 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2301 }
2302 else {
2303 STRLEN len;
2304 char *t = SvPV(ret, len);
2305 PMOP pm;
2306 char *oprecomp = PL_regprecomp;
2307 I32 osize = PL_regsize;
2308 I32 onpar = PL_regnpar;
2309
2310 pm.op_pmflags = 0;
cea2e8a9 2311 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2312 if (!(SvFLAGS(ret)
2313 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2314 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2315 PL_regprecomp = oprecomp;
2316 PL_regsize = osize;
2317 PL_regnpar = onpar;
2318 }
2319 DEBUG_r(
2320 PerlIO_printf(Perl_debug_log,
2321 "Entering embedded `%s%.60s%s%s'\n",
2322 PL_colors[0],
2323 re->precomp,
2324 PL_colors[1],
2325 (strlen(re->precomp) > 60 ? "..." : ""))
2326 );
2327 state.node = next;
2328 state.prev = PL_reg_call_cc;
2329 state.cc = PL_regcc;
2330 state.re = PL_reg_re;
2331
2ab05381 2332 PL_regcc = 0;
0f5d15d6
IZ
2333
2334 cp = regcppush(0); /* Save *all* the positions. */
2335 REGCP_SET;
2336 cache_re(re);
2337 state.ss = PL_savestack_ix;
2338 *PL_reglastparen = 0;
2339 PL_reg_call_cc = &state;
2340 PL_reginput = locinput;
2c2d71f5
JH
2341
2342 /* XXXX This is too dramatic a measure... */
2343 PL_reg_maxiter = 0;
2344
0f5d15d6 2345 if (regmatch(re->program + 1)) {
2c914db6
IZ
2346 /* Even though we succeeded, we need to restore
2347 global variables, since we may be wrapped inside
2348 SUSPEND, thus the match may be not finished yet. */
2349
2350 /* XXXX Do this only if SUSPENDed? */
2351 PL_reg_call_cc = state.prev;
2352 PL_regcc = state.cc;
2353 PL_reg_re = state.re;
2354 cache_re(PL_reg_re);
2355
2356 /* XXXX This is too dramatic a measure... */
2357 PL_reg_maxiter = 0;
2358
2359 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2360 ReREFCNT_dec(re);
2361 regcpblow(cp);
2362 sayYES;
2363 }
0f5d15d6
IZ
2364 ReREFCNT_dec(re);
2365 REGCP_UNWIND;
2366 regcppop();
2367 PL_reg_call_cc = state.prev;
2368 PL_regcc = state.cc;
2369 PL_reg_re = state.re;
d3790889 2370 cache_re(PL_reg_re);
2c2d71f5
JH
2371
2372 /* XXXX This is too dramatic a measure... */
2373 PL_reg_maxiter = 0;
2374
0f5d15d6
IZ
2375 sayNO;
2376 }
c277df42 2377 sw = SvTRUE(ret);
0f5d15d6 2378 logical = 0;
a0ed51b3
LW
2379 }
2380 else
3280af22 2381 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2382 break;
2383 }
a0d0e21e 2384 case OPEN:
c277df42 2385 n = ARG(scan); /* which paren pair */
3280af22
NIS
2386 PL_reg_start_tmp[n] = locinput;
2387 if (n > PL_regsize)
2388 PL_regsize = n;
a0d0e21e
LW
2389 break;
2390 case CLOSE:
c277df42 2391 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2392 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2393 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2394 if (n > *PL_reglastparen)
2395 *PL_reglastparen = n;
a0d0e21e 2396 break;
c277df42
IZ
2397 case GROUPP:
2398 n = ARG(scan); /* which paren pair */
cf93c79d 2399 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2400 break;
2401 case IFTHEN:
2c2d71f5 2402 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2403 if (sw)
2404 next = NEXTOPER(NEXTOPER(scan));
2405 else {
2406 next = scan + ARG(scan);
2407 if (OP(next) == IFTHEN) /* Fake one. */
2408 next = NEXTOPER(NEXTOPER(next));
2409 }
2410 break;
2411 case LOGICAL:
0f5d15d6 2412 logical = scan->flags;
c277df42 2413 break;
2ab05381
IZ
2414/*******************************************************************
2415 PL_regcc contains infoblock about the innermost (...)* loop, and
2416 a pointer to the next outer infoblock.
2417
2418 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2419
2420 1) After matching X, regnode for CURLYX is processed;
2421
2422 2) This regnode creates infoblock on the stack, and calls
2423 regmatch() recursively with the starting point at WHILEM node;
2424
2425 3) Each hit of WHILEM node tries to match A and Z (in the order
2426 depending on the current iteration, min/max of {min,max} and
2427 greediness). The information about where are nodes for "A"
2428 and "Z" is read from the infoblock, as is info on how many times "A"
2429 was already matched, and greediness.
2430
2431 4) After A matches, the same WHILEM node is hit again.
2432
2433 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2434 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2435 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2436 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2437 of the external loop.
2438
2439 Currently present infoblocks form a tree with a stem formed by PL_curcc
2440 and whatever it mentions via ->next, and additional attached trees
2441 corresponding to temporarily unset infoblocks as in "5" above.
2442
2443 In the following picture infoblocks for outer loop of
2444 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2445 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2446 infoblocks are drawn below the "reset" infoblock.
2447
2448 In fact in the picture below we do not show failed matches for Z and T
2449 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2450 more obvious *why* one needs to *temporary* unset infoblocks.]
2451
2452 Matched REx position InfoBlocks Comment
2453 (Y(A)*?Z)*?T x
2454 Y(A)*?Z)*?T x <- O
2455 Y (A)*?Z)*?T x <- O
2456 Y A)*?Z)*?T x <- O <- I
2457 YA )*?Z)*?T x <- O <- I
2458 YA A)*?Z)*?T x <- O <- I
2459 YAA )*?Z)*?T x <- O <- I
2460 YAA Z)*?T x <- O # Temporary unset I
2461 I
2462
2463 YAAZ Y(A)*?Z)*?T x <- O
2464 I
2465
2466 YAAZY (A)*?Z)*?T x <- O
2467 I
2468
2469 YAAZY A)*?Z)*?T x <- O <- I
2470 I
2471
2472 YAAZYA )*?Z)*?T x <- O <- I
2473 I
2474
2475 YAAZYA Z)*?T x <- O # Temporary unset I
2476 I,I
2477
2478 YAAZYAZ )*?T x <- O
2479 I,I
2480
2481 YAAZYAZ T x # Temporary unset O
2482 O
2483 I,I
2484
2485 YAAZYAZT x
2486 O
2487 I,I
2488 *******************************************************************/
a0d0e21e
LW
2489 case CURLYX: {
2490 CURCUR cc;
3280af22 2491 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2492
2493 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2494 next += ARG(next);
3280af22
NIS
2495 cc.oldcc = PL_regcc;
2496 PL_regcc = &cc;
2497 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2498 cc.cur = -1;
2499 cc.min = ARG1(scan);
2500 cc.max = ARG2(scan);
c277df42 2501 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2502 cc.next = next;
2503 cc.minmod = minmod;
2504 cc.lastloc = 0;
3280af22 2505 PL_reginput = locinput;
a0d0e21e
LW
2506 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2507 regcpblow(cp);
3280af22 2508 PL_regcc = cc.oldcc;
4633a7c4 2509 saySAME(n);
a0d0e21e
LW
2510 }
2511 /* NOT REACHED */
2512 case WHILEM: {
2513 /*
2514 * This is really hard to understand, because after we match
2515 * what we're trying to match, we must make sure the rest of
2c2d71f5 2516 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2517 * to go back UP the parse tree by recursing ever deeper. And
2518 * if it fails, we have to reset our parent's current state
2519 * that we can try again after backing off.
2520 */
2521
c277df42 2522 CHECKPOINT cp, lastcp;
3280af22 2523 CURCUR* cc = PL_regcc;
c277df42
IZ
2524 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2525
4633a7c4 2526 n = cc->cur + 1; /* how many we know we matched */
3280af22 2527 PL_reginput = locinput;
a0d0e21e 2528
c277df42
IZ
2529 DEBUG_r(
2530 PerlIO_printf(Perl_debug_log,
2531 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2532 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2533 (long)n, (long)cc->min,
2534 (long)cc->max, (long)cc)
2535 );
4633a7c4 2536
a0d0e21e
LW
2537 /* If degenerate scan matches "", assume scan done. */
2538
579cf2c3 2539 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2540 PL_regcc = cc->oldcc;
2ab05381
IZ
2541 if (PL_regcc)
2542 ln = PL_regcc->cur;
c277df42 2543 DEBUG_r(
c3464db5
DD
2544 PerlIO_printf(Perl_debug_log,
2545 "%*s empty match detected, try continuation...\n",
3280af22 2546 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2547 );
a0d0e21e 2548 if (regmatch(cc->next))
4633a7c4 2549 sayYES;
2ab05381
IZ
2550 if (PL_regcc)
2551 PL_regcc->cur = ln;
3280af22 2552 PL_regcc = cc;
4633a7c4 2553 sayNO;
a0d0e21e
LW
2554 }
2555
2556 /* First just match a string of min scans. */
2557
2558 if (n < cc->min) {
2559 cc->cur = n;
2560 cc->lastloc = locinput;
4633a7c4
LW
2561 if (regmatch(cc->scan))
2562 sayYES;
2563 cc->cur = n - 1;
c277df42 2564 cc->lastloc = lastloc;
4633a7c4 2565 sayNO;
a0d0e21e
LW
2566 }
2567
2c2d71f5
JH
2568 if (scan->flags) {
2569 /* Check whether we already were at this position.
2570 Postpone detection until we know the match is not
2571 *that* much linear. */
2572 if (!PL_reg_maxiter) {
2573 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2574 PL_reg_leftiter = PL_reg_maxiter;
2575 }
2576 if (PL_reg_leftiter-- == 0) {
2577 I32 size = (PL_reg_maxiter + 7)/8;
2578 if (PL_reg_poscache) {
2579 if (PL_reg_poscache_size < size) {
2580 Renew(PL_reg_poscache, size, char);
2581 PL_reg_poscache_size = size;
2582 }
2583 Zero(PL_reg_poscache, size, char);
2584 }
2585 else {
2586 PL_reg_poscache_size = size;
2587 Newz(29, PL_reg_poscache, size, char);
2588 }
2589 DEBUG_r(
2590 PerlIO_printf(Perl_debug_log,
2591 "%sDetected a super-linear match, switching on caching%s...\n",
2592 PL_colors[4], PL_colors[5])
2593 );
2594 }
2595 if (PL_reg_leftiter < 0) {
2596 I32 o = locinput - PL_bostr, b;
2597
2598 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2599 b = o % 8;
2600 o /= 8;
2601 if (PL_reg_poscache[o] & (1<<b)) {
2602 DEBUG_r(
2603 PerlIO_printf(Perl_debug_log,
2604 "%*s already tried at this position...\n",
2605 REPORT_CODE_OFF+PL_regindent*2, "")
2606 );
7821416a 2607 sayNO_SILENT;
2c2d71f5
JH
2608 }
2609 PL_reg_poscache[o] |= (1<<b);
2610 }
2611 }
2612
a0d0e21e
LW
2613 /* Prefer next over scan for minimal matching. */
2614
2615 if (cc->minmod) {
3280af22 2616 PL_regcc = cc->oldcc;
2ab05381
IZ
2617 if (PL_regcc)
2618 ln = PL_regcc->cur;
5f05dabc 2619 cp = regcppush(cc->parenfloor);
c277df42 2620 REGCP_SET;
5f05dabc 2621 if (regmatch(cc->next)) {
c277df42 2622 regcpblow(cp);
4633a7c4 2623 sayYES; /* All done. */
5f05dabc 2624 }
c277df42 2625 REGCP_UNWIND;
5f05dabc 2626 regcppop();
2ab05381
IZ
2627 if (PL_regcc)
2628 PL_regcc->cur = ln;
3280af22 2629 PL_regcc = cc;
a0d0e21e 2630
c277df42 2631 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2632 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
2633 && !(PL_reg_flags & RF_warned)) {
2634 PL_reg_flags |= RF_warned;
cea2e8a9 2635 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
2636 "Complex regular subexpression recursion",
2637 REG_INFTY - 1);
c277df42 2638 }
4633a7c4 2639 sayNO;
c277df42 2640 }
a687059c 2641
c277df42 2642 DEBUG_r(
c3464db5
DD
2643 PerlIO_printf(Perl_debug_log,
2644 "%*s trying longer...\n",
3280af22 2645 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2646 );
a0d0e21e 2647 /* Try scanning more and see if it helps. */
3280af22 2648 PL_reginput = locinput;
a0d0e21e
LW
2649 cc->cur = n;
2650 cc->lastloc = locinput;
5f05dabc 2651 cp = regcppush(cc->parenfloor);
c277df42 2652 REGCP_SET;
5f05dabc 2653 if (regmatch(cc->scan)) {
c277df42 2654 regcpblow(cp);
4633a7c4 2655 sayYES;
5f05dabc 2656 }
c277df42 2657 REGCP_UNWIND;
5f05dabc 2658 regcppop();
4633a7c4 2659 cc->cur = n - 1;
c277df42 2660 cc->lastloc = lastloc;
4633a7c4 2661 sayNO;
a0d0e21e
LW
2662 }
2663
2664 /* Prefer scan over next for maximal matching. */
2665
2666 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2667 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2668 cc->cur = n;
2669 cc->lastloc = locinput;
c277df42 2670 REGCP_SET;
5f05dabc 2671 if (regmatch(cc->scan)) {
c277df42 2672 regcpblow(cp);
4633a7c4 2673 sayYES;
5f05dabc 2674 }
c277df42 2675 REGCP_UNWIND;
a0d0e21e 2676 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2677 PL_reginput = locinput;
c277df42 2678 DEBUG_r(
c3464db5
DD
2679 PerlIO_printf(Perl_debug_log,
2680 "%*s failed, try continuation...\n",
3280af22 2681 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2682 );
2683 }
599cee73
PM
2684 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2685 && !(PL_reg_flags & RF_warned)) {
3280af22 2686 PL_reg_flags |= RF_warned;
cea2e8a9 2687 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
2688 "Complex regular subexpression recursion",
2689 REG_INFTY - 1);
a0d0e21e
LW
2690 }
2691
2692 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2693 PL_regcc = cc->oldcc;
2ab05381
IZ
2694 if (PL_regcc)
2695 ln = PL_regcc->cur;
a0d0e21e 2696 if (regmatch(cc->next))
4633a7c4 2697 sayYES;
2ab05381
IZ
2698 if (PL_regcc)
2699 PL_regcc->cur = ln;
3280af22 2700 PL_regcc = cc;
4633a7c4 2701 cc->cur = n - 1;
c277df42 2702 cc->lastloc = lastloc;
4633a7c4 2703 sayNO;
a0d0e21e
LW
2704 }
2705 /* NOT REACHED */
c277df42
IZ
2706 case BRANCHJ:
2707 next = scan + ARG(scan);
2708 if (next == scan)
2709 next = NULL;
2710 inner = NEXTOPER(NEXTOPER(scan));
2711 goto do_branch;
2712 case BRANCH:
2713 inner = NEXTOPER(scan);
2714 do_branch:
2715 {
2716 CHECKPOINT lastcp;
2717 c1 = OP(scan);
2718 if (OP(next) != c1) /* No choice. */
2719 next = inner; /* Avoid recursion. */
a0d0e21e 2720 else {
3280af22 2721 int lastparen = *PL_reglastparen;
c277df42
IZ
2722
2723 REGCP_SET;
a0d0e21e 2724 do {
3280af22 2725 PL_reginput = locinput;
c277df42 2726 if (regmatch(inner))
4633a7c4 2727 sayYES;
c277df42 2728 REGCP_UNWIND;
3280af22 2729 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2730 PL_regendp[n] = -1;
3280af22 2731 *PL_reglastparen = n;
c277df42 2732 scan = next;
a0d0e21e 2733 /*SUPPRESS 560*/
c277df42
IZ
2734 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2735 next += n;
a0d0e21e 2736 else
c277df42 2737 next = NULL;
c277df42
IZ
2738 inner = NEXTOPER(scan);
2739 if (c1 == BRANCHJ) {
2740 inner = NEXTOPER(inner);
2741 }
2742 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2743 sayNO;
a0d0e21e 2744 /* NOTREACHED */
a687059c 2745 }
a0d0e21e
LW
2746 }
2747 break;
2748 case MINMOD:
2749 minmod = 1;
2750 break;
c277df42
IZ
2751 case CURLYM:
2752 {
00db4c45 2753 I32 l = 0;
c277df42
IZ
2754 CHECKPOINT lastcp;
2755
2756 /* We suppose that the next guy does not need
2757 backtracking: in particular, it is of constant length,
2758 and has no parenths to influence future backrefs. */
2759 ln = ARG1(scan); /* min to match */
2760 n = ARG2(scan); /* max to match */
c277df42
IZ
2761 paren = scan->flags;
2762 if (paren) {
3280af22
NIS
2763 if (paren > PL_regsize)
2764 PL_regsize = paren;
2765 if (paren > *PL_reglastparen)
2766 *PL_reglastparen = paren;
c277df42 2767 }
dc45a647 2768 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2769 if (paren)
2770 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2771 PL_reginput = locinput;
c277df42
IZ
2772 if (minmod) {
2773 minmod = 0;
2774 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2775 sayNO;
5f4b28b2 2776 if (ln && l == 0 && n >= ln
c277df42
IZ
2777 /* In fact, this is tricky. If paren, then the
2778 fact that we did/didnot match may influence
2779 future execution. */
2780 && !(paren && ln == 0))
2781 ln = n;
3280af22 2782 locinput = PL_reginput;
22c35a8c 2783 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2784 c1 = (U8)*STRING(next);
c277df42 2785 if (OP(next) == EXACTF)
22c35a8c 2786 c2 = PL_fold[c1];
c277df42 2787 else if (OP(next) == EXACTFL)
22c35a8c 2788 c2 = PL_fold_locale[c1];
c277df42
IZ
2789 else
2790 c2 = c1;
a0ed51b3
LW
2791 }
2792 else
c277df42
IZ
2793 c1 = c2 = -1000;
2794 REGCP_SET;
5f4b28b2 2795 /* This may be improved if l == 0. */
c277df42
IZ
2796 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2797 /* If it could work, try it. */
2798 if (c1 == -1000 ||
3280af22
NIS
2799 UCHARAT(PL_reginput) == c1 ||
2800 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2801 {
2802 if (paren) {
2803 if (n) {
cf93c79d
IZ
2804 PL_regstartp[paren] =
2805 HOPc(PL_reginput, -l) - PL_bostr;
2806 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2807 }
2808 else
cf93c79d 2809 PL_regendp[paren] = -1;
c277df42
IZ
2810 }
2811 if (regmatch(next))
2812 sayYES;
2813 REGCP_UNWIND;
2814 }
2815 /* Couldn't or didn't -- move forward. */
3280af22 2816 PL_reginput = locinput;
c277df42
IZ
2817 if (regrepeat_hard(scan, 1, &l)) {
2818 ln++;
3280af22 2819 locinput = PL_reginput;
c277df42
IZ
2820 }
2821 else
2822 sayNO;
2823 }
a0ed51b3
LW
2824 }
2825 else {
c277df42
IZ
2826 n = regrepeat_hard(scan, n, &l);
2827 if (n != 0 && l == 0
2828 /* In fact, this is tricky. If paren, then the
2829 fact that we did/didnot match may influence
2830 future execution. */
2831 && !(paren && ln == 0))
2832 ln = n;
3280af22 2833 locinput = PL_reginput;
c277df42 2834 DEBUG_r(
5c0ca799 2835 PerlIO_printf(Perl_debug_log,
faccc32b 2836 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2837 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2838 (IV) n, (IV)l)
c277df42
IZ
2839 );
2840 if (n >= ln) {
22c35a8c 2841 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2842 c1 = (U8)*STRING(next);
c277df42 2843 if (OP(next) == EXACTF)
22c35a8c 2844 c2 = PL_fold[c1];
c277df42 2845 else if (OP(next) == EXACTFL)
22c35a8c 2846 c2 = PL_fold_locale[c1];
c277df42
IZ
2847 else
2848 c2 = c1;
a0ed51b3
LW
2849 }
2850 else
c277df42
IZ
2851 c1 = c2 = -1000;
2852 }
2853 REGCP_SET;
2854 while (n >= ln) {
2855 /* If it could work, try it. */
2856 if (c1 == -1000 ||
3280af22
NIS
2857 UCHARAT(PL_reginput) == c1 ||
2858 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2859 {
2860 DEBUG_r(
c3464db5 2861 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2862 "%*s trying tail with n=%"IVdf"...\n",
2863 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2864 );
2865 if (paren) {
2866 if (n) {
cf93c79d
IZ
2867 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2868 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2869 }
a0ed51b3 2870 else
cf93c79d 2871 PL_regendp[paren] = -1;
c277df42 2872 }
a0ed51b3
LW
2873 if (regmatch(next))
2874 sayYES;
2875 REGCP_UNWIND;
2876 }
c277df42
IZ
2877 /* Couldn't or didn't -- back up. */
2878 n--;
dfe13c55 2879 locinput = HOPc(locinput, -l);
3280af22 2880 PL_reginput = locinput;
c277df42
IZ
2881 }
2882 }
2883 sayNO;
2884 break;
2885 }
2886 case CURLYN:
2887 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2888 if (paren > PL_regsize)
2889 PL_regsize = paren;
2890 if (paren > *PL_reglastparen)
2891 *PL_reglastparen = paren;
c277df42
IZ
2892 ln = ARG1(scan); /* min to match */
2893 n = ARG2(scan); /* max to match */
dc45a647 2894 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2895 goto repeat;
a0d0e21e 2896 case CURLY:
c277df42 2897 paren = 0;
a0d0e21e
LW
2898 ln = ARG1(scan); /* min to match */
2899 n = ARG2(scan); /* max to match */
dc45a647 2900 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2901 goto repeat;
2902 case STAR:
2903 ln = 0;
c277df42 2904 n = REG_INFTY;
a0d0e21e 2905 scan = NEXTOPER(scan);
c277df42 2906 paren = 0;
a0d0e21e
LW
2907 goto repeat;
2908 case PLUS:
c277df42
IZ
2909 ln = 1;
2910 n = REG_INFTY;
2911 scan = NEXTOPER(scan);
2912 paren = 0;
2913 repeat:
a0d0e21e
LW
2914 /*
2915 * Lookahead to avoid useless match attempts
2916 * when we know what character comes next.
2917 */
22c35a8c 2918 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2919 c1 = (U8)*STRING(next);
bbce6d69 2920 if (OP(next) == EXACTF)
22c35a8c 2921 c2 = PL_fold[c1];
bbce6d69 2922 else if (OP(next) == EXACTFL)
22c35a8c 2923 c2 = PL_fold_locale[c1];
bbce6d69 2924 else
2925 c2 = c1;
2926 }
a0d0e21e 2927 else
bbce6d69 2928 c1 = c2 = -1000;
3280af22 2929 PL_reginput = locinput;
a0d0e21e 2930 if (minmod) {
c277df42 2931 CHECKPOINT lastcp;
a0d0e21e
LW
2932 minmod = 0;
2933 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2934 sayNO;
a0ed51b3 2935 locinput = PL_reginput;
c277df42 2936 REGCP_SET;
0fe9bf95
IZ
2937 if (c1 != -1000) {
2938 char *e = locinput + n - ln; /* Should not check after this */
2939 char *old = locinput;
2940
2941 if (e >= PL_regeol || (n == REG_INFTY))
2942 e = PL_regeol - 1;
2943 while (1) {
2944 /* Find place 'next' could work */
2945 if (c1 == c2) {
2946 while (locinput <= e && *locinput != c1)
2947 locinput++;
2948 } else {
2949 while (locinput <= e
2950 && *locinput != c1
2951 && *locinput != c2)
2952 locinput++;
2953 }
2954 if (locinput > e)
2955 sayNO;
2956 /* PL_reginput == old now */
2957 if (locinput != old) {
2958 ln = 1; /* Did some */
2959 if (regrepeat(scan, locinput - old) <
2960 locinput - old)
2961 sayNO;
2962 }
2963 /* PL_reginput == locinput now */
2964 if (paren) {
2965 if (ln) {
cf93c79d
IZ
2966 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2967 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2968 }
2969 else
cf93c79d 2970 PL_regendp[paren] = -1;
0fe9bf95
IZ
2971 }
2972 if (regmatch(next))
2973 sayYES;
2974 PL_reginput = locinput; /* Could be reset... */
2975 REGCP_UNWIND;
2976 /* Couldn't or didn't -- move forward. */
2977 old = locinput++;
2978 }
2979 }
2980 else
c277df42 2981 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2982 /* If it could work, try it. */
bbce6d69 2983 if (c1 == -1000 ||
3280af22
NIS
2984 UCHARAT(PL_reginput) == c1 ||
2985 UCHARAT(PL_reginput) == c2)
bbce6d69 2986 {
c277df42
IZ
2987 if (paren) {
2988 if (n) {
cf93c79d
IZ
2989 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2990 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2991 }
2992 else
cf93c79d 2993 PL_regendp[paren] = -1;
c277df42 2994 }
a0d0e21e 2995 if (regmatch(next))
4633a7c4 2996 sayYES;
c277df42 2997 REGCP_UNWIND;
bbce6d69 2998 }
c277df42 2999 /* Couldn't or didn't -- move forward. */
a0ed51b3 3000 PL_reginput = locinput;
a0d0e21e
LW
3001 if (regrepeat(scan, 1)) {
3002 ln++;
a0ed51b3
LW
3003 locinput = PL_reginput;
3004 }
3005 else
4633a7c4 3006 sayNO;
a0d0e21e
LW
3007 }
3008 }
3009 else {
c277df42 3010 CHECKPOINT lastcp;
a0d0e21e 3011 n = regrepeat(scan, n);
a0ed51b3 3012 locinput = PL_reginput;
22c35a8c 3013 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 3014 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 3015 ln = n; /* why back off? */
c277df42
IZ
3016 REGCP_SET;
3017 if (paren) {
3018 while (n >= ln) {
3019 /* If it could work, try it. */
3020 if (c1 == -1000 ||
3280af22
NIS
3021 UCHARAT(PL_reginput) == c1 ||
3022 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3023 {
3024 if (paren && n) {
3025 if (n) {
cf93c79d
IZ
3026 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3027 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3028 }
3029 else
cf93c79d 3030 PL_regendp[paren] = -1;
c277df42
IZ
3031 }
3032 if (regmatch(next))
3033 sayYES;
3034 REGCP_UNWIND;
3035 }
3036 /* Couldn't or didn't -- back up. */
3037 n--;
dfe13c55 3038 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3039 }
a0ed51b3
LW
3040 }
3041 else {
c277df42
IZ
3042 while (n >= ln) {
3043 /* If it could work, try it. */
3044 if (c1 == -1000 ||
3280af22
NIS
3045 UCHARAT(PL_reginput) == c1 ||
3046 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3047 {
3048 if (regmatch(next))
3049 sayYES;
3050 REGCP_UNWIND;
3051 }
3052 /* Couldn't or didn't -- back up. */
3053 n--;
dfe13c55 3054 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3055 }
a0d0e21e
LW
3056 }
3057 }
4633a7c4 3058 sayNO;
c277df42 3059 break;
a0d0e21e 3060 case END:
0f5d15d6
IZ
3061 if (PL_reg_call_cc) {
3062 re_cc_state *cur_call_cc = PL_reg_call_cc;
3063 CURCUR *cctmp = PL_regcc;
3064 regexp *re = PL_reg_re;
3065 CHECKPOINT cp, lastcp;
3066
3067 cp = regcppush(0); /* Save *all* the positions. */
3068 REGCP_SET;
3069 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3070 the caller. */
3071 PL_reginput = locinput; /* Make position available to
3072 the callcc. */
3073 cache_re(PL_reg_call_cc->re);
3074 PL_regcc = PL_reg_call_cc->cc;
3075 PL_reg_call_cc = PL_reg_call_cc->prev;
3076 if (regmatch(cur_call_cc->node)) {
3077 PL_reg_call_cc = cur_call_cc;
3078 regcpblow(cp);
3079 sayYES;
3080 }
3081 REGCP_UNWIND;
3082 regcppop();
3083 PL_reg_call_cc = cur_call_cc;
3084 PL_regcc = cctmp;
3085 PL_reg_re = re;
3086 cache_re(re);
3087
3088 DEBUG_r(
3089 PerlIO_printf(Perl_debug_log,
3090 "%*s continuation failed...\n",
3091 REPORT_CODE_OFF+PL_regindent*2, "")
3092 );
7821416a 3093 sayNO_SILENT;
0f5d15d6 3094 }
7821416a
IZ
3095 if (locinput < PL_regtill) {
3096 DEBUG_r(PerlIO_printf(Perl_debug_log,
3097 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3098 PL_colors[4],
3099 (long)(locinput - PL_reg_starttry),
3100 (long)(PL_regtill - PL_reg_starttry),
3101 PL_colors[5]));
3102 sayNO_FINAL; /* Cannot match: too short. */
3103 }
3104 PL_reginput = locinput; /* put where regtry can find it */
3105 sayYES_FINAL; /* Success! */
7e5428c5 3106 case SUCCEED:
3280af22 3107 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3108 sayYES_LOUD; /* Success! */
c277df42
IZ
3109 case SUSPEND:
3110 n = 1;
9fe1d20c 3111 PL_reginput = locinput;
c277df42 3112 goto do_ifmatch;
a0d0e21e 3113 case UNLESSM:
c277df42 3114 n = 0;
a0ed51b3 3115 if (scan->flags) {
0fe9bf95
IZ
3116 if (UTF) { /* XXXX This is absolutely
3117 broken, we read before
3118 start of string. */
3119 s = HOPMAYBEc(locinput, -scan->flags);
3120 if (!s)
3121 goto say_yes;
3122 PL_reginput = s;
3123 }
3124 else {
3125 if (locinput < PL_bostr + scan->flags)
3126 goto say_yes;
3127 PL_reginput = locinput - scan->flags;
3128 goto do_ifmatch;
3129 }
a0ed51b3
LW
3130 }
3131 else
3132 PL_reginput = locinput;
c277df42
IZ
3133 goto do_ifmatch;
3134 case IFMATCH:
3135 n = 1;
a0ed51b3 3136 if (scan->flags) {
0fe9bf95
IZ
3137 if (UTF) { /* XXXX This is absolutely
3138 broken, we read before
3139 start of string. */
3140 s = HOPMAYBEc(locinput, -scan->flags);
3141 if (!s || s < PL_bostr)
3142 goto say_no;
3143 PL_reginput = s;
3144 }
3145 else {
3146 if (locinput < PL_bostr + scan->flags)
3147 goto say_no;
3148 PL_reginput = locinput - scan->flags;
3149 goto do_ifmatch;
3150 }
a0ed51b3
LW
3151 }
3152 else
3153 PL_reginput = locinput;
3154
c277df42 3155 do_ifmatch:
c277df42
IZ
3156 inner = NEXTOPER(NEXTOPER(scan));
3157 if (regmatch(inner) != n) {
3158 say_no:
3159 if (logical) {
3160 logical = 0;
3161 sw = 0;
3162 goto do_longjump;
a0ed51b3
LW
3163 }
3164 else
c277df42
IZ
3165 sayNO;
3166 }
3167 say_yes:
3168 if (logical) {
3169 logical = 0;
3170 sw = 1;
3171 }
fe44a5e8 3172 if (OP(scan) == SUSPEND) {
3280af22 3173 locinput = PL_reginput;
565764a8 3174 nextchr = UCHARAT(locinput);
fe44a5e8 3175 }
c277df42
IZ
3176 /* FALL THROUGH. */
3177 case LONGJMP:
3178 do_longjump:
3179 next = scan + ARG(scan);
3180 if (next == scan)
3181 next = NULL;
a0d0e21e
LW
3182 break;
3183 default:
b900a521 3184 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3185 PTR2UV(scan), OP(scan));
cea2e8a9 3186 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3187 }
a0d0e21e
LW
3188 scan = next;
3189 }
a687059c 3190
a0d0e21e
LW
3191 /*
3192 * We get here only if there's trouble -- normally "case END" is
3193 * the terminating point.
3194 */
cea2e8a9 3195 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3196 /*NOTREACHED*/
4633a7c4
LW
3197 sayNO;
3198
7821416a
IZ
3199yes_loud:
3200 DEBUG_r(
3201 PerlIO_printf(Perl_debug_log,
3202 "%*s %scould match...%s\n",
3203 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3204 );
3205 goto yes;
3206yes_final:
3207 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3208 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3209yes:
3210#ifdef DEBUGGING
3280af22 3211 PL_regindent--;
4633a7c4
LW
3212#endif
3213 return 1;
3214
3215no:
7821416a
IZ
3216 DEBUG_r(
3217 PerlIO_printf(Perl_debug_log,
3218 "%*s %sfailed...%s\n",
3219 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3220 );
3221 goto do_no;
3222no_final:
3223do_no:
4633a7c4 3224#ifdef DEBUGGING
3280af22 3225 PL_regindent--;
4633a7c4 3226#endif
a0d0e21e 3227 return 0;
a687059c
LW
3228}
3229
3230/*
3231 - regrepeat - repeatedly match something simple, report how many
3232 */
3233/*
3234 * [This routine now assumes that it will only match on things of length 1.
3235 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3236 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3237 */
76e3520e 3238STATIC I32
cea2e8a9 3239S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3240{
5c0ca799 3241 dTHR;
a0d0e21e 3242 register char *scan;
a0d0e21e 3243 register I32 c;
3280af22 3244 register char *loceol = PL_regeol;
a0ed51b3 3245 register I32 hardcount = 0;
a0d0e21e 3246
3280af22 3247 scan = PL_reginput;
c277df42 3248 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3249 loceol = scan + max;
a0d0e21e 3250 switch (OP(p)) {
22c35a8c 3251 case REG_ANY:
a0d0e21e
LW
3252 while (scan < loceol && *scan != '\n')
3253 scan++;
3254 break;
3255 case SANY:
3256 scan = loceol;
3257 break;
a0ed51b3
LW
3258 case ANYUTF8:
3259 loceol = PL_regeol;
3260 while (scan < loceol && *scan != '\n') {
3261 scan += UTF8SKIP(scan);
3262 hardcount++;
3263 }
3264 break;
3265 case SANYUTF8:
3266 loceol = PL_regeol;
3267 while (scan < loceol) {
3268 scan += UTF8SKIP(scan);
3269 hardcount++;
3270 }
3271 break;
bbce6d69 3272 case EXACT: /* length of string is 1 */
cd439c50 3273 c = (U8)*STRING(p);
bbce6d69 3274 while (scan < loceol && UCHARAT(scan) == c)
3275 scan++;
3276 break;
3277 case EXACTF: /* length of string is 1 */
cd439c50 3278 c = (U8)*STRING(p);
bbce6d69 3279 while (scan < loceol &&
22c35a8c 3280 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3281 scan++;
3282 break;
3283 case EXACTFL: /* length of string is 1 */
3280af22 3284 PL_reg_flags |= RF_tainted;
cd439c50 3285 c = (U8)*STRING(p);
bbce6d69 3286 while (scan < loceol &&
22c35a8c 3287 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3288 scan++;
3289 break;
a0ed51b3
LW
3290 case ANYOFUTF8:
3291 loceol = PL_regeol;
3292 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3293 scan += UTF8SKIP(scan);
3294 hardcount++;
3295 }
3296 break;
a0d0e21e 3297 case ANYOF:
936ed897 3298 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3299 scan++;
a0d0e21e
LW
3300 break;
3301 case ALNUM:
3302 while (scan < loceol && isALNUM(*scan))
3303 scan++;
3304 break;
a0ed51b3
LW
3305 case ALNUMUTF8:
3306 loceol = PL_regeol;
dfe13c55 3307 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3308 scan += UTF8SKIP(scan);
3309 hardcount++;
3310 }
3311 break;
bbce6d69 3312 case ALNUML:
3280af22 3313 PL_reg_flags |= RF_tainted;
bbce6d69 3314 while (scan < loceol && isALNUM_LC(*scan))
3315 scan++;
3316 break;
a0ed51b3
LW
3317 case ALNUMLUTF8:
3318 PL_reg_flags |= RF_tainted;
3319 loceol = PL_regeol;
dfe13c55 3320 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3321 scan += UTF8SKIP(scan);
3322 hardcount++;
3323 }
3324 break;
3325 break;
a0d0e21e
LW
3326 case NALNUM:
3327 while (scan < loceol && !isALNUM(*scan))
3328 scan++;
3329 break;
a0ed51b3
LW
3330 case NALNUMUTF8:
3331 loceol = PL_regeol;
dfe13c55 3332 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3333 scan += UTF8SKIP(scan);
3334 hardcount++;
3335 }
3336 break;
bbce6d69 3337 case NALNUML:
3280af22 3338 PL_reg_flags |= RF_tainted;
bbce6d69 3339 while (scan < loceol && !isALNUM_LC(*scan))
3340 scan++;
3341 break;
a0ed51b3
LW
3342 case NALNUMLUTF8:
3343 PL_reg_flags |= RF_tainted;
3344 loceol = PL_regeol;
dfe13c55 3345 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3346 scan += UTF8SKIP(scan);
3347 hardcount++;
3348 }
3349 break;
a0d0e21e
LW
3350 case SPACE:
3351 while (scan < loceol && isSPACE(*scan))
3352 scan++;
3353 break;
a0ed51b3
LW
3354 case SPACEUTF8:
3355 loceol = PL_regeol;
dfe13c55 3356 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3357 scan += UTF8SKIP(scan);
3358 hardcount++;
3359 }
3360 break;
bbce6d69 3361 case SPACEL:
3280af22 3362 PL_reg_flags |= RF_tainted;
bbce6d69 3363 while (scan < loceol && isSPACE_LC(*scan))
3364 scan++;
3365 break;
a0ed51b3
LW
3366 case SPACELUTF8:
3367 PL_reg_flags |= RF_tainted;
3368 loceol = PL_regeol;
dfe13c55 3369 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3370 scan += UTF8SKIP(scan);
3371 hardcount++;
3372 }
3373 break;
a0d0e21e
LW
3374 case NSPACE:
3375 while (scan < loceol && !isSPACE(*scan))
3376 scan++;
3377 break;
a0ed51b3
LW
3378 case NSPACEUTF8:
3379 loceol = PL_regeol;
dfe13c55 3380 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3381 scan += UTF8SKIP(scan);
3382 hardcount++;
3383 }
3384 break;
bbce6d69 3385 case NSPACEL:
3280af22 3386 PL_reg_flags |= RF_tainted;
bbce6d69 3387 while (scan < loceol && !isSPACE_LC(*scan))
3388 scan++;
3389 break;
a0ed51b3
LW
3390 case NSPACELUTF8:
3391 PL_reg_flags |= RF_tainted;
3392 loceol = PL_regeol;
dfe13c55 3393 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3394 scan += UTF8SKIP(scan);
3395 hardcount++;
3396 }
3397 break;
a0d0e21e
LW
3398 case DIGIT:
3399 while (scan < loceol && isDIGIT(*scan))
3400 scan++;
3401 break;
a0ed51b3
LW
3402 case DIGITUTF8:
3403 loceol = PL_regeol;
dfe13c55 3404 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3405 scan += UTF8SKIP(scan);
3406 hardcount++;
3407 }
3408 break;
3409 break;
a0d0e21e
LW
3410 case NDIGIT:
3411 while (scan < loceol && !isDIGIT(*scan))
3412 scan++;
3413 break;
a0ed51b3
LW
3414 case NDIGITUTF8:
3415 loceol = PL_regeol;
dfe13c55 3416 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3417 scan += UTF8SKIP(scan);
3418 hardcount++;
3419 }
3420 break;
a0d0e21e
LW
3421 default: /* Called on something of 0 width. */
3422 break; /* So match right here or not at all. */
3423 }
a687059c 3424
a0ed51b3
LW
3425 if (hardcount)
3426 c = hardcount;
3427 else
3428 c = scan - PL_reginput;
3280af22 3429 PL_reginput = scan;
a687059c 3430
c277df42
IZ
3431 DEBUG_r(
3432 {
3433 SV *prop = sv_newmortal();
3434
3435 regprop(prop, p);
3436 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3437 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3438 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3439 });
3440
a0d0e21e 3441 return(c);
a687059c
LW
3442}
3443
3444/*
c277df42
IZ
3445 - regrepeat_hard - repeatedly match something, report total lenth and length
3446 *
3447 * The repeater is supposed to have constant length.
3448 */
3449
76e3520e 3450STATIC I32
cea2e8a9 3451S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3452{
5c0ca799 3453 dTHR;
c277df42
IZ
3454 register char *scan;
3455 register char *start;
3280af22 3456 register char *loceol = PL_regeol;
a0ed51b3 3457 I32 l = 0;
708e3b05 3458 I32 count = 0, res = 1;
a0ed51b3
LW
3459
3460 if (!max)
3461 return 0;
c277df42 3462
3280af22 3463 start = PL_reginput;
a0ed51b3 3464 if (UTF) {
708e3b05 3465 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3466 if (!count++) {
3467 l = 0;
3468 while (start < PL_reginput) {
3469 l++;
3470 start += UTF8SKIP(start);
3471 }
3472 *lp = l;
3473 if (l == 0)
3474 return max;
3475 }
3476 if (count == max)
3477 return count;
3478 }
3479 }
3480 else {
708e3b05 3481 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3482 if (!count++) {
3483 *lp = l = PL_reginput - start;
3484 if (max != REG_INFTY && l*max < loceol - scan)
3485 loceol = scan + l*max;
3486 if (l == 0)
3487 return max;
c277df42
IZ
3488 }
3489 }
3490 }
708e3b05 3491 if (!res)
3280af22 3492 PL_reginput = scan;
c277df42 3493
a0ed51b3 3494 return count;
c277df42
IZ
3495}
3496
3497/*
cb8d8820 3498 - reginclass - determine if a character falls into a character class
bbce6d69 3499 */
3500
76e3520e 3501STATIC bool
936ed897 3502S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3503{
5c0ca799 3504 dTHR;
b8c5462f 3505 char flags = ANYOF_FLAGS(p);
bbce6d69 3506 bool match = FALSE;
3507
3508 c &= 0xFF;
b8c5462f 3509 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3510 match = TRUE;
3511 else if (flags & ANYOF_FOLD) {
3512 I32 cf;
3513 if (flags & ANYOF_LOCALE) {
3280af22 3514 PL_reg_flags |= RF_tainted;
22c35a8c 3515 cf = PL_fold_locale[c];
bbce6d69 3516 }
3517 else
22c35a8c 3518 cf = PL_fold[c];
b8c5462f 3519 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3520 match = TRUE;
3521 }
3522
b8c5462f 3523 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3524 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3525 if (
3526 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3527 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3528 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3529 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3530 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3531 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3532 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3533 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3534 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3535 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3536 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3537 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3538 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3539 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3540 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3541 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3542 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3543 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3544 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3545 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3546 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3547 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3548 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3549 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3550 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3551 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3552 ) /* How's that for a conditional? */
bbce6d69 3553 {
3554 match = TRUE;
3555 }
3556 }
3557
ae5c130c 3558 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3559}
3560
a0ed51b3 3561STATIC bool
cea2e8a9 3562S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3563{
3564 dTHR;
a0ed51b3
LW
3565 char flags = ARG1(f);
3566 bool match = FALSE;
3567 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3568
3569 if (swash_fetch(sv, p))
3570 match = TRUE;
3571 else if (flags & ANYOF_FOLD) {
3572 I32 cf;
dfe13c55 3573 U8 tmpbuf[10];
a0ed51b3
LW
3574 if (flags & ANYOF_LOCALE) {
3575 PL_reg_flags |= RF_tainted;
3576 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3577 }
3578 else
3579 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3580 if (swash_fetch(sv, tmpbuf))
3581 match = TRUE;
3582 }
3583
b8c5462f 3584 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3585
3586 return (flags & ANYOF_INVERT) ? !match : match;
3587}
161b471a 3588
dfe13c55 3589STATIC U8 *
cea2e8a9 3590S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3591{
3592 dTHR;
a0ed51b3
LW
3593 if (off >= 0) {
3594 while (off-- && s < (U8*)PL_regeol)
3595 s += UTF8SKIP(s);
3596 }
3597 else {
3598 while (off++) {
3599 if (s > (U8*)PL_bostr) {
3600 s--;
3601 if (*s & 0x80) {
3602 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3603 s--;
3604 } /* XXX could check well-formedness here */
3605 }
3606 }
3607 }
3608 return s;
3609}
161b471a 3610
dfe13c55 3611STATIC U8 *
cea2e8a9 3612S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3613{
c485e607 3614 dTHR;
a0ed51b3
LW
3615 if (off >= 0) {
3616 while (off-- && s < (U8*)PL_regeol)
3617 s += UTF8SKIP(s);
3618 if (off >= 0)
3619 return 0;
3620 }
3621 else {
3622 while (off++) {
3623 if (s > (U8*)PL_bostr) {
3624 s--;
3625 if (*s & 0x80) {
3626 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3627 s--;
3628 } /* XXX could check well-formedness here */
3629 }
3630 else
3631 break;
3632 }
3633 if (off <= 0)
3634 return 0;
3635 }
3636 return s;
3637}
51371543
GS
3638
3639#ifdef PERL_OBJECT
51371543
GS
3640#include "XSUB.h"
3641#endif
3642
3643static void
3644restore_pos(pTHXo_ void *arg)
3645{
3646 dTHR;
3647 if (PL_reg_eval_set) {
3648 if (PL_reg_oldsaved) {
3649 PL_reg_re->subbeg = PL_reg_oldsaved;
3650 PL_reg_re->sublen = PL_reg_oldsavedlen;
3651 RX_MATCH_COPIED_on(PL_reg_re);
3652 }
3653 PL_reg_magic->mg_len = PL_reg_oldpos;
3654 PL_reg_eval_set = 0;
3655 PL_curpm = PL_reg_oldcurpm;
3656 }
3657}
3658