This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
various documentation tweaks suggested by M. J. T. Guy
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
6eb5f6b9
JH
278/* XXXX Some places assume that there is a fixed substring.
279 An update may be needed if optimizer marks as "INTUITable"
280 RExen without fixed substrings. Similarly, it is assumed that
281 lengths of all the strings are no more than minlen, thus they
282 cannot come from lookahead.
283 (Or minlen should take into account lookahead.) */
284
2c2d71f5
JH
285/* A failure to find a constant substring means that there is no need to make
286 an expensive call to REx engine, thus we celebrate a failure. Similarly,
287 finding a substring too deep into the string means that less calls to
30944b6d
IZ
288 regtry() should be needed.
289
290 REx compiler's optimizer found 4 possible hints:
291 a) Anchored substring;
292 b) Fixed substring;
293 c) Whether we are anchored (beginning-of-line or \G);
294 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 295 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
296 string which does not contradict any of them.
297 */
2c2d71f5 298
6eb5f6b9
JH
299/* Most of decisions we do here should have been done at compile time.
300 The nodes of the REx which we used for the search should have been
301 deleted from the finite automaton. */
302
cad2e5aa
JH
303char *
304Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
305 char *strend, U32 flags, re_scream_pos_data *data)
306{
2c2d71f5 307 register I32 start_shift;
cad2e5aa 308 /* Should be nonnegative! */
2c2d71f5
JH
309 register I32 end_shift;
310 register char *s;
311 register SV *check;
cad2e5aa
JH
312 char *t;
313 I32 ml_anch;
2c2d71f5 314 char *tmp;
6eb5f6b9
JH
315 register char *other_last = Nullch; /* other substr checked before this */
316 char *check_at; /* check substr found at this pos */
30944b6d
IZ
317#ifdef DEBUGGING
318 char *i_strpos = strpos;
319#endif
cad2e5aa
JH
320
321 DEBUG_r( if (!PL_colorset) reginitcolors() );
322 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 323 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
324 PL_colors[4],PL_colors[5],PL_colors[0],
325 prog->precomp,
326 PL_colors[1],
327 (strlen(prog->precomp) > 60 ? "..." : ""),
328 PL_colors[0],
b900a521 329 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
330 strpos, PL_colors[1],
331 (strend - strpos > 60 ? "..." : ""))
332 );
333
2c2d71f5
JH
334 if (prog->minlen > strend - strpos) {
335 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 336 goto fail;
2c2d71f5
JH
337 }
338 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
339 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
340 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 341 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
342
343 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 344 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
345 I32 slen;
346
347 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
348 && (sv && (strpos + SvCUR(sv) != strend)) ) {
349 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 350 goto fail;
2c2d71f5 351 }
adac82c7 352 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 353 s = HOPc(strpos, prog->check_offset_min);
cad2e5aa
JH
354 if (SvTAIL(prog->check_substr)) {
355 slen = SvCUR(prog->check_substr); /* >= 1 */
356
2c2d71f5
JH
357 if ( strend - s > slen || strend - s < slen - 1
358 || (strend - s == slen && strend[-1] != '\n')) {
359 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
360 goto fail_finish;
cad2e5aa
JH
361 }
362 /* Now should match s[0..slen-2] */
363 slen--;
364 if (slen && (*SvPVX(prog->check_substr) != *s
365 || (slen > 1
2c2d71f5
JH
366 && memNE(SvPVX(prog->check_substr), s, slen)))) {
367 report_neq:
368 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
369 goto fail_finish;
370 }
cad2e5aa
JH
371 }
372 else if (*SvPVX(prog->check_substr) != *s
373 || ((slen = SvCUR(prog->check_substr)) > 1
374 && memNE(SvPVX(prog->check_substr), s, slen)))
2c2d71f5
JH
375 goto report_neq;
376 goto success_at_start;
cad2e5aa 377 }
2c2d71f5 378 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 379 s = strpos;
2c2d71f5
JH
380 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
381 /* Should be nonnegative! */
382 end_shift = prog->minlen - start_shift -
383 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
384 if (!ml_anch) {
385 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
386 - (SvTAIL(prog->check_substr) != 0);
387 I32 eshift = strend - s - end;
388
389 if (end_shift < eshift)
390 end_shift = eshift;
391 }
cad2e5aa 392 }
2c2d71f5 393 else { /* Can match at random position */
cad2e5aa
JH
394 ml_anch = 0;
395 s = strpos;
2c2d71f5
JH
396 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
397 /* Should be nonnegative! */
398 end_shift = prog->minlen - start_shift -
399 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
cad2e5aa
JH
400 }
401
2c2d71f5 402#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 403 if (end_shift < 0)
6bbae5e6 404 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
405#endif
406
407 check = prog->check_substr;
408 restart:
409 /* Find a possible match in the region s..strend by looking for
410 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 411 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
412 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
413 I32 p = -1; /* Internal iterator of scream. */
414 I32 *pp = data ? data->scream_pos : &p;
415
2c2d71f5
JH
416 if (PL_screamfirst[BmRARE(check)] >= 0
417 || ( BmRARE(check) == '\n'
418 && (BmPREVIOUS(check) == SvCUR(check) - 1)
419 && SvTAIL(check) ))
420 s = screaminstr(sv, check,
421 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 422 else
2c2d71f5 423 goto fail_finish;
cad2e5aa
JH
424 if (data)
425 *data->scream_olds = s;
426 }
427 else
428 s = fbm_instr((unsigned char*)s + start_shift,
429 (unsigned char*)strend - end_shift,
2c2d71f5 430 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
431
432 /* Update the count-of-usability, remove useless subpatterns,
433 unshift s. */
2c2d71f5
JH
434
435 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
436 (s ? "Found" : "Did not find"),
437 ((check == prog->anchored_substr) ? "anchored" : "floating"),
438 PL_colors[0],
7b0972df
JH
439 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
440 SvPVX(check),
2c2d71f5
JH
441 PL_colors[1], (SvTAIL(check) ? "$" : ""),
442 (s ? " at offset " : "...\n") ) );
443
444 if (!s)
445 goto fail_finish;
446
6eb5f6b9
JH
447 check_at = s;
448
2c2d71f5 449 /* Finish the diagnostic message */
30944b6d 450 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
451
452 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
453 Start with the other substr.
454 XXXX no SCREAM optimization yet - and a very coarse implementation
455 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
456 *always* match. Probably should be marked during compile...
457 Probably it is right to do no SCREAM here...
458 */
459
460 if (prog->float_substr && prog->anchored_substr) {
30944b6d 461 /* Take into account the "other" substring. */
2c2d71f5
JH
462 /* XXXX May be hopelessly wrong for UTF... */
463 if (!other_last)
6eb5f6b9 464 other_last = strpos;
2c2d71f5 465 if (check == prog->float_substr) {
30944b6d
IZ
466 do_other_anchored:
467 {
2c2d71f5
JH
468 char *last = s - start_shift, *last1, *last2;
469 char *s1 = s;
470
471 tmp = PL_bostr;
472 t = s - prog->check_offset_max;
473 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
474 && (!(prog->reganch & ROPT_UTF8)
475 || (PL_bostr = strpos, /* Used in regcopmaybe() */
476 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
477 && t > strpos)))
30944b6d 478 /* EMPTY */;
2c2d71f5
JH
479 else
480 t = strpos;
481 t += prog->anchored_offset;
6eb5f6b9
JH
482 if (t < other_last) /* These positions already checked */
483 t = other_last;
2c2d71f5
JH
484 PL_bostr = tmp;
485 last2 = last1 = strend - prog->minlen;
486 if (last < last1)
487 last1 = last;
488 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
489 /* On end-of-str: see comment below. */
490 s = fbm_instr((unsigned char*)t,
491 (unsigned char*)last1 + prog->anchored_offset
492 + SvCUR(prog->anchored_substr)
493 - (SvTAIL(prog->anchored_substr)!=0),
494 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
495 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
496 (s ? "Found" : "Contradicts"),
497 PL_colors[0],
7b0972df
JH
498 (int)(SvCUR(prog->anchored_substr)
499 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
500 SvPVX(prog->anchored_substr),
501 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
502 if (!s) {
503 if (last1 >= last2) {
504 DEBUG_r(PerlIO_printf(Perl_debug_log,
505 ", giving up...\n"));
506 goto fail_finish;
507 }
508 DEBUG_r(PerlIO_printf(Perl_debug_log,
509 ", trying floating at offset %ld...\n",
30944b6d 510 (long)(s1 + 1 - i_strpos)));
2c2d71f5 511 PL_regeol = strend; /* Used in HOP() */
6eb5f6b9 512 other_last = last1 + prog->anchored_offset + 1;
2c2d71f5
JH
513 s = HOPc(last, 1);
514 goto restart;
515 }
516 else {
517 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 518 (long)(s - i_strpos)));
2c2d71f5 519 t = s - prog->anchored_offset;
6eb5f6b9 520 other_last = s + 1;
30944b6d 521 s = s1;
2c2d71f5
JH
522 if (t == strpos)
523 goto try_at_start;
2c2d71f5
JH
524 goto try_at_offset;
525 }
30944b6d 526 }
2c2d71f5
JH
527 }
528 else { /* Take into account the floating substring. */
529 char *last, *last1;
530 char *s1 = s;
531
532 t = s - start_shift;
533 last1 = last = strend - prog->minlen + prog->float_min_offset;
534 if (last - t > prog->float_max_offset)
535 last = t + prog->float_max_offset;
536 s = t + prog->float_min_offset;
6eb5f6b9
JH
537 if (s < other_last)
538 s = other_last;
2c2d71f5
JH
539 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
540 /* fbm_instr() takes into account exact value of end-of-str
541 if the check is SvTAIL(ed). Since false positives are OK,
542 and end-of-str is not later than strend we are OK. */
543 s = fbm_instr((unsigned char*)s,
544 (unsigned char*)last + SvCUR(prog->float_substr)
545 - (SvTAIL(prog->float_substr)!=0),
546 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
547 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
548 (s ? "Found" : "Contradicts"),
549 PL_colors[0],
7b0972df
JH
550 (int)(SvCUR(prog->float_substr)
551 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
552 SvPVX(prog->float_substr),
553 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
554 if (!s) {
555 if (last1 == last) {
556 DEBUG_r(PerlIO_printf(Perl_debug_log,
557 ", giving up...\n"));
558 goto fail_finish;
559 }
560 DEBUG_r(PerlIO_printf(Perl_debug_log,
561 ", trying anchored starting at offset %ld...\n",
30944b6d 562 (long)(s1 + 1 - i_strpos)));
6eb5f6b9 563 other_last = last + 1;
2c2d71f5
JH
564 PL_regeol = strend; /* Used in HOP() */
565 s = HOPc(t, 1);
566 goto restart;
567 }
568 else {
569 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 570 (long)(s - i_strpos)));
6eb5f6b9 571 other_last = s + 1;
30944b6d 572 s = s1;
2c2d71f5
JH
573 if (t == strpos)
574 goto try_at_start;
2c2d71f5
JH
575 goto try_at_offset;
576 }
577 }
cad2e5aa 578 }
2c2d71f5
JH
579
580 t = s - prog->check_offset_max;
581 tmp = PL_bostr;
582 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
583 && (!(prog->reganch & ROPT_UTF8)
584 || (PL_bostr = strpos, /* Used in regcopmaybe() */
585 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
586 && t > strpos)))) {
587 PL_bostr = tmp;
588 /* Fixed substring is found far enough so that the match
589 cannot start at strpos. */
590 try_at_offset:
cad2e5aa 591 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
592 /* Eventually fbm_*() should handle this, but often
593 anchored_offset is not 0, so this check will not be wasted. */
594 /* XXXX In the code below we prefer to look for "^" even in
595 presence of anchored substrings. And we search even
596 beyond the found float position. These pessimizations
597 are historical artefacts only. */
598 find_anchor:
2c2d71f5 599 while (t < strend - prog->minlen) {
cad2e5aa
JH
600 if (*t == '\n') {
601 if (t < s - prog->check_offset_min) {
30944b6d
IZ
602 if (prog->anchored_substr) {
603 /* We definitely contradict the found anchored
604 substr. Due to the above check we do not
605 contradict "check" substr.
606 Thus we can arrive here only if check substr
607 is float. Redo checking for "other"=="fixed".
608 */
609 strpos = t + 1;
610 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
611 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
612 goto do_other_anchored;
613 }
cad2e5aa 614 s = t + 1;
2c2d71f5 615 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 616 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
617 goto set_useful;
618 }
2c2d71f5 619 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
620 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
621 strpos = s = t + 1;
cad2e5aa
JH
622 goto restart;
623 }
624 t++;
625 }
2c2d71f5
JH
626 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
627 PL_colors[0],PL_colors[1]));
628 goto fail_finish;
cad2e5aa
JH
629 }
630 s = t;
631 set_useful:
2c2d71f5 632 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
633 }
634 else {
2c2d71f5
JH
635 PL_bostr = tmp;
636 /* The found string does not prohibit matching at beg-of-str
637 - no optimization of calling REx engine can be performed,
638 unless it was an MBOL and we are not after MBOL. */
639 try_at_start:
640 /* Even in this situation we may use MBOL flag if strpos is offset
641 wrt the start of the string. */
642 if (ml_anch && sv
cad2e5aa
JH
643 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
644 t = strpos;
645 goto find_anchor;
646 }
30944b6d
IZ
647 DEBUG_r( if (ml_anch)
648 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
649 PL_colors[0],PL_colors[1]);
650 );
2c2d71f5 651 success_at_start:
30944b6d 652 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
cad2e5aa
JH
653 && --BmUSEFUL(prog->check_substr) < 0
654 && prog->check_substr == prog->float_substr) { /* boo */
655 /* If flags & SOMETHING - do not do it many times on the same match */
656 SvREFCNT_dec(prog->check_substr);
657 prog->check_substr = Nullsv; /* disable */
658 prog->float_substr = Nullsv; /* clear */
659 s = strpos;
3cf5c195
IZ
660 /* XXXX This is a remnant of the old implementation. It
661 looks wasteful, since now INTUIT can use many
6eb5f6b9 662 other heuristics. */
cad2e5aa
JH
663 prog->reganch &= ~RE_USE_INTUIT;
664 }
665 else
666 s = strpos;
667 }
668
6eb5f6b9
JH
669 /* Last resort... */
670 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
671 if (prog->regstclass) {
672 /* minlen == 0 is possible if regstclass is \b or \B,
673 and the fixed substr is ''$.
674 Since minlen is already taken into account, s+1 is before strend;
675 accidentally, minlen >= 1 guaranties no false positives at s + 1
676 even for \b or \B. But (minlen? 1 : 0) below assumes that
677 regstclass does not come from lookahead... */
678 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
679 This leaves EXACTF only, which is dealt with in find_byclass(). */
680 char *endpos = (prog->anchored_substr || ml_anch)
681 ? s + (prog->minlen? 1 : 0)
682 : (prog->float_substr ? check_at - start_shift + 1
683 : strend) ;
684 char *startpos = sv ? strend - SvCUR(sv) : s;
685
686 t = s;
687 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
688 if (!s) {
689#ifdef DEBUGGING
690 char *what;
691#endif
692 if (endpos == strend) {
693 DEBUG_r( PerlIO_printf(Perl_debug_log,
694 "Could not match STCLASS...\n") );
695 goto fail;
696 }
697 /* Contradict one of substrings */
698 if (prog->anchored_substr) {
699 DEBUG_r( PerlIO_printf(Perl_debug_log,
700 "This position contradicts STCLASS...\n") );
701 if (prog->anchored_substr == check) {
702 DEBUG_r( what = "anchored" );
703 hop_and_restart:
704 PL_regeol = strend; /* Used in HOP() */
705 s = HOPc(t, 1);
706 DEBUG_r( PerlIO_printf(Perl_debug_log,
707 "trying %s substr starting at offset %ld...\n",
708 what, (long)(s + start_shift - i_strpos)) );
709 goto restart;
710 }
711 /* Have both, check is floating */
712 if (t + start_shift >= check_at) /* Contradicts floating=check */
713 goto retry_floating_check;
714 /* Recheck anchored substring, but not floating... */
715 s = check_at;
716 DEBUG_r( PerlIO_printf(Perl_debug_log,
717 "trying anchored substr starting at offset %ld...\n",
718 (long)(other_last - i_strpos)) );
719 goto do_other_anchored;
720 }
721 /* Check is floating subtring. */
722 retry_floating_check:
723 t = check_at - start_shift;
724 DEBUG_r( what = "floating" );
725 goto hop_and_restart;
726 }
727 DEBUG_r( if (t != s)
728 PerlIO_printf(Perl_debug_log,
729 "By STCLASS: moving %ld --> %ld\n",
730 (long)(t - i_strpos), (long)(s - i_strpos));
731 else
732 PerlIO_printf(Perl_debug_log,
733 "Does not contradict STCLASS...\n") );
734 }
2c2d71f5 735 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 736 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 737 return s;
2c2d71f5
JH
738
739 fail_finish: /* Substring not found */
740 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 741 fail:
2c2d71f5 742 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
743 PL_colors[4],PL_colors[5]));
744 return Nullch;
745}
9661b544 746
6eb5f6b9 747/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
748STATIC char *
749S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 750{
6eb5f6b9
JH
751 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
752 char *m;
753 int ln;
754 int c1;
755 int c2;
756 char *e;
757 register I32 tmp = 1; /* Scratch variable? */
cad2e5aa 758
6eb5f6b9
JH
759 /* We know what class it must start with. */
760 switch (OP(c)) {
761 case ANYOFUTF8:
762 while (s < strend) {
763 if (REGINCLASSUTF8(c, (U8*)s)) {
764 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 765 goto got_it;
3cf5c195 766 else
6eb5f6b9 767 tmp = doevery;
a0ed51b3 768 }
6eb5f6b9
JH
769 else
770 tmp = 1;
a0ed51b3
LW
771 s += UTF8SKIP(s);
772 }
6eb5f6b9
JH
773 break;
774 case ANYOF:
a0ed51b3 775 while (s < strend) {
6eb5f6b9
JH
776 if (REGINCLASS(c, *s)) {
777 if (tmp && (norun || regtry(prog, s)))
778 goto got_it;
779 else
780 tmp = doevery;
a0ed51b3 781 }
6eb5f6b9
JH
782 else
783 tmp = 1;
a0ed51b3 784 s++;
a0d0e21e 785 }
6eb5f6b9
JH
786 break;
787 case EXACTF:
788 m = STRING(c);
789 ln = STR_LEN(c);
790 c1 = *m;
791 c2 = PL_fold[c1];
792 goto do_exactf;
793 case EXACTFL:
794 m = STRING(c);
795 ln = STR_LEN(c);
796 c1 = *m;
797 c2 = PL_fold_locale[c1];
798 do_exactf:
799 e = strend - ln;
b3c9acc1 800
6eb5f6b9
JH
801 if (norun && e < s)
802 e = s; /* Due to minlen logic of intuit() */
b3c9acc1
IZ
803 /* Here it is NOT UTF! */
804 if (c1 == c2) {
805 while (s <= e) {
806 if ( *s == c1
807 && (ln == 1 || (OP(c) == EXACTF
808 ? ibcmp(s, m, ln)
809 : ibcmp_locale(s, m, ln)))
6eb5f6b9 810 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
811 goto got_it;
812 s++;
813 }
814 } else {
815 while (s <= e) {
816 if ( (*s == c1 || *s == c2)
817 && (ln == 1 || (OP(c) == EXACTF
818 ? ibcmp(s, m, ln)
819 : ibcmp_locale(s, m, ln)))
6eb5f6b9 820 && (norun || regtry(prog, s)) )
b3c9acc1
IZ
821 goto got_it;
822 s++;
823 }
824 }
825 break;
bbce6d69 826 case BOUNDL:
3280af22 827 PL_reg_flags |= RF_tainted;
bbce6d69 828 /* FALL THROUGH */
a0d0e21e 829 case BOUND:
6eb5f6b9 830 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 831 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 832 while (s < strend) {
95bac841 833 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 834 tmp = !tmp;
6eb5f6b9 835 if ((norun || regtry(prog, s)))
a0d0e21e 836 goto got_it;
a687059c 837 }
a0d0e21e
LW
838 s++;
839 }
6eb5f6b9 840 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
841 goto got_it;
842 break;
a0ed51b3
LW
843 case BOUNDLUTF8:
844 PL_reg_flags |= RF_tainted;
845 /* FALL THROUGH */
846 case BOUNDUTF8:
6eb5f6b9 847 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
a0ed51b3
LW
848 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
849 while (s < strend) {
dfe13c55
GS
850 if (tmp == !(OP(c) == BOUND ?
851 swash_fetch(PL_utf8_alnum, (U8*)s) :
852 isALNUM_LC_utf8((U8*)s)))
853 {
a0ed51b3 854 tmp = !tmp;
6eb5f6b9 855 if ((norun || regtry(prog, s)))
a0ed51b3
LW
856 goto got_it;
857 }
858 s += UTF8SKIP(s);
859 }
6eb5f6b9 860 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
861 goto got_it;
862 break;
bbce6d69 863 case NBOUNDL:
3280af22 864 PL_reg_flags |= RF_tainted;
bbce6d69 865 /* FALL THROUGH */
a0d0e21e 866 case NBOUND:
6eb5f6b9 867 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
95bac841 868 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 869 while (s < strend) {
95bac841 870 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 871 tmp = !tmp;
6eb5f6b9 872 else if ((norun || regtry(prog, s)))
a0d0e21e
LW
873 goto got_it;
874 s++;
875 }
6eb5f6b9 876 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0d0e21e
LW
877 goto got_it;
878 break;
a0ed51b3
LW
879 case NBOUNDLUTF8:
880 PL_reg_flags |= RF_tainted;
881 /* FALL THROUGH */
882 case NBOUNDUTF8:
6eb5f6b9 883 if (prog->minlen)
dfe13c55 884 strend = reghop_c(strend, -1);
6eb5f6b9 885 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
a0ed51b3
LW
886 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
887 while (s < strend) {
dfe13c55
GS
888 if (tmp == !(OP(c) == NBOUND ?
889 swash_fetch(PL_utf8_alnum, (U8*)s) :
890 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 891 tmp = !tmp;
6eb5f6b9 892 else if ((norun || regtry(prog, s)))
a0ed51b3
LW
893 goto got_it;
894 s += UTF8SKIP(s);
895 }
6eb5f6b9 896 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
897 goto got_it;
898 break;
a0d0e21e
LW
899 case ALNUM:
900 while (s < strend) {
bbce6d69 901 if (isALNUM(*s)) {
6eb5f6b9 902 if (tmp && (norun || regtry(prog, s)))
bbce6d69 903 goto got_it;
904 else
905 tmp = doevery;
906 }
907 else
908 tmp = 1;
909 s++;
910 }
911 break;
a0ed51b3
LW
912 case ALNUMUTF8:
913 while (s < strend) {
dfe13c55 914 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 915 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
916 goto got_it;
917 else
918 tmp = doevery;
919 }
920 else
921 tmp = 1;
922 s += UTF8SKIP(s);
923 }
924 break;
bbce6d69 925 case ALNUML:
3280af22 926 PL_reg_flags |= RF_tainted;
bbce6d69 927 while (s < strend) {
928 if (isALNUM_LC(*s)) {
6eb5f6b9 929 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 930 goto got_it;
a687059c 931 else
a0d0e21e
LW
932 tmp = doevery;
933 }
934 else
935 tmp = 1;
936 s++;
937 }
938 break;
a0ed51b3
LW
939 case ALNUMLUTF8:
940 PL_reg_flags |= RF_tainted;
941 while (s < strend) {
dfe13c55 942 if (isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 943 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
944 goto got_it;
945 else
946 tmp = doevery;
947 }
948 else
949 tmp = 1;
950 s += UTF8SKIP(s);
951 }
952 break;
a0d0e21e
LW
953 case NALNUM:
954 while (s < strend) {
bbce6d69 955 if (!isALNUM(*s)) {
6eb5f6b9 956 if (tmp && (norun || regtry(prog, s)))
bbce6d69 957 goto got_it;
958 else
959 tmp = doevery;
960 }
961 else
962 tmp = 1;
963 s++;
964 }
965 break;
a0ed51b3
LW
966 case NALNUMUTF8:
967 while (s < strend) {
dfe13c55 968 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
6eb5f6b9 969 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
970 goto got_it;
971 else
972 tmp = doevery;
973 }
974 else
975 tmp = 1;
976 s += UTF8SKIP(s);
977 }
978 break;
bbce6d69 979 case NALNUML:
3280af22 980 PL_reg_flags |= RF_tainted;
bbce6d69 981 while (s < strend) {
982 if (!isALNUM_LC(*s)) {
6eb5f6b9 983 if (tmp && (norun || regtry(prog, s)))
a0d0e21e 984 goto got_it;
a687059c 985 else
a0d0e21e 986 tmp = doevery;
a687059c 987 }
a0d0e21e
LW
988 else
989 tmp = 1;
990 s++;
991 }
992 break;
a0ed51b3
LW
993 case NALNUMLUTF8:
994 PL_reg_flags |= RF_tainted;
995 while (s < strend) {
dfe13c55 996 if (!isALNUM_LC_utf8((U8*)s)) {
6eb5f6b9 997 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
998 goto got_it;
999 else
1000 tmp = doevery;
1001 }
1002 else
1003 tmp = 1;
1004 s += UTF8SKIP(s);
1005 }
1006 break;
a0d0e21e
LW
1007 case SPACE:
1008 while (s < strend) {
1009 if (isSPACE(*s)) {
6eb5f6b9 1010 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1011 goto got_it;
1012 else
1013 tmp = doevery;
2304df62 1014 }
a0d0e21e
LW
1015 else
1016 tmp = 1;
1017 s++;
1018 }
1019 break;
a0ed51b3
LW
1020 case SPACEUTF8:
1021 while (s < strend) {
dfe13c55 1022 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
6eb5f6b9 1023 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1024 goto got_it;
1025 else
1026 tmp = doevery;
1027 }
1028 else
1029 tmp = 1;
1030 s += UTF8SKIP(s);
1031 }
1032 break;
bbce6d69 1033 case SPACEL:
3280af22 1034 PL_reg_flags |= RF_tainted;
bbce6d69 1035 while (s < strend) {
1036 if (isSPACE_LC(*s)) {
6eb5f6b9 1037 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1038 goto got_it;
1039 else
1040 tmp = doevery;
1041 }
1042 else
1043 tmp = 1;
1044 s++;
1045 }
1046 break;
a0ed51b3
LW
1047 case SPACELUTF8:
1048 PL_reg_flags |= RF_tainted;
1049 while (s < strend) {
dfe13c55 1050 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
6eb5f6b9 1051 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1052 goto got_it;
1053 else
1054 tmp = doevery;
1055 }
1056 else
1057 tmp = 1;
1058 s += UTF8SKIP(s);
1059 }
1060 break;
a0d0e21e
LW
1061 case NSPACE:
1062 while (s < strend) {
1063 if (!isSPACE(*s)) {
6eb5f6b9 1064 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1065 goto got_it;
1066 else
1067 tmp = doevery;
a687059c 1068 }
a0d0e21e
LW
1069 else
1070 tmp = 1;
1071 s++;
1072 }
1073 break;
a0ed51b3
LW
1074 case NSPACEUTF8:
1075 while (s < strend) {
dfe13c55 1076 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
6eb5f6b9 1077 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1078 goto got_it;
1079 else
1080 tmp = doevery;
1081 }
1082 else
1083 tmp = 1;
1084 s += UTF8SKIP(s);
1085 }
1086 break;
bbce6d69 1087 case NSPACEL:
3280af22 1088 PL_reg_flags |= RF_tainted;
bbce6d69 1089 while (s < strend) {
1090 if (!isSPACE_LC(*s)) {
6eb5f6b9 1091 if (tmp && (norun || regtry(prog, s)))
bbce6d69 1092 goto got_it;
1093 else
1094 tmp = doevery;
1095 }
1096 else
1097 tmp = 1;
1098 s++;
1099 }
1100 break;
a0ed51b3
LW
1101 case NSPACELUTF8:
1102 PL_reg_flags |= RF_tainted;
1103 while (s < strend) {
dfe13c55 1104 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
6eb5f6b9 1105 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1106 goto got_it;
1107 else
1108 tmp = doevery;
1109 }
1110 else
1111 tmp = 1;
1112 s += UTF8SKIP(s);
1113 }
1114 break;
a0d0e21e
LW
1115 case DIGIT:
1116 while (s < strend) {
1117 if (isDIGIT(*s)) {
6eb5f6b9 1118 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1119 goto got_it;
1120 else
1121 tmp = doevery;
2b69d0c2 1122 }
a0d0e21e
LW
1123 else
1124 tmp = 1;
1125 s++;
1126 }
1127 break;
a0ed51b3
LW
1128 case DIGITUTF8:
1129 while (s < strend) {
dfe13c55 1130 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1131 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1132 goto got_it;
1133 else
1134 tmp = doevery;
1135 }
1136 else
1137 tmp = 1;
1138 s += UTF8SKIP(s);
1139 }
1140 break;
b8c5462f
JH
1141 case DIGITL:
1142 PL_reg_flags |= RF_tainted;
1143 while (s < strend) {
1144 if (isDIGIT_LC(*s)) {
6eb5f6b9 1145 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1146 goto got_it;
1147 else
1148 tmp = doevery;
1149 }
1150 else
1151 tmp = 1;
1152 s++;
1153 }
1154 break;
1155 case DIGITLUTF8:
1156 PL_reg_flags |= RF_tainted;
1157 while (s < strend) {
1158 if (isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1159 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1160 goto got_it;
1161 else
1162 tmp = doevery;
1163 }
1164 else
1165 tmp = 1;
1166 s += UTF8SKIP(s);
1167 }
1168 break;
a0d0e21e
LW
1169 case NDIGIT:
1170 while (s < strend) {
1171 if (!isDIGIT(*s)) {
6eb5f6b9 1172 if (tmp && (norun || regtry(prog, s)))
a0d0e21e
LW
1173 goto got_it;
1174 else
1175 tmp = doevery;
a687059c 1176 }
a0d0e21e
LW
1177 else
1178 tmp = 1;
1179 s++;
1180 }
1181 break;
a0ed51b3
LW
1182 case NDIGITUTF8:
1183 while (s < strend) {
dfe13c55 1184 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
6eb5f6b9 1185 if (tmp && (norun || regtry(prog, s)))
a0ed51b3
LW
1186 goto got_it;
1187 else
1188 tmp = doevery;
1189 }
1190 else
1191 tmp = 1;
1192 s += UTF8SKIP(s);
1193 }
1194 break;
b8c5462f
JH
1195 case NDIGITL:
1196 PL_reg_flags |= RF_tainted;
1197 while (s < strend) {
1198 if (!isDIGIT_LC(*s)) {
6eb5f6b9 1199 if (tmp && (norun || regtry(prog, s)))
b8c5462f
JH
1200 goto got_it;
1201 else
1202 tmp = doevery;
1203 }
1204 else
1205 tmp = 1;
1206 s++;
a0ed51b3 1207 }
b8c5462f
JH
1208 break;
1209 case NDIGITLUTF8:
1210 PL_reg_flags |= RF_tainted;
1211 while (s < strend) {
1212 if (!isDIGIT_LC_utf8((U8*)s)) {
6eb5f6b9 1213 if (tmp && (norun || regtry(prog, s)))
b8c5462f 1214 goto got_it;
cf93c79d 1215 else
b8c5462f
JH
1216 tmp = doevery;
1217 }
1218 else
1219 tmp = 1;
1220 s += UTF8SKIP(s);
1221 }
1222 break;
b3c9acc1 1223 default:
3c3eec57
GS
1224 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1225 break;
d6a28714 1226 }
6eb5f6b9
JH
1227 return 0;
1228 got_it:
1229 return s;
1230}
1231
1232/*
1233 - regexec_flags - match a regexp against a string
1234 */
1235I32
1236Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1237 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1238/* strend: pointer to null at end of string */
1239/* strbeg: real beginning of string */
1240/* minend: end of match must be >=minend after stringarg. */
1241/* data: May be used for some additional optimizations. */
1242/* nosave: For optimizations. */
1243{
1244 dTHR;
1245 register char *s;
1246 register regnode *c;
1247 register char *startpos = stringarg;
1248 register I32 tmp;
1249 I32 minlen; /* must match at least this many chars */
1250 I32 dontbother = 0; /* how many characters not to try at end */
1251 I32 start_shift = 0; /* Offset of the start to find
1252 constant substr. */ /* CC */
1253 I32 end_shift = 0; /* Same for the end. */ /* CC */
1254 I32 scream_pos = -1; /* Internal iterator of scream. */
1255 char *scream_olds;
1256 SV* oreplsv = GvSV(PL_replgv);
1257
1258 PL_regcc = 0;
1259
1260 cache_re(prog);
1261#ifdef DEBUGGING
1262 PL_regnarrate = PL_debug & 512;
1263#endif
1264
1265 /* Be paranoid... */
1266 if (prog == NULL || startpos == NULL) {
1267 Perl_croak(aTHX_ "NULL regexp parameter");
1268 return 0;
1269 }
1270
1271 minlen = prog->minlen;
1272 if (strend - startpos < minlen) goto phooey;
1273
1274 if (startpos == strbeg) /* is ^ valid at stringarg? */
1275 PL_regprev = '\n';
1276 else {
1277 PL_regprev = (U32)stringarg[-1];
1278 if (!PL_multiline && PL_regprev == '\n')
1279 PL_regprev = '\0'; /* force ^ to NOT match */
1280 }
1281
1282 /* Check validity of program. */
1283 if (UCHARAT(prog->program) != REG_MAGIC) {
1284 Perl_croak(aTHX_ "corrupted regexp program");
1285 }
1286
1287 PL_reg_flags = 0;
1288 PL_reg_eval_set = 0;
1289 PL_reg_maxiter = 0;
1290
1291 if (prog->reganch & ROPT_UTF8)
1292 PL_reg_flags |= RF_utf8;
1293
1294 /* Mark beginning of line for ^ and lookbehind. */
1295 PL_regbol = startpos;
1296 PL_bostr = strbeg;
1297 PL_reg_sv = sv;
1298
1299 /* Mark end of line for $ (and such) */
1300 PL_regeol = strend;
1301
1302 /* see how far we have to get to not match where we matched before */
1303 PL_regtill = startpos+minend;
1304
1305 /* We start without call_cc context. */
1306 PL_reg_call_cc = 0;
1307
1308 /* If there is a "must appear" string, look for it. */
1309 s = startpos;
1310
1311 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1312 MAGIC *mg;
1313
1314 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1315 PL_reg_ganch = startpos;
1316 else if (sv && SvTYPE(sv) >= SVt_PVMG
1317 && SvMAGIC(sv)
1318 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1319 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1320 if (prog->reganch & ROPT_ANCH_GPOS) {
1321 if (s > PL_reg_ganch)
1322 goto phooey;
1323 s = PL_reg_ganch;
1324 }
1325 }
1326 else /* pos() not defined */
1327 PL_reg_ganch = strbeg;
1328 }
1329
1330 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
1331 re_scream_pos_data d;
1332
1333 d.scream_olds = &scream_olds;
1334 d.scream_pos = &scream_pos;
1335 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1336 if (!s)
1337 goto phooey; /* not present */
1338 }
1339
1340 DEBUG_r( if (!PL_colorset) reginitcolors() );
1341 DEBUG_r(PerlIO_printf(Perl_debug_log,
1342 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1343 PL_colors[4],PL_colors[5],PL_colors[0],
1344 prog->precomp,
1345 PL_colors[1],
1346 (strlen(prog->precomp) > 60 ? "..." : ""),
1347 PL_colors[0],
1348 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1349 startpos, PL_colors[1],
1350 (strend - startpos > 60 ? "..." : ""))
1351 );
1352
1353 /* Simplest case: anchored match need be tried only once. */
1354 /* [unless only anchor is BOL and multiline is set] */
1355 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1356 if (s == startpos && regtry(prog, startpos))
1357 goto got_it;
1358 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1359 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1360 {
1361 char *end;
1362
1363 if (minlen)
1364 dontbother = minlen - 1;
1365 end = HOPc(strend, -dontbother) - 1;
1366 /* for multiline we only have to try after newlines */
1367 if (prog->check_substr) {
1368 if (s == startpos)
1369 goto after_try;
1370 while (1) {
1371 if (regtry(prog, s))
1372 goto got_it;
1373 after_try:
1374 if (s >= end)
1375 goto phooey;
1376 if (prog->reganch & RE_USE_INTUIT) {
1377 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1378 if (!s)
1379 goto phooey;
1380 }
1381 else
1382 s++;
1383 }
1384 } else {
1385 if (s > startpos)
1386 s--;
1387 while (s < end) {
1388 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1389 if (regtry(prog, s))
1390 goto got_it;
1391 }
1392 }
1393 }
1394 }
1395 goto phooey;
1396 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1397 if (regtry(prog, PL_reg_ganch))
1398 goto got_it;
1399 goto phooey;
1400 }
1401
1402 /* Messy cases: unanchored match. */
1403 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
1404 /* we have /x+whatever/ */
1405 /* it must be a one character string (XXXX Except UTF?) */
1406 char ch = SvPVX(prog->anchored_substr)[0];
1407 if (UTF) {
1408 while (s < strend) {
1409 if (*s == ch) {
1410 if (regtry(prog, s)) goto got_it;
1411 s += UTF8SKIP(s);
1412 while (s < strend && *s == ch)
1413 s += UTF8SKIP(s);
1414 }
1415 s += UTF8SKIP(s);
1416 }
1417 }
1418 else {
1419 while (s < strend) {
1420 if (*s == ch) {
1421 if (regtry(prog, s)) goto got_it;
1422 s++;
1423 while (s < strend && *s == ch)
1424 s++;
1425 }
1426 s++;
1427 }
1428 }
1429 }
1430 /*SUPPRESS 560*/
1431 else if (prog->anchored_substr != Nullsv
1432 || (prog->float_substr != Nullsv
1433 && prog->float_max_offset < strend - s)) {
1434 SV *must = prog->anchored_substr
1435 ? prog->anchored_substr : prog->float_substr;
1436 I32 back_max =
1437 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
1438 I32 back_min =
1439 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1440 I32 delta = back_max - back_min;
1441 char *last = HOPc(strend, /* Cannot start after this */
1442 -(I32)(CHR_SVLEN(must)
1443 - (SvTAIL(must) != 0) + back_min));
1444 char *last1; /* Last position checked before */
1445
1446 if (s > PL_bostr)
1447 last1 = HOPc(s, -1);
1448 else
1449 last1 = s - 1; /* bogus */
1450
1451 /* XXXX check_substr already used to find `s', can optimize if
1452 check_substr==must. */
1453 scream_pos = -1;
1454 dontbother = end_shift;
1455 strend = HOPc(strend, -dontbother);
1456 while ( (s <= last) &&
1457 ((flags & REXEC_SCREAM)
1458 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
1459 end_shift, &scream_pos, 0))
1460 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
1461 (unsigned char*)strend, must,
1462 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
1463 if (HOPc(s, -back_max) > last1) {
1464 last1 = HOPc(s, -back_min);
1465 s = HOPc(s, -back_max);
1466 }
1467 else {
1468 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1469
1470 last1 = HOPc(s, -back_min);
1471 s = t;
1472 }
1473 if (UTF) {
1474 while (s <= last1) {
1475 if (regtry(prog, s))
1476 goto got_it;
1477 s += UTF8SKIP(s);
1478 }
1479 }
1480 else {
1481 while (s <= last1) {
1482 if (regtry(prog, s))
1483 goto got_it;
1484 s++;
1485 }
1486 }
1487 }
1488 goto phooey;
1489 }
1490 else if (c = prog->regstclass) {
1491 if (minlen) /* don't bother with what can't match */
1492 strend = HOPc(strend, -(minlen - 1));
1493 if (find_byclass(prog, c, s, strend, startpos, 0))
1494 goto got_it;
d6a28714
JH
1495 }
1496 else {
1497 dontbother = 0;
1498 if (prog->float_substr != Nullsv) { /* Trim the end. */
1499 char *last;
1500 I32 oldpos = scream_pos;
1501
1502 if (flags & REXEC_SCREAM) {
1503 last = screaminstr(sv, prog->float_substr, s - strbeg,
1504 end_shift, &scream_pos, 1); /* last one */
1505 if (!last)
1506 last = scream_olds; /* Only one occurence. */
b8c5462f 1507 }
d6a28714
JH
1508 else {
1509 STRLEN len;
1510 char *little = SvPV(prog->float_substr, len);
1511
1512 if (SvTAIL(prog->float_substr)) {
1513 if (memEQ(strend - len + 1, little, len - 1))
1514 last = strend - len + 1;
1515 else if (!PL_multiline)
1516 last = memEQ(strend - len, little, len)
1517 ? strend - len : Nullch;
b8c5462f 1518 else
d6a28714
JH
1519 goto find_last;
1520 } else {
1521 find_last:
1522 if (len)
1523 last = rninstr(s, strend, little, little + len);
b8c5462f 1524 else
d6a28714 1525 last = strend; /* matching `$' */
b8c5462f 1526 }
b8c5462f 1527 }
d6a28714
JH
1528 if (last == NULL) goto phooey; /* Should not happen! */
1529 dontbother = strend - last + prog->float_min_offset;
1530 }
1531 if (minlen && (dontbother < minlen))
1532 dontbother = minlen - 1;
1533 strend -= dontbother; /* this one's always in bytes! */
1534 /* We don't know much -- general case. */
1535 if (UTF) {
1536 for (;;) {
1537 if (regtry(prog, s))
1538 goto got_it;
1539 if (s >= strend)
1540 break;
b8c5462f 1541 s += UTF8SKIP(s);
d6a28714
JH
1542 };
1543 }
1544 else {
1545 do {
1546 if (regtry(prog, s))
1547 goto got_it;
1548 } while (s++ < strend);
1549 }
1550 }
1551
1552 /* Failure. */
1553 goto phooey;
1554
1555got_it:
1556 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1557
1558 if (PL_reg_eval_set) {
1559 /* Preserve the current value of $^R */
1560 if (oreplsv != GvSV(PL_replgv))
1561 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1562 restored, the value remains
1563 the same. */
1564 restore_pos(aTHXo_ 0);
1565 }
1566
1567 /* make sure $`, $&, $', and $digit will work later */
1568 if ( !(flags & REXEC_NOT_FIRST) ) {
1569 if (RX_MATCH_COPIED(prog)) {
1570 Safefree(prog->subbeg);
1571 RX_MATCH_COPIED_off(prog);
1572 }
1573 if (flags & REXEC_COPY_STR) {
1574 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1575
1576 s = savepvn(strbeg, i);
1577 prog->subbeg = s;
1578 prog->sublen = i;
1579 RX_MATCH_COPIED_on(prog);
1580 }
1581 else {
1582 prog->subbeg = strbeg;
1583 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1584 }
1585 }
1586
1587 return 1;
1588
1589phooey:
1590 if (PL_reg_eval_set)
1591 restore_pos(aTHXo_ 0);
1592 return 0;
1593}
1594
1595/*
1596 - regtry - try match at specific point
1597 */
1598STATIC I32 /* 0 failure, 1 success */
1599S_regtry(pTHX_ regexp *prog, char *startpos)
1600{
1601 dTHR;
1602 register I32 i;
1603 register I32 *sp;
1604 register I32 *ep;
1605 CHECKPOINT lastcp;
1606
1607 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1608 MAGIC *mg;
1609
1610 PL_reg_eval_set = RS_init;
1611 DEBUG_r(DEBUG_s(
b900a521
JH
1612 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1613 (IV)(PL_stack_sp - PL_stack_base));
d6a28714
JH
1614 ));
1615 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1616 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1617 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1618 SAVETMPS;
1619 /* Apparently this is not needed, judging by wantarray. */
1620 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1621 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1622
1623 if (PL_reg_sv) {
1624 /* Make $_ available to executed code. */
1625 if (PL_reg_sv != DEFSV) {
1626 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1627 SAVESPTR(DEFSV);
1628 DEFSV = PL_reg_sv;
b8c5462f 1629 }
d6a28714
JH
1630
1631 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1632 && (mg = mg_find(PL_reg_sv, 'g')))) {
1633 /* prepare for quick setting of pos */
1634 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1635 mg = mg_find(PL_reg_sv, 'g');
1636 mg->mg_len = -1;
b8c5462f 1637 }
d6a28714
JH
1638 PL_reg_magic = mg;
1639 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1640 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1641 }
1642 if (!PL_reg_curpm)
1643 New(22,PL_reg_curpm, 1, PMOP);
1644 PL_reg_curpm->op_pmregexp = prog;
1645 PL_reg_oldcurpm = PL_curpm;
1646 PL_curpm = PL_reg_curpm;
1647 if (RX_MATCH_COPIED(prog)) {
1648 /* Here is a serious problem: we cannot rewrite subbeg,
1649 since it may be needed if this match fails. Thus
1650 $` inside (?{}) could fail... */
1651 PL_reg_oldsaved = prog->subbeg;
1652 PL_reg_oldsavedlen = prog->sublen;
1653 RX_MATCH_COPIED_off(prog);
1654 }
1655 else
1656 PL_reg_oldsaved = Nullch;
1657 prog->subbeg = PL_bostr;
1658 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1659 }
1660 prog->startp[0] = startpos - PL_bostr;
1661 PL_reginput = startpos;
1662 PL_regstartp = prog->startp;
1663 PL_regendp = prog->endp;
1664 PL_reglastparen = &prog->lastparen;
1665 prog->lastparen = 0;
1666 PL_regsize = 0;
1667 DEBUG_r(PL_reg_starttry = startpos);
1668 if (PL_reg_start_tmpl <= prog->nparens) {
1669 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1670 if(PL_reg_start_tmp)
1671 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1672 else
1673 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1674 }
1675
1676 /* XXXX What this code is doing here?!!! There should be no need
1677 to do this again and again, PL_reglastparen should take care of
1678 this! */
1679 sp = prog->startp;
1680 ep = prog->endp;
1681 if (prog->nparens) {
1682 for (i = prog->nparens; i >= 1; i--) {
1683 *++sp = -1;
1684 *++ep = -1;
1685 }
1686 }
1687 REGCP_SET;
1688 if (regmatch(prog->program + 1)) {
1689 prog->endp[0] = PL_reginput - PL_bostr;
1690 return 1;
1691 }
1692 REGCP_UNWIND;
1693 return 0;
1694}
1695
1696/*
1697 - regmatch - main matching routine
1698 *
1699 * Conceptually the strategy is simple: check to see whether the current
1700 * node matches, call self recursively to see whether the rest matches,
1701 * and then act accordingly. In practice we make some effort to avoid
1702 * recursion, in particular by going through "ordinary" nodes (that don't
1703 * need to know whether the rest of the match failed) by a loop instead of
1704 * by recursion.
1705 */
1706/* [lwall] I've hoisted the register declarations to the outer block in order to
1707 * maybe save a little bit of pushing and popping on the stack. It also takes
1708 * advantage of machines that use a register save mask on subroutine entry.
1709 */
1710STATIC I32 /* 0 failure, 1 success */
1711S_regmatch(pTHX_ regnode *prog)
1712{
1713 dTHR;
1714 register regnode *scan; /* Current node. */
1715 regnode *next; /* Next node. */
1716 regnode *inner; /* Next node in internal branch. */
1717 register I32 nextchr; /* renamed nextchr - nextchar colides with
1718 function of same name */
1719 register I32 n; /* no or next */
1720 register I32 ln; /* len or last */
1721 register char *s; /* operand or save */
1722 register char *locinput = PL_reginput;
1723 register I32 c1, c2, paren; /* case fold search, parenth */
1724 int minmod = 0, sw = 0, logical = 0;
1725#ifdef DEBUGGING
1726 PL_regindent++;
1727#endif
1728
1729 /* Note that nextchr is a byte even in UTF */
1730 nextchr = UCHARAT(locinput);
1731 scan = prog;
1732 while (scan != NULL) {
1733#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1734#ifdef DEBUGGING
1735# define sayYES goto yes
1736# define sayNO goto no
7821416a
IZ
1737# define sayYES_FINAL goto yes_final
1738# define sayYES_LOUD goto yes_loud
1739# define sayNO_FINAL goto no_final
1740# define sayNO_SILENT goto do_no
d6a28714
JH
1741# define saySAME(x) if (x) goto yes; else goto no
1742# define REPORT_CODE_OFF 24
1743#else
1744# define sayYES return 1
1745# define sayNO return 0
7821416a
IZ
1746# define sayYES_FINAL return 1
1747# define sayYES_LOUD return 1
1748# define sayNO_FINAL return 0
1749# define sayNO_SILENT return 0
d6a28714
JH
1750# define saySAME(x) return x
1751#endif
1752 DEBUG_r( {
1753 SV *prop = sv_newmortal();
1754 int docolor = *PL_colors[0];
1755 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1756 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1757 /* The part of the string before starttry has one color
1758 (pref0_len chars), between starttry and current
1759 position another one (pref_len - pref0_len chars),
1760 after the current position the third one.
1761 We assume that pref0_len <= pref_len, otherwise we
1762 decrease pref0_len. */
1763 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1764 ? (5 + taill) - l : locinput - PL_bostr);
1765 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1766
1767 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1768 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1769 ? (5 + taill) - pref_len : PL_regeol - locinput);
1770 if (pref0_len < 0)
1771 pref0_len = 0;
1772 if (pref0_len > pref_len)
1773 pref0_len = pref_len;
1774 regprop(prop, scan);
1775 PerlIO_printf(Perl_debug_log,
b900a521
JH
1776 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1777 (IV)(locinput - PL_bostr),
d6a28714
JH
1778 PL_colors[4], pref0_len,
1779 locinput - pref_len, PL_colors[5],
1780 PL_colors[2], pref_len - pref0_len,
1781 locinput - pref_len + pref0_len, PL_colors[3],
1782 (docolor ? "" : "> <"),
1783 PL_colors[0], l, locinput, PL_colors[1],
1784 15 - l - pref_len + 1,
1785 "",
b900a521 1786 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1787 SvPVX(prop));
1788 } );
1789
1790 next = scan + NEXT_OFF(scan);
1791 if (next == scan)
1792 next = NULL;
1793
1794 switch (OP(scan)) {
1795 case BOL:
1796 if (locinput == PL_bostr
1797 ? PL_regprev == '\n'
1798 : (PL_multiline &&
1799 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1800 {
1801 /* regtill = regbol; */
b8c5462f
JH
1802 break;
1803 }
d6a28714
JH
1804 sayNO;
1805 case MBOL:
1806 if (locinput == PL_bostr
1807 ? PL_regprev == '\n'
1808 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1809 {
b8c5462f
JH
1810 break;
1811 }
d6a28714
JH
1812 sayNO;
1813 case SBOL:
1814 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1815 break;
d6a28714
JH
1816 sayNO;
1817 case GPOS:
1818 if (locinput == PL_reg_ganch)
1819 break;
1820 sayNO;
1821 case EOL:
1822 if (PL_multiline)
1823 goto meol;
1824 else
1825 goto seol;
1826 case MEOL:
1827 meol:
1828 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1829 sayNO;
b8c5462f 1830 break;
d6a28714
JH
1831 case SEOL:
1832 seol:
1833 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1834 sayNO;
d6a28714 1835 if (PL_regeol - locinput > 1)
b8c5462f 1836 sayNO;
b8c5462f 1837 break;
d6a28714
JH
1838 case EOS:
1839 if (PL_regeol != locinput)
b8c5462f 1840 sayNO;
d6a28714
JH
1841 break;
1842 case SANYUTF8:
b8c5462f 1843 if (nextchr & 0x80) {
b8c5462f 1844 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1845 if (locinput > PL_regeol)
1846 sayNO;
b8c5462f
JH
1847 nextchr = UCHARAT(locinput);
1848 break;
1849 }
d6a28714 1850 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1851 sayNO;
b8c5462f 1852 nextchr = UCHARAT(++locinput);
a0d0e21e 1853 break;
d6a28714
JH
1854 case SANY:
1855 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1856 sayNO;
1857 nextchr = UCHARAT(++locinput);
b85d18e9 1858 break;
d6a28714 1859 case ANYUTF8:
a0ed51b3 1860 if (nextchr & 0x80) {
b8c5462f 1861 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1862 if (locinput > PL_regeol)
1863 sayNO;
a0ed51b3
LW
1864 nextchr = UCHARAT(locinput);
1865 break;
1866 }
d6a28714 1867 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3
LW
1868 sayNO;
1869 nextchr = UCHARAT(++locinput);
1870 break;
d6a28714
JH
1871 case REG_ANY:
1872 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1873 sayNO;
76e3520e 1874 nextchr = UCHARAT(++locinput);
a0d0e21e 1875 break;
d6a28714 1876 case EXACT:
cd439c50
IZ
1877 s = STRING(scan);
1878 ln = STR_LEN(scan);
d6a28714
JH
1879 /* Inline the first character, for speed. */
1880 if (UCHARAT(s) != nextchr)
1881 sayNO;
1882 if (PL_regeol - locinput < ln)
1883 sayNO;
1884 if (ln > 1 && memNE(s, locinput, ln))
1885 sayNO;
1886 locinput += ln;
1887 nextchr = UCHARAT(locinput);
1888 break;
1889 case EXACTFL:
b8c5462f
JH
1890 PL_reg_flags |= RF_tainted;
1891 /* FALL THROUGH */
d6a28714 1892 case EXACTF:
cd439c50
IZ
1893 s = STRING(scan);
1894 ln = STR_LEN(scan);
d6a28714
JH
1895
1896 if (UTF) {
1897 char *l = locinput;
1898 char *e = s + ln;
1899 c1 = OP(scan) == EXACTF;
1900 while (s < e) {
1901 if (l >= PL_regeol)
1902 sayNO;
1903 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1904 toLOWER_utf8((U8*)l) :
1905 toLOWER_LC_utf8((U8*)l)))
1906 {
1907 sayNO;
1908 }
1909 s += UTF8SKIP(s);
1910 l += UTF8SKIP(l);
b8c5462f 1911 }
d6a28714 1912 locinput = l;
a0ed51b3
LW
1913 nextchr = UCHARAT(locinput);
1914 break;
1915 }
d6a28714
JH
1916
1917 /* Inline the first character, for speed. */
1918 if (UCHARAT(s) != nextchr &&
1919 UCHARAT(s) != ((OP(scan) == EXACTF)
1920 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1921 sayNO;
d6a28714 1922 if (PL_regeol - locinput < ln)
b8c5462f 1923 sayNO;
d6a28714
JH
1924 if (ln > 1 && (OP(scan) == EXACTF
1925 ? ibcmp(s, locinput, ln)
1926 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1927 sayNO;
d6a28714
JH
1928 locinput += ln;
1929 nextchr = UCHARAT(locinput);
a0d0e21e 1930 break;
d6a28714 1931 case ANYOFUTF8:
d6a28714 1932 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1933 sayNO;
d6a28714
JH
1934 if (locinput >= PL_regeol)
1935 sayNO;
1936 locinput += PL_utf8skip[nextchr];
1937 nextchr = UCHARAT(locinput);
1938 break;
1939 case ANYOF:
d6a28714 1940 if (nextchr < 0)
b8c5462f 1941 nextchr = UCHARAT(locinput);
936ed897 1942 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
1943 sayNO;
1944 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1945 sayNO;
b8c5462f
JH
1946 nextchr = UCHARAT(++locinput);
1947 break;
d6a28714 1948 case ALNUML:
b8c5462f
JH
1949 PL_reg_flags |= RF_tainted;
1950 /* FALL THROUGH */
d6a28714 1951 case ALNUM:
b8c5462f 1952 if (!nextchr)
4633a7c4 1953 sayNO;
d6a28714
JH
1954 if (!(OP(scan) == ALNUM
1955 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1956 sayNO;
1957 nextchr = UCHARAT(++locinput);
bbce6d69 1958 break;
d6a28714 1959 case ALNUMLUTF8:
3280af22 1960 PL_reg_flags |= RF_tainted;
bbce6d69 1961 /* FALL THROUGH */
d6a28714 1962 case ALNUMUTF8:
b8c5462f
JH
1963 if (!nextchr)
1964 sayNO;
1965 if (nextchr & 0x80) {
d6a28714
JH
1966 if (!(OP(scan) == ALNUMUTF8
1967 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1968 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1969 {
1970 sayNO;
a0ed51b3 1971 }
b8c5462f 1972 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1973 nextchr = UCHARAT(locinput);
1974 break;
1975 }
d6a28714
JH
1976 if (!(OP(scan) == ALNUMUTF8
1977 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 1978 sayNO;
b8c5462f 1979 nextchr = UCHARAT(++locinput);
a0d0e21e 1980 break;
d6a28714 1981 case NALNUML:
b8c5462f
JH
1982 PL_reg_flags |= RF_tainted;
1983 /* FALL THROUGH */
d6a28714
JH
1984 case NALNUM:
1985 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 1986 sayNO;
d6a28714
JH
1987 if (OP(scan) == NALNUM
1988 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 1989 sayNO;
b8c5462f 1990 nextchr = UCHARAT(++locinput);
a0ed51b3 1991 break;
d6a28714 1992 case NALNUMLUTF8:
b8c5462f
JH
1993 PL_reg_flags |= RF_tainted;
1994 /* FALL THROUGH */
d6a28714 1995 case NALNUMUTF8:
3280af22 1996 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1997 sayNO;
b8c5462f 1998 if (nextchr & 0x80) {
d6a28714
JH
1999 if (OP(scan) == NALNUMUTF8
2000 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2001 : isALNUM_LC_utf8((U8*)locinput))
2002 {
b8c5462f 2003 sayNO;
d6a28714 2004 }
b8c5462f
JH
2005 locinput += PL_utf8skip[nextchr];
2006 nextchr = UCHARAT(locinput);
2007 break;
2008 }
d6a28714
JH
2009 if (OP(scan) == NALNUMUTF8
2010 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2011 sayNO;
76e3520e 2012 nextchr = UCHARAT(++locinput);
a0d0e21e 2013 break;
d6a28714
JH
2014 case BOUNDL:
2015 case NBOUNDL:
3280af22 2016 PL_reg_flags |= RF_tainted;
bbce6d69 2017 /* FALL THROUGH */
d6a28714
JH
2018 case BOUND:
2019 case NBOUND:
2020 /* was last char in word? */
2021 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
2022 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2023 ln = isALNUM(ln);
2024 n = isALNUM(nextchr);
2025 }
2026 else {
2027 ln = isALNUM_LC(ln);
2028 n = isALNUM_LC(nextchr);
2029 }
2030 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 2031 sayNO;
a0d0e21e 2032 break;
d6a28714
JH
2033 case BOUNDLUTF8:
2034 case NBOUNDLUTF8:
a0ed51b3
LW
2035 PL_reg_flags |= RF_tainted;
2036 /* FALL THROUGH */
d6a28714
JH
2037 case BOUNDUTF8:
2038 case NBOUNDUTF8:
2039 /* was last char in word? */
2040 ln = (locinput != PL_regbol)
2041 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
2042 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
2043 ln = isALNUM_uni(ln);
2044 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 2045 }
d6a28714
JH
2046 else {
2047 ln = isALNUM_LC_uni(ln);
2048 n = isALNUM_LC_utf8((U8*)locinput);
2049 }
2050 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 2051 sayNO;
a0ed51b3 2052 break;
d6a28714 2053 case SPACEL:
3280af22 2054 PL_reg_flags |= RF_tainted;
bbce6d69 2055 /* FALL THROUGH */
d6a28714
JH
2056 case SPACE:
2057 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2058 sayNO;
d6a28714
JH
2059 if (!(OP(scan) == SPACE
2060 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 2061 sayNO;
76e3520e 2062 nextchr = UCHARAT(++locinput);
a0d0e21e 2063 break;
d6a28714 2064 case SPACELUTF8:
a0ed51b3
LW
2065 PL_reg_flags |= RF_tainted;
2066 /* FALL THROUGH */
d6a28714 2067 case SPACEUTF8:
a0ed51b3
LW
2068 if (!nextchr && locinput >= PL_regeol)
2069 sayNO;
2070 if (nextchr & 0x80) {
d6a28714
JH
2071 if (!(OP(scan) == SPACEUTF8
2072 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2073 : isSPACE_LC_utf8((U8*)locinput)))
2074 {
a0ed51b3 2075 sayNO;
d6a28714 2076 }
6f06b55f 2077 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2078 nextchr = UCHARAT(locinput);
2079 break;
2080 }
d6a28714
JH
2081 if (!(OP(scan) == SPACEUTF8
2082 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2083 sayNO;
2084 nextchr = UCHARAT(++locinput);
2085 break;
d6a28714 2086 case NSPACEL:
3280af22 2087 PL_reg_flags |= RF_tainted;
bbce6d69 2088 /* FALL THROUGH */
d6a28714 2089 case NSPACE:
b8c5462f
JH
2090 if (!nextchr)
2091 sayNO;
d6a28714
JH
2092 if (OP(scan) == SPACE
2093 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2094 sayNO;
b8c5462f 2095 nextchr = UCHARAT(++locinput);
a0d0e21e 2096 break;
d6a28714 2097 case NSPACELUTF8:
a0ed51b3
LW
2098 PL_reg_flags |= RF_tainted;
2099 /* FALL THROUGH */
d6a28714 2100 case NSPACEUTF8:
b8c5462f
JH
2101 if (!nextchr)
2102 sayNO;
2103 if (nextchr & 0x80) {
d6a28714
JH
2104 if (OP(scan) == NSPACEUTF8
2105 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2106 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2107 {
2108 sayNO;
2109 }
2110 locinput += PL_utf8skip[nextchr];
2111 nextchr = UCHARAT(locinput);
2112 break;
a0ed51b3 2113 }
d6a28714
JH
2114 if (OP(scan) == NSPACEUTF8
2115 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2116 sayNO;
76e3520e 2117 nextchr = UCHARAT(++locinput);
a0d0e21e 2118 break;
d6a28714 2119 case DIGITL:
a0ed51b3
LW
2120 PL_reg_flags |= RF_tainted;
2121 /* FALL THROUGH */
d6a28714 2122 case DIGIT:
a0ed51b3
LW
2123 if (!nextchr && locinput >= PL_regeol)
2124 sayNO;
d6a28714
JH
2125 if (!(OP(scan) == DIGIT
2126 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2127 sayNO;
76e3520e 2128 nextchr = UCHARAT(++locinput);
a0d0e21e 2129 break;
d6a28714 2130 case DIGITLUTF8:
a0ed51b3
LW
2131 PL_reg_flags |= RF_tainted;
2132 /* FALL THROUGH */
d6a28714 2133 case DIGITUTF8:
a0ed51b3
LW
2134 if (!nextchr)
2135 sayNO;
2136 if (nextchr & 0x80) {
d6a28714
JH
2137 if (OP(scan) == NDIGITUTF8
2138 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2139 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 2140 {
a0ed51b3 2141 sayNO;
dfe13c55 2142 }
6f06b55f 2143 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2144 nextchr = UCHARAT(locinput);
2145 break;
2146 }
d6a28714 2147 if (!isDIGIT(nextchr))
a0ed51b3
LW
2148 sayNO;
2149 nextchr = UCHARAT(++locinput);
2150 break;
d6a28714 2151 case NDIGITL:
b8c5462f
JH
2152 PL_reg_flags |= RF_tainted;
2153 /* FALL THROUGH */
d6a28714 2154 case NDIGIT:
b8c5462f
JH
2155 if (!nextchr)
2156 sayNO;
d6a28714
JH
2157 if (OP(scan) == DIGIT
2158 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2159 sayNO;
76e3520e 2160 nextchr = UCHARAT(++locinput);
a0d0e21e 2161 break;
d6a28714 2162 case NDIGITLUTF8:
b8c5462f
JH
2163 PL_reg_flags |= RF_tainted;
2164 /* FALL THROUGH */
d6a28714 2165 case NDIGITUTF8:
b8c5462f
JH
2166 if (!nextchr && locinput >= PL_regeol)
2167 sayNO;
a0ed51b3 2168 if (nextchr & 0x80) {
d6a28714 2169 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 2170 sayNO;
6f06b55f 2171 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2172 nextchr = UCHARAT(locinput);
2173 break;
2174 }
d6a28714 2175 if (isDIGIT(nextchr))
a0ed51b3
LW
2176 sayNO;
2177 nextchr = UCHARAT(++locinput);
2178 break;
2179 case CLUMP:
dfe13c55 2180 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2181 sayNO;
6f06b55f 2182 locinput += PL_utf8skip[nextchr];
dfe13c55 2183 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2184 locinput += UTF8SKIP(locinput);
2185 if (locinput > PL_regeol)
2186 sayNO;
2187 nextchr = UCHARAT(locinput);
2188 break;
c8756f30 2189 case REFFL:
3280af22 2190 PL_reg_flags |= RF_tainted;
c8756f30 2191 /* FALL THROUGH */
c277df42 2192 case REF:
c8756f30 2193 case REFF:
c277df42 2194 n = ARG(scan); /* which paren pair */
cf93c79d 2195 ln = PL_regstartp[n];
2c2d71f5 2196 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2197 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2198 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2199 if (ln == PL_regendp[n])
a0d0e21e 2200 break;
a0ed51b3 2201
cf93c79d 2202 s = PL_bostr + ln;
a0ed51b3
LW
2203 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2204 char *l = locinput;
cf93c79d 2205 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2206 /*
2207 * Note that we can't do the "other character" lookup trick as
2208 * in the 8-bit case (no pun intended) because in Unicode we
2209 * have to map both upper and title case to lower case.
2210 */
2211 if (OP(scan) == REFF) {
2212 while (s < e) {
2213 if (l >= PL_regeol)
2214 sayNO;
dfe13c55 2215 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2216 sayNO;
2217 s += UTF8SKIP(s);
2218 l += UTF8SKIP(l);
2219 }
2220 }
2221 else {
2222 while (s < e) {
2223 if (l >= PL_regeol)
2224 sayNO;
dfe13c55 2225 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2226 sayNO;
2227 s += UTF8SKIP(s);
2228 l += UTF8SKIP(l);
2229 }
2230 }
2231 locinput = l;
2232 nextchr = UCHARAT(locinput);
2233 break;
2234 }
2235
a0d0e21e 2236 /* Inline the first character, for speed. */
76e3520e 2237 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2238 (OP(scan) == REF ||
2239 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2240 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2241 sayNO;
cf93c79d 2242 ln = PL_regendp[n] - ln;
3280af22 2243 if (locinput + ln > PL_regeol)
4633a7c4 2244 sayNO;
c8756f30
AK
2245 if (ln > 1 && (OP(scan) == REF
2246 ? memNE(s, locinput, ln)
2247 : (OP(scan) == REFF
2248 ? ibcmp(s, locinput, ln)
2249 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2250 sayNO;
a0d0e21e 2251 locinput += ln;
76e3520e 2252 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2253 break;
2254
2255 case NOTHING:
c277df42 2256 case TAIL:
a0d0e21e
LW
2257 break;
2258 case BACK:
2259 break;
c277df42
IZ
2260 case EVAL:
2261 {
2262 dSP;
533c011a 2263 OP_4tree *oop = PL_op;
3280af22
NIS
2264 COP *ocurcop = PL_curcop;
2265 SV **ocurpad = PL_curpad;
c277df42
IZ
2266 SV *ret;
2267
2268 n = ARG(scan);
533c011a 2269 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2270 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2271 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2272 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2273
cea2e8a9 2274 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2275 SPAGAIN;
2276 ret = POPs;
2277 PUTBACK;
2278
0f5d15d6
IZ
2279 PL_op = oop;
2280 PL_curpad = ocurpad;
2281 PL_curcop = ocurcop;
c277df42 2282 if (logical) {
0f5d15d6
IZ
2283 if (logical == 2) { /* Postponed subexpression. */
2284 regexp *re;
22c35a8c 2285 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2286 re_cc_state state;
0f5d15d6
IZ
2287 CHECKPOINT cp, lastcp;
2288
2289 if(SvROK(ret) || SvRMAGICAL(ret)) {
2290 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2291
2292 if(SvMAGICAL(sv))
2293 mg = mg_find(sv, 'r');
2294 }
2295 if (mg) {
2296 re = (regexp *)mg->mg_obj;
df0003d4 2297 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2298 }
2299 else {
2300 STRLEN len;
2301 char *t = SvPV(ret, len);
2302 PMOP pm;
2303 char *oprecomp = PL_regprecomp;
2304 I32 osize = PL_regsize;
2305 I32 onpar = PL_regnpar;
2306
2307 pm.op_pmflags = 0;
cea2e8a9 2308 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2309 if (!(SvFLAGS(ret)
2310 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2311 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2312 PL_regprecomp = oprecomp;
2313 PL_regsize = osize;
2314 PL_regnpar = onpar;
2315 }
2316 DEBUG_r(
2317 PerlIO_printf(Perl_debug_log,
2318 "Entering embedded `%s%.60s%s%s'\n",
2319 PL_colors[0],
2320 re->precomp,
2321 PL_colors[1],
2322 (strlen(re->precomp) > 60 ? "..." : ""))
2323 );
2324 state.node = next;
2325 state.prev = PL_reg_call_cc;
2326 state.cc = PL_regcc;
2327 state.re = PL_reg_re;
2328
2ab05381 2329 PL_regcc = 0;
0f5d15d6
IZ
2330
2331 cp = regcppush(0); /* Save *all* the positions. */
2332 REGCP_SET;
2333 cache_re(re);
2334 state.ss = PL_savestack_ix;
2335 *PL_reglastparen = 0;
2336 PL_reg_call_cc = &state;
2337 PL_reginput = locinput;
2c2d71f5
JH
2338
2339 /* XXXX This is too dramatic a measure... */
2340 PL_reg_maxiter = 0;
2341
0f5d15d6 2342 if (regmatch(re->program + 1)) {
2c914db6
IZ
2343 /* Even though we succeeded, we need to restore
2344 global variables, since we may be wrapped inside
2345 SUSPEND, thus the match may be not finished yet. */
2346
2347 /* XXXX Do this only if SUSPENDed? */
2348 PL_reg_call_cc = state.prev;
2349 PL_regcc = state.cc;
2350 PL_reg_re = state.re;
2351 cache_re(PL_reg_re);
2352
2353 /* XXXX This is too dramatic a measure... */
2354 PL_reg_maxiter = 0;
2355
2356 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2357 ReREFCNT_dec(re);
2358 regcpblow(cp);
2359 sayYES;
2360 }
0f5d15d6
IZ
2361 ReREFCNT_dec(re);
2362 REGCP_UNWIND;
2363 regcppop();
2364 PL_reg_call_cc = state.prev;
2365 PL_regcc = state.cc;
2366 PL_reg_re = state.re;
d3790889 2367 cache_re(PL_reg_re);
2c2d71f5
JH
2368
2369 /* XXXX This is too dramatic a measure... */
2370 PL_reg_maxiter = 0;
2371
0f5d15d6
IZ
2372 sayNO;
2373 }
c277df42 2374 sw = SvTRUE(ret);
0f5d15d6 2375 logical = 0;
a0ed51b3
LW
2376 }
2377 else
3280af22 2378 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2379 break;
2380 }
a0d0e21e 2381 case OPEN:
c277df42 2382 n = ARG(scan); /* which paren pair */
3280af22
NIS
2383 PL_reg_start_tmp[n] = locinput;
2384 if (n > PL_regsize)
2385 PL_regsize = n;
a0d0e21e
LW
2386 break;
2387 case CLOSE:
c277df42 2388 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2389 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2390 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2391 if (n > *PL_reglastparen)
2392 *PL_reglastparen = n;
a0d0e21e 2393 break;
c277df42
IZ
2394 case GROUPP:
2395 n = ARG(scan); /* which paren pair */
cf93c79d 2396 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2397 break;
2398 case IFTHEN:
2c2d71f5 2399 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2400 if (sw)
2401 next = NEXTOPER(NEXTOPER(scan));
2402 else {
2403 next = scan + ARG(scan);
2404 if (OP(next) == IFTHEN) /* Fake one. */
2405 next = NEXTOPER(NEXTOPER(next));
2406 }
2407 break;
2408 case LOGICAL:
0f5d15d6 2409 logical = scan->flags;
c277df42 2410 break;
2ab05381
IZ
2411/*******************************************************************
2412 PL_regcc contains infoblock about the innermost (...)* loop, and
2413 a pointer to the next outer infoblock.
2414
2415 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2416
2417 1) After matching X, regnode for CURLYX is processed;
2418
2419 2) This regnode creates infoblock on the stack, and calls
2420 regmatch() recursively with the starting point at WHILEM node;
2421
2422 3) Each hit of WHILEM node tries to match A and Z (in the order
2423 depending on the current iteration, min/max of {min,max} and
2424 greediness). The information about where are nodes for "A"
2425 and "Z" is read from the infoblock, as is info on how many times "A"
2426 was already matched, and greediness.
2427
2428 4) After A matches, the same WHILEM node is hit again.
2429
2430 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2431 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2432 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2433 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2434 of the external loop.
2435
2436 Currently present infoblocks form a tree with a stem formed by PL_curcc
2437 and whatever it mentions via ->next, and additional attached trees
2438 corresponding to temporarily unset infoblocks as in "5" above.
2439
2440 In the following picture infoblocks for outer loop of
2441 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2442 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2443 infoblocks are drawn below the "reset" infoblock.
2444
2445 In fact in the picture below we do not show failed matches for Z and T
2446 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2447 more obvious *why* one needs to *temporary* unset infoblocks.]
2448
2449 Matched REx position InfoBlocks Comment
2450 (Y(A)*?Z)*?T x
2451 Y(A)*?Z)*?T x <- O
2452 Y (A)*?Z)*?T x <- O
2453 Y A)*?Z)*?T x <- O <- I
2454 YA )*?Z)*?T x <- O <- I
2455 YA A)*?Z)*?T x <- O <- I
2456 YAA )*?Z)*?T x <- O <- I
2457 YAA Z)*?T x <- O # Temporary unset I
2458 I
2459
2460 YAAZ Y(A)*?Z)*?T x <- O
2461 I
2462
2463 YAAZY (A)*?Z)*?T x <- O
2464 I
2465
2466 YAAZY A)*?Z)*?T x <- O <- I
2467 I
2468
2469 YAAZYA )*?Z)*?T x <- O <- I
2470 I
2471
2472 YAAZYA Z)*?T x <- O # Temporary unset I
2473 I,I
2474
2475 YAAZYAZ )*?T x <- O
2476 I,I
2477
2478 YAAZYAZ T x # Temporary unset O
2479 O
2480 I,I
2481
2482 YAAZYAZT x
2483 O
2484 I,I
2485 *******************************************************************/
a0d0e21e
LW
2486 case CURLYX: {
2487 CURCUR cc;
3280af22 2488 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2489
2490 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2491 next += ARG(next);
3280af22
NIS
2492 cc.oldcc = PL_regcc;
2493 PL_regcc = &cc;
2494 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2495 cc.cur = -1;
2496 cc.min = ARG1(scan);
2497 cc.max = ARG2(scan);
c277df42 2498 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2499 cc.next = next;
2500 cc.minmod = minmod;
2501 cc.lastloc = 0;
3280af22 2502 PL_reginput = locinput;
a0d0e21e
LW
2503 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2504 regcpblow(cp);
3280af22 2505 PL_regcc = cc.oldcc;
4633a7c4 2506 saySAME(n);
a0d0e21e
LW
2507 }
2508 /* NOT REACHED */
2509 case WHILEM: {
2510 /*
2511 * This is really hard to understand, because after we match
2512 * what we're trying to match, we must make sure the rest of
2c2d71f5 2513 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2514 * to go back UP the parse tree by recursing ever deeper. And
2515 * if it fails, we have to reset our parent's current state
2516 * that we can try again after backing off.
2517 */
2518
c277df42 2519 CHECKPOINT cp, lastcp;
3280af22 2520 CURCUR* cc = PL_regcc;
c277df42
IZ
2521 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2522
4633a7c4 2523 n = cc->cur + 1; /* how many we know we matched */
3280af22 2524 PL_reginput = locinput;
a0d0e21e 2525
c277df42
IZ
2526 DEBUG_r(
2527 PerlIO_printf(Perl_debug_log,
2528 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2529 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2530 (long)n, (long)cc->min,
2531 (long)cc->max, (long)cc)
2532 );
4633a7c4 2533
a0d0e21e
LW
2534 /* If degenerate scan matches "", assume scan done. */
2535
579cf2c3 2536 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2537 PL_regcc = cc->oldcc;
2ab05381
IZ
2538 if (PL_regcc)
2539 ln = PL_regcc->cur;
c277df42 2540 DEBUG_r(
c3464db5
DD
2541 PerlIO_printf(Perl_debug_log,
2542 "%*s empty match detected, try continuation...\n",
3280af22 2543 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2544 );
a0d0e21e 2545 if (regmatch(cc->next))
4633a7c4 2546 sayYES;
2ab05381
IZ
2547 if (PL_regcc)
2548 PL_regcc->cur = ln;
3280af22 2549 PL_regcc = cc;
4633a7c4 2550 sayNO;
a0d0e21e
LW
2551 }
2552
2553 /* First just match a string of min scans. */
2554
2555 if (n < cc->min) {
2556 cc->cur = n;
2557 cc->lastloc = locinput;
4633a7c4
LW
2558 if (regmatch(cc->scan))
2559 sayYES;
2560 cc->cur = n - 1;
c277df42 2561 cc->lastloc = lastloc;
4633a7c4 2562 sayNO;
a0d0e21e
LW
2563 }
2564
2c2d71f5
JH
2565 if (scan->flags) {
2566 /* Check whether we already were at this position.
2567 Postpone detection until we know the match is not
2568 *that* much linear. */
2569 if (!PL_reg_maxiter) {
2570 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2571 PL_reg_leftiter = PL_reg_maxiter;
2572 }
2573 if (PL_reg_leftiter-- == 0) {
2574 I32 size = (PL_reg_maxiter + 7)/8;
2575 if (PL_reg_poscache) {
2576 if (PL_reg_poscache_size < size) {
2577 Renew(PL_reg_poscache, size, char);
2578 PL_reg_poscache_size = size;
2579 }
2580 Zero(PL_reg_poscache, size, char);
2581 }
2582 else {
2583 PL_reg_poscache_size = size;
2584 Newz(29, PL_reg_poscache, size, char);
2585 }
2586 DEBUG_r(
2587 PerlIO_printf(Perl_debug_log,
2588 "%sDetected a super-linear match, switching on caching%s...\n",
2589 PL_colors[4], PL_colors[5])
2590 );
2591 }
2592 if (PL_reg_leftiter < 0) {
2593 I32 o = locinput - PL_bostr, b;
2594
2595 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2596 b = o % 8;
2597 o /= 8;
2598 if (PL_reg_poscache[o] & (1<<b)) {
2599 DEBUG_r(
2600 PerlIO_printf(Perl_debug_log,
2601 "%*s already tried at this position...\n",
2602 REPORT_CODE_OFF+PL_regindent*2, "")
2603 );
7821416a 2604 sayNO_SILENT;
2c2d71f5
JH
2605 }
2606 PL_reg_poscache[o] |= (1<<b);
2607 }
2608 }
2609
a0d0e21e
LW
2610 /* Prefer next over scan for minimal matching. */
2611
2612 if (cc->minmod) {
3280af22 2613 PL_regcc = cc->oldcc;
2ab05381
IZ
2614 if (PL_regcc)
2615 ln = PL_regcc->cur;
5f05dabc 2616 cp = regcppush(cc->parenfloor);
c277df42 2617 REGCP_SET;
5f05dabc 2618 if (regmatch(cc->next)) {
c277df42 2619 regcpblow(cp);
4633a7c4 2620 sayYES; /* All done. */
5f05dabc 2621 }
c277df42 2622 REGCP_UNWIND;
5f05dabc 2623 regcppop();
2ab05381
IZ
2624 if (PL_regcc)
2625 PL_regcc->cur = ln;
3280af22 2626 PL_regcc = cc;
a0d0e21e 2627
c277df42 2628 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2629 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
2630 && !(PL_reg_flags & RF_warned)) {
2631 PL_reg_flags |= RF_warned;
cea2e8a9 2632 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
2633 "Complex regular subexpression recursion",
2634 REG_INFTY - 1);
c277df42 2635 }
4633a7c4 2636 sayNO;
c277df42 2637 }
a687059c 2638
c277df42 2639 DEBUG_r(
c3464db5
DD
2640 PerlIO_printf(Perl_debug_log,
2641 "%*s trying longer...\n",
3280af22 2642 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2643 );
a0d0e21e 2644 /* Try scanning more and see if it helps. */
3280af22 2645 PL_reginput = locinput;
a0d0e21e
LW
2646 cc->cur = n;
2647 cc->lastloc = locinput;
5f05dabc 2648 cp = regcppush(cc->parenfloor);
c277df42 2649 REGCP_SET;
5f05dabc 2650 if (regmatch(cc->scan)) {
c277df42 2651 regcpblow(cp);
4633a7c4 2652 sayYES;
5f05dabc 2653 }
c277df42 2654 REGCP_UNWIND;
5f05dabc 2655 regcppop();
4633a7c4 2656 cc->cur = n - 1;
c277df42 2657 cc->lastloc = lastloc;
4633a7c4 2658 sayNO;
a0d0e21e
LW
2659 }
2660
2661 /* Prefer scan over next for maximal matching. */
2662
2663 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2664 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2665 cc->cur = n;
2666 cc->lastloc = locinput;
c277df42 2667 REGCP_SET;
5f05dabc 2668 if (regmatch(cc->scan)) {
c277df42 2669 regcpblow(cp);
4633a7c4 2670 sayYES;
5f05dabc 2671 }
c277df42 2672 REGCP_UNWIND;
a0d0e21e 2673 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2674 PL_reginput = locinput;
c277df42 2675 DEBUG_r(
c3464db5
DD
2676 PerlIO_printf(Perl_debug_log,
2677 "%*s failed, try continuation...\n",
3280af22 2678 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2679 );
2680 }
599cee73
PM
2681 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2682 && !(PL_reg_flags & RF_warned)) {
3280af22 2683 PL_reg_flags |= RF_warned;
cea2e8a9 2684 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
2685 "Complex regular subexpression recursion",
2686 REG_INFTY - 1);
a0d0e21e
LW
2687 }
2688
2689 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2690 PL_regcc = cc->oldcc;
2ab05381
IZ
2691 if (PL_regcc)
2692 ln = PL_regcc->cur;
a0d0e21e 2693 if (regmatch(cc->next))
4633a7c4 2694 sayYES;
2ab05381
IZ
2695 if (PL_regcc)
2696 PL_regcc->cur = ln;
3280af22 2697 PL_regcc = cc;
4633a7c4 2698 cc->cur = n - 1;
c277df42 2699 cc->lastloc = lastloc;
4633a7c4 2700 sayNO;
a0d0e21e
LW
2701 }
2702 /* NOT REACHED */
c277df42
IZ
2703 case BRANCHJ:
2704 next = scan + ARG(scan);
2705 if (next == scan)
2706 next = NULL;
2707 inner = NEXTOPER(NEXTOPER(scan));
2708 goto do_branch;
2709 case BRANCH:
2710 inner = NEXTOPER(scan);
2711 do_branch:
2712 {
2713 CHECKPOINT lastcp;
2714 c1 = OP(scan);
2715 if (OP(next) != c1) /* No choice. */
2716 next = inner; /* Avoid recursion. */
a0d0e21e 2717 else {
3280af22 2718 int lastparen = *PL_reglastparen;
c277df42
IZ
2719
2720 REGCP_SET;
a0d0e21e 2721 do {
3280af22 2722 PL_reginput = locinput;
c277df42 2723 if (regmatch(inner))
4633a7c4 2724 sayYES;
c277df42 2725 REGCP_UNWIND;
3280af22 2726 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2727 PL_regendp[n] = -1;
3280af22 2728 *PL_reglastparen = n;
c277df42 2729 scan = next;
a0d0e21e 2730 /*SUPPRESS 560*/
c277df42
IZ
2731 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2732 next += n;
a0d0e21e 2733 else
c277df42 2734 next = NULL;
c277df42
IZ
2735 inner = NEXTOPER(scan);
2736 if (c1 == BRANCHJ) {
2737 inner = NEXTOPER(inner);
2738 }
2739 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2740 sayNO;
a0d0e21e 2741 /* NOTREACHED */
a687059c 2742 }
a0d0e21e
LW
2743 }
2744 break;
2745 case MINMOD:
2746 minmod = 1;
2747 break;
c277df42
IZ
2748 case CURLYM:
2749 {
00db4c45 2750 I32 l = 0;
c277df42
IZ
2751 CHECKPOINT lastcp;
2752
2753 /* We suppose that the next guy does not need
2754 backtracking: in particular, it is of constant length,
2755 and has no parenths to influence future backrefs. */
2756 ln = ARG1(scan); /* min to match */
2757 n = ARG2(scan); /* max to match */
c277df42
IZ
2758 paren = scan->flags;
2759 if (paren) {
3280af22
NIS
2760 if (paren > PL_regsize)
2761 PL_regsize = paren;
2762 if (paren > *PL_reglastparen)
2763 *PL_reglastparen = paren;
c277df42 2764 }
dc45a647 2765 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2766 if (paren)
2767 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2768 PL_reginput = locinput;
c277df42
IZ
2769 if (minmod) {
2770 minmod = 0;
2771 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2772 sayNO;
5f4b28b2 2773 if (ln && l == 0 && n >= ln
c277df42
IZ
2774 /* In fact, this is tricky. If paren, then the
2775 fact that we did/didnot match may influence
2776 future execution. */
2777 && !(paren && ln == 0))
2778 ln = n;
3280af22 2779 locinput = PL_reginput;
22c35a8c 2780 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2781 c1 = (U8)*STRING(next);
c277df42 2782 if (OP(next) == EXACTF)
22c35a8c 2783 c2 = PL_fold[c1];
c277df42 2784 else if (OP(next) == EXACTFL)
22c35a8c 2785 c2 = PL_fold_locale[c1];
c277df42
IZ
2786 else
2787 c2 = c1;
a0ed51b3
LW
2788 }
2789 else
c277df42
IZ
2790 c1 = c2 = -1000;
2791 REGCP_SET;
5f4b28b2 2792 /* This may be improved if l == 0. */
c277df42
IZ
2793 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2794 /* If it could work, try it. */
2795 if (c1 == -1000 ||
3280af22
NIS
2796 UCHARAT(PL_reginput) == c1 ||
2797 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2798 {
2799 if (paren) {
2800 if (n) {
cf93c79d
IZ
2801 PL_regstartp[paren] =
2802 HOPc(PL_reginput, -l) - PL_bostr;
2803 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2804 }
2805 else
cf93c79d 2806 PL_regendp[paren] = -1;
c277df42
IZ
2807 }
2808 if (regmatch(next))
2809 sayYES;
2810 REGCP_UNWIND;
2811 }
2812 /* Couldn't or didn't -- move forward. */
3280af22 2813 PL_reginput = locinput;
c277df42
IZ
2814 if (regrepeat_hard(scan, 1, &l)) {
2815 ln++;
3280af22 2816 locinput = PL_reginput;
c277df42
IZ
2817 }
2818 else
2819 sayNO;
2820 }
a0ed51b3
LW
2821 }
2822 else {
c277df42
IZ
2823 n = regrepeat_hard(scan, n, &l);
2824 if (n != 0 && l == 0
2825 /* In fact, this is tricky. If paren, then the
2826 fact that we did/didnot match may influence
2827 future execution. */
2828 && !(paren && ln == 0))
2829 ln = n;
3280af22 2830 locinput = PL_reginput;
c277df42 2831 DEBUG_r(
5c0ca799 2832 PerlIO_printf(Perl_debug_log,
faccc32b 2833 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2834 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2835 (IV) n, (IV)l)
c277df42
IZ
2836 );
2837 if (n >= ln) {
22c35a8c 2838 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2839 c1 = (U8)*STRING(next);
c277df42 2840 if (OP(next) == EXACTF)
22c35a8c 2841 c2 = PL_fold[c1];
c277df42 2842 else if (OP(next) == EXACTFL)
22c35a8c 2843 c2 = PL_fold_locale[c1];
c277df42
IZ
2844 else
2845 c2 = c1;
a0ed51b3
LW
2846 }
2847 else
c277df42
IZ
2848 c1 = c2 = -1000;
2849 }
2850 REGCP_SET;
2851 while (n >= ln) {
2852 /* If it could work, try it. */
2853 if (c1 == -1000 ||
3280af22
NIS
2854 UCHARAT(PL_reginput) == c1 ||
2855 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2856 {
2857 DEBUG_r(
c3464db5 2858 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2859 "%*s trying tail with n=%"IVdf"...\n",
2860 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2861 );
2862 if (paren) {
2863 if (n) {
cf93c79d
IZ
2864 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2865 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2866 }
a0ed51b3 2867 else
cf93c79d 2868 PL_regendp[paren] = -1;
c277df42 2869 }
a0ed51b3
LW
2870 if (regmatch(next))
2871 sayYES;
2872 REGCP_UNWIND;
2873 }
c277df42
IZ
2874 /* Couldn't or didn't -- back up. */
2875 n--;
dfe13c55 2876 locinput = HOPc(locinput, -l);
3280af22 2877 PL_reginput = locinput;
c277df42
IZ
2878 }
2879 }
2880 sayNO;
2881 break;
2882 }
2883 case CURLYN:
2884 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2885 if (paren > PL_regsize)
2886 PL_regsize = paren;
2887 if (paren > *PL_reglastparen)
2888 *PL_reglastparen = paren;
c277df42
IZ
2889 ln = ARG1(scan); /* min to match */
2890 n = ARG2(scan); /* max to match */
dc45a647 2891 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2892 goto repeat;
a0d0e21e 2893 case CURLY:
c277df42 2894 paren = 0;
a0d0e21e
LW
2895 ln = ARG1(scan); /* min to match */
2896 n = ARG2(scan); /* max to match */
dc45a647 2897 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2898 goto repeat;
2899 case STAR:
2900 ln = 0;
c277df42 2901 n = REG_INFTY;
a0d0e21e 2902 scan = NEXTOPER(scan);
c277df42 2903 paren = 0;
a0d0e21e
LW
2904 goto repeat;
2905 case PLUS:
c277df42
IZ
2906 ln = 1;
2907 n = REG_INFTY;
2908 scan = NEXTOPER(scan);
2909 paren = 0;
2910 repeat:
a0d0e21e
LW
2911 /*
2912 * Lookahead to avoid useless match attempts
2913 * when we know what character comes next.
2914 */
22c35a8c 2915 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2916 c1 = (U8)*STRING(next);
bbce6d69 2917 if (OP(next) == EXACTF)
22c35a8c 2918 c2 = PL_fold[c1];
bbce6d69 2919 else if (OP(next) == EXACTFL)
22c35a8c 2920 c2 = PL_fold_locale[c1];
bbce6d69 2921 else
2922 c2 = c1;
2923 }
a0d0e21e 2924 else
bbce6d69 2925 c1 = c2 = -1000;
3280af22 2926 PL_reginput = locinput;
a0d0e21e 2927 if (minmod) {
c277df42 2928 CHECKPOINT lastcp;
a0d0e21e
LW
2929 minmod = 0;
2930 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2931 sayNO;
a0ed51b3 2932 locinput = PL_reginput;
c277df42 2933 REGCP_SET;
0fe9bf95
IZ
2934 if (c1 != -1000) {
2935 char *e = locinput + n - ln; /* Should not check after this */
2936 char *old = locinput;
2937
2938 if (e >= PL_regeol || (n == REG_INFTY))
2939 e = PL_regeol - 1;
2940 while (1) {
2941 /* Find place 'next' could work */
2942 if (c1 == c2) {
2943 while (locinput <= e && *locinput != c1)
2944 locinput++;
2945 } else {
2946 while (locinput <= e
2947 && *locinput != c1
2948 && *locinput != c2)
2949 locinput++;
2950 }
2951 if (locinput > e)
2952 sayNO;
2953 /* PL_reginput == old now */
2954 if (locinput != old) {
2955 ln = 1; /* Did some */
2956 if (regrepeat(scan, locinput - old) <
2957 locinput - old)
2958 sayNO;
2959 }
2960 /* PL_reginput == locinput now */
2961 if (paren) {
2962 if (ln) {
cf93c79d
IZ
2963 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2964 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2965 }
2966 else
cf93c79d 2967 PL_regendp[paren] = -1;
0fe9bf95
IZ
2968 }
2969 if (regmatch(next))
2970 sayYES;
2971 PL_reginput = locinput; /* Could be reset... */
2972 REGCP_UNWIND;
2973 /* Couldn't or didn't -- move forward. */
2974 old = locinput++;
2975 }
2976 }
2977 else
c277df42 2978 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2979 /* If it could work, try it. */
bbce6d69 2980 if (c1 == -1000 ||
3280af22
NIS
2981 UCHARAT(PL_reginput) == c1 ||
2982 UCHARAT(PL_reginput) == c2)
bbce6d69 2983 {
c277df42
IZ
2984 if (paren) {
2985 if (n) {
cf93c79d
IZ
2986 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2987 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2988 }
2989 else
cf93c79d 2990 PL_regendp[paren] = -1;
c277df42 2991 }
a0d0e21e 2992 if (regmatch(next))
4633a7c4 2993 sayYES;
c277df42 2994 REGCP_UNWIND;
bbce6d69 2995 }
c277df42 2996 /* Couldn't or didn't -- move forward. */
a0ed51b3 2997 PL_reginput = locinput;
a0d0e21e
LW
2998 if (regrepeat(scan, 1)) {
2999 ln++;
a0ed51b3
LW
3000 locinput = PL_reginput;
3001 }
3002 else
4633a7c4 3003 sayNO;
a0d0e21e
LW
3004 }
3005 }
3006 else {
c277df42 3007 CHECKPOINT lastcp;
a0d0e21e 3008 n = regrepeat(scan, n);
a0ed51b3 3009 locinput = PL_reginput;
22c35a8c 3010 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 3011 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 3012 ln = n; /* why back off? */
c277df42
IZ
3013 REGCP_SET;
3014 if (paren) {
3015 while (n >= ln) {
3016 /* If it could work, try it. */
3017 if (c1 == -1000 ||
3280af22
NIS
3018 UCHARAT(PL_reginput) == c1 ||
3019 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3020 {
3021 if (paren && n) {
3022 if (n) {
cf93c79d
IZ
3023 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
3024 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3025 }
3026 else
cf93c79d 3027 PL_regendp[paren] = -1;
c277df42
IZ
3028 }
3029 if (regmatch(next))
3030 sayYES;
3031 REGCP_UNWIND;
3032 }
3033 /* Couldn't or didn't -- back up. */
3034 n--;
dfe13c55 3035 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3036 }
a0ed51b3
LW
3037 }
3038 else {
c277df42
IZ
3039 while (n >= ln) {
3040 /* If it could work, try it. */
3041 if (c1 == -1000 ||
3280af22
NIS
3042 UCHARAT(PL_reginput) == c1 ||
3043 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3044 {
3045 if (regmatch(next))
3046 sayYES;
3047 REGCP_UNWIND;
3048 }
3049 /* Couldn't or didn't -- back up. */
3050 n--;
dfe13c55 3051 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3052 }
a0d0e21e
LW
3053 }
3054 }
4633a7c4 3055 sayNO;
c277df42 3056 break;
a0d0e21e 3057 case END:
0f5d15d6
IZ
3058 if (PL_reg_call_cc) {
3059 re_cc_state *cur_call_cc = PL_reg_call_cc;
3060 CURCUR *cctmp = PL_regcc;
3061 regexp *re = PL_reg_re;
3062 CHECKPOINT cp, lastcp;
3063
3064 cp = regcppush(0); /* Save *all* the positions. */
3065 REGCP_SET;
3066 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3067 the caller. */
3068 PL_reginput = locinput; /* Make position available to
3069 the callcc. */
3070 cache_re(PL_reg_call_cc->re);
3071 PL_regcc = PL_reg_call_cc->cc;
3072 PL_reg_call_cc = PL_reg_call_cc->prev;
3073 if (regmatch(cur_call_cc->node)) {
3074 PL_reg_call_cc = cur_call_cc;
3075 regcpblow(cp);
3076 sayYES;
3077 }
3078 REGCP_UNWIND;
3079 regcppop();
3080 PL_reg_call_cc = cur_call_cc;
3081 PL_regcc = cctmp;
3082 PL_reg_re = re;
3083 cache_re(re);
3084
3085 DEBUG_r(
3086 PerlIO_printf(Perl_debug_log,
3087 "%*s continuation failed...\n",
3088 REPORT_CODE_OFF+PL_regindent*2, "")
3089 );
7821416a 3090 sayNO_SILENT;
0f5d15d6 3091 }
7821416a
IZ
3092 if (locinput < PL_regtill) {
3093 DEBUG_r(PerlIO_printf(Perl_debug_log,
3094 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3095 PL_colors[4],
3096 (long)(locinput - PL_reg_starttry),
3097 (long)(PL_regtill - PL_reg_starttry),
3098 PL_colors[5]));
3099 sayNO_FINAL; /* Cannot match: too short. */
3100 }
3101 PL_reginput = locinput; /* put where regtry can find it */
3102 sayYES_FINAL; /* Success! */
7e5428c5 3103 case SUCCEED:
3280af22 3104 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3105 sayYES_LOUD; /* Success! */
c277df42
IZ
3106 case SUSPEND:
3107 n = 1;
9fe1d20c 3108 PL_reginput = locinput;
c277df42 3109 goto do_ifmatch;
a0d0e21e 3110 case UNLESSM:
c277df42 3111 n = 0;
a0ed51b3 3112 if (scan->flags) {
0fe9bf95
IZ
3113 if (UTF) { /* XXXX This is absolutely
3114 broken, we read before
3115 start of string. */
3116 s = HOPMAYBEc(locinput, -scan->flags);
3117 if (!s)
3118 goto say_yes;
3119 PL_reginput = s;
3120 }
3121 else {
3122 if (locinput < PL_bostr + scan->flags)
3123 goto say_yes;
3124 PL_reginput = locinput - scan->flags;
3125 goto do_ifmatch;
3126 }
a0ed51b3
LW
3127 }
3128 else
3129 PL_reginput = locinput;
c277df42
IZ
3130 goto do_ifmatch;
3131 case IFMATCH:
3132 n = 1;
a0ed51b3 3133 if (scan->flags) {
0fe9bf95
IZ
3134 if (UTF) { /* XXXX This is absolutely
3135 broken, we read before
3136 start of string. */
3137 s = HOPMAYBEc(locinput, -scan->flags);
3138 if (!s || s < PL_bostr)
3139 goto say_no;
3140 PL_reginput = s;
3141 }
3142 else {
3143 if (locinput < PL_bostr + scan->flags)
3144 goto say_no;
3145 PL_reginput = locinput - scan->flags;
3146 goto do_ifmatch;
3147 }
a0ed51b3
LW
3148 }
3149 else
3150 PL_reginput = locinput;
3151
c277df42 3152 do_ifmatch:
c277df42
IZ
3153 inner = NEXTOPER(NEXTOPER(scan));
3154 if (regmatch(inner) != n) {
3155 say_no:
3156 if (logical) {
3157 logical = 0;
3158 sw = 0;
3159 goto do_longjump;
a0ed51b3
LW
3160 }
3161 else
c277df42
IZ
3162 sayNO;
3163 }
3164 say_yes:
3165 if (logical) {
3166 logical = 0;
3167 sw = 1;
3168 }
fe44a5e8 3169 if (OP(scan) == SUSPEND) {
3280af22 3170 locinput = PL_reginput;
565764a8 3171 nextchr = UCHARAT(locinput);
fe44a5e8 3172 }
c277df42
IZ
3173 /* FALL THROUGH. */
3174 case LONGJMP:
3175 do_longjump:
3176 next = scan + ARG(scan);
3177 if (next == scan)
3178 next = NULL;
a0d0e21e
LW
3179 break;
3180 default:
b900a521 3181 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3182 PTR2UV(scan), OP(scan));
cea2e8a9 3183 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3184 }
a0d0e21e
LW
3185 scan = next;
3186 }
a687059c 3187
a0d0e21e
LW
3188 /*
3189 * We get here only if there's trouble -- normally "case END" is
3190 * the terminating point.
3191 */
cea2e8a9 3192 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3193 /*NOTREACHED*/
4633a7c4
LW
3194 sayNO;
3195
7821416a
IZ
3196yes_loud:
3197 DEBUG_r(
3198 PerlIO_printf(Perl_debug_log,
3199 "%*s %scould match...%s\n",
3200 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3201 );
3202 goto yes;
3203yes_final:
3204 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3205 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3206yes:
3207#ifdef DEBUGGING
3280af22 3208 PL_regindent--;
4633a7c4
LW
3209#endif
3210 return 1;
3211
3212no:
7821416a
IZ
3213 DEBUG_r(
3214 PerlIO_printf(Perl_debug_log,
3215 "%*s %sfailed...%s\n",
3216 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3217 );
3218 goto do_no;
3219no_final:
3220do_no:
4633a7c4 3221#ifdef DEBUGGING
3280af22 3222 PL_regindent--;
4633a7c4 3223#endif
a0d0e21e 3224 return 0;
a687059c
LW
3225}
3226
3227/*
3228 - regrepeat - repeatedly match something simple, report how many
3229 */
3230/*
3231 * [This routine now assumes that it will only match on things of length 1.
3232 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3233 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3234 */
76e3520e 3235STATIC I32
cea2e8a9 3236S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3237{
5c0ca799 3238 dTHR;
a0d0e21e 3239 register char *scan;
a0d0e21e 3240 register I32 c;
3280af22 3241 register char *loceol = PL_regeol;
a0ed51b3 3242 register I32 hardcount = 0;
a0d0e21e 3243
3280af22 3244 scan = PL_reginput;
c277df42 3245 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3246 loceol = scan + max;
a0d0e21e 3247 switch (OP(p)) {
22c35a8c 3248 case REG_ANY:
a0d0e21e
LW
3249 while (scan < loceol && *scan != '\n')
3250 scan++;
3251 break;
3252 case SANY:
3253 scan = loceol;
3254 break;
a0ed51b3
LW
3255 case ANYUTF8:
3256 loceol = PL_regeol;
3257 while (scan < loceol && *scan != '\n') {
3258 scan += UTF8SKIP(scan);
3259 hardcount++;
3260 }
3261 break;
3262 case SANYUTF8:
3263 loceol = PL_regeol;
3264 while (scan < loceol) {
3265 scan += UTF8SKIP(scan);
3266 hardcount++;
3267 }
3268 break;
bbce6d69 3269 case EXACT: /* length of string is 1 */
cd439c50 3270 c = (U8)*STRING(p);
bbce6d69 3271 while (scan < loceol && UCHARAT(scan) == c)
3272 scan++;
3273 break;
3274 case EXACTF: /* length of string is 1 */
cd439c50 3275 c = (U8)*STRING(p);
bbce6d69 3276 while (scan < loceol &&
22c35a8c 3277 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3278 scan++;
3279 break;
3280 case EXACTFL: /* length of string is 1 */
3280af22 3281 PL_reg_flags |= RF_tainted;
cd439c50 3282 c = (U8)*STRING(p);
bbce6d69 3283 while (scan < loceol &&
22c35a8c 3284 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3285 scan++;
3286 break;
a0ed51b3
LW
3287 case ANYOFUTF8:
3288 loceol = PL_regeol;
3289 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3290 scan += UTF8SKIP(scan);
3291 hardcount++;
3292 }
3293 break;
a0d0e21e 3294 case ANYOF:
936ed897 3295 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3296 scan++;
a0d0e21e
LW
3297 break;
3298 case ALNUM:
3299 while (scan < loceol && isALNUM(*scan))
3300 scan++;
3301 break;
a0ed51b3
LW
3302 case ALNUMUTF8:
3303 loceol = PL_regeol;
dfe13c55 3304 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3305 scan += UTF8SKIP(scan);
3306 hardcount++;
3307 }
3308 break;
bbce6d69 3309 case ALNUML:
3280af22 3310 PL_reg_flags |= RF_tainted;
bbce6d69 3311 while (scan < loceol && isALNUM_LC(*scan))
3312 scan++;
3313 break;
a0ed51b3
LW
3314 case ALNUMLUTF8:
3315 PL_reg_flags |= RF_tainted;
3316 loceol = PL_regeol;
dfe13c55 3317 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3318 scan += UTF8SKIP(scan);
3319 hardcount++;
3320 }
3321 break;
3322 break;
a0d0e21e
LW
3323 case NALNUM:
3324 while (scan < loceol && !isALNUM(*scan))
3325 scan++;
3326 break;
a0ed51b3
LW
3327 case NALNUMUTF8:
3328 loceol = PL_regeol;
dfe13c55 3329 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3330 scan += UTF8SKIP(scan);
3331 hardcount++;
3332 }
3333 break;
bbce6d69 3334 case NALNUML:
3280af22 3335 PL_reg_flags |= RF_tainted;
bbce6d69 3336 while (scan < loceol && !isALNUM_LC(*scan))
3337 scan++;
3338 break;
a0ed51b3
LW
3339 case NALNUMLUTF8:
3340 PL_reg_flags |= RF_tainted;
3341 loceol = PL_regeol;
dfe13c55 3342 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3343 scan += UTF8SKIP(scan);
3344 hardcount++;
3345 }
3346 break;
a0d0e21e
LW
3347 case SPACE:
3348 while (scan < loceol && isSPACE(*scan))
3349 scan++;
3350 break;
a0ed51b3
LW
3351 case SPACEUTF8:
3352 loceol = PL_regeol;
dfe13c55 3353 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3354 scan += UTF8SKIP(scan);
3355 hardcount++;
3356 }
3357 break;
bbce6d69 3358 case SPACEL:
3280af22 3359 PL_reg_flags |= RF_tainted;
bbce6d69 3360 while (scan < loceol && isSPACE_LC(*scan))
3361 scan++;
3362 break;
a0ed51b3
LW
3363 case SPACELUTF8:
3364 PL_reg_flags |= RF_tainted;
3365 loceol = PL_regeol;
dfe13c55 3366 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3367 scan += UTF8SKIP(scan);
3368 hardcount++;
3369 }
3370 break;
a0d0e21e
LW
3371 case NSPACE:
3372 while (scan < loceol && !isSPACE(*scan))
3373 scan++;
3374 break;
a0ed51b3
LW
3375 case NSPACEUTF8:
3376 loceol = PL_regeol;
dfe13c55 3377 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3378 scan += UTF8SKIP(scan);
3379 hardcount++;
3380 }
3381 break;
bbce6d69 3382 case NSPACEL:
3280af22 3383 PL_reg_flags |= RF_tainted;
bbce6d69 3384 while (scan < loceol && !isSPACE_LC(*scan))
3385 scan++;
3386 break;
a0ed51b3
LW
3387 case NSPACELUTF8:
3388 PL_reg_flags |= RF_tainted;
3389 loceol = PL_regeol;
dfe13c55 3390 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3391 scan += UTF8SKIP(scan);
3392 hardcount++;
3393 }
3394 break;
a0d0e21e
LW
3395 case DIGIT:
3396 while (scan < loceol && isDIGIT(*scan))
3397 scan++;
3398 break;
a0ed51b3
LW
3399 case DIGITUTF8:
3400 loceol = PL_regeol;
dfe13c55 3401 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3402 scan += UTF8SKIP(scan);
3403 hardcount++;
3404 }
3405 break;
3406 break;
a0d0e21e
LW
3407 case NDIGIT:
3408 while (scan < loceol && !isDIGIT(*scan))
3409 scan++;
3410 break;
a0ed51b3
LW
3411 case NDIGITUTF8:
3412 loceol = PL_regeol;
dfe13c55 3413 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3414 scan += UTF8SKIP(scan);
3415 hardcount++;
3416 }
3417 break;
a0d0e21e
LW
3418 default: /* Called on something of 0 width. */
3419 break; /* So match right here or not at all. */
3420 }
a687059c 3421
a0ed51b3
LW
3422 if (hardcount)
3423 c = hardcount;
3424 else
3425 c = scan - PL_reginput;
3280af22 3426 PL_reginput = scan;
a687059c 3427
c277df42
IZ
3428 DEBUG_r(
3429 {
3430 SV *prop = sv_newmortal();
3431
3432 regprop(prop, p);
3433 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3434 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3435 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3436 });
3437
a0d0e21e 3438 return(c);
a687059c
LW
3439}
3440
3441/*
c277df42
IZ
3442 - regrepeat_hard - repeatedly match something, report total lenth and length
3443 *
3444 * The repeater is supposed to have constant length.
3445 */
3446
76e3520e 3447STATIC I32
cea2e8a9 3448S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3449{
5c0ca799 3450 dTHR;
c277df42
IZ
3451 register char *scan;
3452 register char *start;
3280af22 3453 register char *loceol = PL_regeol;
a0ed51b3 3454 I32 l = 0;
708e3b05 3455 I32 count = 0, res = 1;
a0ed51b3
LW
3456
3457 if (!max)
3458 return 0;
c277df42 3459
3280af22 3460 start = PL_reginput;
a0ed51b3 3461 if (UTF) {
708e3b05 3462 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3463 if (!count++) {
3464 l = 0;
3465 while (start < PL_reginput) {
3466 l++;
3467 start += UTF8SKIP(start);
3468 }
3469 *lp = l;
3470 if (l == 0)
3471 return max;
3472 }
3473 if (count == max)
3474 return count;
3475 }
3476 }
3477 else {
708e3b05 3478 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3479 if (!count++) {
3480 *lp = l = PL_reginput - start;
3481 if (max != REG_INFTY && l*max < loceol - scan)
3482 loceol = scan + l*max;
3483 if (l == 0)
3484 return max;
c277df42
IZ
3485 }
3486 }
3487 }
708e3b05 3488 if (!res)
3280af22 3489 PL_reginput = scan;
c277df42 3490
a0ed51b3 3491 return count;
c277df42
IZ
3492}
3493
3494/*
cb8d8820 3495 - reginclass - determine if a character falls into a character class
bbce6d69 3496 */
3497
76e3520e 3498STATIC bool
936ed897 3499S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3500{
5c0ca799 3501 dTHR;
b8c5462f 3502 char flags = ANYOF_FLAGS(p);
bbce6d69 3503 bool match = FALSE;
3504
3505 c &= 0xFF;
b8c5462f 3506 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3507 match = TRUE;
3508 else if (flags & ANYOF_FOLD) {
3509 I32 cf;
3510 if (flags & ANYOF_LOCALE) {
3280af22 3511 PL_reg_flags |= RF_tainted;
22c35a8c 3512 cf = PL_fold_locale[c];
bbce6d69 3513 }
3514 else
22c35a8c 3515 cf = PL_fold[c];
b8c5462f 3516 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3517 match = TRUE;
3518 }
3519
b8c5462f 3520 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3521 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3522 if (
3523 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3524 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3525 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3526 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3527 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3528 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3529 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3530 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3531 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3532 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3533 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3534 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3535 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3536 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3537 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3538 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3539 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3540 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3541 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3542 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3543 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3544 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3545 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3546 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3547 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3548 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3549 ) /* How's that for a conditional? */
bbce6d69 3550 {
3551 match = TRUE;
3552 }
3553 }
3554
ae5c130c 3555 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3556}
3557
a0ed51b3 3558STATIC bool
cea2e8a9 3559S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3560{
3561 dTHR;
a0ed51b3
LW
3562 char flags = ARG1(f);
3563 bool match = FALSE;
3564 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3565
3566 if (swash_fetch(sv, p))
3567 match = TRUE;
3568 else if (flags & ANYOF_FOLD) {
3569 I32 cf;
dfe13c55 3570 U8 tmpbuf[10];
a0ed51b3
LW
3571 if (flags & ANYOF_LOCALE) {
3572 PL_reg_flags |= RF_tainted;
3573 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3574 }
3575 else
3576 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3577 if (swash_fetch(sv, tmpbuf))
3578 match = TRUE;
3579 }
3580
b8c5462f 3581 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3582
3583 return (flags & ANYOF_INVERT) ? !match : match;
3584}
161b471a 3585
dfe13c55 3586STATIC U8 *
cea2e8a9 3587S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3588{
3589 dTHR;
a0ed51b3
LW
3590 if (off >= 0) {
3591 while (off-- && s < (U8*)PL_regeol)
3592 s += UTF8SKIP(s);
3593 }
3594 else {
3595 while (off++) {
3596 if (s > (U8*)PL_bostr) {
3597 s--;
3598 if (*s & 0x80) {
3599 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3600 s--;
3601 } /* XXX could check well-formedness here */
3602 }
3603 }
3604 }
3605 return s;
3606}
161b471a 3607
dfe13c55 3608STATIC U8 *
cea2e8a9 3609S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3610{
c485e607 3611 dTHR;
a0ed51b3
LW
3612 if (off >= 0) {
3613 while (off-- && s < (U8*)PL_regeol)
3614 s += UTF8SKIP(s);
3615 if (off >= 0)
3616 return 0;
3617 }
3618 else {
3619 while (off++) {
3620 if (s > (U8*)PL_bostr) {
3621 s--;
3622 if (*s & 0x80) {
3623 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3624 s--;
3625 } /* XXX could check well-formedness here */
3626 }
3627 else
3628 break;
3629 }
3630 if (off <= 0)
3631 return 0;
3632 }
3633 return s;
3634}
51371543
GS
3635
3636#ifdef PERL_OBJECT
51371543
GS
3637#include "XSUB.h"
3638#endif
3639
3640static void
3641restore_pos(pTHXo_ void *arg)
3642{
3643 dTHR;
3644 if (PL_reg_eval_set) {
3645 if (PL_reg_oldsaved) {
3646 PL_reg_re->subbeg = PL_reg_oldsaved;
3647 PL_reg_re->sublen = PL_reg_oldsavedlen;
3648 RX_MATCH_COPIED_on(PL_reg_re);
3649 }
3650 PL_reg_magic->mg_len = PL_reg_oldpos;
3651 PL_reg_eval_set = 0;
3652 PL_curpm = PL_reg_oldcurpm;
3653 }
3654}
3655