This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate with Sarathy.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
c5be433b
GS
42
43# define PERL_NO_GET_CONTEXT
56953603
IZ
44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 **** Alterations to Henry's code are...
68 ****
4eb8286e 69 **** Copyright (c) 1991-1999, Larry Wall
a687059c 70 ****
9ef589d8
LW
71 **** You may distribute under the terms of either the GNU General Public
72 **** License or the Artistic License, as specified in the README file.
a687059c
LW
73 *
74 * Beware that some of this code is subtly aware of the way operator
75 * precedence is structured in regular expressions. Serious changes in
76 * regular-expression syntax might require a total rethink.
77 */
78#include "EXTERN.h"
864dbfa3 79#define PERL_IN_REGEXEC_C
a687059c 80#include "perl.h"
0f5d15d6 81
c5be433b
GS
82#ifdef PERL_IN_XSUB_RE
83# if defined(PERL_CAPI) || defined(PERL_OBJECT)
84# include "XSUB.h"
85# endif
86#endif
87
a687059c
LW
88#include "regcomp.h"
89
c277df42
IZ
90#define RF_tainted 1 /* tainted information used? */
91#define RF_warned 2 /* warned about big count? */
ce862d02 92#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
93#define RF_utf8 8 /* String contains multibyte chars? */
94
95#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
96
97#define RS_init 1 /* eval environment created */
98#define RS_set 2 /* replsv value is set */
c277df42 99
a687059c
LW
100#ifndef STATIC
101#define STATIC static
102#endif
103
c277df42
IZ
104/*
105 * Forwards.
106 */
107
b8c5462f 108#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c) : ANYOF_BITMAP_TEST(p,c))
a0ed51b3
LW
109#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
110
111#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
112#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
113
dfe13c55
GS
114#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
115#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
116#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
117#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
118#define HOPc(pos,off) ((char*)HOP(pos,off))
119#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 120
51371543
GS
121static void restore_pos(pTHXo_ void *arg);
122
123
76e3520e 124STATIC CHECKPOINT
cea2e8a9 125S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 126{
11343788 127 dTHR;
3280af22
NIS
128 int retval = PL_savestack_ix;
129 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
130 int p;
131
132 SSCHECK(i + 5);
3280af22 133 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
134 SSPUSHINT(PL_regendp[p]);
135 SSPUSHINT(PL_regstartp[p]);
3280af22 136 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
137 SSPUSHINT(p);
138 }
3280af22
NIS
139 SSPUSHINT(PL_regsize);
140 SSPUSHINT(*PL_reglastparen);
141 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
142 SSPUSHINT(i + 3);
143 SSPUSHINT(SAVEt_REGCONTEXT);
144 return retval;
145}
146
c277df42 147/* These are needed since we do not localize EVAL nodes: */
c3464db5 148# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b
JH
149 " Setting an EVAL scope, savestack=%"IVdf"\n", \
150 (IV)PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 151
3280af22 152# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5 153 PerlIO_printf(Perl_debug_log, \
faccc32b
JH
154 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
155 (IV)lastcp, (IV)PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 156
76e3520e 157STATIC char *
cea2e8a9 158S_regcppop(pTHX)
a0d0e21e 159{
11343788 160 dTHR;
a0d0e21e
LW
161 I32 i = SSPOPINT;
162 U32 paren = 0;
163 char *input;
cf93c79d 164 I32 tmps;
a0d0e21e
LW
165 assert(i == SAVEt_REGCONTEXT);
166 i = SSPOPINT;
167 input = (char *) SSPOPPTR;
3280af22
NIS
168 *PL_reglastparen = SSPOPINT;
169 PL_regsize = SSPOPINT;
c277df42 170 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 171 paren = (U32)SSPOPINT;
3280af22 172 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
173 PL_regstartp[paren] = SSPOPINT;
174 tmps = SSPOPINT;
3280af22
NIS
175 if (paren <= *PL_reglastparen)
176 PL_regendp[paren] = tmps;
c277df42 177 DEBUG_r(
c3464db5 178 PerlIO_printf(Perl_debug_log,
b900a521
JH
179 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
180 (UV)paren, (IV)PL_regstartp[paren],
181 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
182 (IV)PL_regendp[paren],
3280af22 183 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 184 );
a0d0e21e 185 }
c277df42 186 DEBUG_r(
3280af22 187 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 188 PerlIO_printf(Perl_debug_log,
faccc32b
JH
189 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
190 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
191 }
192 );
3280af22
NIS
193 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
194 if (paren > PL_regsize)
cf93c79d
IZ
195 PL_regstartp[paren] = -1;
196 PL_regendp[paren] = -1;
a0d0e21e
LW
197 }
198 return input;
199}
200
0f5d15d6 201STATIC char *
cea2e8a9 202S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 203{
46124e9e 204 dTHR;
0f5d15d6
IZ
205 I32 tmp = PL_savestack_ix;
206
207 PL_savestack_ix = ss;
208 regcppop();
209 PL_savestack_ix = tmp;
942e002e 210 return Nullch;
0f5d15d6
IZ
211}
212
213typedef struct re_cc_state
214{
215 I32 ss;
216 regnode *node;
217 struct re_cc_state *prev;
218 CURCUR *cc;
219 regexp *re;
220} re_cc_state;
221
c277df42 222#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 223
a687059c 224/*
e50aee73 225 * pregexec and friends
a687059c
LW
226 */
227
228/*
c277df42 229 - pregexec - match a regexp against a string
a687059c 230 */
c277df42 231I32
864dbfa3 232Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 233 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
234/* strend: pointer to null at end of string */
235/* strbeg: real beginning of string */
236/* minend: end of match must be >=minend after stringarg. */
237/* nosave: For optimizations. */
238{
239 return
240 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
241 nosave ? 0 : REXEC_COPY_STR);
242}
0f5d15d6
IZ
243
244STATIC void
cea2e8a9 245S_cache_re(pTHX_ regexp *prog)
0f5d15d6 246{
46124e9e 247 dTHR;
0f5d15d6
IZ
248 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
249#ifdef DEBUGGING
250 PL_regprogram = prog->program;
251#endif
252 PL_regnpar = prog->nparens;
253 PL_regdata = prog->data;
254 PL_reg_re = prog;
255}
22e551b9 256
cad2e5aa
JH
257/*
258 * Need to implement the following flags for reg_anch:
259 *
260 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
261 * USE_INTUIT_ML
262 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
263 * INTUIT_AUTORITATIVE_ML
264 * INTUIT_ONCE_NOML - Intuit can match in one location only.
265 * INTUIT_ONCE_ML
266 *
267 * Another flag for this function: SECOND_TIME (so that float substrs
268 * with giant delta may be not rechecked).
269 */
270
271/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
272
2c2d71f5 273/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
274 Otherwise, only SvCUR(sv) is used to get strbeg. */
275
276/* XXXX We assume that strpos is strbeg unless sv. */
277
2c2d71f5
JH
278/* A failure to find a constant substring means that there is no need to make
279 an expensive call to REx engine, thus we celebrate a failure. Similarly,
280 finding a substring too deep into the string means that less calls to
30944b6d
IZ
281 regtry() should be needed.
282
283 REx compiler's optimizer found 4 possible hints:
284 a) Anchored substring;
285 b) Fixed substring;
286 c) Whether we are anchored (beginning-of-line or \G);
287 d) First node (of those at offset 0) which may distingush positions;
b2a3d410 288 We use 'a', 'b', multiline-part of 'c', and try to find a position in the
30944b6d
IZ
289 string which does not contradict any of them.
290 */
2c2d71f5 291
cad2e5aa
JH
292char *
293Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
294 char *strend, U32 flags, re_scream_pos_data *data)
295{
2c2d71f5 296 register I32 start_shift;
cad2e5aa 297 /* Should be nonnegative! */
2c2d71f5
JH
298 register I32 end_shift;
299 register char *s;
300 register SV *check;
cad2e5aa
JH
301 char *t;
302 I32 ml_anch;
2c2d71f5 303 char *tmp;
b2a3d410 304 register char *other_last = Nullch;
30944b6d
IZ
305#ifdef DEBUGGING
306 char *i_strpos = strpos;
307#endif
cad2e5aa
JH
308
309 DEBUG_r( if (!PL_colorset) reginitcolors() );
310 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 311 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
312 PL_colors[4],PL_colors[5],PL_colors[0],
313 prog->precomp,
314 PL_colors[1],
315 (strlen(prog->precomp) > 60 ? "..." : ""),
316 PL_colors[0],
b900a521 317 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
318 strpos, PL_colors[1],
319 (strend - strpos > 60 ? "..." : ""))
320 );
321
2c2d71f5
JH
322 if (prog->minlen > strend - strpos) {
323 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 324 goto fail;
2c2d71f5
JH
325 }
326 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
327 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
328 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 329 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa
JH
330
331 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 332 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
333 I32 slen;
334
335 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
2c2d71f5
JH
336 && (sv && (strpos + SvCUR(sv) != strend)) ) {
337 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
cad2e5aa 338 goto fail;
2c2d71f5 339 }
adac82c7 340 PL_regeol = strend; /* Used in HOP() */
2c2d71f5 341 s = HOPc(strpos, prog->check_offset_min);
cad2e5aa
JH
342 if (SvTAIL(prog->check_substr)) {
343 slen = SvCUR(prog->check_substr); /* >= 1 */
344
2c2d71f5
JH
345 if ( strend - s > slen || strend - s < slen - 1
346 || (strend - s == slen && strend[-1] != '\n')) {
347 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
348 goto fail_finish;
cad2e5aa
JH
349 }
350 /* Now should match s[0..slen-2] */
351 slen--;
352 if (slen && (*SvPVX(prog->check_substr) != *s
353 || (slen > 1
2c2d71f5
JH
354 && memNE(SvPVX(prog->check_substr), s, slen)))) {
355 report_neq:
356 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
357 goto fail_finish;
358 }
cad2e5aa
JH
359 }
360 else if (*SvPVX(prog->check_substr) != *s
361 || ((slen = SvCUR(prog->check_substr)) > 1
362 && memNE(SvPVX(prog->check_substr), s, slen)))
2c2d71f5
JH
363 goto report_neq;
364 goto success_at_start;
cad2e5aa 365 }
2c2d71f5 366 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 367 s = strpos;
2c2d71f5
JH
368 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
369 /* Should be nonnegative! */
370 end_shift = prog->minlen - start_shift -
371 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
372 if (!ml_anch) {
373 I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
374 - (SvTAIL(prog->check_substr) != 0);
375 I32 eshift = strend - s - end;
376
377 if (end_shift < eshift)
378 end_shift = eshift;
379 }
cad2e5aa 380 }
2c2d71f5 381 else { /* Can match at random position */
cad2e5aa
JH
382 ml_anch = 0;
383 s = strpos;
2c2d71f5
JH
384 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
385 /* Should be nonnegative! */
386 end_shift = prog->minlen - start_shift -
387 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
cad2e5aa
JH
388 }
389
2c2d71f5 390#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 391 if (end_shift < 0)
6bbae5e6 392 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
393#endif
394
395 check = prog->check_substr;
396 restart:
397 /* Find a possible match in the region s..strend by looking for
398 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 399 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
400 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
401 I32 p = -1; /* Internal iterator of scream. */
402 I32 *pp = data ? data->scream_pos : &p;
403
2c2d71f5
JH
404 if (PL_screamfirst[BmRARE(check)] >= 0
405 || ( BmRARE(check) == '\n'
406 && (BmPREVIOUS(check) == SvCUR(check) - 1)
407 && SvTAIL(check) ))
408 s = screaminstr(sv, check,
409 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 410 else
2c2d71f5 411 goto fail_finish;
cad2e5aa
JH
412 if (data)
413 *data->scream_olds = s;
414 }
415 else
416 s = fbm_instr((unsigned char*)s + start_shift,
417 (unsigned char*)strend - end_shift,
2c2d71f5 418 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
419
420 /* Update the count-of-usability, remove useless subpatterns,
421 unshift s. */
2c2d71f5
JH
422
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
424 (s ? "Found" : "Did not find"),
425 ((check == prog->anchored_substr) ? "anchored" : "floating"),
426 PL_colors[0],
7b0972df
JH
427 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
428 SvPVX(check),
2c2d71f5
JH
429 PL_colors[1], (SvTAIL(check) ? "$" : ""),
430 (s ? " at offset " : "...\n") ) );
431
432 if (!s)
433 goto fail_finish;
434
435 /* Finish the diagnostic message */
30944b6d 436 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
437
438 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
439 Start with the other substr.
440 XXXX no SCREAM optimization yet - and a very coarse implementation
441 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
442 *always* match. Probably should be marked during compile...
443 Probably it is right to do no SCREAM here...
444 */
445
446 if (prog->float_substr && prog->anchored_substr) {
30944b6d 447 /* Take into account the "other" substring. */
2c2d71f5
JH
448 /* XXXX May be hopelessly wrong for UTF... */
449 if (!other_last)
b2a3d410 450 other_last = strpos - 1;
2c2d71f5 451 if (check == prog->float_substr) {
30944b6d
IZ
452 do_other_anchored:
453 {
2c2d71f5
JH
454 char *last = s - start_shift, *last1, *last2;
455 char *s1 = s;
456
457 tmp = PL_bostr;
458 t = s - prog->check_offset_max;
459 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
460 && (!(prog->reganch & ROPT_UTF8)
461 || (PL_bostr = strpos, /* Used in regcopmaybe() */
462 (t = reghopmaybe_c(s, -(prog->check_offset_max)))
463 && t > strpos)))
30944b6d 464 /* EMPTY */;
2c2d71f5
JH
465 else
466 t = strpos;
467 t += prog->anchored_offset;
b2a3d410
JH
468 if (t <= other_last)
469 t = other_last + 1;
2c2d71f5
JH
470 PL_bostr = tmp;
471 last2 = last1 = strend - prog->minlen;
472 if (last < last1)
473 last1 = last;
474 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
475 /* On end-of-str: see comment below. */
476 s = fbm_instr((unsigned char*)t,
477 (unsigned char*)last1 + prog->anchored_offset
478 + SvCUR(prog->anchored_substr)
479 - (SvTAIL(prog->anchored_substr)!=0),
480 prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
481 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
482 (s ? "Found" : "Contradicts"),
483 PL_colors[0],
7b0972df
JH
484 (int)(SvCUR(prog->anchored_substr)
485 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
486 SvPVX(prog->anchored_substr),
487 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
488 if (!s) {
489 if (last1 >= last2) {
490 DEBUG_r(PerlIO_printf(Perl_debug_log,
491 ", giving up...\n"));
492 goto fail_finish;
493 }
494 DEBUG_r(PerlIO_printf(Perl_debug_log,
495 ", trying floating at offset %ld...\n",
30944b6d 496 (long)(s1 + 1 - i_strpos)));
2c2d71f5 497 PL_regeol = strend; /* Used in HOP() */
b2a3d410 498 other_last = last1 + prog->anchored_offset;
2c2d71f5
JH
499 s = HOPc(last, 1);
500 goto restart;
501 }
502 else {
503 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 504 (long)(s - i_strpos)));
2c2d71f5 505 t = s - prog->anchored_offset;
b2a3d410 506 other_last = s - 1;
30944b6d 507 s = s1;
2c2d71f5
JH
508 if (t == strpos)
509 goto try_at_start;
2c2d71f5
JH
510 goto try_at_offset;
511 }
30944b6d 512 }
2c2d71f5
JH
513 }
514 else { /* Take into account the floating substring. */
515 char *last, *last1;
516 char *s1 = s;
517
518 t = s - start_shift;
519 last1 = last = strend - prog->minlen + prog->float_min_offset;
520 if (last - t > prog->float_max_offset)
521 last = t + prog->float_max_offset;
522 s = t + prog->float_min_offset;
b2a3d410
JH
523 if (s <= other_last)
524 s = other_last + 1;
2c2d71f5
JH
525 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
526 /* fbm_instr() takes into account exact value of end-of-str
527 if the check is SvTAIL(ed). Since false positives are OK,
528 and end-of-str is not later than strend we are OK. */
529 s = fbm_instr((unsigned char*)s,
530 (unsigned char*)last + SvCUR(prog->float_substr)
531 - (SvTAIL(prog->float_substr)!=0),
532 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
533 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
534 (s ? "Found" : "Contradicts"),
535 PL_colors[0],
7b0972df
JH
536 (int)(SvCUR(prog->float_substr)
537 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
538 SvPVX(prog->float_substr),
539 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
540 if (!s) {
541 if (last1 == last) {
542 DEBUG_r(PerlIO_printf(Perl_debug_log,
543 ", giving up...\n"));
544 goto fail_finish;
545 }
546 DEBUG_r(PerlIO_printf(Perl_debug_log,
547 ", trying anchored starting at offset %ld...\n",
30944b6d 548 (long)(s1 + 1 - i_strpos)));
b2a3d410 549 other_last = last;
2c2d71f5
JH
550 PL_regeol = strend; /* Used in HOP() */
551 s = HOPc(t, 1);
552 goto restart;
553 }
554 else {
555 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 556 (long)(s - i_strpos)));
b2a3d410 557 other_last = s - 1;
30944b6d 558 s = s1;
2c2d71f5
JH
559 if (t == strpos)
560 goto try_at_start;
2c2d71f5
JH
561 goto try_at_offset;
562 }
563 }
cad2e5aa 564 }
2c2d71f5
JH
565
566 t = s - prog->check_offset_max;
567 tmp = PL_bostr;
568 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
569 && (!(prog->reganch & ROPT_UTF8)
570 || (PL_bostr = strpos, /* Used in regcopmaybe() */
571 ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
572 && t > strpos)))) {
573 PL_bostr = tmp;
574 /* Fixed substring is found far enough so that the match
575 cannot start at strpos. */
576 try_at_offset:
cad2e5aa 577 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
578 /* Eventually fbm_*() should handle this, but often
579 anchored_offset is not 0, so this check will not be wasted. */
580 /* XXXX In the code below we prefer to look for "^" even in
581 presence of anchored substrings. And we search even
582 beyond the found float position. These pessimizations
583 are historical artefacts only. */
584 find_anchor:
2c2d71f5 585 while (t < strend - prog->minlen) {
cad2e5aa
JH
586 if (*t == '\n') {
587 if (t < s - prog->check_offset_min) {
30944b6d
IZ
588 if (prog->anchored_substr) {
589 /* We definitely contradict the found anchored
590 substr. Due to the above check we do not
591 contradict "check" substr.
592 Thus we can arrive here only if check substr
593 is float. Redo checking for "other"=="fixed".
594 */
595 strpos = t + 1;
596 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
597 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
598 goto do_other_anchored;
599 }
cad2e5aa 600 s = t + 1;
2c2d71f5 601 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 602 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
603 goto set_useful;
604 }
2c2d71f5 605 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
30944b6d
IZ
606 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
607 strpos = s = t + 1;
cad2e5aa
JH
608 goto restart;
609 }
610 t++;
611 }
2c2d71f5
JH
612 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
613 PL_colors[0],PL_colors[1]));
614 goto fail_finish;
cad2e5aa
JH
615 }
616 s = t;
617 set_useful:
2c2d71f5 618 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
619 }
620 else {
2c2d71f5
JH
621 PL_bostr = tmp;
622 /* The found string does not prohibit matching at beg-of-str
623 - no optimization of calling REx engine can be performed,
624 unless it was an MBOL and we are not after MBOL. */
625 try_at_start:
626 /* Even in this situation we may use MBOL flag if strpos is offset
627 wrt the start of the string. */
628 if (ml_anch && sv
cad2e5aa
JH
629 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
630 t = strpos;
631 goto find_anchor;
632 }
30944b6d
IZ
633 DEBUG_r( if (ml_anch)
634 PerlIO_printf(Perl_debug_log, "Does not contradict /%s^%s/m...\n",
635 PL_colors[0],PL_colors[1]);
636 );
2c2d71f5 637 success_at_start:
30944b6d 638 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
cad2e5aa
JH
639 && --BmUSEFUL(prog->check_substr) < 0
640 && prog->check_substr == prog->float_substr) { /* boo */
641 /* If flags & SOMETHING - do not do it many times on the same match */
642 SvREFCNT_dec(prog->check_substr);
643 prog->check_substr = Nullsv; /* disable */
644 prog->float_substr = Nullsv; /* clear */
645 s = strpos;
3cf5c195
IZ
646 /* XXXX This is a remnant of the old implementation. It
647 looks wasteful, since now INTUIT can use many
648 other heuristics too. */
cad2e5aa
JH
649 prog->reganch &= ~RE_USE_INTUIT;
650 }
651 else
652 s = strpos;
653 }
654
2c2d71f5 655 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
30944b6d 656 PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 657 return s;
2c2d71f5
JH
658
659 fail_finish: /* Substring not found */
660 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 661 fail:
2c2d71f5 662 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
663 PL_colors[4],PL_colors[5]));
664 return Nullch;
665}
9661b544 666
a687059c 667/*
c277df42 668 - regexec_flags - match a regexp against a string
a687059c 669 */
79072805 670I32
864dbfa3 671Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
22e551b9 672 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
c277df42
IZ
673/* strend: pointer to null at end of string */
674/* strbeg: real beginning of string */
675/* minend: end of match must be >=minend after stringarg. */
676/* data: May be used for some additional optimizations. */
677/* nosave: For optimizations. */
a687059c 678{
5c0ca799 679 dTHR;
a0d0e21e 680 register char *s;
c277df42 681 register regnode *c;
a0d0e21e
LW
682 register char *startpos = stringarg;
683 register I32 tmp;
c277df42 684 I32 minlen; /* must match at least this many chars */
a0d0e21e 685 I32 dontbother = 0; /* how many characters not to try at end */
c277df42 686 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
687 constant substr. */ /* CC */
688 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
689 I32 scream_pos = -1; /* Internal iterator of scream. */
690 char *scream_olds;
3280af22 691 SV* oreplsv = GvSV(PL_replgv);
a687059c 692
2ab05381 693 PL_regcc = 0;
a0d0e21e 694
0f5d15d6 695 cache_re(prog);
a0d0e21e 696#ifdef DEBUGGING
3280af22 697 PL_regnarrate = PL_debug & 512;
a0d0e21e
LW
698#endif
699
700 /* Be paranoid... */
701 if (prog == NULL || startpos == NULL) {
cea2e8a9 702 Perl_croak(aTHX_ "NULL regexp parameter");
a0d0e21e
LW
703 return 0;
704 }
705
c277df42
IZ
706 minlen = prog->minlen;
707 if (strend - startpos < minlen) goto phooey;
708
a0d0e21e 709 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 710 PL_regprev = '\n';
a0d0e21e 711 else {
a0ed51b3 712 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
713 if (!PL_multiline && PL_regprev == '\n')
714 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 715 }
bbce6d69 716
a0d0e21e 717 /* Check validity of program. */
22c35a8c 718 if (UCHARAT(prog->program) != REG_MAGIC) {
cea2e8a9 719 Perl_croak(aTHX_ "corrupted regexp program");
a0d0e21e
LW
720 }
721
3280af22
NIS
722 PL_reg_flags = 0;
723 PL_reg_eval_set = 0;
2c2d71f5 724 PL_reg_maxiter = 0;
a0d0e21e 725
a0ed51b3
LW
726 if (prog->reganch & ROPT_UTF8)
727 PL_reg_flags |= RF_utf8;
728
729 /* Mark beginning of line for ^ and lookbehind. */
730 PL_regbol = startpos;
731 PL_bostr = strbeg;
9661b544 732 PL_reg_sv = sv;
a0ed51b3
LW
733
734 /* Mark end of line for $ (and such) */
735 PL_regeol = strend;
736
737 /* see how far we have to get to not match where we matched before */
738 PL_regtill = startpos+minend;
739
0f5d15d6
IZ
740 /* We start without call_cc context. */
741 PL_reg_call_cc = 0;
742
a0d0e21e
LW
743 /* If there is a "must appear" string, look for it. */
744 s = startpos;
cad2e5aa 745
30382c73 746 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
cad2e5aa
JH
747 MAGIC *mg;
748
30382c73 749 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
cad2e5aa 750 PL_reg_ganch = startpos;
30382c73
IZ
751 else if (sv && SvTYPE(sv) >= SVt_PVMG
752 && SvMAGIC(sv)
753 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
754 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
755 if (prog->reganch & ROPT_ANCH_GPOS) {
756 if (s > PL_reg_ganch)
757 goto phooey;
758 s = PL_reg_ganch;
759 }
a0ed51b3 760 }
30382c73
IZ
761 else /* pos() not defined */
762 PL_reg_ganch = strbeg;
a0d0e21e 763 }
a687059c 764
cad2e5aa
JH
765 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
766 re_scream_pos_data d;
767
768 d.scream_olds = &scream_olds;
769 d.scream_pos = &scream_pos;
770 s = re_intuit_start(prog, sv, s, strend, flags, &d);
771 if (!s)
772 goto phooey; /* not present */
773 }
774
775 DEBUG_r( if (!PL_colorset) reginitcolors() );
776 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 777 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
8d300b32
GS
778 PL_colors[4],PL_colors[5],PL_colors[0],
779 prog->precomp,
780 PL_colors[1],
c277df42 781 (strlen(prog->precomp) > 60 ? "..." : ""),
cad2e5aa 782 PL_colors[0],
b900a521 783 (int)(strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 784 startpos, PL_colors[1],
c277df42
IZ
785 (strend - startpos > 60 ? "..." : ""))
786 );
787
a0d0e21e 788 /* Simplest case: anchored match need be tried only once. */
774d564b 789 /* [unless only anchor is BOL and multiline is set] */
22e551b9 790 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
cad2e5aa 791 if (s == startpos && regtry(prog, startpos))
a0d0e21e 792 goto got_it;
22e551b9
IZ
793 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
794 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
774d564b 795 {
cad2e5aa
JH
796 char *end;
797
a0d0e21e
LW
798 if (minlen)
799 dontbother = minlen - 1;
cad2e5aa 800 end = HOPc(strend, -dontbother) - 1;
a0d0e21e 801 /* for multiline we only have to try after newlines */
cad2e5aa 802 if (prog->check_substr) {
30944b6d
IZ
803 if (s == startpos)
804 goto after_try;
cad2e5aa
JH
805 while (1) {
806 if (regtry(prog, s))
a0d0e21e 807 goto got_it;
30944b6d 808 after_try:
cad2e5aa
JH
809 if (s >= end)
810 goto phooey;
3cf5c195
IZ
811 if (prog->reganch & RE_USE_INTUIT) {
812 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
813 if (!s)
814 goto phooey;
815 }
816 else
817 s++;
cad2e5aa
JH
818 }
819 } else {
820 if (s > startpos)
821 s--;
822 while (s < end) {
823 if (*s++ == '\n') { /* don't need PL_utf8skip here */
824 if (regtry(prog, s))
825 goto got_it;
826 }
827 }
35c8bce7 828 }
35c8bce7 829 }
a0d0e21e 830 goto phooey;
22e551b9
IZ
831 } else if (prog->reganch & ROPT_ANCH_GPOS) {
832 if (regtry(prog, PL_reg_ganch))
833 goto got_it;
834 goto phooey;
a0d0e21e 835 }
35c8bce7 836
a0d0e21e 837 /* Messy cases: unanchored match. */
c277df42
IZ
838 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
839 /* we have /x+whatever/ */
cad2e5aa 840 /* it must be a one character string (XXXX Except UTF?) */
c277df42 841 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
842 if (UTF) {
843 while (s < strend) {
844 if (*s == ch) {
845 if (regtry(prog, s)) goto got_it;
846 s += UTF8SKIP(s);
847 while (s < strend && *s == ch)
848 s += UTF8SKIP(s);
849 }
850 s += UTF8SKIP(s);
851 }
852 }
853 else {
854 while (s < strend) {
855 if (*s == ch) {
856 if (regtry(prog, s)) goto got_it;
c277df42 857 s++;
a0ed51b3
LW
858 while (s < strend && *s == ch)
859 s++;
860 }
861 s++;
a0d0e21e 862 }
a687059c 863 }
c277df42
IZ
864 }
865 /*SUPPRESS 560*/
866 else if (prog->anchored_substr != Nullsv
867 || (prog->float_substr != Nullsv
868 && prog->float_max_offset < strend - s)) {
869 SV *must = prog->anchored_substr
870 ? prog->anchored_substr : prog->float_substr;
871 I32 back_max =
872 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
873 I32 back_min =
874 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
875 I32 delta = back_max - back_min;
cf93c79d 876 char *last = HOPc(strend, /* Cannot start after this */
73c4f7a1
GS
877 -(I32)(CHR_SVLEN(must)
878 - (SvTAIL(must) != 0) + back_min));
a0ed51b3
LW
879 char *last1; /* Last position checked before */
880
881 if (s > PL_bostr)
dfe13c55 882 last1 = HOPc(s, -1);
a0ed51b3
LW
883 else
884 last1 = s - 1; /* bogus */
c277df42
IZ
885
886 /* XXXX check_substr already used to find `s', can optimize if
887 check_substr==must. */
888 scream_pos = -1;
889 dontbother = end_shift;
dfe13c55 890 strend = HOPc(strend, -dontbother);
c277df42 891 while ( (s <= last) &&
22e551b9
IZ
892 ((flags & REXEC_SCREAM)
893 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
c277df42 894 end_shift, &scream_pos, 0))
a0ed51b3 895 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
cf93c79d
IZ
896 (unsigned char*)strend, must,
897 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
dfe13c55
GS
898 if (HOPc(s, -back_max) > last1) {
899 last1 = HOPc(s, -back_min);
900 s = HOPc(s, -back_max);
a0ed51b3
LW
901 }
902 else {
dfe13c55 903 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 904
dfe13c55 905 last1 = HOPc(s, -back_min);
c277df42 906 s = t;
a0d0e21e 907 }
a0ed51b3
LW
908 if (UTF) {
909 while (s <= last1) {
910 if (regtry(prog, s))
911 goto got_it;
912 s += UTF8SKIP(s);
913 }
914 }
915 else {
916 while (s <= last1) {
917 if (regtry(prog, s))
918 goto got_it;
919 s++;
920 }
a0d0e21e
LW
921 }
922 }
923 goto phooey;
a0ed51b3
LW
924 }
925 else if (c = prog->regstclass) {
a0d0e21e 926 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
b3c9acc1
IZ
927 char *m;
928 int ln;
929 int c1;
930 int c2;
931 char *e;
a687059c 932
a0d0e21e
LW
933 if (minlen)
934 dontbother = minlen - 1;
dfe13c55 935 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
936 tmp = 1;
937 /* We know what class it must start with. */
938 switch (OP(c)) {
a0ed51b3 939 case ANYOFUTF8:
a0ed51b3
LW
940 while (s < strend) {
941 if (REGINCLASSUTF8(c, (U8*)s)) {
942 if (tmp && regtry(prog, s))
943 goto got_it;
944 else
945 tmp = doevery;
946 }
947 else
948 tmp = 1;
949 s += UTF8SKIP(s);
950 }
951 break;
a0d0e21e 952 case ANYOF:
a0d0e21e 953 while (s < strend) {
936ed897 954 if (REGINCLASS(c, *s)) {
a0d0e21e
LW
955 if (tmp && regtry(prog, s))
956 goto got_it;
957 else
958 tmp = doevery;
a687059c 959 }
a0d0e21e
LW
960 else
961 tmp = 1;
962 s++;
963 }
964 break;
b3c9acc1
IZ
965 case EXACTF:
966 m = STRING(c);
967 ln = STR_LEN(c);
968 c1 = *m;
969 c2 = PL_fold[c1];
970 goto do_exactf;
971 case EXACTFL:
972 m = STRING(c);
973 ln = STR_LEN(c);
974 c1 = *m;
975 c2 = PL_fold_locale[c1];
976 do_exactf:
977 e = strend - ln;
978
979 /* Here it is NOT UTF! */
980 if (c1 == c2) {
981 while (s <= e) {
982 if ( *s == c1
983 && (ln == 1 || (OP(c) == EXACTF
984 ? ibcmp(s, m, ln)
985 : ibcmp_locale(s, m, ln)))
b2a3d410 986 && regtry(prog, s) )
b3c9acc1
IZ
987 goto got_it;
988 s++;
989 }
990 } else {
991 while (s <= e) {
992 if ( (*s == c1 || *s == c2)
993 && (ln == 1 || (OP(c) == EXACTF
994 ? ibcmp(s, m, ln)
995 : ibcmp_locale(s, m, ln)))
b2a3d410 996 && regtry(prog, s) )
b3c9acc1
IZ
997 goto got_it;
998 s++;
999 }
1000 }
1001 break;
bbce6d69 1002 case BOUNDL:
3280af22 1003 PL_reg_flags |= RF_tainted;
bbce6d69 1004 /* FALL THROUGH */
a0d0e21e 1005 case BOUND:
b2a3d410
JH
1006 if (minlen) {
1007 dontbother++;
1008 strend -= 1;
1009 }
1010 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 1011 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 1012 while (s < strend) {
95bac841 1013 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 1014 tmp = !tmp;
b2a3d410 1015 if (regtry(prog, s))
a0d0e21e 1016 goto got_it;
a687059c 1017 }
a0d0e21e
LW
1018 s++;
1019 }
b2a3d410 1020 if ((minlen || tmp) && regtry(prog,s))
a0d0e21e
LW
1021 goto got_it;
1022 break;
a0ed51b3
LW
1023 case BOUNDLUTF8:
1024 PL_reg_flags |= RF_tainted;
1025 /* FALL THROUGH */
1026 case BOUNDUTF8:
b2a3d410
JH
1027 if (minlen) {
1028 dontbother++;
1029 strend = reghop_c(strend, -1);
1030 }
1031 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
1032 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
1033 while (s < strend) {
dfe13c55
GS
1034 if (tmp == !(OP(c) == BOUND ?
1035 swash_fetch(PL_utf8_alnum, (U8*)s) :
1036 isALNUM_LC_utf8((U8*)s)))
1037 {
a0ed51b3 1038 tmp = !tmp;
b2a3d410 1039 if (regtry(prog, s))
a0ed51b3
LW
1040 goto got_it;
1041 }
1042 s += UTF8SKIP(s);
1043 }
b2a3d410 1044 if ((minlen || tmp) && regtry(prog,s))
a0ed51b3
LW
1045 goto got_it;
1046 break;
bbce6d69 1047 case NBOUNDL:
3280af22 1048 PL_reg_flags |= RF_tainted;
bbce6d69 1049 /* FALL THROUGH */
a0d0e21e 1050 case NBOUND:
b2a3d410
JH
1051 if (minlen) {
1052 dontbother++;
1053 strend -= 1;
1054 }
1055 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 1056 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 1057 while (s < strend) {
95bac841 1058 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 1059 tmp = !tmp;
b2a3d410 1060 else if (regtry(prog, s))
a0d0e21e
LW
1061 goto got_it;
1062 s++;
1063 }
b2a3d410 1064 if ((minlen || !tmp) && regtry(prog,s))
a0d0e21e
LW
1065 goto got_it;
1066 break;
a0ed51b3
LW
1067 case NBOUNDLUTF8:
1068 PL_reg_flags |= RF_tainted;
1069 /* FALL THROUGH */
1070 case NBOUNDUTF8:
b2a3d410
JH
1071 if (minlen) {
1072 dontbother++;
dfe13c55 1073 strend = reghop_c(strend, -1);
b2a3d410
JH
1074 }
1075 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
1076 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
1077 while (s < strend) {
dfe13c55
GS
1078 if (tmp == !(OP(c) == NBOUND ?
1079 swash_fetch(PL_utf8_alnum, (U8*)s) :
1080 isALNUM_LC_utf8((U8*)s)))
a0ed51b3 1081 tmp = !tmp;
b2a3d410 1082 else if (regtry(prog, s))
a0ed51b3
LW
1083 goto got_it;
1084 s += UTF8SKIP(s);
1085 }
b2a3d410 1086 if ((minlen || !tmp) && regtry(prog,s))
a0ed51b3
LW
1087 goto got_it;
1088 break;
a0d0e21e
LW
1089 case ALNUM:
1090 while (s < strend) {
bbce6d69 1091 if (isALNUM(*s)) {
b2a3d410 1092 if (tmp && regtry(prog, s))
bbce6d69 1093 goto got_it;
1094 else
1095 tmp = doevery;
1096 }
1097 else
1098 tmp = 1;
1099 s++;
1100 }
1101 break;
a0ed51b3
LW
1102 case ALNUMUTF8:
1103 while (s < strend) {
dfe13c55 1104 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
b2a3d410 1105 if (tmp && regtry(prog, s))
a0ed51b3
LW
1106 goto got_it;
1107 else
1108 tmp = doevery;
1109 }
1110 else
1111 tmp = 1;
1112 s += UTF8SKIP(s);
1113 }
1114 break;
bbce6d69 1115 case ALNUML:
3280af22 1116 PL_reg_flags |= RF_tainted;
bbce6d69 1117 while (s < strend) {
1118 if (isALNUM_LC(*s)) {
b2a3d410 1119 if (tmp && regtry(prog, s))
a0d0e21e 1120 goto got_it;
a687059c 1121 else
a0d0e21e
LW
1122 tmp = doevery;
1123 }
1124 else
1125 tmp = 1;
1126 s++;
1127 }
1128 break;
a0ed51b3
LW
1129 case ALNUMLUTF8:
1130 PL_reg_flags |= RF_tainted;
1131 while (s < strend) {
dfe13c55 1132 if (isALNUM_LC_utf8((U8*)s)) {
b2a3d410 1133 if (tmp && regtry(prog, s))
a0ed51b3
LW
1134 goto got_it;
1135 else
1136 tmp = doevery;
1137 }
1138 else
1139 tmp = 1;
1140 s += UTF8SKIP(s);
1141 }
1142 break;
a0d0e21e
LW
1143 case NALNUM:
1144 while (s < strend) {
bbce6d69 1145 if (!isALNUM(*s)) {
b2a3d410 1146 if (tmp && regtry(prog, s))
bbce6d69 1147 goto got_it;
1148 else
1149 tmp = doevery;
1150 }
1151 else
1152 tmp = 1;
1153 s++;
1154 }
1155 break;
a0ed51b3
LW
1156 case NALNUMUTF8:
1157 while (s < strend) {
dfe13c55 1158 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
b2a3d410 1159 if (tmp && regtry(prog, s))
a0ed51b3
LW
1160 goto got_it;
1161 else
1162 tmp = doevery;
1163 }
1164 else
1165 tmp = 1;
1166 s += UTF8SKIP(s);
1167 }
1168 break;
bbce6d69 1169 case NALNUML:
3280af22 1170 PL_reg_flags |= RF_tainted;
bbce6d69 1171 while (s < strend) {
1172 if (!isALNUM_LC(*s)) {
b2a3d410 1173 if (tmp && regtry(prog, s))
a0d0e21e 1174 goto got_it;
a687059c 1175 else
a0d0e21e 1176 tmp = doevery;
a687059c 1177 }
a0d0e21e
LW
1178 else
1179 tmp = 1;
1180 s++;
1181 }
1182 break;
a0ed51b3
LW
1183 case NALNUMLUTF8:
1184 PL_reg_flags |= RF_tainted;
1185 while (s < strend) {
dfe13c55 1186 if (!isALNUM_LC_utf8((U8*)s)) {
b2a3d410 1187 if (tmp && regtry(prog, s))
a0ed51b3
LW
1188 goto got_it;
1189 else
1190 tmp = doevery;
1191 }
1192 else
1193 tmp = 1;
1194 s += UTF8SKIP(s);
1195 }
1196 break;
a0d0e21e
LW
1197 case SPACE:
1198 while (s < strend) {
1199 if (isSPACE(*s)) {
b2a3d410 1200 if (tmp && regtry(prog, s))
a0d0e21e
LW
1201 goto got_it;
1202 else
1203 tmp = doevery;
2304df62 1204 }
a0d0e21e
LW
1205 else
1206 tmp = 1;
1207 s++;
1208 }
1209 break;
a0ed51b3
LW
1210 case SPACEUTF8:
1211 while (s < strend) {
dfe13c55 1212 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
b2a3d410 1213 if (tmp && regtry(prog, s))
a0ed51b3
LW
1214 goto got_it;
1215 else
1216 tmp = doevery;
1217 }
1218 else
1219 tmp = 1;
1220 s += UTF8SKIP(s);
1221 }
1222 break;
bbce6d69 1223 case SPACEL:
3280af22 1224 PL_reg_flags |= RF_tainted;
bbce6d69 1225 while (s < strend) {
1226 if (isSPACE_LC(*s)) {
b2a3d410 1227 if (tmp && regtry(prog, s))
bbce6d69 1228 goto got_it;
1229 else
1230 tmp = doevery;
1231 }
1232 else
1233 tmp = 1;
1234 s++;
1235 }
1236 break;
a0ed51b3
LW
1237 case SPACELUTF8:
1238 PL_reg_flags |= RF_tainted;
1239 while (s < strend) {
dfe13c55 1240 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
b2a3d410 1241 if (tmp && regtry(prog, s))
a0ed51b3
LW
1242 goto got_it;
1243 else
1244 tmp = doevery;
1245 }
1246 else
1247 tmp = 1;
1248 s += UTF8SKIP(s);
1249 }
1250 break;
a0d0e21e
LW
1251 case NSPACE:
1252 while (s < strend) {
1253 if (!isSPACE(*s)) {
b2a3d410 1254 if (tmp && regtry(prog, s))
a0d0e21e
LW
1255 goto got_it;
1256 else
1257 tmp = doevery;
a687059c 1258 }
a0d0e21e
LW
1259 else
1260 tmp = 1;
1261 s++;
1262 }
1263 break;
a0ed51b3
LW
1264 case NSPACEUTF8:
1265 while (s < strend) {
dfe13c55 1266 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
b2a3d410 1267 if (tmp && regtry(prog, s))
a0ed51b3
LW
1268 goto got_it;
1269 else
1270 tmp = doevery;
1271 }
1272 else
1273 tmp = 1;
1274 s += UTF8SKIP(s);
1275 }
1276 break;
bbce6d69 1277 case NSPACEL:
3280af22 1278 PL_reg_flags |= RF_tainted;
bbce6d69 1279 while (s < strend) {
1280 if (!isSPACE_LC(*s)) {
b2a3d410 1281 if (tmp && regtry(prog, s))
bbce6d69 1282 goto got_it;
1283 else
1284 tmp = doevery;
1285 }
1286 else
1287 tmp = 1;
1288 s++;
1289 }
1290 break;
a0ed51b3
LW
1291 case NSPACELUTF8:
1292 PL_reg_flags |= RF_tainted;
1293 while (s < strend) {
dfe13c55 1294 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
b2a3d410 1295 if (tmp && regtry(prog, s))
a0ed51b3
LW
1296 goto got_it;
1297 else
1298 tmp = doevery;
1299 }
1300 else
1301 tmp = 1;
1302 s += UTF8SKIP(s);
1303 }
1304 break;
a0d0e21e
LW
1305 case DIGIT:
1306 while (s < strend) {
1307 if (isDIGIT(*s)) {
b2a3d410 1308 if (tmp && regtry(prog, s))
a0d0e21e
LW
1309 goto got_it;
1310 else
1311 tmp = doevery;
2b69d0c2 1312 }
a0d0e21e
LW
1313 else
1314 tmp = 1;
1315 s++;
1316 }
1317 break;
a0ed51b3
LW
1318 case DIGITUTF8:
1319 while (s < strend) {
dfe13c55 1320 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
b2a3d410 1321 if (tmp && regtry(prog, s))
a0ed51b3
LW
1322 goto got_it;
1323 else
1324 tmp = doevery;
1325 }
1326 else
1327 tmp = 1;
1328 s += UTF8SKIP(s);
1329 }
1330 break;
b8c5462f
JH
1331 case DIGITL:
1332 PL_reg_flags |= RF_tainted;
1333 while (s < strend) {
1334 if (isDIGIT_LC(*s)) {
b2a3d410 1335 if (tmp && regtry(prog, s))
b8c5462f
JH
1336 goto got_it;
1337 else
1338 tmp = doevery;
1339 }
1340 else
1341 tmp = 1;
1342 s++;
1343 }
1344 break;
1345 case DIGITLUTF8:
1346 PL_reg_flags |= RF_tainted;
1347 while (s < strend) {
1348 if (isDIGIT_LC_utf8((U8*)s)) {
b2a3d410 1349 if (tmp && regtry(prog, s))
b8c5462f
JH
1350 goto got_it;
1351 else
1352 tmp = doevery;
1353 }
1354 else
1355 tmp = 1;
1356 s += UTF8SKIP(s);
1357 }
1358 break;
a0d0e21e
LW
1359 case NDIGIT:
1360 while (s < strend) {
1361 if (!isDIGIT(*s)) {
b2a3d410 1362 if (tmp && regtry(prog, s))
a0d0e21e
LW
1363 goto got_it;
1364 else
1365 tmp = doevery;
a687059c 1366 }
a0d0e21e
LW
1367 else
1368 tmp = 1;
1369 s++;
1370 }
1371 break;
a0ed51b3
LW
1372 case NDIGITUTF8:
1373 while (s < strend) {
dfe13c55 1374 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
b2a3d410 1375 if (tmp && regtry(prog, s))
a0ed51b3
LW
1376 goto got_it;
1377 else
1378 tmp = doevery;
1379 }
1380 else
1381 tmp = 1;
1382 s += UTF8SKIP(s);
1383 }
1384 break;
b8c5462f
JH
1385 case NDIGITL:
1386 PL_reg_flags |= RF_tainted;
1387 while (s < strend) {
1388 if (!isDIGIT_LC(*s)) {
b2a3d410 1389 if (tmp && regtry(prog, s))
b8c5462f
JH
1390 goto got_it;
1391 else
1392 tmp = doevery;
1393 }
1394 else
1395 tmp = 1;
1396 s++;
a0ed51b3 1397 }
b8c5462f
JH
1398 break;
1399 case NDIGITLUTF8:
1400 PL_reg_flags |= RF_tainted;
1401 while (s < strend) {
1402 if (!isDIGIT_LC_utf8((U8*)s)) {
b2a3d410 1403 if (tmp && regtry(prog, s))
b8c5462f 1404 goto got_it;
cf93c79d 1405 else
b8c5462f
JH
1406 tmp = doevery;
1407 }
1408 else
1409 tmp = 1;
1410 s += UTF8SKIP(s);
1411 }
1412 break;
b3c9acc1 1413 default:
7934575e 1414 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
b3c9acc1 1415 break;
d6a28714
JH
1416 }
1417 }
1418 else {
1419 dontbother = 0;
1420 if (prog->float_substr != Nullsv) { /* Trim the end. */
1421 char *last;
1422 I32 oldpos = scream_pos;
1423
1424 if (flags & REXEC_SCREAM) {
1425 last = screaminstr(sv, prog->float_substr, s - strbeg,
1426 end_shift, &scream_pos, 1); /* last one */
1427 if (!last)
1428 last = scream_olds; /* Only one occurence. */
b8c5462f 1429 }
d6a28714
JH
1430 else {
1431 STRLEN len;
1432 char *little = SvPV(prog->float_substr, len);
1433
1434 if (SvTAIL(prog->float_substr)) {
1435 if (memEQ(strend - len + 1, little, len - 1))
1436 last = strend - len + 1;
1437 else if (!PL_multiline)
1438 last = memEQ(strend - len, little, len)
1439 ? strend - len : Nullch;
b8c5462f 1440 else
d6a28714
JH
1441 goto find_last;
1442 } else {
1443 find_last:
1444 if (len)
1445 last = rninstr(s, strend, little, little + len);
b8c5462f 1446 else
d6a28714 1447 last = strend; /* matching `$' */
b8c5462f 1448 }
b8c5462f 1449 }
d6a28714
JH
1450 if (last == NULL) goto phooey; /* Should not happen! */
1451 dontbother = strend - last + prog->float_min_offset;
1452 }
1453 if (minlen && (dontbother < minlen))
1454 dontbother = minlen - 1;
1455 strend -= dontbother; /* this one's always in bytes! */
1456 /* We don't know much -- general case. */
1457 if (UTF) {
1458 for (;;) {
1459 if (regtry(prog, s))
1460 goto got_it;
1461 if (s >= strend)
1462 break;
b8c5462f 1463 s += UTF8SKIP(s);
d6a28714
JH
1464 };
1465 }
1466 else {
1467 do {
1468 if (regtry(prog, s))
1469 goto got_it;
1470 } while (s++ < strend);
1471 }
1472 }
1473
1474 /* Failure. */
1475 goto phooey;
1476
1477got_it:
1478 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1479
1480 if (PL_reg_eval_set) {
1481 /* Preserve the current value of $^R */
1482 if (oreplsv != GvSV(PL_replgv))
1483 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1484 restored, the value remains
1485 the same. */
1486 restore_pos(aTHXo_ 0);
1487 }
1488
1489 /* make sure $`, $&, $', and $digit will work later */
1490 if ( !(flags & REXEC_NOT_FIRST) ) {
1491 if (RX_MATCH_COPIED(prog)) {
1492 Safefree(prog->subbeg);
1493 RX_MATCH_COPIED_off(prog);
1494 }
1495 if (flags & REXEC_COPY_STR) {
1496 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1497
1498 s = savepvn(strbeg, i);
1499 prog->subbeg = s;
1500 prog->sublen = i;
1501 RX_MATCH_COPIED_on(prog);
1502 }
1503 else {
1504 prog->subbeg = strbeg;
1505 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1506 }
1507 }
1508
1509 return 1;
1510
1511phooey:
1512 if (PL_reg_eval_set)
1513 restore_pos(aTHXo_ 0);
1514 return 0;
1515}
1516
1517/*
1518 - regtry - try match at specific point
1519 */
1520STATIC I32 /* 0 failure, 1 success */
1521S_regtry(pTHX_ regexp *prog, char *startpos)
1522{
1523 dTHR;
1524 register I32 i;
1525 register I32 *sp;
1526 register I32 *ep;
1527 CHECKPOINT lastcp;
1528
1529 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1530 MAGIC *mg;
1531
1532 PL_reg_eval_set = RS_init;
1533 DEBUG_r(DEBUG_s(
b900a521
JH
1534 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1535 (IV)(PL_stack_sp - PL_stack_base));
d6a28714
JH
1536 ));
1537 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
1538 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1539 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1540 SAVETMPS;
1541 /* Apparently this is not needed, judging by wantarray. */
1542 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1543 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1544
1545 if (PL_reg_sv) {
1546 /* Make $_ available to executed code. */
1547 if (PL_reg_sv != DEFSV) {
1548 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1549 SAVESPTR(DEFSV);
1550 DEFSV = PL_reg_sv;
b8c5462f 1551 }
d6a28714
JH
1552
1553 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1554 && (mg = mg_find(PL_reg_sv, 'g')))) {
1555 /* prepare for quick setting of pos */
1556 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1557 mg = mg_find(PL_reg_sv, 'g');
1558 mg->mg_len = -1;
b8c5462f 1559 }
d6a28714
JH
1560 PL_reg_magic = mg;
1561 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1562 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1563 }
1564 if (!PL_reg_curpm)
1565 New(22,PL_reg_curpm, 1, PMOP);
1566 PL_reg_curpm->op_pmregexp = prog;
1567 PL_reg_oldcurpm = PL_curpm;
1568 PL_curpm = PL_reg_curpm;
1569 if (RX_MATCH_COPIED(prog)) {
1570 /* Here is a serious problem: we cannot rewrite subbeg,
1571 since it may be needed if this match fails. Thus
1572 $` inside (?{}) could fail... */
1573 PL_reg_oldsaved = prog->subbeg;
1574 PL_reg_oldsavedlen = prog->sublen;
1575 RX_MATCH_COPIED_off(prog);
1576 }
1577 else
1578 PL_reg_oldsaved = Nullch;
1579 prog->subbeg = PL_bostr;
1580 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1581 }
1582 prog->startp[0] = startpos - PL_bostr;
1583 PL_reginput = startpos;
1584 PL_regstartp = prog->startp;
1585 PL_regendp = prog->endp;
1586 PL_reglastparen = &prog->lastparen;
1587 prog->lastparen = 0;
1588 PL_regsize = 0;
1589 DEBUG_r(PL_reg_starttry = startpos);
1590 if (PL_reg_start_tmpl <= prog->nparens) {
1591 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1592 if(PL_reg_start_tmp)
1593 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1594 else
1595 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1596 }
1597
1598 /* XXXX What this code is doing here?!!! There should be no need
1599 to do this again and again, PL_reglastparen should take care of
1600 this! */
1601 sp = prog->startp;
1602 ep = prog->endp;
1603 if (prog->nparens) {
1604 for (i = prog->nparens; i >= 1; i--) {
1605 *++sp = -1;
1606 *++ep = -1;
1607 }
1608 }
1609 REGCP_SET;
1610 if (regmatch(prog->program + 1)) {
1611 prog->endp[0] = PL_reginput - PL_bostr;
1612 return 1;
1613 }
1614 REGCP_UNWIND;
1615 return 0;
1616}
1617
1618/*
1619 - regmatch - main matching routine
1620 *
1621 * Conceptually the strategy is simple: check to see whether the current
1622 * node matches, call self recursively to see whether the rest matches,
1623 * and then act accordingly. In practice we make some effort to avoid
1624 * recursion, in particular by going through "ordinary" nodes (that don't
1625 * need to know whether the rest of the match failed) by a loop instead of
1626 * by recursion.
1627 */
1628/* [lwall] I've hoisted the register declarations to the outer block in order to
1629 * maybe save a little bit of pushing and popping on the stack. It also takes
1630 * advantage of machines that use a register save mask on subroutine entry.
1631 */
1632STATIC I32 /* 0 failure, 1 success */
1633S_regmatch(pTHX_ regnode *prog)
1634{
1635 dTHR;
1636 register regnode *scan; /* Current node. */
1637 regnode *next; /* Next node. */
1638 regnode *inner; /* Next node in internal branch. */
1639 register I32 nextchr; /* renamed nextchr - nextchar colides with
1640 function of same name */
1641 register I32 n; /* no or next */
1642 register I32 ln; /* len or last */
1643 register char *s; /* operand or save */
1644 register char *locinput = PL_reginput;
1645 register I32 c1, c2, paren; /* case fold search, parenth */
1646 int minmod = 0, sw = 0, logical = 0;
1647#ifdef DEBUGGING
1648 PL_regindent++;
1649#endif
1650
1651 /* Note that nextchr is a byte even in UTF */
1652 nextchr = UCHARAT(locinput);
1653 scan = prog;
1654 while (scan != NULL) {
1655#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
1656#ifdef DEBUGGING
1657# define sayYES goto yes
1658# define sayNO goto no
7821416a
IZ
1659# define sayYES_FINAL goto yes_final
1660# define sayYES_LOUD goto yes_loud
1661# define sayNO_FINAL goto no_final
1662# define sayNO_SILENT goto do_no
d6a28714
JH
1663# define saySAME(x) if (x) goto yes; else goto no
1664# define REPORT_CODE_OFF 24
1665#else
1666# define sayYES return 1
1667# define sayNO return 0
7821416a
IZ
1668# define sayYES_FINAL return 1
1669# define sayYES_LOUD return 1
1670# define sayNO_FINAL return 0
1671# define sayNO_SILENT return 0
d6a28714
JH
1672# define saySAME(x) return x
1673#endif
1674 DEBUG_r( {
1675 SV *prop = sv_newmortal();
1676 int docolor = *PL_colors[0];
1677 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1678 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
1679 /* The part of the string before starttry has one color
1680 (pref0_len chars), between starttry and current
1681 position another one (pref_len - pref0_len chars),
1682 after the current position the third one.
1683 We assume that pref0_len <= pref_len, otherwise we
1684 decrease pref0_len. */
1685 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1686 ? (5 + taill) - l : locinput - PL_bostr);
1687 int pref0_len = pref_len - (locinput - PL_reg_starttry);
1688
1689 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1690 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1691 ? (5 + taill) - pref_len : PL_regeol - locinput);
1692 if (pref0_len < 0)
1693 pref0_len = 0;
1694 if (pref0_len > pref_len)
1695 pref0_len = pref_len;
1696 regprop(prop, scan);
1697 PerlIO_printf(Perl_debug_log,
b900a521
JH
1698 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
1699 (IV)(locinput - PL_bostr),
d6a28714
JH
1700 PL_colors[4], pref0_len,
1701 locinput - pref_len, PL_colors[5],
1702 PL_colors[2], pref_len - pref0_len,
1703 locinput - pref_len + pref0_len, PL_colors[3],
1704 (docolor ? "" : "> <"),
1705 PL_colors[0], l, locinput, PL_colors[1],
1706 15 - l - pref_len + 1,
1707 "",
b900a521 1708 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
1709 SvPVX(prop));
1710 } );
1711
1712 next = scan + NEXT_OFF(scan);
1713 if (next == scan)
1714 next = NULL;
1715
1716 switch (OP(scan)) {
1717 case BOL:
1718 if (locinput == PL_bostr
1719 ? PL_regprev == '\n'
1720 : (PL_multiline &&
1721 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1722 {
1723 /* regtill = regbol; */
b8c5462f
JH
1724 break;
1725 }
d6a28714
JH
1726 sayNO;
1727 case MBOL:
1728 if (locinput == PL_bostr
1729 ? PL_regprev == '\n'
1730 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
1731 {
b8c5462f
JH
1732 break;
1733 }
d6a28714
JH
1734 sayNO;
1735 case SBOL:
1736 if (locinput == PL_regbol && PL_regprev == '\n')
b8c5462f 1737 break;
d6a28714
JH
1738 sayNO;
1739 case GPOS:
1740 if (locinput == PL_reg_ganch)
1741 break;
1742 sayNO;
1743 case EOL:
1744 if (PL_multiline)
1745 goto meol;
1746 else
1747 goto seol;
1748 case MEOL:
1749 meol:
1750 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1751 sayNO;
b8c5462f 1752 break;
d6a28714
JH
1753 case SEOL:
1754 seol:
1755 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 1756 sayNO;
d6a28714 1757 if (PL_regeol - locinput > 1)
b8c5462f 1758 sayNO;
b8c5462f 1759 break;
d6a28714
JH
1760 case EOS:
1761 if (PL_regeol != locinput)
b8c5462f 1762 sayNO;
d6a28714
JH
1763 break;
1764 case SANYUTF8:
b8c5462f 1765 if (nextchr & 0x80) {
b8c5462f 1766 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1767 if (locinput > PL_regeol)
1768 sayNO;
b8c5462f
JH
1769 nextchr = UCHARAT(locinput);
1770 break;
1771 }
d6a28714 1772 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1773 sayNO;
b8c5462f 1774 nextchr = UCHARAT(++locinput);
a0d0e21e 1775 break;
d6a28714
JH
1776 case SANY:
1777 if (!nextchr && locinput >= PL_regeol)
b8c5462f
JH
1778 sayNO;
1779 nextchr = UCHARAT(++locinput);
b85d18e9 1780 break;
d6a28714 1781 case ANYUTF8:
a0ed51b3 1782 if (nextchr & 0x80) {
b8c5462f 1783 locinput += PL_utf8skip[nextchr];
d6a28714
JH
1784 if (locinput > PL_regeol)
1785 sayNO;
a0ed51b3
LW
1786 nextchr = UCHARAT(locinput);
1787 break;
1788 }
d6a28714 1789 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
a0ed51b3
LW
1790 sayNO;
1791 nextchr = UCHARAT(++locinput);
1792 break;
d6a28714
JH
1793 case REG_ANY:
1794 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1795 sayNO;
76e3520e 1796 nextchr = UCHARAT(++locinput);
a0d0e21e 1797 break;
d6a28714 1798 case EXACT:
cd439c50
IZ
1799 s = STRING(scan);
1800 ln = STR_LEN(scan);
d6a28714
JH
1801 /* Inline the first character, for speed. */
1802 if (UCHARAT(s) != nextchr)
1803 sayNO;
1804 if (PL_regeol - locinput < ln)
1805 sayNO;
1806 if (ln > 1 && memNE(s, locinput, ln))
1807 sayNO;
1808 locinput += ln;
1809 nextchr = UCHARAT(locinput);
1810 break;
1811 case EXACTFL:
b8c5462f
JH
1812 PL_reg_flags |= RF_tainted;
1813 /* FALL THROUGH */
d6a28714 1814 case EXACTF:
cd439c50
IZ
1815 s = STRING(scan);
1816 ln = STR_LEN(scan);
d6a28714
JH
1817
1818 if (UTF) {
1819 char *l = locinput;
1820 char *e = s + ln;
1821 c1 = OP(scan) == EXACTF;
1822 while (s < e) {
1823 if (l >= PL_regeol)
1824 sayNO;
1825 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1826 toLOWER_utf8((U8*)l) :
1827 toLOWER_LC_utf8((U8*)l)))
1828 {
1829 sayNO;
1830 }
1831 s += UTF8SKIP(s);
1832 l += UTF8SKIP(l);
b8c5462f 1833 }
d6a28714 1834 locinput = l;
a0ed51b3
LW
1835 nextchr = UCHARAT(locinput);
1836 break;
1837 }
d6a28714
JH
1838
1839 /* Inline the first character, for speed. */
1840 if (UCHARAT(s) != nextchr &&
1841 UCHARAT(s) != ((OP(scan) == EXACTF)
1842 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 1843 sayNO;
d6a28714 1844 if (PL_regeol - locinput < ln)
b8c5462f 1845 sayNO;
d6a28714
JH
1846 if (ln > 1 && (OP(scan) == EXACTF
1847 ? ibcmp(s, locinput, ln)
1848 : ibcmp_locale(s, locinput, ln)))
4633a7c4 1849 sayNO;
d6a28714
JH
1850 locinput += ln;
1851 nextchr = UCHARAT(locinput);
a0d0e21e 1852 break;
d6a28714 1853 case ANYOFUTF8:
d6a28714 1854 if (!REGINCLASSUTF8(scan, (U8*)locinput))
4633a7c4 1855 sayNO;
d6a28714
JH
1856 if (locinput >= PL_regeol)
1857 sayNO;
1858 locinput += PL_utf8skip[nextchr];
1859 nextchr = UCHARAT(locinput);
1860 break;
1861 case ANYOF:
d6a28714 1862 if (nextchr < 0)
b8c5462f 1863 nextchr = UCHARAT(locinput);
936ed897 1864 if (!REGINCLASS(scan, nextchr))
d6a28714
JH
1865 sayNO;
1866 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1867 sayNO;
b8c5462f
JH
1868 nextchr = UCHARAT(++locinput);
1869 break;
d6a28714 1870 case ALNUML:
b8c5462f
JH
1871 PL_reg_flags |= RF_tainted;
1872 /* FALL THROUGH */
d6a28714 1873 case ALNUM:
b8c5462f 1874 if (!nextchr)
4633a7c4 1875 sayNO;
d6a28714
JH
1876 if (!(OP(scan) == ALNUM
1877 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
b8c5462f
JH
1878 sayNO;
1879 nextchr = UCHARAT(++locinput);
bbce6d69 1880 break;
d6a28714 1881 case ALNUMLUTF8:
3280af22 1882 PL_reg_flags |= RF_tainted;
bbce6d69 1883 /* FALL THROUGH */
d6a28714 1884 case ALNUMUTF8:
b8c5462f
JH
1885 if (!nextchr)
1886 sayNO;
1887 if (nextchr & 0x80) {
d6a28714
JH
1888 if (!(OP(scan) == ALNUMUTF8
1889 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1890 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
1891 {
1892 sayNO;
a0ed51b3 1893 }
b8c5462f 1894 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1895 nextchr = UCHARAT(locinput);
1896 break;
1897 }
d6a28714
JH
1898 if (!(OP(scan) == ALNUMUTF8
1899 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 1900 sayNO;
b8c5462f 1901 nextchr = UCHARAT(++locinput);
a0d0e21e 1902 break;
d6a28714 1903 case NALNUML:
b8c5462f
JH
1904 PL_reg_flags |= RF_tainted;
1905 /* FALL THROUGH */
d6a28714
JH
1906 case NALNUM:
1907 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 1908 sayNO;
d6a28714
JH
1909 if (OP(scan) == NALNUM
1910 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
a0ed51b3 1911 sayNO;
b8c5462f 1912 nextchr = UCHARAT(++locinput);
a0ed51b3 1913 break;
d6a28714 1914 case NALNUMLUTF8:
b8c5462f
JH
1915 PL_reg_flags |= RF_tainted;
1916 /* FALL THROUGH */
d6a28714 1917 case NALNUMUTF8:
3280af22 1918 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1919 sayNO;
b8c5462f 1920 if (nextchr & 0x80) {
d6a28714
JH
1921 if (OP(scan) == NALNUMUTF8
1922 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1923 : isALNUM_LC_utf8((U8*)locinput))
1924 {
b8c5462f 1925 sayNO;
d6a28714 1926 }
b8c5462f
JH
1927 locinput += PL_utf8skip[nextchr];
1928 nextchr = UCHARAT(locinput);
1929 break;
1930 }
d6a28714
JH
1931 if (OP(scan) == NALNUMUTF8
1932 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 1933 sayNO;
76e3520e 1934 nextchr = UCHARAT(++locinput);
a0d0e21e 1935 break;
d6a28714
JH
1936 case BOUNDL:
1937 case NBOUNDL:
3280af22 1938 PL_reg_flags |= RF_tainted;
bbce6d69 1939 /* FALL THROUGH */
d6a28714
JH
1940 case BOUND:
1941 case NBOUND:
1942 /* was last char in word? */
1943 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
1944 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1945 ln = isALNUM(ln);
1946 n = isALNUM(nextchr);
1947 }
1948 else {
1949 ln = isALNUM_LC(ln);
1950 n = isALNUM_LC(nextchr);
1951 }
1952 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1953 sayNO;
a0d0e21e 1954 break;
d6a28714
JH
1955 case BOUNDLUTF8:
1956 case NBOUNDLUTF8:
a0ed51b3
LW
1957 PL_reg_flags |= RF_tainted;
1958 /* FALL THROUGH */
d6a28714
JH
1959 case BOUNDUTF8:
1960 case NBOUNDUTF8:
1961 /* was last char in word? */
1962 ln = (locinput != PL_regbol)
1963 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
1964 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1965 ln = isALNUM_uni(ln);
1966 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3 1967 }
d6a28714
JH
1968 else {
1969 ln = isALNUM_LC_uni(ln);
1970 n = isALNUM_LC_utf8((U8*)locinput);
1971 }
1972 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
a0ed51b3 1973 sayNO;
a0ed51b3 1974 break;
d6a28714 1975 case SPACEL:
3280af22 1976 PL_reg_flags |= RF_tainted;
bbce6d69 1977 /* FALL THROUGH */
d6a28714
JH
1978 case SPACE:
1979 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1980 sayNO;
d6a28714
JH
1981 if (!(OP(scan) == SPACE
1982 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1983 sayNO;
76e3520e 1984 nextchr = UCHARAT(++locinput);
a0d0e21e 1985 break;
d6a28714 1986 case SPACELUTF8:
a0ed51b3
LW
1987 PL_reg_flags |= RF_tainted;
1988 /* FALL THROUGH */
d6a28714 1989 case SPACEUTF8:
a0ed51b3
LW
1990 if (!nextchr && locinput >= PL_regeol)
1991 sayNO;
1992 if (nextchr & 0x80) {
d6a28714
JH
1993 if (!(OP(scan) == SPACEUTF8
1994 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1995 : isSPACE_LC_utf8((U8*)locinput)))
1996 {
a0ed51b3 1997 sayNO;
d6a28714 1998 }
6f06b55f 1999 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2000 nextchr = UCHARAT(locinput);
2001 break;
2002 }
d6a28714
JH
2003 if (!(OP(scan) == SPACEUTF8
2004 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
a0ed51b3
LW
2005 sayNO;
2006 nextchr = UCHARAT(++locinput);
2007 break;
d6a28714 2008 case NSPACEL:
3280af22 2009 PL_reg_flags |= RF_tainted;
bbce6d69 2010 /* FALL THROUGH */
d6a28714 2011 case NSPACE:
b8c5462f
JH
2012 if (!nextchr)
2013 sayNO;
d6a28714
JH
2014 if (OP(scan) == SPACE
2015 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2016 sayNO;
b8c5462f 2017 nextchr = UCHARAT(++locinput);
a0d0e21e 2018 break;
d6a28714 2019 case NSPACELUTF8:
a0ed51b3
LW
2020 PL_reg_flags |= RF_tainted;
2021 /* FALL THROUGH */
d6a28714 2022 case NSPACEUTF8:
b8c5462f
JH
2023 if (!nextchr)
2024 sayNO;
2025 if (nextchr & 0x80) {
d6a28714
JH
2026 if (OP(scan) == NSPACEUTF8
2027 ? swash_fetch(PL_utf8_space,(U8*)locinput)
2028 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2029 {
2030 sayNO;
2031 }
2032 locinput += PL_utf8skip[nextchr];
2033 nextchr = UCHARAT(locinput);
2034 break;
a0ed51b3 2035 }
d6a28714
JH
2036 if (OP(scan) == NSPACEUTF8
2037 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2038 sayNO;
76e3520e 2039 nextchr = UCHARAT(++locinput);
a0d0e21e 2040 break;
d6a28714 2041 case DIGITL:
a0ed51b3
LW
2042 PL_reg_flags |= RF_tainted;
2043 /* FALL THROUGH */
d6a28714 2044 case DIGIT:
a0ed51b3
LW
2045 if (!nextchr && locinput >= PL_regeol)
2046 sayNO;
d6a28714
JH
2047 if (!(OP(scan) == DIGIT
2048 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
4633a7c4 2049 sayNO;
76e3520e 2050 nextchr = UCHARAT(++locinput);
a0d0e21e 2051 break;
d6a28714 2052 case DIGITLUTF8:
a0ed51b3
LW
2053 PL_reg_flags |= RF_tainted;
2054 /* FALL THROUGH */
d6a28714 2055 case DIGITUTF8:
a0ed51b3
LW
2056 if (!nextchr)
2057 sayNO;
2058 if (nextchr & 0x80) {
d6a28714
JH
2059 if (OP(scan) == NDIGITUTF8
2060 ? swash_fetch(PL_utf8_digit,(U8*)locinput)
2061 : isDIGIT_LC_utf8((U8*)locinput))
dfe13c55 2062 {
a0ed51b3 2063 sayNO;
dfe13c55 2064 }
6f06b55f 2065 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2066 nextchr = UCHARAT(locinput);
2067 break;
2068 }
d6a28714 2069 if (!isDIGIT(nextchr))
a0ed51b3
LW
2070 sayNO;
2071 nextchr = UCHARAT(++locinput);
2072 break;
d6a28714 2073 case NDIGITL:
b8c5462f
JH
2074 PL_reg_flags |= RF_tainted;
2075 /* FALL THROUGH */
d6a28714 2076 case NDIGIT:
b8c5462f
JH
2077 if (!nextchr)
2078 sayNO;
d6a28714
JH
2079 if (OP(scan) == DIGIT
2080 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
4633a7c4 2081 sayNO;
76e3520e 2082 nextchr = UCHARAT(++locinput);
a0d0e21e 2083 break;
d6a28714 2084 case NDIGITLUTF8:
b8c5462f
JH
2085 PL_reg_flags |= RF_tainted;
2086 /* FALL THROUGH */
d6a28714 2087 case NDIGITUTF8:
b8c5462f
JH
2088 if (!nextchr && locinput >= PL_regeol)
2089 sayNO;
a0ed51b3 2090 if (nextchr & 0x80) {
d6a28714 2091 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 2092 sayNO;
6f06b55f 2093 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2094 nextchr = UCHARAT(locinput);
2095 break;
2096 }
d6a28714 2097 if (isDIGIT(nextchr))
a0ed51b3
LW
2098 sayNO;
2099 nextchr = UCHARAT(++locinput);
2100 break;
2101 case CLUMP:
dfe13c55 2102 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2103 sayNO;
6f06b55f 2104 locinput += PL_utf8skip[nextchr];
dfe13c55 2105 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2106 locinput += UTF8SKIP(locinput);
2107 if (locinput > PL_regeol)
2108 sayNO;
2109 nextchr = UCHARAT(locinput);
2110 break;
c8756f30 2111 case REFFL:
3280af22 2112 PL_reg_flags |= RF_tainted;
c8756f30 2113 /* FALL THROUGH */
c277df42 2114 case REF:
c8756f30 2115 case REFF:
c277df42 2116 n = ARG(scan); /* which paren pair */
cf93c79d 2117 ln = PL_regstartp[n];
2c2d71f5 2118 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2119 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2120 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2121 if (ln == PL_regendp[n])
a0d0e21e 2122 break;
a0ed51b3 2123
cf93c79d 2124 s = PL_bostr + ln;
a0ed51b3
LW
2125 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
2126 char *l = locinput;
cf93c79d 2127 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2128 /*
2129 * Note that we can't do the "other character" lookup trick as
2130 * in the 8-bit case (no pun intended) because in Unicode we
2131 * have to map both upper and title case to lower case.
2132 */
2133 if (OP(scan) == REFF) {
2134 while (s < e) {
2135 if (l >= PL_regeol)
2136 sayNO;
dfe13c55 2137 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2138 sayNO;
2139 s += UTF8SKIP(s);
2140 l += UTF8SKIP(l);
2141 }
2142 }
2143 else {
2144 while (s < e) {
2145 if (l >= PL_regeol)
2146 sayNO;
dfe13c55 2147 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2148 sayNO;
2149 s += UTF8SKIP(s);
2150 l += UTF8SKIP(l);
2151 }
2152 }
2153 locinput = l;
2154 nextchr = UCHARAT(locinput);
2155 break;
2156 }
2157
a0d0e21e 2158 /* Inline the first character, for speed. */
76e3520e 2159 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2160 (OP(scan) == REF ||
2161 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2162 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2163 sayNO;
cf93c79d 2164 ln = PL_regendp[n] - ln;
3280af22 2165 if (locinput + ln > PL_regeol)
4633a7c4 2166 sayNO;
c8756f30
AK
2167 if (ln > 1 && (OP(scan) == REF
2168 ? memNE(s, locinput, ln)
2169 : (OP(scan) == REFF
2170 ? ibcmp(s, locinput, ln)
2171 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2172 sayNO;
a0d0e21e 2173 locinput += ln;
76e3520e 2174 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2175 break;
2176
2177 case NOTHING:
c277df42 2178 case TAIL:
a0d0e21e
LW
2179 break;
2180 case BACK:
2181 break;
c277df42
IZ
2182 case EVAL:
2183 {
2184 dSP;
533c011a 2185 OP_4tree *oop = PL_op;
3280af22
NIS
2186 COP *ocurcop = PL_curcop;
2187 SV **ocurpad = PL_curpad;
c277df42
IZ
2188 SV *ret;
2189
2190 n = ARG(scan);
533c011a 2191 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2192 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2193 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2194 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2195
cea2e8a9 2196 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2197 SPAGAIN;
2198 ret = POPs;
2199 PUTBACK;
2200
0f5d15d6
IZ
2201 PL_op = oop;
2202 PL_curpad = ocurpad;
2203 PL_curcop = ocurcop;
c277df42 2204 if (logical) {
0f5d15d6
IZ
2205 if (logical == 2) { /* Postponed subexpression. */
2206 regexp *re;
22c35a8c 2207 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2208 re_cc_state state;
0f5d15d6
IZ
2209 CHECKPOINT cp, lastcp;
2210
2211 if(SvROK(ret) || SvRMAGICAL(ret)) {
2212 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2213
2214 if(SvMAGICAL(sv))
2215 mg = mg_find(sv, 'r');
2216 }
2217 if (mg) {
2218 re = (regexp *)mg->mg_obj;
df0003d4 2219 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2220 }
2221 else {
2222 STRLEN len;
2223 char *t = SvPV(ret, len);
2224 PMOP pm;
2225 char *oprecomp = PL_regprecomp;
2226 I32 osize = PL_regsize;
2227 I32 onpar = PL_regnpar;
2228
2229 pm.op_pmflags = 0;
cea2e8a9 2230 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
2231 if (!(SvFLAGS(ret)
2232 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2233 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2234 PL_regprecomp = oprecomp;
2235 PL_regsize = osize;
2236 PL_regnpar = onpar;
2237 }
2238 DEBUG_r(
2239 PerlIO_printf(Perl_debug_log,
2240 "Entering embedded `%s%.60s%s%s'\n",
2241 PL_colors[0],
2242 re->precomp,
2243 PL_colors[1],
2244 (strlen(re->precomp) > 60 ? "..." : ""))
2245 );
2246 state.node = next;
2247 state.prev = PL_reg_call_cc;
2248 state.cc = PL_regcc;
2249 state.re = PL_reg_re;
2250
2ab05381 2251 PL_regcc = 0;
0f5d15d6
IZ
2252
2253 cp = regcppush(0); /* Save *all* the positions. */
2254 REGCP_SET;
2255 cache_re(re);
2256 state.ss = PL_savestack_ix;
2257 *PL_reglastparen = 0;
2258 PL_reg_call_cc = &state;
2259 PL_reginput = locinput;
2c2d71f5
JH
2260
2261 /* XXXX This is too dramatic a measure... */
2262 PL_reg_maxiter = 0;
2263
0f5d15d6 2264 if (regmatch(re->program + 1)) {
2c914db6
IZ
2265 /* Even though we succeeded, we need to restore
2266 global variables, since we may be wrapped inside
2267 SUSPEND, thus the match may be not finished yet. */
2268
2269 /* XXXX Do this only if SUSPENDed? */
2270 PL_reg_call_cc = state.prev;
2271 PL_regcc = state.cc;
2272 PL_reg_re = state.re;
2273 cache_re(PL_reg_re);
2274
2275 /* XXXX This is too dramatic a measure... */
2276 PL_reg_maxiter = 0;
2277
2278 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2279 ReREFCNT_dec(re);
2280 regcpblow(cp);
2281 sayYES;
2282 }
0f5d15d6
IZ
2283 ReREFCNT_dec(re);
2284 REGCP_UNWIND;
2285 regcppop();
2286 PL_reg_call_cc = state.prev;
2287 PL_regcc = state.cc;
2288 PL_reg_re = state.re;
d3790889 2289 cache_re(PL_reg_re);
2c2d71f5
JH
2290
2291 /* XXXX This is too dramatic a measure... */
2292 PL_reg_maxiter = 0;
2293
0f5d15d6
IZ
2294 sayNO;
2295 }
c277df42 2296 sw = SvTRUE(ret);
0f5d15d6 2297 logical = 0;
a0ed51b3
LW
2298 }
2299 else
3280af22 2300 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2301 break;
2302 }
a0d0e21e 2303 case OPEN:
c277df42 2304 n = ARG(scan); /* which paren pair */
3280af22
NIS
2305 PL_reg_start_tmp[n] = locinput;
2306 if (n > PL_regsize)
2307 PL_regsize = n;
a0d0e21e
LW
2308 break;
2309 case CLOSE:
c277df42 2310 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2311 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2312 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2313 if (n > *PL_reglastparen)
2314 *PL_reglastparen = n;
a0d0e21e 2315 break;
c277df42
IZ
2316 case GROUPP:
2317 n = ARG(scan); /* which paren pair */
cf93c79d 2318 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2319 break;
2320 case IFTHEN:
2c2d71f5 2321 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2322 if (sw)
2323 next = NEXTOPER(NEXTOPER(scan));
2324 else {
2325 next = scan + ARG(scan);
2326 if (OP(next) == IFTHEN) /* Fake one. */
2327 next = NEXTOPER(NEXTOPER(next));
2328 }
2329 break;
2330 case LOGICAL:
0f5d15d6 2331 logical = scan->flags;
c277df42 2332 break;
2ab05381
IZ
2333/*******************************************************************
2334 PL_regcc contains infoblock about the innermost (...)* loop, and
2335 a pointer to the next outer infoblock.
2336
2337 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2338
2339 1) After matching X, regnode for CURLYX is processed;
2340
2341 2) This regnode creates infoblock on the stack, and calls
2342 regmatch() recursively with the starting point at WHILEM node;
2343
2344 3) Each hit of WHILEM node tries to match A and Z (in the order
2345 depending on the current iteration, min/max of {min,max} and
2346 greediness). The information about where are nodes for "A"
2347 and "Z" is read from the infoblock, as is info on how many times "A"
2348 was already matched, and greediness.
2349
2350 4) After A matches, the same WHILEM node is hit again.
2351
2352 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2353 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2354 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2355 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2356 of the external loop.
2357
2358 Currently present infoblocks form a tree with a stem formed by PL_curcc
2359 and whatever it mentions via ->next, and additional attached trees
2360 corresponding to temporarily unset infoblocks as in "5" above.
2361
2362 In the following picture infoblocks for outer loop of
2363 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2364 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2365 infoblocks are drawn below the "reset" infoblock.
2366
2367 In fact in the picture below we do not show failed matches for Z and T
2368 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2369 more obvious *why* one needs to *temporary* unset infoblocks.]
2370
2371 Matched REx position InfoBlocks Comment
2372 (Y(A)*?Z)*?T x
2373 Y(A)*?Z)*?T x <- O
2374 Y (A)*?Z)*?T x <- O
2375 Y A)*?Z)*?T x <- O <- I
2376 YA )*?Z)*?T x <- O <- I
2377 YA A)*?Z)*?T x <- O <- I
2378 YAA )*?Z)*?T x <- O <- I
2379 YAA Z)*?T x <- O # Temporary unset I
2380 I
2381
2382 YAAZ Y(A)*?Z)*?T x <- O
2383 I
2384
2385 YAAZY (A)*?Z)*?T x <- O
2386 I
2387
2388 YAAZY A)*?Z)*?T x <- O <- I
2389 I
2390
2391 YAAZYA )*?Z)*?T x <- O <- I
2392 I
2393
2394 YAAZYA Z)*?T x <- O # Temporary unset I
2395 I,I
2396
2397 YAAZYAZ )*?T x <- O
2398 I,I
2399
2400 YAAZYAZ T x # Temporary unset O
2401 O
2402 I,I
2403
2404 YAAZYAZT x
2405 O
2406 I,I
2407 *******************************************************************/
a0d0e21e
LW
2408 case CURLYX: {
2409 CURCUR cc;
3280af22 2410 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
2411
2412 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2413 next += ARG(next);
3280af22
NIS
2414 cc.oldcc = PL_regcc;
2415 PL_regcc = &cc;
2416 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
2417 cc.cur = -1;
2418 cc.min = ARG1(scan);
2419 cc.max = ARG2(scan);
c277df42 2420 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2421 cc.next = next;
2422 cc.minmod = minmod;
2423 cc.lastloc = 0;
3280af22 2424 PL_reginput = locinput;
a0d0e21e
LW
2425 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2426 regcpblow(cp);
3280af22 2427 PL_regcc = cc.oldcc;
4633a7c4 2428 saySAME(n);
a0d0e21e
LW
2429 }
2430 /* NOT REACHED */
2431 case WHILEM: {
2432 /*
2433 * This is really hard to understand, because after we match
2434 * what we're trying to match, we must make sure the rest of
2c2d71f5 2435 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2436 * to go back UP the parse tree by recursing ever deeper. And
2437 * if it fails, we have to reset our parent's current state
2438 * that we can try again after backing off.
2439 */
2440
c277df42 2441 CHECKPOINT cp, lastcp;
3280af22 2442 CURCUR* cc = PL_regcc;
c277df42
IZ
2443 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2444
4633a7c4 2445 n = cc->cur + 1; /* how many we know we matched */
3280af22 2446 PL_reginput = locinput;
a0d0e21e 2447
c277df42
IZ
2448 DEBUG_r(
2449 PerlIO_printf(Perl_debug_log,
2450 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2451 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2452 (long)n, (long)cc->min,
2453 (long)cc->max, (long)cc)
2454 );
4633a7c4 2455
a0d0e21e
LW
2456 /* If degenerate scan matches "", assume scan done. */
2457
579cf2c3 2458 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2459 PL_regcc = cc->oldcc;
2ab05381
IZ
2460 if (PL_regcc)
2461 ln = PL_regcc->cur;
c277df42 2462 DEBUG_r(
c3464db5
DD
2463 PerlIO_printf(Perl_debug_log,
2464 "%*s empty match detected, try continuation...\n",
3280af22 2465 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2466 );
a0d0e21e 2467 if (regmatch(cc->next))
4633a7c4 2468 sayYES;
2ab05381
IZ
2469 if (PL_regcc)
2470 PL_regcc->cur = ln;
3280af22 2471 PL_regcc = cc;
4633a7c4 2472 sayNO;
a0d0e21e
LW
2473 }
2474
2475 /* First just match a string of min scans. */
2476
2477 if (n < cc->min) {
2478 cc->cur = n;
2479 cc->lastloc = locinput;
4633a7c4
LW
2480 if (regmatch(cc->scan))
2481 sayYES;
2482 cc->cur = n - 1;
c277df42 2483 cc->lastloc = lastloc;
4633a7c4 2484 sayNO;
a0d0e21e
LW
2485 }
2486
2c2d71f5
JH
2487 if (scan->flags) {
2488 /* Check whether we already were at this position.
2489 Postpone detection until we know the match is not
2490 *that* much linear. */
2491 if (!PL_reg_maxiter) {
2492 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2493 PL_reg_leftiter = PL_reg_maxiter;
2494 }
2495 if (PL_reg_leftiter-- == 0) {
2496 I32 size = (PL_reg_maxiter + 7)/8;
2497 if (PL_reg_poscache) {
2498 if (PL_reg_poscache_size < size) {
2499 Renew(PL_reg_poscache, size, char);
2500 PL_reg_poscache_size = size;
2501 }
2502 Zero(PL_reg_poscache, size, char);
2503 }
2504 else {
2505 PL_reg_poscache_size = size;
2506 Newz(29, PL_reg_poscache, size, char);
2507 }
2508 DEBUG_r(
2509 PerlIO_printf(Perl_debug_log,
2510 "%sDetected a super-linear match, switching on caching%s...\n",
2511 PL_colors[4], PL_colors[5])
2512 );
2513 }
2514 if (PL_reg_leftiter < 0) {
2515 I32 o = locinput - PL_bostr, b;
2516
2517 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2518 b = o % 8;
2519 o /= 8;
2520 if (PL_reg_poscache[o] & (1<<b)) {
2521 DEBUG_r(
2522 PerlIO_printf(Perl_debug_log,
2523 "%*s already tried at this position...\n",
2524 REPORT_CODE_OFF+PL_regindent*2, "")
2525 );
7821416a 2526 sayNO_SILENT;
2c2d71f5
JH
2527 }
2528 PL_reg_poscache[o] |= (1<<b);
2529 }
2530 }
2531
a0d0e21e
LW
2532 /* Prefer next over scan for minimal matching. */
2533
2534 if (cc->minmod) {
3280af22 2535 PL_regcc = cc->oldcc;
2ab05381
IZ
2536 if (PL_regcc)
2537 ln = PL_regcc->cur;
5f05dabc 2538 cp = regcppush(cc->parenfloor);
c277df42 2539 REGCP_SET;
5f05dabc 2540 if (regmatch(cc->next)) {
c277df42 2541 regcpblow(cp);
4633a7c4 2542 sayYES; /* All done. */
5f05dabc 2543 }
c277df42 2544 REGCP_UNWIND;
5f05dabc 2545 regcppop();
2ab05381
IZ
2546 if (PL_regcc)
2547 PL_regcc->cur = ln;
3280af22 2548 PL_regcc = cc;
a0d0e21e 2549
c277df42 2550 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2551 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
2552 && !(PL_reg_flags & RF_warned)) {
2553 PL_reg_flags |= RF_warned;
cea2e8a9 2554 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
2555 "Complex regular subexpression recursion",
2556 REG_INFTY - 1);
c277df42 2557 }
4633a7c4 2558 sayNO;
c277df42 2559 }
a687059c 2560
c277df42 2561 DEBUG_r(
c3464db5
DD
2562 PerlIO_printf(Perl_debug_log,
2563 "%*s trying longer...\n",
3280af22 2564 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2565 );
a0d0e21e 2566 /* Try scanning more and see if it helps. */
3280af22 2567 PL_reginput = locinput;
a0d0e21e
LW
2568 cc->cur = n;
2569 cc->lastloc = locinput;
5f05dabc 2570 cp = regcppush(cc->parenfloor);
c277df42 2571 REGCP_SET;
5f05dabc 2572 if (regmatch(cc->scan)) {
c277df42 2573 regcpblow(cp);
4633a7c4 2574 sayYES;
5f05dabc 2575 }
c277df42 2576 REGCP_UNWIND;
5f05dabc 2577 regcppop();
4633a7c4 2578 cc->cur = n - 1;
c277df42 2579 cc->lastloc = lastloc;
4633a7c4 2580 sayNO;
a0d0e21e
LW
2581 }
2582
2583 /* Prefer scan over next for maximal matching. */
2584
2585 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2586 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2587 cc->cur = n;
2588 cc->lastloc = locinput;
c277df42 2589 REGCP_SET;
5f05dabc 2590 if (regmatch(cc->scan)) {
c277df42 2591 regcpblow(cp);
4633a7c4 2592 sayYES;
5f05dabc 2593 }
c277df42 2594 REGCP_UNWIND;
a0d0e21e 2595 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2596 PL_reginput = locinput;
c277df42 2597 DEBUG_r(
c3464db5
DD
2598 PerlIO_printf(Perl_debug_log,
2599 "%*s failed, try continuation...\n",
3280af22 2600 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2601 );
2602 }
599cee73
PM
2603 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2604 && !(PL_reg_flags & RF_warned)) {
3280af22 2605 PL_reg_flags |= RF_warned;
cea2e8a9 2606 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
2607 "Complex regular subexpression recursion",
2608 REG_INFTY - 1);
a0d0e21e
LW
2609 }
2610
2611 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2612 PL_regcc = cc->oldcc;
2ab05381
IZ
2613 if (PL_regcc)
2614 ln = PL_regcc->cur;
a0d0e21e 2615 if (regmatch(cc->next))
4633a7c4 2616 sayYES;
2ab05381
IZ
2617 if (PL_regcc)
2618 PL_regcc->cur = ln;
3280af22 2619 PL_regcc = cc;
4633a7c4 2620 cc->cur = n - 1;
c277df42 2621 cc->lastloc = lastloc;
4633a7c4 2622 sayNO;
a0d0e21e
LW
2623 }
2624 /* NOT REACHED */
c277df42
IZ
2625 case BRANCHJ:
2626 next = scan + ARG(scan);
2627 if (next == scan)
2628 next = NULL;
2629 inner = NEXTOPER(NEXTOPER(scan));
2630 goto do_branch;
2631 case BRANCH:
2632 inner = NEXTOPER(scan);
2633 do_branch:
2634 {
2635 CHECKPOINT lastcp;
2636 c1 = OP(scan);
2637 if (OP(next) != c1) /* No choice. */
2638 next = inner; /* Avoid recursion. */
a0d0e21e 2639 else {
3280af22 2640 int lastparen = *PL_reglastparen;
c277df42
IZ
2641
2642 REGCP_SET;
a0d0e21e 2643 do {
3280af22 2644 PL_reginput = locinput;
c277df42 2645 if (regmatch(inner))
4633a7c4 2646 sayYES;
c277df42 2647 REGCP_UNWIND;
3280af22 2648 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2649 PL_regendp[n] = -1;
3280af22 2650 *PL_reglastparen = n;
c277df42 2651 scan = next;
a0d0e21e 2652 /*SUPPRESS 560*/
c277df42
IZ
2653 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2654 next += n;
a0d0e21e 2655 else
c277df42 2656 next = NULL;
c277df42
IZ
2657 inner = NEXTOPER(scan);
2658 if (c1 == BRANCHJ) {
2659 inner = NEXTOPER(inner);
2660 }
2661 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2662 sayNO;
a0d0e21e 2663 /* NOTREACHED */
a687059c 2664 }
a0d0e21e
LW
2665 }
2666 break;
2667 case MINMOD:
2668 minmod = 1;
2669 break;
c277df42
IZ
2670 case CURLYM:
2671 {
00db4c45 2672 I32 l = 0;
c277df42
IZ
2673 CHECKPOINT lastcp;
2674
2675 /* We suppose that the next guy does not need
2676 backtracking: in particular, it is of constant length,
2677 and has no parenths to influence future backrefs. */
2678 ln = ARG1(scan); /* min to match */
2679 n = ARG2(scan); /* max to match */
c277df42
IZ
2680 paren = scan->flags;
2681 if (paren) {
3280af22
NIS
2682 if (paren > PL_regsize)
2683 PL_regsize = paren;
2684 if (paren > *PL_reglastparen)
2685 *PL_reglastparen = paren;
c277df42 2686 }
dc45a647 2687 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2688 if (paren)
2689 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2690 PL_reginput = locinput;
c277df42
IZ
2691 if (minmod) {
2692 minmod = 0;
2693 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2694 sayNO;
5f4b28b2 2695 if (ln && l == 0 && n >= ln
c277df42
IZ
2696 /* In fact, this is tricky. If paren, then the
2697 fact that we did/didnot match may influence
2698 future execution. */
2699 && !(paren && ln == 0))
2700 ln = n;
3280af22 2701 locinput = PL_reginput;
22c35a8c 2702 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2703 c1 = (U8)*STRING(next);
c277df42 2704 if (OP(next) == EXACTF)
22c35a8c 2705 c2 = PL_fold[c1];
c277df42 2706 else if (OP(next) == EXACTFL)
22c35a8c 2707 c2 = PL_fold_locale[c1];
c277df42
IZ
2708 else
2709 c2 = c1;
a0ed51b3
LW
2710 }
2711 else
c277df42
IZ
2712 c1 = c2 = -1000;
2713 REGCP_SET;
5f4b28b2 2714 /* This may be improved if l == 0. */
c277df42
IZ
2715 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2716 /* If it could work, try it. */
2717 if (c1 == -1000 ||
3280af22
NIS
2718 UCHARAT(PL_reginput) == c1 ||
2719 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2720 {
2721 if (paren) {
2722 if (n) {
cf93c79d
IZ
2723 PL_regstartp[paren] =
2724 HOPc(PL_reginput, -l) - PL_bostr;
2725 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2726 }
2727 else
cf93c79d 2728 PL_regendp[paren] = -1;
c277df42
IZ
2729 }
2730 if (regmatch(next))
2731 sayYES;
2732 REGCP_UNWIND;
2733 }
2734 /* Couldn't or didn't -- move forward. */
3280af22 2735 PL_reginput = locinput;
c277df42
IZ
2736 if (regrepeat_hard(scan, 1, &l)) {
2737 ln++;
3280af22 2738 locinput = PL_reginput;
c277df42
IZ
2739 }
2740 else
2741 sayNO;
2742 }
a0ed51b3
LW
2743 }
2744 else {
c277df42
IZ
2745 n = regrepeat_hard(scan, n, &l);
2746 if (n != 0 && l == 0
2747 /* In fact, this is tricky. If paren, then the
2748 fact that we did/didnot match may influence
2749 future execution. */
2750 && !(paren && ln == 0))
2751 ln = n;
3280af22 2752 locinput = PL_reginput;
c277df42 2753 DEBUG_r(
5c0ca799 2754 PerlIO_printf(Perl_debug_log,
faccc32b 2755 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 2756 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 2757 (IV) n, (IV)l)
c277df42
IZ
2758 );
2759 if (n >= ln) {
22c35a8c 2760 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2761 c1 = (U8)*STRING(next);
c277df42 2762 if (OP(next) == EXACTF)
22c35a8c 2763 c2 = PL_fold[c1];
c277df42 2764 else if (OP(next) == EXACTFL)
22c35a8c 2765 c2 = PL_fold_locale[c1];
c277df42
IZ
2766 else
2767 c2 = c1;
a0ed51b3
LW
2768 }
2769 else
c277df42
IZ
2770 c1 = c2 = -1000;
2771 }
2772 REGCP_SET;
2773 while (n >= ln) {
2774 /* If it could work, try it. */
2775 if (c1 == -1000 ||
3280af22
NIS
2776 UCHARAT(PL_reginput) == c1 ||
2777 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2778 {
2779 DEBUG_r(
c3464db5 2780 PerlIO_printf(Perl_debug_log,
7b0972df
JH
2781 "%*s trying tail with n=%"IVdf"...\n",
2782 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
2783 );
2784 if (paren) {
2785 if (n) {
cf93c79d
IZ
2786 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2787 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2788 }
a0ed51b3 2789 else
cf93c79d 2790 PL_regendp[paren] = -1;
c277df42 2791 }
a0ed51b3
LW
2792 if (regmatch(next))
2793 sayYES;
2794 REGCP_UNWIND;
2795 }
c277df42
IZ
2796 /* Couldn't or didn't -- back up. */
2797 n--;
dfe13c55 2798 locinput = HOPc(locinput, -l);
3280af22 2799 PL_reginput = locinput;
c277df42
IZ
2800 }
2801 }
2802 sayNO;
2803 break;
2804 }
2805 case CURLYN:
2806 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2807 if (paren > PL_regsize)
2808 PL_regsize = paren;
2809 if (paren > *PL_reglastparen)
2810 *PL_reglastparen = paren;
c277df42
IZ
2811 ln = ARG1(scan); /* min to match */
2812 n = ARG2(scan); /* max to match */
dc45a647 2813 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2814 goto repeat;
a0d0e21e 2815 case CURLY:
c277df42 2816 paren = 0;
a0d0e21e
LW
2817 ln = ARG1(scan); /* min to match */
2818 n = ARG2(scan); /* max to match */
dc45a647 2819 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2820 goto repeat;
2821 case STAR:
2822 ln = 0;
c277df42 2823 n = REG_INFTY;
a0d0e21e 2824 scan = NEXTOPER(scan);
c277df42 2825 paren = 0;
a0d0e21e
LW
2826 goto repeat;
2827 case PLUS:
c277df42
IZ
2828 ln = 1;
2829 n = REG_INFTY;
2830 scan = NEXTOPER(scan);
2831 paren = 0;
2832 repeat:
a0d0e21e
LW
2833 /*
2834 * Lookahead to avoid useless match attempts
2835 * when we know what character comes next.
2836 */
22c35a8c 2837 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2838 c1 = (U8)*STRING(next);
bbce6d69 2839 if (OP(next) == EXACTF)
22c35a8c 2840 c2 = PL_fold[c1];
bbce6d69 2841 else if (OP(next) == EXACTFL)
22c35a8c 2842 c2 = PL_fold_locale[c1];
bbce6d69 2843 else
2844 c2 = c1;
2845 }
a0d0e21e 2846 else
bbce6d69 2847 c1 = c2 = -1000;
3280af22 2848 PL_reginput = locinput;
a0d0e21e 2849 if (minmod) {
c277df42 2850 CHECKPOINT lastcp;
a0d0e21e
LW
2851 minmod = 0;
2852 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2853 sayNO;
a0ed51b3 2854 locinput = PL_reginput;
c277df42 2855 REGCP_SET;
0fe9bf95
IZ
2856 if (c1 != -1000) {
2857 char *e = locinput + n - ln; /* Should not check after this */
2858 char *old = locinput;
2859
2860 if (e >= PL_regeol || (n == REG_INFTY))
2861 e = PL_regeol - 1;
2862 while (1) {
2863 /* Find place 'next' could work */
2864 if (c1 == c2) {
2865 while (locinput <= e && *locinput != c1)
2866 locinput++;
2867 } else {
2868 while (locinput <= e
2869 && *locinput != c1
2870 && *locinput != c2)
2871 locinput++;
2872 }
2873 if (locinput > e)
2874 sayNO;
2875 /* PL_reginput == old now */
2876 if (locinput != old) {
2877 ln = 1; /* Did some */
2878 if (regrepeat(scan, locinput - old) <
2879 locinput - old)
2880 sayNO;
2881 }
2882 /* PL_reginput == locinput now */
2883 if (paren) {
2884 if (ln) {
cf93c79d
IZ
2885 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2886 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2887 }
2888 else
cf93c79d 2889 PL_regendp[paren] = -1;
0fe9bf95
IZ
2890 }
2891 if (regmatch(next))
2892 sayYES;
2893 PL_reginput = locinput; /* Could be reset... */
2894 REGCP_UNWIND;
2895 /* Couldn't or didn't -- move forward. */
2896 old = locinput++;
2897 }
2898 }
2899 else
c277df42 2900 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2901 /* If it could work, try it. */
bbce6d69 2902 if (c1 == -1000 ||
3280af22
NIS
2903 UCHARAT(PL_reginput) == c1 ||
2904 UCHARAT(PL_reginput) == c2)
bbce6d69 2905 {
c277df42
IZ
2906 if (paren) {
2907 if (n) {
cf93c79d
IZ
2908 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2909 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2910 }
2911 else
cf93c79d 2912 PL_regendp[paren] = -1;
c277df42 2913 }
a0d0e21e 2914 if (regmatch(next))
4633a7c4 2915 sayYES;
c277df42 2916 REGCP_UNWIND;
bbce6d69 2917 }
c277df42 2918 /* Couldn't or didn't -- move forward. */
a0ed51b3 2919 PL_reginput = locinput;
a0d0e21e
LW
2920 if (regrepeat(scan, 1)) {
2921 ln++;
a0ed51b3
LW
2922 locinput = PL_reginput;
2923 }
2924 else
4633a7c4 2925 sayNO;
a0d0e21e
LW
2926 }
2927 }
2928 else {
c277df42 2929 CHECKPOINT lastcp;
a0d0e21e 2930 n = regrepeat(scan, n);
a0ed51b3 2931 locinput = PL_reginput;
22c35a8c 2932 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 2933 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2934 ln = n; /* why back off? */
c277df42
IZ
2935 REGCP_SET;
2936 if (paren) {
2937 while (n >= ln) {
2938 /* If it could work, try it. */
2939 if (c1 == -1000 ||
3280af22
NIS
2940 UCHARAT(PL_reginput) == c1 ||
2941 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2942 {
2943 if (paren && n) {
2944 if (n) {
cf93c79d
IZ
2945 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2946 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2947 }
2948 else
cf93c79d 2949 PL_regendp[paren] = -1;
c277df42
IZ
2950 }
2951 if (regmatch(next))
2952 sayYES;
2953 REGCP_UNWIND;
2954 }
2955 /* Couldn't or didn't -- back up. */
2956 n--;
dfe13c55 2957 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2958 }
a0ed51b3
LW
2959 }
2960 else {
c277df42
IZ
2961 while (n >= ln) {
2962 /* If it could work, try it. */
2963 if (c1 == -1000 ||
3280af22
NIS
2964 UCHARAT(PL_reginput) == c1 ||
2965 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2966 {
2967 if (regmatch(next))
2968 sayYES;
2969 REGCP_UNWIND;
2970 }
2971 /* Couldn't or didn't -- back up. */
2972 n--;
dfe13c55 2973 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2974 }
a0d0e21e
LW
2975 }
2976 }
4633a7c4 2977 sayNO;
c277df42 2978 break;
a0d0e21e 2979 case END:
0f5d15d6
IZ
2980 if (PL_reg_call_cc) {
2981 re_cc_state *cur_call_cc = PL_reg_call_cc;
2982 CURCUR *cctmp = PL_regcc;
2983 regexp *re = PL_reg_re;
2984 CHECKPOINT cp, lastcp;
2985
2986 cp = regcppush(0); /* Save *all* the positions. */
2987 REGCP_SET;
2988 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2989 the caller. */
2990 PL_reginput = locinput; /* Make position available to
2991 the callcc. */
2992 cache_re(PL_reg_call_cc->re);
2993 PL_regcc = PL_reg_call_cc->cc;
2994 PL_reg_call_cc = PL_reg_call_cc->prev;
2995 if (regmatch(cur_call_cc->node)) {
2996 PL_reg_call_cc = cur_call_cc;
2997 regcpblow(cp);
2998 sayYES;
2999 }
3000 REGCP_UNWIND;
3001 regcppop();
3002 PL_reg_call_cc = cur_call_cc;
3003 PL_regcc = cctmp;
3004 PL_reg_re = re;
3005 cache_re(re);
3006
3007 DEBUG_r(
3008 PerlIO_printf(Perl_debug_log,
3009 "%*s continuation failed...\n",
3010 REPORT_CODE_OFF+PL_regindent*2, "")
3011 );
7821416a 3012 sayNO_SILENT;
0f5d15d6 3013 }
7821416a
IZ
3014 if (locinput < PL_regtill) {
3015 DEBUG_r(PerlIO_printf(Perl_debug_log,
3016 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3017 PL_colors[4],
3018 (long)(locinput - PL_reg_starttry),
3019 (long)(PL_regtill - PL_reg_starttry),
3020 PL_colors[5]));
3021 sayNO_FINAL; /* Cannot match: too short. */
3022 }
3023 PL_reginput = locinput; /* put where regtry can find it */
3024 sayYES_FINAL; /* Success! */
7e5428c5 3025 case SUCCEED:
3280af22 3026 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3027 sayYES_LOUD; /* Success! */
c277df42
IZ
3028 case SUSPEND:
3029 n = 1;
9fe1d20c 3030 PL_reginput = locinput;
c277df42 3031 goto do_ifmatch;
a0d0e21e 3032 case UNLESSM:
c277df42 3033 n = 0;
a0ed51b3 3034 if (scan->flags) {
0fe9bf95
IZ
3035 if (UTF) { /* XXXX This is absolutely
3036 broken, we read before
3037 start of string. */
3038 s = HOPMAYBEc(locinput, -scan->flags);
3039 if (!s)
3040 goto say_yes;
3041 PL_reginput = s;
3042 }
3043 else {
3044 if (locinput < PL_bostr + scan->flags)
3045 goto say_yes;
3046 PL_reginput = locinput - scan->flags;
3047 goto do_ifmatch;
3048 }
a0ed51b3
LW
3049 }
3050 else
3051 PL_reginput = locinput;
c277df42
IZ
3052 goto do_ifmatch;
3053 case IFMATCH:
3054 n = 1;
a0ed51b3 3055 if (scan->flags) {
0fe9bf95
IZ
3056 if (UTF) { /* XXXX This is absolutely
3057 broken, we read before
3058 start of string. */
3059 s = HOPMAYBEc(locinput, -scan->flags);
3060 if (!s || s < PL_bostr)
3061 goto say_no;
3062 PL_reginput = s;
3063 }
3064 else {
3065 if (locinput < PL_bostr + scan->flags)
3066 goto say_no;
3067 PL_reginput = locinput - scan->flags;
3068 goto do_ifmatch;
3069 }
a0ed51b3
LW
3070 }
3071 else
3072 PL_reginput = locinput;
3073
c277df42 3074 do_ifmatch:
c277df42
IZ
3075 inner = NEXTOPER(NEXTOPER(scan));
3076 if (regmatch(inner) != n) {
3077 say_no:
3078 if (logical) {
3079 logical = 0;
3080 sw = 0;
3081 goto do_longjump;
a0ed51b3
LW
3082 }
3083 else
c277df42
IZ
3084 sayNO;
3085 }
3086 say_yes:
3087 if (logical) {
3088 logical = 0;
3089 sw = 1;
3090 }
fe44a5e8 3091 if (OP(scan) == SUSPEND) {
3280af22 3092 locinput = PL_reginput;
565764a8 3093 nextchr = UCHARAT(locinput);
fe44a5e8 3094 }
c277df42
IZ
3095 /* FALL THROUGH. */
3096 case LONGJMP:
3097 do_longjump:
3098 next = scan + ARG(scan);
3099 if (next == scan)
3100 next = NULL;
a0d0e21e
LW
3101 break;
3102 default:
b900a521 3103 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3104 PTR2UV(scan), OP(scan));
cea2e8a9 3105 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3106 }
a0d0e21e
LW
3107 scan = next;
3108 }
a687059c 3109
a0d0e21e
LW
3110 /*
3111 * We get here only if there's trouble -- normally "case END" is
3112 * the terminating point.
3113 */
cea2e8a9 3114 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3115 /*NOTREACHED*/
4633a7c4
LW
3116 sayNO;
3117
7821416a
IZ
3118yes_loud:
3119 DEBUG_r(
3120 PerlIO_printf(Perl_debug_log,
3121 "%*s %scould match...%s\n",
3122 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3123 );
3124 goto yes;
3125yes_final:
3126 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3127 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3128yes:
3129#ifdef DEBUGGING
3280af22 3130 PL_regindent--;
4633a7c4
LW
3131#endif
3132 return 1;
3133
3134no:
7821416a
IZ
3135 DEBUG_r(
3136 PerlIO_printf(Perl_debug_log,
3137 "%*s %sfailed...%s\n",
3138 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3139 );
3140 goto do_no;
3141no_final:
3142do_no:
4633a7c4 3143#ifdef DEBUGGING
3280af22 3144 PL_regindent--;
4633a7c4 3145#endif
a0d0e21e 3146 return 0;
a687059c
LW
3147}
3148
3149/*
3150 - regrepeat - repeatedly match something simple, report how many
3151 */
3152/*
3153 * [This routine now assumes that it will only match on things of length 1.
3154 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3155 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3156 */
76e3520e 3157STATIC I32
cea2e8a9 3158S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3159{
5c0ca799 3160 dTHR;
a0d0e21e 3161 register char *scan;
a0d0e21e 3162 register I32 c;
3280af22 3163 register char *loceol = PL_regeol;
a0ed51b3 3164 register I32 hardcount = 0;
a0d0e21e 3165
3280af22 3166 scan = PL_reginput;
c277df42 3167 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3168 loceol = scan + max;
a0d0e21e 3169 switch (OP(p)) {
22c35a8c 3170 case REG_ANY:
a0d0e21e
LW
3171 while (scan < loceol && *scan != '\n')
3172 scan++;
3173 break;
3174 case SANY:
3175 scan = loceol;
3176 break;
a0ed51b3
LW
3177 case ANYUTF8:
3178 loceol = PL_regeol;
3179 while (scan < loceol && *scan != '\n') {
3180 scan += UTF8SKIP(scan);
3181 hardcount++;
3182 }
3183 break;
3184 case SANYUTF8:
3185 loceol = PL_regeol;
3186 while (scan < loceol) {
3187 scan += UTF8SKIP(scan);
3188 hardcount++;
3189 }
3190 break;
bbce6d69 3191 case EXACT: /* length of string is 1 */
cd439c50 3192 c = (U8)*STRING(p);
bbce6d69 3193 while (scan < loceol && UCHARAT(scan) == c)
3194 scan++;
3195 break;
3196 case EXACTF: /* length of string is 1 */
cd439c50 3197 c = (U8)*STRING(p);
bbce6d69 3198 while (scan < loceol &&
22c35a8c 3199 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3200 scan++;
3201 break;
3202 case EXACTFL: /* length of string is 1 */
3280af22 3203 PL_reg_flags |= RF_tainted;
cd439c50 3204 c = (U8)*STRING(p);
bbce6d69 3205 while (scan < loceol &&
22c35a8c 3206 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3207 scan++;
3208 break;
a0ed51b3
LW
3209 case ANYOFUTF8:
3210 loceol = PL_regeol;
3211 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
3212 scan += UTF8SKIP(scan);
3213 hardcount++;
3214 }
3215 break;
a0d0e21e 3216 case ANYOF:
936ed897 3217 while (scan < loceol && REGINCLASS(p, *scan))
a0d0e21e 3218 scan++;
a0d0e21e
LW
3219 break;
3220 case ALNUM:
3221 while (scan < loceol && isALNUM(*scan))
3222 scan++;
3223 break;
a0ed51b3
LW
3224 case ALNUMUTF8:
3225 loceol = PL_regeol;
dfe13c55 3226 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3227 scan += UTF8SKIP(scan);
3228 hardcount++;
3229 }
3230 break;
bbce6d69 3231 case ALNUML:
3280af22 3232 PL_reg_flags |= RF_tainted;
bbce6d69 3233 while (scan < loceol && isALNUM_LC(*scan))
3234 scan++;
3235 break;
a0ed51b3
LW
3236 case ALNUMLUTF8:
3237 PL_reg_flags |= RF_tainted;
3238 loceol = PL_regeol;
dfe13c55 3239 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3240 scan += UTF8SKIP(scan);
3241 hardcount++;
3242 }
3243 break;
3244 break;
a0d0e21e
LW
3245 case NALNUM:
3246 while (scan < loceol && !isALNUM(*scan))
3247 scan++;
3248 break;
a0ed51b3
LW
3249 case NALNUMUTF8:
3250 loceol = PL_regeol;
dfe13c55 3251 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
3252 scan += UTF8SKIP(scan);
3253 hardcount++;
3254 }
3255 break;
bbce6d69 3256 case NALNUML:
3280af22 3257 PL_reg_flags |= RF_tainted;
bbce6d69 3258 while (scan < loceol && !isALNUM_LC(*scan))
3259 scan++;
3260 break;
a0ed51b3
LW
3261 case NALNUMLUTF8:
3262 PL_reg_flags |= RF_tainted;
3263 loceol = PL_regeol;
dfe13c55 3264 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
3265 scan += UTF8SKIP(scan);
3266 hardcount++;
3267 }
3268 break;
a0d0e21e
LW
3269 case SPACE:
3270 while (scan < loceol && isSPACE(*scan))
3271 scan++;
3272 break;
a0ed51b3
LW
3273 case SPACEUTF8:
3274 loceol = PL_regeol;
dfe13c55 3275 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3276 scan += UTF8SKIP(scan);
3277 hardcount++;
3278 }
3279 break;
bbce6d69 3280 case SPACEL:
3280af22 3281 PL_reg_flags |= RF_tainted;
bbce6d69 3282 while (scan < loceol && isSPACE_LC(*scan))
3283 scan++;
3284 break;
a0ed51b3
LW
3285 case SPACELUTF8:
3286 PL_reg_flags |= RF_tainted;
3287 loceol = PL_regeol;
dfe13c55 3288 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3289 scan += UTF8SKIP(scan);
3290 hardcount++;
3291 }
3292 break;
a0d0e21e
LW
3293 case NSPACE:
3294 while (scan < loceol && !isSPACE(*scan))
3295 scan++;
3296 break;
a0ed51b3
LW
3297 case NSPACEUTF8:
3298 loceol = PL_regeol;
dfe13c55 3299 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
3300 scan += UTF8SKIP(scan);
3301 hardcount++;
3302 }
3303 break;
bbce6d69 3304 case NSPACEL:
3280af22 3305 PL_reg_flags |= RF_tainted;
bbce6d69 3306 while (scan < loceol && !isSPACE_LC(*scan))
3307 scan++;
3308 break;
a0ed51b3
LW
3309 case NSPACELUTF8:
3310 PL_reg_flags |= RF_tainted;
3311 loceol = PL_regeol;
dfe13c55 3312 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
3313 scan += UTF8SKIP(scan);
3314 hardcount++;
3315 }
3316 break;
a0d0e21e
LW
3317 case DIGIT:
3318 while (scan < loceol && isDIGIT(*scan))
3319 scan++;
3320 break;
a0ed51b3
LW
3321 case DIGITUTF8:
3322 loceol = PL_regeol;
dfe13c55 3323 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3324 scan += UTF8SKIP(scan);
3325 hardcount++;
3326 }
3327 break;
3328 break;
a0d0e21e
LW
3329 case NDIGIT:
3330 while (scan < loceol && !isDIGIT(*scan))
3331 scan++;
3332 break;
a0ed51b3
LW
3333 case NDIGITUTF8:
3334 loceol = PL_regeol;
dfe13c55 3335 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
3336 scan += UTF8SKIP(scan);
3337 hardcount++;
3338 }
3339 break;
a0d0e21e
LW
3340 default: /* Called on something of 0 width. */
3341 break; /* So match right here or not at all. */
3342 }
a687059c 3343
a0ed51b3
LW
3344 if (hardcount)
3345 c = hardcount;
3346 else
3347 c = scan - PL_reginput;
3280af22 3348 PL_reginput = scan;
a687059c 3349
c277df42
IZ
3350 DEBUG_r(
3351 {
3352 SV *prop = sv_newmortal();
3353
3354 regprop(prop, p);
3355 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3356 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3357 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42
IZ
3358 });
3359
a0d0e21e 3360 return(c);
a687059c
LW
3361}
3362
3363/*
c277df42
IZ
3364 - regrepeat_hard - repeatedly match something, report total lenth and length
3365 *
3366 * The repeater is supposed to have constant length.
3367 */
3368
76e3520e 3369STATIC I32
cea2e8a9 3370S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3371{
5c0ca799 3372 dTHR;
c277df42
IZ
3373 register char *scan;
3374 register char *start;
3280af22 3375 register char *loceol = PL_regeol;
a0ed51b3 3376 I32 l = 0;
708e3b05 3377 I32 count = 0, res = 1;
a0ed51b3
LW
3378
3379 if (!max)
3380 return 0;
c277df42 3381
3280af22 3382 start = PL_reginput;
a0ed51b3 3383 if (UTF) {
708e3b05 3384 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3385 if (!count++) {
3386 l = 0;
3387 while (start < PL_reginput) {
3388 l++;
3389 start += UTF8SKIP(start);
3390 }
3391 *lp = l;
3392 if (l == 0)
3393 return max;
3394 }
3395 if (count == max)
3396 return count;
3397 }
3398 }
3399 else {
708e3b05 3400 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3401 if (!count++) {
3402 *lp = l = PL_reginput - start;
3403 if (max != REG_INFTY && l*max < loceol - scan)
3404 loceol = scan + l*max;
3405 if (l == 0)
3406 return max;
c277df42
IZ
3407 }
3408 }
3409 }
708e3b05 3410 if (!res)
3280af22 3411 PL_reginput = scan;
c277df42 3412
a0ed51b3 3413 return count;
c277df42
IZ
3414}
3415
3416/*
cb8d8820 3417 - reginclass - determine if a character falls into a character class
bbce6d69 3418 */
3419
76e3520e 3420STATIC bool
936ed897 3421S_reginclass(pTHX_ register regnode *p, register I32 c)
bbce6d69 3422{
5c0ca799 3423 dTHR;
b8c5462f 3424 char flags = ANYOF_FLAGS(p);
bbce6d69 3425 bool match = FALSE;
3426
3427 c &= 0xFF;
b8c5462f 3428 if (ANYOF_BITMAP_TEST(p, c))
bbce6d69 3429 match = TRUE;
3430 else if (flags & ANYOF_FOLD) {
3431 I32 cf;
3432 if (flags & ANYOF_LOCALE) {
3280af22 3433 PL_reg_flags |= RF_tainted;
22c35a8c 3434 cf = PL_fold_locale[c];
bbce6d69 3435 }
3436 else
22c35a8c 3437 cf = PL_fold[c];
b8c5462f 3438 if (ANYOF_BITMAP_TEST(p, cf))
bbce6d69 3439 match = TRUE;
3440 }
3441
b8c5462f 3442 if (!match && (flags & ANYOF_CLASS)) {
3280af22 3443 PL_reg_flags |= RF_tainted;
b8c5462f
JH
3444 if (
3445 (ANYOF_CLASS_TEST(p, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3446 (ANYOF_CLASS_TEST(p, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3447 (ANYOF_CLASS_TEST(p, ANYOF_SPACE) && isSPACE_LC(c)) ||
3448 (ANYOF_CLASS_TEST(p, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3449 (ANYOF_CLASS_TEST(p, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3450 (ANYOF_CLASS_TEST(p, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3451 (ANYOF_CLASS_TEST(p, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3452 (ANYOF_CLASS_TEST(p, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3453 (ANYOF_CLASS_TEST(p, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3454 (ANYOF_CLASS_TEST(p, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3455 (ANYOF_CLASS_TEST(p, ANYOF_ASCII) && isASCII(c)) ||
3456 (ANYOF_CLASS_TEST(p, ANYOF_NASCII) && !isASCII(c)) ||
3457 (ANYOF_CLASS_TEST(p, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3458 (ANYOF_CLASS_TEST(p, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3459 (ANYOF_CLASS_TEST(p, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3460 (ANYOF_CLASS_TEST(p, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3461 (ANYOF_CLASS_TEST(p, ANYOF_LOWER) && isLOWER_LC(c)) ||
3462 (ANYOF_CLASS_TEST(p, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3463 (ANYOF_CLASS_TEST(p, ANYOF_PRINT) && isPRINT_LC(c)) ||
3464 (ANYOF_CLASS_TEST(p, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3465 (ANYOF_CLASS_TEST(p, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3466 (ANYOF_CLASS_TEST(p, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3467 (ANYOF_CLASS_TEST(p, ANYOF_UPPER) && isUPPER_LC(c)) ||
3468 (ANYOF_CLASS_TEST(p, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3469 (ANYOF_CLASS_TEST(p, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3470 (ANYOF_CLASS_TEST(p, ANYOF_NXDIGIT) && !isXDIGIT(c))
3471 ) /* How's that for a conditional? */
bbce6d69 3472 {
3473 match = TRUE;
3474 }
3475 }
3476
ae5c130c 3477 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 3478}
3479
a0ed51b3 3480STATIC bool
cea2e8a9 3481S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
3482{
3483 dTHR;
a0ed51b3
LW
3484 char flags = ARG1(f);
3485 bool match = FALSE;
3486 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
3487
3488 if (swash_fetch(sv, p))
3489 match = TRUE;
3490 else if (flags & ANYOF_FOLD) {
3491 I32 cf;
dfe13c55 3492 U8 tmpbuf[10];
a0ed51b3
LW
3493 if (flags & ANYOF_LOCALE) {
3494 PL_reg_flags |= RF_tainted;
3495 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
3496 }
3497 else
3498 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
3499 if (swash_fetch(sv, tmpbuf))
3500 match = TRUE;
3501 }
3502
b8c5462f 3503 /* UTF8 combined with ANYOF_CLASS is ill-defined. */
a0ed51b3
LW
3504
3505 return (flags & ANYOF_INVERT) ? !match : match;
3506}
161b471a 3507
dfe13c55 3508STATIC U8 *
cea2e8a9 3509S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
3510{
3511 dTHR;
a0ed51b3
LW
3512 if (off >= 0) {
3513 while (off-- && s < (U8*)PL_regeol)
3514 s += UTF8SKIP(s);
3515 }
3516 else {
3517 while (off++) {
3518 if (s > (U8*)PL_bostr) {
3519 s--;
3520 if (*s & 0x80) {
3521 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3522 s--;
3523 } /* XXX could check well-formedness here */
3524 }
3525 }
3526 }
3527 return s;
3528}
161b471a 3529
dfe13c55 3530STATIC U8 *
cea2e8a9 3531S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3532{
c485e607 3533 dTHR;
a0ed51b3
LW
3534 if (off >= 0) {
3535 while (off-- && s < (U8*)PL_regeol)
3536 s += UTF8SKIP(s);
3537 if (off >= 0)
3538 return 0;
3539 }
3540 else {
3541 while (off++) {
3542 if (s > (U8*)PL_bostr) {
3543 s--;
3544 if (*s & 0x80) {
3545 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3546 s--;
3547 } /* XXX could check well-formedness here */
3548 }
3549 else
3550 break;
3551 }
3552 if (off <= 0)
3553 return 0;
3554 }
3555 return s;
3556}
51371543
GS
3557
3558#ifdef PERL_OBJECT
51371543
GS
3559#include "XSUB.h"
3560#endif
3561
3562static void
3563restore_pos(pTHXo_ void *arg)
3564{
3565 dTHR;
3566 if (PL_reg_eval_set) {
3567 if (PL_reg_oldsaved) {
3568 PL_reg_re->subbeg = PL_reg_oldsaved;
3569 PL_reg_re->sublen = PL_reg_oldsavedlen;
3570 RX_MATCH_COPIED_on(PL_reg_re);
3571 }
3572 PL_reg_magic->mg_len = PL_reg_oldpos;
3573 PL_reg_eval_set = 0;
3574 PL_curpm = PL_reg_oldcurpm;
3575 }
3576}
3577