This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Mention EPOC and SOCKS.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
d88dccdf 41# define Perl_reginitcolors my_reginitcolors
56953603
IZ
42#endif
43
f0fcb552 44/*SUPPRESS 112*/
a687059c 45/*
e50aee73 46 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
47 *
48 * Copyright (c) 1986 by University of Toronto.
49 * Written by Henry Spencer. Not derived from licensed software.
50 *
51 * Permission is granted to anyone to use this software for any
52 * purpose on any computer system, and to redistribute it freely,
53 * subject to the following restrictions:
54 *
55 * 1. The author is not responsible for the consequences of use of
56 * this software, no matter how awful, even if they arise
57 * from defects in it.
58 *
59 * 2. The origin of this software must not be misrepresented, either
60 * by explicit claim or by omission.
61 *
62 * 3. Altered versions must be plainly marked as such, and must not
63 * be misrepresented as being the original software.
64 *
65 **** Alterations to Henry's code are...
66 ****
4eb8286e 67 **** Copyright (c) 1991-1999, Larry Wall
a687059c 68 ****
9ef589d8
LW
69 **** You may distribute under the terms of either the GNU General Public
70 **** License or the Artistic License, as specified in the README file.
a687059c
LW
71 *
72 * Beware that some of this code is subtly aware of the way operator
73 * precedence is structured in regular expressions. Serious changes in
74 * regular-expression syntax might require a total rethink.
75 */
76#include "EXTERN.h"
864dbfa3 77#define PERL_IN_REGEXEC_C
a687059c 78#include "perl.h"
0f5d15d6 79
a687059c
LW
80#include "regcomp.h"
81
c277df42
IZ
82#define RF_tainted 1 /* tainted information used? */
83#define RF_warned 2 /* warned about big count? */
ce862d02 84#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
85#define RF_utf8 8 /* String contains multibyte chars? */
86
87#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
88
89#define RS_init 1 /* eval environment created */
90#define RS_set 2 /* replsv value is set */
c277df42 91
a687059c
LW
92#ifndef STATIC
93#define STATIC static
94#endif
95
c277df42
IZ
96/*
97 * Forwards.
98 */
99
ae5c130c 100#define REGINCLASS(p,c) (*(p) ? reginclass(p,c) : ANYOF_TEST(p,c))
a0ed51b3
LW
101#define REGINCLASSUTF8(f,p) (ARG1(f) ? reginclassutf8(f,p) : swash_fetch((SV*)PL_regdata->data[ARG2(f)],p))
102
103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
104#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
108#define HOP(pos,off) (UTF ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (UTF ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
76e3520e 113STATIC CHECKPOINT
cea2e8a9 114S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 115{
11343788 116 dTHR;
3280af22
NIS
117 int retval = PL_savestack_ix;
118 int i = (PL_regsize - parenfloor) * 4;
a0d0e21e
LW
119 int p;
120
121 SSCHECK(i + 5);
3280af22 122 for (p = PL_regsize; p > parenfloor; p--) {
cf93c79d
IZ
123 SSPUSHINT(PL_regendp[p]);
124 SSPUSHINT(PL_regstartp[p]);
3280af22 125 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
126 SSPUSHINT(p);
127 }
3280af22
NIS
128 SSPUSHINT(PL_regsize);
129 SSPUSHINT(*PL_reglastparen);
130 SSPUSHPTR(PL_reginput);
a0d0e21e
LW
131 SSPUSHINT(i + 3);
132 SSPUSHINT(SAVEt_REGCONTEXT);
133 return retval;
134}
135
c277df42 136/* These are needed since we do not localize EVAL nodes: */
c3464db5
DD
137# define REGCP_SET DEBUG_r(PerlIO_printf(Perl_debug_log, \
138 " Setting an EVAL scope, savestack=%i\n", \
3280af22 139 PL_savestack_ix)); lastcp = PL_savestack_ix
c3464db5 140
3280af22 141# define REGCP_UNWIND DEBUG_r(lastcp != PL_savestack_ix ? \
c3464db5
DD
142 PerlIO_printf(Perl_debug_log, \
143 " Clearing an EVAL scope, savestack=%i..%i\n", \
3280af22 144 lastcp, PL_savestack_ix) : 0); regcpblow(lastcp)
c277df42 145
76e3520e 146STATIC char *
cea2e8a9 147S_regcppop(pTHX)
a0d0e21e 148{
11343788 149 dTHR;
a0d0e21e
LW
150 I32 i = SSPOPINT;
151 U32 paren = 0;
152 char *input;
cf93c79d 153 I32 tmps;
a0d0e21e
LW
154 assert(i == SAVEt_REGCONTEXT);
155 i = SSPOPINT;
156 input = (char *) SSPOPPTR;
3280af22
NIS
157 *PL_reglastparen = SSPOPINT;
158 PL_regsize = SSPOPINT;
c277df42 159 for (i -= 3; i > 0; i -= 4) {
a0d0e21e 160 paren = (U32)SSPOPINT;
3280af22 161 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
162 PL_regstartp[paren] = SSPOPINT;
163 tmps = SSPOPINT;
3280af22
NIS
164 if (paren <= *PL_reglastparen)
165 PL_regendp[paren] = tmps;
c277df42 166 DEBUG_r(
c3464db5
DD
167 PerlIO_printf(Perl_debug_log,
168 " restoring \\%d to %d(%d)..%d%s\n",
cf93c79d
IZ
169 paren, PL_regstartp[paren],
170 PL_reg_start_tmp[paren] - PL_bostr,
171 PL_regendp[paren],
3280af22 172 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 173 );
a0d0e21e 174 }
c277df42 175 DEBUG_r(
3280af22 176 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5
DD
177 PerlIO_printf(Perl_debug_log,
178 " restoring \\%d..\\%d to undef\n",
3280af22 179 *PL_reglastparen + 1, PL_regnpar);
c277df42
IZ
180 }
181 );
3280af22
NIS
182 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
183 if (paren > PL_regsize)
cf93c79d
IZ
184 PL_regstartp[paren] = -1;
185 PL_regendp[paren] = -1;
a0d0e21e
LW
186 }
187 return input;
188}
189
0f5d15d6 190STATIC char *
cea2e8a9 191S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6 192{
46124e9e 193 dTHR;
0f5d15d6
IZ
194 I32 tmp = PL_savestack_ix;
195
196 PL_savestack_ix = ss;
197 regcppop();
198 PL_savestack_ix = tmp;
942e002e 199 return Nullch;
0f5d15d6
IZ
200}
201
202typedef struct re_cc_state
203{
204 I32 ss;
205 regnode *node;
206 struct re_cc_state *prev;
207 CURCUR *cc;
208 regexp *re;
209} re_cc_state;
210
c277df42 211#define regcpblow(cp) LEAVE_SCOPE(cp)
a0d0e21e 212
a687059c 213/*
e50aee73 214 * pregexec and friends
a687059c
LW
215 */
216
217/*
c277df42 218 - pregexec - match a regexp against a string
a687059c 219 */
c277df42 220I32
864dbfa3 221Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 222 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
223/* strend: pointer to null at end of string */
224/* strbeg: real beginning of string */
225/* minend: end of match must be >=minend after stringarg. */
226/* nosave: For optimizations. */
227{
228 return
229 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
230 nosave ? 0 : REXEC_COPY_STR);
231}
0f5d15d6
IZ
232
233STATIC void
cea2e8a9 234S_cache_re(pTHX_ regexp *prog)
0f5d15d6 235{
46124e9e 236 dTHR;
0f5d15d6
IZ
237 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
238#ifdef DEBUGGING
239 PL_regprogram = prog->program;
240#endif
241 PL_regnpar = prog->nparens;
242 PL_regdata = prog->data;
243 PL_reg_re = prog;
244}
22e551b9 245
9661b544 246STATIC void
cea2e8a9 247S_restore_pos(pTHX_ void *arg)
35ef4773
GS
248{
249 dTHR;
cf93c79d
IZ
250 if (PL_reg_eval_set) {
251 if (PL_reg_oldsaved) {
252 PL_reg_re->subbeg = PL_reg_oldsaved;
253 PL_reg_re->sublen = PL_reg_oldsavedlen;
254 RX_MATCH_COPIED_on(PL_reg_re);
255 }
9661b544
IZ
256 PL_reg_magic->mg_len = PL_reg_oldpos;
257 PL_reg_eval_set = 0;
5c5e4c24 258 PL_curpm = PL_reg_oldcurpm;
9661b544
IZ
259 }
260}
261
cad2e5aa
JH
262/*
263 * Need to implement the following flags for reg_anch:
264 *
265 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
266 * USE_INTUIT_ML
267 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
268 * INTUIT_AUTORITATIVE_ML
269 * INTUIT_ONCE_NOML - Intuit can match in one location only.
270 * INTUIT_ONCE_ML
271 *
272 * Another flag for this function: SECOND_TIME (so that float substrs
273 * with giant delta may be not rechecked).
274 */
275
276/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
277
278/* If SCREAM, then sv should be compatible with strpos and strend.
279 Otherwise, only SvCUR(sv) is used to get strbeg. */
280
281/* XXXX We assume that strpos is strbeg unless sv. */
282
283char *
284Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
285 char *strend, U32 flags, re_scream_pos_data *data)
286{
287 I32 start_shift;
288 /* Should be nonnegative! */
289 I32 end_shift;
290 char *s;
291 char *t;
292 I32 ml_anch;
293
294 DEBUG_r( if (!PL_colorset) reginitcolors() );
295 DEBUG_r(PerlIO_printf(Perl_debug_log,
296 "%sGuessing start of match:%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
297 PL_colors[4],PL_colors[5],PL_colors[0],
298 prog->precomp,
299 PL_colors[1],
300 (strlen(prog->precomp) > 60 ? "..." : ""),
301 PL_colors[0],
302 (strend - strpos > 60 ? 60 : strend - strpos),
303 strpos, PL_colors[1],
304 (strend - strpos > 60 ? "..." : ""))
305 );
306
307 if (prog->minlen > strend - strpos)
308 goto fail;
309
310 /* XXXX Move further down? */
311 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
312 /* Should be nonnegative! */
313 end_shift = prog->minlen - start_shift -
314 CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
315
316 if (prog->reganch & ROPT_ANCH) {
317 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
318 || ( (prog->reganch & ROPT_ANCH_BOL)
319 && !PL_multiline ) );
320
321 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
322 /* Anchored... */
323 I32 slen;
324
325 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
326 && (sv && (strpos + SvCUR(sv) != strend)) )
327 goto fail;
328
329 s = (char*)HOP((U8*)strpos, prog->check_offset_min);
330 if (SvTAIL(prog->check_substr)) {
331 slen = SvCUR(prog->check_substr); /* >= 1 */
332
333 if ( strend - s > slen || strend - s < slen - 1 ) {
334 s = Nullch;
335 goto finish;
336 }
337 if ( strend - s == slen && strend[-1] != '\n') {
338 s = Nullch;
339 goto finish;
340 }
341 /* Now should match s[0..slen-2] */
342 slen--;
343 if (slen && (*SvPVX(prog->check_substr) != *s
344 || (slen > 1
345 && memNE(SvPVX(prog->check_substr), s, slen))))
346 s = Nullch;
347 }
348 else if (*SvPVX(prog->check_substr) != *s
349 || ((slen = SvCUR(prog->check_substr)) > 1
350 && memNE(SvPVX(prog->check_substr), s, slen)))
351 s = Nullch;
352 else
353 s = strpos;
354 goto finish;
355 }
356 s = strpos;
357 if (!ml_anch && (s + prog->check_offset_max < strend - prog->minlen))
358 end_shift += strend - s - prog->minlen - prog->check_offset_max;
359 }
360 else {
361 ml_anch = 0;
362 s = strpos;
363 }
364
365 restart:
366 if (flags & REXEC_SCREAM) {
367 SV *c = prog->check_substr;
368 char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
369 I32 p = -1; /* Internal iterator of scream. */
370 I32 *pp = data ? data->scream_pos : &p;
371
372 if (PL_screamfirst[BmRARE(c)] >= 0
373 || ( BmRARE(c) == '\n'
374 && (BmPREVIOUS(c) == SvCUR(c) - 1)
375 && SvTAIL(c) ))
376 s = screaminstr(sv, prog->check_substr,
377 start_shift + (strpos - strbeg), end_shift, pp, 0);
378 else
379 s = Nullch;
380 if (data)
381 *data->scream_olds = s;
382 }
383 else
384 s = fbm_instr((unsigned char*)s + start_shift,
385 (unsigned char*)strend - end_shift,
386 prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0);
387
388 /* Update the count-of-usability, remove useless subpatterns,
389 unshift s. */
390 finish:
391 if (!s) {
392 ++BmUSEFUL(prog->check_substr); /* hooray */
393 goto fail; /* not present */
394 }
395 else if (s - strpos > prog->check_offset_max &&
396 ((prog->reganch & ROPT_UTF8)
397 ? ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
398 && t >= strpos)
399 : (t = s - prog->check_offset_max) != 0) ) {
400 if (ml_anch && t[-1] != '\n') {
401 find_anchor:
402 while (t < strend - end_shift - prog->minlen) {
403 if (*t == '\n') {
404 if (t < s - prog->check_offset_min) {
405 s = t + 1;
406 goto set_useful;
407 }
408 s = t + 1;
409 goto restart;
410 }
411 t++;
412 }
413 s = Nullch;
414 goto finish;
415 }
416 s = t;
417 set_useful:
418 ++BmUSEFUL(prog->check_substr); /* hooray/2 */
419 }
420 else {
421 if (ml_anch && sv
422 && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
423 t = strpos;
424 goto find_anchor;
425 }
426 if (!(prog->reganch & ROPT_NAUGHTY)
427 && --BmUSEFUL(prog->check_substr) < 0
428 && prog->check_substr == prog->float_substr) { /* boo */
429 /* If flags & SOMETHING - do not do it many times on the same match */
430 SvREFCNT_dec(prog->check_substr);
431 prog->check_substr = Nullsv; /* disable */
432 prog->float_substr = Nullsv; /* clear */
433 s = strpos;
434 prog->reganch &= ~RE_USE_INTUIT;
435 }
436 else
437 s = strpos;
438 }
439
440 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sFound%s at offset %ld\n",
441 PL_colors[4],PL_colors[5], (long)(s - strpos)) );
442 return s;
443 fail:
444 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sNot found...%s\n",
445 PL_colors[4],PL_colors[5]));
446 return Nullch;
447}
9661b544 448
a687059c 449/*
c277df42 450 - regexec_flags - match a regexp against a string
a687059c 451 */
79072805 452I32
864dbfa3 453Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
22e551b9 454 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
c277df42
IZ
455/* strend: pointer to null at end of string */
456/* strbeg: real beginning of string */
457/* minend: end of match must be >=minend after stringarg. */
458/* data: May be used for some additional optimizations. */
459/* nosave: For optimizations. */
a687059c 460{
5c0ca799 461 dTHR;
a0d0e21e 462 register char *s;
c277df42 463 register regnode *c;
a0d0e21e
LW
464 register char *startpos = stringarg;
465 register I32 tmp;
c277df42 466 I32 minlen; /* must match at least this many chars */
a0d0e21e
LW
467 I32 dontbother = 0; /* how many characters not to try at end */
468 CURCUR cc;
c277df42 469 I32 start_shift = 0; /* Offset of the start to find
a0ed51b3
LW
470 constant substr. */ /* CC */
471 I32 end_shift = 0; /* Same for the end. */ /* CC */
c277df42
IZ
472 I32 scream_pos = -1; /* Internal iterator of scream. */
473 char *scream_olds;
3280af22 474 SV* oreplsv = GvSV(PL_replgv);
a687059c 475
a0d0e21e 476 cc.cur = 0;
4633a7c4 477 cc.oldcc = 0;
3280af22 478 PL_regcc = &cc;
a0d0e21e 479
0f5d15d6 480 cache_re(prog);
a0d0e21e 481#ifdef DEBUGGING
3280af22 482 PL_regnarrate = PL_debug & 512;
a0d0e21e
LW
483#endif
484
485 /* Be paranoid... */
486 if (prog == NULL || startpos == NULL) {
cea2e8a9 487 Perl_croak(aTHX_ "NULL regexp parameter");
a0d0e21e
LW
488 return 0;
489 }
490
c277df42
IZ
491 minlen = prog->minlen;
492 if (strend - startpos < minlen) goto phooey;
493
a0d0e21e 494 if (startpos == strbeg) /* is ^ valid at stringarg? */
3280af22 495 PL_regprev = '\n';
a0d0e21e 496 else {
a0ed51b3 497 PL_regprev = (U32)stringarg[-1];
3280af22
NIS
498 if (!PL_multiline && PL_regprev == '\n')
499 PL_regprev = '\0'; /* force ^ to NOT match */
a0d0e21e 500 }
bbce6d69 501
a0d0e21e 502 /* Check validity of program. */
22c35a8c 503 if (UCHARAT(prog->program) != REG_MAGIC) {
cea2e8a9 504 Perl_croak(aTHX_ "corrupted regexp program");
a0d0e21e
LW
505 }
506
3280af22
NIS
507 PL_reg_flags = 0;
508 PL_reg_eval_set = 0;
a0d0e21e 509
a0ed51b3
LW
510 if (prog->reganch & ROPT_UTF8)
511 PL_reg_flags |= RF_utf8;
512
513 /* Mark beginning of line for ^ and lookbehind. */
514 PL_regbol = startpos;
515 PL_bostr = strbeg;
9661b544 516 PL_reg_sv = sv;
a0ed51b3
LW
517
518 /* Mark end of line for $ (and such) */
519 PL_regeol = strend;
520
521 /* see how far we have to get to not match where we matched before */
522 PL_regtill = startpos+minend;
523
0f5d15d6
IZ
524 /* We start without call_cc context. */
525 PL_reg_call_cc = 0;
526
a0d0e21e
LW
527 /* If there is a "must appear" string, look for it. */
528 s = startpos;
cad2e5aa
JH
529
530 if (prog->reganch & ROPT_GPOS_SEEN) {
531 MAGIC *mg;
532
533 if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
534 && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
535 PL_reg_ganch = strbeg + mg->mg_len;
a0d0e21e 536 else
cad2e5aa
JH
537 PL_reg_ganch = startpos;
538 if (prog->reganch & ROPT_ANCH_GPOS) {
539 if (s > PL_reg_ganch)
540 goto phooey;
541 s = PL_reg_ganch;
a0ed51b3 542 }
a0d0e21e 543 }
a687059c 544
cad2e5aa
JH
545 if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
546 re_scream_pos_data d;
547
548 d.scream_olds = &scream_olds;
549 d.scream_pos = &scream_pos;
550 s = re_intuit_start(prog, sv, s, strend, flags, &d);
551 if (!s)
552 goto phooey; /* not present */
553 }
554
555 DEBUG_r( if (!PL_colorset) reginitcolors() );
556 DEBUG_r(PerlIO_printf(Perl_debug_log,
8d300b32
GS
557 "%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
558 PL_colors[4],PL_colors[5],PL_colors[0],
559 prog->precomp,
560 PL_colors[1],
c277df42 561 (strlen(prog->precomp) > 60 ? "..." : ""),
cad2e5aa 562 PL_colors[0],
c277df42 563 (strend - startpos > 60 ? 60 : strend - startpos),
8d300b32 564 startpos, PL_colors[1],
c277df42
IZ
565 (strend - startpos > 60 ? "..." : ""))
566 );
567
a0d0e21e 568 /* Simplest case: anchored match need be tried only once. */
774d564b 569 /* [unless only anchor is BOL and multiline is set] */
22e551b9 570 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
cad2e5aa 571 if (s == startpos && regtry(prog, startpos))
a0d0e21e 572 goto got_it;
22e551b9
IZ
573 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
574 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
774d564b 575 {
cad2e5aa
JH
576 char *end;
577
a0d0e21e
LW
578 if (minlen)
579 dontbother = minlen - 1;
cad2e5aa 580 end = HOPc(strend, -dontbother) - 1;
a0d0e21e 581 /* for multiline we only have to try after newlines */
cad2e5aa
JH
582 if (prog->check_substr) {
583 while (1) {
584 if (regtry(prog, s))
a0d0e21e 585 goto got_it;
cad2e5aa
JH
586 if (s >= end)
587 goto phooey;
588 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
589 if (!s)
590 goto phooey;
591 }
592 } else {
593 if (s > startpos)
594 s--;
595 while (s < end) {
596 if (*s++ == '\n') { /* don't need PL_utf8skip here */
597 if (regtry(prog, s))
598 goto got_it;
599 }
600 }
35c8bce7 601 }
35c8bce7 602 }
a0d0e21e 603 goto phooey;
22e551b9
IZ
604 } else if (prog->reganch & ROPT_ANCH_GPOS) {
605 if (regtry(prog, PL_reg_ganch))
606 goto got_it;
607 goto phooey;
a0d0e21e 608 }
35c8bce7 609
a0d0e21e 610 /* Messy cases: unanchored match. */
c277df42
IZ
611 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
612 /* we have /x+whatever/ */
cad2e5aa 613 /* it must be a one character string (XXXX Except UTF?) */
c277df42 614 char ch = SvPVX(prog->anchored_substr)[0];
a0ed51b3
LW
615 if (UTF) {
616 while (s < strend) {
617 if (*s == ch) {
618 if (regtry(prog, s)) goto got_it;
619 s += UTF8SKIP(s);
620 while (s < strend && *s == ch)
621 s += UTF8SKIP(s);
622 }
623 s += UTF8SKIP(s);
624 }
625 }
626 else {
627 while (s < strend) {
628 if (*s == ch) {
629 if (regtry(prog, s)) goto got_it;
c277df42 630 s++;
a0ed51b3
LW
631 while (s < strend && *s == ch)
632 s++;
633 }
634 s++;
a0d0e21e 635 }
a687059c 636 }
c277df42
IZ
637 }
638 /*SUPPRESS 560*/
639 else if (prog->anchored_substr != Nullsv
640 || (prog->float_substr != Nullsv
641 && prog->float_max_offset < strend - s)) {
642 SV *must = prog->anchored_substr
643 ? prog->anchored_substr : prog->float_substr;
644 I32 back_max =
645 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
646 I32 back_min =
647 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
648 I32 delta = back_max - back_min;
cf93c79d 649 char *last = HOPc(strend, /* Cannot start after this */
73c4f7a1
GS
650 -(I32)(CHR_SVLEN(must)
651 - (SvTAIL(must) != 0) + back_min));
a0ed51b3
LW
652 char *last1; /* Last position checked before */
653
654 if (s > PL_bostr)
dfe13c55 655 last1 = HOPc(s, -1);
a0ed51b3
LW
656 else
657 last1 = s - 1; /* bogus */
c277df42
IZ
658
659 /* XXXX check_substr already used to find `s', can optimize if
660 check_substr==must. */
661 scream_pos = -1;
662 dontbother = end_shift;
dfe13c55 663 strend = HOPc(strend, -dontbother);
c277df42 664 while ( (s <= last) &&
22e551b9
IZ
665 ((flags & REXEC_SCREAM)
666 ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
c277df42 667 end_shift, &scream_pos, 0))
a0ed51b3 668 : (s = fbm_instr((unsigned char*)HOP(s, back_min),
cf93c79d
IZ
669 (unsigned char*)strend, must,
670 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
dfe13c55
GS
671 if (HOPc(s, -back_max) > last1) {
672 last1 = HOPc(s, -back_min);
673 s = HOPc(s, -back_max);
a0ed51b3
LW
674 }
675 else {
dfe13c55 676 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
c277df42 677
dfe13c55 678 last1 = HOPc(s, -back_min);
c277df42 679 s = t;
a0d0e21e 680 }
a0ed51b3
LW
681 if (UTF) {
682 while (s <= last1) {
683 if (regtry(prog, s))
684 goto got_it;
685 s += UTF8SKIP(s);
686 }
687 }
688 else {
689 while (s <= last1) {
690 if (regtry(prog, s))
691 goto got_it;
692 s++;
693 }
a0d0e21e
LW
694 }
695 }
696 goto phooey;
a0ed51b3
LW
697 }
698 else if (c = prog->regstclass) {
a0d0e21e 699 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a0ed51b3 700 char *cc;
a687059c 701
a0d0e21e
LW
702 if (minlen)
703 dontbother = minlen - 1;
dfe13c55 704 strend = HOPc(strend, -dontbother); /* don't bother with what can't match */
a0d0e21e
LW
705 tmp = 1;
706 /* We know what class it must start with. */
707 switch (OP(c)) {
a0ed51b3
LW
708 case ANYOFUTF8:
709 cc = (char *) OPERAND(c);
710 while (s < strend) {
711 if (REGINCLASSUTF8(c, (U8*)s)) {
712 if (tmp && regtry(prog, s))
713 goto got_it;
714 else
715 tmp = doevery;
716 }
717 else
718 tmp = 1;
719 s += UTF8SKIP(s);
720 }
721 break;
a0d0e21e 722 case ANYOF:
a0ed51b3 723 cc = (char *) OPERAND(c);
a0d0e21e 724 while (s < strend) {
a0ed51b3 725 if (REGINCLASS(cc, *s)) {
a0d0e21e
LW
726 if (tmp && regtry(prog, s))
727 goto got_it;
728 else
729 tmp = doevery;
a687059c 730 }
a0d0e21e
LW
731 else
732 tmp = 1;
733 s++;
734 }
735 break;
bbce6d69 736 case BOUNDL:
3280af22 737 PL_reg_flags |= RF_tainted;
bbce6d69 738 /* FALL THROUGH */
a0d0e21e 739 case BOUND:
a0ed51b3
LW
740 if (minlen) {
741 dontbother++;
742 strend -= 1;
743 }
3280af22 744 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 745 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 746 while (s < strend) {
95bac841 747 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e
LW
748 tmp = !tmp;
749 if (regtry(prog, s))
750 goto got_it;
a687059c 751 }
a0d0e21e
LW
752 s++;
753 }
754 if ((minlen || tmp) && regtry(prog,s))
755 goto got_it;
756 break;
a0ed51b3
LW
757 case BOUNDLUTF8:
758 PL_reg_flags |= RF_tainted;
759 /* FALL THROUGH */
760 case BOUNDUTF8:
761 if (minlen) {
762 dontbother++;
dfe13c55 763 strend = reghop_c(strend, -1);
a0ed51b3 764 }
dfe13c55 765 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
766 tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
767 while (s < strend) {
dfe13c55
GS
768 if (tmp == !(OP(c) == BOUND ?
769 swash_fetch(PL_utf8_alnum, (U8*)s) :
770 isALNUM_LC_utf8((U8*)s)))
771 {
a0ed51b3
LW
772 tmp = !tmp;
773 if (regtry(prog, s))
774 goto got_it;
775 }
776 s += UTF8SKIP(s);
777 }
778 if ((minlen || tmp) && regtry(prog,s))
779 goto got_it;
780 break;
bbce6d69 781 case NBOUNDL:
3280af22 782 PL_reg_flags |= RF_tainted;
bbce6d69 783 /* FALL THROUGH */
a0d0e21e 784 case NBOUND:
a0ed51b3
LW
785 if (minlen) {
786 dontbother++;
787 strend -= 1;
788 }
3280af22 789 tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
95bac841 790 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 791 while (s < strend) {
95bac841 792 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e
LW
793 tmp = !tmp;
794 else if (regtry(prog, s))
795 goto got_it;
796 s++;
797 }
798 if ((minlen || !tmp) && regtry(prog,s))
799 goto got_it;
800 break;
a0ed51b3
LW
801 case NBOUNDLUTF8:
802 PL_reg_flags |= RF_tainted;
803 /* FALL THROUGH */
804 case NBOUNDUTF8:
805 if (minlen) {
806 dontbother++;
dfe13c55 807 strend = reghop_c(strend, -1);
a0ed51b3 808 }
dfe13c55 809 tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
a0ed51b3
LW
810 tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
811 while (s < strend) {
dfe13c55
GS
812 if (tmp == !(OP(c) == NBOUND ?
813 swash_fetch(PL_utf8_alnum, (U8*)s) :
814 isALNUM_LC_utf8((U8*)s)))
a0ed51b3
LW
815 tmp = !tmp;
816 else if (regtry(prog, s))
817 goto got_it;
818 s += UTF8SKIP(s);
819 }
820 if ((minlen || !tmp) && regtry(prog,s))
821 goto got_it;
822 break;
a0d0e21e
LW
823 case ALNUM:
824 while (s < strend) {
bbce6d69 825 if (isALNUM(*s)) {
826 if (tmp && regtry(prog, s))
827 goto got_it;
828 else
829 tmp = doevery;
830 }
831 else
832 tmp = 1;
833 s++;
834 }
835 break;
a0ed51b3
LW
836 case ALNUMUTF8:
837 while (s < strend) {
dfe13c55 838 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
839 if (tmp && regtry(prog, s))
840 goto got_it;
841 else
842 tmp = doevery;
843 }
844 else
845 tmp = 1;
846 s += UTF8SKIP(s);
847 }
848 break;
bbce6d69 849 case ALNUML:
3280af22 850 PL_reg_flags |= RF_tainted;
bbce6d69 851 while (s < strend) {
852 if (isALNUM_LC(*s)) {
a0d0e21e
LW
853 if (tmp && regtry(prog, s))
854 goto got_it;
a687059c 855 else
a0d0e21e
LW
856 tmp = doevery;
857 }
858 else
859 tmp = 1;
860 s++;
861 }
862 break;
a0ed51b3
LW
863 case ALNUMLUTF8:
864 PL_reg_flags |= RF_tainted;
865 while (s < strend) {
dfe13c55 866 if (isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
867 if (tmp && regtry(prog, s))
868 goto got_it;
869 else
870 tmp = doevery;
871 }
872 else
873 tmp = 1;
874 s += UTF8SKIP(s);
875 }
876 break;
a0d0e21e
LW
877 case NALNUM:
878 while (s < strend) {
bbce6d69 879 if (!isALNUM(*s)) {
880 if (tmp && regtry(prog, s))
881 goto got_it;
882 else
883 tmp = doevery;
884 }
885 else
886 tmp = 1;
887 s++;
888 }
889 break;
a0ed51b3
LW
890 case NALNUMUTF8:
891 while (s < strend) {
dfe13c55 892 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
a0ed51b3
LW
893 if (tmp && regtry(prog, s))
894 goto got_it;
895 else
896 tmp = doevery;
897 }
898 else
899 tmp = 1;
900 s += UTF8SKIP(s);
901 }
902 break;
bbce6d69 903 case NALNUML:
3280af22 904 PL_reg_flags |= RF_tainted;
bbce6d69 905 while (s < strend) {
906 if (!isALNUM_LC(*s)) {
a0d0e21e
LW
907 if (tmp && regtry(prog, s))
908 goto got_it;
a687059c 909 else
a0d0e21e 910 tmp = doevery;
a687059c 911 }
a0d0e21e
LW
912 else
913 tmp = 1;
914 s++;
915 }
916 break;
a0ed51b3
LW
917 case NALNUMLUTF8:
918 PL_reg_flags |= RF_tainted;
919 while (s < strend) {
dfe13c55 920 if (!isALNUM_LC_utf8((U8*)s)) {
a0ed51b3
LW
921 if (tmp && regtry(prog, s))
922 goto got_it;
923 else
924 tmp = doevery;
925 }
926 else
927 tmp = 1;
928 s += UTF8SKIP(s);
929 }
930 break;
a0d0e21e
LW
931 case SPACE:
932 while (s < strend) {
933 if (isSPACE(*s)) {
934 if (tmp && regtry(prog, s))
935 goto got_it;
936 else
937 tmp = doevery;
2304df62 938 }
a0d0e21e
LW
939 else
940 tmp = 1;
941 s++;
942 }
943 break;
a0ed51b3
LW
944 case SPACEUTF8:
945 while (s < strend) {
dfe13c55 946 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
a0ed51b3
LW
947 if (tmp && regtry(prog, s))
948 goto got_it;
949 else
950 tmp = doevery;
951 }
952 else
953 tmp = 1;
954 s += UTF8SKIP(s);
955 }
956 break;
bbce6d69 957 case SPACEL:
3280af22 958 PL_reg_flags |= RF_tainted;
bbce6d69 959 while (s < strend) {
960 if (isSPACE_LC(*s)) {
961 if (tmp && regtry(prog, s))
962 goto got_it;
963 else
964 tmp = doevery;
965 }
966 else
967 tmp = 1;
968 s++;
969 }
970 break;
a0ed51b3
LW
971 case SPACELUTF8:
972 PL_reg_flags |= RF_tainted;
973 while (s < strend) {
dfe13c55 974 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
a0ed51b3
LW
975 if (tmp && regtry(prog, s))
976 goto got_it;
977 else
978 tmp = doevery;
979 }
980 else
981 tmp = 1;
982 s += UTF8SKIP(s);
983 }
984 break;
a0d0e21e
LW
985 case NSPACE:
986 while (s < strend) {
987 if (!isSPACE(*s)) {
988 if (tmp && regtry(prog, s))
989 goto got_it;
990 else
991 tmp = doevery;
a687059c 992 }
a0d0e21e
LW
993 else
994 tmp = 1;
995 s++;
996 }
997 break;
a0ed51b3
LW
998 case NSPACEUTF8:
999 while (s < strend) {
dfe13c55 1000 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
a0ed51b3
LW
1001 if (tmp && regtry(prog, s))
1002 goto got_it;
1003 else
1004 tmp = doevery;
1005 }
1006 else
1007 tmp = 1;
1008 s += UTF8SKIP(s);
1009 }
1010 break;
bbce6d69 1011 case NSPACEL:
3280af22 1012 PL_reg_flags |= RF_tainted;
bbce6d69 1013 while (s < strend) {
1014 if (!isSPACE_LC(*s)) {
1015 if (tmp && regtry(prog, s))
1016 goto got_it;
1017 else
1018 tmp = doevery;
1019 }
1020 else
1021 tmp = 1;
1022 s++;
1023 }
1024 break;
a0ed51b3
LW
1025 case NSPACELUTF8:
1026 PL_reg_flags |= RF_tainted;
1027 while (s < strend) {
dfe13c55 1028 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
a0ed51b3
LW
1029 if (tmp && regtry(prog, s))
1030 goto got_it;
1031 else
1032 tmp = doevery;
1033 }
1034 else
1035 tmp = 1;
1036 s += UTF8SKIP(s);
1037 }
1038 break;
a0d0e21e
LW
1039 case DIGIT:
1040 while (s < strend) {
1041 if (isDIGIT(*s)) {
1042 if (tmp && regtry(prog, s))
1043 goto got_it;
1044 else
1045 tmp = doevery;
2b69d0c2 1046 }
a0d0e21e
LW
1047 else
1048 tmp = 1;
1049 s++;
1050 }
1051 break;
a0ed51b3
LW
1052 case DIGITUTF8:
1053 while (s < strend) {
dfe13c55 1054 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1055 if (tmp && regtry(prog, s))
1056 goto got_it;
1057 else
1058 tmp = doevery;
1059 }
1060 else
1061 tmp = 1;
1062 s += UTF8SKIP(s);
1063 }
1064 break;
a0d0e21e
LW
1065 case NDIGIT:
1066 while (s < strend) {
1067 if (!isDIGIT(*s)) {
1068 if (tmp && regtry(prog, s))
1069 goto got_it;
1070 else
1071 tmp = doevery;
a687059c 1072 }
a0d0e21e
LW
1073 else
1074 tmp = 1;
1075 s++;
1076 }
1077 break;
a0ed51b3
LW
1078 case NDIGITUTF8:
1079 while (s < strend) {
dfe13c55 1080 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
a0ed51b3
LW
1081 if (tmp && regtry(prog, s))
1082 goto got_it;
1083 else
1084 tmp = doevery;
1085 }
1086 else
1087 tmp = 1;
1088 s += UTF8SKIP(s);
1089 }
1090 break;
a687059c 1091 }
a0d0e21e
LW
1092 }
1093 else {
c277df42
IZ
1094 dontbother = 0;
1095 if (prog->float_substr != Nullsv) { /* Trim the end. */
1096 char *last;
1097 I32 oldpos = scream_pos;
1098
22e551b9
IZ
1099 if (flags & REXEC_SCREAM) {
1100 last = screaminstr(sv, prog->float_substr, s - strbeg,
c277df42 1101 end_shift, &scream_pos, 1); /* last one */
cf93c79d 1102 if (!last)
c277df42 1103 last = scream_olds; /* Only one occurence. */
a0ed51b3
LW
1104 }
1105 else {
c277df42
IZ
1106 STRLEN len;
1107 char *little = SvPV(prog->float_substr, len);
cf93c79d
IZ
1108
1109 if (SvTAIL(prog->float_substr)) {
1110 if (memEQ(strend - len + 1, little, len - 1))
1111 last = strend - len + 1;
1112 else if (!PL_multiline)
1113 last = memEQ(strend - len, little, len)
1114 ? strend - len : Nullch;
1115 else
1116 goto find_last;
1117 } else {
1118 find_last:
1119 if (len)
1120 last = rninstr(s, strend, little, little + len);
1121 else
1122 last = strend; /* matching `$' */
1123 }
c277df42
IZ
1124 }
1125 if (last == NULL) goto phooey; /* Should not happen! */
19b4f81a 1126 dontbother = strend - last + prog->float_min_offset;
c277df42
IZ
1127 }
1128 if (minlen && (dontbother < minlen))
a0d0e21e 1129 dontbother = minlen - 1;
a0ed51b3 1130 strend -= dontbother; /* this one's always in bytes! */
a0d0e21e 1131 /* We don't know much -- general case. */
a0ed51b3
LW
1132 if (UTF) {
1133 for (;;) {
84df6dba 1134 if (regtry(prog, s))
a0ed51b3 1135 goto got_it;
a0ed51b3
LW
1136 if (s >= strend)
1137 break;
1138 s += UTF8SKIP(s);
1139 };
1140 }
1141 else {
1142 do {
1143 if (regtry(prog, s))
1144 goto got_it;
1145 } while (s++ < strend);
1146 }
a0d0e21e
LW
1147 }
1148
1149 /* Failure. */
1150 goto phooey;
a687059c 1151
a0d0e21e 1152got_it:
3280af22 1153 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
5f05dabc 1154
5c5e4c24
IZ
1155 if (PL_reg_eval_set) {
1156 /* Preserve the current value of $^R */
1157 if (oreplsv != GvSV(PL_replgv))
1158 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1159 restored, the value remains
1160 the same. */
9661b544 1161 restore_pos(0);
5c5e4c24 1162 }
cf93c79d
IZ
1163
1164 /* make sure $`, $&, $', and $digit will work later */
1165 if ( !(flags & REXEC_NOT_FIRST) ) {
1166 if (RX_MATCH_COPIED(prog)) {
1167 Safefree(prog->subbeg);
1168 RX_MATCH_COPIED_off(prog);
1169 }
1170 if (flags & REXEC_COPY_STR) {
1171 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1172
1173 s = savepvn(strbeg, i);
1174 prog->subbeg = s;
1175 prog->sublen = i;
1176 RX_MATCH_COPIED_on(prog);
1177 }
1178 else {
1179 prog->subbeg = strbeg;
1180 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1181 }
1182 }
5c5e4c24 1183
a0d0e21e
LW
1184 return 1;
1185
1186phooey:
9661b544
IZ
1187 if (PL_reg_eval_set)
1188 restore_pos(0);
a0d0e21e 1189 return 0;
a687059c
LW
1190}
1191
1192/*
1193 - regtry - try match at specific point
1194 */
76e3520e 1195STATIC I32 /* 0 failure, 1 success */
cea2e8a9 1196S_regtry(pTHX_ regexp *prog, char *startpos)
a687059c 1197{
c277df42 1198 dTHR;
a0d0e21e 1199 register I32 i;
cf93c79d
IZ
1200 register I32 *sp;
1201 register I32 *ep;
c277df42 1202 CHECKPOINT lastcp;
a0d0e21e 1203
3280af22 1204 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
9661b544
IZ
1205 MAGIC *mg;
1206
3280af22 1207 PL_reg_eval_set = RS_init;
ce862d02 1208 DEBUG_r(DEBUG_s(
c3464db5 1209 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n",
3280af22 1210 PL_stack_sp - PL_stack_base);
ce862d02
IZ
1211 ));
1212 SAVEINT(cxstack[cxstack_ix].blk_oldsp);
3280af22 1213 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
ce862d02
IZ
1214 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1215 SAVETMPS;
1216 /* Apparently this is not needed, judging by wantarray. */
1217 /* SAVEINT(cxstack[cxstack_ix].blk_gimme);
1218 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
9661b544
IZ
1219
1220 if (PL_reg_sv) {
1221 /* Make $_ available to executed code. */
127ad2b7
GS
1222 if (PL_reg_sv != DEFSV) {
1223 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1224 SAVESPTR(DEFSV);
1225 DEFSV = PL_reg_sv;
9661b544
IZ
1226 }
1227
1228 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
1229 && (mg = mg_find(PL_reg_sv, 'g')))) {
1230 /* prepare for quick setting of pos */
1231 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1232 mg = mg_find(PL_reg_sv, 'g');
1233 mg->mg_len = -1;
1234 }
1235 PL_reg_magic = mg;
1236 PL_reg_oldpos = mg->mg_len;
cea2e8a9 1237 SAVEDESTRUCTOR(S_restore_pos, 0);
9661b544 1238 }
5c5e4c24
IZ
1239 if (!PL_reg_curpm)
1240 New(22,PL_reg_curpm, 1, PMOP);
1241 PL_reg_curpm->op_pmregexp = prog;
1242 PL_reg_oldcurpm = PL_curpm;
1243 PL_curpm = PL_reg_curpm;
cf93c79d
IZ
1244 if (RX_MATCH_COPIED(prog)) {
1245 /* Here is a serious problem: we cannot rewrite subbeg,
1246 since it may be needed if this match fails. Thus
1247 $` inside (?{}) could fail... */
1248 PL_reg_oldsaved = prog->subbeg;
1249 PL_reg_oldsavedlen = prog->sublen;
1250 RX_MATCH_COPIED_off(prog);
1251 }
1252 else
1253 PL_reg_oldsaved = Nullch;
5c5e4c24 1254 prog->subbeg = PL_bostr;
cf93c79d 1255 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
ce862d02 1256 }
cf93c79d 1257 prog->startp[0] = startpos - PL_bostr;
3280af22
NIS
1258 PL_reginput = startpos;
1259 PL_regstartp = prog->startp;
1260 PL_regendp = prog->endp;
1261 PL_reglastparen = &prog->lastparen;
a0d0e21e 1262 prog->lastparen = 0;
3280af22 1263 PL_regsize = 0;
364723c2 1264 DEBUG_r(PL_reg_starttry = startpos);
3280af22
NIS
1265 if (PL_reg_start_tmpl <= prog->nparens) {
1266 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1267 if(PL_reg_start_tmp)
1268 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 1269 else
3280af22 1270 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
c277df42 1271 }
a0d0e21e 1272
5c5e4c24
IZ
1273 /* XXXX What this code is doing here?!!! There should be no need
1274 to do this again and again, PL_reglastparen should take care of
1275 this! */
a0d0e21e
LW
1276 sp = prog->startp;
1277 ep = prog->endp;
1278 if (prog->nparens) {
5c5e4c24 1279 for (i = prog->nparens; i >= 1; i--) {
cf93c79d
IZ
1280 *++sp = -1;
1281 *++ep = -1;
a687059c 1282 }
a0d0e21e 1283 }
c277df42 1284 REGCP_SET;
7e5428c5 1285 if (regmatch(prog->program + 1)) {
cf93c79d 1286 prog->endp[0] = PL_reginput - PL_bostr;
a0d0e21e
LW
1287 return 1;
1288 }
c277df42
IZ
1289 REGCP_UNWIND;
1290 return 0;
a687059c
LW
1291}
1292
1293/*
1294 - regmatch - main matching routine
1295 *
1296 * Conceptually the strategy is simple: check to see whether the current
1297 * node matches, call self recursively to see whether the rest matches,
1298 * and then act accordingly. In practice we make some effort to avoid
1299 * recursion, in particular by going through "ordinary" nodes (that don't
1300 * need to know whether the rest of the match failed) by a loop instead of
1301 * by recursion.
1302 */
1303/* [lwall] I've hoisted the register declarations to the outer block in order to
1304 * maybe save a little bit of pushing and popping on the stack. It also takes
1305 * advantage of machines that use a register save mask on subroutine entry.
1306 */
76e3520e 1307STATIC I32 /* 0 failure, 1 success */
cea2e8a9 1308S_regmatch(pTHX_ regnode *prog)
a687059c 1309{
c277df42
IZ
1310 dTHR;
1311 register regnode *scan; /* Current node. */
1312 regnode *next; /* Next node. */
1313 regnode *inner; /* Next node in internal branch. */
c3464db5
DD
1314 register I32 nextchr; /* renamed nextchr - nextchar colides with
1315 function of same name */
a0d0e21e
LW
1316 register I32 n; /* no or next */
1317 register I32 ln; /* len or last */
1318 register char *s; /* operand or save */
3280af22 1319 register char *locinput = PL_reginput;
c277df42
IZ
1320 register I32 c1, c2, paren; /* case fold search, parenth */
1321 int minmod = 0, sw = 0, logical = 0;
4633a7c4 1322#ifdef DEBUGGING
3280af22 1323 PL_regindent++;
4633a7c4 1324#endif
a0d0e21e 1325
a0ed51b3 1326 /* Note that nextchr is a byte even in UTF */
76e3520e 1327 nextchr = UCHARAT(locinput);
a0d0e21e
LW
1328 scan = prog;
1329 while (scan != NULL) {
c277df42 1330#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
a687059c 1331#ifdef DEBUGGING
c277df42
IZ
1332# define sayYES goto yes
1333# define sayNO goto no
1334# define saySAME(x) if (x) goto yes; else goto no
1335# define REPORT_CODE_OFF 24
4633a7c4 1336#else
c277df42
IZ
1337# define sayYES return 1
1338# define sayNO return 0
1339# define saySAME(x) return x
a687059c 1340#endif
c277df42
IZ
1341 DEBUG_r( {
1342 SV *prop = sv_newmortal();
3280af22 1343 int docolor = *PL_colors[0];
c277df42 1344 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
3280af22 1345 int l = (PL_regeol - locinput > taill ? taill : PL_regeol - locinput);
beed8111
IZ
1346 /* The part of the string before starttry has one color
1347 (pref0_len chars), between starttry and current
1348 position another one (pref_len - pref0_len chars),
1349 after the current position the third one.
1350 We assume that pref0_len <= pref_len, otherwise we
1351 decrease pref0_len. */
3280af22
NIS
1352 int pref_len = (locinput - PL_bostr > (5 + taill) - l
1353 ? (5 + taill) - l : locinput - PL_bostr);
364723c2 1354 int pref0_len = pref_len - (locinput - PL_reg_starttry);
c277df42 1355
3280af22
NIS
1356 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
1357 l = ( PL_regeol - locinput > (5 + taill) - pref_len
1358 ? (5 + taill) - pref_len : PL_regeol - locinput);
8d300b32
GS
1359 if (pref0_len < 0)
1360 pref0_len = 0;
beed8111
IZ
1361 if (pref0_len > pref_len)
1362 pref0_len = pref_len;
c277df42
IZ
1363 regprop(prop, scan);
1364 PerlIO_printf(Perl_debug_log,
8d300b32 1365 "%4i <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3d:%*s%s\n",
3280af22 1366 locinput - PL_bostr,
8d300b32
GS
1367 PL_colors[4], pref0_len,
1368 locinput - pref_len, PL_colors[5],
1369 PL_colors[2], pref_len - pref0_len,
1370 locinput - pref_len + pref0_len, PL_colors[3],
c277df42 1371 (docolor ? "" : "> <"),
3280af22 1372 PL_colors[0], l, locinput, PL_colors[1],
c277df42
IZ
1373 15 - l - pref_len + 1,
1374 "",
3280af22 1375 scan - PL_regprogram, PL_regindent*2, "",
c277df42
IZ
1376 SvPVX(prop));
1377 } );
a687059c 1378
c277df42 1379 next = scan + NEXT_OFF(scan);
a0d0e21e
LW
1380 if (next == scan)
1381 next = NULL;
a687059c 1382
a0d0e21e
LW
1383 switch (OP(scan)) {
1384 case BOL:
3280af22
NIS
1385 if (locinput == PL_bostr
1386 ? PL_regprev == '\n'
1387 : (PL_multiline &&
1388 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e 1389 {
a0ed51b3 1390 /* regtill = regbol; */
a0d0e21e
LW
1391 break;
1392 }
4633a7c4 1393 sayNO;
a0d0e21e 1394 case MBOL:
3280af22
NIS
1395 if (locinput == PL_bostr
1396 ? PL_regprev == '\n'
1397 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
a0d0e21e
LW
1398 {
1399 break;
1400 }
4633a7c4 1401 sayNO;
a0d0e21e 1402 case SBOL:
3280af22 1403 if (locinput == PL_regbol && PL_regprev == '\n')
a0d0e21e 1404 break;
4633a7c4 1405 sayNO;
774d564b 1406 case GPOS:
22e551b9 1407 if (locinput == PL_reg_ganch)
a0d0e21e 1408 break;
4633a7c4 1409 sayNO;
a0d0e21e 1410 case EOL:
3280af22 1411 if (PL_multiline)
a0d0e21e
LW
1412 goto meol;
1413 else
1414 goto seol;
1415 case MEOL:
1416 meol:
3280af22 1417 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1418 sayNO;
a0d0e21e
LW
1419 break;
1420 case SEOL:
1421 seol:
3280af22 1422 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
4633a7c4 1423 sayNO;
3280af22 1424 if (PL_regeol - locinput > 1)
4633a7c4 1425 sayNO;
a0d0e21e 1426 break;
b85d18e9 1427 case EOS:
3280af22 1428 if (PL_regeol != locinput)
b85d18e9
IZ
1429 sayNO;
1430 break;
a0ed51b3
LW
1431 case SANYUTF8:
1432 if (nextchr & 0x80) {
6f06b55f 1433 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1434 if (locinput > PL_regeol)
1435 sayNO;
1436 nextchr = UCHARAT(locinput);
1437 break;
1438 }
1439 if (!nextchr && locinput >= PL_regeol)
1440 sayNO;
1441 nextchr = UCHARAT(++locinput);
1442 break;
a0d0e21e 1443 case SANY:
3280af22 1444 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1445 sayNO;
76e3520e 1446 nextchr = UCHARAT(++locinput);
a0d0e21e 1447 break;
a0ed51b3
LW
1448 case ANYUTF8:
1449 if (nextchr & 0x80) {
6f06b55f 1450 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1451 if (locinput > PL_regeol)
1452 sayNO;
1453 nextchr = UCHARAT(locinput);
1454 break;
1455 }
1456 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
1457 sayNO;
1458 nextchr = UCHARAT(++locinput);
1459 break;
22c35a8c 1460 case REG_ANY:
3280af22 1461 if (!nextchr && locinput >= PL_regeol || nextchr == '\n')
4633a7c4 1462 sayNO;
76e3520e 1463 nextchr = UCHARAT(++locinput);
a0d0e21e 1464 break;
bbce6d69 1465 case EXACT:
161b471a 1466 s = (char *) OPERAND(scan);
c277df42 1467 ln = UCHARAT(s++);
a0d0e21e 1468 /* Inline the first character, for speed. */
76e3520e 1469 if (UCHARAT(s) != nextchr)
4633a7c4 1470 sayNO;
3280af22 1471 if (PL_regeol - locinput < ln)
4633a7c4 1472 sayNO;
36477c24 1473 if (ln > 1 && memNE(s, locinput, ln))
4633a7c4 1474 sayNO;
a0d0e21e 1475 locinput += ln;
76e3520e 1476 nextchr = UCHARAT(locinput);
bbce6d69 1477 break;
1478 case EXACTFL:
3280af22 1479 PL_reg_flags |= RF_tainted;
bbce6d69 1480 /* FALL THROUGH */
1481 case EXACTF:
161b471a 1482 s = (char *) OPERAND(scan);
c277df42 1483 ln = UCHARAT(s++);
a0ed51b3
LW
1484
1485 if (UTF) {
1486 char *l = locinput;
1487 char *e = s + ln;
1488 c1 = OP(scan) == EXACTF;
1489 while (s < e) {
1490 if (l >= PL_regeol)
1491 sayNO;
dfe13c55
GS
1492 if (utf8_to_uv((U8*)s, 0) != (c1 ?
1493 toLOWER_utf8((U8*)l) :
1494 toLOWER_LC_utf8((U8*)l)))
1495 {
a0ed51b3 1496 sayNO;
dfe13c55 1497 }
a0ed51b3
LW
1498 s += UTF8SKIP(s);
1499 l += UTF8SKIP(l);
1500 }
1501 locinput = l;
1502 nextchr = UCHARAT(locinput);
1503 break;
1504 }
1505
bbce6d69 1506 /* Inline the first character, for speed. */
76e3520e 1507 if (UCHARAT(s) != nextchr &&
bbce6d69 1508 UCHARAT(s) != ((OP(scan) == EXACTF)
22c35a8c 1509 ? PL_fold : PL_fold_locale)[nextchr])
bbce6d69 1510 sayNO;
3280af22 1511 if (PL_regeol - locinput < ln)
bbce6d69 1512 sayNO;
5f05dabc 1513 if (ln > 1 && (OP(scan) == EXACTF
1514 ? ibcmp(s, locinput, ln)
1515 : ibcmp_locale(s, locinput, ln)))
bbce6d69 1516 sayNO;
1517 locinput += ln;
76e3520e 1518 nextchr = UCHARAT(locinput);
a0d0e21e 1519 break;
a0ed51b3
LW
1520 case ANYOFUTF8:
1521 s = (char *) OPERAND(scan);
1522 if (!REGINCLASSUTF8(scan, (U8*)locinput))
1523 sayNO;
1524 if (locinput >= PL_regeol)
1525 sayNO;
6f06b55f 1526 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1527 nextchr = UCHARAT(locinput);
1528 break;
a0d0e21e 1529 case ANYOF:
161b471a 1530 s = (char *) OPERAND(scan);
76e3520e
GS
1531 if (nextchr < 0)
1532 nextchr = UCHARAT(locinput);
873ef191 1533 if (!REGINCLASS(s, nextchr))
4633a7c4 1534 sayNO;
3280af22 1535 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1536 sayNO;
76e3520e 1537 nextchr = UCHARAT(++locinput);
a0d0e21e 1538 break;
bbce6d69 1539 case ALNUML:
3280af22 1540 PL_reg_flags |= RF_tainted;
bbce6d69 1541 /* FALL THROUGH */
a0d0e21e 1542 case ALNUM:
76e3520e 1543 if (!nextchr)
4633a7c4 1544 sayNO;
bbce6d69 1545 if (!(OP(scan) == ALNUM
76e3520e 1546 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
4633a7c4 1547 sayNO;
76e3520e 1548 nextchr = UCHARAT(++locinput);
a0d0e21e 1549 break;
a0ed51b3
LW
1550 case ALNUMLUTF8:
1551 PL_reg_flags |= RF_tainted;
1552 /* FALL THROUGH */
1553 case ALNUMUTF8:
1554 if (!nextchr)
1555 sayNO;
1556 if (nextchr & 0x80) {
1557 if (!(OP(scan) == ALNUMUTF8
dfe13c55
GS
1558 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1559 : isALNUM_LC_utf8((U8*)locinput)))
1560 {
a0ed51b3 1561 sayNO;
dfe13c55 1562 }
6f06b55f 1563 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1564 nextchr = UCHARAT(locinput);
1565 break;
1566 }
1567 if (!(OP(scan) == ALNUMUTF8
1568 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
1569 sayNO;
1570 nextchr = UCHARAT(++locinput);
1571 break;
bbce6d69 1572 case NALNUML:
3280af22 1573 PL_reg_flags |= RF_tainted;
bbce6d69 1574 /* FALL THROUGH */
a0d0e21e 1575 case NALNUM:
3280af22 1576 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1577 sayNO;
bbce6d69 1578 if (OP(scan) == NALNUM
76e3520e 1579 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
4633a7c4 1580 sayNO;
76e3520e 1581 nextchr = UCHARAT(++locinput);
a0d0e21e 1582 break;
a0ed51b3
LW
1583 case NALNUMLUTF8:
1584 PL_reg_flags |= RF_tainted;
1585 /* FALL THROUGH */
1586 case NALNUMUTF8:
1587 if (!nextchr && locinput >= PL_regeol)
1588 sayNO;
1589 if (nextchr & 0x80) {
1590 if (OP(scan) == NALNUMUTF8
dfe13c55
GS
1591 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
1592 : isALNUM_LC_utf8((U8*)locinput))
1593 {
a0ed51b3 1594 sayNO;
dfe13c55 1595 }
6f06b55f 1596 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1597 nextchr = UCHARAT(locinput);
1598 break;
1599 }
1600 if (OP(scan) == NALNUMUTF8
1601 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
1602 sayNO;
1603 nextchr = UCHARAT(++locinput);
1604 break;
bbce6d69 1605 case BOUNDL:
1606 case NBOUNDL:
3280af22 1607 PL_reg_flags |= RF_tainted;
bbce6d69 1608 /* FALL THROUGH */
a0d0e21e 1609 case BOUND:
bbce6d69 1610 case NBOUND:
1611 /* was last char in word? */
3280af22 1612 ln = (locinput != PL_regbol) ? UCHARAT(locinput - 1) : PL_regprev;
bbce6d69 1613 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
1614 ln = isALNUM(ln);
76e3520e 1615 n = isALNUM(nextchr);
bbce6d69 1616 }
1617 else {
1618 ln = isALNUM_LC(ln);
76e3520e 1619 n = isALNUM_LC(nextchr);
bbce6d69 1620 }
95bac841 1621 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 1622 sayNO;
a0d0e21e 1623 break;
a0ed51b3
LW
1624 case BOUNDLUTF8:
1625 case NBOUNDLUTF8:
1626 PL_reg_flags |= RF_tainted;
1627 /* FALL THROUGH */
1628 case BOUNDUTF8:
1629 case NBOUNDUTF8:
1630 /* was last char in word? */
dfe13c55
GS
1631 ln = (locinput != PL_regbol)
1632 ? utf8_to_uv(reghop((U8*)locinput, -1), 0) : PL_regprev;
a0ed51b3
LW
1633 if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
1634 ln = isALNUM_uni(ln);
dfe13c55 1635 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
a0ed51b3
LW
1636 }
1637 else {
1638 ln = isALNUM_LC_uni(ln);
dfe13c55 1639 n = isALNUM_LC_utf8((U8*)locinput);
a0ed51b3
LW
1640 }
1641 if (((!ln) == (!n)) == (OP(scan) == BOUNDUTF8 || OP(scan) == BOUNDLUTF8))
1642 sayNO;
1643 break;
bbce6d69 1644 case SPACEL:
3280af22 1645 PL_reg_flags |= RF_tainted;
bbce6d69 1646 /* FALL THROUGH */
a0d0e21e 1647 case SPACE:
3280af22 1648 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1649 sayNO;
bbce6d69 1650 if (!(OP(scan) == SPACE
76e3520e 1651 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
4633a7c4 1652 sayNO;
76e3520e 1653 nextchr = UCHARAT(++locinput);
a0d0e21e 1654 break;
a0ed51b3
LW
1655 case SPACELUTF8:
1656 PL_reg_flags |= RF_tainted;
1657 /* FALL THROUGH */
1658 case SPACEUTF8:
1659 if (!nextchr && locinput >= PL_regeol)
1660 sayNO;
1661 if (nextchr & 0x80) {
1662 if (!(OP(scan) == SPACEUTF8
dfe13c55
GS
1663 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1664 : isSPACE_LC_utf8((U8*)locinput)))
1665 {
a0ed51b3 1666 sayNO;
dfe13c55 1667 }
6f06b55f 1668 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1669 nextchr = UCHARAT(locinput);
1670 break;
1671 }
1672 if (!(OP(scan) == SPACEUTF8
1673 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
1674 sayNO;
1675 nextchr = UCHARAT(++locinput);
1676 break;
bbce6d69 1677 case NSPACEL:
3280af22 1678 PL_reg_flags |= RF_tainted;
bbce6d69 1679 /* FALL THROUGH */
a0d0e21e 1680 case NSPACE:
76e3520e 1681 if (!nextchr)
4633a7c4 1682 sayNO;
bbce6d69 1683 if (OP(scan) == SPACE
76e3520e 1684 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 1685 sayNO;
76e3520e 1686 nextchr = UCHARAT(++locinput);
a0d0e21e 1687 break;
a0ed51b3
LW
1688 case NSPACELUTF8:
1689 PL_reg_flags |= RF_tainted;
1690 /* FALL THROUGH */
1691 case NSPACEUTF8:
1692 if (!nextchr)
1693 sayNO;
1694 if (nextchr & 0x80) {
1695 if (OP(scan) == NSPACEUTF8
dfe13c55
GS
1696 ? swash_fetch(PL_utf8_space,(U8*)locinput)
1697 : isSPACE_LC_utf8((U8*)locinput))
1698 {
a0ed51b3 1699 sayNO;
dfe13c55 1700 }
6f06b55f 1701 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1702 nextchr = UCHARAT(locinput);
1703 break;
1704 }
1705 if (OP(scan) == NSPACEUTF8
1706 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
1707 sayNO;
1708 nextchr = UCHARAT(++locinput);
1709 break;
a0d0e21e 1710 case DIGIT:
76e3520e 1711 if (!isDIGIT(nextchr))
4633a7c4 1712 sayNO;
76e3520e 1713 nextchr = UCHARAT(++locinput);
a0d0e21e 1714 break;
a0ed51b3
LW
1715 case DIGITUTF8:
1716 if (nextchr & 0x80) {
dfe13c55 1717 if (!(swash_fetch(PL_utf8_digit,(U8*)locinput)))
a0ed51b3 1718 sayNO;
6f06b55f 1719 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1720 nextchr = UCHARAT(locinput);
1721 break;
1722 }
1723 if (!isDIGIT(nextchr))
1724 sayNO;
1725 nextchr = UCHARAT(++locinput);
1726 break;
a0d0e21e 1727 case NDIGIT:
3280af22 1728 if (!nextchr && locinput >= PL_regeol)
4633a7c4 1729 sayNO;
76e3520e 1730 if (isDIGIT(nextchr))
4633a7c4 1731 sayNO;
76e3520e 1732 nextchr = UCHARAT(++locinput);
a0d0e21e 1733 break;
a0ed51b3
LW
1734 case NDIGITUTF8:
1735 if (!nextchr && locinput >= PL_regeol)
1736 sayNO;
1737 if (nextchr & 0x80) {
dfe13c55 1738 if (swash_fetch(PL_utf8_digit,(U8*)locinput))
a0ed51b3 1739 sayNO;
6f06b55f 1740 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
1741 nextchr = UCHARAT(locinput);
1742 break;
1743 }
1744 if (isDIGIT(nextchr))
1745 sayNO;
1746 nextchr = UCHARAT(++locinput);
1747 break;
1748 case CLUMP:
dfe13c55 1749 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 1750 sayNO;
6f06b55f 1751 locinput += PL_utf8skip[nextchr];
dfe13c55 1752 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
1753 locinput += UTF8SKIP(locinput);
1754 if (locinput > PL_regeol)
1755 sayNO;
1756 nextchr = UCHARAT(locinput);
1757 break;
c8756f30 1758 case REFFL:
3280af22 1759 PL_reg_flags |= RF_tainted;
c8756f30 1760 /* FALL THROUGH */
c277df42 1761 case REF:
c8756f30 1762 case REFF:
c277df42 1763 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
1764 ln = PL_regstartp[n];
1765 if (*PL_reglastparen < n || ln == -1)
af3f8c16 1766 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 1767 if (ln == PL_regendp[n])
a0d0e21e 1768 break;
a0ed51b3 1769
cf93c79d 1770 s = PL_bostr + ln;
a0ed51b3
LW
1771 if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
1772 char *l = locinput;
cf93c79d 1773 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
1774 /*
1775 * Note that we can't do the "other character" lookup trick as
1776 * in the 8-bit case (no pun intended) because in Unicode we
1777 * have to map both upper and title case to lower case.
1778 */
1779 if (OP(scan) == REFF) {
1780 while (s < e) {
1781 if (l >= PL_regeol)
1782 sayNO;
dfe13c55 1783 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
1784 sayNO;
1785 s += UTF8SKIP(s);
1786 l += UTF8SKIP(l);
1787 }
1788 }
1789 else {
1790 while (s < e) {
1791 if (l >= PL_regeol)
1792 sayNO;
dfe13c55 1793 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
1794 sayNO;
1795 s += UTF8SKIP(s);
1796 l += UTF8SKIP(l);
1797 }
1798 }
1799 locinput = l;
1800 nextchr = UCHARAT(locinput);
1801 break;
1802 }
1803
a0d0e21e 1804 /* Inline the first character, for speed. */
76e3520e 1805 if (UCHARAT(s) != nextchr &&
c8756f30
AK
1806 (OP(scan) == REF ||
1807 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 1808 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 1809 sayNO;
cf93c79d 1810 ln = PL_regendp[n] - ln;
3280af22 1811 if (locinput + ln > PL_regeol)
4633a7c4 1812 sayNO;
c8756f30
AK
1813 if (ln > 1 && (OP(scan) == REF
1814 ? memNE(s, locinput, ln)
1815 : (OP(scan) == REFF
1816 ? ibcmp(s, locinput, ln)
1817 : ibcmp_locale(s, locinput, ln))))
4633a7c4 1818 sayNO;
a0d0e21e 1819 locinput += ln;
76e3520e 1820 nextchr = UCHARAT(locinput);
a0d0e21e
LW
1821 break;
1822
1823 case NOTHING:
c277df42 1824 case TAIL:
a0d0e21e
LW
1825 break;
1826 case BACK:
1827 break;
c277df42
IZ
1828 case EVAL:
1829 {
1830 dSP;
533c011a 1831 OP_4tree *oop = PL_op;
3280af22
NIS
1832 COP *ocurcop = PL_curcop;
1833 SV **ocurpad = PL_curpad;
c277df42
IZ
1834 SV *ret;
1835
1836 n = ARG(scan);
533c011a
NIS
1837 PL_op = (OP_4tree*)PL_regdata->data[n];
1838 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
dfad63ad 1839 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 1840 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 1841
cea2e8a9 1842 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
1843 SPAGAIN;
1844 ret = POPs;
1845 PUTBACK;
1846
0f5d15d6
IZ
1847 PL_op = oop;
1848 PL_curpad = ocurpad;
1849 PL_curcop = ocurcop;
c277df42 1850 if (logical) {
0f5d15d6
IZ
1851 if (logical == 2) { /* Postponed subexpression. */
1852 regexp *re;
22c35a8c 1853 MAGIC *mg = Null(MAGIC*);
0f5d15d6
IZ
1854 re_cc_state state;
1855 CURCUR cctmp;
1856 CHECKPOINT cp, lastcp;
1857
1858 if(SvROK(ret) || SvRMAGICAL(ret)) {
1859 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
1860
1861 if(SvMAGICAL(sv))
1862 mg = mg_find(sv, 'r');
1863 }
1864 if (mg) {
1865 re = (regexp *)mg->mg_obj;
df0003d4 1866 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
1867 }
1868 else {
1869 STRLEN len;
1870 char *t = SvPV(ret, len);
1871 PMOP pm;
1872 char *oprecomp = PL_regprecomp;
1873 I32 osize = PL_regsize;
1874 I32 onpar = PL_regnpar;
1875
1876 pm.op_pmflags = 0;
cea2e8a9 1877 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
0f5d15d6
IZ
1878 if (!(SvFLAGS(ret)
1879 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
1880 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
1881 PL_regprecomp = oprecomp;
1882 PL_regsize = osize;
1883 PL_regnpar = onpar;
1884 }
1885 DEBUG_r(
1886 PerlIO_printf(Perl_debug_log,
1887 "Entering embedded `%s%.60s%s%s'\n",
1888 PL_colors[0],
1889 re->precomp,
1890 PL_colors[1],
1891 (strlen(re->precomp) > 60 ? "..." : ""))
1892 );
1893 state.node = next;
1894 state.prev = PL_reg_call_cc;
1895 state.cc = PL_regcc;
1896 state.re = PL_reg_re;
1897
1898 cctmp.cur = 0;
1899 cctmp.oldcc = 0;
1900 PL_regcc = &cctmp;
1901
1902 cp = regcppush(0); /* Save *all* the positions. */
1903 REGCP_SET;
1904 cache_re(re);
1905 state.ss = PL_savestack_ix;
1906 *PL_reglastparen = 0;
1907 PL_reg_call_cc = &state;
1908 PL_reginput = locinput;
1909 if (regmatch(re->program + 1)) {
1910 ReREFCNT_dec(re);
1911 regcpblow(cp);
1912 sayYES;
1913 }
1914 DEBUG_r(
1915 PerlIO_printf(Perl_debug_log,
1916 "%*s failed...\n",
1917 REPORT_CODE_OFF+PL_regindent*2, "")
1918 );
1919 ReREFCNT_dec(re);
1920 REGCP_UNWIND;
1921 regcppop();
1922 PL_reg_call_cc = state.prev;
1923 PL_regcc = state.cc;
1924 PL_reg_re = state.re;
d3790889 1925 cache_re(PL_reg_re);
0f5d15d6
IZ
1926 sayNO;
1927 }
c277df42 1928 sw = SvTRUE(ret);
0f5d15d6 1929 logical = 0;
a0ed51b3
LW
1930 }
1931 else
3280af22 1932 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
1933 break;
1934 }
a0d0e21e 1935 case OPEN:
c277df42 1936 n = ARG(scan); /* which paren pair */
3280af22
NIS
1937 PL_reg_start_tmp[n] = locinput;
1938 if (n > PL_regsize)
1939 PL_regsize = n;
a0d0e21e
LW
1940 break;
1941 case CLOSE:
c277df42 1942 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
1943 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
1944 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
1945 if (n > *PL_reglastparen)
1946 *PL_reglastparen = n;
a0d0e21e 1947 break;
c277df42
IZ
1948 case GROUPP:
1949 n = ARG(scan); /* which paren pair */
cf93c79d 1950 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
1951 break;
1952 case IFTHEN:
1953 if (sw)
1954 next = NEXTOPER(NEXTOPER(scan));
1955 else {
1956 next = scan + ARG(scan);
1957 if (OP(next) == IFTHEN) /* Fake one. */
1958 next = NEXTOPER(NEXTOPER(next));
1959 }
1960 break;
1961 case LOGICAL:
0f5d15d6 1962 logical = scan->flags;
c277df42 1963 break;
a0d0e21e
LW
1964 case CURLYX: {
1965 CURCUR cc;
3280af22 1966 CHECKPOINT cp = PL_savestack_ix;
c277df42
IZ
1967
1968 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
1969 next += ARG(next);
3280af22
NIS
1970 cc.oldcc = PL_regcc;
1971 PL_regcc = &cc;
1972 cc.parenfloor = *PL_reglastparen;
a0d0e21e
LW
1973 cc.cur = -1;
1974 cc.min = ARG1(scan);
1975 cc.max = ARG2(scan);
c277df42 1976 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
1977 cc.next = next;
1978 cc.minmod = minmod;
1979 cc.lastloc = 0;
3280af22 1980 PL_reginput = locinput;
a0d0e21e
LW
1981 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
1982 regcpblow(cp);
3280af22 1983 PL_regcc = cc.oldcc;
4633a7c4 1984 saySAME(n);
a0d0e21e
LW
1985 }
1986 /* NOT REACHED */
1987 case WHILEM: {
1988 /*
1989 * This is really hard to understand, because after we match
1990 * what we're trying to match, we must make sure the rest of
1991 * the RE is going to match for sure, and to do that we have
1992 * to go back UP the parse tree by recursing ever deeper. And
1993 * if it fails, we have to reset our parent's current state
1994 * that we can try again after backing off.
1995 */
1996
c277df42 1997 CHECKPOINT cp, lastcp;
3280af22 1998 CURCUR* cc = PL_regcc;
c277df42
IZ
1999 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2000
4633a7c4 2001 n = cc->cur + 1; /* how many we know we matched */
3280af22 2002 PL_reginput = locinput;
a0d0e21e 2003
c277df42
IZ
2004 DEBUG_r(
2005 PerlIO_printf(Perl_debug_log,
2006 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2007 REPORT_CODE_OFF+PL_regindent*2, "",
c277df42
IZ
2008 (long)n, (long)cc->min,
2009 (long)cc->max, (long)cc)
2010 );
4633a7c4 2011
a0d0e21e
LW
2012 /* If degenerate scan matches "", assume scan done. */
2013
579cf2c3 2014 if (locinput == cc->lastloc && n >= cc->min) {
3280af22
NIS
2015 PL_regcc = cc->oldcc;
2016 ln = PL_regcc->cur;
c277df42 2017 DEBUG_r(
c3464db5
DD
2018 PerlIO_printf(Perl_debug_log,
2019 "%*s empty match detected, try continuation...\n",
3280af22 2020 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2021 );
a0d0e21e 2022 if (regmatch(cc->next))
4633a7c4 2023 sayYES;
c277df42 2024 DEBUG_r(
c3464db5
DD
2025 PerlIO_printf(Perl_debug_log,
2026 "%*s failed...\n",
3280af22 2027 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2028 );
3280af22
NIS
2029 PL_regcc->cur = ln;
2030 PL_regcc = cc;
4633a7c4 2031 sayNO;
a0d0e21e
LW
2032 }
2033
2034 /* First just match a string of min scans. */
2035
2036 if (n < cc->min) {
2037 cc->cur = n;
2038 cc->lastloc = locinput;
4633a7c4
LW
2039 if (regmatch(cc->scan))
2040 sayYES;
2041 cc->cur = n - 1;
c277df42
IZ
2042 cc->lastloc = lastloc;
2043 DEBUG_r(
c3464db5
DD
2044 PerlIO_printf(Perl_debug_log,
2045 "%*s failed...\n",
3280af22 2046 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2047 );
4633a7c4 2048 sayNO;
a0d0e21e
LW
2049 }
2050
2051 /* Prefer next over scan for minimal matching. */
2052
2053 if (cc->minmod) {
3280af22
NIS
2054 PL_regcc = cc->oldcc;
2055 ln = PL_regcc->cur;
5f05dabc 2056 cp = regcppush(cc->parenfloor);
c277df42 2057 REGCP_SET;
5f05dabc 2058 if (regmatch(cc->next)) {
c277df42 2059 regcpblow(cp);
4633a7c4 2060 sayYES; /* All done. */
5f05dabc 2061 }
c277df42 2062 REGCP_UNWIND;
5f05dabc 2063 regcppop();
3280af22
NIS
2064 PL_regcc->cur = ln;
2065 PL_regcc = cc;
a0d0e21e 2066
c277df42 2067 if (n >= cc->max) { /* Maximum greed exceeded? */
599cee73 2068 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
3280af22
NIS
2069 && !(PL_reg_flags & RF_warned)) {
2070 PL_reg_flags |= RF_warned;
cea2e8a9 2071 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
2f3ca594
GS
2072 "Complex regular subexpression recursion",
2073 REG_INFTY - 1);
c277df42 2074 }
4633a7c4 2075 sayNO;
c277df42 2076 }
a687059c 2077
c277df42 2078 DEBUG_r(
c3464db5
DD
2079 PerlIO_printf(Perl_debug_log,
2080 "%*s trying longer...\n",
3280af22 2081 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2082 );
a0d0e21e 2083 /* Try scanning more and see if it helps. */
3280af22 2084 PL_reginput = locinput;
a0d0e21e
LW
2085 cc->cur = n;
2086 cc->lastloc = locinput;
5f05dabc 2087 cp = regcppush(cc->parenfloor);
c277df42 2088 REGCP_SET;
5f05dabc 2089 if (regmatch(cc->scan)) {
c277df42 2090 regcpblow(cp);
4633a7c4 2091 sayYES;
5f05dabc 2092 }
c277df42 2093 DEBUG_r(
c3464db5
DD
2094 PerlIO_printf(Perl_debug_log,
2095 "%*s failed...\n",
3280af22 2096 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2097 );
2098 REGCP_UNWIND;
5f05dabc 2099 regcppop();
4633a7c4 2100 cc->cur = n - 1;
c277df42 2101 cc->lastloc = lastloc;
4633a7c4 2102 sayNO;
a0d0e21e
LW
2103 }
2104
2105 /* Prefer scan over next for maximal matching. */
2106
2107 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2108 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2109 cc->cur = n;
2110 cc->lastloc = locinput;
c277df42 2111 REGCP_SET;
5f05dabc 2112 if (regmatch(cc->scan)) {
c277df42 2113 regcpblow(cp);
4633a7c4 2114 sayYES;
5f05dabc 2115 }
c277df42 2116 REGCP_UNWIND;
a0d0e21e 2117 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2118 PL_reginput = locinput;
c277df42 2119 DEBUG_r(
c3464db5
DD
2120 PerlIO_printf(Perl_debug_log,
2121 "%*s failed, try continuation...\n",
3280af22 2122 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2123 );
2124 }
599cee73
PM
2125 if (ckWARN(WARN_UNSAFE) && n >= REG_INFTY
2126 && !(PL_reg_flags & RF_warned)) {
3280af22 2127 PL_reg_flags |= RF_warned;
cea2e8a9 2128 Perl_warner(aTHX_ WARN_UNSAFE, "%s limit (%d) exceeded",
cb5d145d
GS
2129 "Complex regular subexpression recursion",
2130 REG_INFTY - 1);
a0d0e21e
LW
2131 }
2132
2133 /* Failed deeper matches of scan, so see if this one works. */
3280af22
NIS
2134 PL_regcc = cc->oldcc;
2135 ln = PL_regcc->cur;
a0d0e21e 2136 if (regmatch(cc->next))
4633a7c4 2137 sayYES;
c277df42 2138 DEBUG_r(
c3464db5 2139 PerlIO_printf(Perl_debug_log, "%*s failed...\n",
3280af22 2140 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2141 );
3280af22
NIS
2142 PL_regcc->cur = ln;
2143 PL_regcc = cc;
4633a7c4 2144 cc->cur = n - 1;
c277df42 2145 cc->lastloc = lastloc;
4633a7c4 2146 sayNO;
a0d0e21e
LW
2147 }
2148 /* NOT REACHED */
c277df42
IZ
2149 case BRANCHJ:
2150 next = scan + ARG(scan);
2151 if (next == scan)
2152 next = NULL;
2153 inner = NEXTOPER(NEXTOPER(scan));
2154 goto do_branch;
2155 case BRANCH:
2156 inner = NEXTOPER(scan);
2157 do_branch:
2158 {
2159 CHECKPOINT lastcp;
2160 c1 = OP(scan);
2161 if (OP(next) != c1) /* No choice. */
2162 next = inner; /* Avoid recursion. */
a0d0e21e 2163 else {
3280af22 2164 int lastparen = *PL_reglastparen;
c277df42
IZ
2165
2166 REGCP_SET;
a0d0e21e 2167 do {
3280af22 2168 PL_reginput = locinput;
c277df42 2169 if (regmatch(inner))
4633a7c4 2170 sayYES;
c277df42 2171 REGCP_UNWIND;
3280af22 2172 for (n = *PL_reglastparen; n > lastparen; n--)
cf93c79d 2173 PL_regendp[n] = -1;
3280af22 2174 *PL_reglastparen = n;
c277df42 2175 scan = next;
a0d0e21e 2176 /*SUPPRESS 560*/
c277df42
IZ
2177 if (n = (c1 == BRANCH ? NEXT_OFF(next) : ARG(next)))
2178 next += n;
a0d0e21e 2179 else
c277df42 2180 next = NULL;
c277df42
IZ
2181 inner = NEXTOPER(scan);
2182 if (c1 == BRANCHJ) {
2183 inner = NEXTOPER(inner);
2184 }
2185 } while (scan != NULL && OP(scan) == c1);
4633a7c4 2186 sayNO;
a0d0e21e 2187 /* NOTREACHED */
a687059c 2188 }
a0d0e21e
LW
2189 }
2190 break;
2191 case MINMOD:
2192 minmod = 1;
2193 break;
c277df42
IZ
2194 case CURLYM:
2195 {
00db4c45 2196 I32 l = 0;
c277df42
IZ
2197 CHECKPOINT lastcp;
2198
2199 /* We suppose that the next guy does not need
2200 backtracking: in particular, it is of constant length,
2201 and has no parenths to influence future backrefs. */
2202 ln = ARG1(scan); /* min to match */
2203 n = ARG2(scan); /* max to match */
c277df42
IZ
2204 paren = scan->flags;
2205 if (paren) {
3280af22
NIS
2206 if (paren > PL_regsize)
2207 PL_regsize = paren;
2208 if (paren > *PL_reglastparen)
2209 *PL_reglastparen = paren;
c277df42 2210 }
dc45a647 2211 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2212 if (paren)
2213 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2214 PL_reginput = locinput;
c277df42
IZ
2215 if (minmod) {
2216 minmod = 0;
2217 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2218 sayNO;
5f4b28b2 2219 if (ln && l == 0 && n >= ln
c277df42
IZ
2220 /* In fact, this is tricky. If paren, then the
2221 fact that we did/didnot match may influence
2222 future execution. */
2223 && !(paren && ln == 0))
2224 ln = n;
3280af22 2225 locinput = PL_reginput;
22c35a8c 2226 if (PL_regkind[(U8)OP(next)] == EXACT) {
c277df42
IZ
2227 c1 = UCHARAT(OPERAND(next) + 1);
2228 if (OP(next) == EXACTF)
22c35a8c 2229 c2 = PL_fold[c1];
c277df42 2230 else if (OP(next) == EXACTFL)
22c35a8c 2231 c2 = PL_fold_locale[c1];
c277df42
IZ
2232 else
2233 c2 = c1;
a0ed51b3
LW
2234 }
2235 else
c277df42
IZ
2236 c1 = c2 = -1000;
2237 REGCP_SET;
5f4b28b2 2238 /* This may be improved if l == 0. */
c277df42
IZ
2239 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
2240 /* If it could work, try it. */
2241 if (c1 == -1000 ||
3280af22
NIS
2242 UCHARAT(PL_reginput) == c1 ||
2243 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2244 {
2245 if (paren) {
2246 if (n) {
cf93c79d
IZ
2247 PL_regstartp[paren] =
2248 HOPc(PL_reginput, -l) - PL_bostr;
2249 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2250 }
2251 else
cf93c79d 2252 PL_regendp[paren] = -1;
c277df42
IZ
2253 }
2254 if (regmatch(next))
2255 sayYES;
2256 REGCP_UNWIND;
2257 }
2258 /* Couldn't or didn't -- move forward. */
3280af22 2259 PL_reginput = locinput;
c277df42
IZ
2260 if (regrepeat_hard(scan, 1, &l)) {
2261 ln++;
3280af22 2262 locinput = PL_reginput;
c277df42
IZ
2263 }
2264 else
2265 sayNO;
2266 }
a0ed51b3
LW
2267 }
2268 else {
c277df42
IZ
2269 n = regrepeat_hard(scan, n, &l);
2270 if (n != 0 && l == 0
2271 /* In fact, this is tricky. If paren, then the
2272 fact that we did/didnot match may influence
2273 future execution. */
2274 && !(paren && ln == 0))
2275 ln = n;
3280af22 2276 locinput = PL_reginput;
c277df42 2277 DEBUG_r(
5c0ca799
GS
2278 PerlIO_printf(Perl_debug_log,
2279 "%*s matched %ld times, len=%ld...\n",
3280af22 2280 REPORT_CODE_OFF+PL_regindent*2, "", n, l)
c277df42
IZ
2281 );
2282 if (n >= ln) {
22c35a8c 2283 if (PL_regkind[(U8)OP(next)] == EXACT) {
c277df42
IZ
2284 c1 = UCHARAT(OPERAND(next) + 1);
2285 if (OP(next) == EXACTF)
22c35a8c 2286 c2 = PL_fold[c1];
c277df42 2287 else if (OP(next) == EXACTFL)
22c35a8c 2288 c2 = PL_fold_locale[c1];
c277df42
IZ
2289 else
2290 c2 = c1;
a0ed51b3
LW
2291 }
2292 else
c277df42
IZ
2293 c1 = c2 = -1000;
2294 }
2295 REGCP_SET;
2296 while (n >= ln) {
2297 /* If it could work, try it. */
2298 if (c1 == -1000 ||
3280af22
NIS
2299 UCHARAT(PL_reginput) == c1 ||
2300 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
2301 {
2302 DEBUG_r(
c3464db5
DD
2303 PerlIO_printf(Perl_debug_log,
2304 "%*s trying tail with n=%ld...\n",
3280af22 2305 REPORT_CODE_OFF+PL_regindent*2, "", n)
a0ed51b3
LW
2306 );
2307 if (paren) {
2308 if (n) {
cf93c79d
IZ
2309 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
2310 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 2311 }
a0ed51b3 2312 else
cf93c79d 2313 PL_regendp[paren] = -1;
c277df42 2314 }
a0ed51b3
LW
2315 if (regmatch(next))
2316 sayYES;
2317 REGCP_UNWIND;
2318 }
c277df42
IZ
2319 /* Couldn't or didn't -- back up. */
2320 n--;
dfe13c55 2321 locinput = HOPc(locinput, -l);
3280af22 2322 PL_reginput = locinput;
c277df42
IZ
2323 }
2324 }
2325 sayNO;
2326 break;
2327 }
2328 case CURLYN:
2329 paren = scan->flags; /* Which paren to set */
3280af22
NIS
2330 if (paren > PL_regsize)
2331 PL_regsize = paren;
2332 if (paren > *PL_reglastparen)
2333 *PL_reglastparen = paren;
c277df42
IZ
2334 ln = ARG1(scan); /* min to match */
2335 n = ARG2(scan); /* max to match */
dc45a647 2336 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 2337 goto repeat;
a0d0e21e 2338 case CURLY:
c277df42 2339 paren = 0;
a0d0e21e
LW
2340 ln = ARG1(scan); /* min to match */
2341 n = ARG2(scan); /* max to match */
dc45a647 2342 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
2343 goto repeat;
2344 case STAR:
2345 ln = 0;
c277df42 2346 n = REG_INFTY;
a0d0e21e 2347 scan = NEXTOPER(scan);
c277df42 2348 paren = 0;
a0d0e21e
LW
2349 goto repeat;
2350 case PLUS:
c277df42
IZ
2351 ln = 1;
2352 n = REG_INFTY;
2353 scan = NEXTOPER(scan);
2354 paren = 0;
2355 repeat:
a0d0e21e
LW
2356 /*
2357 * Lookahead to avoid useless match attempts
2358 * when we know what character comes next.
2359 */
22c35a8c 2360 if (PL_regkind[(U8)OP(next)] == EXACT) {
bbce6d69 2361 c1 = UCHARAT(OPERAND(next) + 1);
2362 if (OP(next) == EXACTF)
22c35a8c 2363 c2 = PL_fold[c1];
bbce6d69 2364 else if (OP(next) == EXACTFL)
22c35a8c 2365 c2 = PL_fold_locale[c1];
bbce6d69 2366 else
2367 c2 = c1;
2368 }
a0d0e21e 2369 else
bbce6d69 2370 c1 = c2 = -1000;
3280af22 2371 PL_reginput = locinput;
a0d0e21e 2372 if (minmod) {
c277df42 2373 CHECKPOINT lastcp;
a0d0e21e
LW
2374 minmod = 0;
2375 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 2376 sayNO;
a0ed51b3 2377 locinput = PL_reginput;
c277df42 2378 REGCP_SET;
0fe9bf95
IZ
2379 if (c1 != -1000) {
2380 char *e = locinput + n - ln; /* Should not check after this */
2381 char *old = locinput;
2382
2383 if (e >= PL_regeol || (n == REG_INFTY))
2384 e = PL_regeol - 1;
2385 while (1) {
2386 /* Find place 'next' could work */
2387 if (c1 == c2) {
2388 while (locinput <= e && *locinput != c1)
2389 locinput++;
2390 } else {
2391 while (locinput <= e
2392 && *locinput != c1
2393 && *locinput != c2)
2394 locinput++;
2395 }
2396 if (locinput > e)
2397 sayNO;
2398 /* PL_reginput == old now */
2399 if (locinput != old) {
2400 ln = 1; /* Did some */
2401 if (regrepeat(scan, locinput - old) <
2402 locinput - old)
2403 sayNO;
2404 }
2405 /* PL_reginput == locinput now */
2406 if (paren) {
2407 if (ln) {
cf93c79d
IZ
2408 PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
2409 PL_regendp[paren] = locinput - PL_bostr;
0fe9bf95
IZ
2410 }
2411 else
cf93c79d 2412 PL_regendp[paren] = -1;
0fe9bf95
IZ
2413 }
2414 if (regmatch(next))
2415 sayYES;
2416 PL_reginput = locinput; /* Could be reset... */
2417 REGCP_UNWIND;
2418 /* Couldn't or didn't -- move forward. */
2419 old = locinput++;
2420 }
2421 }
2422 else
c277df42 2423 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
a0d0e21e 2424 /* If it could work, try it. */
bbce6d69 2425 if (c1 == -1000 ||
3280af22
NIS
2426 UCHARAT(PL_reginput) == c1 ||
2427 UCHARAT(PL_reginput) == c2)
bbce6d69 2428 {
c277df42
IZ
2429 if (paren) {
2430 if (n) {
cf93c79d
IZ
2431 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2432 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2433 }
2434 else
cf93c79d 2435 PL_regendp[paren] = -1;
c277df42 2436 }
a0d0e21e 2437 if (regmatch(next))
4633a7c4 2438 sayYES;
c277df42 2439 REGCP_UNWIND;
bbce6d69 2440 }
c277df42 2441 /* Couldn't or didn't -- move forward. */
a0ed51b3 2442 PL_reginput = locinput;
a0d0e21e
LW
2443 if (regrepeat(scan, 1)) {
2444 ln++;
a0ed51b3
LW
2445 locinput = PL_reginput;
2446 }
2447 else
4633a7c4 2448 sayNO;
a0d0e21e
LW
2449 }
2450 }
2451 else {
c277df42 2452 CHECKPOINT lastcp;
a0d0e21e 2453 n = regrepeat(scan, n);
a0ed51b3 2454 locinput = PL_reginput;
22c35a8c 2455 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
3280af22 2456 (!PL_multiline || OP(next) == SEOL))
a0d0e21e 2457 ln = n; /* why back off? */
c277df42
IZ
2458 REGCP_SET;
2459 if (paren) {
2460 while (n >= ln) {
2461 /* If it could work, try it. */
2462 if (c1 == -1000 ||
3280af22
NIS
2463 UCHARAT(PL_reginput) == c1 ||
2464 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2465 {
2466 if (paren && n) {
2467 if (n) {
cf93c79d
IZ
2468 PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
2469 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
2470 }
2471 else
cf93c79d 2472 PL_regendp[paren] = -1;
c277df42
IZ
2473 }
2474 if (regmatch(next))
2475 sayYES;
2476 REGCP_UNWIND;
2477 }
2478 /* Couldn't or didn't -- back up. */
2479 n--;
dfe13c55 2480 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 2481 }
a0ed51b3
LW
2482 }
2483 else {
c277df42
IZ
2484 while (n >= ln) {
2485 /* If it could work, try it. */
2486 if (c1 == -1000 ||
3280af22
NIS
2487 UCHARAT(PL_reginput) == c1 ||
2488 UCHARAT(PL_reginput) == c2)
c277df42
IZ
2489 {
2490 if (regmatch(next))
2491 sayYES;
2492 REGCP_UNWIND;
2493 }
2494 /* Couldn't or didn't -- back up. */
2495 n--;
dfe13c55 2496 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 2497 }
a0d0e21e
LW
2498 }
2499 }
4633a7c4 2500 sayNO;
c277df42 2501 break;
a0d0e21e 2502 case END:
0f5d15d6
IZ
2503 if (PL_reg_call_cc) {
2504 re_cc_state *cur_call_cc = PL_reg_call_cc;
2505 CURCUR *cctmp = PL_regcc;
2506 regexp *re = PL_reg_re;
2507 CHECKPOINT cp, lastcp;
2508
2509 cp = regcppush(0); /* Save *all* the positions. */
2510 REGCP_SET;
2511 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
2512 the caller. */
2513 PL_reginput = locinput; /* Make position available to
2514 the callcc. */
2515 cache_re(PL_reg_call_cc->re);
2516 PL_regcc = PL_reg_call_cc->cc;
2517 PL_reg_call_cc = PL_reg_call_cc->prev;
2518 if (regmatch(cur_call_cc->node)) {
2519 PL_reg_call_cc = cur_call_cc;
2520 regcpblow(cp);
2521 sayYES;
2522 }
2523 REGCP_UNWIND;
2524 regcppop();
2525 PL_reg_call_cc = cur_call_cc;
2526 PL_regcc = cctmp;
2527 PL_reg_re = re;
2528 cache_re(re);
2529
2530 DEBUG_r(
2531 PerlIO_printf(Perl_debug_log,
2532 "%*s continuation failed...\n",
2533 REPORT_CODE_OFF+PL_regindent*2, "")
2534 );
2535 sayNO;
2536 }
3280af22 2537 if (locinput < PL_regtill)
7e5428c5
IZ
2538 sayNO; /* Cannot match: too short. */
2539 /* Fall through */
2540 case SUCCEED:
3280af22 2541 PL_reginput = locinput; /* put where regtry can find it */
4633a7c4 2542 sayYES; /* Success! */
c277df42
IZ
2543 case SUSPEND:
2544 n = 1;
9fe1d20c 2545 PL_reginput = locinput;
c277df42 2546 goto do_ifmatch;
a0d0e21e 2547 case UNLESSM:
c277df42 2548 n = 0;
a0ed51b3 2549 if (scan->flags) {
0fe9bf95
IZ
2550 if (UTF) { /* XXXX This is absolutely
2551 broken, we read before
2552 start of string. */
2553 s = HOPMAYBEc(locinput, -scan->flags);
2554 if (!s)
2555 goto say_yes;
2556 PL_reginput = s;
2557 }
2558 else {
2559 if (locinput < PL_bostr + scan->flags)
2560 goto say_yes;
2561 PL_reginput = locinput - scan->flags;
2562 goto do_ifmatch;
2563 }
a0ed51b3
LW
2564 }
2565 else
2566 PL_reginput = locinput;
c277df42
IZ
2567 goto do_ifmatch;
2568 case IFMATCH:
2569 n = 1;
a0ed51b3 2570 if (scan->flags) {
0fe9bf95
IZ
2571 if (UTF) { /* XXXX This is absolutely
2572 broken, we read before
2573 start of string. */
2574 s = HOPMAYBEc(locinput, -scan->flags);
2575 if (!s || s < PL_bostr)
2576 goto say_no;
2577 PL_reginput = s;
2578 }
2579 else {
2580 if (locinput < PL_bostr + scan->flags)
2581 goto say_no;
2582 PL_reginput = locinput - scan->flags;
2583 goto do_ifmatch;
2584 }
a0ed51b3
LW
2585 }
2586 else
2587 PL_reginput = locinput;
2588
c277df42 2589 do_ifmatch:
c277df42
IZ
2590 inner = NEXTOPER(NEXTOPER(scan));
2591 if (regmatch(inner) != n) {
2592 say_no:
2593 if (logical) {
2594 logical = 0;
2595 sw = 0;
2596 goto do_longjump;
a0ed51b3
LW
2597 }
2598 else
c277df42
IZ
2599 sayNO;
2600 }
2601 say_yes:
2602 if (logical) {
2603 logical = 0;
2604 sw = 1;
2605 }
fe44a5e8 2606 if (OP(scan) == SUSPEND) {
3280af22 2607 locinput = PL_reginput;
565764a8 2608 nextchr = UCHARAT(locinput);
fe44a5e8 2609 }
c277df42
IZ
2610 /* FALL THROUGH. */
2611 case LONGJMP:
2612 do_longjump:
2613 next = scan + ARG(scan);
2614 if (next == scan)
2615 next = NULL;
a0d0e21e
LW
2616 break;
2617 default:
c030ccd9 2618 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
c277df42 2619 (unsigned long)scan, OP(scan));
cea2e8a9 2620 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 2621 }
a0d0e21e
LW
2622 scan = next;
2623 }
a687059c 2624
a0d0e21e
LW
2625 /*
2626 * We get here only if there's trouble -- normally "case END" is
2627 * the terminating point.
2628 */
cea2e8a9 2629 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 2630 /*NOTREACHED*/
4633a7c4
LW
2631 sayNO;
2632
2633yes:
2634#ifdef DEBUGGING
3280af22 2635 PL_regindent--;
4633a7c4
LW
2636#endif
2637 return 1;
2638
2639no:
2640#ifdef DEBUGGING
3280af22 2641 PL_regindent--;
4633a7c4 2642#endif
a0d0e21e 2643 return 0;
a687059c
LW
2644}
2645
2646/*
2647 - regrepeat - repeatedly match something simple, report how many
2648 */
2649/*
2650 * [This routine now assumes that it will only match on things of length 1.
2651 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 2652 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 2653 */
76e3520e 2654STATIC I32
cea2e8a9 2655S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 2656{
5c0ca799 2657 dTHR;
a0d0e21e
LW
2658 register char *scan;
2659 register char *opnd;
2660 register I32 c;
3280af22 2661 register char *loceol = PL_regeol;
a0ed51b3 2662 register I32 hardcount = 0;
a0d0e21e 2663
3280af22 2664 scan = PL_reginput;
c277df42 2665 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 2666 loceol = scan + max;
161b471a 2667 opnd = (char *) OPERAND(p);
a0d0e21e 2668 switch (OP(p)) {
22c35a8c 2669 case REG_ANY:
a0d0e21e
LW
2670 while (scan < loceol && *scan != '\n')
2671 scan++;
2672 break;
2673 case SANY:
2674 scan = loceol;
2675 break;
a0ed51b3
LW
2676 case ANYUTF8:
2677 loceol = PL_regeol;
2678 while (scan < loceol && *scan != '\n') {
2679 scan += UTF8SKIP(scan);
2680 hardcount++;
2681 }
2682 break;
2683 case SANYUTF8:
2684 loceol = PL_regeol;
2685 while (scan < loceol) {
2686 scan += UTF8SKIP(scan);
2687 hardcount++;
2688 }
2689 break;
bbce6d69 2690 case EXACT: /* length of string is 1 */
2691 c = UCHARAT(++opnd);
2692 while (scan < loceol && UCHARAT(scan) == c)
2693 scan++;
2694 break;
2695 case EXACTF: /* length of string is 1 */
2696 c = UCHARAT(++opnd);
2697 while (scan < loceol &&
22c35a8c 2698 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 2699 scan++;
2700 break;
2701 case EXACTFL: /* length of string is 1 */
3280af22 2702 PL_reg_flags |= RF_tainted;
bbce6d69 2703 c = UCHARAT(++opnd);
2704 while (scan < loceol &&
22c35a8c 2705 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
2706 scan++;
2707 break;
a0ed51b3
LW
2708 case ANYOFUTF8:
2709 loceol = PL_regeol;
2710 while (scan < loceol && REGINCLASSUTF8(p, (U8*)scan)) {
2711 scan += UTF8SKIP(scan);
2712 hardcount++;
2713 }
2714 break;
a0d0e21e 2715 case ANYOF:
ae5c130c 2716 while (scan < loceol && REGINCLASS(opnd, *scan))
a0d0e21e 2717 scan++;
a0d0e21e
LW
2718 break;
2719 case ALNUM:
2720 while (scan < loceol && isALNUM(*scan))
2721 scan++;
2722 break;
a0ed51b3
LW
2723 case ALNUMUTF8:
2724 loceol = PL_regeol;
dfe13c55 2725 while (scan < loceol && swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
2726 scan += UTF8SKIP(scan);
2727 hardcount++;
2728 }
2729 break;
bbce6d69 2730 case ALNUML:
3280af22 2731 PL_reg_flags |= RF_tainted;
bbce6d69 2732 while (scan < loceol && isALNUM_LC(*scan))
2733 scan++;
2734 break;
a0ed51b3
LW
2735 case ALNUMLUTF8:
2736 PL_reg_flags |= RF_tainted;
2737 loceol = PL_regeol;
dfe13c55 2738 while (scan < loceol && isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
2739 scan += UTF8SKIP(scan);
2740 hardcount++;
2741 }
2742 break;
2743 break;
a0d0e21e
LW
2744 case NALNUM:
2745 while (scan < loceol && !isALNUM(*scan))
2746 scan++;
2747 break;
a0ed51b3
LW
2748 case NALNUMUTF8:
2749 loceol = PL_regeol;
dfe13c55 2750 while (scan < loceol && !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
a0ed51b3
LW
2751 scan += UTF8SKIP(scan);
2752 hardcount++;
2753 }
2754 break;
bbce6d69 2755 case NALNUML:
3280af22 2756 PL_reg_flags |= RF_tainted;
bbce6d69 2757 while (scan < loceol && !isALNUM_LC(*scan))
2758 scan++;
2759 break;
a0ed51b3
LW
2760 case NALNUMLUTF8:
2761 PL_reg_flags |= RF_tainted;
2762 loceol = PL_regeol;
dfe13c55 2763 while (scan < loceol && !isALNUM_LC_utf8((U8*)scan)) {
a0ed51b3
LW
2764 scan += UTF8SKIP(scan);
2765 hardcount++;
2766 }
2767 break;
a0d0e21e
LW
2768 case SPACE:
2769 while (scan < loceol && isSPACE(*scan))
2770 scan++;
2771 break;
a0ed51b3
LW
2772 case SPACEUTF8:
2773 loceol = PL_regeol;
dfe13c55 2774 while (scan < loceol && (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
2775 scan += UTF8SKIP(scan);
2776 hardcount++;
2777 }
2778 break;
bbce6d69 2779 case SPACEL:
3280af22 2780 PL_reg_flags |= RF_tainted;
bbce6d69 2781 while (scan < loceol && isSPACE_LC(*scan))
2782 scan++;
2783 break;
a0ed51b3
LW
2784 case SPACELUTF8:
2785 PL_reg_flags |= RF_tainted;
2786 loceol = PL_regeol;
dfe13c55 2787 while (scan < loceol && (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
2788 scan += UTF8SKIP(scan);
2789 hardcount++;
2790 }
2791 break;
a0d0e21e
LW
2792 case NSPACE:
2793 while (scan < loceol && !isSPACE(*scan))
2794 scan++;
2795 break;
a0ed51b3
LW
2796 case NSPACEUTF8:
2797 loceol = PL_regeol;
dfe13c55 2798 while (scan < loceol && !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
a0ed51b3
LW
2799 scan += UTF8SKIP(scan);
2800 hardcount++;
2801 }
2802 break;
bbce6d69 2803 case NSPACEL:
3280af22 2804 PL_reg_flags |= RF_tainted;
bbce6d69 2805 while (scan < loceol && !isSPACE_LC(*scan))
2806 scan++;
2807 break;
a0ed51b3
LW
2808 case NSPACELUTF8:
2809 PL_reg_flags |= RF_tainted;
2810 loceol = PL_regeol;
dfe13c55 2811 while (scan < loceol && !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
a0ed51b3
LW
2812 scan += UTF8SKIP(scan);
2813 hardcount++;
2814 }
2815 break;
a0d0e21e
LW
2816 case DIGIT:
2817 while (scan < loceol && isDIGIT(*scan))
2818 scan++;
2819 break;
a0ed51b3
LW
2820 case DIGITUTF8:
2821 loceol = PL_regeol;
dfe13c55 2822 while (scan < loceol && swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
2823 scan += UTF8SKIP(scan);
2824 hardcount++;
2825 }
2826 break;
2827 break;
a0d0e21e
LW
2828 case NDIGIT:
2829 while (scan < loceol && !isDIGIT(*scan))
2830 scan++;
2831 break;
a0ed51b3
LW
2832 case NDIGITUTF8:
2833 loceol = PL_regeol;
dfe13c55 2834 while (scan < loceol && !swash_fetch(PL_utf8_digit,(U8*)scan)) {
a0ed51b3
LW
2835 scan += UTF8SKIP(scan);
2836 hardcount++;
2837 }
2838 break;
a0d0e21e
LW
2839 default: /* Called on something of 0 width. */
2840 break; /* So match right here or not at all. */
2841 }
a687059c 2842
a0ed51b3
LW
2843 if (hardcount)
2844 c = hardcount;
2845 else
2846 c = scan - PL_reginput;
3280af22 2847 PL_reginput = scan;
a687059c 2848
c277df42
IZ
2849 DEBUG_r(
2850 {
2851 SV *prop = sv_newmortal();
2852
2853 regprop(prop, p);
2854 PerlIO_printf(Perl_debug_log,
2855 "%*s %s can match %ld times out of %ld...\n",
2856 REPORT_CODE_OFF+1, "", SvPVX(prop),c,max);
2857 });
2858
a0d0e21e 2859 return(c);
a687059c
LW
2860}
2861
2862/*
c277df42
IZ
2863 - regrepeat_hard - repeatedly match something, report total lenth and length
2864 *
2865 * The repeater is supposed to have constant length.
2866 */
2867
76e3520e 2868STATIC I32
cea2e8a9 2869S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 2870{
5c0ca799 2871 dTHR;
c277df42
IZ
2872 register char *scan;
2873 register char *start;
3280af22 2874 register char *loceol = PL_regeol;
a0ed51b3 2875 I32 l = 0;
708e3b05 2876 I32 count = 0, res = 1;
a0ed51b3
LW
2877
2878 if (!max)
2879 return 0;
c277df42 2880
3280af22 2881 start = PL_reginput;
a0ed51b3 2882 if (UTF) {
708e3b05 2883 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
2884 if (!count++) {
2885 l = 0;
2886 while (start < PL_reginput) {
2887 l++;
2888 start += UTF8SKIP(start);
2889 }
2890 *lp = l;
2891 if (l == 0)
2892 return max;
2893 }
2894 if (count == max)
2895 return count;
2896 }
2897 }
2898 else {
708e3b05 2899 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
2900 if (!count++) {
2901 *lp = l = PL_reginput - start;
2902 if (max != REG_INFTY && l*max < loceol - scan)
2903 loceol = scan + l*max;
2904 if (l == 0)
2905 return max;
c277df42
IZ
2906 }
2907 }
2908 }
708e3b05 2909 if (!res)
3280af22 2910 PL_reginput = scan;
c277df42 2911
a0ed51b3 2912 return count;
c277df42
IZ
2913}
2914
2915/*
cb8d8820 2916 - reginclass - determine if a character falls into a character class
bbce6d69 2917 */
2918
76e3520e 2919STATIC bool
cea2e8a9 2920S_reginclass(pTHX_ register char *p, register I32 c)
bbce6d69 2921{
5c0ca799 2922 dTHR;
bbce6d69 2923 char flags = *p;
2924 bool match = FALSE;
2925
2926 c &= 0xFF;
ae5c130c 2927 if (ANYOF_TEST(p, c))
bbce6d69 2928 match = TRUE;
2929 else if (flags & ANYOF_FOLD) {
2930 I32 cf;
2931 if (flags & ANYOF_LOCALE) {
3280af22 2932 PL_reg_flags |= RF_tainted;
22c35a8c 2933 cf = PL_fold_locale[c];
bbce6d69 2934 }
2935 else
22c35a8c 2936 cf = PL_fold[c];
ae5c130c 2937 if (ANYOF_TEST(p, cf))
bbce6d69 2938 match = TRUE;
2939 }
2940
2941 if (!match && (flags & ANYOF_ISA)) {
3280af22 2942 PL_reg_flags |= RF_tainted;
bbce6d69 2943
2944 if (((flags & ANYOF_ALNUML) && isALNUM_LC(c)) ||
2945 ((flags & ANYOF_NALNUML) && !isALNUM_LC(c)) ||
2946 ((flags & ANYOF_SPACEL) && isSPACE_LC(c)) ||
2947 ((flags & ANYOF_NSPACEL) && !isSPACE_LC(c)))
2948 {
2949 match = TRUE;
2950 }
2951 }
2952
ae5c130c 2953 return (flags & ANYOF_INVERT) ? !match : match;
bbce6d69 2954}
2955
a0ed51b3 2956STATIC bool
cea2e8a9 2957S_reginclassutf8(pTHX_ regnode *f, U8 *p)
c485e607
NIS
2958{
2959 dTHR;
a0ed51b3
LW
2960 char flags = ARG1(f);
2961 bool match = FALSE;
2962 SV *sv = (SV*)PL_regdata->data[ARG2(f)];
2963
2964 if (swash_fetch(sv, p))
2965 match = TRUE;
2966 else if (flags & ANYOF_FOLD) {
2967 I32 cf;
dfe13c55 2968 U8 tmpbuf[10];
a0ed51b3
LW
2969 if (flags & ANYOF_LOCALE) {
2970 PL_reg_flags |= RF_tainted;
2971 uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
2972 }
2973 else
2974 uv_to_utf8(tmpbuf, toLOWER_utf8(p));
2975 if (swash_fetch(sv, tmpbuf))
2976 match = TRUE;
2977 }
2978
2979 if (!match && (flags & ANYOF_ISA)) {
2980 PL_reg_flags |= RF_tainted;
2981
2982 if (((flags & ANYOF_ALNUML) && isALNUM_LC_utf8(p)) ||
2983 ((flags & ANYOF_NALNUML) && !isALNUM_LC_utf8(p)) ||
2984 ((flags & ANYOF_SPACEL) && isSPACE_LC_utf8(p)) ||
2985 ((flags & ANYOF_NSPACEL) && !isSPACE_LC_utf8(p)))
2986 {
2987 match = TRUE;
2988 }
2989 }
2990
2991 return (flags & ANYOF_INVERT) ? !match : match;
2992}
161b471a 2993
dfe13c55 2994STATIC U8 *
cea2e8a9 2995S_reghop(pTHX_ U8 *s, I32 off)
c485e607
NIS
2996{
2997 dTHR;
a0ed51b3
LW
2998 if (off >= 0) {
2999 while (off-- && s < (U8*)PL_regeol)
3000 s += UTF8SKIP(s);
3001 }
3002 else {
3003 while (off++) {
3004 if (s > (U8*)PL_bostr) {
3005 s--;
3006 if (*s & 0x80) {
3007 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3008 s--;
3009 } /* XXX could check well-formedness here */
3010 }
3011 }
3012 }
3013 return s;
3014}
161b471a 3015
dfe13c55 3016STATIC U8 *
cea2e8a9 3017S_reghopmaybe(pTHX_ U8* s, I32 off)
a0ed51b3 3018{
c485e607 3019 dTHR;
a0ed51b3
LW
3020 if (off >= 0) {
3021 while (off-- && s < (U8*)PL_regeol)
3022 s += UTF8SKIP(s);
3023 if (off >= 0)
3024 return 0;
3025 }
3026 else {
3027 while (off++) {
3028 if (s > (U8*)PL_bostr) {
3029 s--;
3030 if (*s & 0x80) {
3031 while (s > (U8*)PL_bostr && (*s & 0xc0) == 0x80)
3032 s--;
3033 } /* XXX could check well-formedness here */
3034 }
3035 else
3036 break;
3037 }
3038 if (off <= 0)
3039 return 0;
3040 }
3041 return s;
3042}