#include "re_top.h"
#endif
-/* At least one required character in the target string is expressible only in
- * UTF-8. */
-static const char* const non_utf8_target_but_utf8_required
- = "Can't match, because target string needs to be in UTF-8\n";
-
-#define NON_UTF8_TARGET_BUT_UTF8_REQUIRED(target) STMT_START { \
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s", non_utf8_target_but_utf8_required));\
- goto target; \
-} STMT_END
-
/*
* pregcomp and pregexec -- regsub and regerror are not used in perl
*
#include "inline_invlist.c"
#include "unicode_constants.h"
+#ifdef DEBUGGING
+/* At least one required character in the target string is expressible only in
+ * UTF-8. */
+static const char* const non_utf8_target_but_utf8_required
+ = "Can't match, because target string needs to be in UTF-8\n";
+#endif
+
+#define NON_UTF8_TARGET_BUT_UTF8_REQUIRED(target) STMT_START { \
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s", non_utf8_target_but_utf8_required));\
+ goto target; \
+} STMT_END
+
#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
#ifndef STATIC
}
check = prog->check_substr;
}
- if ((prog->extflags & RXf_ANCH) /* Match at beg-of-str or after \n */
- && !(prog->extflags & RXf_ANCH_GPOS)) /* \G isn't a BOS or \n */
- {
- ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
+ if (prog->extflags & RXf_ANCH) { /* Match at \G, beg-of-str or after \n */
+ ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
|| ( (prog->extflags & RXf_ANCH_BOL)
&& !multiline ) ); /* Check after \n? */
if (!ml_anch) {
- if ( !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
- && (strpos != strbeg)) {
+ /* we are only allowed to match at BOS or \G */
+
+ if (prog->extflags & RXf_ANCH_GPOS) {
+ /* in this case, we hope(!) that the caller has already
+ * set strpos to pos()-gofs, and will already have checked
+ * that this anchor position is legal
+ */
+ ;
+ }
+ else if (!(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
+ && (strpos != strbeg))
+ {
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
goto fail;
}
switch (OP(c)) {
case ANYOF:
case ANYOF_SYNTHETIC:
- case ANYOF_WARN_SUPER:
if (utf8_target) {
REXEC_FBC_UTF8_CLASS_SCAN(
reginclass(prog, c, (U8*)s, utf8_target));
: strbeg; /* pos() not defined; use start of string */
DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
- "GPOS ganch set to strbeg[%"IVdf"]\n", reginfo->ganch - strbeg));
+ "GPOS ganch set to strbeg[%"IVdf"]\n", (IV)(reginfo->ganch - strbeg)));
/* in the presence of \G, we may need to start looking earlier in
* the string than the suggested start point of stringarg:
- * if gofs->prog is set, then that's a known, fixed minimum
+ * if prog->gofs is set, then that's a known, fixed minimum
* offset, such as
* /..\G/: gofs = 2
* /ab|c\G/: gofs = 1
Not newSVsv, either, as it does not COW.
*/
reginfo->sv = newSV(0);
- sv_setsv(reginfo->sv, sv);
+ SvSetSV_nosteal(reginfo->sv, sv);
SAVEFREESV(reginfo->sv);
}
break;
case ANYOF: /* /[abc]/ */
- case ANYOF_WARN_SUPER:
if (NEXTCHR_IS_EOS)
sayNO;
if (utf8_target) {
else { /* /(??{}) */
/* if its overloaded, let the regex compiler handle
* it; otherwise extract regex, or stringify */
+ if (SvGMAGICAL(ret))
+ ret = sv_mortalcopy(ret);
if (!SvAMAGIC(ret)) {
SV *sv = ret;
if (SvROK(sv))
sv = SvRV(sv);
if (SvTYPE(sv) == SVt_REGEXP)
re_sv = (REGEXP*) sv;
- else if (SvSMAGICAL(sv)) {
- MAGIC *mg = mg_find(sv, PERL_MAGIC_qr);
+ else if (SvSMAGICAL(ret)) {
+ MAGIC *mg = mg_find(ret, PERL_MAGIC_qr);
if (mg)
re_sv = (REGEXP *) mg->mg_obj;
}
- /* force any magic, undef warnings here */
- if (!re_sv) {
+ /* force any undef warnings here */
+ if (!re_sv && !SvPOK(ret) && !SvNIOK(ret)) {
ret = sv_mortalcopy(ret);
(void) SvPV_force_nolen(ret);
}
pm_flags);
if (!(SvFLAGS(ret)
- & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
- | SVs_GMG))) {
+ & (SVs_TEMP | SVs_GMG | SVf_ROK))
+ && (!SvPADTMP(ret) || SvREADONLY(ret))) {
/* This isn't a first class regexp. Instead, it's
caching a regexp onto an existing, Perl visible
scalar. */
sv_magic(ret, MUTABLE_SV(re_sv), PERL_MAGIC_qr, 0, 0);
}
- /* safe to do now that any $1 etc has been
- * interpolated into the new pattern string and
- * compiled */
- S_regcp_restore(aTHX_ rex, runops_cp, &maxopenparen);
}
SAVEFREESV(re_sv);
re = ReANY(re_sv);
break;
}
case ANYOF:
- case ANYOF_WARN_SUPER:
if (utf8_target) {
while (hardcount < max
&& scan < loceol
match = TRUE;
}
else if (flags & ANYOF_LOCALE) {
- RXp_MATCH_TAINTED_on(prog);
-
- if ((flags & ANYOF_LOC_FOLD)
- && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
- {
- match = TRUE;
- }
+ if (flags & ANYOF_LOC_FOLD) {
+ RXp_MATCH_TAINTED_on(prog);
+ if (ANYOF_BITMAP_TEST(n, PL_fold_locale[c])) {
+ match = TRUE;
+ }
+ }
else if (ANYOF_POSIXL_TEST_ANY_SET(n)) {
/* The data structure is arranged so bits 0, 2, 4, ... are set
int count = 0;
int to_complement = 0;
+
+ RXp_MATCH_TAINTED_on(prog);
while (count < ANYOF_MAX) {
if (ANYOF_POSIXL_TEST(n, count)
&& to_complement ^ cBOOL(isFOO_lc(count/2, (U8) c)))
}
if (UNICODE_IS_SUPER(c)
- && OP(n) == ANYOF_WARN_SUPER
+ && (flags & ANYOF_WARN_SUPER)
&& ckWARN_d(WARN_NON_UNICODE))
{
Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
- "Code point 0x%04"UVXf" is not Unicode, all \\p{} matches fail; all \\P{} matches succeed", c);
+ "Matched non-Unicode code point 0x%04"UVXf" against Unicode property; may not be portable", c);
}
}
+#if ANYOF_INVERT != 1
+ /* Depending on compiler optimization cBOOL takes time, so if don't have to
+ * use it, don't */
+# error ANYOF_INVERT needs to be set to 1, or guarded with cBOOL below,
+#endif
+
/* The xor complements the return if to invert: 1^1 = 0, 1^0 = 1 */
- return cBOOL(flags & ANYOF_INVERT) ^ match;
+ return (flags & ANYOF_INVERT) ^ match;
}
STATIC U8 *