X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/7cb3f9598b37fcd8b30ea273d668c8b48d5f4c76..62805098c7dbcf459cf5674ecbe5961ba4807178:/inline.h

diff --git a/inline.h b/inline.h
index 86e005d..98d9edc 100644
--- a/inline.h
+++ b/inline.h
@@ -25,6 +25,14 @@ S_av_top_index(pTHX_ AV *av)
 
 /* ------------------------------- cv.h ------------------------------- */
 
+PERL_STATIC_INLINE GV *
+S_CvGV(pTHX_ CV *sv)
+{
+    return CvNAMED(sv)
+	? Perl_cvgv_from_hek(aTHX_ sv)
+	: ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
+}
+
 PERL_STATIC_INLINE I32 *
 S_CvDEPTHp(const CV * const sv)
 {
@@ -82,13 +90,73 @@ S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
 }
 #endif
 
+/* ------------------------------- pad.h ------------------------------ */
+
+#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
+PERL_STATIC_INLINE bool
+PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
+{
+    /* is seq within the range _LOW to _HIGH ?
+     * This is complicated by the fact that PL_cop_seqmax
+     * may have wrapped around at some point */
+    if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
+	return FALSE; /* not yet introduced */
+
+    if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
+    /* in compiling scope */
+	if (
+	    (seq >  COP_SEQ_RANGE_LOW(pn))
+	    ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
+	    : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
+	)
+	    return TRUE;
+    }
+    else if (
+	(COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
+	?
+	    (  seq >  COP_SEQ_RANGE_LOW(pn)
+	    || seq <= COP_SEQ_RANGE_HIGH(pn))
+
+	:    (  seq >  COP_SEQ_RANGE_LOW(pn)
+	     && seq <= COP_SEQ_RANGE_HIGH(pn))
+    )
+	return TRUE;
+    return FALSE;
+}
+#endif
+
+/* ------------------------------- pp.h ------------------------------- */
+
+PERL_STATIC_INLINE I32
+S_TOPMARK(pTHX)
+{
+    DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
+				 "MARK top  %p %" IVdf "\n",
+				  PL_markstack_ptr,
+				  (IV)*PL_markstack_ptr)));
+    return *PL_markstack_ptr;
+}
+
+PERL_STATIC_INLINE I32
+S_POPMARK(pTHX)
+{
+    DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
+				 "MARK pop  %p %" IVdf "\n",
+				  (PL_markstack_ptr-1),
+				  (IV)*(PL_markstack_ptr-1))));
+    assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
+    return *PL_markstack_ptr--;
+}
+
 /* ----------------------------- regexp.h ----------------------------- */
 
 PERL_STATIC_INLINE struct regexp *
 S_ReANY(const REGEXP * const re)
 {
+    XPV* const p = (XPV*)SvANY(re);
     assert(isREGEXP(re));
-    return re->sv_u.svu_rx;
+    return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
+                                   : (struct regexp *)p;
 }
 
 /* ------------------------------- sv.h ------------------------------- */
@@ -148,27 +216,15 @@ SvAMAGIC_off(SV *sv)
 }
 
 PERL_STATIC_INLINE U32
-S_SvPADTMP_on(SV *sv)
-{
-    assert(!(SvFLAGS(sv) & SVs_PADMY));
-    return SvFLAGS(sv) |= SVs_PADTMP;
-}
-PERL_STATIC_INLINE U32
-S_SvPADTMP_off(SV *sv)
-{
-    assert(!(SvFLAGS(sv) & SVs_PADMY));
-    return SvFLAGS(sv) &= ~SVs_PADTMP;
-}
-PERL_STATIC_INLINE U32
 S_SvPADSTALE_on(SV *sv)
 {
-    assert(SvFLAGS(sv) & SVs_PADMY);
+    assert(!(SvFLAGS(sv) & SVs_PADTMP));
     return SvFLAGS(sv) |= SVs_PADSTALE;
 }
 PERL_STATIC_INLINE U32
 S_SvPADSTALE_off(SV *sv)
 {
-    assert(SvFLAGS(sv) & SVs_PADMY);
+    assert(!(SvFLAGS(sv) & SVs_PADTMP));
     return SvFLAGS(sv) &= ~SVs_PADSTALE;
 }
 #if defined(PERL_CORE) || defined (PERL_EXT)
@@ -188,68 +244,24 @@ S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
 /* ------------------------------- handy.h ------------------------------- */
 
 /* saves machine code for a common noreturn idiom typically used in Newx*() */
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-function"
+#ifdef GCC_DIAG_PRAGMA
+GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
 #endif
 static void
 S_croak_memory_wrap(void)
 {
     Perl_croak_nocontext("%s",PL_memory_wrap);
 }
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-
-#ifdef BOOTSTRAP_CHARSET
-static bool
-S_bootstrap_ctype(U8 character, UV classnum, bool full_Latin1)
-{
-    /* See comments in handy.h.  This is placed in this file primarily to avoid
-     * having to have an entry for it in embed.fnc */
-
-    dTHX;
-
-    if (! full_Latin1 && ! isASCII(character)) {
-        return FALSE;
-    }
-
-    switch (classnum) {
-        case _CC_ALPHANUMERIC: return isALPHANUMERIC_L1(character);
-        case _CC_ALPHA:     return isALPHA_L1(character);
-        case _CC_ASCII:     return isASCII_L1(character);
-        case _CC_BLANK:     return isBLANK_L1(character);
-        case _CC_CASED:     return isLOWER_L1(character)
-                                   || isUPPER_L1(character);
-        case _CC_CNTRL:     return isCNTRL_L1(character);
-        case _CC_DIGIT:     return isDIGIT_L1(character);
-        case _CC_GRAPH:     return isGRAPH_L1(character);
-        case _CC_LOWER:     return isLOWER_L1(character);
-        case _CC_PRINT:     return isPRINT_L1(character);
-        case _CC_PSXSPC:    return isPSXSPC_L1(character);
-        case _CC_PUNCT:     return isPUNCT_L1(character);
-        case _CC_SPACE:     return isSPACE_L1(character);
-        case _CC_UPPER:     return isUPPER_L1(character);
-        case _CC_WORDCHAR:  return isWORDCHAR_L1(character);
-        case _CC_XDIGIT:    return isXDIGIT_L1(character);
-        case _CC_VERTSPACE: return isSPACE_L1(character) && ! isBLANK_L1(character);
-        case _CC_IDFIRST:   return isIDFIRST_L1(character);
-        case _CC_QUOTEMETA: return _isQUOTEMETA(character);
-        case _CC_CHARNAME_CONT: return isCHARNAME_CONT(character);
-        case _CC_NONLATIN1_FOLD: return _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(character);
-        case _CC_NON_FINAL_FOLD: return _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(character);
-        case _CC_IS_IN_SOME_FOLD: return _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(character);
-        case _CC_BACKSLASH_FOO_LBRACE_IS_META: return 0;
-
-
-        default: break;
-    }
-    Perl_croak(aTHX_ "panic: bootstrap_ctype() has an unexpected character class '%" UVxf "'", classnum);
-}
+#ifdef GCC_DIAG_PRAGMA
+GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
 #endif
 
 /* ------------------------------- utf8.h ------------------------------- */
 
+/*
+=head1 Unicode Support
+*/
+
 PERL_STATIC_INLINE void
 S_append_utf8_from_native_byte(const U8 byte, U8** dest)
 {
@@ -259,63 +271,423 @@ S_append_utf8_from_native_byte(const U8 byte, U8** dest)
     PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
 
     if (NATIVE_BYTE_IS_INVARIANT(byte))
-        *(*dest)++ = byte;
+        *((*dest)++) = byte;
     else {
-        *(*dest)++ = UTF8_EIGHT_BIT_HI(byte);
-        *(*dest)++ = UTF8_EIGHT_BIT_LO(byte);
+        *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
+        *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
+    }
+}
+
+/*
+=for apidoc valid_utf8_to_uvchr
+Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
+the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
+it passes C<L</isUTF8_CHAR>>.  Surrogates, non-character code points, and
+non-Unicode code points are allowed.
+
+=cut
+
+ */
+
+PERL_STATIC_INLINE UV
+Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
+{
+    const UV expectlen = UTF8SKIP(s);
+    const U8* send = s + expectlen;
+    UV uv = *s;
+
+    PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
+
+    if (retlen) {
+        *retlen = expectlen;
+    }
+
+    /* An invariant is trivially returned */
+    if (expectlen == 1) {
+	return uv;
+    }
+
+    /* Remove the leading bits that indicate the number of bytes, leaving just
+     * the bits that are part of the value */
+    uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
+
+    /* Now, loop through the remaining bytes, accumulating each into the
+     * working total as we go.  (I khw tried unrolling the loop for up to 4
+     * bytes, but there was no performance improvement) */
+    for (++s; s < send; s++) {
+        uv = UTF8_ACCUMULATE(uv, *s);
     }
+
+    return UNI_TO_NATIVE(uv);
+
 }
 
-/* These two exist only to replace the macros they formerly were so that their
- * use can be deprecated */
+/*
+=for apidoc is_utf8_invariant_string
+
+Returns TRUE if the first C<len> bytes of the string C<s> are the same
+regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
+EBCDIC machines); otherwise it returns FALSE.  That is, it returns TRUE if they
+are UTF-8 invariant.  On ASCII-ish machines, all the ASCII characters and only
+the ASCII characters fit this definition.  On EBCDIC machines, the ASCII-range
+characters are invariant, but so also are the C1 controls.
+
+If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
+use this option, that C<s> can't have embedded C<NUL> characters and has to
+have a terminating C<NUL> byte).
+
+See also
+C<L</is_utf8_string>>,
+C<L</is_utf8_string_flags>>,
+C<L</is_utf8_string_loc>>,
+C<L</is_utf8_string_loc_flags>>,
+C<L</is_utf8_string_loclen>>,
+C<L</is_utf8_string_loclen_flags>>,
+C<L</is_utf8_fixed_width_buf_flags>>,
+C<L</is_utf8_fixed_width_buf_loc_flags>>,
+C<L</is_utf8_fixed_width_buf_loclen_flags>>,
+C<L</is_strict_utf8_string>>,
+C<L</is_strict_utf8_string_loc>>,
+C<L</is_strict_utf8_string_loclen>>,
+C<L</is_c9strict_utf8_string>>,
+C<L</is_c9strict_utf8_string_loc>>,
+and
+C<L</is_c9strict_utf8_string_loclen>>.
+
+=cut
+
+*/
+
+#define is_utf8_invariant_string(s, len)                                    \
+                                is_utf8_invariant_string_loc(s, len, NULL)
+
+/*
+=for apidoc is_utf8_invariant_string_loc
+
+Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
+the first UTF-8 variant character in the C<ep> pointer; if all characters are
+UTF-8 invariant, this function does not change the contents of C<*ep>.
+
+=cut
+
+*/
 
 PERL_STATIC_INLINE bool
-S_isIDFIRST_lazy(pTHX_ const char* p)
+S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
 {
-    PERL_ARGS_ASSERT_ISIDFIRST_LAZY;
+    const U8* send;
+    const U8* x = s;
+
+    PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
+
+    if (len == 0) {
+        len = strlen((const char *)s);
+    }
+
+    send = s + len;
+
+#ifndef EBCDIC
+    /* Try to get the widest word on this platform */
+#  ifdef HAS_LONG_LONG
+#    define PERL_WORDCAST unsigned long long
+#    define PERL_WORDSIZE LONGLONGSIZE
+#  else
+#    define PERL_WORDCAST UV
+#    define PERL_WORDSIZE UVSIZE
+#  endif
+
+#  if PERL_WORDSIZE == 4
+#    define PERL_VARIANTS_WORD_MASK 0x80808080
+#    define PERL_WORD_BOUNDARY_MASK 0x3
+#  elif PERL_WORDSIZE == 8
+#    define PERL_VARIANTS_WORD_MASK UINT64_C(0x8080808080808080)
+#    define PERL_WORD_BOUNDARY_MASK 0x7
+#  else
+#    error Unexpected word size
+#  endif
+
+    /* Process per-byte until reach word boundary.  XXX This loop could be
+     * eliminated if we knew that this platform had fast unaligned reads */
+    while (x < send && (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK)) {
+        if (! UTF8_IS_INVARIANT(*x)) {
+            if (ep) {
+                *ep = x;
+            }
+
+            return FALSE;
+        }
+        x++;
+    }
+
+    /* Process per-word as long as we have at least a full word left */
+    while (x + PERL_WORDSIZE <= send) {
+        if ((* (PERL_WORDCAST *) x) & PERL_VARIANTS_WORD_MASK)  {
+
+            /* Found a variant.  Just return if caller doesn't want its exact
+             * position */
+            if (! ep) {
+                return FALSE;
+            }
+
+            /* Otherwise fall into final loop to find which byte it is */
+            break;
+        }
+        x += PERL_WORDSIZE;
+    }
 
-    return isIDFIRST_lazy_if(p,1);
+#  undef PERL_WORDCAST
+#  undef PERL_WORDSIZE
+#  undef PERL_WORD_BOUNDARY_MASK
+#  undef PERL_VARIANTS_WORD_MASK
+#endif
+
+    /* Process per-byte */
+    while (x < send) {
+	if (! UTF8_IS_INVARIANT(*x)) {
+            if (ep) {
+                *ep = x;
+            }
+
+            return FALSE;
+        }
+
+        x++;
+    }
+
+    return TRUE;
 }
 
+/*
+=for apidoc is_utf8_string
+
+Returns TRUE if the first C<len> bytes of string C<s> form a valid
+Perl-extended-UTF-8 string; returns FALSE otherwise.  If C<len> is 0, it will
+be calculated using C<strlen(s)> (which means if you use this option, that C<s>
+can't have embedded C<NUL> characters and has to have a terminating C<NUL>
+byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
+
+This function considers Perl's extended UTF-8 to be valid.  That means that
+code points above Unicode, surrogates, and non-character code points are
+considered valid by this function.  Use C<L</is_strict_utf8_string>>,
+C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
+code points are considered valid.
+
+See also
+C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_invariant_string_loc>>,
+C<L</is_utf8_string_loc>>,
+C<L</is_utf8_string_loclen>>,
+C<L</is_utf8_fixed_width_buf_flags>>,
+C<L</is_utf8_fixed_width_buf_loc_flags>>,
+C<L</is_utf8_fixed_width_buf_loclen_flags>>,
+
+=cut
+*/
+
+#define is_utf8_string(s, len)  is_utf8_string_loclen(s, len, NULL, NULL)
+
+#if defined(PERL_CORE) || defined (PERL_EXT)
+
+/*
+=for apidoc is_utf8_non_invariant_string
+
+Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
+C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
+UTF-8; otherwise returns FALSE.
+
+A TRUE return means that at least one code point represented by the sequence
+either is a wide character not representable as a single byte, or the
+representation differs depending on whether the sequence is encoded in UTF-8 or
+not.
+
+See also
+C<L<perlapi/is_utf8_invariant_string>>,
+C<L<perlapi/is_utf8_string>>
+
+=cut
+
+This is commonly used to determine if a SV's UTF-8 flag should be turned on.
+It needn't be if its string is entirely UTF-8 invariant, and it shouldn't be if
+it otherwise contains invalid UTF-8.
+
+It is an internal function because khw thinks that XS code shouldn't be working
+at this low a level.  A valid use case could change that.
+
+*/
+
 PERL_STATIC_INLINE bool
-S_isALNUM_lazy(pTHX_ const char* p)
+S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
 {
-    PERL_ARGS_ASSERT_ISALNUM_LAZY;
+    const U8 * first_variant;
+
+    PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
+
+    if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+        return FALSE;
+    }
 
-    return isALNUM_lazy_if(p,1);
+    return is_utf8_string(first_variant, len - (first_variant - s));
 }
 
-/* ------------------------------- perl.h ----------------------------- */
+#endif
 
 /*
-=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
+=for apidoc is_strict_utf8_string
 
-Test that the given C<pv> doesn't contain any internal NUL characters.
-If it does, set C<errno> to ENOENT, optionally warn, and return FALSE.
+Returns TRUE if the first C<len> bytes of string C<s> form a valid
+UTF-8-encoded string that is fully interchangeable by any application using
+Unicode rules; otherwise it returns FALSE.  If C<len> is 0, it will be
+calculated using C<strlen(s)> (which means if you use this option, that C<s>
+can't have embedded C<NUL> characters and has to have a terminating C<NUL>
+byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 
-Return TRUE if the name is safe.
+This function returns FALSE for strings containing any
+code points above the Unicode max of 0x10FFFF, surrogate code points, or
+non-character code points.
+
+See also
+C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_invariant_string_loc>>,
+C<L</is_utf8_string>>,
+C<L</is_utf8_string_flags>>,
+C<L</is_utf8_string_loc>>,
+C<L</is_utf8_string_loc_flags>>,
+C<L</is_utf8_string_loclen>>,
+C<L</is_utf8_string_loclen_flags>>,
+C<L</is_utf8_fixed_width_buf_flags>>,
+C<L</is_utf8_fixed_width_buf_loc_flags>>,
+C<L</is_utf8_fixed_width_buf_loclen_flags>>,
+C<L</is_strict_utf8_string_loc>>,
+C<L</is_strict_utf8_string_loclen>>,
+C<L</is_c9strict_utf8_string>>,
+C<L</is_c9strict_utf8_string_loc>>,
+and
+C<L</is_c9strict_utf8_string_loclen>>.
+
+=cut
+*/
+
+#define is_strict_utf8_string(s, len)  is_strict_utf8_string_loclen(s, len, NULL, NULL)
+
+/*
+=for apidoc is_c9strict_utf8_string
+
+Returns TRUE if the first C<len> bytes of string C<s> form a valid
+UTF-8-encoded string that conforms to
+L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
+otherwise it returns FALSE.  If C<len> is 0, it will be calculated using
+C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
+C<NUL> characters and has to have a terminating C<NUL> byte).  Note that all
+characters being ASCII constitute 'a valid UTF-8 string'.
+
+This function returns FALSE for strings containing any code points above the
+Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
+code points per
+L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
+
+See also
+C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_invariant_string_loc>>,
+C<L</is_utf8_string>>,
+C<L</is_utf8_string_flags>>,
+C<L</is_utf8_string_loc>>,
+C<L</is_utf8_string_loc_flags>>,
+C<L</is_utf8_string_loclen>>,
+C<L</is_utf8_string_loclen_flags>>,
+C<L</is_utf8_fixed_width_buf_flags>>,
+C<L</is_utf8_fixed_width_buf_loc_flags>>,
+C<L</is_utf8_fixed_width_buf_loclen_flags>>,
+C<L</is_strict_utf8_string>>,
+C<L</is_strict_utf8_string_loc>>,
+C<L</is_strict_utf8_string_loclen>>,
+C<L</is_c9strict_utf8_string_loc>>,
+and
+C<L</is_c9strict_utf8_string_loclen>>.
+
+=cut
+*/
+
+#define is_c9strict_utf8_string(s, len)  is_c9strict_utf8_string_loclen(s, len, NULL, 0)
+
+/*
+=for apidoc is_utf8_string_flags
+
+Returns TRUE if the first C<len> bytes of string C<s> form a valid
+UTF-8 string, subject to the restrictions imposed by C<flags>;
+returns FALSE otherwise.  If C<len> is 0, it will be calculated
+using C<strlen(s)> (which means if you use this option, that C<s> can't have
+embedded C<NUL> characters and has to have a terminating C<NUL> byte).  Note
+that all characters being ASCII constitute 'a valid UTF-8 string'.
+
+If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
+C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
+as C<L</is_strict_utf8_string>>; and if C<flags> is
+C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
+C<L</is_c9strict_utf8_string>>.  Otherwise C<flags> may be any
+combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
+C<L</utf8n_to_uvchr>>, with the same meanings.
 
-Used by the IS_SAFE_SYSCALL() macro.
+See also
+C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_invariant_string_loc>>,
+C<L</is_utf8_string>>,
+C<L</is_utf8_string_loc>>,
+C<L</is_utf8_string_loc_flags>>,
+C<L</is_utf8_string_loclen>>,
+C<L</is_utf8_string_loclen_flags>>,
+C<L</is_utf8_fixed_width_buf_flags>>,
+C<L</is_utf8_fixed_width_buf_loc_flags>>,
+C<L</is_utf8_fixed_width_buf_loclen_flags>>,
+C<L</is_strict_utf8_string>>,
+C<L</is_strict_utf8_string_loc>>,
+C<L</is_strict_utf8_string_loclen>>,
+C<L</is_c9strict_utf8_string>>,
+C<L</is_c9strict_utf8_string_loc>>,
+and
+C<L</is_c9strict_utf8_string_loclen>>.
 
 =cut
 */
 
 PERL_STATIC_INLINE bool
-S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
-    /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
-     * perl itself uses xce*() functions which accept 8-bit strings.
-     */
+S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
+{
+    const U8 * first_variant;
 
-    PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
+    PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
+    assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+                          |UTF8_DISALLOW_PERL_EXTENDED)));
 
-    if (pv && len > 1) {
-        char *null_at;
-        if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
-                SETERRNO(ENOENT, LIB_INVARG);
-                Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
-                                   "Invalid \\0 character in %s for %s: %s\\0%s",
-                                   what, op_name, pv, null_at+1);
+    if (len == 0) {
+        len = strlen((const char *)s);
+    }
+
+    if (flags == 0) {
+        return is_utf8_string(s, len);
+    }
+
+    if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
+                                        == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
+    {
+        return is_strict_utf8_string(s, len);
+    }
+
+    if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
+                                       == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
+    {
+        return is_c9strict_utf8_string(s, len);
+    }
+
+    if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
+        const U8* const send = s + len;
+        const U8* x = first_variant;
+
+        while (x < send) {
+            STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
+            if (UNLIKELY(! cur_len)) {
                 return FALSE;
+            }
+            x += cur_len;
         }
     }
 
@@ -324,42 +696,1225 @@ S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char
 
 /*
 
-Return true if the supplied filename has a newline character
-immediately before the final NUL.
+=for apidoc is_utf8_string_loc
 
-My original look at this incorrectly used the len from SvPV(), but
-that's incorrect, since we allow for a NUL in pv[len-1].
+Like C<L</is_utf8_string>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer.
 
-So instead, strlen() and work from there.
+See also C<L</is_utf8_string_loclen>>.
 
-This allow for the user reading a filename, forgetting to chomp it,
-then calling:
+=cut
+*/
 
-  open my $foo, "$file\0";
+#define is_utf8_string_loc(s, len, ep)  is_utf8_string_loclen(s, len, ep, 0)
+
+/*
 
+=for apidoc is_utf8_string_loclen
+
+Like C<L</is_utf8_string>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer, and the number of UTF-8
+encoded characters in the C<el> pointer.
+
+See also C<L</is_utf8_string_loc>>.
+
+=cut
 */
 
-#ifdef PERL_CORE
+PERL_STATIC_INLINE bool
+Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
+{
+    const U8 * first_variant;
+
+    PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
+
+    if (len == 0) {
+        len = strlen((const char *) s);
+    }
+
+    if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+        if (el)
+            *el = len;
+
+        if (ep) {
+            *ep = s + len;
+        }
+
+        return TRUE;
+    }
+
+    {
+        const U8* const send = s + len;
+        const U8* x = first_variant;
+        STRLEN outlen = first_variant - s;
+
+        while (x < send) {
+            const STRLEN cur_len = isUTF8_CHAR(x, send);
+            if (UNLIKELY(! cur_len)) {
+                break;
+            }
+            x += cur_len;
+            outlen++;
+        }
+
+        if (el)
+            *el = outlen;
+
+        if (ep) {
+            *ep = x;
+        }
+
+        return (x == send);
+    }
+}
+
+/*
+
+=for apidoc is_strict_utf8_string_loc
+
+Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer.
+
+See also C<L</is_strict_utf8_string_loclen>>.
+
+=cut
+*/
+
+#define is_strict_utf8_string_loc(s, len, ep)                               \
+                                is_strict_utf8_string_loclen(s, len, ep, 0)
+
+/*
+
+=for apidoc is_strict_utf8_string_loclen
+
+Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer, and the number of UTF-8
+encoded characters in the C<el> pointer.
+
+See also C<L</is_strict_utf8_string_loc>>.
+
+=cut
+*/
 
 PERL_STATIC_INLINE bool
-S_should_warn_nl(const char *pv) {
-    STRLEN len;
+S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
+{
+    const U8 * first_variant;
 
-    PERL_ARGS_ASSERT_SHOULD_WARN_NL;
+    PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
 
-    len = strlen(pv);
+    if (len == 0) {
+        len = strlen((const char *) s);
+    }
 
-    return len > 0 && pv[len-1] == '\n';
+    if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+        if (el)
+            *el = len;
+
+        if (ep) {
+            *ep = s + len;
+        }
+
+        return TRUE;
+    }
+
+    {
+        const U8* const send = s + len;
+        const U8* x = first_variant;
+        STRLEN outlen = first_variant - s;
+
+        while (x < send) {
+            const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
+            if (UNLIKELY(! cur_len)) {
+                break;
+            }
+            x += cur_len;
+            outlen++;
+        }
+
+        if (el)
+            *el = outlen;
+
+        if (ep) {
+            *ep = x;
+        }
+
+        return (x == send);
+    }
 }
 
-#endif
+/*
+
+=for apidoc is_c9strict_utf8_string_loc
+
+Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
+the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer.
+
+See also C<L</is_c9strict_utf8_string_loclen>>.
+
+=cut
+*/
+
+#define is_c9strict_utf8_string_loc(s, len, ep)	                            \
+                            is_c9strict_utf8_string_loclen(s, len, ep, 0)
+
+/*
+
+=for apidoc is_c9strict_utf8_string_loclen
+
+Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
+the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
+characters in the C<el> pointer.
+
+See also C<L</is_c9strict_utf8_string_loc>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
+{
+    const U8 * first_variant;
+
+    PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
+
+    if (len == 0) {
+        len = strlen((const char *) s);
+    }
+
+    if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+        if (el)
+            *el = len;
+
+        if (ep) {
+            *ep = s + len;
+        }
+
+        return TRUE;
+    }
+
+    {
+        const U8* const send = s + len;
+        const U8* x = first_variant;
+        STRLEN outlen = first_variant - s;
+
+        while (x < send) {
+            const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
+            if (UNLIKELY(! cur_len)) {
+                break;
+            }
+            x += cur_len;
+            outlen++;
+        }
+
+        if (el)
+            *el = outlen;
+
+        if (ep) {
+            *ep = x;
+        }
+
+        return (x == send);
+    }
+}
+
+/*
+
+=for apidoc is_utf8_string_loc_flags
+
+Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer.
+
+See also C<L</is_utf8_string_loclen_flags>>.
+
+=cut
+*/
+
+#define is_utf8_string_loc_flags(s, len, ep, flags)                         \
+                        is_utf8_string_loclen_flags(s, len, ep, 0, flags)
+
+
+/* The above 3 actual functions could have been moved into the more general one
+ * just below, and made #defines that call it with the right 'flags'.  They are
+ * currently kept separate to increase their chances of getting inlined */
+
+/*
+
+=for apidoc is_utf8_string_loclen_flags
+
+Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer, and the number of UTF-8
+encoded characters in the C<el> pointer.
+
+See also C<L</is_utf8_string_loc_flags>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
+{
+    const U8 * first_variant;
+
+    PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
+    assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+                          |UTF8_DISALLOW_PERL_EXTENDED)));
+
+    if (len == 0) {
+        len = strlen((const char *) s);
+    }
+
+    if (flags == 0) {
+        return is_utf8_string_loclen(s, len, ep, el);
+    }
+
+    if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
+                                        == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
+    {
+        return is_strict_utf8_string_loclen(s, len, ep, el);
+    }
+
+    if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
+                                    == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
+    {
+        return is_c9strict_utf8_string_loclen(s, len, ep, el);
+    }
+
+    if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+        if (el)
+            *el = len;
+
+        if (ep) {
+            *ep = s + len;
+        }
+
+        return TRUE;
+    }
+
+    {
+        const U8* send = s + len;
+        const U8* x = first_variant;
+        STRLEN outlen = first_variant - s;
+
+        while (x < send) {
+            const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
+            if (UNLIKELY(! cur_len)) {
+                break;
+            }
+            x += cur_len;
+            outlen++;
+        }
+
+        if (el)
+            *el = outlen;
+
+        if (ep) {
+            *ep = x;
+        }
+
+        return (x == send);
+    }
+}
+
+/*
+=for apidoc utf8_distance
+
+Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
+and C<b>.
+
+WARNING: use only if you *know* that the pointers point inside the
+same UTF-8 buffer.
+
+=cut
+*/
+
+PERL_STATIC_INLINE IV
+Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
+{
+    PERL_ARGS_ASSERT_UTF8_DISTANCE;
+
+    return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
+}
+
+/*
+=for apidoc utf8_hop
+
+Return the UTF-8 pointer C<s> displaced by C<off> characters, either
+forward or backward.
+
+WARNING: do not use the following unless you *know* C<off> is within
+the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
+on the first byte of character or just after the last byte of a character.
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop(const U8 *s, SSize_t off)
+{
+    PERL_ARGS_ASSERT_UTF8_HOP;
+
+    /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+     * the bitops (especially ~) can create illegal UTF-8.
+     * In other words: in Perl UTF-8 is not just for Unicode. */
+
+    if (off >= 0) {
+	while (off--)
+	    s += UTF8SKIP(s);
+    }
+    else {
+	while (off++) {
+	    s--;
+	    while (UTF8_IS_CONTINUATION(*s))
+		s--;
+	}
+    }
+    GCC_DIAG_IGNORE(-Wcast-qual);
+    return (U8 *)s;
+    GCC_DIAG_RESTORE;
+}
+
+/*
+=for apidoc utf8_hop_forward
+
+Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
+forward.
+
+C<off> must be non-negative.
+
+C<s> must be before or equal to C<end>.
+
+When moving forward it will not move beyond C<end>.
+
+Will not exceed this limit even if the string is not valid "UTF-8".
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
+{
+    PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
+
+    /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+     * the bitops (especially ~) can create illegal UTF-8.
+     * In other words: in Perl UTF-8 is not just for Unicode. */
+
+    assert(s <= end);
+    assert(off >= 0);
+
+    while (off--) {
+        STRLEN skip = UTF8SKIP(s);
+        if ((STRLEN)(end - s) <= skip) {
+            GCC_DIAG_IGNORE(-Wcast-qual);
+            return (U8 *)end;
+            GCC_DIAG_RESTORE;
+        }
+        s += skip;
+    }
+
+    GCC_DIAG_IGNORE(-Wcast-qual);
+    return (U8 *)s;
+    GCC_DIAG_RESTORE;
+}
+
+/*
+=for apidoc utf8_hop_back
+
+Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
+backward.
+
+C<off> must be non-positive.
+
+C<s> must be after or equal to C<start>.
+
+When moving backward it will not move before C<start>.
+
+Will not exceed this limit even if the string is not valid "UTF-8".
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
+{
+    PERL_ARGS_ASSERT_UTF8_HOP_BACK;
+
+    /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+     * the bitops (especially ~) can create illegal UTF-8.
+     * In other words: in Perl UTF-8 is not just for Unicode. */
+
+    assert(start <= s);
+    assert(off <= 0);
+
+    while (off++ && s > start) {
+        s--;
+        while (UTF8_IS_CONTINUATION(*s) && s > start)
+            s--;
+    }
+    
+    GCC_DIAG_IGNORE(-Wcast-qual);
+    return (U8 *)s;
+    GCC_DIAG_RESTORE;
+}
+
+/*
+=for apidoc utf8_hop_safe
+
+Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
+either forward or backward.
+
+When moving backward it will not move before C<start>.
+
+When moving forward it will not move beyond C<end>.
+
+Will not exceed those limits even if the string is not valid "UTF-8".
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
+{
+    PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
+
+    /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+     * the bitops (especially ~) can create illegal UTF-8.
+     * In other words: in Perl UTF-8 is not just for Unicode. */
+
+    assert(start <= s && s <= end);
+
+    if (off >= 0) {
+        return utf8_hop_forward(s, off, end);
+    }
+    else {
+        return utf8_hop_back(s, off, start);
+    }
+}
+
+/*
+
+=for apidoc is_utf8_valid_partial_char
+
+Returns 0 if the sequence of bytes starting at C<s> and looking no further than
+S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
+points.  Otherwise, it returns 1 if there exists at least one non-empty
+sequence of bytes that when appended to sequence C<s>, starting at position
+C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
+otherwise returns 0.
+
+In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
+point.
+
+This is useful when a fixed-length buffer is being tested for being well-formed
+UTF-8, but the final few bytes in it don't comprise a full character; that is,
+it is split somewhere in the middle of the final code point's UTF-8
+representation.  (Presumably when the buffer is refreshed with the next chunk
+of data, the new first bytes will complete the partial code point.)   This
+function is used to verify that the final bytes in the current buffer are in
+fact the legal beginning of some code point, so that if they aren't, the
+failure can be signalled without having to wait for the next read.
+
+=cut
+*/
+#define is_utf8_valid_partial_char(s, e)                                    \
+                                is_utf8_valid_partial_char_flags(s, e, 0)
+
+/*
+
+=for apidoc is_utf8_valid_partial_char_flags
+
+Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
+or not the input is a valid UTF-8 encoded partial character, but it takes an
+extra parameter, C<flags>, which can further restrict which code points are
+considered valid.
+
+If C<flags> is 0, this behaves identically to
+C<L</is_utf8_valid_partial_char>>.  Otherwise C<flags> can be any combination
+of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>.  If
+there is any sequence of bytes that can complete the input partial character in
+such a way that a non-prohibited character is formed, the function returns
+TRUE; otherwise FALSE.  Non character code points cannot be determined based on
+partial character input.  But many  of the other possible excluded types can be
+determined from just the first one or two bytes.
+
+=cut
+ */
+
+PERL_STATIC_INLINE bool
+S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
+{
+    PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
+
+    assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+                          |UTF8_DISALLOW_PERL_EXTENDED)));
+
+    if (s >= e || s + UTF8SKIP(s) <= e) {
+        return FALSE;
+    }
+
+    return cBOOL(_is_utf8_char_helper(s, e, flags));
+}
+
+/*
+
+=for apidoc is_utf8_fixed_width_buf_flags
+
+Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
+is entirely valid UTF-8, subject to the restrictions given by C<flags>;
+otherwise it returns FALSE.
+
+If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
+without restriction.  If the final few bytes of the buffer do not form a
+complete code point, this will return TRUE anyway, provided that
+C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
+
+If C<flags> in non-zero, it can be any combination of the
+C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
+same meanings.
+
+This function differs from C<L</is_utf8_string_flags>> only in that the latter
+returns FALSE if the final few bytes of the string don't form a complete code
+point.
+
+=cut
+ */
+#define is_utf8_fixed_width_buf_flags(s, len, flags)                        \
+                is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
+
+/*
+
+=for apidoc is_utf8_fixed_width_buf_loc_flags
+
+Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
+failure in the C<ep> pointer.  If the function returns TRUE, C<*ep> will point
+to the beginning of any partial character at the end of the buffer; if there is
+no partial character C<*ep> will contain C<s>+C<len>.
+
+See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
+
+=cut
+*/
+
+#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags)               \
+                is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
+
+/*
+
+=for apidoc is_utf8_fixed_width_buf_loclen_flags
+
+Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
+complete, valid characters found in the C<el> pointer.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
+                                       STRLEN len,
+                                       const U8 **ep,
+                                       STRLEN *el,
+                                       const U32 flags)
+{
+    const U8 * maybe_partial;
+
+    PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
+
+    if (! ep) {
+        ep  = &maybe_partial;
+    }
+
+    /* If it's entirely valid, return that; otherwise see if the only error is
+     * that the final few bytes are for a partial character */
+    return    is_utf8_string_loclen_flags(s, len, ep, el, flags)
+           || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
+}
+
+/* ------------------------------- perl.h ----------------------------- */
+
+/*
+=head1 Miscellaneous Functions
+
+=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
+
+Test that the given C<pv> doesn't contain any internal C<NUL> characters.
+If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
+
+Return TRUE if the name is safe.
+
+Used by the C<IS_SAFE_SYSCALL()> macro.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
+    /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
+     * perl itself uses xce*() functions which accept 8-bit strings.
+     */
+
+    PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
+
+    if (len > 1) {
+        char *null_at;
+        if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
+                SETERRNO(ENOENT, LIB_INVARG);
+                Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
+                                   "Invalid \\0 character in %s for %s: %s\\0%s",
+                                   what, op_name, pv, null_at+1);
+                return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+/*
+
+Return true if the supplied filename has a newline character
+immediately before the first (hopefully only) NUL.
+
+My original look at this incorrectly used the len from SvPV(), but
+that's incorrect, since we allow for a NUL in pv[len-1].
+
+So instead, strlen() and work from there.
+
+This allow for the user reading a filename, forgetting to chomp it,
+then calling:
+
+  open my $foo, "$file\0";
+
+*/
+
+#ifdef PERL_CORE
+
+PERL_STATIC_INLINE bool
+S_should_warn_nl(const char *pv) {
+    STRLEN len;
+
+    PERL_ARGS_ASSERT_SHOULD_WARN_NL;
+
+    len = strlen(pv);
+
+    return len > 0 && pv[len-1] == '\n';
+}
+
+#endif
+
+/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
+
+#define MAX_CHARSET_NAME_LENGTH 2
+
+PERL_STATIC_INLINE const char *
+get_regex_charset_name(const U32 flags, STRLEN* const lenp)
+{
+    /* Returns a string that corresponds to the name of the regex character set
+     * given by 'flags', and *lenp is set the length of that string, which
+     * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
+
+    *lenp = 1;
+    switch (get_regex_charset(flags)) {
+        case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
+        case REGEX_LOCALE_CHARSET:  return LOCALE_PAT_MODS;
+        case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
+	case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
+	case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
+	    *lenp = 2;
+	    return ASCII_MORE_RESTRICT_PAT_MODS;
+    }
+    /* The NOT_REACHED; hides an assert() which has a rather complex
+     * definition in perl.h. */
+    NOT_REACHED; /* NOTREACHED */
+    return "?";	    /* Unknown */
+}
+
+/*
+
+Return false if any get magic is on the SV other than taint magic.
+
+*/
+
+PERL_STATIC_INLINE bool
+S_sv_only_taint_gmagic(SV *sv) {
+    MAGIC *mg = SvMAGIC(sv);
+
+    PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
+
+    while (mg) {
+        if (mg->mg_type != PERL_MAGIC_taint
+            && !(mg->mg_flags & MGf_GSKIP)
+            && mg->mg_virtual->svt_get) {
+            return FALSE;
+        }
+        mg = mg->mg_moremagic;
+    }
+
+    return TRUE;
+}
+
+/* ------------------ cop.h ------------------------------------------- */
+
+
+/* Enter a block. Push a new base context and return its address. */
+
+PERL_STATIC_INLINE PERL_CONTEXT *
+S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
+{
+    PERL_CONTEXT * cx;
+
+    PERL_ARGS_ASSERT_CX_PUSHBLOCK;
+
+    CXINC;
+    cx = CX_CUR();
+    cx->cx_type        = type;
+    cx->blk_gimme      = gimme;
+    cx->blk_oldsaveix  = saveix;
+    cx->blk_oldsp      = (I32)(sp - PL_stack_base);
+    cx->blk_oldcop     = PL_curcop;
+    cx->blk_oldmarksp  = (I32)(PL_markstack_ptr - PL_markstack);
+    cx->blk_oldscopesp = PL_scopestack_ix;
+    cx->blk_oldpm      = PL_curpm;
+    cx->blk_old_tmpsfloor = PL_tmps_floor;
+
+    PL_tmps_floor        = PL_tmps_ix;
+    CX_DEBUG(cx, "PUSH");
+    return cx;
+}
+
+
+/* Exit a block (RETURN and LAST). */
+
+PERL_STATIC_INLINE void
+S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
+{
+    PERL_ARGS_ASSERT_CX_POPBLOCK;
+
+    CX_DEBUG(cx, "POP");
+    /* these 3 are common to cx_popblock and cx_topblock */
+    PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
+    PL_scopestack_ix = cx->blk_oldscopesp;
+    PL_curpm         = cx->blk_oldpm;
+
+    /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
+     * and leaves a CX entry lying around for repeated use, so
+     * skip for multicall */                  \
+    assert(   (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
+            || PL_savestack_ix == cx->blk_oldsaveix);
+    PL_curcop     = cx->blk_oldcop;
+    PL_tmps_floor = cx->blk_old_tmpsfloor;
+}
+
+/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
+ * Whereas cx_popblock() restores the state to the point just before
+ * cx_pushblock() was called,  cx_topblock() restores it to the point just
+ * *after* cx_pushblock() was called. */
+
+PERL_STATIC_INLINE void
+S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
+{
+    PERL_ARGS_ASSERT_CX_TOPBLOCK;
+
+    CX_DEBUG(cx, "TOP");
+    /* these 3 are common to cx_popblock and cx_topblock */
+    PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
+    PL_scopestack_ix = cx->blk_oldscopesp;
+    PL_curpm         = cx->blk_oldpm;
+
+    PL_stack_sp      = PL_stack_base + cx->blk_oldsp;
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
+{
+    U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
+
+    PERL_ARGS_ASSERT_CX_PUSHSUB;
+
+    PERL_DTRACE_PROBE_ENTRY(cv);
+    cx->blk_sub.cv = cv;
+    cx->blk_sub.olddepth = CvDEPTH(cv);
+    cx->blk_sub.prevcomppad = PL_comppad;
+    cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
+    cx->blk_sub.retop = retop;
+    SvREFCNT_inc_simple_void_NN(cv);
+    cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
+}
+
+
+/* subsets of cx_popsub() */
+
+PERL_STATIC_INLINE void
+S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
+{
+    CV *cv;
+
+    PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
+    assert(CxTYPE(cx) == CXt_SUB);
+
+    PL_comppad = cx->blk_sub.prevcomppad;
+    PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
+    cv = cx->blk_sub.cv;
+    CvDEPTH(cv) = cx->blk_sub.olddepth;
+    cx->blk_sub.cv = NULL;
+    SvREFCNT_dec(cv);
+}
+
+
+/* handle the @_ part of leaving a sub */
+
+PERL_STATIC_INLINE void
+S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
+{
+    AV *av;
+
+    PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
+    assert(CxTYPE(cx) == CXt_SUB);
+    assert(AvARRAY(MUTABLE_AV(
+        PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
+                CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
+
+    CX_POP_SAVEARRAY(cx);
+    av = MUTABLE_AV(PAD_SVl(0));
+    if (UNLIKELY(AvREAL(av)))
+        /* abandon @_ if it got reified */
+        clear_defarray(av, 0);
+    else {
+        CLEAR_ARGARRAY(av);
+    }
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
+{
+    PERL_ARGS_ASSERT_CX_POPSUB;
+    assert(CxTYPE(cx) == CXt_SUB);
+
+    PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
+
+    if (CxHASARGS(cx))
+        cx_popsub_args(cx);
+    cx_popsub_common(cx);
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
+{
+    PERL_ARGS_ASSERT_CX_PUSHFORMAT;
+
+    cx->blk_format.cv          = cv;
+    cx->blk_format.retop       = retop;
+    cx->blk_format.gv          = gv;
+    cx->blk_format.dfoutgv     = PL_defoutgv;
+    cx->blk_format.prevcomppad = PL_comppad;
+    cx->blk_u16                = 0;
+
+    SvREFCNT_inc_simple_void_NN(cv);
+    CvDEPTH(cv)++;
+    SvREFCNT_inc_void(cx->blk_format.dfoutgv);
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
+{
+    CV *cv;
+    GV *dfout;
+
+    PERL_ARGS_ASSERT_CX_POPFORMAT;
+    assert(CxTYPE(cx) == CXt_FORMAT);
+
+    dfout = cx->blk_format.dfoutgv;
+    setdefout(dfout);
+    cx->blk_format.dfoutgv = NULL;
+    SvREFCNT_dec_NN(dfout);
+
+    PL_comppad = cx->blk_format.prevcomppad;
+    PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
+    cv = cx->blk_format.cv;
+    cx->blk_format.cv = NULL;
+    --CvDEPTH(cv);
+    SvREFCNT_dec_NN(cv);
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
+{
+    PERL_ARGS_ASSERT_CX_PUSHEVAL;
+
+    cx->blk_eval.retop         = retop;
+    cx->blk_eval.old_namesv    = namesv;
+    cx->blk_eval.old_eval_root = PL_eval_root;
+    cx->blk_eval.cur_text      = PL_parser ? PL_parser->linestr : NULL;
+    cx->blk_eval.cv            = NULL; /* later set by doeval_compile() */
+    cx->blk_eval.cur_top_env   = PL_top_env;
+
+    assert(!(PL_in_eval     & ~ 0x3F));
+    assert(!(PL_op->op_type & ~0x1FF));
+    cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
+{
+    SV *sv;
+
+    PERL_ARGS_ASSERT_CX_POPEVAL;
+    assert(CxTYPE(cx) == CXt_EVAL);
+
+    PL_in_eval = CxOLD_IN_EVAL(cx);
+    assert(!(PL_in_eval & 0xc0));
+    PL_eval_root = cx->blk_eval.old_eval_root;
+    sv = cx->blk_eval.cur_text;
+    if (sv && CxEVAL_TXT_REFCNTED(cx)) {
+        cx->blk_eval.cur_text = NULL;
+        SvREFCNT_dec_NN(sv);
+    }
+
+    sv = cx->blk_eval.old_namesv;
+    if (sv) {
+        cx->blk_eval.old_namesv = NULL;
+        SvREFCNT_dec_NN(sv);
+    }
+}
+
+
+/* push a plain loop, i.e.
+ *     { block }
+ *     while (cond) { block }
+ *     for (init;cond;continue) { block }
+ * This loop can be last/redo'ed etc.
+ */
+
+PERL_STATIC_INLINE void
+S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
+{
+    PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
+    cx->blk_loop.my_op = cLOOP;
+}
+
+
+/* push a true for loop, i.e.
+ *     for var (list) { block }
+ */
+
+PERL_STATIC_INLINE void
+S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
+{
+    PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
+
+    /* this one line is common with cx_pushloop_plain */
+    cx->blk_loop.my_op = cLOOP;
+
+    cx->blk_loop.itervar_u.svp = (SV**)itervarp;
+    cx->blk_loop.itersave      = itersave;
+#ifdef USE_ITHREADS
+    cx->blk_loop.oldcomppad = PL_comppad;
+#endif
+}
+
+
+/* pop all loop types, including plain */
+
+PERL_STATIC_INLINE void
+S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
+{
+    PERL_ARGS_ASSERT_CX_POPLOOP;
+
+    assert(CxTYPE_is_LOOP(cx));
+    if (  CxTYPE(cx) == CXt_LOOP_ARY
+       || CxTYPE(cx) == CXt_LOOP_LAZYSV)
+    {
+        /* Free ary or cur. This assumes that state_u.ary.ary
+         * aligns with state_u.lazysv.cur. See cx_dup() */
+        SV *sv = cx->blk_loop.state_u.lazysv.cur;
+        cx->blk_loop.state_u.lazysv.cur = NULL;
+        SvREFCNT_dec_NN(sv);
+        if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
+            sv = cx->blk_loop.state_u.lazysv.end;
+            cx->blk_loop.state_u.lazysv.end = NULL;
+            SvREFCNT_dec_NN(sv);
+        }
+    }
+    if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
+        SV *cursv;
+        SV **svp = (cx)->blk_loop.itervar_u.svp;
+        if ((cx->cx_type & CXp_FOR_GV))
+            svp = &GvSV((GV*)svp);
+        cursv = *svp;
+        *svp = cx->blk_loop.itersave;
+        cx->blk_loop.itersave = NULL;
+        SvREFCNT_dec(cursv);
+    }
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
+{
+    PERL_ARGS_ASSERT_CX_PUSHWHEN;
+
+    cx->blk_givwhen.leave_op = cLOGOP->op_other;
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
+{
+    PERL_ARGS_ASSERT_CX_POPWHEN;
+    assert(CxTYPE(cx) == CXt_WHEN);
+
+    PERL_UNUSED_ARG(cx);
+    PERL_UNUSED_CONTEXT;
+    /* currently NOOP */
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
+{
+    PERL_ARGS_ASSERT_CX_PUSHGIVEN;
+
+    cx->blk_givwhen.leave_op = cLOGOP->op_other;
+    cx->blk_givwhen.defsv_save = orig_defsv;
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
+{
+    SV *sv;
+
+    PERL_ARGS_ASSERT_CX_POPGIVEN;
+    assert(CxTYPE(cx) == CXt_GIVEN);
+
+    sv = GvSV(PL_defgv);
+    GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
+    cx->blk_givwhen.defsv_save = NULL;
+    SvREFCNT_dec(sv);
+}
+
+/* ------------------ util.h ------------------------------------------- */
+
+/*
+=head1 Miscellaneous Functions
+
+=for apidoc foldEQ
+
+Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
+same
+case-insensitively; false otherwise.  Uppercase and lowercase ASCII range bytes
+match themselves and their opposite case counterparts.  Non-cased and non-ASCII
+range bytes match only themselves.
+
+=cut
+*/
+
+PERL_STATIC_INLINE I32
+Perl_foldEQ(const char *s1, const char *s2, I32 len)
+{
+    const U8 *a = (const U8 *)s1;
+    const U8 *b = (const U8 *)s2;
+
+    PERL_ARGS_ASSERT_FOLDEQ;
+
+    assert(len >= 0);
+
+    while (len--) {
+	if (*a != *b && *a != PL_fold[*b])
+	    return 0;
+	a++,b++;
+    }
+    return 1;
+}
+
+PERL_STATIC_INLINE I32
+Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
+{
+    /* Compare non-utf8 using Unicode (Latin1) semantics.  Does not work on
+     * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
+     * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these.  Nor
+     * does it check that the strings each have at least 'len' characters */
+
+    const U8 *a = (const U8 *)s1;
+    const U8 *b = (const U8 *)s2;
+
+    PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
+
+    assert(len >= 0);
+
+    while (len--) {
+	if (*a != *b && *a != PL_fold_latin1[*b]) {
+	    return 0;
+	}
+	a++, b++;
+    }
+    return 1;
+}
+
+/*
+=for apidoc foldEQ_locale
+
+Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
+same case-insensitively in the current locale; false otherwise.
+
+=cut
+*/
+
+PERL_STATIC_INLINE I32
+Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
+{
+    dVAR;
+    const U8 *a = (const U8 *)s1;
+    const U8 *b = (const U8 *)s2;
+
+    PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
+
+    assert(len >= 0);
+
+    while (len--) {
+	if (*a != *b && *a != PL_fold_locale[*b])
+	    return 0;
+	a++,b++;
+    }
+    return 1;
+}
+
+#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
+
+PERL_STATIC_INLINE void *
+S_my_memrchr(const char * s, const char c, const STRLEN len)
+{
+    /* memrchr(), since many platforms lack it */
+
+    const char * t = s + len - 1;
+
+    PERL_ARGS_ASSERT_MY_MEMRCHR;
+
+    while (t >= s) {
+        if (*t == c) {
+            return (void *) t;
+        }
+        t--;
+    }
+
+    return NULL;
+}
+
+#endif
 
 /*
- * Local variables:
- * c-indentation-style: bsd
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- *
  * ex: set ts=8 sts=4 sw=4 et:
  */