}
#endif
+/* ------------------------------- pad.h ------------------------------ */
+
+#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
+PERL_STATIC_INLINE bool
+PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
+{
+ /* is seq within the range _LOW to _HIGH ?
+ * This is complicated by the fact that PL_cop_seqmax
+ * may have wrapped around at some point */
+ if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
+ return FALSE; /* not yet introduced */
+
+ if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
+ /* in compiling scope */
+ if (
+ (seq > COP_SEQ_RANGE_LOW(pn))
+ ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
+ : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
+ )
+ return TRUE;
+ }
+ else if (
+ (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
+ ?
+ ( seq > COP_SEQ_RANGE_LOW(pn)
+ || seq <= COP_SEQ_RANGE_HIGH(pn))
+
+ : ( seq > COP_SEQ_RANGE_LOW(pn)
+ && seq <= COP_SEQ_RANGE_HIGH(pn))
+ )
+ return TRUE;
+ return FALSE;
+}
+#endif
+
+/* ------------------------------- pp.h ------------------------------- */
+
+PERL_STATIC_INLINE I32
+S_TOPMARK(pTHX)
+{
+ DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
+ "MARK top %p %"IVdf"\n",
+ PL_markstack_ptr,
+ (IV)*PL_markstack_ptr)));
+ return *PL_markstack_ptr;
+}
+
+PERL_STATIC_INLINE I32
+S_POPMARK(pTHX)
+{
+ DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
+ "MARK pop %p %"IVdf"\n",
+ (PL_markstack_ptr-1),
+ (IV)*(PL_markstack_ptr-1))));
+ assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
+ return *PL_markstack_ptr--;
+}
+
/* ----------------------------- regexp.h ----------------------------- */
PERL_STATIC_INLINE struct regexp *
/* ------------------------------- utf8.h ------------------------------- */
+/*
+=head1 Unicode Support
+*/
+
PERL_STATIC_INLINE void
S_append_utf8_from_native_byte(const U8 byte, U8** dest)
{
PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
if (NATIVE_BYTE_IS_INVARIANT(byte))
- *(*dest)++ = byte;
+ *((*dest)++) = byte;
else {
- *(*dest)++ = UTF8_EIGHT_BIT_HI(byte);
- *(*dest)++ = UTF8_EIGHT_BIT_LO(byte);
+ *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
+ *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
}
}
/*
+=for apidoc valid_utf8_to_uvchr
+Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
+the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
+it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
+non-Unicode code points are allowed.
+
+=cut
-A helper function for the macro isUTF8_CHAR(), which should be used instead of
-this function. The macro will handle smaller code points directly saving time,
-using this function as a fall-back for higher code points.
+ */
-Tests if the first bytes of string C<s> form a valid UTF-8 character. 0 is
-returned if the bytes starting at C<s> up to but not including C<e> do not form a
-complete well-formed UTF-8 character; otherwise the number of bytes in the
-character is returned.
+PERL_STATIC_INLINE UV
+Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
+{
+ const UV expectlen = UTF8SKIP(s);
+ const U8* send = s + expectlen;
+ UV uv = *s;
-Note that an INVARIANT (i.e. ASCII on non-EBCDIC) character is a valid UTF-8
-character.
+ PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
-=cut */
-PERL_STATIC_INLINE STRLEN
-S__is_utf8_char_slow(const U8 *s, const U8 *e)
+ if (retlen) {
+ *retlen = expectlen;
+ }
+
+ /* An invariant is trivially returned */
+ if (expectlen == 1) {
+ return uv;
+ }
+
+ /* Remove the leading bits that indicate the number of bytes, leaving just
+ * the bits that are part of the value */
+ uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
+
+ /* Now, loop through the remaining bytes, accumulating each into the
+ * working total as we go. (I khw tried unrolling the loop for up to 4
+ * bytes, but there was no performance improvement) */
+ for (++s; s < send; s++) {
+ uv = UTF8_ACCUMULATE(uv, *s);
+ }
+
+ return UNI_TO_NATIVE(uv);
+
+}
+
+/*
+=for apidoc is_utf8_invariant_string
+
+Returns TRUE if the first C<len> bytes of the string C<s> are the same
+regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
+EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
+are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
+the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
+characters are invariant, but so also are the C1 controls.
+
+If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
+use this option, that C<s> can't have embedded C<NUL> characters and has to
+have a terminating C<NUL> byte).
+
+See also
+C<L</is_utf8_string>>,
+C<L</is_utf8_string_flags>>,
+C<L</is_utf8_string_loc>>,
+C<L</is_utf8_string_loc_flags>>,
+C<L</is_utf8_string_loclen>>,
+C<L</is_utf8_string_loclen_flags>>,
+C<L</is_utf8_fixed_width_buf_flags>>,
+C<L</is_utf8_fixed_width_buf_loc_flags>>,
+C<L</is_utf8_fixed_width_buf_loclen_flags>>,
+C<L</is_strict_utf8_string>>,
+C<L</is_strict_utf8_string_loc>>,
+C<L</is_strict_utf8_string_loclen>>,
+C<L</is_c9strict_utf8_string>>,
+C<L</is_c9strict_utf8_string_loc>>,
+and
+C<L</is_c9strict_utf8_string_loclen>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_utf8_invariant_string(const U8* const s, const STRLEN len)
+{
+ const U8* const send = s + (len ? len : strlen((const char *)s));
+ const U8* x = s;
+
+ PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING;
+
+ for (; x < send; ++x) {
+ if (!UTF8_IS_INVARIANT(*x))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/*
+=for apidoc is_utf8_string
+
+Returns TRUE if the first C<len> bytes of string C<s> form a valid
+Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
+be calculated using C<strlen(s)> (which means if you use this option, that C<s>
+can't have embedded C<NUL> characters and has to have a terminating C<NUL>
+byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
+
+This function considers Perl's extended UTF-8 to be valid. That means that
+code points above Unicode, surrogates, and non-character code points are
+considered valid by this function. Use C<L</is_strict_utf8_string>>,
+C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
+code points are considered valid.
+
+See also
+C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_string_loc>>,
+C<L</is_utf8_string_loclen>>,
+C<L</is_utf8_fixed_width_buf_flags>>,
+C<L</is_utf8_fixed_width_buf_loc_flags>>,
+C<L</is_utf8_fixed_width_buf_loclen_flags>>,
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+Perl_is_utf8_string(const U8 *s, const STRLEN len)
+{
+ /* This is now marked pure in embed.fnc, because isUTF8_CHAR now is pure.
+ * Be aware of possible changes to that */
+
+ const U8* const send = s + (len ? len : strlen((const char *)s));
+ const U8* x = s;
+
+ PERL_ARGS_ASSERT_IS_UTF8_STRING;
+
+ while (x < send) {
+ const STRLEN cur_len = isUTF8_CHAR(x, send);
+ if (UNLIKELY(! cur_len)) {
+ return FALSE;
+ }
+ x += cur_len;
+ }
+
+ return TRUE;
+}
+
+/*
+=for apidoc is_strict_utf8_string
+
+Returns TRUE if the first C<len> bytes of string C<s> form a valid
+UTF-8-encoded string that is fully interchangeable by any application using
+Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
+calculated using C<strlen(s)> (which means if you use this option, that C<s>
+can't have embedded C<NUL> characters and has to have a terminating C<NUL>
+byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
+
+This function returns FALSE for strings containing any
+code points above the Unicode max of 0x10FFFF, surrogate code points, or
+non-character code points.
+
+See also
+C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_string>>,
+C<L</is_utf8_string_flags>>,
+C<L</is_utf8_string_loc>>,
+C<L</is_utf8_string_loc_flags>>,
+C<L</is_utf8_string_loclen>>,
+C<L</is_utf8_string_loclen_flags>>,
+C<L</is_utf8_fixed_width_buf_flags>>,
+C<L</is_utf8_fixed_width_buf_loc_flags>>,
+C<L</is_utf8_fixed_width_buf_loclen_flags>>,
+C<L</is_strict_utf8_string_loc>>,
+C<L</is_strict_utf8_string_loclen>>,
+C<L</is_c9strict_utf8_string>>,
+C<L</is_c9strict_utf8_string_loc>>,
+and
+C<L</is_c9strict_utf8_string_loclen>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_strict_utf8_string(const U8 *s, const STRLEN len)
+{
+ const U8* const send = s + (len ? len : strlen((const char *)s));
+ const U8* x = s;
+
+ PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING;
+
+ while (x < send) {
+ const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
+ if (UNLIKELY(! cur_len)) {
+ return FALSE;
+ }
+ x += cur_len;
+ }
+
+ return TRUE;
+}
+
+/*
+=for apidoc is_c9strict_utf8_string
+
+Returns TRUE if the first C<len> bytes of string C<s> form a valid
+UTF-8-encoded string that conforms to
+L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
+otherwise it returns FALSE. If C<len> is 0, it will be calculated using
+C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
+C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
+characters being ASCII constitute 'a valid UTF-8 string'.
+
+This function returns FALSE for strings containing any code points above the
+Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
+code points per
+L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
+
+See also
+C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_string>>,
+C<L</is_utf8_string_flags>>,
+C<L</is_utf8_string_loc>>,
+C<L</is_utf8_string_loc_flags>>,
+C<L</is_utf8_string_loclen>>,
+C<L</is_utf8_string_loclen_flags>>,
+C<L</is_utf8_fixed_width_buf_flags>>,
+C<L</is_utf8_fixed_width_buf_loc_flags>>,
+C<L</is_utf8_fixed_width_buf_loclen_flags>>,
+C<L</is_strict_utf8_string>>,
+C<L</is_strict_utf8_string_loc>>,
+C<L</is_strict_utf8_string_loclen>>,
+C<L</is_c9strict_utf8_string_loc>>,
+and
+C<L</is_c9strict_utf8_string_loclen>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_c9strict_utf8_string(const U8 *s, const STRLEN len)
+{
+ const U8* const send = s + (len ? len : strlen((const char *)s));
+ const U8* x = s;
+
+ PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING;
+
+ while (x < send) {
+ const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
+ if (UNLIKELY(! cur_len)) {
+ return FALSE;
+ }
+ x += cur_len;
+ }
+
+ return TRUE;
+}
+
+/* The above 3 functions could have been moved into the more general one just
+ * below, and made #defines that call it with the right 'flags'. They are
+ * currently kept separate to increase their chances of getting inlined */
+
+/*
+=for apidoc is_utf8_string_flags
+
+Returns TRUE if the first C<len> bytes of string C<s> form a valid
+UTF-8 string, subject to the restrictions imposed by C<flags>;
+returns FALSE otherwise. If C<len> is 0, it will be calculated
+using C<strlen(s)> (which means if you use this option, that C<s> can't have
+embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
+that all characters being ASCII constitute 'a valid UTF-8 string'.
+
+If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
+C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
+as C<L</is_strict_utf8_string>>; and if C<flags> is
+C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
+C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
+combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
+C<L</utf8n_to_uvchr>>, with the same meanings.
+
+See also
+C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_string>>,
+C<L</is_utf8_string_loc>>,
+C<L</is_utf8_string_loc_flags>>,
+C<L</is_utf8_string_loclen>>,
+C<L</is_utf8_string_loclen_flags>>,
+C<L</is_utf8_fixed_width_buf_flags>>,
+C<L</is_utf8_fixed_width_buf_loc_flags>>,
+C<L</is_utf8_fixed_width_buf_loclen_flags>>,
+C<L</is_strict_utf8_string>>,
+C<L</is_strict_utf8_string_loc>>,
+C<L</is_strict_utf8_string_loclen>>,
+C<L</is_c9strict_utf8_string>>,
+C<L</is_c9strict_utf8_string_loc>>,
+and
+C<L</is_c9strict_utf8_string_loclen>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags)
+{
+ const U8* const send = s + (len ? len : strlen((const char *)s));
+ const U8* x = s;
+
+ PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
+ assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+ |UTF8_DISALLOW_ABOVE_31_BIT)));
+
+ if (flags == 0) {
+ return is_utf8_string(s, len);
+ }
+
+ if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
+ {
+ return is_strict_utf8_string(s, len);
+ }
+
+ if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
+ {
+ return is_c9strict_utf8_string(s, len);
+ }
+
+ while (x < send) {
+ STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
+ if (UNLIKELY(! cur_len)) {
+ return FALSE;
+ }
+ x += cur_len;
+ }
+
+ return TRUE;
+}
+
+/*
+
+=for apidoc is_utf8_string_loc
+
+Like C<L</is_utf8_string>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer.
+
+See also C<L</is_utf8_string_loclen>>.
+
+=cut
+*/
+
+#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
+
+/*
+
+=for apidoc is_utf8_string_loclen
+
+Like C<L</is_utf8_string>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer, and the number of UTF-8
+encoded characters in the C<el> pointer.
+
+See also C<L</is_utf8_string_loc>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+Perl_is_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
+{
+ const U8* const send = s + (len ? len : strlen((const char *)s));
+ const U8* x = s;
+ STRLEN outlen = 0;
+
+ PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
+
+ while (x < send) {
+ const STRLEN cur_len = isUTF8_CHAR(x, send);
+ if (UNLIKELY(! cur_len)) {
+ break;
+ }
+ x += cur_len;
+ outlen++;
+ }
+
+ if (el)
+ *el = outlen;
+
+ if (ep) {
+ *ep = x;
+ }
+
+ return (x == send);
+}
+
+/*
+
+=for apidoc is_strict_utf8_string_loc
+
+Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer.
+
+See also C<L</is_strict_utf8_string_loclen>>.
+
+=cut
+*/
+
+#define is_strict_utf8_string_loc(s, len, ep) \
+ is_strict_utf8_string_loclen(s, len, ep, 0)
+
+/*
+
+=for apidoc is_strict_utf8_string_loclen
+
+Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer, and the number of UTF-8
+encoded characters in the C<el> pointer.
+
+See also C<L</is_strict_utf8_string_loc>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
+{
+ const U8* const send = s + (len ? len : strlen((const char *)s));
+ const U8* x = s;
+ STRLEN outlen = 0;
+
+ PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
+
+ while (x < send) {
+ const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
+ if (UNLIKELY(! cur_len)) {
+ break;
+ }
+ x += cur_len;
+ outlen++;
+ }
+
+ if (el)
+ *el = outlen;
+
+ if (ep) {
+ *ep = x;
+ }
+
+ return (x == send);
+}
+
+/*
+
+=for apidoc is_c9strict_utf8_string_loc
+
+Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
+the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer.
+
+See also C<L</is_c9strict_utf8_string_loclen>>.
+
+=cut
+*/
+
+#define is_c9strict_utf8_string_loc(s, len, ep) \
+ is_c9strict_utf8_string_loclen(s, len, ep, 0)
+
+/*
+
+=for apidoc is_c9strict_utf8_string_loclen
+
+Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
+the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
+characters in the C<el> pointer.
+
+See also C<L</is_c9strict_utf8_string_loc>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_c9strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
+{
+ const U8* const send = s + (len ? len : strlen((const char *)s));
+ const U8* x = s;
+ STRLEN outlen = 0;
+
+ PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
+
+ while (x < send) {
+ const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
+ if (UNLIKELY(! cur_len)) {
+ break;
+ }
+ x += cur_len;
+ outlen++;
+ }
+
+ if (el)
+ *el = outlen;
+
+ if (ep) {
+ *ep = x;
+ }
+
+ return (x == send);
+}
+
+/*
+
+=for apidoc is_utf8_string_loc_flags
+
+Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer.
+
+See also C<L</is_utf8_string_loclen_flags>>.
+
+=cut
+*/
+
+#define is_utf8_string_loc_flags(s, len, ep, flags) \
+ is_utf8_string_loclen_flags(s, len, ep, 0, flags)
+
+
+/* The above 3 actual functions could have been moved into the more general one
+ * just below, and made #defines that call it with the right 'flags'. They are
+ * currently kept separate to increase their chances of getting inlined */
+
+/*
+
+=for apidoc is_utf8_string_loclen_flags
+
+Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer, and the number of UTF-8
+encoded characters in the C<el> pointer.
+
+See also C<L</is_utf8_string_loc_flags>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_utf8_string_loclen_flags(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
+{
+ const U8* const send = s + (len ? len : strlen((const char *)s));
+ const U8* x = s;
+ STRLEN outlen = 0;
+
+ PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
+ assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+ |UTF8_DISALLOW_ABOVE_31_BIT)));
+
+ if (flags == 0) {
+ return is_utf8_string_loclen(s, len, ep, el);
+ }
+
+ if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
+ {
+ return is_strict_utf8_string_loclen(s, len, ep, el);
+ }
+
+ if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
+ {
+ return is_c9strict_utf8_string_loclen(s, len, ep, el);
+ }
+
+ while (x < send) {
+ const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
+ if (UNLIKELY(! cur_len)) {
+ break;
+ }
+ x += cur_len;
+ outlen++;
+ }
+
+ if (el)
+ *el = outlen;
+
+ if (ep) {
+ *ep = x;
+ }
+
+ return (x == send);
+}
+
+/*
+=for apidoc utf8_distance
+
+Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
+and C<b>.
+
+WARNING: use only if you *know* that the pointers point inside the
+same UTF-8 buffer.
+
+=cut
+*/
+
+PERL_STATIC_INLINE IV
+Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
{
- dTHX; /* The function called below requires thread context */
+ PERL_ARGS_ASSERT_UTF8_DISTANCE;
+
+ return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
+}
+
+/*
+=for apidoc utf8_hop
+
+Return the UTF-8 pointer C<s> displaced by C<off> characters, either
+forward or backward.
+
+WARNING: do not use the following unless you *know* C<off> is within
+the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
+on the first byte of character or just after the last byte of a character.
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop(const U8 *s, SSize_t off)
+{
+ PERL_ARGS_ASSERT_UTF8_HOP;
+
+ /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+ * the bitops (especially ~) can create illegal UTF-8.
+ * In other words: in Perl UTF-8 is not just for Unicode. */
+
+ if (off >= 0) {
+ while (off--)
+ s += UTF8SKIP(s);
+ }
+ else {
+ while (off++) {
+ s--;
+ while (UTF8_IS_CONTINUATION(*s))
+ s--;
+ }
+ }
+ return (U8 *)s;
+}
+
+/*
+
+=for apidoc is_utf8_valid_partial_char
+
+Returns 0 if the sequence of bytes starting at C<s> and looking no further than
+S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
+points. Otherwise, it returns 1 if there exists at least one non-empty
+sequence of bytes that when appended to sequence C<s>, starting at position
+C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
+otherwise returns 0.
+
+In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
+point.
+
+This is useful when a fixed-length buffer is being tested for being well-formed
+UTF-8, but the final few bytes in it don't comprise a full character; that is,
+it is split somewhere in the middle of the final code point's UTF-8
+representation. (Presumably when the buffer is refreshed with the next chunk
+of data, the new first bytes will complete the partial code point.) This
+function is used to verify that the final bytes in the current buffer are in
+fact the legal beginning of some code point, so that if they aren't, the
+failure can be signalled without having to wait for the next read.
+
+=cut
+*/
+#define is_utf8_valid_partial_char(s, e) \
+ is_utf8_valid_partial_char_flags(s, e, 0)
+
+/*
+
+=for apidoc is_utf8_valid_partial_char_flags
+
+Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
+or not the input is a valid UTF-8 encoded partial character, but it takes an
+extra parameter, C<flags>, which can further restrict which code points are
+considered valid.
+
+If C<flags> is 0, this behaves identically to
+C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
+of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
+there is any sequence of bytes that can complete the input partial character in
+such a way that a non-prohibited character is formed, the function returns
+TRUE; otherwise FALSE. Non character code points cannot be determined based on
+partial character input. But many of the other possible excluded types can be
+determined from just the first one or two bytes.
+
+=cut
+ */
+
+PERL_STATIC_INLINE bool
+S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
+{
+ PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
+
+ assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+ |UTF8_DISALLOW_ABOVE_31_BIT)));
+
+ if (s >= e || s + UTF8SKIP(s) <= e) {
+ return FALSE;
+ }
+
+ return cBOOL(_is_utf8_char_helper(s, e, flags));
+}
+
+/*
+
+=for apidoc is_utf8_fixed_width_buf_flags
+
+Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
+is entirely valid UTF-8, subject to the restrictions given by C<flags>;
+otherwise it returns FALSE.
+
+If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
+without restriction. If the final few bytes of the buffer do not form a
+complete code point, this will return TRUE anyway, provided that
+C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
+
+If C<flags> in non-zero, it can be any combination of the
+C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
+same meanings.
+
+This function differs from C<L</is_utf8_string_flags>> only in that the latter
+returns FALSE if the final few bytes of the string don't form a complete code
+point.
+
+=cut
+ */
+#define is_utf8_fixed_width_buf_flags(s, len, flags) \
+ is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
+
+/*
+
+=for apidoc is_utf8_fixed_width_buf_loc_flags
- STRLEN actual_len;
+Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
+failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
+to the beginning of any partial character at the end of the buffer; if there is
+no partial character C<*ep> will contain C<s>+C<len>.
- PERL_ARGS_ASSERT__IS_UTF8_CHAR_SLOW;
+See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
- assert(e >= s);
- utf8n_to_uvchr(s, e - s, &actual_len, UTF8_CHECK_ONLY);
+=cut
+*/
- return (actual_len == (STRLEN) -1) ? 0 : actual_len;
+#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
+ is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
+
+/*
+
+=for apidoc is_utf8_fixed_width_buf_loclen_flags
+
+Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
+complete, valid characters found in the C<el> pointer.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
+ const STRLEN len,
+ const U8 **ep,
+ STRLEN *el,
+ const U32 flags)
+{
+ const U8 * maybe_partial;
+
+ PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
+
+ if (! ep) {
+ ep = &maybe_partial;
+ }
+
+ /* If it's entirely valid, return that; otherwise see if the only error is
+ * that the final few bytes are for a partial character */
+ return is_utf8_string_loclen_flags(s, len, ep, el, flags)
+ || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
}
/* ------------------------------- perl.h ----------------------------- */
=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
Test that the given C<pv> doesn't contain any internal C<NUL> characters.
-If it does, set C<errno> to ENOENT, optionally warn, and return FALSE.
+If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
Return TRUE if the name is safe.
-Used by the IS_SAFE_SYSCALL() macro.
+Used by the C<IS_SAFE_SYSCALL()> macro.
=cut
*/
PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
- if (pv && len > 1) {
+ if (len > 1) {
char *null_at;
if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
SETERRNO(ENOENT, LIB_INVARG);
/*
Return true if the supplied filename has a newline character
-immediately before the final NUL.
+immediately before the first (hopefully only) NUL.
My original look at this incorrectly used the len from SvPV(), but
that's incorrect, since we allow for a NUL in pv[len-1].
}
/*
- * Local variables:
- * c-indentation-style: bsd
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- *
+
+Return false if any get magic is on the SV other than taint magic.
+
+*/
+
+PERL_STATIC_INLINE bool
+S_sv_only_taint_gmagic(SV *sv) {
+ MAGIC *mg = SvMAGIC(sv);
+
+ PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
+
+ while (mg) {
+ if (mg->mg_type != PERL_MAGIC_taint
+ && !(mg->mg_flags & MGf_GSKIP)
+ && mg->mg_virtual->svt_get) {
+ return FALSE;
+ }
+ mg = mg->mg_moremagic;
+ }
+
+ return TRUE;
+}
+
+/* ------------------ cop.h ------------------------------------------- */
+
+
+/* Enter a block. Push a new base context and return its address. */
+
+PERL_STATIC_INLINE PERL_CONTEXT *
+S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
+{
+ PERL_CONTEXT * cx;
+
+ PERL_ARGS_ASSERT_CX_PUSHBLOCK;
+
+ CXINC;
+ cx = CX_CUR();
+ cx->cx_type = type;
+ cx->blk_gimme = gimme;
+ cx->blk_oldsaveix = saveix;
+ cx->blk_oldsp = (I32)(sp - PL_stack_base);
+ cx->blk_oldcop = PL_curcop;
+ cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
+ cx->blk_oldscopesp = PL_scopestack_ix;
+ cx->blk_oldpm = PL_curpm;
+ cx->blk_old_tmpsfloor = PL_tmps_floor;
+
+ PL_tmps_floor = PL_tmps_ix;
+ CX_DEBUG(cx, "PUSH");
+ return cx;
+}
+
+
+/* Exit a block (RETURN and LAST). */
+
+PERL_STATIC_INLINE void
+S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_POPBLOCK;
+
+ CX_DEBUG(cx, "POP");
+ /* these 3 are common to cx_popblock and cx_topblock */
+ PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
+ PL_scopestack_ix = cx->blk_oldscopesp;
+ PL_curpm = cx->blk_oldpm;
+
+ /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
+ * and leaves a CX entry lying around for repeated use, so
+ * skip for multicall */ \
+ assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
+ || PL_savestack_ix == cx->blk_oldsaveix);
+ PL_curcop = cx->blk_oldcop;
+ PL_tmps_floor = cx->blk_old_tmpsfloor;
+}
+
+/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
+ * Whereas cx_popblock() restores the state to the point just before
+ * cx_pushblock() was called, cx_topblock() restores it to the point just
+ * *after* cx_pushblock() was called. */
+
+PERL_STATIC_INLINE void
+S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_TOPBLOCK;
+
+ CX_DEBUG(cx, "TOP");
+ /* these 3 are common to cx_popblock and cx_topblock */
+ PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
+ PL_scopestack_ix = cx->blk_oldscopesp;
+ PL_curpm = cx->blk_oldpm;
+
+ PL_stack_sp = PL_stack_base + cx->blk_oldsp;
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
+{
+ U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
+
+ PERL_ARGS_ASSERT_CX_PUSHSUB;
+
+ PERL_DTRACE_PROBE_ENTRY(cv);
+ cx->blk_sub.cv = cv;
+ cx->blk_sub.olddepth = CvDEPTH(cv);
+ cx->blk_sub.prevcomppad = PL_comppad;
+ cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
+ cx->blk_sub.retop = retop;
+ SvREFCNT_inc_simple_void_NN(cv);
+ cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
+}
+
+
+/* subsets of cx_popsub() */
+
+PERL_STATIC_INLINE void
+S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
+{
+ CV *cv;
+
+ PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
+ assert(CxTYPE(cx) == CXt_SUB);
+
+ PL_comppad = cx->blk_sub.prevcomppad;
+ PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
+ cv = cx->blk_sub.cv;
+ CvDEPTH(cv) = cx->blk_sub.olddepth;
+ cx->blk_sub.cv = NULL;
+ SvREFCNT_dec(cv);
+}
+
+
+/* handle the @_ part of leaving a sub */
+
+PERL_STATIC_INLINE void
+S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
+{
+ AV *av;
+
+ PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
+ assert(CxTYPE(cx) == CXt_SUB);
+ assert(AvARRAY(MUTABLE_AV(
+ PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
+ CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
+
+ CX_POP_SAVEARRAY(cx);
+ av = MUTABLE_AV(PAD_SVl(0));
+ if (UNLIKELY(AvREAL(av)))
+ /* abandon @_ if it got reified */
+ clear_defarray(av, 0);
+ else {
+ CLEAR_ARGARRAY(av);
+ }
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_POPSUB;
+ assert(CxTYPE(cx) == CXt_SUB);
+
+ PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
+
+ if (CxHASARGS(cx))
+ cx_popsub_args(cx);
+ cx_popsub_common(cx);
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
+{
+ PERL_ARGS_ASSERT_CX_PUSHFORMAT;
+
+ cx->blk_format.cv = cv;
+ cx->blk_format.retop = retop;
+ cx->blk_format.gv = gv;
+ cx->blk_format.dfoutgv = PL_defoutgv;
+ cx->blk_format.prevcomppad = PL_comppad;
+ cx->blk_u16 = 0;
+
+ SvREFCNT_inc_simple_void_NN(cv);
+ CvDEPTH(cv)++;
+ SvREFCNT_inc_void(cx->blk_format.dfoutgv);
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
+{
+ CV *cv;
+ GV *dfout;
+
+ PERL_ARGS_ASSERT_CX_POPFORMAT;
+ assert(CxTYPE(cx) == CXt_FORMAT);
+
+ dfout = cx->blk_format.dfoutgv;
+ setdefout(dfout);
+ cx->blk_format.dfoutgv = NULL;
+ SvREFCNT_dec_NN(dfout);
+
+ PL_comppad = cx->blk_format.prevcomppad;
+ PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
+ cv = cx->blk_format.cv;
+ cx->blk_format.cv = NULL;
+ --CvDEPTH(cv);
+ SvREFCNT_dec_NN(cv);
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
+{
+ PERL_ARGS_ASSERT_CX_PUSHEVAL;
+
+ cx->blk_eval.retop = retop;
+ cx->blk_eval.old_namesv = namesv;
+ cx->blk_eval.old_eval_root = PL_eval_root;
+ cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
+ cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
+ cx->blk_eval.cur_top_env = PL_top_env;
+
+ assert(!(PL_in_eval & ~ 0x7F));
+ assert(!(PL_op->op_type & ~0x1FF));
+ cx->blk_u16 = (PL_in_eval & 0x7F) | ((U16)PL_op->op_type << 7);
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
+{
+ SV *sv;
+
+ PERL_ARGS_ASSERT_CX_POPEVAL;
+ assert(CxTYPE(cx) == CXt_EVAL);
+
+ PL_in_eval = CxOLD_IN_EVAL(cx);
+ PL_eval_root = cx->blk_eval.old_eval_root;
+ sv = cx->blk_eval.cur_text;
+ if (sv && SvSCREAM(sv)) {
+ cx->blk_eval.cur_text = NULL;
+ SvREFCNT_dec_NN(sv);
+ }
+
+ sv = cx->blk_eval.old_namesv;
+ if (sv) {
+ cx->blk_eval.old_namesv = NULL;
+ SvREFCNT_dec_NN(sv);
+ }
+}
+
+
+/* push a plain loop, i.e.
+ * { block }
+ * while (cond) { block }
+ * for (init;cond;continue) { block }
+ * This loop can be last/redo'ed etc.
+ */
+
+PERL_STATIC_INLINE void
+S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
+ cx->blk_loop.my_op = cLOOP;
+}
+
+
+/* push a true for loop, i.e.
+ * for var (list) { block }
+ */
+
+PERL_STATIC_INLINE void
+S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
+{
+ PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
+
+ /* this one line is common with cx_pushloop_plain */
+ cx->blk_loop.my_op = cLOOP;
+
+ cx->blk_loop.itervar_u.svp = (SV**)itervarp;
+ cx->blk_loop.itersave = itersave;
+#ifdef USE_ITHREADS
+ cx->blk_loop.oldcomppad = PL_comppad;
+#endif
+}
+
+
+/* pop all loop types, including plain */
+
+PERL_STATIC_INLINE void
+S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_POPLOOP;
+
+ assert(CxTYPE_is_LOOP(cx));
+ if ( CxTYPE(cx) == CXt_LOOP_ARY
+ || CxTYPE(cx) == CXt_LOOP_LAZYSV)
+ {
+ /* Free ary or cur. This assumes that state_u.ary.ary
+ * aligns with state_u.lazysv.cur. See cx_dup() */
+ SV *sv = cx->blk_loop.state_u.lazysv.cur;
+ cx->blk_loop.state_u.lazysv.cur = NULL;
+ SvREFCNT_dec_NN(sv);
+ if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
+ sv = cx->blk_loop.state_u.lazysv.end;
+ cx->blk_loop.state_u.lazysv.end = NULL;
+ SvREFCNT_dec_NN(sv);
+ }
+ }
+ if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
+ SV *cursv;
+ SV **svp = (cx)->blk_loop.itervar_u.svp;
+ if ((cx->cx_type & CXp_FOR_GV))
+ svp = &GvSV((GV*)svp);
+ cursv = *svp;
+ *svp = cx->blk_loop.itersave;
+ cx->blk_loop.itersave = NULL;
+ SvREFCNT_dec(cursv);
+ }
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_PUSHWHEN;
+
+ cx->blk_givwhen.leave_op = cLOGOP->op_other;
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_POPWHEN;
+ assert(CxTYPE(cx) == CXt_WHEN);
+
+ PERL_UNUSED_ARG(cx);
+ PERL_UNUSED_CONTEXT;
+ /* currently NOOP */
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
+{
+ PERL_ARGS_ASSERT_CX_PUSHGIVEN;
+
+ cx->blk_givwhen.leave_op = cLOGOP->op_other;
+ cx->blk_givwhen.defsv_save = orig_defsv;
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
+{
+ SV *sv;
+
+ PERL_ARGS_ASSERT_CX_POPGIVEN;
+ assert(CxTYPE(cx) == CXt_GIVEN);
+
+ sv = GvSV(PL_defgv);
+ GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
+ cx->blk_givwhen.defsv_save = NULL;
+ SvREFCNT_dec(sv);
+}
+
+/*
* ex: set ts=8 sts=4 sw=4 et:
*/