/* ------------------------------- handy.h ------------------------------- */
/* saves machine code for a common noreturn idiom typically used in Newx*() */
-#ifdef GCC_DIAG_PRAGMA
-GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
-#endif
+GCC_DIAG_IGNORE_DECL(-Wunused-function);
static void
S_croak_memory_wrap(void)
{
Perl_croak_nocontext("%s",PL_memory_wrap);
}
-#ifdef GCC_DIAG_PRAGMA
-GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
-#endif
+GCC_DIAG_RESTORE_DECL;
/* ------------------------------- utf8.h ------------------------------- */
* or'ing together the lowest bits of 'x'. Hopefully the final term gets
* optimized out completely on a 32-bit system, and its mask gets optimized out
* on a 64-bit system */
-#define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
- | (PTR2nat(x) >> 1) \
- | ( (PTR2nat(x) >> 2) \
- & PERL_WORD_BOUNDARY_MASK)))
+#define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
+ | ( PTR2nat(x) >> 1) \
+ | ( ( (PTR2nat(x) \
+ & PERL_WORD_BOUNDARY_MASK) >> 2))))
/* Do the word-at-a-time iff there is at least one usable full word. That
* means that after advancing to a word boundary, there still is at least a
} while (x + PERL_WORDSIZE <= send);
}
-# undef PERL_WORDSIZE
-# undef PERL_WORD_BOUNDARY_MASK
-# undef PERL_VARIANTS_WORD_MASK
#endif
/* Process per-byte */
return TRUE;
}
+#if defined(PERL_CORE) || defined(PERL_EXT)
+
+/*
+=for apidoc variant_under_utf8_count
+
+This function looks at the sequence of bytes between C<s> and C<e>, which are
+assumed to be encoded in ASCII/Latin1, and returns how many of them would
+change should the string be translated into UTF-8. Due to the nature of UTF-8,
+each of these would occupy two bytes instead of the single one in the input
+string. Thus, this function returns the precise number of bytes the string
+would expand by when translated to UTF-8.
+
+Unlike most of the other functions that have C<utf8> in their name, the input
+to this function is NOT a UTF-8-encoded string. The function name is slightly
+I<odd> to emphasize this.
+
+This function is internal to Perl because khw thinks that any XS code that
+would want this is probably operating too close to the internals. Presenting a
+valid use case could change that.
+
+See also
+C<L<perlapi/is_utf8_invariant_string>>
+and
+C<L<perlapi/is_utf8_invariant_string_loc>>,
+
+=cut
+
+*/
+
+PERL_STATIC_INLINE Size_t
+S_variant_under_utf8_count(const U8* const s, const U8* const e)
+{
+ const U8* x = s;
+ Size_t count = 0;
+
+ PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
+
+# ifndef EBCDIC
+
+ if ((STRLEN) (e - x) >= PERL_WORDSIZE
+ + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
+ - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
+ {
+
+ /* Process per-byte until reach word boundary. XXX This loop could be
+ * eliminated if we knew that this platform had fast unaligned reads */
+ while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
+ count += ! UTF8_IS_INVARIANT(*x++);
+ }
+
+ /* Process per-word as long as we have at least a full word left */
+ do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
+ explanation of how this works */
+ count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
+ * PERL_COUNT_MULTIPLIER)
+ >> ((PERL_WORDSIZE - 1) * CHARBITS);
+ x += PERL_WORDSIZE;
+ } while (x + PERL_WORDSIZE <= e);
+ }
+
+# endif
+
+ /* Process per-byte */
+ while (x < e) {
+ if (! UTF8_IS_INVARIANT(*x)) {
+ count++;
+ }
+
+ x++;
+ }
+
+ return count;
+}
+
+#endif
+
+#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
+# undef PERL_WORDSIZE
+# undef PERL_COUNT_MULTIPLIER
+# undef PERL_WORD_BOUNDARY_MASK
+# undef PERL_VARIANTS_WORD_MASK
+#endif
+
/*
=for apidoc is_utf8_string
s--;
}
}
- GCC_DIAG_IGNORE(-Wcast-qual);
+ GCC_DIAG_IGNORE_STMT(-Wcast-qual);
return (U8 *)s;
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
}
/*
while (off--) {
STRLEN skip = UTF8SKIP(s);
if ((STRLEN)(end - s) <= skip) {
- GCC_DIAG_IGNORE(-Wcast-qual);
+ GCC_DIAG_IGNORE_STMT(-Wcast-qual);
return (U8 *)end;
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
}
s += skip;
}
- GCC_DIAG_IGNORE(-Wcast-qual);
+ GCC_DIAG_IGNORE_STMT(-Wcast-qual);
return (U8 *)s;
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
}
/*
s--;
}
- GCC_DIAG_IGNORE(-Wcast-qual);
+ GCC_DIAG_IGNORE_STMT(-Wcast-qual);
return (U8 *)s;
- GCC_DIAG_RESTORE;
+ GCC_DIAG_RESTORE_STMT;
}
/*