+#ifndef EBCDIC
+
+PERL_STATIC_INLINE unsigned int
+S__variant_byte_number(PERL_UINTMAX_T word)
+{
+
+    /* This returns the position in a word (0..7), counting bytes in memory
+     * order, of the first variant byte in it, i.e. the first byte whose most
+     * significant bit is set.  This is a helper function; the caller must
+     * only pass a word that contains at least one such byte.  Note that,
+     * apart from the VC6 fallback, there are no branches */
+
+    /* Get just the msb bits of each byte */
+    word &= PERL_VARIANTS_WORD_MASK;
+
+    /* The check has to come after the masking: a non-zero word none of whose
+     * bytes has its msb set contains no variant, and the bit manipulations
+     * below would silently yield a meaningless position for it */
+    assert(word);
+
+#  ifdef USING_MSVC6    /* VC6 has some issues with the normal code, and the
+                           easiest thing is to hide that from the callers */
+    {
+        unsigned int i;
+        const U8 * s = (U8 *) &word;
+        dTHX;
+
+        /* Plain byte scan in memory order; only the msb of each byte can
+         * still be set at this point */
+        for (i = 0; i < sizeof(word); i++ ) {
+            if (s[i]) {
+                return i;
+            }
+        }
+
+        Perl_croak(aTHX_ "panic: %s: %d: unexpected zero word\n",
+                                 __FILE__, __LINE__);
+    }
+
+#  elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
+
+    /* Bytes are stored like
+     *  Byte8 ... Byte2 Byte1
+     *  63..56...15...8 7...0
+     *
+     *  Isolate the lsb;
+     * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
+     *
+     * The word will look like this, with a rightmost set bit in position 's':
+     * ('x's are don't cares)
+     *      s
+     *  x..x100..0
+     *  x..xx10..0      Right shift (rightmost 0 is shifted off)
+     *  x..xx01..1      Subtract 1, turns all the trailing zeros into 1's and
+     *                  the 1 just to their left into a 0; the remainder is
+     *                  untouched
+     *  0..0011..1      The xor with x..xx10..0 clears that remainder, sets
+     *                  bottom to all 1
+     *  0..0100..0      Add 1 to clear the word except for the bit in 's'
+     *
+     * Another method is to do 'word &= -word'; but it generates a compiler
+     * message on some platforms about taking the negative of an unsigned */
+
+    word >>= 1;
+    word = 1 + (word ^ (word - 1));
+
+#  elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
+
+    /* Bytes are stored like
+     *  Byte1 Byte2  ... Byte8
+     * 63..56 55..47 ... 7...0
+     *
+     *  Isolate the msb; http://codeforces.com/blog/entry/10330
+     *
+     * Only the most significant set bit matters.  Or'ing word with its right
+     * shift of 1 makes that bit and the next one to its right both 1.  Then
+     * right shifting by 2 makes for 4 1-bits in a row. ...  We end with the
+     * msb and all to the right being 1. */
+    word |= word >>  1;
+    word |= word >>  2;
+    word |= word >>  4;
+    word |= word >>  8;
+    word |= word >> 16;
+    word |= word >> 32;  /* This should get optimized out on 32-bit systems. */
+
+    /* Then subtracting the right shift by 1 clears all but the left-most of
+     * the 1 bits, which is our desired result */
+    word -= (word >> 1);
+
+#  else
+#    error Unexpected byte order
+#  endif
+
+    /* Here 'word' has a single bit set:  the msb of the first byte in which it
+     * is set.  Calculate that position in the word.  We can use this
+     * specialized solution: https://stackoverflow.com/a/32339674/1626653,
+     * assumes an 8-bit byte.  (On a 32-bit machine, the larger numbers should
+     * just get shifted off at compile time)
+     *
+     * 'word >> 7' is a lone bit at the bottom of some byte, so the multiply
+     * merely shifts the constant left by a whole number of bytes, and the
+     * final right shift extracts the byte that ended up on top.
+     *
+     * NOTE(review): with a 4-byte word only the low multiplier bytes 39, 47,
+     * 55, 63 survive, so the value extracted would appear to be 39..63 rather
+     * than 7..31 -- confirm the 4-byte little-endian case before relying on
+     * it. */
+    word = (word >> 7) * ((UINTMAX_C( 7) << 56) | (UINTMAX_C(15) << 48)
+                        | (UINTMAX_C(23) << 40) | (UINTMAX_C(31) << 32)
+                        |           (39 <<  24) |           (47 <<  16)
+                        |           (55 <<   8) |           (63 <<   0));
+    word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
+
+    /* Here, word contains the position 7..63 of that bit.  Convert to 0..7 */
+    word = ((word + 1) >> 3) - 1;
+
+#  if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
+
+    /* And invert the result, as on this byte order the most significant byte
+     * is the one that comes first in memory */
+    word = CHARBITS - word - 1;
+
+#  endif
+
+    return (unsigned int) word;
+}
+
+#endif
+#if defined(PERL_CORE) || defined(PERL_EXT)
+
+/*
+=for apidoc variant_under_utf8_count
+
+This function looks at the sequence of bytes between C<s> and C<e>, which are
+assumed to be encoded in ASCII/Latin1, and returns how many of them would
+change should the string be translated into UTF-8. Due to the nature of UTF-8,
+each of these would occupy two bytes instead of the single one in the input
+string. Thus, this function returns the precise number of bytes the string
+would expand by when translated to UTF-8.
+
+Unlike most of the other functions that have C<utf8> in their name, the input
+to this function is NOT a UTF-8-encoded string. The function name is slightly
+I<odd> to emphasize this.
+
+This function is internal to Perl because khw thinks that any XS code that
+would want this is probably operating too close to the internals. Presenting a
+valid use case could change that.
+
+See also
+C<L<perlapi/is_utf8_invariant_string>>
+and
+C<L<perlapi/is_utf8_invariant_string_loc>>.
+
+=cut
+
+*/
+
+PERL_STATIC_INLINE Size_t
+S_variant_under_utf8_count(const U8* const s, const U8* const e)
+{
+    /* Returns the number of bytes in the half-open range s..e for which
+     * UTF8_IS_INVARIANT() is false.  's' points to the first byte to examine,
+     * 'e' to one past the last.  On non-EBCDIC platforms, sufficiently long
+     * inputs are examined a word at a time between a per-byte head (up to the
+     * first word boundary) and a per-byte tail. */
+
+    const U8* x = s;
+    Size_t count = 0;
+
+    PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
+
+#  ifndef EBCDIC
+
+    /* Test if the string is long enough to use word-at-a-time.  (Logic is the
+     * same as for is_utf8_invariant_string()) */
+    if ((STRLEN) (e - x) >= PERL_WORDSIZE
+                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
+                          - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
+    {
+
+        /* Process per-byte until reach word boundary.  XXX This loop could be
+         * eliminated if we knew that this platform had fast unaligned reads */
+        while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
+            count += ! UTF8_IS_INVARIANT(*x++);
+        }
+
+        /* Process per-word as long as we have at least a full word left */
+        do {    /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
+                   explanation of how this works */
+
+            /* Masking and shifting right by 7 leaves a 0 or 1 in each byte;
+             * the multiply (presumably by 0x0101...01 -- confirm against the
+             * definition of PERL_COUNT_MULTIPLIER) accumulates those into the
+             * most significant byte, which the final shift brings down.
+             *
+             * The whole computation must be done in the full word width and
+             * only then narrowed:  a cast binds tighter than '>>', so casting
+             * the product directly would truncate it first, losing the top
+             * byte wherever Size_t is narrower than PERL_UINTMAX_T */
+            const PERL_UINTMAX_T increment
+                = ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
+                      * PERL_COUNT_MULTIPLIER)
+                    >> ((PERL_WORDSIZE - 1) * CHARBITS);
+
+            count += (Size_t) increment;
+            x += PERL_WORDSIZE;
+
+            /* 'x' cannot have passed 'e' here, so compare the remaining
+             * length instead of forming 'x + PERL_WORDSIZE', which could
+             * point beyond one-past-the-end of the buffer */
+        } while ((STRLEN) (e - x) >= PERL_WORDSIZE);
+    }
+
+#  endif
+
+    /* Process per-byte whatever remains (the entire input when the
+     * word-at-a-time path above was not taken) */
+    while (x < e) {
+        if (! UTF8_IS_INVARIANT(*x)) {
+            count++;
+        }
+
+        x++;
+    }
+
+    return count;
+}
+
+#endif
+
+/* Remove the word-at-a-time helper macros from the preprocessor namespace now
+ * that the code above is done with them.  They are left defined only when
+ * PERL_IN_REGEXEC_C is defined. */
+#ifndef PERL_IN_REGEXEC_C   /* Keep these around for that file */
+#  undef PERL_WORDSIZE
+#  undef PERL_COUNT_MULTIPLIER
+#  undef PERL_WORD_BOUNDARY_MASK
+#  undef PERL_VARIANTS_WORD_MASK
+#endif
+