-#ifndef UV_IS_QUAD
-
-STATIC int
-S_is_utf8_cp_above_31_bits(const U8 * const s,
- const U8 * const e,
- const bool consider_overlongs)
-{
- /* Determines whether the first code point represented by the
- * Perl-extended-UTF-8-encoded string starting at 's', and looking no
- * further than 'e - 1', doesn't fit into 31 bits. That is, whether it is
- * >= 2**31.
- *
- * The return value has three possible meanings:
- *   positive  the code point requires more than 31 bits
- *   0         the code point fits in 31 bits
- *   -1        the bytes available are too few to decide either way
- *
- * The function handles the case where the input bytes do not include all
- * the ones necessary to represent a full character. That is, they may be
- * the initial bytes of the representation of a code point, but possibly
- * the final ones necessary for the complete representation may be beyond
- * 'e - 1'.
- *
- * The function also can handle the case where the input is an overlong
- * sequence. If 'consider_overlongs' is 0, the function assumes the
- * input is not overlong, without checking, and will return based on that
- * assumption. If this parameter is 1, the function will go to the trouble
- * of figuring out if it actually evaluates to above or below 31 bits.
- * (On EBCDIC builds this parameter is unused.)
- *
- * The caller must pass at least one byte ('e' > 's'), and '*s' must not be
- * an invariant; both are asserted below. The sequence is otherwise
- * assumed to be well-formed, without checking.
- */
-
- const STRLEN len = e - s;
- int is_overlong;
-
- PERL_ARGS_ASSERT_IS_UTF8_CP_ABOVE_31_BITS;
-
- assert(! UTF8_IS_INVARIANT(*s) && e > s);
-
-#ifdef EBCDIC
-
- PERL_UNUSED_ARG(consider_overlongs);
-
- /* On the EBCDIC code pages we handle, only the native start byte 0xFE can
- * mean a 32-bit or larger code point (0xFF is an invariant). 0xFE can
- * also be the start byte for a 31-bit code point; we need at least 2
- * bytes, and maybe up through 8 bytes, to determine that. (It can also be
- * the start byte for an overlong sequence, but for 30-bit or smaller code
- * points, so we don't have to worry about overlongs on EBCDIC.) */
- if (*s != 0xFE) {
- return 0;
- }
-
- if (len == 1) {
- return -1;
- }
-
-#else
-
- /* On ASCII, FE and FF are the only start bytes that can evaluate to
- * needing more than 31 bits. */
- if (LIKELY(*s < 0xFE)) {
- return 0;
- }
-
- /* What we have left are FE and FF. Both of these require more than 31
- * bits unless they are for overlongs. */
- if (! consider_overlongs) {
- return 1;
- }
-
- /* Here, we have FE or FF. If the input isn't overlong, it evaluates to
- * above 31 bits. But we need more than one byte to discern this, so if
- * passed just the start byte, it could be an overlong evaluating to
- * smaller. */
- if (len == 1) {
- return -1;
- }
-
- /* Having excluded len==1, and knowing that FE and FF are both valid start
- * bytes, we can call the function below to see if the sequence is
- * overlong. (We don't need the full generality of the called function,
- * but for these huge code points, speed shouldn't be a consideration, and
- * the compiler does have enough information, since it's static to this
- * file, to optimize to just the needed parts.) */
- is_overlong = is_utf8_overlong_given_start_byte_ok(s, len);
-
- /* If it isn't overlong, more than 31 bits are required. */
- if (is_overlong == 0) {
- return 1;
- }
-
- /* If it is indeterminate whether it is overlong, return that. */
- if (is_overlong < 0) {
- return -1;
- }
-
- /* Here, it is overlong. Such a sequence starting with FE fits in 31
- * bits, as the max it can be is 2**31 - 1 */
- if (*s == 0xFE) {
- return 0;
- }
-
-#endif
-
- /* Here, ASCII and EBCDIC rejoin:
- * On ASCII: We have an overlong sequence starting with FF
- * On EBCDIC: We have a sequence starting with FE. */
-
- { /* For C89, use a block so the declaration can be close to its use */
-
-#ifdef EBCDIC
-
- /* U+7FFFFFFF (2 ** 31 - 1)
- * [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] 10 11 12 13
- * IBM-1047: \xFE\x41\x41\x41\x41\x41\x41\x42\x73\x73\x73\x73\x73\x73
- * IBM-037: \xFE\x41\x41\x41\x41\x41\x41\x42\x72\x72\x72\x72\x72\x72
- * POSIX-BC: \xFE\x41\x41\x41\x41\x41\x41\x42\x75\x75\x75\x75\x75\x75
- * I8: \xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA1\xBF\xBF\xBF\xBF\xBF\xBF
- * U+80000000 (2 ** 31):
- * IBM-1047: \xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41
- * IBM-037: \xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41
- * POSIX-BC: \xFE\x41\x41\x41\x41\x41\x41\x43\x41\x41\x41\x41\x41\x41
- * I8: \xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA2\xA0\xA0\xA0\xA0\xA0\xA0
- *
- * and since we know that *s = \xfe, any continuation sequence
- * following it that is gt the below is above 31 bits
- [0] [1] [2] [3] [4] [5] [6] */
- const U8 conts_for_highest_30_bit[] = "\x41\x41\x41\x41\x41\x41\x42";
-
-#else
-
- /* FF overlong for U+7FFFFFFF (2 ** 31 - 1)
- * ASCII: \xFF\x80\x80\x80\x80\x80\x80\x81\xBF\xBF\xBF\xBF\xBF
- * FF overlong for U+80000000 (2 ** 31):
- * ASCII: \xFF\x80\x80\x80\x80\x80\x80\x82\x80\x80\x80\x80\x80
- * and since we know that *s = \xff, any continuation sequence
- * following it that is gt the below is above 31 bits
- [0] [1] [2] [3] [4] [5] [6] */
- const U8 conts_for_highest_30_bit[] = "\x80\x80\x80\x80\x80\x80\x81";
-
-
-#endif
- const STRLEN conts_len = sizeof(conts_for_highest_30_bit) - 1;
- const STRLEN cmp_len = MIN(conts_len, len - 1);
-
- /* 'cmp_len' is how many continuation bytes we can actually compare: the
- * lesser of the number compiled in above and the number available in
- * the input after the start byte.
- *
- * Now compare the continuation bytes in s with the ones we have
- * compiled in, which are the leading continuation bytes of the largest
- * code point that fits in 31 bits, U+7FFFFFFF (the array's name says
- * 30). If we have enough bytes available to determine the answer, or
- * the bytes we do have differ from them, we can compare the two to get
- * a definitive answer. (Note that in UTF-EBCDIC, the two lowest
- * possible continuation bytes are \x41 and \x42.) */
- if (cmp_len >= conts_len || memNE(s + 1,
- conts_for_highest_30_bit,
- cmp_len))
- {
- return cBOOL(memGT(s + 1, conts_for_highest_30_bit, cmp_len));
- }
-
- /* Here, all the bytes we have are the same as the corresponding ones
- * for the largest code point that fits in 31 bits, but we are missing
- * so many bytes that we can't make the determination */
- return -1;
- }
-}
-
-#endif
-