make dNOOP usable outside function in C++

[perl5.git] / utf8.c
diff --git a/utf8.c b/utf8.c

index d64ce90..3123bd0 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -37,9 +37,10 @@ static const char malformed_text[] = "Malformed UTF-8 character";
  static const char unees[] =
                          "Malformed UTF-8 character (unexpected end of string)";
  static const char cp_above_legal_max[] =
- "Use of code point 0x%" UVXf " is deprecated; the permissible max is 0x%" UVXf ". This will be fatal in Perl 5.28";
+                        "Use of code point 0x%" UVXf " is not allowed; the"
+                        " permissible max is 0x%" UVXf;
  
-#define MAX_NON_DEPRECATED_CP ((UV) (IV_MAX))
+#define MAX_EXTERNALLY_LEGAL_CP ((UV) (IV_MAX))
  
  /*
  =head1 Unicode Support
@@ -207,11 +208,8 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, const UV flags)
       * performance hit on these high EBCDIC code points. */
  
      if (UNLIKELY(UNICODE_IS_SUPER(uv))) {
-        if (   UNLIKELY(uv > MAX_NON_DEPRECATED_CP)
-            && ckWARN_d(WARN_DEPRECATED))
-        {
-            Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                        cp_above_legal_max, uv, MAX_NON_DEPRECATED_CP);
+        if (UNLIKELY(uv > MAX_EXTERNALLY_LEGAL_CP)) {
+            Perl_croak(aTHX_ cp_above_legal_max, uv, MAX_EXTERNALLY_LEGAL_CP);
          }
          if (      (flags & UNICODE_WARN_SUPER)
              || (  (flags & UNICODE_WARN_PERL_EXTENDED)
@@ -295,9 +293,8 @@ is the recommended wide native character-aware way of saying
  
      *(d++) = uv;
  
-This function accepts any UV as input, but very high code points (above
-C<IV_MAX> on the platform)  will raise a deprecation warning.  This is
-typically 0x7FFF_FFFF in a 32-bit word.
+This function accepts any code point from 0..C<IV_MAX> as input.
+C<IV_MAX> is typically 0x7FFF_FFFF in a 32-bit word.
  
  It is possible to forbid or warn on non-Unicode code points, or those that may
  be problematic by using L</uvchr_to_utf8_flags>.
@@ -332,9 +329,8 @@ This is the Unicode-aware way of saying
  
      *(d++) = uv;
  
-If C<flags> is 0, this function accepts any UV as input, but very high code
-points (above C<IV_MAX> for the platform)  will raise a deprecation warning.
-This is typically 0x7FFF_FFFF in a 32-bit word.
+If C<flags> is 0, this function accepts any code point from 0..C<IV_MAX> as
+input.  C<IV_MAX> is typically 0x7FFF_FFFF in a 32-bit word.
  
  Specifying C<flags> can further restrict what is allowed and not warned on, as
  follows:
@@ -656,21 +652,13 @@ S_isFF_OVERLONG(const U8 * const s, const STRLEN len)
      return -1;
  }
  
-/* Anything larger than this will overflow the word if it were converted into a UV */
-#if defined(UV_IS_QUAD)
+#if defined(UV_IS_QUAD) /* These assume IV_MAX is 2**63-1 */
  #  ifdef EBCDIC     /* Actually is I8 */
  #   define HIGHEST_REPRESENTABLE_UTF8                                       \
-                "\xFF\xAF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
+                "\xFF\xA7\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
  #  else
  #   define HIGHEST_REPRESENTABLE_UTF8                                       \
-                "\xFF\x80\x8F\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
-#  endif
-#else   /* 32-bit */
-#  ifdef EBCDIC
-#   define HIGHEST_REPRESENTABLE_UTF8                                       \
-                "\xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA3\xBF\xBF\xBF\xBF\xBF\xBF"
-#  else
-#   define HIGHEST_REPRESENTABLE_UTF8  "\xFE\x83\xBF\xBF\xBF\xBF\xBF"
+                "\xFF\x80\x87\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
  #  endif
  #endif
  
@@ -680,13 +668,13 @@ S_does_utf8_overflow(const U8 * const s,
                       const bool consider_overlongs)
  {
      /* Returns an int indicating whether or not the UTF-8 sequence from 's' to
-     * 'e' - 1 would overflow a UV on this platform; that is if it represents a
-     * code point larger than the highest representable code point.  It returns
-     * 1 if it does overflow; 0 if it doesn't, and -1 if there isn't enough
-     * information to tell.  This last return value can happen if the sequence
-     * is incomplete, missing some trailing bytes that would form a complete
-     * character.  If there are enough bytes to make a definitive decision,
-     * this function does so.
+     * 'e' - 1 would overflow an IV on this platform; that is if it represents
+     * a code point larger than the highest representable code point.  It
+     * returns 1 if it does overflow; 0 if it doesn't, and -1 if there isn't
+     * enough information to tell.  This last return value can happen if the
+     * sequence is incomplete, missing some trailing bytes that would form a
+     * complete character.  If there are enough bytes to make a definitive
+     * decision, this function does so.
       *
       * If 'consider_overlongs' is TRUE, the function checks for the possibility
       * that the sequence is an overlong that doesn't overflow.  Otherwise, it
@@ -699,13 +687,77 @@ S_does_utf8_overflow(const U8 * const s,
       *
       * 'e' - 1 must not be beyond a full character. */
  
-    const STRLEN len = e - s;
-    const U8 *x;
-    const U8 * y = (const U8 *) HIGHEST_REPRESENTABLE_UTF8;
  
      PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW;
      assert(s <= e && s + UTF8SKIP(s) >= e);
  
+#if ! defined(UV_IS_QUAD)
+
+    return is_utf8_cp_above_31_bits(s, e, consider_overlongs);
+
+#else
+
+    PERL_UNUSED_ARG(consider_overlongs);
+
+    {
+        const STRLEN len = e - s;
+        const U8 *x;
+        const U8 * y = (const U8 *) HIGHEST_REPRESENTABLE_UTF8;
+
+        for (x = s; x < e; x++, y++) {
+
+            if (UNLIKELY(NATIVE_UTF8_TO_I8(*x) == *y)) {
+                continue;
+            }
+
+            /* If this byte is larger than the corresponding highest UTF-8
+             * byte, the sequence overflows; otherwise the byte is less than,
+             * and so the sequence doesn't overflow */
+            return NATIVE_UTF8_TO_I8(*x) > *y;
+
+        }
+
+        /* Got to the end and all bytes are the same.  If the input is a whole
+         * character, it doesn't overflow.  And if it is a partial character,
+         * there's not enough information to tell */
+        if (len < sizeof(HIGHEST_REPRESENTABLE_UTF8) - 1) {
+            return -1;
+        }
+
+        return 0;
+    }
+
+#endif
+
+}
+
+#if 0
+
+/* These are the portions of the above function that deal with UV_MAX instead
+ * of IV_MAX.  They are left here in case we want to combine them so that
+ * internal uses can have larger code points.  The only logic difference is
+ * that the 32-bit EBCDIC platform is treated like the 64-bit, and the 32-bit
+ * ASCII has different logic.
+ */
+
+/* Anything larger than this will overflow the word if it were converted into a UV */
+#if defined(UV_IS_QUAD)
+#  ifdef EBCDIC     /* Actually is I8 */
+#   define HIGHEST_REPRESENTABLE_UTF8                                       \
+                "\xFF\xAF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
+#  else
+#   define HIGHEST_REPRESENTABLE_UTF8                                       \
+                "\xFF\x80\x8F\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
+#  endif
+#else   /* 32-bit */
+#  ifdef EBCDIC
+#   define HIGHEST_REPRESENTABLE_UTF8                                       \
+                "\xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA3\xBF\xBF\xBF\xBF\xBF\xBF"
+#  else
+#   define HIGHEST_REPRESENTABLE_UTF8  "\xFE\x83\xBF\xBF\xBF\xBF\xBF"
+#  endif
+#endif
+
  #if ! defined(UV_IS_QUAD) && ! defined(EBCDIC)
  
      /* On 32 bit ASCII machines, many overlongs that start with FF don't
@@ -725,34 +777,14 @@ S_does_utf8_overflow(const U8 * const s,
          return s[sizeof(FF_OVERLONG_PREFIX) - 1] > 0x83;
      }
  
-#else
-
-    PERL_UNUSED_ARG(consider_overlongs);
-
+/* Starting with the #else, the rest of the function is identical except
+ *      1.  we need to move the 'len' declaration to be global to the function
+ *      2.  the endif moves to just after the UNUSED_ARG.
+ * An empty endif is given just below to satisfy the preprocessor
+ */
  #endif
  
-    for (x = s; x < e; x++, y++) {
-
-        if (UNLIKELY(NATIVE_UTF8_TO_I8(*x) == *y)) {
-            continue;
-        }
-
-        /* If this byte is larger than the corresponding highest UTF-8 byte,
-         * the sequence overflow; otherwise the byte is less than, and so the
-         * sequence doesn't overflow */
-        return NATIVE_UTF8_TO_I8(*x) > *y;
-
-    }
-
-    /* Got to the end and all bytes are the same.  If the input is a whole
-     * character, it doesn't overflow.  And if it is a partial character,
-     * there's not enough information to tell */
-    if (len < sizeof(HIGHEST_REPRESENTABLE_UTF8) - 1) {
-        return -1;
-    }
-
-    return 0;
-}
+#endif
  
  #undef F0_ABOVE_OVERLONG
  #undef F8_ABOVE_OVERLONG
@@ -920,10 +952,10 @@ Perl__is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags)
  }
  
  char *
-Perl__byte_dump_string(pTHX_ const U8 * s, const STRLEN len, const bool format)
+Perl__byte_dump_string(pTHX_ const U8 * const start, const STRLEN len, const bool format)
  {
      /* Returns a mortalized C string that is a displayable copy of the 'len'
-     * bytes starting at 's'.  'format' gives how to display each byte.
+     * bytes starting at 'start'.  'format' gives how to display each byte.
       * Currently, there are only two formats, so it is currently a bool:
       *      0   \xab
       *      1    ab         (that is a space between two hex digit bytes)
@@ -931,7 +963,8 @@ Perl__byte_dump_string(pTHX_ const U8 * s, const STRLEN len, const bool format)
  
      const STRLEN output_len = 4 * len + 1;  /* 4 bytes per each input, plus a
                                                 trailing NUL */
-    const U8 * const e = s + len;
+    const U8 * s = start;
+    const U8 * const e = start + len;
      char * output;
      char * d;
  
@@ -941,12 +974,14 @@ Perl__byte_dump_string(pTHX_ const U8 * s, const STRLEN len, const bool format)
      SAVEFREEPV(output);
  
      d = output;
-    for (; s < e; s++) {
+    for (s = start; s < e; s++) {
          const unsigned high_nibble = (*s & 0xF0) >> 4;
          const unsigned low_nibble =  (*s & 0x0F);
  
          if (format) {
-            *d++ = ' ';
+            if (s > start) {
+                *d++ = ' ';
+            }
          }
          else {
              *d++ = '\\';
@@ -1103,10 +1138,6 @@ EBCDIC platforms, and sometimes when the L<overlong
  malformation|/C<UTF8_GOT_LONG>> is also present.  The new names accurately
  describe the situation in all cases.
  
-It is now deprecated to have very high code points (above C<IV_MAX> on the
-platforms) and this function will raise a deprecation warning for these (unless
-such warnings are turned off).  This value is typically 0x7FFF_FFFF (2**31 -1)
-in a 32-bit word.
  
  All other code points corresponding to Unicode characters, including private
  use and those yet to be assigned, are never considered malformed and never
@@ -1213,7 +1244,7 @@ in a position where only a continuation type one should be.
  =item C<UTF8_GOT_OVERFLOW>
  
  The input sequence was malformed in that it is for a code point that is not
-representable in the number of bits available in a UV on the current platform.
+representable in the number of bits available in an IV on the current platform.
  
  =item C<UTF8_GOT_SHORT>
  
@@ -1474,14 +1505,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                       |UTF8_WARN_NONCHAR
                        |UTF8_WARN_SURROGATE
                        |UTF8_WARN_SUPER
-                      |UTF8_WARN_PERL_EXTENDED))
-                   /* In case of a malformation, 'uv' is not valid, and has
-                    * been changed to something in the Unicode range.
-                    * Currently we don't output a deprecation message if there
-                    * is already a malformation, so we don't have to special
-                    * case the test immediately below */
-            || (   UNLIKELY(uv > MAX_NON_DEPRECATED_CP)
-                && ckWARN_d(WARN_DEPRECATED))))
+                      |UTF8_WARN_PERL_EXTENDED))))
      {
          /* If there were no malformations, or the only malformation is an
           * overlong, 'uv' is valid */
@@ -1587,11 +1611,9 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                      disallowed = TRUE;
                  }
  
-                /* Likewise, warn if any say to, plus if deprecation warnings
-                 * are on, because this code point is above IV_MAX */
-                if (      ckWARN_d(WARN_DEPRECATED)
-                    || ! (flags & UTF8_ALLOW_OVERFLOW)
-                    ||   (flags & (UTF8_WARN_SUPER|UTF8_WARN_PERL_EXTENDED)))
+                /* Likewise, warn if any say to */
+                if (  ! (flags & UTF8_ALLOW_OVERFLOW)
+                    ||  (flags & (UTF8_WARN_SUPER|UTF8_WARN_PERL_EXTENDED)))
                  {
  
                      /* The warnings code explicitly says it doesn't handle the
@@ -1793,21 +1815,6 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                      *errors |= UTF8_GOT_SUPER;
                      disallowed = TRUE;
                  }
-
-                /* The deprecated warning overrides any non-deprecated one.  If
-                 * there are other problems, a deprecation message is not
-                 * really helpful, so don't bother to raise it in that case.
-                 * This also keeps the code from having to handle the case
-                 * where 'uv' is not valid. */
-                if (   ! (orig_problems
-                                    & (UTF8_GOT_TOO_SHORT|UTF8_GOT_OVERFLOW))
-                    && UNLIKELY(uv > MAX_NON_DEPRECATED_CP)
-                    && ckWARN_d(WARN_DEPRECATED))
-                {
-                    message = Perl_form(aTHX_ cp_above_legal_max,
-                                              uv, MAX_NON_DEPRECATED_CP);
-                    pack_warn = packWARN(WARN_DEPRECATED);
-                }
              }
              else if (possible_problems & UTF8_GOT_NONCHAR) {
                  possible_problems &= ~UTF8_GOT_NONCHAR;
@@ -1873,9 +1880,15 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                              U8 tmpbuf[UTF8_MAXBYTES+1];
                              const U8 * const e = uvoffuni_to_utf8_flags(tmpbuf,
                                                                          uv, 0);
-                            const char * preface = (uv <= PERL_UNICODE_MAX)
-                                                   ? "U+"
-                                                   : "0x";
+                            /* Don't use U+ for non-Unicode code points, which
+                             * includes those in the Latin1 range */
+                            const char * preface = (    uv > PERL_UNICODE_MAX
+#ifdef EBCDIC
+                                                     || uv <= 0xFF
+#endif
+                                                    )
+                                                   ? "0x"
+                                                   : "U+";
                              message = Perl_form(aTHX_
                                  "%s: %s (overlong; instead use %s to represent"
                                  " %s%0*" UVXf ")",
@@ -1885,7 +1898,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                                  preface,
                                  ((uv < 256) ? 2 : 4), /* Field width of 2 for
                                                           small code points */
-                                uv);
+                                UNI_TO_NATIVE(uv));
                          }
                      }
                  }
@@ -1936,9 +1949,6 @@ the next possible position in C<s> that could begin a non-malformed character.
  See L</utf8n_to_uvchr> for details on when the REPLACEMENT CHARACTER is
  returned.
  
-Code points above the platform's C<IV_MAX> will raise a deprecation warning,
-unless those are turned off.
-
  =cut
  
  Also implemented as a macro in utf8.h
@@ -1978,9 +1988,6 @@ is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
  next possible position in C<s> that could begin a non-malformed character.
  See L</utf8n_to_uvchr> for details on when the REPLACEMENT CHARACTER is returned.
  
-Code points above the platform's C<IV_MAX> will raise a deprecation warning,
-unless those are turned off.
-
  =cut
  */
  
@@ -1991,16 +1998,18 @@ Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
  
      assert(send > s);
  
-    /* Call the low level routine, asking for checks */
      return NATIVE_TO_UNI(utf8_to_uvchr_buf(s, send, retlen));
  }
  
  /*
  =for apidoc utf8_length
  
-Return the length of the UTF-8 char encoded string C<s> in characters.
-Stops at C<e> (inclusive).  If C<e E<lt> s> or if the scan would end
-up past C<e>, croaks.
+Returns the number of characters in the sequence of UTF-8-encoded bytes starting
+at C<s> and ending at the byte just before C<e>.  If C<s> and C<e> point to
+the same place, it returns 0 with no warning raised.
+
+If C<e E<lt> s> or if the scan would end up past C<e>, it raises a UTF8 warning
+and returns the number of valid characters.
  
  =cut
  */
@@ -2300,8 +2309,8 @@ Perl_bytes_from_utf8_loc(const U8 *s, STRLEN *lenp, bool *is_utf8p, const U8** f
      }
  
    finish_and_return:
-        *d = '\0';
-        *lenp = d - converted_start;
+    *d = '\0';
+    *lenp = d - converted_start;
  
      /* Trim unused space */
      Renew(converted_start, *lenp + 1, U8);
@@ -2347,16 +2356,30 @@ Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *lenp)
          append_utf8_from_native_byte(*s, &d);
          s++;
      }
+
      *d = '\0';
      *lenp = d-dst;
+
+    /* Trim unused space */
+    Renew(dst, *lenp + 1, U8);
+
      return dst;
  }
  
  /*
- * Convert native (big-endian) or reversed (little-endian) UTF-16 to UTF-8.
+ * Convert native (big-endian) UTF-16 to UTF-8.  For reversed (little-endian),
+ * use utf16_to_utf8_reversed().
   *
- * Destination must be pre-extended to 3/2 source.  Do not use in-place.
- * We optimize for native, for obvious reasons. */
+ * UTF-16 requires 2 bytes for every code point below 0x10000; otherwise 4 bytes.
+ * UTF-8 requires 1-3 bytes for every code point below 0x10000; otherwise 4 bytes.
+ * UTF-EBCDIC requires 1-4 bytes for every code point below 0x10000; otherwise 4-5 bytes.
+ *
+ * These functions don't check for overflow.  The worst case is every code
+ * point in the input is 2 bytes, and requires 4 bytes on output.  (If the code
+ * is never going to run in EBCDIC, it is 2 bytes requiring 3 on output.)  Therefore the
+ * destination must be pre-extended to 2 times the source length.
+ *
+ * Do not use in-place.  We optimize for native, for obvious reasons. */
  
  U8*
  Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
@@ -2384,10 +2407,12 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
             *d++ = UTF8_TWO_BYTE_LO(UNI_TO_NATIVE(uv));
             continue;
         }
+
  #define FIRST_HIGH_SURROGATE UNICODE_SURROGATE_FIRST
  #define LAST_HIGH_SURROGATE  0xDBFF
  #define FIRST_LOW_SURROGATE  0xDC00
  #define LAST_LOW_SURROGATE   UNICODE_SURROGATE_LAST
+#define FIRST_IN_PLANE1      0x10000
  
          /* This assumes that most uses will be in the first Unicode plane, not
           * needing surrogates */
@@ -2406,13 +2431,13 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
                  }
                 p += 2;
                 uv = ((uv - FIRST_HIGH_SURROGATE) << 10)
-                                       + (low - FIRST_LOW_SURROGATE) + 0x10000;
+                                + (low - FIRST_LOW_SURROGATE) + FIRST_IN_PLANE1;
             }
         }
  #ifdef EBCDIC
          d = uvoffuni_to_utf8_flags(d, uv, 0);
  #else
-       if (uv < 0x10000) {
+       if (uv < FIRST_IN_PLANE1) {
             *d++ = (U8)(( uv >> 12)         | 0xe0);
             *d++ = (U8)(((uv >>  6) & 0x3f) | 0x80);
             *d++ = (U8)(( uv        & 0x3f) | 0x80);
@@ -3118,11 +3143,9 @@ S__to_utf8_case(pTHX_ const UV uv1, const U8 *p, U8* ustrp, STRLEN *lenp,
                  }
  
                  if (UNLIKELY(UNICODE_IS_SUPER(uv1))) {
-                    if (   UNLIKELY(uv1 > MAX_NON_DEPRECATED_CP)
-                        && ckWARN_d(WARN_DEPRECATED))
-                    {
-                        Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                                cp_above_legal_max, uv1, MAX_NON_DEPRECATED_CP);
+                    if (UNLIKELY(uv1 > MAX_EXTERNALLY_LEGAL_CP)) {
+                        Perl_croak(aTHX_ cp_above_legal_max, uv1,
+                                         MAX_EXTERNALLY_LEGAL_CP);
                      }
                      if (ckWARN_d(WARN_NON_UNICODE)) {
                          const char* desc = (PL_op) ? OP_DESC(PL_op) : normal;
@@ -3586,17 +3609,12 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p,
         if (flags & FOLD_FLAGS_LOCALE) {
  
  #           define LONG_S_T      LATIN_SMALL_LIGATURE_LONG_S_T_UTF8
-            const unsigned int long_s_t_len    = sizeof(LONG_S_T) - 1;
-
  #         ifdef LATIN_CAPITAL_LETTER_SHARP_S_UTF8
  #           define CAP_SHARP_S   LATIN_CAPITAL_LETTER_SHARP_S_UTF8
  
-            const unsigned int cap_sharp_s_len = sizeof(CAP_SHARP_S) - 1;
-
              /* Special case these two characters, as what normally gets
               * returned under locale doesn't work */
-            if (UTF8SKIP(p) == cap_sharp_s_len
-                && memEQ((char *) p, CAP_SHARP_S, cap_sharp_s_len))
+            if (memEQs((char *) p, UTF8SKIP(p), CAP_SHARP_S))
              {
                  /* diag_listed_as: Can't do %s("%s") on non-UTF-8 locale; resolved to "%s". */
                  Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
@@ -3606,8 +3624,7 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p,
              }
              else
  #endif
-                 if (UTF8SKIP(p) == long_s_t_len
-                     && memEQ((char *) p, LONG_S_T, long_s_t_len))
+                 if (memEQs((char *) p, UTF8SKIP(p), LONG_S_T))
              {
                  /* diag_listed_as: Can't do %s("%s") on non-UTF-8 locale; resolved to "%s". */
                  Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
@@ -3626,9 +3643,7 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p,
               * 255/256 boundary which is forbidden under /l, and so the code
               * wouldn't catch that they are equivalent (which they are only in
               * this release) */
-            else if (UTF8SKIP(p) == sizeof(DOTTED_I) - 1
-                     && memEQ((char *) p, DOTTED_I, sizeof(DOTTED_I) - 1))
-            {
+            else if (memEQs((char *) p, UTF8SKIP(p), DOTTED_I)) {
                  /* diag_listed_as: Can't do %s("%s") on non-UTF-8 locale; resolved to "%s". */
                  Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
                                "Can't do fc(\"\\x{0130}\") on non-UTF-8 locale; "
@@ -5075,7 +5090,7 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
           * size based on worst possible case, which is each line in the input
           * creates 2 elements in the inversion list: 1) the beginning of a
           * range in the list; 2) the beginning of a range not in the list.  */
-        while ((loc = (strchr(loc, '\n'))) != NULL) {
+        while ((loc = (char *) memchr(loc, '\n', lend - (U8 *) loc)) != NULL) {
              elements += 2;
              loc++;
          }
@@ -5220,10 +5235,8 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
      /* May change: warns if surrogates, non-character code points, or
       * non-Unicode code points are in 's' which has length 'len' bytes.
       * Returns TRUE if none found; FALSE otherwise.  The only other validity
-     * check is to make sure that this won't exceed the string's length.
-     *
-     * Code points above the platform's C<IV_MAX> will raise a deprecation
-     * warning, unless those are turned off.  */
+     * check is to make sure that this won't exceed the string's length nor
+     * overflow */
  
      const U8* const e = s + len;
      bool ok = TRUE;