Move finding perl versions from mktodo to devtools.pl

[perl5.git] / utf8.h
diff --git a/utf8.h b/utf8.h

index 8ab27f5..e6c7864 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -69,7 +69,7 @@ the string is invariant.
  #define uvchr_to_utf8_flags_msgs(d,uv,flags,msgs)                              \
                  uvoffuni_to_utf8_flags_msgs(d,NATIVE_TO_UNI(uv),flags, msgs)
  #define utf8_to_uvchr_buf(s, e, lenp)                                          \
-                   _utf8_to_uvchr_buf((const U8 *) (s), (const U8 *) e, lenp)
+            utf8_to_uvchr_buf_helper((const U8 *) (s), (const U8 *) e, lenp)
  #define utf8n_to_uvchr(s, len, lenp, flags)                                    \
                                  utf8n_to_uvchr_error(s, len, lenp, flags, 0)
  #define utf8n_to_uvchr_error(s, len, lenp, flags, errors)                      \
@@ -108,11 +108,19 @@ the string is invariant.
  #else  /* ! EBCDIC */
  START_EXTERN_C
  
-/* How wide can a single UTF-8 encoded character become in bytes. */
-/* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8
- * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be
- * expressed with 4 bytes.  However, Perl thinks of UTF-8 as a way to encode
- * non-negative integers in a binary format, even those above Unicode */
+/*
+
+=for apidoc AmnU|STRLEN|UTF8_MAXBYTES
+
+The maximum width of a single UTF-8 encoded character, in bytes.
+
+NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8
+is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be
+expressed with 4 bytes.  However, Perl thinks of UTF-8 as a way to encode
+non-negative integers in a binary format, even those above Unicode's maximum.
+
+=cut
+ */
  #define UTF8_MAXBYTES 13
  
  #ifdef DOINIT
@@ -275,6 +283,7 @@ platforms.  FF signals to use 13 bytes for the encoded character.  This breaks
  the paradigm that the number of leading bits gives how many total bytes there
  are in the character.
  
+=cut
  */
  
  /* Is the representation of the Unicode code point 'cp' the same regardless of
@@ -842,14 +851,14 @@ fit in an IV on the current machine.
                      && (    NATIVE_UTF8_TO_I8(*(s)) >  0xF9                 \
                          || (NATIVE_UTF8_TO_I8(*((s) + 1)) >= 0xA2))         \
                      &&  LIKELY((s) + UTF8SKIP(s) <= (e)))                   \
-                    ? _is_utf8_char_helper(s, s + UTF8SKIP(s), 0) : 0)
+                    ?  is_utf8_char_helper(s, s + UTF8SKIP(s), 0) : 0)
  #else
  #   define UTF8_IS_SUPER(s, e)                                              \
                     ((    LIKELY((e) > (s) + 3)                              \
                       &&  (*(U8*) (s)) >= 0xF4                               \
                       && ((*(U8*) (s)) >  0xF4 || (*((U8*) (s) + 1) >= 0x90))\
                       &&  LIKELY((s) + UTF8SKIP(s) <= (e)))                  \
-                    ? _is_utf8_char_helper(s, s + UTF8SKIP(s), 0) : 0)
+                    ?  is_utf8_char_helper(s, s + UTF8SKIP(s), 0) : 0)
  #endif
  
  /* These are now machine generated, and the 'given' clause is no longer
@@ -866,6 +875,10 @@ of the Unicode non-character code points; otherwise it evaluates to 0.  If
  non-zero, the value gives how many bytes starting at C<s> comprise the code
  point's representation.
  
+=for apidoc AmnU|UV|UNICODE_REPLACEMENT
+
+Evaluates to 0xFFFD, the code point of the Unicode REPLACEMENT CHARACTER.
+
  =cut
   */
  #define UTF8_IS_NONCHAR(s, e)                                                  \
@@ -1016,7 +1029,7 @@ L</is_utf8_string_loclen_flags> to check entire strings.
        ? 1                                                                   \
        : UNLIKELY(((e) - (s)) < UTF8SKIP(s))                                 \
          ? 0                                                                 \
-        : _is_utf8_char_helper(s, e, flags))
+        : is_utf8_char_helper(s, e, flags))
  
  /* Do not use; should be deprecated.  Use isUTF8_CHAR() instead; this is
   * retained solely for backwards compatibility */