This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Deprecate isFOO_utf8() macros
authorKarl Williamson <khw@cpan.org>
Fri, 16 Dec 2016 02:51:26 +0000 (19:51 -0700)
committerKarl Williamson <khw@cpan.org>
Fri, 23 Dec 2016 23:48:36 +0000 (16:48 -0700)
These macros are being replaced by a safe version; they now generate a
deprecation message at each call site upon the first use there in each
program run.

embed.fnc
embed.h
embedvar.h
ext/XS-APItest/t/handy.t
handy.h
intrpvar.h
pod/perldelta.pod
proto.h
sv.c
utf8.c
utf8.h

index a63f463..561ad9f 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -812,7 +812,11 @@ AmndP      |bool   |is_utf8_valid_partial_char                                 \
 AnidR  |bool   |is_utf8_valid_partial_char_flags                           \
                |NN const U8 * const s|NN const U8 * const e|const U32 flags
 AMpR   |bool   |_is_uni_FOO|const U8 classnum|const UV c
-AMpR   |bool   |_is_utf8_FOO|const U8 classnum|NN const U8 *p
+AMpR   |bool   |_is_utf8_FOO|U8 classnum|NN const U8 * const p             \
+               |NN const char * const name                                 \
+               |NN const char * const alternative                          \
+               |const bool use_utf8|const bool use_locale                  \
+               |NN const char * const file|const unsigned line
 AMpR   |bool   |_is_utf8_FOO_with_len|const U8 classnum|NN const U8 *p     \
                |NN const U8 * const e
 ADMpR  |bool   |is_utf8_alnum  |NN const U8 *p
@@ -823,8 +827,6 @@ AMpR        |bool   |_is_utf8_idcont|NN const U8 *p
 AMpR   |bool   |_is_utf8_idstart|NN const U8 *p
 AMpR   |bool   |_is_utf8_xidcont|NN const U8 *p
 AMpR   |bool   |_is_utf8_xidstart|NN const U8 *p
-AMpR   |bool   |_is_utf8_perl_idcont|NN const U8 *p
-AMpR   |bool   |_is_utf8_perl_idstart|NN const U8 *p
 AMpR   |bool   |_is_utf8_perl_idcont_with_len|NN const U8 *p               \
                |NN const U8 * const e
 AMpR   |bool   |_is_utf8_perl_idstart_with_len|NN const U8 *p              \
@@ -1721,6 +1723,12 @@ sMR      |char * |unexpected_non_continuation_text                       \
                |const STRLEN non_cont_byte_pos                         \
                |const STRLEN expect_len
 sM     |char * |_byte_dump_string|NN const U8 * s|const STRLEN len
+s      |void   |warn_on_first_deprecated_use                               \
+                               |NN const char * const name                 \
+                               |NN const char * const alternative          \
+                               |const bool use_locale                      \
+                               |NN const char * const file                 \
+                               |const unsigned line
 s      |UV     |_to_utf8_case  |const UV uv1                                   \
                                |NN const U8 *p                                 \
                                |NN U8* ustrp                                   \
@@ -2443,8 +2451,10 @@ Es       |U8     |regtail_study  |NN RExC_state_t *pRExC_state \
 #  endif
 #endif
 
-#if defined(PERL_IN_REGEXEC_C)
+#if defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
 EXRpM  |bool   |isFOO_lc       |const U8 classnum|const U8 character
+#endif
+#if defined(PERL_IN_REGEXEC_C)
 ERs    |bool   |isFOO_utf8_lc  |const U8 classnum|NN const U8* character
 ERs    |SSize_t|regmatch       |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog
 WERs   |I32    |regrepeat      |NN regexp *prog|NN char **startposp \
@@ -2732,7 +2742,10 @@ sRM      |UV     |check_locale_boundary_crossing                             \
                |const UV result                                            \
                |NN U8* const ustrp                                         \
                |NN STRLEN *lenp
-iR     |bool   |is_utf8_common |NN const U8 *const p|NN SV **swash|NN const char * const swashname|NULLOK SV* const invlist
+iR     |bool   |is_utf8_common |NN const U8 *const p                       \
+                               |NN SV **swash                              \
+                               |NN const char * const swashname            \
+                               |NULLOK SV* const invlist
 iR     |bool   |is_utf8_common_with_len|NN const U8 *const p               \
                                           |NN const U8 *const e            \
                                    |NN SV **swash                          \
diff --git a/embed.h b/embed.h
index 2254895..4687806 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define _is_uni_FOO(a,b)       Perl__is_uni_FOO(aTHX_ a,b)
 #define _is_uni_perl_idcont(a) Perl__is_uni_perl_idcont(aTHX_ a)
 #define _is_uni_perl_idstart(a)        Perl__is_uni_perl_idstart(aTHX_ a)
-#define _is_utf8_FOO(a,b)      Perl__is_utf8_FOO(aTHX_ a,b)
+#define _is_utf8_FOO(a,b,c,d,e,f,g,h)  Perl__is_utf8_FOO(aTHX_ a,b,c,d,e,f,g,h)
 #define _is_utf8_FOO_with_len(a,b,c)   Perl__is_utf8_FOO_with_len(aTHX_ a,b,c)
 #define _is_utf8_idcont(a)     Perl__is_utf8_idcont(aTHX_ a)
 #define _is_utf8_idstart(a)    Perl__is_utf8_idstart(aTHX_ a)
 #define _is_utf8_mark(a)       Perl__is_utf8_mark(aTHX_ a)
-#define _is_utf8_perl_idcont(a)        Perl__is_utf8_perl_idcont(aTHX_ a)
 #define _is_utf8_perl_idcont_with_len(a,b)     Perl__is_utf8_perl_idcont_with_len(aTHX_ a,b)
-#define _is_utf8_perl_idstart(a)       Perl__is_utf8_perl_idstart(aTHX_ a)
 #define _is_utf8_perl_idstart_with_len(a,b)    Perl__is_utf8_perl_idstart_with_len(aTHX_ a,b)
 #define _is_utf8_xidcont(a)    Perl__is_utf8_xidcont(aTHX_ a)
 #define _is_utf8_xidstart(a)   Perl__is_utf8_xidstart(aTHX_ a)
 #define backup_one_SB(a,b,c)   S_backup_one_SB(aTHX_ a,b,c)
 #define backup_one_WB(a,b,c,d) S_backup_one_WB(aTHX_ a,b,c,d)
 #define find_byclass(a,b,c,d,e)        S_find_byclass(aTHX_ a,b,c,d,e)
-#define isFOO_lc(a,b)          Perl_isFOO_lc(aTHX_ a,b)
 #define isFOO_utf8_lc(a,b)     S_isFOO_utf8_lc(aTHX_ a,b)
 #define isGCB(a,b,c,d,e)       S_isGCB(aTHX_ a,b,c,d,e)
 #define isLB(a,b,c,d,e,f)      S_isLB(aTHX_ a,b,c,d,e,f)
 #define to_byte_substr(a)      S_to_byte_substr(aTHX_ a)
 #define to_utf8_substr(a)      S_to_utf8_substr(aTHX_ a)
 #  endif
+#  if defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
+#define isFOO_lc(a,b)          Perl_isFOO_lc(aTHX_ a,b)
+#  endif
 #  if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
 #define _to_fold_latin1(a,b,c,d)       Perl__to_fold_latin1(aTHX_ a,b,c,d)
 #  endif
 #define swatch_get(a,b,c)      S_swatch_get(aTHX_ a,b,c)
 #define to_lower_latin1                S_to_lower_latin1
 #define unexpected_non_continuation_text(a,b,c,d)      S_unexpected_non_continuation_text(aTHX_ a,b,c,d)
+#define warn_on_first_deprecated_use(a,b,c,d,e)        S_warn_on_first_deprecated_use(aTHX_ a,b,c,d,e)
 #  endif
 #  if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C)
 #define _to_upper_title_latin1(a,b,c,d)        Perl__to_upper_title_latin1(aTHX_ a,b,c,d)
index c413932..f1fa5ba 100644 (file)
 #define PL_scopestack_max      (vTHX->Iscopestack_max)
 #define PL_scopestack_name     (vTHX->Iscopestack_name)
 #define PL_secondgv            (vTHX->Isecondgv)
+#define PL_seen_deprecated_macro       (vTHX->Iseen_deprecated_macro)
 #define PL_sharehook           (vTHX->Isharehook)
 #define PL_sig_pending         (vTHX->Isig_pending)
 #define PL_sighandlerp         (vTHX->Isighandlerp)
index 036b9c1..81e4c7c 100644 (file)
@@ -153,13 +153,14 @@ my %properties = (
                    xdigit => 'XDigit',
                 );
 
+my %seen;
 my @warnings;
 local $SIG{__WARN__} = sub { push @warnings, @_ };
 
 my %utf8_param_code = (
                         "_safe"                 =>  0,
                         "_safe, malformed"      =>  1,
-                        "unsafe"                => -1,
+                        "deprecated unsafe"     => -1,
                       );
 
 foreach my $name (sort keys %properties, 'octal') {
@@ -314,13 +315,14 @@ foreach my $name (sort keys %properties, 'octal') {
 
                     foreach my $utf8_param("_safe",
                                            "_safe, malformed",
-                                           "unsafe"
+                                           "deprecated unsafe"
                                           )
                     {
                         my $utf8_param_code = $utf8_param_code{$utf8_param};
                         my $expect_error = $utf8_param_code > 0;
                         next if      $expect_error
-                                && ! try_malforming($i, $function, $suffix =~ /LC/);
+                                && ! try_malforming($i, $function,
+                                                    $suffix =~ /LC/);
 
                         my $display_call = "is${function}$suffix( $display_name"
                                          . ", $utf8_param )$display_locale";
@@ -346,6 +348,29 @@ foreach my $name (sort keys %properties, 'octal') {
                         elsif (is ($@, "", "$display_call didn't give error")) {
                             is ($ret, $truth,
                                 "${tab}And correctly returned $truth");
+                            if ($utf8_param_code < 0) {
+                                my $warnings_ok;
+                                my $unique_function = "is" . $function . $suffix;
+                                if (! $seen{$unique_function}++) {
+                                    $warnings_ok = is(@warnings, 1,
+                                        "${tab}This is first call to"
+                                      . " $unique_function; Got a single"
+                                      . " warning");
+                                    if ($warnings_ok) {
+                                        $warnings_ok = like($warnings[0],
+                qr/starting in Perl .* will require an additional parameter/,
+                                            "${tab}The warning was the expected"
+                                          . " deprecation one");
+                                    }
+                                }
+                                else {
+                                    $warnings_ok = is(@warnings, 0,
+                                        "${tab}This subsequent call to"
+                                      . " $unique_function did not warn");
+                                }
+                                $warnings_ok or diag("@warnings");
+                                undef @warnings;
+                            }
                         }
                     }
                 }
diff --git a/handy.h b/handy.h
index 625343b..98ae51d 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -582,7 +582,14 @@ future releases.
 Variant C<isFOO_utf8> is like C<isFOO_utf8_safe>, but takes just a single
 parameter, C<p>, which has the same meaning as the corresponding parameter does
 in C<isFOO_utf8_safe>.  The function therefore can't check if it is reading
-beyond the end of the string.
+beyond the end of the string.  Starting in Perl v5.30, it will take a second
+parameter, becoming a synonym for C<isFOO_utf8_safe>.  At that time every
+program that uses it will have to be changed to successfully compile.  In the
+meantime, the first runtime call to C<isFOO_utf8> from each call point in the
+program will raise a deprecation warning, enabled by default.  You can convert
+your program now to use C<isFOO_utf8_safe>, and avoid the warnings, and get an
+extra measure of protection, or you can wait until v5.30, when you'll be forced
+to add the C<e> parameter.
 
 Variant C<isFOO_LC> is like the C<isFOO_A> and C<isFOO_L1> variants, but the
 result is based on the current locale, which is what C<LC> in the name stands
@@ -615,7 +622,14 @@ future releases.
 Variant C<isFOO_LC_utf8> is like C<isFOO_LC_utf8_safe>, but takes just a single
 parameter, C<p>, which has the same meaning as the corresponding parameter does
 in C<isFOO_LC_utf8_safe>.  The function therefore can't check if it is reading
-beyond the end of the string.
+beyond the end of the string.  Starting in Perl v5.30, it will take a second
+parameter, becoming a synonym for C<isFOO_LC_utf8_safe>.  At that time every
+program that uses it will have to be changed to successfully compile.  In the
+meantime, the first runtime call to C<isFOO_LC_utf8> from each call point in
+the program will raise a deprecation warning, enabled by default.  You can
+convert your program now to use C<isFOO_LC_utf8_safe>, and avoid the warnings,
+and get an extra measure of protection, or you can wait until v5.30, when
+you'll be forced to add the C<e> parameter.
 
 =for apidoc Am|bool|isALPHA|char ch
 Returns a boolean indicating whether the specified character is an
@@ -1043,6 +1057,9 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
  * above ASCII in the latter case) */
 
 #  define _CC_SPACE             10      /* \s, [:space:] */
+#  define _CC_PSXSPC            _CC_SPACE   /* XXX Temporary, can be removed
+                                               when the deprecated isFOO_utf8()
+                                               functions are removed */
 #  define _CC_BLANK             11      /* [:blank:] */
 #  define _CC_XDIGIT            12      /* [:xdigit:] */
 #  define _CC_CNTRL             13      /* [:cntrl:] */
@@ -1062,6 +1079,9 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
 #  define _CC_IS_IN_SOME_FOLD          22
 #  define _CC_MNEMONIC_CNTRL           23
 
+#  define _CC_IDCONT 24 /* XXX Temporary, can be removed when the deprecated
+                           isFOO_utf8() functions are removed */
+
 /* This next group is only used on EBCDIC platforms, so theoretically could be
  * shared with something entirely different that's only on ASCII platforms */
 #  define _CC_UTF8_START_BYTE_IS_FOR_AT_LEAST_SURROGATE 28
@@ -1701,14 +1721,14 @@ END_EXTERN_C
  * 'utf8' parameter.  This relies on the fact that ASCII characters have the
  * same representation whether utf8 or not.  Note that it assumes that the utf8
  * has been validated, and ignores 'use bytes' */
-#define _generic_utf8(classnum, p, utf8) (UTF8_IS_INVARIANT(*(p))              \
-                                         ? _generic_isCC(*(p), classnum)       \
-                                         : (UTF8_IS_DOWNGRADEABLE_START(*(p))) \
-                                           ? _generic_isCC(                    \
-                                                EIGHT_BIT_UTF8_TO_NATIVE(*(p), \
-                                                                   *((p)+1 )), \
-                                                classnum)                      \
-                                           : utf8)
+#define _base_generic_utf8(enum_name, name, p, use_locale )                 \
+    _is_utf8_FOO(CAT2(_CC_, enum_name),                                     \
+                 (const U8 *) p,                                            \
+                 "is" STRINGIFY(name) "_utf8",                              \
+                 "is" STRINGIFY(name) "_utf8_safe",                         \
+                 1, use_locale, __FILE__,__LINE__)
+
+#define _generic_utf8(name, p) _base_generic_utf8(name, name, p, 0)
 
 /* The "_safe" macros make sure that we don't attempt to read beyond 'e', but
  * they don't otherwise go out of their way to look for malformed UTF-8.  If
@@ -1746,8 +1766,6 @@ END_EXTERN_C
              : above_latin1))
 /* Like the above, but calls 'above_latin1(p)' to get the utf8 value.
  * 'above_latin1' can be a macro */
-#define _generic_func_utf8(classnum, above_latin1, p)  \
-                                    _generic_utf8(classnum, p, above_latin1(p))
 #define _generic_func_utf8_safe(classnum, above_latin1, p, e)               \
                     _generic_utf8_safe(classnum, p, e, above_latin1(p, e))
 #define _generic_non_swash_utf8_safe(classnum, above_latin1, p, e)          \
@@ -1758,8 +1776,6 @@ END_EXTERN_C
                               : above_latin1(p)))
 /* Like the above, but passes classnum to _isFOO_utf8(), instead of having an
  * 'above_latin1' parameter */
-#define _generic_swash_utf8(classnum, p)  \
-                      _generic_utf8(classnum, p, _is_utf8_FOO(classnum, p))
 #define _generic_swash_utf8_safe(classnum, p, e)                            \
 _generic_utf8_safe(classnum, p, e, _is_utf8_FOO_with_len(classnum, p, e))
 
@@ -1767,13 +1783,6 @@ _generic_utf8_safe(classnum, p, e, _is_utf8_FOO_with_len(classnum, p, e))
  * characters in the upper-Latin1 range (128-255 on ASCII platforms) which the
  * class is TRUE for.  Hence it can skip the tests for this range.
  * 'above_latin1' should include its arguments */
-#define _generic_utf8_no_upper_latin1(classnum, p, above_latin1)               \
-                                         (UTF8_IS_INVARIANT(*(p))              \
-                                         ? _generic_isCC(*(p), classnum)       \
-                                         : (UTF8_IS_ABOVE_LATIN1(*(p)))        \
-                                           ? above_latin1                      \
-                                           : 0)
-
 #define _generic_utf8_safe_no_upper_latin1(classnum, p, e, above_latin1)    \
          (__ASSERT_(_utf8_safe_assert(p, e))                                \
          (UTF8_IS_INVARIANT(*(p)))                                          \
@@ -1790,12 +1799,24 @@ _generic_utf8_safe(classnum, p, e, _is_utf8_FOO_with_len(classnum, p, e))
  * points; the regcharclass.h ones are implemented as a series of
  * "if-else-if-else ..." */
 
-#define isALPHA_utf8(p)        _generic_swash_utf8(_CC_ALPHA, p)
-#define isALPHANUMERIC_utf8(p) _generic_swash_utf8(_CC_ALPHANUMERIC, p)
-#define isASCII_utf8(p)        isASCII(*p) /* Because ASCII is invariant under
-                                               utf8, the non-utf8 macro works
-                                             */
-#define isBLANK_utf8(p)        _generic_func_utf8(_CC_BLANK, is_HORIZWS_high, p)
+#define isALPHA_utf8(p)         _generic_utf8(ALPHA, p)
+#define isALPHANUMERIC_utf8(p)  _generic_utf8(ALPHANUMERIC, p)
+#define isASCII_utf8(p)         _generic_utf8(ASCII, p)
+#define isBLANK_utf8(p)         _generic_utf8(BLANK, p)
+#define isCNTRL_utf8(p)         _generic_utf8(CNTRL, p)
+#define isDIGIT_utf8(p)         _generic_utf8(DIGIT, p)
+#define isGRAPH_utf8(p)         _generic_utf8(GRAPH, p)
+#define isIDCONT_utf8(p)        _generic_utf8(IDCONT, p)
+#define isIDFIRST_utf8(p)       _generic_utf8(IDFIRST, p)
+#define isLOWER_utf8(p)         _generic_utf8(LOWER, p)
+#define isPRINT_utf8(p)         _generic_utf8(PRINT, p)
+#define isPSXSPC_utf8(p)        _generic_utf8(PSXSPC, p)
+#define isPUNCT_utf8(p)         _generic_utf8(PUNCT, p)
+#define isSPACE_utf8(p)         _generic_utf8(SPACE, p)
+#define isUPPER_utf8(p)         _generic_utf8(UPPER, p)
+#define isVERTWS_utf8(p)        _generic_utf8(VERTSPACE, p)
+#define isWORDCHAR_utf8(p)      _generic_utf8(WORDCHAR, p)
+#define isXDIGIT_utf8(p)        _generic_utf8(XDIGIT, p)
 
 #define isALPHA_utf8_safe(p, e)  _generic_swash_utf8_safe(_CC_ALPHA, p, e)
 #define isALPHANUMERIC_utf8_safe(p, e)                                      \
@@ -1810,19 +1831,12 @@ _generic_utf8_safe(classnum, p, e, _is_utf8_FOO_with_len(classnum, p, e))
 #ifdef EBCDIC
     /* Because all controls are UTF-8 invariants in EBCDIC, we can use this
      * more efficient macro instead of the more general one */
-#   define isCNTRL_utf8(p)      isCNTRL_L1(*(p))
 #   define isCNTRL_utf8_safe(p, e)                                          \
                     (__ASSERT_(_utf8_safe_assert(p, e)) isCNTRL_L1(*(p))
 #else
-#   define isCNTRL_utf8(p)          _generic_utf8(_CC_CNTRL, p, 0)
 #   define isCNTRL_utf8_safe(p, e)  _generic_utf8_safe(_CC_CNTRL, p, e, 0)
 #endif
 
-#define isDIGIT_utf8(p)         _generic_utf8_no_upper_latin1(_CC_DIGIT, p,   \
-                                                  _is_utf8_FOO(_CC_DIGIT, p))
-#define isGRAPH_utf8(p)         _generic_swash_utf8(_CC_GRAPH, p)
-#define isIDCONT_utf8(p)        _generic_func_utf8(_CC_WORDCHAR,              \
-                                                  _is_utf8_perl_idcont, p)
 #define isDIGIT_utf8_safe(p, e)                                             \
             _generic_utf8_safe_no_upper_latin1(_CC_DIGIT, p, e,             \
                                     _is_utf8_FOO_with_len(_CC_DIGIT, p, e))
@@ -1836,19 +1850,6 @@ _generic_utf8_safe(classnum, p, e, _is_utf8_FOO_with_len(classnum, p, e))
  * ever wanted to know about.  (In the ASCII range, there isn't a difference.)
  * This used to be not the XID version, but we decided to go with the more
  * modern Unicode definition */
-#define isIDFIRST_utf8(p)   _generic_func_utf8(_CC_IDFIRST,                  \
-                                                _is_utf8_perl_idstart, p)
-
-#define isLOWER_utf8(p)     _generic_swash_utf8(_CC_LOWER, p)
-#define isPRINT_utf8(p)     _generic_swash_utf8(_CC_PRINT, p)
-#define isPSXSPC_utf8(p)    isSPACE_utf8(p)
-#define isPUNCT_utf8(p)     _generic_swash_utf8(_CC_PUNCT, p)
-#define isSPACE_utf8(p)     _generic_func_utf8(_CC_SPACE, is_XPERLSPACE_high, p)
-#define isUPPER_utf8(p)     _generic_swash_utf8(_CC_UPPER, p)
-#define isVERTWS_utf8(p)    _generic_func_utf8(_CC_VERTSPACE, is_VERTWS_high, p)
-#define isWORDCHAR_utf8(p)  _generic_swash_utf8(_CC_WORDCHAR, p)
-#define isXDIGIT_utf8(p)    _generic_utf8_no_upper_latin1(_CC_XDIGIT, p,     \
-                                                          is_XDIGIT_high(p))
 #define isIDFIRST_utf8_safe(p, e)                                           \
     _generic_func_utf8_safe(_CC_IDFIRST,                                    \
                     _is_utf8_perl_idstart_with_len, (U8 *) (p), (U8 *) (e))
@@ -1880,42 +1881,26 @@ _generic_utf8_safe(classnum, p, e, _is_utf8_FOO_with_len(classnum, p, e))
  * isALPHA_LC_utf8.  These are like _generic_utf8, but if the first code point
  * in 'p' is within the 0-255 range, it uses locale rules from the passed-in
  * 'macro' parameter */
-#define _generic_LC_utf8(macro, p, utf8)                                    \
-                         (UTF8_IS_INVARIANT(*(p))                           \
-                         ? macro(*(p))                                      \
-                         : (UTF8_IS_DOWNGRADEABLE_START(*(p)))              \
-                           ? macro(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1)))\
-                           : utf8)
-
-#define _generic_LC_swash_utf8(macro, classnum, p)                         \
-                    _generic_LC_utf8(macro, p, _is_utf8_FOO(classnum, p))
-#define _generic_LC_func_utf8(macro, above_latin1, p)                         \
-                              _generic_LC_utf8(macro, p, above_latin1(p))
-
-#define isALPHANUMERIC_LC_utf8(p) _generic_LC_swash_utf8(isALPHANUMERIC_LC,   \
-                                                      _CC_ALPHANUMERIC, p)
-#define isALPHA_LC_utf8(p)    _generic_LC_swash_utf8(isALPHA_LC, _CC_ALPHA, p)
-#define isASCII_LC_utf8(p)     isASCII_LC(*p)
-#define isBLANK_LC_utf8(p)    _generic_LC_func_utf8(isBLANK_LC,               \
-                                                         is_HORIZWS_high, p)
-#define isCNTRL_LC_utf8(p)    _generic_LC_utf8(isCNTRL_LC, p, 0)
-#define isDIGIT_LC_utf8(p)    _generic_LC_swash_utf8(isDIGIT_LC, _CC_DIGIT, p)
-#define isGRAPH_LC_utf8(p)    _generic_LC_swash_utf8(isGRAPH_LC, _CC_GRAPH, p)
-#define isIDCONT_LC_utf8(p)   _generic_LC_func_utf8(isIDCONT_LC,              \
-                                                    _is_utf8_perl_idcont, p)
-#define isIDFIRST_LC_utf8(p)  _generic_LC_func_utf8(isIDFIRST_LC,             \
-                                                    _is_utf8_perl_idstart, p)
-#define isLOWER_LC_utf8(p)    _generic_LC_swash_utf8(isLOWER_LC, _CC_LOWER, p)
-#define isPRINT_LC_utf8(p)    _generic_LC_swash_utf8(isPRINT_LC, _CC_PRINT, p)
-#define isPSXSPC_LC_utf8(p)    isSPACE_LC_utf8(p)
-#define isPUNCT_LC_utf8(p)    _generic_LC_swash_utf8(isPUNCT_LC, _CC_PUNCT, p)
-#define isSPACE_LC_utf8(p)    _generic_LC_func_utf8(isSPACE_LC,               \
-                                                        is_XPERLSPACE_high, p)
-#define isUPPER_LC_utf8(p)    _generic_LC_swash_utf8(isUPPER_LC, _CC_UPPER, p)
-#define isWORDCHAR_LC_utf8(p) _generic_LC_swash_utf8(isWORDCHAR_LC,           \
-                                                            _CC_WORDCHAR, p)
-#define isXDIGIT_LC_utf8(p)   _generic_LC_func_utf8(isXDIGIT_LC,              \
-                                                            is_XDIGIT_high, p)
+#define _generic_LC_utf8(name, p) _base_generic_utf8(name, name, p, 1)
+
+#define isALPHA_LC_utf8(p)         _generic_LC_utf8(ALPHA, p)
+#define isALPHANUMERIC_LC_utf8(p)  _generic_LC_utf8(ALPHANUMERIC, p)
+#define isASCII_LC_utf8(p)         _generic_LC_utf8(ASCII, p)
+#define isBLANK_LC_utf8(p)         _generic_LC_utf8(BLANK, p)
+#define isCNTRL_LC_utf8(p)         _generic_LC_utf8(CNTRL, p)
+#define isDIGIT_LC_utf8(p)         _generic_LC_utf8(DIGIT, p)
+#define isGRAPH_LC_utf8(p)         _generic_LC_utf8(GRAPH, p)
+#define isIDCONT_LC_utf8(p)        _generic_LC_utf8(IDCONT, p)
+#define isIDFIRST_LC_utf8(p)       _generic_LC_utf8(IDFIRST, p)
+#define isLOWER_LC_utf8(p)         _generic_LC_utf8(LOWER, p)
+#define isPRINT_LC_utf8(p)         _generic_LC_utf8(PRINT, p)
+#define isPSXSPC_LC_utf8(p)        _generic_LC_utf8(PSXSPC, p)
+#define isPUNCT_LC_utf8(p)         _generic_LC_utf8(PUNCT, p)
+#define isSPACE_LC_utf8(p)         _generic_LC_utf8(SPACE, p)
+#define isUPPER_LC_utf8(p)         _generic_LC_utf8(UPPER, p)
+#define isWORDCHAR_LC_utf8(p)      _generic_LC_utf8(WORDCHAR, p)
+#define isXDIGIT_LC_utf8(p)        _generic_LC_utf8(XDIGIT, p)
+
 /* For internal core Perl use only: the base macros for defining macros like
  * isALPHA_LC_utf8_safe.  These are like _generic_utf8, but if the first code
  * point in 'p' is within the 0-255 range, it uses locale rules from the
index 1aa94f7..a078be4 100644 (file)
@@ -628,6 +628,7 @@ PERLVAR(I, GCB_invlist, SV *)
 PERLVAR(I, LB_invlist, SV *)
 PERLVAR(I, SB_invlist, SV *)
 PERLVAR(I, WB_invlist, SV *)
+PERLVAR(I, seen_deprecated_macro, HV *)
 
 PERLVAR(I, last_swash_hv, HV *)
 PERLVAR(I, last_swash_tmps, U8 *)
index 372b5fe..472d45b 100644 (file)
@@ -331,7 +331,8 @@ New versions of macros like C<isALPHA_utf8> have been added, each with the
 suffix C<_safe>, like C<isSPACE_utf8_safe>.  These take an extra
 parameter, giving an upper limit of how far into the string it is safe
 to read.  Using the old versions could cause attempts to read beyond the
-end of the input buffer if the UTF-8 is not well-formed.  Details are at
+end of the input buffer if the UTF-8 is not well-formed, and their use
+now raises a deprecation warning.  Details are at
 L<perlapi/Character classification>.
 
 =item *
diff --git a/proto.h b/proto.h
index 96da770..939e821 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -54,10 +54,10 @@ PERL_CALLCONV bool  Perl__is_uni_perl_idcont(pTHX_ UV c)
 PERL_CALLCONV bool     Perl__is_uni_perl_idstart(pTHX_ UV c)
                        __attribute__warn_unused_result__;
 
-PERL_CALLCONV bool     Perl__is_utf8_FOO(pTHX_ const U8 classnum, const U8 *p)
+PERL_CALLCONV bool     Perl__is_utf8_FOO(pTHX_ U8 classnum, const U8 * const p, const char * const name, const char * const alternative, const bool use_utf8, const bool use_locale, const char * const file, const unsigned line)
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT__IS_UTF8_FOO  \
-       assert(p)
+       assert(p); assert(name); assert(alternative); assert(file)
 
 PERL_CALLCONV bool     Perl__is_utf8_FOO_with_len(pTHX_ const U8 classnum, const U8 *p, const U8 * const e)
                        __attribute__warn_unused_result__;
@@ -79,21 +79,11 @@ PERL_CALLCONV bool  Perl__is_utf8_mark(pTHX_ const U8 *p)
 #define PERL_ARGS_ASSERT__IS_UTF8_MARK \
        assert(p)
 
-PERL_CALLCONV bool     Perl__is_utf8_perl_idcont(pTHX_ const U8 *p)
-                       __attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT__IS_UTF8_PERL_IDCONT  \
-       assert(p)
-
 PERL_CALLCONV bool     Perl__is_utf8_perl_idcont_with_len(pTHX_ const U8 *p, const U8 * const e)
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT__IS_UTF8_PERL_IDCONT_WITH_LEN \
        assert(p); assert(e)
 
-PERL_CALLCONV bool     Perl__is_utf8_perl_idstart(pTHX_ const U8 *p)
-                       __attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT__IS_UTF8_PERL_IDSTART \
-       assert(p)
-
 PERL_CALLCONV bool     Perl__is_utf8_perl_idstart_with_len(pTHX_ const U8 *p, const U8 * const e)
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT__IS_UTF8_PERL_IDSTART_WITH_LEN        \
@@ -5276,9 +5266,6 @@ STATIC char*      S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, cons
 #define PERL_ARGS_ASSERT_FIND_BYCLASS  \
        assert(prog); assert(c); assert(s); assert(strend)
 
-PERL_CALLCONV bool     Perl_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
-                       __attribute__warn_unused_result__;
-
 STATIC bool    S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character)
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_ISFOO_UTF8_LC \
@@ -5360,6 +5347,11 @@ STATIC void      S_to_utf8_substr(pTHX_ regexp * prog);
 #define PERL_ARGS_ASSERT_TO_UTF8_SUBSTR        \
        assert(prog)
 #endif
+#if defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
+PERL_CALLCONV bool     Perl_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
+                       __attribute__warn_unused_result__;
+
+#endif
 #if defined(PERL_IN_SCOPE_C)
 STATIC void    S_save_pushptri32ptr(pTHX_ void *const ptr1, const I32 i, void *const ptr2, const int type);
 STATIC SV*     S_save_scalar_at(pTHX_ SV **sptr, const U32 flags);
@@ -5672,6 +5664,9 @@ STATIC char *     S_unexpected_non_continuation_text(pTHX_ const U8 * const s, STRLE
 #define PERL_ARGS_ASSERT_UNEXPECTED_NON_CONTINUATION_TEXT      \
        assert(s)
 
+STATIC void    S_warn_on_first_deprecated_use(pTHX_ const char * const name, const char * const alternative, const bool use_locale, const char * const file, const unsigned line);
+#define PERL_ARGS_ASSERT_WARN_ON_FIRST_DEPRECATED_USE  \
+       assert(name); assert(alternative); assert(file)
 #endif
 #if defined(PERL_IN_UTF8_C) || defined(PERL_IN_PP_C)
 PERL_CALLCONV UV       Perl__to_upper_title_latin1(pTHX_ const U8 c, U8 *p, STRLEN *lenp, const char S_or_s);
diff --git a/sv.c b/sv.c
index e915e7d..0382e96 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -15325,6 +15325,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PL_GCB_invlist = sv_dup_inc(proto_perl->IGCB_invlist, param);
     PL_SB_invlist = sv_dup_inc(proto_perl->ISB_invlist, param);
     PL_WB_invlist = sv_dup_inc(proto_perl->IWB_invlist, param);
+    PL_seen_deprecated_macro = hv_dup_inc(proto_perl->Iseen_deprecated_macro, param);
     PL_utf8_mark       = sv_dup_inc(proto_perl->Iutf8_mark, param);
     PL_utf8_toupper    = sv_dup_inc(proto_perl->Iutf8_toupper, param);
     PL_utf8_totitle    = sv_dup_inc(proto_perl->Iutf8_totitle, param);
diff --git a/utf8.c b/utf8.c
index 9fb9e6c..de7a2e6 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -2523,17 +2523,125 @@ S_is_utf8_common_with_len(pTHX_ const U8 *const p, const U8 * const e, SV **swas
     return swash_fetch(*swash, p, TRUE) != 0;
 }
 
+STATIC void
+S_warn_on_first_deprecated_use(pTHX_ const char * const name,
+                                     const char * const alternative,
+                                     const bool use_locale,
+                                     const char * const file,
+                                     const unsigned line)
+{
+    const char * key;
+
+    PERL_ARGS_ASSERT_WARN_ON_FIRST_DEPRECATED_USE;
+
+    if (ckWARN_d(WARN_DEPRECATED)) {
+
+        key = Perl_form(aTHX_ "%s;%d;%s;%d", name, use_locale, file, line);
+       if (! hv_fetch(PL_seen_deprecated_macro, key, strlen(key), 0)) {
+            if (! PL_seen_deprecated_macro) {
+                PL_seen_deprecated_macro = newHV();
+            }
+            if (! hv_store(PL_seen_deprecated_macro, key,
+                           strlen(key), &PL_sv_undef, 0))
+            {
+               Perl_croak(aTHX_ "panic: hv_store() unexpectedly failed");
+            }
+
+                Perl_warner(aTHX_ WARN_DEPRECATED,
+                            "In %s, line %d, starting in Perl v5.30, %s() will"
+                            " require an additional parameter.  Avoid this"
+                            " message by converting to use %s().\n",
+                            file, line, name, alternative);
+        }
+    }
+}
+
 bool
-Perl__is_utf8_FOO(pTHX_ const U8 classnum, const U8 *p)
+Perl__is_utf8_FOO(pTHX_       U8   classnum,
+                        const U8   *p,
+                        const char * const name,
+                        const char * const alternative,
+                        const bool use_utf8,
+                        const bool use_locale,
+                        const char * const file,
+                        const unsigned line)
 {
     PERL_ARGS_ASSERT__IS_UTF8_FOO;
 
-    assert(classnum < _FIRST_NON_SWASH_CC);
+    warn_on_first_deprecated_use(name, alternative, use_locale, file, line);
+
+    if (use_utf8 && UTF8_IS_ABOVE_LATIN1(*p)) {
+        SV * invlist;
+
+        switch (classnum) {
+            case _CC_WORDCHAR:
+            case _CC_DIGIT:
+            case _CC_ALPHA:
+            case _CC_LOWER:
+            case _CC_UPPER:
+            case _CC_PUNCT:
+            case _CC_PRINT:
+            case _CC_ALPHANUMERIC:
+            case _CC_GRAPH:
+            case _CC_CASED:
+
+                return is_utf8_common(p,
+                                      &PL_utf8_swash_ptrs[classnum],
+                                      swash_property_names[classnum],
+                                      PL_XPosix_ptrs[classnum]);
+
+            case _CC_SPACE:
+                return is_XPERLSPACE_high(p);
+            case _CC_BLANK:
+                return is_HORIZWS_high(p);
+            case _CC_XDIGIT:
+                return is_XDIGIT_high(p);
+            case _CC_CNTRL:
+                return 0;
+            case _CC_ASCII:
+                return 0;
+            case _CC_VERTSPACE:
+                return is_VERTWS_high(p);
+            case _CC_IDFIRST:
+                if (! PL_utf8_perl_idstart) {
+                    invlist = _new_invlist_C_array(_Perl_IDStart_invlist);
+                }
+                return is_utf8_common(p, &PL_utf8_perl_idstart, "_Perl_IDStart", invlist);
+            case _CC_IDCONT:
+                if (! PL_utf8_perl_idcont) {
+                    invlist = _new_invlist_C_array(_Perl_IDCont_invlist);
+                }
+                return is_utf8_common(p, &PL_utf8_perl_idcont, "_Perl_IDCont", invlist);
+        }
+    }
+
+    /* idcont is the same as wordchar below 256 */
+    if (classnum == _CC_IDCONT) {
+        classnum = _CC_WORDCHAR;
+    }
+    else if (classnum == _CC_IDFIRST) {
+        if (*p == '_') {
+            return TRUE;
+        }
+        classnum = _CC_ALPHA;
+    }
+
+    if (! use_locale) {
+        if (! use_utf8 || UTF8_IS_INVARIANT(*p)) {
+            return _generic_isCC(*p, classnum);
+        }
 
-    return is_utf8_common(p,
-                          &PL_utf8_swash_ptrs[classnum],
-                          swash_property_names[classnum],
-                          PL_XPosix_ptrs[classnum]);
+        return _generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p + 1 )), classnum);
+    }
+    else {
+        if (! use_utf8 || UTF8_IS_INVARIANT(*p)) {
+            return isFOO_lc(classnum, *p);
+        }
+
+        return isFOO_lc(classnum, EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p + 1 )));
+    }
+
+    NOT_REACHED; /* NOTREACHED */
 }
 
 bool
@@ -2552,19 +2660,6 @@ Perl__is_utf8_FOO_with_len(pTHX_ const U8 classnum, const U8 *p,
 }
 
 bool
-Perl__is_utf8_perl_idstart(pTHX_ const U8 *p)
-{
-    SV* invlist = NULL;
-
-    PERL_ARGS_ASSERT__IS_UTF8_PERL_IDSTART;
-
-    if (! PL_utf8_perl_idstart) {
-        invlist = _new_invlist_C_array(_Perl_IDStart_invlist);
-    }
-    return is_utf8_common(p, &PL_utf8_perl_idstart, "_Perl_IDStart", invlist);
-}
-
-bool
 Perl__is_utf8_perl_idstart_with_len(pTHX_ const U8 *p, const U8 * const e)
 {
     SV* invlist = NULL;
@@ -2589,19 +2684,6 @@ Perl__is_utf8_xidstart(pTHX_ const U8 *p)
 }
 
 bool
-Perl__is_utf8_perl_idcont(pTHX_ const U8 *p)
-{
-    SV* invlist = NULL;
-
-    PERL_ARGS_ASSERT__IS_UTF8_PERL_IDCONT;
-
-    if (! PL_utf8_perl_idcont) {
-        invlist = _new_invlist_C_array(_Perl_IDCont_invlist);
-    }
-    return is_utf8_common(p, &PL_utf8_perl_idcont, "_Perl_IDCont", invlist);
-}
-
-bool
 Perl__is_utf8_perl_idcont_with_len(pTHX_ const U8 *p, const U8 * const e)
 {
     SV* invlist = NULL;
diff --git a/utf8.h b/utf8.h
index d7c4e1a..2ec14fc 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -672,24 +672,30 @@ with a ptr argument.
  * beginning of a utf8 character.  Now that foo_utf8() determines that itself,
  * no need to do it again here
  */
-#define isIDFIRST_lazy_if(p,UTF) ((IN_BYTES || !UTF)                \
-                                ? isIDFIRST(*(p))                  \
-                                : isIDFIRST_utf8((const U8*)p))
-#define isWORDCHAR_lazy_if(p,UTF)   ((IN_BYTES || (!UTF))           \
-                                ? isWORDCHAR(*(p))                 \
-                                : isWORDCHAR_utf8((const U8*)p))
-#define isALNUM_lazy_if(p,UTF)   isWORDCHAR_lazy_if(p,UTF)
+#define isIDFIRST_lazy_if(p,UTF)                                            \
+            _is_utf8_FOO(_CC_IDFIRST, (const U8 *) p, "isIDFIRST_lazy_if",  \
+                         "isIDFIRST_lazy_if_safe",                          \
+                         cBOOL(UTF && ! IN_BYTES), 0, __FILE__,__LINE__)
 
 #define isIDFIRST_lazy_if_safe(p, e, UTF)                                   \
                    ((IN_BYTES || !UTF)                                      \
                      ? isIDFIRST(*(p))                                      \
                      : isIDFIRST_utf8_safe(p, e))
 
+#define isWORDCHAR_lazy_if(p,UTF)                                           \
+            _is_utf8_FOO(_CC_IDFIRST, (const U8 *) p, "isWORDCHAR_lazy_if", \
+                         "isWORDCHAR_lazy_if_safe",                         \
+                         cBOOL(UTF && ! IN_BYTES), 0, __FILE__,__LINE__)
+
 #define isWORDCHAR_lazy_if_safe(p, e, UTF)                                  \
                    ((IN_BYTES || !UTF)                                      \
                      ? isWORDCHAR(*(p))                                     \
                      : isWORDCHAR_utf8_safe((U8 *) p, (U8 *) e))
 
+#define isALNUM_lazy_if(p,UTF)                                              \
+            _is_utf8_FOO(_CC_IDFIRST, (const U8 *) p, "isALNUM_lazy_if",    \
+                         "isWORDCHAR_lazy_if_safe",                         \
+                         cBOOL(UTF && ! IN_BYTES), 0, __FILE__,__LINE__)
 
 #define UTF8_MAXLEN UTF8_MAXBYTES