This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
handy.h: Fix isBLANK_uni and isBLANK_utf8
authorKarl Williamson <public@khwilliamson.com>
Sat, 23 Jun 2012 18:57:54 +0000 (12:57 -0600)
committerKarl Williamson <public@khwilliamson.com>
Sat, 30 Jun 2012 04:22:40 +0000 (22:22 -0600)
These macros have never worked outside the Latin1 range, so this extends
them to work.

There are no tests I could find for things in handy.h, except that many
of them are called all over the place during the normal course of
events.  This commit adds a new file for such testing, containing for
now only with a few tests for the isBLANK's

13 files changed:
MANIFEST
embed.fnc
embed.h
embedvar.h
ext/XS-APItest/APItest.pm
ext/XS-APItest/APItest.xs
ext/XS-APItest/t/handy.t [new file with mode: 0644]
handy.h
intrpvar.h
perl.c
proto.h
sv.c
utf8.c

index 38f5da2..6f0e95e 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -3972,6 +3972,7 @@ ext/XS-APItest/t/gv_fetchmeth_autoload.t  XS::APItest: tests for gv_fetchmeth_aut
 ext/XS-APItest/t/gv_fetchmethod_flags.t        XS::APItest: tests for gv_fetchmethod_flags() and variants
 ext/XS-APItest/t/gv_fetchmeth.t                XS::APItest: tests for gv_fetchmeth() and variants
 ext/XS-APItest/t/gv_init.t     XS::APItest: tests for gv_init and variants
+ext/XS-APItest/t/handy.t       XS::APItest: tests for handy.h
 ext/XS-APItest/t/hash.t                XS::APItest: tests for hash related APIs
 ext/XS-APItest/t/keyword_multiline.t   test keyword plugin parsing across lines
 ext/XS-APItest/t/keyword_plugin.t      test keyword plugin mechanism
index c16dde8..fa14750 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -594,6 +594,7 @@ ApPR        |bool   |is_uni_alnum   |UV c
 ApPR   |bool   |is_uni_idfirst |UV c
 ApPR   |bool   |is_uni_alpha   |UV c
 ApPR   |bool   |is_uni_ascii   |UV c
+ApPR   |bool   |is_uni_blank   |UV c
 ApPR   |bool   |is_uni_space   |UV c
 ApPR   |bool   |is_uni_cntrl   |UV c
 ApPR   |bool   |is_uni_graph   |UV c
@@ -645,6 +646,7 @@ ApR |bool   |is_utf8_idcont |NN const U8 *p
 ApR    |bool   |is_utf8_xidcont        |NN const U8 *p
 ApR    |bool   |is_utf8_alpha  |NN const U8 *p
 ApR    |bool   |is_utf8_ascii  |NN const U8 *p
+ApR    |bool   |is_utf8_blank  |NN const U8 *p
 ApR    |bool   |is_utf8_space  |NN const U8 *p
 ApR    |bool   |is_utf8_perl_space     |NN const U8 *p
 ApR    |bool   |is_utf8_perl_word      |NN const U8 *p
diff --git a/embed.h b/embed.h
index 720e253..a4f7e45 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define is_uni_alpha_lc(a)     Perl_is_uni_alpha_lc(aTHX_ a)
 #define is_uni_ascii(a)                Perl_is_uni_ascii(aTHX_ a)
 #define is_uni_ascii_lc(a)     Perl_is_uni_ascii_lc(aTHX_ a)
+#define is_uni_blank(a)                Perl_is_uni_blank(aTHX_ a)
 #define is_uni_cntrl(a)                Perl_is_uni_cntrl(aTHX_ a)
 #define is_uni_cntrl_lc(a)     Perl_is_uni_cntrl_lc(aTHX_ a)
 #define is_uni_digit(a)                Perl_is_uni_digit(aTHX_ a)
 #define is_utf8_alnum(a)       Perl_is_utf8_alnum(aTHX_ a)
 #define is_utf8_alpha(a)       Perl_is_utf8_alpha(aTHX_ a)
 #define is_utf8_ascii(a)       Perl_is_utf8_ascii(aTHX_ a)
+#define is_utf8_blank(a)       Perl_is_utf8_blank(aTHX_ a)
 #define is_utf8_char           Perl_is_utf8_char
 #define is_utf8_char_buf       Perl_is_utf8_char_buf
 #define is_utf8_cntrl(a)       Perl_is_utf8_cntrl(aTHX_ a)
index 3922855..98efa6f 100644 (file)
 #define PL_utf8_X_prepend      (vTHX->Iutf8_X_prepend)
 #define PL_utf8_alnum          (vTHX->Iutf8_alnum)
 #define PL_utf8_alpha          (vTHX->Iutf8_alpha)
+#define PL_utf8_blank          (vTHX->Iutf8_blank)
 #define PL_utf8_digit          (vTHX->Iutf8_digit)
 #define PL_utf8_foldable       (vTHX->Iutf8_foldable)
 #define PL_utf8_foldclosures   (vTHX->Iutf8_foldclosures)
index 0eff22e..929bf49 100644 (file)
@@ -5,7 +5,7 @@ use strict;
 use warnings;
 use Carp;
 
-our $VERSION = '0.40';
+our $VERSION = '0.41';
 
 require XSLoader;
 
index 69b7066..8138ad5 100644 (file)
@@ -3456,3 +3456,17 @@ test_get_vtbl()
        RETVAL = PTR2UV(get_vtbl(-1));
     OUTPUT:
        RETVAL
+
+bool
+test_isBLANK_uni(UV ord)
+    CODE:
+        RETVAL = isBLANK_uni(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+test_isBLANK_utf8(char * p)
+    CODE:
+        RETVAL = isBLANK_utf8((U8 *) p);
+    OUTPUT:
+        RETVAL
diff --git a/ext/XS-APItest/t/handy.t b/ext/XS-APItest/t/handy.t
new file mode 100644 (file)
index 0000000..48eb5b9
--- /dev/null
@@ -0,0 +1,14 @@
+#!perl -w
+
+use strict;
+use Test::More;
+
+use XS::APItest;
+
+ok(test_isBLANK_uni(ord("\N{EM SPACE}")), "EM SPACE is blank in isBLANK_uni()");
+ok(test_isBLANK_utf8("\N{EM SPACE}"), "EM SPACE is blank in isBLANK_utf8()");
+
+ok(! test_isBLANK_uni(ord("\N{GREEK DASIA}")), "GREEK DASIA is not a blank in isBLANK_uni()");
+ok(! test_isBLANK_utf8("\N{GREEK DASIA}"), "GREEK DASIA is not a blank in isBLANK_utf8()");
+
+done_testing;
diff --git a/handy.h b/handy.h
index abfc2c2..198ea0c 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -912,6 +912,7 @@ EXTCONST U32 PL_charclass[];
 /* Note that all ignore 'use bytes' */
 
 #define isALNUM_uni(c)         generic_uni(isWORDCHAR, is_uni_alnum, c)
+#define isBLANK_uni(c)         generic_uni(isBLANK, is_uni_blank, c)
 #define isIDFIRST_uni(c)        generic_uni(isIDFIRST, is_uni_idfirst, c)
 #define isALPHA_uni(c)         generic_uni(isALPHA, is_uni_alpha, c)
 #define isSPACE_uni(c)         generic_uni(isSPACE, is_uni_space, c)
@@ -932,7 +933,6 @@ EXTCONST U32 PL_charclass[];
 
 /* Posix and regular space differ only in U+000B, which is in Latin1 */
 #define isPSXSPC_uni(c)                ((c) < 256 ? isPSXSPC_L1(c) : isSPACE_uni(c))
-#define isBLANK_uni(c)         isBLANK(c) /* could be wrong */
 
 #define isALNUM_LC_uvchr(c)    (c < 256 ? isALNUM_LC(c) : is_uni_alnum_lc(c))
 #define isIDFIRST_LC_uvchr(c)  (c < 256 ? isIDFIRST_LC(c) : is_uni_idfirst_lc(c))
@@ -981,6 +981,7 @@ EXTCONST U32 PL_charclass[];
                                   : Perl__is_utf8__perl_idstart(aTHX_ p))
 #define isIDCONT_utf8(p)       generic_utf8(isWORDCHAR, is_utf8_xidcont, p)
 #define isALPHA_utf8(p)                generic_utf8(isALPHA, is_utf8_alpha, p)
+#define isBLANK_utf8(p)                generic_utf8(isBLANK, is_utf8_blank, p)
 #define isSPACE_utf8(p)                generic_utf8(isSPACE, is_utf8_space, p)
 #define isDIGIT_utf8(p)                generic_utf8(isDIGIT, is_utf8_digit, p)
 #define isUPPER_utf8(p)                generic_utf8(isUPPER, is_utf8_upper, p)
@@ -1004,11 +1005,10 @@ EXTCONST U32 PL_charclass[];
                                  ? isPSXSPC_L1(TWO_BYTE_UTF8_TO_UNI(*(p),     \
                                                                      *((p)+1)))\
                                   : isSPACE_utf8(p)))
-#define isBLANK_utf8(c)                isBLANK(c) /* could be wrong */
-
 #define isALNUM_LC_utf8(p)     isALNUM_LC_uvchr(valid_utf8_to_uvchr(p,  0))
 #define isIDFIRST_LC_utf8(p)   isIDFIRST_LC_uvchr(valid_utf8_to_uvchr(p,  0))
 #define isALPHA_LC_utf8(p)     isALPHA_LC_uvchr(valid_utf8_to_uvchr(p,  0))
+#define isBLANK_LC_utf8(p)     isBLANK_LC_uvchr(valid_utf8_to_uvchr(p,  0))
 #define isSPACE_LC_utf8(p)     isSPACE_LC_uvchr(valid_utf8_to_uvchr(p,  0))
 #define isDIGIT_LC_utf8(p)     isDIGIT_LC_uvchr(valid_utf8_to_uvchr(p,  0))
 #define isUPPER_LC_utf8(p)     isUPPER_LC_uvchr(valid_utf8_to_uvchr(p,  0))
@@ -1020,7 +1020,6 @@ EXTCONST U32 PL_charclass[];
 #define isPUNCT_LC_utf8(p)     isPUNCT_LC_uvchr(valid_utf8_to_uvchr(p,  0))
 
 #define isPSXSPC_LC_utf8(c)    (isSPACE_LC_utf8(c) ||(c) == '\f')
-#define isBLANK_LC_utf8(c)     isBLANK(c) /* could be wrong */
 
 /* This conversion works both ways, strangely enough. On EBCDIC platforms,
  * CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII */
index ffcac08..3e9600f 100644 (file)
@@ -614,6 +614,7 @@ PERLVAR(I, VertSpace,   SV *)
 /* utf8 character class swashes */
 PERLVAR(I, utf8_alnum, SV *)
 PERLVAR(I, utf8_alpha, SV *)
+PERLVAR(I, utf8_blank, SV *)
 PERLVAR(I, utf8_space, SV *)
 PERLVAR(I, utf8_graph, SV *)
 PERLVAR(I, utf8_digit, SV *)
diff --git a/perl.c b/perl.c
index 4348954..71e958a 100644 (file)
--- a/perl.c
+++ b/perl.c
@@ -991,6 +991,7 @@ perl_destruct(pTHXx)
     /* clear utf8 character classes */
     SvREFCNT_dec(PL_utf8_alnum);
     SvREFCNT_dec(PL_utf8_alpha);
+    SvREFCNT_dec(PL_utf8_blank);
     SvREFCNT_dec(PL_utf8_space);
     SvREFCNT_dec(PL_utf8_graph);
     SvREFCNT_dec(PL_utf8_digit);
@@ -1009,6 +1010,7 @@ perl_destruct(pTHXx)
     SvREFCNT_dec(PL_utf8_foldclosures);
     PL_utf8_alnum      = NULL;
     PL_utf8_alpha      = NULL;
+    PL_utf8_blank      = NULL;
     PL_utf8_space      = NULL;
     PL_utf8_graph      = NULL;
     PL_utf8_digit      = NULL;
diff --git a/proto.h b/proto.h
index 272f486..9752490 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -1673,6 +1673,10 @@ PERL_CALLCONV bool       Perl_is_uni_ascii_lc(pTHX_ UV c)
                        __attribute__warn_unused_result__
                        __attribute__pure__;
 
+PERL_CALLCONV bool     Perl_is_uni_blank(pTHX_ UV c)
+                       __attribute__warn_unused_result__
+                       __attribute__pure__;
+
 PERL_CALLCONV bool     Perl_is_uni_cntrl(pTHX_ UV c)
                        __attribute__warn_unused_result__
                        __attribute__pure__;
@@ -1831,6 +1835,12 @@ PERL_CALLCONV bool       Perl_is_utf8_ascii(pTHX_ const U8 *p)
 #define PERL_ARGS_ASSERT_IS_UTF8_ASCII \
        assert(p)
 
+PERL_CALLCONV bool     Perl_is_utf8_blank(pTHX_ const U8 *p)
+                       __attribute__warn_unused_result__
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_IS_UTF8_BLANK \
+       assert(p)
+
 PERL_CALLCONV STRLEN   Perl_is_utf8_char(const U8 *s)
                        __attribute__deprecated__
                        __attribute__nonnull__(1);
diff --git a/sv.c b/sv.c
index 7146f38..8b054c1 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -13443,6 +13443,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     /* utf8 character class swashes */
     PL_utf8_alnum      = sv_dup_inc(proto_perl->Iutf8_alnum, param);
     PL_utf8_alpha      = sv_dup_inc(proto_perl->Iutf8_alpha, param);
+    PL_utf8_blank      = sv_dup_inc(proto_perl->Iutf8_blank, param);
     PL_utf8_space      = sv_dup_inc(proto_perl->Iutf8_space, param);
     PL_utf8_graph      = sv_dup_inc(proto_perl->Iutf8_graph, param);
     PL_utf8_digit      = sv_dup_inc(proto_perl->Iutf8_digit, param);
diff --git a/utf8.c b/utf8.c
index 510db6c..2592728 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1510,6 +1510,14 @@ Perl_is_uni_ascii(pTHX_ UV c)
 }
 
 bool
+Perl_is_uni_blank(pTHX_ UV c)
+{
+    U8 tmpbuf[UTF8_MAXBYTES+1];
+    uvchr_to_utf8(tmpbuf, c);
+    return is_utf8_blank(tmpbuf);
+}
+
+bool
 Perl_is_uni_space(pTHX_ UV c)
 {
     U8 tmpbuf[UTF8_MAXBYTES+1];
@@ -1830,6 +1838,12 @@ Perl_is_uni_ascii_lc(pTHX_ UV c)
 }
 
 bool
+Perl_is_uni_blank_lc(pTHX_ UV c)
+{
+    return is_uni_blank(c);    /* XXX no locale support yet */
+}
+
+bool
 Perl_is_uni_space_lc(pTHX_ UV c)
 {
     return is_uni_space(c);    /* XXX no locale support yet */
@@ -2036,6 +2050,16 @@ Perl_is_utf8_ascii(pTHX_ const U8 *p)
 }
 
 bool
+Perl_is_utf8_blank(pTHX_ const U8 *p)
+{
+    dVAR;
+
+    PERL_ARGS_ASSERT_IS_UTF8_BLANK;
+
+    return is_utf8_common(p, &PL_utf8_blank, "XPosixBlank");
+}
+
+bool
 Perl_is_utf8_space(pTHX_ const U8 *p)
 {
     dVAR;