Add functions for getting ctype ALNUMC
authorKarl Williamson <public@khwilliamson.com>
Mon, 3 Dec 2012 04:01:12 +0000 (21:01 -0700)
committerKarl Williamson <public@khwilliamson.com>
Sun, 9 Dec 2012 17:30:00 +0000 (10:30 -0700)
We think this is meant to stand for C's alphanumeric, that is what is
matched by POSIX [:alnum:].  There were not functions and a dedicated
swash available for accessing it.  Future commits will want to use
these.

embed.fnc
embed.h
embedvar.h
handy.h
intrpvar.h
proto.h
sv.c
utf8.c

index fc2c2b8..04603ed 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -606,6 +606,7 @@ ApPR        |U32    |to_uni_upper_lc|U32 c
 ApPR   |U32    |to_uni_title_lc|U32 c
 ApPR   |U32    |to_uni_lower_lc|U32 c
 ApPR   |bool   |is_uni_alnum   |UV c
+AMpPR  |bool   |is_uni_alnumc  |UV c
 ApPR   |bool   |is_uni_idfirst |UV c
 ApPR   |bool   |is_uni_alpha   |UV c
 ApPR   |bool   |is_uni_ascii   |UV c
@@ -636,6 +637,7 @@ Ap  |UV     |to_uni_lower   |UV c|NN U8 *p|NN STRLEN *lenp
 Amp    |UV     |to_uni_fold    |UV c|NN U8 *p|NN STRLEN *lenp
 AMp    |UV     |_to_uni_fold_flags|UV c|NN U8 *p|NN STRLEN *lenp|const U8 flags
 ApPR   |bool   |is_uni_alnum_lc|UV c
+AMpPR  |bool   |is_uni_alnumc_lc|UV c
 ApPR   |bool   |is_uni_idfirst_lc|UV c
 AMpR   |bool   |_is_uni_perl_idstart|UV c
 ApPR   |bool   |is_uni_alpha_lc|UV c
@@ -657,6 +659,7 @@ Anpd        |bool   |is_utf8_string |NN const U8 *s|STRLEN len
 Anpdmb |bool   |is_utf8_string_loc|NN const U8 *s|STRLEN len|NULLOK const U8 **ep
 Anpd   |bool   |is_utf8_string_loclen|NN const U8 *s|STRLEN len|NULLOK const U8 **ep|NULLOK STRLEN *el
 ApR    |bool   |is_utf8_alnum  |NN const U8 *p
+AMpR   |bool   |is_utf8_alnumc |NN const U8 *p
 ApR    |bool   |is_utf8_idfirst|NN const U8 *p
 ApR    |bool   |is_utf8_xidfirst|NN const U8 *p
 AMpR   |bool   |_is_utf8_perl_idstart|NN const U8 *p
diff --git a/embed.h b/embed.h
index 95f9943..ad691a6 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define is_lvalue_sub()                Perl_is_lvalue_sub(aTHX)
 #define is_uni_alnum(a)                Perl_is_uni_alnum(aTHX_ a)
 #define is_uni_alnum_lc(a)     Perl_is_uni_alnum_lc(aTHX_ a)
+#define is_uni_alnumc(a)       Perl_is_uni_alnumc(aTHX_ a)
+#define is_uni_alnumc_lc(a)    Perl_is_uni_alnumc_lc(aTHX_ a)
 #define is_uni_alpha(a)                Perl_is_uni_alpha(aTHX_ a)
 #define is_uni_alpha_lc(a)     Perl_is_uni_alpha_lc(aTHX_ a)
 #define is_uni_ascii(a)                Perl_is_uni_ascii(aTHX_ a)
 #define is_uni_xdigit(a)       Perl_is_uni_xdigit(aTHX_ a)
 #define is_uni_xdigit_lc(a)    Perl_is_uni_xdigit_lc(aTHX_ a)
 #define is_utf8_alnum(a)       Perl_is_utf8_alnum(aTHX_ a)
+#define is_utf8_alnumc(a)      Perl_is_utf8_alnumc(aTHX_ a)
 #define is_utf8_alpha(a)       Perl_is_utf8_alpha(aTHX_ a)
 #define is_utf8_ascii(a)       Perl_is_utf8_ascii(aTHX_ a)
 #define is_utf8_blank(a)       Perl_is_utf8_blank(aTHX_ a)
index beb3bd2..05438e2 100644 (file)
 #define PL_utf8_X_extend       (vTHX->Iutf8_X_extend)
 #define PL_utf8_X_regular_begin        (vTHX->Iutf8_X_regular_begin)
 #define PL_utf8_alnum          (vTHX->Iutf8_alnum)
+#define PL_utf8_alnumc         (vTHX->Iutf8_alnumc)
 #define PL_utf8_alpha          (vTHX->Iutf8_alpha)
 #define PL_utf8_charname_begin (vTHX->Iutf8_charname_begin)
 #define PL_utf8_charname_continue      (vTHX->Iutf8_charname_continue)
diff --git a/handy.h b/handy.h
index ab1cfa9..8c68ea6 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -936,6 +936,7 @@ EXTCONST U32 PL_charclass[];
 #define isBLANK_uni(c)          _generic_uni(_CC_BLANK, is_HORIZWS_cp_high, c)
 #define isIDFIRST_uni(c)        _generic_uni(_CC_IDFIRST, _is_uni_perl_idstart, c)
 #define isALPHA_uni(c)          _generic_uni(_CC_ALPHA, is_uni_alpha, c)
+#define isALNUMC_uni(c)     _generic_uni(_CC_ALNUMC, is_uni_alnumc, c)
 #define isSPACE_uni(c)          _generic_uni(_CC_SPACE, is_XPERLSPACE_cp_high, c)
 #define isVERTWS_uni(c)         _generic_uni(_CC_VERTSPACE, is_VERTWS_cp_high, c)
 #define isDIGIT_uni(c)          _generic_uni(_CC_DIGIT, is_uni_digit, c)
@@ -968,6 +969,7 @@ EXTCONST U32 PL_charclass[];
 #define isIDFIRST_LC_uvchr(c)  _gnrc_is_LC_uvchr(isIDFIRST_LC,                 \
                                                         is_uni_idfirst_lc, c)
 #define isALPHA_LC_uvchr(c)  _gnrc_is_LC_uvchr(isALPHA_LC, is_uni_alpha_lc, c)
+#define isALNUMC_LC_uvchr(c)  _gnrc_is_LC_uvchr(isALNUMC_LC, is_uni_alnumc_lc, c)
 #define isSPACE_LC_uvchr(c)  _gnrc_is_LC_uvchr(isSPACE_LC,                     \
                                                        is_XPERLSPACE_cp_high, c)
 #define isDIGIT_LC_uvchr(c)  _gnrc_is_LC_uvchr(isDIGIT_LC, is_uni_digit_lc, c)
@@ -1018,6 +1020,7 @@ EXTCONST U32 PL_charclass[];
 
 #define isIDCONT_utf8(p)        _generic_utf8(_CC_WORDCHAR, is_utf8_xidcont, p)
 #define isALPHA_utf8(p)         _generic_utf8(_CC_ALPHA, is_utf8_alpha, p)
+#define isALNUMC_utf8(p)        _generic_utf8(_CC_ALNUMC, is_utf8_alnumc, p)
 #define isBLANK_utf8(p)         _generic_utf8(_CC_BLANK, is_HORIZWS_high, p)
 #define isSPACE_utf8(p)         _generic_utf8(_CC_SPACE, is_XPERLSPACE_high, p)
 #define isVERTWS_utf8(p)        _generic_utf8(_CC_VERTSPACE, is_VERTWS_high, p)
index f58c0d1..d905208 100644 (file)
@@ -614,6 +614,7 @@ PERLVAR(I, HasMultiCharFold,   SV *)
 
 /* utf8 character class swashes */
 PERLVAR(I, utf8_alnum, SV *)   /* Should really be named "utf8_wordchar" */
+PERLVAR(I, utf8_alnumc,        SV *)
 PERLVAR(I, utf8_alpha, SV *)
 PERLVAR(I, utf8_graph, SV *)
 PERLVAR(I, utf8_digit, SV *)
diff --git a/proto.h b/proto.h
index 1a29b2f..ec49374 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -1681,6 +1681,14 @@ PERL_CALLCONV bool       Perl_is_uni_alnum_lc(pTHX_ UV c)
                        __attribute__warn_unused_result__
                        __attribute__pure__;
 
+PERL_CALLCONV bool     Perl_is_uni_alnumc(pTHX_ UV c)
+                       __attribute__warn_unused_result__
+                       __attribute__pure__;
+
+PERL_CALLCONV bool     Perl_is_uni_alnumc_lc(pTHX_ UV c)
+                       __attribute__warn_unused_result__
+                       __attribute__pure__;
+
 PERL_CALLCONV bool     Perl_is_uni_alpha(pTHX_ UV c)
                        __attribute__warn_unused_result__
                        __attribute__pure__;
@@ -1791,6 +1799,12 @@ PERL_CALLCONV bool       Perl_is_utf8_alnum(pTHX_ const U8 *p)
 #define PERL_ARGS_ASSERT_IS_UTF8_ALNUM \
        assert(p)
 
+PERL_CALLCONV bool     Perl_is_utf8_alnumc(pTHX_ const U8 *p)
+                       __attribute__warn_unused_result__
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_IS_UTF8_ALNUMC        \
+       assert(p)
+
 PERL_CALLCONV bool     Perl_is_utf8_alpha(pTHX_ const U8 *p)
                        __attribute__warn_unused_result__
                        __attribute__nonnull__(pTHX_1);
diff --git a/sv.c b/sv.c
index c611e97..37cd39a 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -13639,6 +13639,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
 
     /* utf8 character class swashes */
     PL_utf8_alnum      = sv_dup_inc(proto_perl->Iutf8_alnum, param);
+    PL_utf8_alnumc     = sv_dup_inc(proto_perl->Iutf8_alnumc, param);
     PL_utf8_alpha      = sv_dup_inc(proto_perl->Iutf8_alpha, param);
     PL_utf8_graph      = sv_dup_inc(proto_perl->Iutf8_graph, param);
     PL_utf8_digit      = sv_dup_inc(proto_perl->Iutf8_digit, param);
diff --git a/utf8.c b/utf8.c
index 418f0d8..11af768 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1491,6 +1491,14 @@ Perl_is_uni_alnum(pTHX_ UV c)
 }
 
 bool
+Perl_is_uni_alnumc(pTHX_ UV c)
+{
+    U8 tmpbuf[UTF8_MAXBYTES+1];
+    uvchr_to_utf8(tmpbuf, c);
+    return is_utf8_alnumc(tmpbuf);
+}
+
+bool
 Perl_is_uni_idfirst(pTHX_ UV c)
 {
     U8 tmpbuf[UTF8_MAXBYTES+1];
@@ -1825,6 +1833,15 @@ Perl_is_uni_alnum_lc(pTHX_ UV c)
 }
 
 bool
+Perl_is_uni_alnumc_lc(pTHX_ UV c)
+{
+    if (c < 256) {
+        return isALNUMC_LC(UNI_TO_NATIVE(c));
+    }
+    return is_uni_alnumc(c);
+}
+
+bool
 Perl_is_uni_idfirst_lc(pTHX_ UV c)
 {
     return is_uni_idfirst(c);  /* XXX no locale support yet */
@@ -1979,6 +1996,16 @@ Perl_is_utf8_alnum(pTHX_ const U8 *p)
 }
 
 bool
+Perl_is_utf8_alnumc(pTHX_ const U8 *p)
+{
+    dVAR;
+
+    PERL_ARGS_ASSERT_IS_UTF8_ALNUMC;
+
+    return is_utf8_common(p, &PL_utf8_alnumc, "IsAlnum");
+}
+
+bool
 Perl_is_utf8_idfirst(pTHX_ const U8 *p) /* The naming is historical. */
 {
     dVAR;