From add4123adc9db7056121c97112791dbf273707c4 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 19 Nov 2012 14:36:12 -0700 Subject: [PATCH] Refactor is(SPACE|PSXSPC)_(uni|utf8) macros and utf8.c This refactors the isSPACE_uni, isSPACE_utf8, isPSXSPC_uni, and isPSXSPC_utf8 macros in handy.h, so that no function call need be done to handle above Latin1 input. These macros are quite small, and unlikely to grow over time, as Unicode has mostly finished adding white space equivalents to the Standard. The functions that implement these in utf8.c are also changed to use the macros instead of generating a swash. This should speed things up slightly, with less memory used over time as the swash fills. --- handy.h | 9 +++++---- utf8.c | 6 ++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/handy.h b/handy.h index 80792e3..178d975 100644 --- a/handy.h +++ b/handy.h @@ -929,7 +929,7 @@ EXTCONST U32 PL_charclass[]; #define isBLANK_uni(c) _generic_uni(_CC_BLANK, is_HORIZWS_cp_high, c) #define isIDFIRST_uni(c) _generic_uni(_CC_IDFIRST, is_uni_idfirst, c) #define isALPHA_uni(c) _generic_uni(_CC_ALPHA, is_uni_alpha, c) -#define isSPACE_uni(c) _generic_uni(_CC_SPACE, is_uni_space, c) +#define isSPACE_uni(c) _generic_uni(_CC_SPACE, is_XPERLSPACE_cp_high, c) #define isVERTWS_uni(c) _generic_uni(_CC_VERTSPACE, is_VERTWS_cp_high, c) #define isDIGIT_uni(c) _generic_uni(_CC_DIGIT, is_uni_digit, c) #define isUPPER_uni(c) _generic_uni(_CC_UPPER, is_uni_upper, c) @@ -945,7 +945,8 @@ EXTCONST U32 PL_charclass[]; #define isXDIGIT_uni(c) _generic_uni(_CC_XDIGIT, is_XDIGIT_cp_high, c) /* Posix and regular space differ only in U+000B, which is in Latin1 */ -#define isPSXSPC_uni(c) _generic_uni(_CC_PSXSPC, is_uni_space, c) +#define isPSXSPC_uni(c) _generic_uni(_CC_PSXSPC, \ + is_XPERLSPACE_cp_high, c) #define toUPPER_uni(c,s,l) to_uni_upper(c,s,l) #define toTITLE_uni(c,s,l) to_uni_title(c,s,l) @@ -1001,7 +1002,7 @@ EXTCONST U32 PL_charclass[]; #define isIDCONT_utf8(p) 
_generic_utf8(_CC_WORDCHAR, is_utf8_xidcont, p) #define isALPHA_utf8(p) _generic_utf8(_CC_ALPHA, is_utf8_alpha, p) #define isBLANK_utf8(p) _generic_utf8(_CC_BLANK, is_HORIZWS_high, p) -#define isSPACE_utf8(p) _generic_utf8(_CC_SPACE, is_utf8_space, p) +#define isSPACE_utf8(p) _generic_utf8(_CC_SPACE, is_XPERLSPACE_high, p) #define isVERTWS_utf8(p) _generic_utf8(_CC_VERTSPACE, is_VERTWS_high, p) #define isDIGIT_utf8(p) _generic_utf8(_CC_DIGIT, is_utf8_digit, p) #define isUPPER_utf8(p) _generic_utf8(_CC_UPPER, is_utf8_upper, p) @@ -1021,7 +1022,7 @@ EXTCONST U32 PL_charclass[]; /* Posix and regular space differ only in U+000B, which is in ASCII (and hence * Latin1 */ -#define isPSXSPC_utf8(p) _generic_utf8(_CC_PSXSPC, is_utf8_space, p) +#define isPSXSPC_utf8(p) _generic_utf8(_CC_PSXSPC, is_XPERLSPACE_high, p) #define isALNUM_LC_utf8(p) isALNUM_LC_uvchr(valid_utf8_to_uvchr(p, 0)) #define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uvchr(valid_utf8_to_uvchr(p, 0)) diff --git a/utf8.c b/utf8.c index 7092d06..5621317 100644 --- a/utf8.c +++ b/utf8.c @@ -1521,9 +1521,7 @@ Perl_is_uni_blank(pTHX_ UV c) bool Perl_is_uni_space(pTHX_ UV c) { - U8 tmpbuf[UTF8_MAXBYTES+1]; - uvchr_to_utf8(tmpbuf, c); - return is_utf8_space(tmpbuf); + return isSPACE_uni(c); } bool @@ -2067,7 +2065,7 @@ Perl_is_utf8_space(pTHX_ const U8 *p) PERL_ARGS_ASSERT_IS_UTF8_SPACE; - return is_utf8_common(p, &PL_utf8_space, "IsXPerlSpace"); + return isSPACE_utf8(p); } bool -- 1.8.3.1