From 567b353c280f568f67de0e8d8b78d7abc7c931f7 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 14 Dec 2016 13:02:06 -0700 Subject: [PATCH 1/1] For character case changing, create macros and use them This creates several macros that future commits will use to provide a layer between callers and the underlying case-changing functions. --- handy.h | 6 ++++++ pp.c | 18 +++++++++--------- regcomp.c | 2 +- regexec.c | 10 +++++----- utf8.c | 8 ++++---- 5 files changed, 25 insertions(+), 19 deletions(-) diff --git a/handy.h b/handy.h index 0d94ff1..4c7a82f 100644 --- a/handy.h +++ b/handy.h @@ -1880,6 +1880,12 @@ _generic_utf8_safe(classnum, p, e, _is_utf8_FOO_with_len(classnum, p, e)) #define toTITLE_utf8(p,s,l) to_utf8_title(p,s,l) #define toUPPER_utf8(p,s,l) to_utf8_upper(p,s,l) +/* For internal core use only, subject to change */ +#define _toFOLD_utf8_flags(p,s,l,f) _to_utf8_fold_flags (p,s,l,f) +#define _toLOWER_utf8_flags(p,s,l,f) _to_utf8_lower_flags(p,s,l,f) +#define _toTITLE_utf8_flags(p,s,l,f) _to_utf8_title_flags(p,s,l,f) +#define _toUPPER_utf8_flags(p,s,l,f) _to_utf8_upper_flags(p,s,l,f) + /* For internal core Perl use only: the base macros for defining macros like * isALPHA_LC_utf8.
These are like _generic_utf8, but if the first code point * in 'p' is within the 0-255 range, it uses locale rules from the passed-in diff --git a/pp.c b/pp.c index 6fb20f6..26b1cb3 100644 --- a/pp.c +++ b/pp.c @@ -3790,16 +3790,16 @@ PP(pp_ucfirst) ulen = UTF8SKIP(s); if (op_type == OP_UCFIRST) { #ifdef USE_LOCALE_CTYPE - _to_utf8_title_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE)); + _toTITLE_utf8_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE)); #else - _to_utf8_title_flags(s, tmpbuf, &tculen, 0); + _toTITLE_utf8_flags(s, tmpbuf, &tculen, 0); #endif } else { #ifdef USE_LOCALE_CTYPE - _to_utf8_lower_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE)); + _toLOWER_utf8_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE)); #else - _to_utf8_lower_flags(s, tmpbuf, &tculen, 0); + _toLOWER_utf8_flags(s, tmpbuf, &tculen, 0); #endif } @@ -4090,9 +4090,9 @@ PP(pp_uc) u = UTF8SKIP(s); #ifdef USE_LOCALE_CTYPE - uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); + uv = _toUPPER_utf8_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); #else - uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, 0); + uv = _toUPPER_utf8_flags(s, tmpbuf, &ulen, 0); #endif #define GREEK_CAPITAL_LETTER_IOTA 0x0399 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345 @@ -4306,9 +4306,9 @@ PP(pp_lc) STRLEN ulen; #ifdef USE_LOCALE_CTYPE - _to_utf8_lower_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); + _toLOWER_utf8_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); #else - _to_utf8_lower_flags(s, tmpbuf, &ulen, 0); + _toLOWER_utf8_flags(s, tmpbuf, &ulen, 0); #endif /* Here is where we would do context-sensitive actions. 
See the @@ -4516,7 +4516,7 @@ PP(pp_fc) const STRLEN u = UTF8SKIP(s); STRLEN ulen; - _to_utf8_fold_flags(s, tmpbuf, &ulen, flags); + _toFOLD_utf8_flags(s, tmpbuf, &ulen, flags); if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { const UV o = d - (U8*)SvPVX_const(dest); diff --git a/regcomp.c b/regcomp.c index 9f8923f..d232275 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3911,7 +3911,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, } else { STRLEN len; - _to_utf8_fold_flags(s, d, &len, FOLD_FLAGS_FULL); + _toFOLD_utf8_flags(s, d, &len, FOLD_FLAGS_FULL); d += len; } s += s_len; diff --git a/regexec.c b/regexec.c index 340a49e..8b5caa7 100644 --- a/regexec.c +++ b/regexec.c @@ -1500,7 +1500,7 @@ STMT_START { uscan += len; \ len=0; \ } else { \ - uvc = _to_utf8_fold_flags( (const U8*) uc, foldbuf, &foldlen, flags); \ + uvc = _toFOLD_utf8_flags( (const U8*) uc, foldbuf, &foldlen, flags); \ len = UTF8SKIP(uc); \ skiplen = UVCHR_SKIP( uvc ); \ foldlen -= skiplen; \ @@ -4133,10 +4133,10 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, } else { STRLEN len; - _to_utf8_fold_flags(s, - d, - &len, - FOLD_FLAGS_FULL | FOLD_FLAGS_LOCALE); + _toFOLD_utf8_flags(s, + d, + &len, + FOLD_FLAGS_FULL | FOLD_FLAGS_LOCALE); d += len; s += UTF8SKIP(s); } diff --git a/utf8.c b/utf8.c index 5fca6f7..94c5d81 100644 --- a/utf8.c +++ b/utf8.c @@ -2428,13 +2428,13 @@ Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, U8 flags) uvchr_to_utf8(p, c); return CALL_FOLD_CASE(c, p, p, lenp, flags & FOLD_FLAGS_FULL); } - else { /* Otherwise, _to_utf8_fold_flags has the intelligence to deal with + else { /* Otherwise, _toFOLD_utf8_flags has the intelligence to deal with the special flags. 
*/ U8 utf8_c[UTF8_MAXBYTES + 1]; needs_full_generality: uvchr_to_utf8(utf8_c, c); - return _to_utf8_fold_flags(utf8_c, p, lenp, flags); + return _toFOLD_utf8_flags(utf8_c, p, lenp, flags); } } @@ -5151,7 +5151,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c *foldbuf1 = toFOLD(*p1); } else if (u1) { - _to_utf8_fold_flags(p1, foldbuf1, &n1, flags_for_folder); + _toFOLD_utf8_flags(p1, foldbuf1, &n1, flags_for_folder); } else { /* Not UTF-8, get UTF-8 fold */ _to_uni_fold_flags(*p1, foldbuf1, &n1, flags_for_folder); @@ -5175,7 +5175,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c *foldbuf2 = toFOLD(*p2); } else if (u2) { - _to_utf8_fold_flags(p2, foldbuf2, &n2, flags_for_folder); + _toFOLD_utf8_flags(p2, foldbuf2, &n2, flags_for_folder); } else { _to_uni_fold_flags(*p2, foldbuf2, &n2, flags_for_folder); -- 1.8.3.1