This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Convert core to use toFOO_utf8_safe()
authorKarl Williamson <khw@cpan.org>
Mon, 19 Dec 2016 18:12:48 +0000 (11:12 -0700)
committerKarl Williamson <khw@cpan.org>
Sat, 24 Dec 2016 05:36:34 +0000 (22:36 -0700)
handy.h
pp.c
regcomp.c
regexec.c
utf8.c

diff --git a/handy.h b/handy.h
index add273d..129e98b 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -1926,15 +1926,15 @@ _generic_utf8_safe(classnum, p, e, _is_utf8_FOO_with_len(classnum, p, e))
 #define toUPPER_utf8(p,s,l)    to_utf8_upper(p,s,l)
 
 /* For internal core use only, subject to change */
-#define _toFOLD_utf8_flags(p,s,l,f)  _to_utf8_fold_flags (p,NULL,s,l,f)
-#define _toLOWER_utf8_flags(p,s,l,f) _to_utf8_lower_flags(p,NULL,s,l,f)
-#define _toTITLE_utf8_flags(p,s,l,f) _to_utf8_title_flags(p,NULL,s,l,f)
-#define _toUPPER_utf8_flags(p,s,l,f) _to_utf8_upper_flags(p,NULL,s,l,f)
-
-#define toFOLD_utf8_safe(p,e,s,l)   _to_utf8_fold_flags(p,e,s,l, FOLD_FLAGS_FULL)
-#define toLOWER_utf8_safe(p,e,s,l)  _to_utf8_lower_flags(p,e,s,l, 0)
-#define toTITLE_utf8_safe(p,e,s,l)  _to_utf8_title_flags(p,e,s,l, 0)
-#define toUPPER_utf8_safe(p,e,s,l)  _to_utf8_upper_flags(p,e,s,l, 0)
+#define _toFOLD_utf8_flags(p,e,s,l,f)  _to_utf8_fold_flags (p,e,s,l,f)
+#define _toLOWER_utf8_flags(p,e,s,l,f) _to_utf8_lower_flags(p,e,s,l,f)
+#define _toTITLE_utf8_flags(p,e,s,l,f) _to_utf8_title_flags(p,e,s,l,f)
+#define _toUPPER_utf8_flags(p,e,s,l,f) _to_utf8_upper_flags(p,e,s,l,f)
+
+#define toFOLD_utf8_safe(p,e,s,l)   _toFOLD_utf8_flags(p,e,s,l, FOLD_FLAGS_FULL)
+#define toLOWER_utf8_safe(p,e,s,l)  _toLOWER_utf8_flags(p,e,s,l, 0)
+#define toTITLE_utf8_safe(p,e,s,l)  _toTITLE_utf8_flags(p,e,s,l, 0)
+#define toUPPER_utf8_safe(p,e,s,l)  _toUPPER_utf8_flags(p,e,s,l, 0)
 
 /* For internal core Perl use only: the base macros for defining macros like
  * isALPHA_LC_utf8.  These are like _generic_utf8, but if the first code point
diff --git a/pp.c b/pp.c
index 9dad252..300d786 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -3790,16 +3790,16 @@ PP(pp_ucfirst)
         ulen = UTF8SKIP(s);
         if (op_type == OP_UCFIRST) {
 #ifdef USE_LOCALE_CTYPE
-           _toTITLE_utf8_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
+           _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
 #else
-           _toTITLE_utf8_flags(s, tmpbuf, &tculen, 0);
+           _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, 0);
 #endif
        }
         else {
 #ifdef USE_LOCALE_CTYPE
-           _toLOWER_utf8_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
+           _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
 #else
-           _toLOWER_utf8_flags(s, tmpbuf, &tculen, 0);
+           _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, 0);
 #endif
        }
 
@@ -4090,9 +4090,9 @@ PP(pp_uc)
 
             u = UTF8SKIP(s);
 #ifdef USE_LOCALE_CTYPE
-            uv = _toUPPER_utf8_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
+            uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
 #else
-            uv = _toUPPER_utf8_flags(s, tmpbuf, &ulen, 0);
+            uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, 0);
 #endif
 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
@@ -4306,9 +4306,9 @@ PP(pp_lc)
            STRLEN ulen;
 
 #ifdef USE_LOCALE_CTYPE
-           _toLOWER_utf8_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
+           _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
 #else
-           _toLOWER_utf8_flags(s, tmpbuf, &ulen, 0);
+           _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, 0);
 #endif
 
            /* Here is where we would do context-sensitive actions.  See the
@@ -4516,7 +4516,7 @@ PP(pp_fc)
             const STRLEN u = UTF8SKIP(s);
             STRLEN ulen;
 
-            _toFOLD_utf8_flags(s, tmpbuf, &ulen, flags);
+            _toFOLD_utf8_flags(s, send, tmpbuf, &ulen, flags);
 
             if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
                 const UV o = d - (U8*)SvPVX_const(dest);
index d232275..953a94d 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3911,7 +3911,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
                     }
                     else {
                         STRLEN len;
-                        _toFOLD_utf8_flags(s, d, &len, FOLD_FLAGS_FULL);
+                        _toFOLD_utf8_flags(s, s_end, d, &len, FOLD_FLAGS_FULL);
                         d += len;
                     }
                     s += s_len;
@@ -10047,7 +10047,9 @@ Perl__load_PL_utf8_foldclosures (pTHX)
         U8 dummy[UTF8_MAXBYTES_CASE+1];
 
         /* This string is just a short named one above \xff */
-        toFOLD_utf8((U8*) HYPHEN_UTF8, dummy, NULL);
+        toFOLD_utf8_safe((U8*) HYPHEN_UTF8,
+                         (U8 *) HYPHEN_UTF8 + sizeof(HYPHEN_UTF8),
+                         dummy, NULL);
         assert(PL_utf8_tofold); /* Verify that worked */
     }
     PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
@@ -10198,7 +10200,7 @@ S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
                 }
                 else {
                     STRLEN len;
-                    toFOLD_utf8(s, d, &len);
+                    toFOLD_utf8_safe(s, e, d, &len);
                     d += len;
                     s += UTF8SKIP(s);
                 }
index d9898cb..5c5241c 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -1500,8 +1500,9 @@ STMT_START {
             uscan += len;                                                           \
             len=0;                                                                  \
         } else {                                                                    \
-            uvc = _toFOLD_utf8_flags( (const U8*) uc, foldbuf, &foldlen, flags);    \
             len = UTF8SKIP(uc);                                                     \
+            uvc = _toFOLD_utf8_flags( (const U8*) uc, uc + len, foldbuf, &foldlen,  \
+                                                                            flags); \
             skiplen = UVCHR_SKIP( uvc );                                            \
             foldlen -= skiplen;                                                     \
             uscan = foldbuf + skiplen;                                              \
@@ -4134,6 +4135,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                     else {
                         STRLEN len;
                         _toFOLD_utf8_flags(s,
+                                           pat_end,
                                            d,
                                            &len,
                                            FOLD_FLAGS_FULL | FOLD_FLAGS_LOCALE);
diff --git a/utf8.c b/utf8.c
index 433b4cb..fc7415a 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -2434,7 +2434,7 @@ Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, U8 flags)
 
       needs_full_generality:
        uvchr_to_utf8(utf8_c, c);
-       return _toFOLD_utf8_flags(utf8_c, p, lenp, flags);
+       return _toFOLD_utf8_flags(utf8_c, utf8_c + sizeof(utf8_c), p, lenp, flags);
     }
 }
 
@@ -5159,7 +5159,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
                     *foldbuf1 = toFOLD(*p1);
                 }
                 else if (u1) {
-                    _toFOLD_utf8_flags(p1, foldbuf1, &n1, flags_for_folder);
+                    _toFOLD_utf8_flags(p1, e1, foldbuf1, &n1, flags_for_folder);
                 }
                 else {  /* Not UTF-8, get UTF-8 fold */
                     _to_uni_fold_flags(*p1, foldbuf1, &n1, flags_for_folder);
@@ -5183,7 +5183,7 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
                     *foldbuf2 = toFOLD(*p2);
                 }
                 else if (u2) {
-                    _toFOLD_utf8_flags(p2, foldbuf2, &n2, flags_for_folder);
+                    _toFOLD_utf8_flags(p2, e2, foldbuf2, &n2, flags_for_folder);
                 }
                 else {
                     _to_uni_fold_flags(*p2, foldbuf2, &n2, flags_for_folder);