This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
hv.c: Stash-related UTF-8 cleanup.
authorBrian Fraser <fraserbn@gmail.com>
Sat, 23 Jul 2011 19:51:54 +0000 (16:51 -0300)
committerFather Chrysostomos <sprout@cpan.org>
Thu, 6 Oct 2011 20:01:01 +0000 (13:01 -0700)
This adds a new static function to hv.c, hek_eq_pvn_flags,
which replaces several memEQs.

It also cleans up hv_name_set and has the relevant calls
to hv_common and friends made UTF-8 aware.

Finally, it changes share_hek() to modify the hash passed
in if the pv was modified when downgrading.

hv.c

diff --git a/hv.c b/hv.c
index 6872aaa..a5a0f1a 100644 (file)
--- a/hv.c
+++ b/hv.c
@@ -1780,7 +1780,10 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags)
      * in sv_clear(), and changes here should be done there too */
     if (PL_phase != PERL_PHASE_DESTRUCT && (name = HvNAME(hv))) {
         if (PL_stashcache)
-           (void)hv_delete(PL_stashcache, name, HvNAMELEN_get(hv), G_DISCARD);
+           (void)hv_delete(PL_stashcache, name,
+                            HEK_UTF8(HvNAME_HEK(hv)) ? -HvNAMELEN_get(hv) : HvNAMELEN_get(hv),
+                            G_DISCARD
+                           );
        hv_name_set(hv, NULL, 0, 0);
     }
     hfreeentries(hv);
@@ -1793,7 +1796,9 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags)
            mro_isa_changed_in(hv);
         if (PL_stashcache)
            (void)hv_delete(
-                   PL_stashcache, name, HvENAMELEN_get(hv), G_DISCARD
+                   PL_stashcache, name,
+                    HEK_UTF8(HvENAME_HEK(hv)) ? -HvENAMELEN_get(hv) : HvENAMELEN_get(hv),
+                    G_DISCARD
                  );
       }
 
@@ -1802,7 +1807,7 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags)
       name = HvNAME(hv);
       if (flags & HV_NAME_SETALL ? !!aux->xhv_name_u.xhvnameu_name : !!name) {
         if (name && PL_stashcache)
-           (void)hv_delete(PL_stashcache, name, HvNAMELEN_get(hv), G_DISCARD);
+           (void)hv_delete(PL_stashcache, name, (HEK_UTF8(HvNAME_HEK(hv)) ? -HvNAMELEN_get(hv) : HvNAMELEN_get(hv)), G_DISCARD);
        hv_name_set(hv, NULL, 0, flags);
       }
       if((meta = aux->xhv_mro_meta)) {
@@ -2016,7 +2021,6 @@ Perl_hv_name_set(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
     HEK **spot;
 
     PERL_ARGS_ASSERT_HV_NAME_SET;
-    PERL_UNUSED_ARG(flags);
 
     if (len > I32_MAX)
        Perl_croak(aTHX_ "panic: hv name too long (%"UVuf")", (UV) len);
@@ -2077,7 +2081,29 @@ Perl_hv_name_set(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
        spot = &iter->xhv_name_u.xhvnameu_name;
     }
     PERL_HASH(hash, name, len);
-    *spot = name ? share_hek(name, len, hash) : NULL;
+    *spot = name ? share_hek(name, flags & SVf_UTF8 ? -len : len, hash) : NULL;
+}
+
+/*
+This is basically sv_eq_flags() in sv.c, but we avoid the magic
+and bytes checking.
+*/
+
+STATIC I32
+hek_eq_pvn_flags(pTHX_ const HEK *hek, const char* pv, const I32 pvlen, const U32 flags) {
+    if ( (HEK_UTF8(hek) ? 1 : 0) != (flags & SVf_UTF8 ? 1 : 0) ) {
+        if (flags & SVf_UTF8)
+            return (bytes_cmp_utf8(
+                        (const U8*)HEK_KEY(hek), HEK_LEN(hek),
+                       (const U8*)pv, pvlen) == 0);
+        else
+            return (bytes_cmp_utf8(
+                        (const U8*)pv, pvlen,
+                       (const U8*)HEK_KEY(hek), HEK_LEN(hek)) == 0);
+    }
+    else
+        return ((HEK_KEY(hek) == pv)
+                    || memEQ(HEK_KEY(hek), pv, pvlen));
 }
 
 /*
@@ -2100,7 +2126,6 @@ Perl_hv_ename_add(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
     U32 hash;
 
     PERL_ARGS_ASSERT_HV_ENAME_ADD;
-    PERL_UNUSED_ARG(flags);
 
     if (len > I32_MAX)
        Perl_croak(aTHX_ "panic: hv name too long (%"UVuf")", (UV) len);
@@ -2113,8 +2138,10 @@ Perl_hv_ename_add(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
        HEK **hekp = xhv_name + (count < 0 ? -count : count);
        while (hekp-- > xhv_name)
            if (
-            HEK_LEN(*hekp) == (I32)len && memEQ(HEK_KEY(*hekp), name, len)
-           ) {
+                 (HEK_UTF8(*hekp) || (flags & SVf_UTF8)) 
+                    ? hek_eq_pvn_flags(aTHX_ *hekp, name, (I32)len, flags)
+                   : (HEK_LEN(*hekp) == (I32)len && memEQ(HEK_KEY(*hekp), name, len))
+               ) {
                if (hekp == xhv_name && count < 0)
                    aux->xhv_name_count = -count;
                return;
@@ -2122,18 +2149,21 @@ Perl_hv_ename_add(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
        if (count < 0) aux->xhv_name_count--, count = -count;
        else aux->xhv_name_count++;
        Renew(aux->xhv_name_u.xhvnameu_names, count + 1, HEK *);
-       (aux->xhv_name_u.xhvnameu_names)[count] = share_hek(name, len, hash);
+       (aux->xhv_name_u.xhvnameu_names)[count] = share_hek(name, (flags & SVf_UTF8 ? -len : len), hash);
     }
     else {
        HEK *existing_name = aux->xhv_name_u.xhvnameu_name;
        if (
-           existing_name && HEK_LEN(existing_name) == (I32)len
-        && memEQ(HEK_KEY(existing_name), name, len)
+           existing_name && (
+             (HEK_UTF8(existing_name) || (flags & SVf_UTF8))
+                ? hek_eq_pvn_flags(aTHX_ existing_name, name, (I32)len, flags)
+               : (HEK_LEN(existing_name) == (I32)len && memEQ(HEK_KEY(existing_name), name, len))
+           )
        ) return;
        Newx(aux->xhv_name_u.xhvnameu_names, 2, HEK *);
        aux->xhv_name_count = existing_name ? 2 : -2;
        *aux->xhv_name_u.xhvnameu_names = existing_name;
-       (aux->xhv_name_u.xhvnameu_names)[1] = share_hek(name, len, hash);
+       (aux->xhv_name_u.xhvnameu_names)[1] = share_hek(name, (flags & SVf_UTF8 ? -len : len), hash);
     }
 }
 
@@ -2156,7 +2186,6 @@ Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
     struct xpvhv_aux *aux;
 
     PERL_ARGS_ASSERT_HV_ENAME_DELETE;
-    PERL_UNUSED_ARG(flags);
 
     if (len > I32_MAX)
        Perl_croak(aTHX_ "panic: hv name too long (%"UVuf")", (UV) len);
@@ -2172,8 +2201,9 @@ Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
        HEK **victim = namep + (count < 0 ? -count : count);
        while (victim-- > namep + 1)
            if (
-               HEK_LEN(*victim) == (I32)len
-            && memEQ(HEK_KEY(*victim), name, len)
+             (HEK_UTF8(*victim) || (flags & SVf_UTF8)) 
+                ? hek_eq_pvn_flags(aTHX_ *victim, name, (I32)len, flags)
+               : (HEK_LEN(*victim) == (I32)len && memEQ(HEK_KEY(*victim), name, len))
            ) {
                unshare_hek_or_pvn(*victim, 0, 0, 0);
                if (count < 0) ++aux->xhv_name_count;
@@ -2194,15 +2224,18 @@ Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags)
                return;
            }
        if (
-           count > 0 && HEK_LEN(*namep) == (I32)len
-        && memEQ(HEK_KEY(*namep),name,len)
+           count > 0 && (HEK_UTF8(*namep) || (flags & SVf_UTF8)) 
+                ? hek_eq_pvn_flags(aTHX_ *namep, name, (I32)len, flags)
+               : (HEK_LEN(*namep) == (I32)len && memEQ(HEK_KEY(*namep), name, len))
        ) {
            aux->xhv_name_count = -count;
        }
     }
     else if(
-        HEK_LEN(aux->xhv_name_u.xhvnameu_name) == (I32)len
-     && memEQ(HEK_KEY(aux->xhv_name_u.xhvnameu_name), name, len)
+        (HEK_UTF8(aux->xhv_name_u.xhvnameu_name) || (flags & SVf_UTF8)) 
+                ? hek_eq_pvn_flags(aTHX_ aux->xhv_name_u.xhvnameu_name, name, (I32)len, flags)
+               : (HEK_LEN(aux->xhv_name_u.xhvnameu_name) == (I32)len &&
+                            memEQ(HEK_KEY(aux->xhv_name_u.xhvnameu_name), name, len))
     ) {
        HEK * const namehek = aux->xhv_name_u.xhvnameu_name;
        Newx(aux->xhv_name_u.xhvnameu_names, 1, HEK *);
@@ -2637,8 +2670,10 @@ Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash)
       /* If we found we were able to downgrade the string to bytes, then
          we should flag that it needs upgrading on keys or each.  Also flag
          that we need share_hek_flags to free the string.  */
-      if (str != save)
+      if (str != save) {
+          PERL_HASH(hash, str, len);
           flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
+      }
     }
 
     return share_hek_flags (str, len, hash, flags);