This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
One part of pp_pack couldn't correctly handle surprises from UTF-8
[perl5.git] / hv.c
diff --git a/hv.c b/hv.c
index bc64df4..a5336c6 100644 (file)
--- a/hv.c
+++ b/hv.c
@@ -72,6 +72,7 @@ S_new_he(pTHX)
     if (!*root)
        S_more_he(aTHX);
     he = *root;
+    assert(he);
     *root = HeNEXT(he);
     UNLOCK_SV_MUTEX;
     return he;
@@ -1592,8 +1593,8 @@ Perl_hv_clear(pTHX_ HV *hv)
                    if (HeVAL(entry) && SvREADONLY(HeVAL(entry))) {
                        SV* const keysv = hv_iterkeysv(entry);
                        Perl_croak(aTHX_
-       "Attempt to delete readonly key '%"SVf"' from a restricted hash",
-                                  keysv);
+                                  "Attempt to delete readonly key '%"SVf"' from a restricted hash",
+                                  (void*)keysv);
                    }
                    SvREFCNT_dec(HeVAL(entry));
                    HeVAL(entry) = &PL_sv_placeholder;
@@ -2117,7 +2118,7 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags)
        iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
        return NULL;
     }
-#ifdef DYNAMIC_ENV_FETCH  /* set up %ENV for iteration */
+#if defined(DYNAMIC_ENV_FETCH) && !defined(__riscos__)  /* set up %ENV for iteration */
     if (!entry && SvRMAGICAL((SV*)hv) && mg_find((SV*)hv, PERL_MAGIC_env)) {
        prime_env_iter();
 #ifdef VMS
@@ -2563,6 +2564,7 @@ in C<struct refcounted_he *>.
 HV *
 Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he *chain)
 {
+    dVAR;
     HV *hv = newHV();
     U32 placeholders = 0;
     /* We could chase the chain once to get an idea of the number of keys,
@@ -2577,9 +2579,14 @@ Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he *chain)
     }
 
     while (chain) {
-       const U32 hash = HEK_HASH(chain->refcounted_he_he.hent_hek);
+#ifdef USE_ITHREADS
+       U32 hash = chain->refcounted_he_hash;
+#else
+       U32 hash = HEK_HASH(chain->refcounted_he_hek);
+#endif
        HE **oentry = &((HvARRAY(hv))[hash & max]);
        HE *entry = *oentry;
+       SV *value;
 
        for (; entry; entry = HeNEXT(entry)) {
            if (HeHASH(entry) == hash) {
@@ -2589,12 +2596,53 @@ Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he *chain)
        assert (!entry);
        entry = new_HE();
 
-       HeKEY_hek(entry) = share_hek_hek(chain->refcounted_he_he.hent_hek);
+#ifdef USE_ITHREADS
+       HeKEY_hek(entry)
+           = share_hek_flags(/* A big expression to find the key offset */
+                             (((chain->refcounted_he_data[0]
+                                & HVrhek_typemask) == HVrhek_PV)
+                              ? chain->refcounted_he_val.refcounted_he_u_len
+                              + 1 : 0) + 1 + chain->refcounted_he_data,
+                             chain->refcounted_he_keylen,
+                             chain->refcounted_he_hash,
+                             (chain->refcounted_he_data[0]
+                              & (HVhek_UTF8|HVhek_WASUTF8)));
+#else
+       HeKEY_hek(entry) = share_hek_hek(chain->refcounted_he_hek);
+#endif
 
-       HeVAL(entry) = chain->refcounted_he_he.he_valu.hent_val;
-       if (HeVAL(entry) == &PL_sv_placeholder)
+       switch(chain->refcounted_he_data[0] & HVrhek_typemask) {
+       case HVrhek_undef:
+           value = newSV(0);
+           break;
+       case HVrhek_delete:
+           value = &PL_sv_placeholder;
            placeholders++;
-       SvREFCNT_inc_void_NN(HeVAL(entry));
+           break;
+       case HVrhek_IV:
+           value = (chain->refcounted_he_data[0] & HVrhek_UV)
+               ? newSVuv(chain->refcounted_he_val.refcounted_he_u_iv)
+               : newSViv(chain->refcounted_he_val.refcounted_he_u_uv);
+           break;
+       case HVrhek_PV:
+           /* Create a string SV that directly points to the bytes in our
+              structure.  */
+           value = newSV(0);
+           sv_upgrade(value, SVt_PV);
+           SvPV_set(value, (char *) chain->refcounted_he_data + 1);
+           SvCUR_set(value, chain->refcounted_he_val.refcounted_he_u_len);
+           /* This stops anything trying to free it  */
+           SvLEN_set(value, 0);
+           SvPOK_on(value);
+           SvREADONLY_on(value);
+           if (chain->refcounted_he_data[0] & HVrhek_UTF8)
+               SvUTF8_on(value);
+           break;
+       default:
+           Perl_croak(aTHX_ "panic: refcounted_he_chain_2hv bad flags %x",
+                      chain->refcounted_he_data[0]);
+       }
+       HeVAL(entry) = value;
 
        /* Link it into the chain.  */
        HeNEXT(entry) = *oentry;
@@ -2607,7 +2655,7 @@ Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he *chain)
        HvTOTALKEYS(hv)++;
 
     next_please:
-       chain = (struct refcounted_he *) chain->refcounted_he_he.hent_next;
+       chain = chain->refcounted_he_next;
     }
 
     if (placeholders) {
@@ -2638,19 +2686,90 @@ reference count of 1.
 struct refcounted_he *
 Perl_refcounted_he_new(pTHX_ struct refcounted_he *const parent,
                       SV *const key, SV *const value) {
+    dVAR;
     struct refcounted_he *he;
+    STRLEN key_len;
+    const char *key_p = SvPV_const(key, key_len);
+    STRLEN value_len = 0;
+    const char *value_p = NULL;
+    char value_type;
+    char flags;
+    STRLEN key_offset;
     U32 hash;
-    STRLEN len;
-    const char *p = SvPV_const(key, len);
+    bool is_utf8 = SvUTF8(key);
+
+    if (SvPOK(value)) {
+       value_type = HVrhek_PV;
+    } else if (SvIOK(value)) {
+       value_type = HVrhek_IV;
+    } else if (value == &PL_sv_placeholder) {
+       value_type = HVrhek_delete;
+    } else if (!SvOK(value)) {
+       value_type = HVrhek_undef;
+    } else {
+       value_type = HVrhek_PV;
+    }
+
+    if (value_type == HVrhek_PV) {
+       value_p = SvPV_const(value, value_len);
+       key_offset = value_len + 2;
+    } else {
+       value_len = 0;
+       key_offset = 1;
+    }
+    flags = value_type;
 
-    PERL_HASH(hash, p, len);
+#ifdef USE_ITHREADS
+    he = PerlMemShared_malloc(sizeof(struct refcounted_he) - 1
+                             + key_len
+                             + key_offset);
+#else
+    he = PerlMemShared_malloc(sizeof(struct refcounted_he) - 1
+                             + key_offset);
+#endif
 
-    Newx(he, 1, struct refcounted_he);
 
-    he->refcounted_he_he.hent_next = (HE *)parent;
-    he->refcounted_he_he.he_valu.hent_val = value;
-    he->refcounted_he_he.hent_hek
-       = share_hek(p, SvUTF8(key) ? -(I32)len : len, hash);
+    he->refcounted_he_next = parent;
+
+    if (value_type == HVrhek_PV) {
+       Copy(value_p, he->refcounted_he_data + 1, value_len + 1, char);
+       he->refcounted_he_val.refcounted_he_u_len = value_len;
+       if (SvUTF8(value)) {
+           flags |= HVrhek_UTF8;
+       }
+    } else if (value_type == HVrhek_IV) {
+       if (SvUOK(value)) {
+           he->refcounted_he_val.refcounted_he_u_uv = SvUVX(value);
+           flags |= HVrhek_UV;
+       } else {
+           he->refcounted_he_val.refcounted_he_u_iv = SvIVX(value);
+       }
+    }
+
+    if (is_utf8) {
+       /* Hash keys are always stored normalised to (yes) ISO-8859-1.
+          As we're going to be building hash keys from this value in future,
+          normalise it now.  */
+       key_p = (char*)bytes_from_utf8((const U8*)key_p, &key_len, &is_utf8);
+       flags |= is_utf8 ? HVhek_UTF8 : HVhek_WASUTF8;
+    }
+    PERL_HASH(hash, key_p, key_len);
+
+#ifdef USE_ITHREADS
+    he->refcounted_he_hash = hash;
+    he->refcounted_he_keylen = key_len;
+    Copy(key_p, he->refcounted_he_data + key_offset, key_len, char);
+#else
+    he->refcounted_he_hek = share_hek_flags(key_p, key_len, hash, flags);
+#endif
+
+    if (flags & HVhek_WASUTF8) {
+       /* If it was downgraded from UTF-8, then the pointer returned from
+          bytes_from_utf8 is an allocated pointer that we must free.  */
+       Safefree(key_p);
+    }
+
+    he->refcounted_he_data[0] = flags;
     he->refcounted_he_refcnt = 1;
 
     return he;
@@ -2668,95 +2787,29 @@ and C<refcounted_he_free> iterates onto the parent node.
 
 void
 Perl_refcounted_he_free(pTHX_ struct refcounted_he *he) {
+    PERL_UNUSED_CONTEXT;
+
     while (he) {
        struct refcounted_he *copy;
+       U32 new_count;
 
-       if (--he->refcounted_he_refcnt)
+       HINTS_REFCNT_LOCK;
+       new_count = --he->refcounted_he_refcnt;
+       HINTS_REFCNT_UNLOCK;
+       
+       if (new_count) {
            return;
+       }
 
-       unshare_hek_or_pvn (he->refcounted_he_he.hent_hek, 0, 0, 0);
-       SvREFCNT_dec(he->refcounted_he_he.he_valu.hent_val);
+#ifndef USE_ITHREADS
+       unshare_hek_or_pvn (he->refcounted_he_hek, 0, 0, 0);
+#endif
        copy = he;
-       he = (struct refcounted_he *) he->refcounted_he_he.hent_next;
-       Safefree(copy);
+       he = he->refcounted_he_next;
+       PerlMemShared_free(copy);
     }
 }
 
-
-/*
-=for apidoc refcounted_he_dup
-
-Duplicates the C<struct refcounted_he *> for a new thread.
-
-=cut
-*/
-
-#if defined(USE_ITHREADS)
-struct refcounted_he *
-Perl_refcounted_he_dup(pTHX_ const struct refcounted_he *const he,
-                       CLONE_PARAMS* param)
-{
-    struct refcounted_he *copy;
-
-    if (!he)
-       return NULL;
-
-    /* look for it in the table first */
-    copy = (struct refcounted_he *)ptr_table_fetch(PL_ptr_table, he);
-    if (copy)
-       return copy;
-
-    /* create anew and remember what it is */
-    Newx(copy, 1, struct refcounted_he);
-    ptr_table_store(PL_ptr_table, he, copy);
-
-    copy->refcounted_he_he.hent_next
-       = (HE *)Perl_refcounted_he_dup(aTHX_
-                                      (struct refcounted_he *)
-                                      he->refcounted_he_he.hent_next,
-                                      param);
-    copy->refcounted_he_he.he_valu.hent_val
-       = SvREFCNT_inc(sv_dup(he->refcounted_he_he.he_valu.hent_val, param));
-    copy->refcounted_he_he.hent_hek
-       = hek_dup(he->refcounted_he_he.hent_hek, param);
-    copy->refcounted_he_refcnt = he->refcounted_he_refcnt;
-    return copy;
-}
-
-/*
-=for apidoc refcounted_he_copy
-
-Copies a chain of C<struct refcounted_he *>. Used by C<pp_entereval>.
-
-=cut
-*/
-
-struct refcounted_he *
-Perl_refcounted_he_copy(pTHX_ const struct refcounted_he * he)
-{
-    struct refcounted_he *copy;
-    HEK *hek;
-    /* This is much easier to express recursively than iteratively.  */
-    if (!he)
-       return NULL;
-
-    Newx(copy, 1, struct refcounted_he);
-    copy->refcounted_he_he.hent_next
-       = (HE *)Perl_refcounted_he_copy(aTHX_
-                                      (struct refcounted_he *)
-                                      he->refcounted_he_he.hent_next);
-    copy->refcounted_he_he.he_valu.hent_val
-       = newSVsv(he->refcounted_he_he.he_valu.hent_val);
-    hek = he->refcounted_he_he.hent_hek;
-    copy->refcounted_he_he.hent_hek
-       = share_hek(HEK_KEY(hek),
-                   HEK_UTF8(hek) ? -(I32)HEK_LEN(hek) : HEK_LEN(hek),
-                   HEK_HASH(hek));
-    copy->refcounted_he_refcnt = 1;
-    return copy;
-}
-#endif
-
 /*
 =for apidoc hv_assert
 
@@ -2770,63 +2823,62 @@ Check that a hash is in an internally consistent state.
 void
 Perl_hv_assert(pTHX_ HV *hv)
 {
-  dVAR;
-  HE* entry;
-  int withflags = 0;
-  int placeholders = 0;
-  int real = 0;
-  int bad = 0;
-  const I32 riter = HvRITER_get(hv);
-  HE *eiter = HvEITER_get(hv);
-
-  (void)hv_iterinit(hv);
-
-  while ((entry = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS))) {
-    /* sanity check the values */
-    if (HeVAL(entry) == &PL_sv_placeholder) {
-      placeholders++;
-    } else {
-      real++;
-    }
-    /* sanity check the keys */
-    if (HeSVKEY(entry)) {
-      /*EMPTY*/ /* Don't know what to check on SV keys.  */
-    } else if (HeKUTF8(entry)) {
-      withflags++;
-       if (HeKWASUTF8(entry)) {
-        PerlIO_printf(Perl_debug_log,
-                      "hash key has both WASUFT8 and UTF8: '%.*s'\n",
-                      (int) HeKLEN(entry),  HeKEY(entry));
-        bad = 1;
-       }
-    } else if (HeKWASUTF8(entry)) {
-      withflags++;
-    }
-  }
-  if (!SvTIED_mg((SV*)hv, PERL_MAGIC_tied)) {
-    if (HvUSEDKEYS(hv) != real) {
-      PerlIO_printf(Perl_debug_log, "Count %d key(s), but hash reports %d\n",
-                   (int) real, (int) HvUSEDKEYS(hv));
-      bad = 1;
-    }
-    if (HvPLACEHOLDERS_get(hv) != placeholders) {
-      PerlIO_printf(Perl_debug_log,
-                   "Count %d placeholder(s), but hash reports %d\n",
-                   (int) placeholders, (int) HvPLACEHOLDERS_get(hv));
-      bad = 1;
-    }
-  }
-  if (withflags && ! HvHASKFLAGS(hv)) {
-    PerlIO_printf(Perl_debug_log,
-                 "Hash has HASKFLAGS off but I count %d key(s) with flags\n",
-                 withflags);
-    bad = 1;
-  }
-  if (bad) {
-    sv_dump((SV *)hv);
-  }
-  HvRITER_set(hv, riter);              /* Restore hash iterator state */
-  HvEITER_set(hv, eiter);
+    dVAR;
+    HE* entry;
+    int withflags = 0;
+    int placeholders = 0;
+    int real = 0;
+    int bad = 0;
+    const I32 riter = HvRITER_get(hv);
+    HE *eiter = HvEITER_get(hv);
+
+    (void)hv_iterinit(hv);
+
+    while ((entry = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS))) {
+       /* sanity check the values */
+       if (HeVAL(entry) == &PL_sv_placeholder)
+           placeholders++;
+       else
+           real++;
+       /* sanity check the keys */
+       if (HeSVKEY(entry)) {
+           NOOP;   /* Don't know what to check on SV keys.  */
+       } else if (HeKUTF8(entry)) {
+           withflags++;
+           if (HeKWASUTF8(entry)) {
+               PerlIO_printf(Perl_debug_log,
+                           "hash key has both WASUFT8 and UTF8: '%.*s'\n",
+                           (int) HeKLEN(entry),  HeKEY(entry));
+               bad = 1;
+           }
+       } else if (HeKWASUTF8(entry))
+           withflags++;
+    }
+    if (!SvTIED_mg((SV*)hv, PERL_MAGIC_tied)) {
+       static const char bad_count[] = "Count %d %s(s), but hash reports %d\n";
+       const int nhashkeys = HvUSEDKEYS(hv);
+       const int nhashplaceholders = HvPLACEHOLDERS_get(hv);
+
+       if (nhashkeys != real) {
+           PerlIO_printf(Perl_debug_log, bad_count, real, "keys", nhashkeys );
+           bad = 1;
+       }
+       if (nhashplaceholders != placeholders) {
+           PerlIO_printf(Perl_debug_log, bad_count, placeholders, "placeholder", nhashplaceholders );
+           bad = 1;
+       }
+    }
+    if (withflags && ! HvHASKFLAGS(hv)) {
+       PerlIO_printf(Perl_debug_log,
+                   "Hash has HASKFLAGS off but I count %d key(s) with flags\n",
+                   withflags);
+       bad = 1;
+    }
+    if (bad) {
+       sv_dump((SV *)hv);
+    }
+    HvRITER_set(hv, riter);            /* Restore hash iterator state */
+    HvEITER_set(hv, eiter);
 }
 
 #endif