This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Test-Harness to CPAN version 3.27
[perl5.git] / hv.c
diff --git a/hv.c b/hv.c
index 3de86d1..6476f51 100644 (file)
--- a/hv.c
+++ b/hv.c
@@ -526,7 +526,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
            bool needs_store;
            hv_magic_check (hv, &needs_copy, &needs_store);
            if (needs_copy) {
-               const bool save_taint = TAINT_get; /* Unused var warning under NO_TAINT_SUPPORT */
+               const bool save_taint = TAINT_get;
                if (keysv || is_utf8) {
                    if (!keysv) {
                        keysv = newSVpvn_utf8(key, klen, TRUE);
@@ -540,6 +540,9 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
                }
 
                TAINT_IF(save_taint);
+#ifdef NO_TAINT_SUPPORT
+                PERL_UNUSED_VAR(save_taint);
+#endif
                if (!needs_store) {
                    if (flags & HVhek_FREEKEY)
                        Safefree(key);
@@ -787,19 +790,47 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
        HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags);
     HeVAL(entry) = val;
 
+#ifdef PERL_HASH_RANDOMIZE_KEYS
     /* This logic semi-randomizes the insert order in a bucket.
      * Either we insert into the top, or the slot below the top,
-     * making it harder to see if there is a collision.
+     * making it harder to see if there is a collision. We also
+     * reset the iterator randomizer if there is one.
      */
-    PL_hash_rand_bits += (PTRV)entry ^ hash; /* we don't bother to use ptr_hash here */
-    PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
-    if ( !*oentry || (PL_hash_rand_bits & 1) ) {
+    if ( *oentry && PL_HASH_RAND_BITS_ENABLED) {
+        PL_hash_rand_bits++;
+        PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
+        if ( PL_hash_rand_bits & 1 ) {
+            HeNEXT(entry) = HeNEXT(*oentry);
+            HeNEXT(*oentry) = entry;
+        } else {
+            HeNEXT(entry) = *oentry;
+            *oentry = entry;
+        }
+    } else
+#endif
+    {
         HeNEXT(entry) = *oentry;
         *oentry = entry;
-    } else {
-        HeNEXT(entry) = HeNEXT(*oentry);
-        HeNEXT(*oentry) = entry;
     }
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+    if (SvOOK(hv)) {
+        /* Currently this makes various tests warn in annoying ways.
+         * So Silenced for now. - Yves | bogus end of comment =>* /
+        if (HvAUX(hv)->xhv_riter != -1) {
+            Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
+                             "[TESTING] Inserting into a hash during each() traversal results in undefined behavior"
+                             pTHX__FORMAT
+                             pTHX__VALUE);
+        }
+        */
+        if (PL_HASH_RAND_BITS_ENABLED) {
+            if (PL_HASH_RAND_BITS_ENABLED == 1)
+                PL_hash_rand_bits += (PTRV)entry + 1;  /* we don't bother to use ptr_hash here */
+            PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
+        }
+        HvAUX(hv)->xhv_rand= (U32)PL_hash_rand_bits;
+    }
+#endif
 
     if (val == &PL_sv_placeholder)
        HvPLACEHOLDERS(hv)++;
@@ -1135,20 +1166,27 @@ S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize)
       PL_nomemok = FALSE;
       return;
     }
+#ifdef PERL_HASH_RANDOMIZE_KEYS
     /* the idea of this is that we create a "random" value by hashing the address of
      * the array, we then use the low bit to decide if we insert at the top, or insert
      * second from top. After each such insert we rotate the hashed value. So we can
      * use the same hashed value over and over, and in normal build environments use
      * very few ops to do so. ROTL32() should produce a single machine operation. */
-    PL_hash_rand_bits += ptr_hash((PTRV)a);
-    PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1);
+    if (PL_HASH_RAND_BITS_ENABLED) {
+        if (PL_HASH_RAND_BITS_ENABLED == 1)
+            PL_hash_rand_bits += ptr_hash((PTRV)a);
+        PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1);
+    }
+#endif
 
     if (SvOOK(hv)) {
         struct xpvhv_aux *const dest
             = (struct xpvhv_aux*) &a[newsize * sizeof(HE*)];
         Move(&a[oldsize * sizeof(HE*)], dest, 1, struct xpvhv_aux);
         /* we reset the iterator's xhv_rand as well, so they get a totally new ordering */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
         dest->xhv_rand = (U32)PL_hash_rand_bits;
+#endif
     }
 
     PL_nomemok = FALSE;
@@ -1170,17 +1208,30 @@ S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize)
             U32 j = (HeHASH(entry) & newsize);
            if (j != (U32)i) {
                *oentry = HeNEXT(entry);
-                /* if the target cell is empty insert to top, otherwise
-                 * rotate the bucket rand 1 bit, and use the new low bit
-                 * to decide if we insert at top, or next from top.
-                 * IOW, we rotate only if we are dealing with colliding
-                 * elements. */
-                if (!aep[j] || ((PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1)) & 1)) {
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+                /* if the target cell is empty or PL_HASH_RAND_BITS_ENABLED is false
+                 * insert to top, otherwise rotate the bucket rand 1 bit,
+                 * and use the new low bit to decide if we insert at top,
+                 * or next from top. IOW, we only rotate on a collision.*/
+                if (aep[j] && PL_HASH_RAND_BITS_ENABLED) {
+                    PL_hash_rand_bits+= ROTL_UV(HeHASH(entry), 17);
+                    PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
+                    if (PL_hash_rand_bits & 1) {
+                        HeNEXT(entry)= HeNEXT(aep[j]);
+                        HeNEXT(aep[j])= entry;
+                    } else {
+                        /* Note, this is structured in such a way as the optimizer
+                        * should eliminate the duplicated code here and below without
+                        * us needing to explicitly use a goto. */
+                        HeNEXT(entry) = aep[j];
+                        aep[j] = entry;
+                    }
+                } else
+#endif
+                {
+                    /* see comment above about duplicated code */
                     HeNEXT(entry) = aep[j];
                     aep[j] = entry;
-                } else {
-                    HeNEXT(entry)= HeNEXT(aep[j]);
-                    HeNEXT(aep[j])= entry;
                 }
            }
            else {
@@ -1223,6 +1274,21 @@ Perl_hv_ksplit(pTHX_ HV *hv, IV newmax)
     }
 }
 
+/* IMO this should also handle cases where hv_max is smaller than hv_keys
+ * as tied hashes could play silly buggers and mess us around. We will
+ * do the right thing during hv_store() afterwards, but still - Yves */
+#define HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys) STMT_START {\
+    /* Can we use fewer buckets? (hv_max is always 2^n-1) */        \
+    if (hv_max < PERL_HASH_DEFAULT_HvMAX) {                         \
+        hv_max = PERL_HASH_DEFAULT_HvMAX;                           \
+    } else {                                                        \
+        while (hv_max > PERL_HASH_DEFAULT_HvMAX && hv_max + 1 >= hv_keys * 2) \
+            hv_max = hv_max / 2;                                    \
+    }                                                               \
+    HvMAX(hv) = hv_max;                                             \
+} STMT_END
+
+
 HV *
 Perl_newHVhv(pTHX_ HV *ohv)
 {
@@ -1284,12 +1350,9 @@ Perl_newHVhv(pTHX_ HV *ohv)
        HE *entry;
        const I32 riter = HvRITER_get(ohv);
        HE * const eiter = HvEITER_get(ohv);
-       STRLEN hv_fill = HvFILL(ohv);
+        STRLEN hv_keys = HvTOTALKEYS(ohv);
 
-       /* Can we use fewer buckets? (hv_max is always 2^n-1) */
-       while (hv_max && hv_max + 1 >= hv_fill * 2)
-           hv_max = hv_max / 2;
-       HvMAX(hv) = hv_max;
+        HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys);
 
        hv_iterinit(ohv);
        while ((entry = hv_iternext_flags(ohv, 0))) {
@@ -1328,7 +1391,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv)
 
     if (ohv) {
        STRLEN hv_max = HvMAX(ohv);
-       STRLEN hv_fill = HvFILL(ohv);
+        STRLEN hv_keys = HvTOTALKEYS(ohv);
        HE *entry;
        const I32 riter = HvRITER_get(ohv);
        HE * const eiter = HvEITER_get(ohv);
@@ -1336,9 +1399,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv)
        ENTER;
        SAVEFREESV(hv);
 
-       while (hv_max && hv_max + 1 >= hv_fill * 2)
-           hv_max = hv_max / 2;
-       HvMAX(hv) = hv_max;
+        HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys);
 
        hv_iterinit(ohv);
        while ((entry = hv_iternext_flags(ohv, 0))) {
@@ -1364,6 +1425,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv)
     hv_magic(hv, NULL, PERL_MAGIC_hints);
     return hv;
 }
+#undef HV_SET_MAX_ADJUSTED_FOR_KEYS
 
 /* like hv_free_ent, but returns the SV rather than freeing it */
 STATIC SV*
@@ -1608,6 +1670,9 @@ Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp)
        }
        iter->xhv_riter = -1;   /* HvRITER(hv) = -1 */
        iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+        iter->xhv_last_rand = iter->xhv_rand;
+#endif
     }
 
     if (!((XPVHV*)SvANY(hv))->xhv_keys)
@@ -1746,7 +1811,7 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags)
     }
     if (!SvOOK(hv)) {
        Safefree(HvARRAY(hv));
-       xhv->xhv_max   = 7;     /* HvMAX(hv) = 7 (it's a normal hash) */
+        xhv->xhv_max = PERL_HASH_DEFAULT_HvMAX;        /* HvMAX(hv) = 7 (it's a normal hash) */
        HvARRAY(hv) = 0;
     }
     /* if we're freeing the HV, the SvMAGIC field has been reused for
@@ -1843,14 +1908,25 @@ S_hv_auxinit(pTHX_ HV *hv) {
         }
         HvARRAY(hv) = (HE**)array;
         SvOOK_on(hv);
-        PL_hash_rand_bits += ptr_hash((PTRV)array);
-        PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1);
+        iter = HvAUX(hv);
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+        if (PL_HASH_RAND_BITS_ENABLED) {
+            /* mix in some new state to PL_hash_rand_bits to "randomize" the traversal order*/
+            if (PL_HASH_RAND_BITS_ENABLED == 1)
+                PL_hash_rand_bits += ptr_hash((PTRV)array);
+            PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1);
+        }
+        iter->xhv_rand = (U32)PL_hash_rand_bits;
+#endif
+    } else {
+        iter = HvAUX(hv);
     }
-    iter = HvAUX(hv);
 
     iter->xhv_riter = -1;      /* HvRITER(hv) = -1 */
     iter->xhv_eiter = NULL;    /* HvEITER(hv) = NULL */
-    iter->xhv_rand = (U32)PL_hash_rand_bits;
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+    iter->xhv_last_rand = iter->xhv_rand;
+#endif
     iter->xhv_name_u.xhvnameu_name = 0;
     iter->xhv_name_count = 0;
     iter->xhv_backreferences = 0;
@@ -1893,6 +1969,9 @@ Perl_hv_iterinit(pTHX_ HV *hv)
        }
        iter->xhv_riter = -1;   /* HvRITER(hv) = -1 */
        iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+        iter->xhv_last_rand = iter->xhv_rand;
+#endif
     } else {
        hv_auxinit(hv);
     }
@@ -1948,6 +2027,27 @@ Perl_hv_riter_set(pTHX_ HV *hv, I32 riter) {
 }
 
 void
+Perl_hv_rand_set(pTHX_ HV *hv, U32 new_xhv_rand) {
+    struct xpvhv_aux *iter;
+
+    PERL_ARGS_ASSERT_HV_RAND_SET;
+
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+    if (!hv)
+        Perl_croak(aTHX_ "Bad hash");
+
+    if (SvOOK(hv)) {
+        iter = HvAUX(hv);
+    } else {
+        iter = hv_auxinit(hv);
+    }
+    iter->xhv_rand = new_xhv_rand;
+#else
+    Perl_croak(aTHX_ "This Perl has not been built with support for randomized hash key traversal but something called Perl_hv_rand_set().");
+#endif
+}
+
+void
 Perl_hv_eiter_set(pTHX_ HV *hv, HE *eiter) {
     struct xpvhv_aux *iter;
 
@@ -2353,6 +2453,18 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags)
        }
     }
 
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+    if (iter->xhv_last_rand != iter->xhv_rand) {
+        if (iter->xhv_riter != -1) {
+            Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
+                             "Use of each() on hash after insertion without resetting hash iterator results in undefined behavior"
+                             pTHX__FORMAT
+                             pTHX__VALUE);
+        }
+        iter->xhv_last_rand = iter->xhv_rand;
+    }
+#endif
+
     /* Skip the entire loop if the hash is empty.   */
     if ((flags & HV_ITERNEXT_WANTPLACEHOLDERS)
        ? HvTOTALKEYS(hv) : HvUSEDKEYS(hv)) {
@@ -2363,9 +2475,12 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags)
            if (iter->xhv_riter > (I32)xhv->xhv_max /* HvRITER(hv) > HvMAX(hv) */) {
                /* There is no next one.  End of the hash.  */
                iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+                iter->xhv_last_rand = iter->xhv_rand; /* reset xhv_last_rand so we can detect inserts during traversal */
+#endif
                break;
            }
-            entry = (HvARRAY(hv))[(iter->xhv_riter ^ iter->xhv_rand) & xhv->xhv_max];
+            entry = (HvARRAY(hv))[ PERL_HASH_ITER_BUCKET(iter) & xhv->xhv_max ];
 
            if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) {
                /* If we have an entry, but it's a placeholder, don't count it.
@@ -2378,7 +2493,12 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags)
               or if we run through it and find only placeholders.  */
        }
     }
-    else iter->xhv_riter = -1;
+    else {
+        iter->xhv_riter = -1;
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+        iter->xhv_last_rand = iter->xhv_rand;
+#endif
+    }
 
     if (oldentry && HvLAZYDEL(hv)) {           /* was deleted earlier? */
        HvLAZYDEL_off(hv);