#include "perl.h"
#define DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */
/* Hashes whose HvMAX is at or above this threshold get their HvFILL()
 * result cached lazily in the aux structure (see Perl_hv_fill): below it,
 * recomputing the fill is cheap enough not to be worth an aux struct. */
+#define HV_FILL_THRESHOLD 31
/* Format string for errors raised on attempts to modify the shared string
 * table; %s names the failing operation (presumably passed to croak by the
 * strtab entry points -- confirm against callers outside this chunk). */
static const char S_strtab_error[]
= "Cannot modify shared string table in hv_%s";
bool needs_store;
hv_magic_check (hv, &needs_copy, &needs_store);
if (needs_copy) {
- const bool save_taint = TAINT_get; /* Unused var warning under NO_TAINT_SUPPORT */
+ const bool save_taint = TAINT_get;
if (keysv || is_utf8) {
if (!keysv) {
keysv = newSVpvn_utf8(key, klen, TRUE);
}
TAINT_IF(save_taint);
+#ifdef NO_TAINT_SUPPORT
+ PERL_UNUSED_VAR(save_taint);
+#endif
if (!needs_store) {
if (flags & HVhek_FREEKEY)
Safefree(key);
recursive call would call the key conversion routine again.
However, as we replace the original key with the converted
key, this would result in a double conversion, which would show
- up as a bug if the conversion routine is not idempotent. */
+ up as a bug if the conversion routine is not idempotent.
+ Hence the use of HV_DISABLE_UVAR_XKEY. */
return hv_common(hv, keysv, key, klen, flags,
HV_FETCH_ISSTORE|HV_DISABLE_UVAR_XKEY|return_svp,
val, hash);
HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags);
HeVAL(entry) = val;
+ if (!*oentry && SvOOK(hv)) {
+ /* initial entry, and aux struct present. */
+ struct xpvhv_aux *const aux = HvAUX(hv);
+ if (aux->xhv_fill_lazy)
+ ++aux->xhv_fill_lazy;
+ }
+
+#ifdef PERL_HASH_RANDOMIZE_KEYS
/* This logic semi-randomizes the insert order in a bucket.
* Either we insert into the top, or the slot below the top,
* making it harder to see if there is a collision. We also
* reset the iterator randomizer if there is one.
*/
- if (SvOOK(hv))
- HvAUX(hv)->xhv_rand= (U32)PL_hash_rand_bits;
- PL_hash_rand_bits += (PTRV)entry ^ hash; /* we don't bother to use ptr_hash here */
- PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
- if ( !*oentry || (PL_hash_rand_bits & 1) ) {
+ if ( *oentry && PL_HASH_RAND_BITS_ENABLED) {
+ PL_hash_rand_bits++;
+ PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
+ if ( PL_hash_rand_bits & 1 ) {
+ HeNEXT(entry) = HeNEXT(*oentry);
+ HeNEXT(*oentry) = entry;
+ } else {
+ HeNEXT(entry) = *oentry;
+ *oentry = entry;
+ }
+ } else
+#endif
+ {
HeNEXT(entry) = *oentry;
*oentry = entry;
- } else {
- HeNEXT(entry) = HeNEXT(*oentry);
- HeNEXT(*oentry) = entry;
}
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ if (SvOOK(hv)) {
+ /* Currently this makes various tests warn in annoying ways.
+ * So Silenced for now. - Yves | bogus end of comment =>* /
+ if (HvAUX(hv)->xhv_riter != -1) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
+ "[TESTING] Inserting into a hash during each() traversal results in undefined behavior"
+ pTHX__FORMAT
+ pTHX__VALUE);
+ }
+ */
+ if (PL_HASH_RAND_BITS_ENABLED) {
+ if (PL_HASH_RAND_BITS_ENABLED == 1)
+ PL_hash_rand_bits += (PTRV)entry + 1; /* we don't bother to use ptr_hash here */
+ PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
+ }
+ HvAUX(hv)->xhv_rand= (U32)PL_hash_rand_bits;
+ }
+#endif
if (val == &PL_sv_placeholder)
HvPLACEHOLDERS(hv)++;
XPVHV* xhv;
HE *entry;
HE **oentry;
+ HE *const *first_entry;
bool is_utf8 = (k_flags & HVhek_UTF8) ? TRUE : FALSE;
int masked_flags;
masked_flags = (k_flags & HVhek_MASK);
- oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)];
+ first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)];
entry = *oentry;
for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
SV *sv;
HvPLACEHOLDERS(hv)++;
else {
*oentry = HeNEXT(entry);
+ if(!*first_entry && SvOOK(hv)) {
+ /* removed last entry, and aux struct present. */
+ struct xpvhv_aux *const aux = HvAUX(hv);
+ if (aux->xhv_fill_lazy)
+ --aux->xhv_fill_lazy;
+ }
if (SvOOK(hv) && entry == HvAUX(hv)->xhv_eiter /* HvEITER(hv) */)
HvLAZYDEL_on(hv);
else {
PL_nomemok = FALSE;
return;
}
+#ifdef PERL_HASH_RANDOMIZE_KEYS
/* the idea of this is that we create a "random" value by hashing the address of
* the array, we then use the low bit to decide if we insert at the top, or insert
* second from top. After each such insert we rotate the hashed value. So we can
* use the same hashed value over and over, and in normal build environments use
* very few ops to do so. ROTL32() should produce a single machine operation. */
- PL_hash_rand_bits += ptr_hash((PTRV)a);
- PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1);
+ if (PL_HASH_RAND_BITS_ENABLED) {
+ if (PL_HASH_RAND_BITS_ENABLED == 1)
+ PL_hash_rand_bits += ptr_hash((PTRV)a);
+ PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1);
+ }
+#endif
if (SvOOK(hv)) {
struct xpvhv_aux *const dest
= (struct xpvhv_aux*) &a[newsize * sizeof(HE*)];
Move(&a[oldsize * sizeof(HE*)], dest, 1, struct xpvhv_aux);
/* we reset the iterator's xhv_rand as well, so they get a totally new ordering */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
dest->xhv_rand = (U32)PL_hash_rand_bits;
+#endif
+ /* For now, just reset the lazy fill counter.
+ It would be possible to update the counter in the code below
+ instead. */
+ dest->xhv_fill_lazy = 0;
}
PL_nomemok = FALSE;
U32 j = (HeHASH(entry) & newsize);
if (j != (U32)i) {
*oentry = HeNEXT(entry);
- /* if the target cell is empty insert to top, otherwise
- * rotate the bucket rand 1 bit, and use the new low bit
- * to decide if we insert at top, or next from top.
- * IOW, we rotate only if we are dealing with colliding
- * elements. */
- if (!aep[j] || ((PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1)) & 1)) {
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ /* if the target cell is empty or PL_HASH_RAND_BITS_ENABLED is false
+ * insert to top, otherwise rotate the bucket rand 1 bit,
+ * and use the new low bit to decide if we insert at top,
+ * or next from top. IOW, we only rotate on a collision.*/
+ if (aep[j] && PL_HASH_RAND_BITS_ENABLED) {
+ PL_hash_rand_bits+= ROTL_UV(HeHASH(entry), 17);
+ PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
+ if (PL_hash_rand_bits & 1) {
+ HeNEXT(entry)= HeNEXT(aep[j]);
+ HeNEXT(aep[j])= entry;
+ } else {
+ /* Note, this is structured in such a way as the optimizer
+ * should eliminate the duplicated code here and below without
+ * us needing to explicitly use a goto. */
+ HeNEXT(entry) = aep[j];
+ aep[j] = entry;
+ }
+ } else
+#endif
+ {
+ /* see comment above about duplicated code */
HeNEXT(entry) = aep[j];
aep[j] = entry;
- } else {
- HeNEXT(entry)= HeNEXT(aep[j]);
- HeNEXT(aep[j])= entry;
}
}
else {
}
}
+/* Pick a bucket-array size (hv_max, always of the form 2^n - 1) suited to
+ * hv_keys keys, and store it into HvMAX(hv): clamp up to the default size
+ * if below it, otherwise halve while there are still at least twice as
+ * many buckets as keys (the hv_max + 1 >= hv_keys * 2 test).
+ * IMO this should also handle cases where hv_max is smaller than hv_keys
+ * as tied hashes could play silly buggers and mess us around. We will
+ * do the right thing during hv_store() afterwards, but still - Yves */
+#define HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys) STMT_START {\
+    /* Can we use fewer buckets? (hv_max is always 2^n-1) */ \
+    if (hv_max < PERL_HASH_DEFAULT_HvMAX) { \
+        hv_max = PERL_HASH_DEFAULT_HvMAX; \
+    } else { \
+        while (hv_max > PERL_HASH_DEFAULT_HvMAX && hv_max + 1 >= hv_keys * 2) \
+            hv_max = hv_max / 2; \
+    } \
+    HvMAX(hv) = hv_max; \
+} STMT_END
+
+
HV *
Perl_newHVhv(pTHX_ HV *ohv)
{
HE *entry;
const I32 riter = HvRITER_get(ohv);
HE * const eiter = HvEITER_get(ohv);
- STRLEN hv_fill = HvFILL(ohv);
+ STRLEN hv_keys = HvTOTALKEYS(ohv);
- /* Can we use fewer buckets? (hv_max is always 2^n-1) */
- while (hv_max && hv_max + 1 >= hv_fill * 2)
- hv_max = hv_max / 2;
- HvMAX(hv) = hv_max;
+ HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys);
hv_iterinit(ohv);
while ((entry = hv_iternext_flags(ohv, 0))) {
if (ohv) {
STRLEN hv_max = HvMAX(ohv);
- STRLEN hv_fill = HvFILL(ohv);
+ STRLEN hv_keys = HvTOTALKEYS(ohv);
HE *entry;
const I32 riter = HvRITER_get(ohv);
HE * const eiter = HvEITER_get(ohv);
ENTER;
SAVEFREESV(hv);
- while (hv_max && hv_max + 1 >= hv_fill * 2)
- hv_max = hv_max / 2;
- HvMAX(hv) = hv_max;
+ HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys);
hv_iterinit(ohv);
while ((entry = hv_iternext_flags(ohv, 0))) {
hv_magic(hv, NULL, PERL_MAGIC_hints);
return hv;
}
+#undef HV_SET_MAX_ADJUSTED_FOR_KEYS
/* like hv_free_ent, but returns the SV rather than freeing it */
STATIC SV*
PERL_ARGS_ASSERT_HFREE_NEXT_ENTRY;
- if (SvOOK(hv) && ((iter = HvAUX(hv)))
- && ((entry = iter->xhv_eiter)) )
- {
- /* the iterator may get resurrected after each
- * destructor call, so check each time */
- if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */
- HvLAZYDEL_off(hv);
- hv_free_ent(hv, entry);
- /* warning: at this point HvARRAY may have been
- * re-allocated, HvMAX changed etc */
- }
- iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
- iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
+ if (SvOOK(hv) && ((iter = HvAUX(hv)))) {
+ if ((entry = iter->xhv_eiter)) {
+ /* the iterator may get resurrected after each
+ * destructor call, so check each time */
+ if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */
+ HvLAZYDEL_off(hv);
+ hv_free_ent(hv, entry);
+ /* warning: at this point HvARRAY may have been
+ * re-allocated, HvMAX changed etc */
+ }
+ iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
+ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ iter->xhv_last_rand = iter->xhv_rand;
+#endif
+ }
+ /* Reset any cached HvFILL() to "unknown". It's unlikely that anyone
+ will actually call HvFILL() on a hash under destruction, so it
+ seems pointless attempting to track the number of keys remaining.
+ But if they do, we want to reset it again. */
+ if (iter->xhv_fill_lazy)
+ iter->xhv_fill_lazy = 0;
}
if (!((XPVHV*)SvANY(hv))->xhv_keys)
}
if (!SvOOK(hv)) {
Safefree(HvARRAY(hv));
- xhv->xhv_max = 7; /* HvMAX(hv) = 7 (it's a normal hash) */
+ xhv->xhv_max = PERL_HASH_DEFAULT_HvMAX; /* HvMAX(hv) = 7 (it's a normal hash) */
HvARRAY(hv) = 0;
}
/* if we're freeing the HV, the SvMAGIC field has been reused for
Returns the number of hash buckets that happen to be in use. This function is
wrapped by the macro C<HvFILL>.
-Previously this value was stored in the HV structure, rather than being
-calculated on demand.
+Previously this value was always stored in the HV structure, which created an
+overhead on every hash (and pretty much every object) for something that was
+rarely used. Now we calculate it on demand the first time that it is needed,
+and cache it if that calculation is going to be costly to repeat. The cached
+value is updated by insertions and deletions, but (currently) discarded if
+the hash is split.
=cut
*/
STRLEN
-Perl_hv_fill(pTHX_ HV const *const hv)
+Perl_hv_fill(pTHX_ HV *const hv)
{
STRLEN count = 0;
HE **ents = HvARRAY(hv);
+ struct xpvhv_aux *aux = SvOOK(hv) ? HvAUX(hv) : NULL;
PERL_ARGS_ASSERT_HV_FILL;
+ /* No keys implies no buckets used.
+ One key can only possibly mean one bucket used. */
+ if (HvTOTALKEYS(hv) < 2)
+ return HvTOTALKEYS(hv);
+
+#ifndef DEBUGGING
+ if (aux && aux->xhv_fill_lazy)
+ return aux->xhv_fill_lazy;
+#endif
+
if (ents) {
HE *const *const last = ents + HvMAX(hv);
count = last + 1 - ents;
--count;
} while (++ents <= last);
}
+ if (aux) {
+#ifdef DEBUGGING
+ if (aux->xhv_fill_lazy)
+ assert(aux->xhv_fill_lazy == count);
+#endif
+ aux->xhv_fill_lazy = count;
+ } else if (HvMAX(hv) >= HV_FILL_THRESHOLD) {
+ aux = hv_auxinit(hv);
+ aux->xhv_fill_lazy = count;
+ }
return count;
}
}
HvARRAY(hv) = (HE**)array;
SvOOK_on(hv);
- PL_hash_rand_bits += ptr_hash((PTRV)array);
- PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1);
+ iter = HvAUX(hv);
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ if (PL_HASH_RAND_BITS_ENABLED) {
+ /* mix in some new state to PL_hash_rand_bits to "randomize" the traversal order*/
+ if (PL_HASH_RAND_BITS_ENABLED == 1)
+ PL_hash_rand_bits += ptr_hash((PTRV)array);
+ PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1);
+ }
+ iter->xhv_rand = (U32)PL_hash_rand_bits;
+#endif
+ } else {
+ iter = HvAUX(hv);
}
- iter = HvAUX(hv);
iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
- iter->xhv_rand = (U32)PL_hash_rand_bits;
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ iter->xhv_last_rand = iter->xhv_rand;
+#endif
+ iter->xhv_fill_lazy = 0;
iter->xhv_name_u.xhvnameu_name = 0;
iter->xhv_name_count = 0;
iter->xhv_backreferences = 0;
}
iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ iter->xhv_last_rand = iter->xhv_rand;
+#endif
} else {
hv_auxinit(hv);
}
}
void
+Perl_hv_rand_set(pTHX_ HV *hv, U32 new_xhv_rand) {
+ struct xpvhv_aux *iter;
+
+ PERL_ARGS_ASSERT_HV_RAND_SET;
+
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ if (!hv)
+ Perl_croak(aTHX_ "Bad hash");
+
+ if (SvOOK(hv)) {
+ iter = HvAUX(hv);
+ } else {
+ iter = hv_auxinit(hv);
+ }
+ iter->xhv_rand = new_xhv_rand;
+#else
+ Perl_croak(aTHX_ "This Perl has not been built with support for randomized hash key traversal but something called Perl_hv_rand_set().");
+#endif
+}
+
+void
Perl_hv_eiter_set(pTHX_ HV *hv, HE *eiter) {
struct xpvhv_aux *iter;
}
}
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ if (iter->xhv_last_rand != iter->xhv_rand) {
+ if (iter->xhv_riter != -1) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
+ "Use of each() on hash after insertion without resetting hash iterator results in undefined behavior"
+ pTHX__FORMAT
+ pTHX__VALUE);
+ }
+ iter->xhv_last_rand = iter->xhv_rand;
+ }
+#endif
+
/* Skip the entire loop if the hash is empty. */
if ((flags & HV_ITERNEXT_WANTPLACEHOLDERS)
? HvTOTALKEYS(hv) : HvUSEDKEYS(hv)) {
if (iter->xhv_riter > (I32)xhv->xhv_max /* HvRITER(hv) > HvMAX(hv) */) {
/* There is no next one. End of the hash. */
iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ iter->xhv_last_rand = iter->xhv_rand; /* reset xhv_last_rand so we can detect inserts during traversal */
+#endif
break;
}
- entry = (HvARRAY(hv))[(iter->xhv_riter ^ iter->xhv_rand) & xhv->xhv_max];
+ entry = (HvARRAY(hv))[ PERL_HASH_ITER_BUCKET(iter) & xhv->xhv_max ];
if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) {
/* If we have an entry, but it's a placeholder, don't count it.
or if we run through it and find only placeholders. */
}
}
- else iter->xhv_riter = -1;
+ else {
+ iter->xhv_riter = -1;
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ iter->xhv_last_rand = iter->xhv_rand;
+#endif
+ }
if (oldentry && HvLAZYDEL(hv)) { /* was deleted earlier? */
HvLAZYDEL_off(hv);