/*
=head1 Hash Manipulation Functions

A HV structure represents a Perl hash.  It consists mainly of an array
of pointers, each of which points to a linked list of HE structures. The
array is indexed by the hash function of the key, so each linked list
represents all the hash entries with the same hash value.

=cut

*/
#include "perl.h"
#define DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */
+#define HV_FILL_THRESHOLD 31
static const char S_strtab_error[]
= "Cannot modify shared string table in hv_%s";
bool needs_store;
hv_magic_check (hv, &needs_copy, &needs_store);
if (needs_copy) {
- const bool save_taint = TAINT_get; /* Unused var warning under NO_TAINT_SUPPORT */
+ const bool save_taint = TAINT_get;
if (keysv || is_utf8) {
if (!keysv) {
keysv = newSVpvn_utf8(key, klen, TRUE);
}
TAINT_IF(save_taint);
+#ifdef NO_TAINT_SUPPORT
+ PERL_UNUSED_VAR(save_taint);
+#endif
if (!needs_store) {
if (flags & HVhek_FREEKEY)
Safefree(key);
recursive call would call the key conversion routine again.
However, as we replace the original key with the converted
key, this would result in a double conversion, which would show
up as a bug if the conversion routine is not idempotent.
Hence the use of HV_DISABLE_UVAR_XKEY. */
return hv_common(hv, keysv, key, klen, flags,
HV_FETCH_ISSTORE|HV_DISABLE_UVAR_XKEY|return_svp,
val, hash);
else /* gotta do the real thing */
HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags);
HeVAL(entry) = val;
- HeNEXT(entry) = *oentry;
- *oentry = entry;
+
+ if (!*oentry && SvOOK(hv)) {
+ /* initial entry, and aux struct present. */
+ struct xpvhv_aux *const aux = HvAUX(hv);
+ if (aux->xhv_fill_lazy)
+ ++aux->xhv_fill_lazy;
+ }
+
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ /* This logic semi-randomizes the insert order in a bucket.
+ * Either we insert into the top, or the slot below the top,
+ * making it harder to see if there is a collision. We also
+ * reset the iterator randomizer if there is one.
+ */
+ if ( *oentry && PL_HASH_RAND_BITS_ENABLED) {
+ PL_hash_rand_bits++;
+ PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
+ if ( PL_hash_rand_bits & 1 ) {
+ HeNEXT(entry) = HeNEXT(*oentry);
+ HeNEXT(*oentry) = entry;
+ } else {
+ HeNEXT(entry) = *oentry;
+ *oentry = entry;
+ }
+ } else
+#endif
+ {
+ HeNEXT(entry) = *oentry;
+ *oentry = entry;
+ }
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ if (SvOOK(hv)) {
+ /* Currently this makes various tests warn in annoying ways.
+ * So Silenced for now. - Yves | bogus end of comment =>* /
+ if (HvAUX(hv)->xhv_riter != -1) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
+ "[TESTING] Inserting into a hash during each() traversal results in undefined behavior"
+ pTHX__FORMAT
+ pTHX__VALUE);
+ }
+ */
+ if (PL_HASH_RAND_BITS_ENABLED) {
+ if (PL_HASH_RAND_BITS_ENABLED == 1)
+ PL_hash_rand_bits += (PTRV)entry + 1; /* we don't bother to use ptr_hash here */
+ PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
+ }
+ HvAUX(hv)->xhv_rand= (U32)PL_hash_rand_bits;
+ }
+#endif
if (val == &PL_sv_placeholder)
HvPLACEHOLDERS(hv)++;
xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
if ( DO_HSPLIT(xhv) ) {
- hsplit(hv);
+ const STRLEN oldsize = xhv->xhv_max + 1;
+ const U32 items = (U32)HvPLACEHOLDERS_get(hv);
+
+ if (items /* hash has placeholders */
+ && !SvREADONLY(hv) /* but is not a restricted hash */) {
+ /* If this hash previously was a "restricted hash" and had
+ placeholders, but the "restricted" flag has been turned off,
+ then the placeholders no longer serve any useful purpose.
+ However, they have the downsides of taking up RAM, and adding
+ extra steps when finding used values. It's safe to clear them
+ at this point, even though Storable rebuilds restricted hashes by
+ putting in all the placeholders (first) before turning on the
+ readonly flag, because Storable always pre-splits the hash.
+ If we're lucky, then we may clear sufficient placeholders to
+ avoid needing to split the hash at all. */
+ clear_placeholders(hv, items);
+ if (DO_HSPLIT(xhv))
+ hsplit(hv, oldsize, oldsize * 2);
+ } else
+ hsplit(hv, oldsize, oldsize * 2);
}
if (return_svp) {
/*
=for apidoc hv_scalar
Evaluates the hash in scalar context and returns the result. Handles magic
when the hash is tied.
=cut
*/
XPVHV* xhv;
HE *entry;
HE **oentry;
+ HE *const *first_entry;
bool is_utf8 = (k_flags & HVhek_UTF8) ? TRUE : FALSE;
int masked_flags;
masked_flags = (k_flags & HVhek_MASK);
- oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)];
+ first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)];
entry = *oentry;
for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
SV *sv;
Safefree(key);
return NULL;
}
- if (SvREADONLY(hv) && HeVAL(entry) && SvREADONLY(HeVAL(entry))
- && !SvIsCOW(HeVAL(entry))) {
+ if (SvREADONLY(hv) && HeVAL(entry) && SvREADONLY(HeVAL(entry))) {
hv_notallowed(k_flags, key, klen,
"Attempt to delete readonly key '%"SVf"' from"
" a restricted hash");
HvPLACEHOLDERS(hv)++;
else {
*oentry = HeNEXT(entry);
+ if(!*first_entry && SvOOK(hv)) {
+ /* removed last entry, and aux struct present. */
+ struct xpvhv_aux *const aux = HvAUX(hv);
+ if (aux->xhv_fill_lazy)
+ --aux->xhv_fill_lazy;
+ }
if (SvOOK(hv) && entry == HvAUX(hv)->xhv_eiter /* HvEITER(hv) */)
HvLAZYDEL_on(hv);
else {
return NULL;
}
+
STATIC void
-S_hsplit(pTHX_ HV *hv)
+S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize)
{
dVAR;
- XPVHV* const xhv = (XPVHV*)SvANY(hv);
- const I32 oldsize = (I32) xhv->xhv_max+1; /* HvMAX(hv)+1 (sick) */
- I32 newsize = oldsize * 2;
- I32 i;
+ STRLEN i = 0;
char *a = (char*) HvARRAY(hv);
HE **aep;
- PERL_ARGS_ASSERT_HSPLIT;
+ bool do_aux= (
+ /* already have an HvAUX(hv) so we have to move it */
+ SvOOK(hv) ||
+ /* no HvAUX() but array we are going to allocate is large enough
+ * there is no point in saving the space for the iterator, and
+ * speeds up later traversals. */
+ ( ( hv != PL_strtab ) && ( newsize >= PERL_HV_ALLOC_AUX_SIZE ) )
+ );
- /*PerlIO_printf(PerlIO_stderr(), "hsplit called for %p which had %d\n",
- (void*)hv, (int) oldsize);*/
+ PERL_ARGS_ASSERT_HSPLIT;
- if (HvPLACEHOLDERS_get(hv) && !SvREADONLY(hv)) {
- /* Can make this clear any placeholders first for non-restricted hashes,
- even though Storable rebuilds restricted hashes by putting in all the
- placeholders (first) before turning on the readonly flag, because
- Storable always pre-splits the hash. */
- hv_clear_placeholders(hv);
- }
-
PL_nomemok = TRUE;
-#if defined(STRANGE_MALLOC) || defined(MYMALLOC)
Renew(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize)
- + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char);
- if (!a) {
- PL_nomemok = FALSE;
- return;
- }
- if (SvOOK(hv)) {
- Move(&a[oldsize * sizeof(HE*)], &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux);
- }
-#else
- Newx(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize)
- + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char);
+ + (do_aux ? sizeof(struct xpvhv_aux) : 0), char);
+ PL_nomemok = FALSE;
if (!a) {
- PL_nomemok = FALSE;
return;
}
- Copy(HvARRAY(hv), a, oldsize * sizeof(HE*), char);
- if (SvOOK(hv)) {
- Copy(HvAUX(hv), &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux);
+
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ /* the idea of this is that we create a "random" value by hashing the address of
+ * the array, we then use the low bit to decide if we insert at the top, or insert
+ * second from top. After each such insert we rotate the hashed value. So we can
+ * use the same hashed value over and over, and in normal build environments use
+ * very few ops to do so. ROTL32() should produce a single machine operation. */
+ if (PL_HASH_RAND_BITS_ENABLED) {
+ if (PL_HASH_RAND_BITS_ENABLED == 1)
+ PL_hash_rand_bits += ptr_hash((PTRV)a);
+ PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1);
}
- Safefree(HvARRAY(hv));
#endif
-
- PL_nomemok = FALSE;
- Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); /* zero 2nd half*/
- xhv->xhv_max = --newsize; /* HvMAX(hv) = --newsize */
HvARRAY(hv) = (HE**) a;
- aep = (HE**)a;
+ HvMAX(hv) = newsize - 1;
+ /* before we zero the newly added memory, we
+ * need to deal with the aux struct that may be there
+ * or have been allocated by us*/
+ if (do_aux) {
+ struct xpvhv_aux *const dest
+ = (struct xpvhv_aux*) &a[newsize * sizeof(HE*)];
+ if (SvOOK(hv)) {
+ /* alread have an aux, copy the old one in place. */
+ Move(&a[oldsize * sizeof(HE*)], dest, 1, struct xpvhv_aux);
+ /* we reset the iterator's xhv_rand as well, so they get a totally new ordering */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ dest->xhv_rand = (U32)PL_hash_rand_bits;
+#endif
+ /* For now, just reset the lazy fill counter.
+ It would be possible to update the counter in the code below
+ instead. */
+ dest->xhv_fill_lazy = 0;
+ } else {
+ /* no existing aux structure, but we allocated space for one
+ * so intialize it properly. This unrolls hv_auxinit() a bit,
+ * since we have to do the realloc anyway. */
+ /* first we set the iterator's xhv_rand so it can be copied into lastrand below */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ dest->xhv_rand = (U32)PL_hash_rand_bits;
+#endif
+ /* this is the "non realloc" part of the hv_auxinit() */
+ (void)hv_auxinit_internal(dest);
+ /* Turn on the OOK flag */
+ SvOOK_on(hv);
+ }
+ }
+ /* now we can safely clear the second half */
+ Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); /* zero 2nd half*/
+
+ if (!HvTOTALKEYS(hv)) /* skip rest if no entries */
+ return;
- for (i=0; i<oldsize; i++,aep++) {
- HE **oentry = aep;
- HE *entry = *aep;
- HE **bep;
+ newsize--;
+ aep = (HE**)a;
+ do {
+ HE **oentry = aep + i;
+ HE *entry = aep[i];
if (!entry) /* non-existent */
continue;
- bep = aep+oldsize;
do {
- if ((HeHASH(entry) & newsize) != (U32)i) {
+ U32 j = (HeHASH(entry) & newsize);
+ if (j != (U32)i) {
*oentry = HeNEXT(entry);
- HeNEXT(entry) = *bep;
- *bep = entry;
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ /* if the target cell is empty or PL_HASH_RAND_BITS_ENABLED is false
+ * insert to top, otherwise rotate the bucket rand 1 bit,
+ * and use the new low bit to decide if we insert at top,
+ * or next from top. IOW, we only rotate on a collision.*/
+ if (aep[j] && PL_HASH_RAND_BITS_ENABLED) {
+ PL_hash_rand_bits+= ROTL32(HeHASH(entry), 17);
+ PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
+ if (PL_hash_rand_bits & 1) {
+ HeNEXT(entry)= HeNEXT(aep[j]);
+ HeNEXT(aep[j])= entry;
+ } else {
+ /* Note, this is structured in such a way as the optimizer
+ * should eliminate the duplicated code here and below without
+ * us needing to explicitly use a goto. */
+ HeNEXT(entry) = aep[j];
+ aep[j] = entry;
+ }
+ } else
+#endif
+ {
+ /* see comment above about duplicated code */
+ HeNEXT(entry) = aep[j];
+ aep[j] = entry;
+ }
}
else {
oentry = &HeNEXT(entry);
}
entry = *oentry;
} while (entry);
- /* I think we don't actually need to keep track of the longest length,
- merely flag if anything is too long. But for the moment while
- developing this code I'll track it. */
- }
+ } while (i++ < oldsize);
}
void
XPVHV* xhv = (XPVHV*)SvANY(hv);
const I32 oldsize = (I32) xhv->xhv_max+1; /* HvMAX(hv)+1 (sick) */
I32 newsize;
- I32 i;
char *a;
- HE **aep;
PERL_ARGS_ASSERT_HV_KSPLIT;
a = (char *) HvARRAY(hv);
if (a) {
- PL_nomemok = TRUE;
-#if defined(STRANGE_MALLOC) || defined(MYMALLOC)
- Renew(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize)
- + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char);
- if (!a) {
- PL_nomemok = FALSE;
- return;
- }
- if (SvOOK(hv)) {
- Copy(&a[oldsize * sizeof(HE*)], &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux);
- }
-#else
- Newx(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize)
- + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char);
- if (!a) {
- PL_nomemok = FALSE;
- return;
- }
- Copy(HvARRAY(hv), a, oldsize * sizeof(HE*), char);
- if (SvOOK(hv)) {
- Copy(HvAUX(hv), &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux);
- }
- Safefree(HvARRAY(hv));
-#endif
- PL_nomemok = FALSE;
- Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); /* zero 2nd half*/
- }
- else {
- Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
+ hsplit(hv, oldsize, newsize);
+ } else {
+ Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
+ xhv->xhv_max = --newsize;
+ HvARRAY(hv) = (HE **) a;
}
- xhv->xhv_max = --newsize; /* HvMAX(hv) = --newsize */
- HvARRAY(hv) = (HE **) a;
- if (!xhv->xhv_keys /* !HvTOTALKEYS(hv) */) /* skip rest if no entries */
- return;
+}
- aep = (HE**)a;
- for (i=0; i<oldsize; i++,aep++) {
- HE **oentry = aep;
- HE *entry = *aep;
/* IMO this should also handle cases where hv_max is smaller than hv_keys
 * as tied hashes could play silly buggers and mess us around. We will
 * do the right thing during hv_store() afterwards, but still - Yves */
/* Shrink the 2^n-1 bucket mask hv_max to suit hv_keys keys — never below
 * PERL_HASH_DEFAULT_HvMAX — then store the result in HvMAX(hv). */
#define HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys) STMT_START {\
    /* Can we use fewer buckets? (hv_max is always 2^n-1) */ \
    if (hv_max < PERL_HASH_DEFAULT_HvMAX) { \
        hv_max = PERL_HASH_DEFAULT_HvMAX; \
    } else { \
        while (hv_max > PERL_HASH_DEFAULT_HvMAX && hv_max + 1 >= hv_keys * 2) \
            hv_max = hv_max / 2; \
    } \
    HvMAX(hv) = hv_max; \
} STMT_END
- if (!entry) /* non-existent */
- continue;
- do {
- I32 j = (HeHASH(entry) & newsize);
-
- if (j != i) {
- j -= i;
- *oentry = HeNEXT(entry);
- HeNEXT(entry) = aep[j];
- aep[j] = entry;
- }
- else
- oentry = &HeNEXT(entry);
- entry = *oentry;
- } while (entry);
- }
-}
HV *
Perl_newHVhv(pTHX_ HV *ohv)
HE *entry;
const I32 riter = HvRITER_get(ohv);
HE * const eiter = HvEITER_get(ohv);
- STRLEN hv_fill = HvFILL(ohv);
+ STRLEN hv_keys = HvTOTALKEYS(ohv);
- /* Can we use fewer buckets? (hv_max is always 2^n-1) */
- while (hv_max && hv_max + 1 >= hv_fill * 2)
- hv_max = hv_max / 2;
- HvMAX(hv) = hv_max;
+ HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys);
hv_iterinit(ohv);
while ((entry = hv_iternext_flags(ohv, 0))) {
if (ohv) {
STRLEN hv_max = HvMAX(ohv);
- STRLEN hv_fill = HvFILL(ohv);
+ STRLEN hv_keys = HvTOTALKEYS(ohv);
HE *entry;
const I32 riter = HvRITER_get(ohv);
HE * const eiter = HvEITER_get(ohv);
ENTER;
SAVEFREESV(hv);
- while (hv_max && hv_max + 1 >= hv_fill * 2)
- hv_max = hv_max / 2;
- HvMAX(hv) = hv_max;
+ HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys);
hv_iterinit(ohv);
while ((entry = hv_iternext_flags(ohv, 0))) {
hv_magic(hv, NULL, PERL_MAGIC_hints);
return hv;
}
+#undef HV_SET_MAX_ADJUSTED_FOR_KEYS
/* like hv_free_ent, but returns the SV rather than freeing it */
STATIC SV*
/* not already placeholder */
if (HeVAL(entry) != &PL_sv_placeholder) {
if (HeVAL(entry)) {
- if (SvREADONLY(HeVAL(entry)) && !SvIsCOW(HeVAL(entry))) {
+ if (SvREADONLY(HeVAL(entry))) {
SV* const keysv = hv_iterkeysv(entry);
Perl_croak_nocontext(
"Attempt to delete readonly key '%"SVf"' from a restricted hash",
if (--items == 0) {
/* Finished. */
- HvTOTALKEYS(hv) -= (IV)HvPLACEHOLDERS_get(hv);
- if (HvUSEDKEYS(hv) == 0)
+ I32 placeholders = HvPLACEHOLDERS_get(hv);
+ HvTOTALKEYS(hv) -= (IV)placeholders;
+ /* HvUSEDKEYS expanded */
+ if ((HvTOTALKEYS(hv) - placeholders) == 0)
HvHASKFLAGS_off(hv);
HvPLACEHOLDERS_set(hv, 0);
return;
} while (--i >= 0);
/* You can't get here, hence assertion should always fail. */
assert (items == 0);
- assert (0);
+ NOT_REACHED;
}
STATIC void
PERL_ARGS_ASSERT_HFREE_NEXT_ENTRY;
- if (SvOOK(hv) && ((iter = HvAUX(hv)))
- && ((entry = iter->xhv_eiter)) )
- {
- /* the iterator may get resurrected after each
- * destructor call, so check each time */
- if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */
- HvLAZYDEL_off(hv);
- hv_free_ent(hv, entry);
- /* warning: at this point HvARRAY may have been
- * re-allocated, HvMAX changed etc */
- }
- iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
- iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
+ if (SvOOK(hv) && ((iter = HvAUX(hv)))) {
+ if ((entry = iter->xhv_eiter)) {
+ /* the iterator may get resurrected after each
+ * destructor call, so check each time */
+ if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */
+ HvLAZYDEL_off(hv);
+ hv_free_ent(hv, entry);
+ /* warning: at this point HvARRAY may have been
+ * re-allocated, HvMAX changed etc */
+ }
+ iter = HvAUX(hv); /* may have been realloced */
+ iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
+ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ iter->xhv_last_rand = iter->xhv_rand;
+#endif
+ }
+ /* Reset any cached HvFILL() to "unknown". It's unlikely that anyone
+ will actually call HvFILL() on a hash under destruction, so it
+ seems pointless attempting to track the number of keys remaining.
+ But if they do, we want to reset it again. */
+ if (iter->xhv_fill_lazy)
+ iter->xhv_fill_lazy = 0;
}
if (!((XPVHV*)SvANY(hv))->xhv_keys)
{
dVAR;
XPVHV* xhv;
- const char *name;
- const bool save = !!SvREFCNT(hv);
+ bool save;
if (!hv)
return;
+ save = !!SvREFCNT(hv);
DEBUG_A(Perl_hv_assert(aTHX_ hv));
xhv = (XPVHV*)SvANY(hv);
if they will be freed anyway. */
/* note that the code following prior to hfreeentries is duplicated
* in sv_clear(), and changes here should be done there too */
- if (PL_phase != PERL_PHASE_DESTRUCT && (name = HvNAME(hv))) {
+ if (PL_phase != PERL_PHASE_DESTRUCT && HvNAME(hv)) {
if (PL_stashcache) {
DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for '%"
- HEKf"'\n", HvNAME_HEK(hv)));
- (void)hv_delete(PL_stashcache, name,
- HEK_UTF8(HvNAME_HEK(hv)) ? -HvNAMELEN_get(hv) : HvNAMELEN_get(hv),
- G_DISCARD
- );
+ HEKf"'\n", HEKfARG(HvNAME_HEK(hv))));
+ (void)hv_deletehek(PL_stashcache, HvNAME_HEK(hv), G_DISCARD);
}
hv_name_set(hv, NULL, 0, 0);
}
}
hfreeentries(hv);
if (SvOOK(hv)) {
- struct xpvhv_aux * const aux = HvAUX(hv);
struct mro_meta *meta;
+ const char *name;
- if ((name = HvENAME_get(hv))) {
+ if (HvENAME_get(hv)) {
if (PL_phase != PERL_PHASE_DESTRUCT)
mro_isa_changed_in(hv);
if (PL_stashcache) {
DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for effective name '%"
- HEKf"'\n", HvENAME_HEK(hv)));
- (void)hv_delete(
- PL_stashcache, name,
- HEK_UTF8(HvENAME_HEK(hv)) ? -HvENAMELEN_get(hv) : HvENAMELEN_get(hv),
- G_DISCARD
- );
+ HEKf"'\n", HEKfARG(HvENAME_HEK(hv))));
+ (void)hv_deletehek(PL_stashcache, HvENAME_HEK(hv), G_DISCARD);
}
}
/* If this call originated from sv_clear, then we must check for
* effective names that need freeing, as well as the usual name. */
name = HvNAME(hv);
- if (flags & HV_NAME_SETALL ? !!aux->xhv_name_u.xhvnameu_name : !!name) {
+ if (flags & HV_NAME_SETALL ? !!HvAUX(hv)->xhv_name_u.xhvnameu_name : !!name) {
if (name && PL_stashcache) {
DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for name '%"
- HEKf"'\n", HvNAME_HEK(hv)));
- (void)hv_delete(PL_stashcache, name, (HEK_UTF8(HvNAME_HEK(hv)) ? -HvNAMELEN_get(hv) : HvNAMELEN_get(hv)), G_DISCARD);
+ HEKf"'\n", HEKfARG(HvNAME_HEK(hv))));
+ (void)hv_deletehek(PL_stashcache, HvNAME_HEK(hv), G_DISCARD);
}
hv_name_set(hv, NULL, 0, flags);
}
- if((meta = aux->xhv_mro_meta)) {
+ if((meta = HvAUX(hv)->xhv_mro_meta)) {
if (meta->mro_linear_all) {
SvREFCNT_dec_NN(meta->mro_linear_all);
/* mro_linear_current is just acting as a shortcut pointer,
SvREFCNT_dec(meta->mro_linear_current);
SvREFCNT_dec(meta->mro_nextmethod);
SvREFCNT_dec(meta->isa);
+ SvREFCNT_dec(meta->super);
Safefree(meta);
- aux->xhv_mro_meta = NULL;
+ HvAUX(hv)->xhv_mro_meta = NULL;
}
- SvREFCNT_dec(aux->xhv_super);
- if (!aux->xhv_name_u.xhvnameu_name && ! aux->xhv_backreferences)
+ if (!HvAUX(hv)->xhv_name_u.xhvnameu_name && ! HvAUX(hv)->xhv_backreferences)
SvFLAGS(hv) &= ~SVf_OOK;
}
if (!SvOOK(hv)) {
Safefree(HvARRAY(hv));
- xhv->xhv_max = 7; /* HvMAX(hv) = 7 (it's a normal hash) */
+ xhv->xhv_max = PERL_HASH_DEFAULT_HvMAX; /* HvMAX(hv) = 7 (it's a normal hash) */
HvARRAY(hv) = 0;
}
/* if we're freeing the HV, the SvMAGIC field has been reused for
/*
=for apidoc hv_fill
Returns the number of hash buckets that
happen to be in use. This function is
wrapped by the macro C<HvFILL>.

Previously this value was always stored in the HV structure, which created an
overhead on every hash (and pretty much every object) for something that was
rarely used. Now we calculate it on demand the first
time that it is needed, and cache it if that calculation
is going to be costly to repeat. The cached
value is updated by insertions and deletions, but (currently) discarded if
the hash is split.
=cut
*/
STRLEN
-Perl_hv_fill(pTHX_ HV const *const hv)
+Perl_hv_fill(pTHX_ HV *const hv)
{
STRLEN count = 0;
HE **ents = HvARRAY(hv);
+ struct xpvhv_aux *aux = SvOOK(hv) ? HvAUX(hv) : NULL;
PERL_ARGS_ASSERT_HV_FILL;
+ /* No keys implies no buckets used.
+ One key can only possibly mean one bucket used. */
+ if (HvTOTALKEYS(hv) < 2)
+ return HvTOTALKEYS(hv);
+
+#ifndef DEBUGGING
+ if (aux && aux->xhv_fill_lazy)
+ return aux->xhv_fill_lazy;
+#endif
+
if (ents) {
HE *const *const last = ents + HvMAX(hv);
count = last + 1 - ents;
--count;
} while (++ents <= last);
}
+ if (aux) {
+#ifdef DEBUGGING
+ if (aux->xhv_fill_lazy)
+ assert(aux->xhv_fill_lazy == count);
+#endif
+ aux->xhv_fill_lazy = count;
+ } else if (HvMAX(hv) >= HV_FILL_THRESHOLD) {
+ aux = hv_auxinit(hv);
+ aux->xhv_fill_lazy = count;
+ }
return count;
}
-static struct xpvhv_aux*
-S_hv_auxinit(HV *hv) {
- struct xpvhv_aux *iter;
- char *array;
-
- PERL_ARGS_ASSERT_HV_AUXINIT;
+/* hash a pointer to a U32 - Used in the hash traversal randomization
+ * and bucket order randomization code
+ *
+ * this code was derived from Sereal, which was derived from autobox.
+ */
- if (!HvARRAY(hv)) {
- Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1)
- + sizeof(struct xpvhv_aux), char);
- } else {
- array = (char *) HvARRAY(hv);
- Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1)
- + sizeof(struct xpvhv_aux), char);
- }
- HvARRAY(hv) = (HE**) array;
- SvOOK_on(hv);
- iter = HvAUX(hv);
+PERL_STATIC_INLINE U32 S_ptr_hash(PTRV u) {
+#if PTRSIZE == 8
+ /*
+ * This is one of Thomas Wang's hash functions for 64-bit integers from:
+ * http://www.concentric.net/~Ttwang/tech/inthash.htm
+ */
+ u = (~u) + (u << 18);
+ u = u ^ (u >> 31);
+ u = u * 21;
+ u = u ^ (u >> 11);
+ u = u + (u << 6);
+ u = u ^ (u >> 22);
+#else
+ /*
+ * This is one of Bob Jenkins' hash functions for 32-bit integers
+ * from: http://burtleburtle.net/bob/hash/integer.html
+ */
+ u = (u + 0x7ed55d16) + (u << 12);
+ u = (u ^ 0xc761c23c) ^ (u >> 19);
+ u = (u + 0x165667b1) + (u << 5);
+ u = (u + 0xd3a2646c) ^ (u << 9);
+ u = (u + 0xfd7046c5) + (u << 3);
+ u = (u ^ 0xb55a4f09) ^ (u >> 16);
+#endif
+ return (U32)u;
+}
+static struct xpvhv_aux*
+S_hv_auxinit_internal(struct xpvhv_aux *iter) {
+ PERL_ARGS_ASSERT_HV_AUXINIT_INTERNAL;
iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ iter->xhv_last_rand = iter->xhv_rand;
+#endif
+ iter->xhv_fill_lazy = 0;
iter->xhv_name_u.xhvnameu_name = 0;
iter->xhv_name_count = 0;
iter->xhv_backreferences = 0;
iter->xhv_mro_meta = NULL;
- iter->xhv_super = NULL;
+ iter->xhv_aux_flags = 0;
return iter;
}
+
+static struct xpvhv_aux*
+S_hv_auxinit(pTHX_ HV *hv) {
+ struct xpvhv_aux *iter;
+ char *array;
+
+ PERL_ARGS_ASSERT_HV_AUXINIT;
+
+ if (!SvOOK(hv)) {
+ if (!HvARRAY(hv)) {
+ Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1)
+ + sizeof(struct xpvhv_aux), char);
+ } else {
+ array = (char *) HvARRAY(hv);
+ Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1)
+ + sizeof(struct xpvhv_aux), char);
+ }
+ HvARRAY(hv) = (HE**)array;
+ SvOOK_on(hv);
+ iter = HvAUX(hv);
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ if (PL_HASH_RAND_BITS_ENABLED) {
+ /* mix in some new state to PL_hash_rand_bits to "randomize" the traversal order*/
+ if (PL_HASH_RAND_BITS_ENABLED == 1)
+ PL_hash_rand_bits += ptr_hash((PTRV)array);
+ PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1);
+ }
+ iter->xhv_rand = (U32)PL_hash_rand_bits;
+#endif
+ } else {
+ iter = HvAUX(hv);
+ }
+
+ return hv_auxinit_internal(iter);
+}
+
/*
=for apidoc hv_iterinit
Perl_croak(aTHX_ "Bad hash");
if (SvOOK(hv)) {
- struct xpvhv_aux * const iter = HvAUX(hv);
+ struct xpvhv_aux * iter = HvAUX(hv);
HE * const entry = iter->xhv_eiter; /* HvEITER(hv) */
if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */
HvLAZYDEL_off(hv);
hv_free_ent(hv, entry);
}
+ iter = HvAUX(hv); /* may have been reallocated */
iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ iter->xhv_last_rand = iter->xhv_rand;
+#endif
} else {
hv_auxinit(hv);
}
}
void
+Perl_hv_rand_set(pTHX_ HV *hv, U32 new_xhv_rand) {
+ struct xpvhv_aux *iter;
+
+ PERL_ARGS_ASSERT_HV_RAND_SET;
+
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ if (!hv)
+ Perl_croak(aTHX_ "Bad hash");
+
+ if (SvOOK(hv)) {
+ iter = HvAUX(hv);
+ } else {
+ iter = hv_auxinit(hv);
+ }
+ iter->xhv_rand = new_xhv_rand;
+#else
+ Perl_croak(aTHX_ "This Perl has not been built with support for randomized hash key traversal but something called Perl_hv_rand_set().");
+#endif
+}
+
+void
Perl_hv_eiter_set(pTHX_ HV *hv, HE *eiter) {
struct xpvhv_aux *iter;
/* The first elem may be null. */
if(*name) unshare_hek_or_pvn(*name, 0, 0, 0);
Safefree(name);
+ iter = HvAUX(hv); /* may been realloced */
spot = &iter->xhv_name_u.xhvnameu_name;
iter->xhv_name_count = 0;
}
}
else if (flags & HV_NAME_SETALL) {
unshare_hek_or_pvn(iter->xhv_name_u.xhvnameu_name, 0, 0, 0);
+ iter = HvAUX(hv); /* may been realloced */
spot = &iter->xhv_name_u.xhvnameu_name;
}
else {
: (HEK_LEN(*victim) == (I32)len && memEQ(HEK_KEY(*victim), name, len))
) {
unshare_hek_or_pvn(*victim, 0, 0, 0);
+ aux = HvAUX(hv); /* may been realloced */
if (count < 0) ++aux->xhv_name_count;
else --aux->xhv_name_count;
if (
Returns entries from a hash iterator. See C<hv_iterinit> and C<hv_iternext>.
The C<flags> value will normally be zero; if HV_ITERNEXT_WANTPLACEHOLDERS is
set the placeholders keys (for restricted hashes) will be returned in addition
to normal keys.  By default placeholders are automatically skipped over.
Currently a placeholder is implemented with a value that is
C<&PL_sv_placeholder>. Note that the implementation of placeholders and
restricted hashes may change, and the implementation currently is
SvREFCNT_dec(HeVAL(entry));
Safefree(HeKEY_hek(entry));
del_HE(entry);
+ iter = HvAUX(hv); /* may been realloced */
iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
HvLAZYDEL_off(hv);
return NULL;
}
}
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ if (iter->xhv_last_rand != iter->xhv_rand) {
+ if (iter->xhv_riter != -1) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
+ "Use of each() on hash after insertion without resetting hash iterator results in undefined behavior"
+ pTHX__FORMAT
+ pTHX__VALUE);
+ }
+ iter = HvAUX(hv); /* may been realloced */
+ iter->xhv_last_rand = iter->xhv_rand;
+ }
+#endif
+
/* Skip the entire loop if the hash is empty. */
if ((flags & HV_ITERNEXT_WANTPLACEHOLDERS)
? HvTOTALKEYS(hv) : HvUSEDKEYS(hv)) {
if (iter->xhv_riter > (I32)xhv->xhv_max /* HvRITER(hv) > HvMAX(hv) */) {
/* There is no next one. End of the hash. */
iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ iter->xhv_last_rand = iter->xhv_rand; /* reset xhv_last_rand so we can detect inserts during traversal */
+#endif
break;
}
- entry = (HvARRAY(hv))[iter->xhv_riter];
+ entry = (HvARRAY(hv))[ PERL_HASH_ITER_BUCKET(iter) & xhv->xhv_max ];
if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) {
/* If we have an entry, but it's a placeholder, don't count it.
or if we run through it and find only placeholders. */
}
}
- else iter->xhv_riter = -1;
+ else {
+ iter->xhv_riter = -1;
+#ifdef PERL_HASH_RANDOMIZE_KEYS
+ iter->xhv_last_rand = iter->xhv_rand;
+#endif
+ }
if (oldentry && HvLAZYDEL(hv)) { /* was deleted earlier? */
HvLAZYDEL_off(hv);
hv_free_ent(hv, oldentry);
}
+ iter = HvAUX(hv); /* may been realloced */
iter->xhv_eiter = entry; /* HvEITER(hv) = entry */
return entry;
}
xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
if (!next) { /* initial entry? */
} else if ( DO_HSPLIT(xhv) ) {
- hsplit(PL_strtab);
+ const STRLEN oldsize = xhv->xhv_max + 1;
+ hsplit(PL_strtab, oldsize, oldsize * 2);
}
}
return HeKEY_hek(entry);
}
-I32 *
+SSize_t *
Perl_hv_placeholders_p(pTHX_ HV *hv)
{
dVAR;
const char *keyend = keypv + keylen, *p;
STRLEN nonascii_count = 0;
for (p = keypv; p != keyend; p++) {
- U8 c = (U8)*p;
- if (c & 0x80) {
- if (!((c & 0xfe) == 0xc2 && ++p != keyend &&
- (((U8)*p) & 0xc0) == 0x80))
+ if (! UTF8_IS_INVARIANT(*p)) {
+ if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) {
goto canonicalised_key;
+ }
nonascii_count++;
+ p++;
}
}
if (nonascii_count) {
keypv = q;
for (; p != keyend; p++, q++) {
U8 c = (U8)*p;
- *q = (char)
- ((c & 0x80) ? ((c & 0x03) << 6) | (((U8)*++p) & 0x3f) : c);
+ if (UTF8_IS_INVARIANT(c)) {
+ *q = (char) c;
+ }
+ else {
+ p++;
+ *q = (char) TWO_BYTE_UTF8_TO_NATIVE(c, *p);
+ }
}
}
flags &= ~REFCOUNTED_HE_KEY_UTF8;
const char *keyend = keypv + keylen, *p;
STRLEN nonascii_count = 0;
for (p = keypv; p != keyend; p++) {
- U8 c = (U8)*p;
- if (c & 0x80) {
- if (!((c & 0xfe) == 0xc2 && ++p != keyend &&
- (((U8)*p) & 0xc0) == 0x80))
+ if (! UTF8_IS_INVARIANT(*p)) {
+ if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) {
goto canonicalised_key;
+ }
nonascii_count++;
+ p++;
}
}
if (nonascii_count) {
keypv = q;
for (; p != keyend; p++, q++) {
U8 c = (U8)*p;
- *q = (char)
- ((c & 0x80) ? ((c & 0x03) << 6) | (((U8)*++p) & 0x3f) : c);
+ if (UTF8_IS_INVARIANT(c)) {
+ *q = (char) c;
+ }
+ else {
+ p++;
+ *q = (char) TWO_BYTE_UTF8_TO_NATIVE(c, *p);
+ }
}
}
flags &= ~REFCOUNTED_HE_KEY_UTF8;
Perl_refcounted_he_inc(pTHX_ struct refcounted_he *he)
{
dVAR;
+ PERL_UNUSED_CONTEXT;
if (he) {
HINTS_REFCNT_LOCK;
he->refcounted_he_refcnt++;
struct refcounted_he *const chain = cop->cop_hints_hash;
PERL_ARGS_ASSERT_COP_FETCH_LABEL;
+ PERL_UNUSED_CONTEXT;
if (!chain)
return NULL;
/*
=for apidoc cop_store_label
Save a label into a C<cop_hints_hash>.
You need to set flags to C<SVf_UTF8>
for a utf-8 label.
=cut