#include "EXTERN.h"
#define PERL_IN_HV_C
+#define PERL_HASH_INTERNAL_ACCESS
#include "perl.h"
+#define HV_MAX_LENGTH_BEFORE_SPLIT 14
+
STATIC HE*
S_new_he(pTHX)
{
he = HeNEXT(he);
del_HE(ohe);
}
+ PL_hv_fetch_ent_mh = Nullhe;
}
#if defined(USE_ITHREADS)
}
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ /* Yes, you do need this even though you are not "storing" because
+ you can flip the flags below if doing an lval lookup. (And that
+ was put in to give the semantics Andreas was expecting.) */
+ flags |= HVhek_REHASH;
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
/* entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
for (; entry; entry = HeNEXT(entry)) {
+ if (!HeKEY_hek(entry))
+ continue;
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
if (HeKLEN(entry) != (I32)klen)
}
else
HeKFLAGS(entry) = flags;
+ if (flags & HVhek_ENABLEHVKFLAGS)
+ HvHASKFLAGS_on(hv);
}
if (flags & HVhek_FREEKEY)
Safefree(key);
/* if we find a placeholder, we pretend we haven't found anything */
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
break;
return &HeVAL(entry);
#endif
}
+ keysave = key = SvPV(keysv, klen);
xhv = (XPVHV*)SvANY(hv);
if (!xhv->xhv_array /* !HvARRAY(hv) */) {
if (lval
return 0;
}
- keysave = key = SvPV(keysv, klen);
is_utf8 = (SvUTF8(keysv)!=0);
if (is_utf8) {
flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
}
- if (!hash) {
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ /* Yes, you do need this even though you are not "storing" because
+ you can flip the flags below if doing an lval lookup. (And that
+ was put in to give the semantics Andreas was expecting.) */
+ flags |= HVhek_REHASH;
+ } else if (!hash) {
if SvIsCOW_shared_hash(keysv) {
hash = SvUVX(keysv);
} else {
}
else
HeKFLAGS(entry) = flags;
+ if (flags & HVhek_ENABLEHVKFLAGS)
+ HvHASKFLAGS_on(hv);
}
if (key != keysave)
Safefree(key);
/* if we find a placeholder, we pretend we haven't found anything */
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
break;
return entry;
}
register U32 hash, int flags)
{
register XPVHV* xhv;
- register I32 i;
+ register U32 n_links;
register HE *entry;
register HE **oentry;
if (flags)
HvHASKFLAGS_on((SV*)hv);
- if (!hash)
+ if (HvREHASH(hv)) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK, so that hv_iterkeysv can see it. */
+ flags |= HVhek_REHASH;
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash)
PERL_HASH(hash, key, klen);
if (!xhv->xhv_array /* !HvARRAY(hv) */)
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
- i = 1;
- for (entry = *oentry; entry; i=0, entry = HeNEXT(entry)) {
+ n_links = 0;
+
+ for (entry = *oentry; entry; ++n_links, entry = HeNEXT(entry)) {
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
if (HeKLEN(entry) != (I32)klen)
continue;
if ((HeKFLAGS(entry) ^ flags) & HVhek_UTF8)
continue;
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
xhv->xhv_placeholders--; /* yes, can store into placeholder slot */
else
SvREFCNT_dec(HeVAL(entry));
/* We have been requested to insert a placeholder. Currently
only Storable is allowed to do this. */
xhv->xhv_placeholders++;
- HeVAL(entry) = &PL_sv_undef;
+ HeVAL(entry) = &PL_sv_placeholder;
} else
HeVAL(entry) = val;
/* We have been requested to insert a placeholder. Currently
only Storable is allowed to do this. */
xhv->xhv_placeholders++;
- HeVAL(entry) = &PL_sv_undef;
+ HeVAL(entry) = &PL_sv_placeholder;
} else
HeVAL(entry) = val;
HeNEXT(entry) = *oentry;
*oentry = entry;
xhv->xhv_keys++; /* HvKEYS(hv)++ */
- if (i) { /* initial entry? */
+ if (!n_links) { /* initial entry? */
xhv->xhv_fill++; /* HvFILL(hv)++ */
- if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */)
- hsplit(hv);
+ } else if ((xhv->xhv_keys > (IV)xhv->xhv_max)
+ || ((n_links > HV_MAX_LENGTH_BEFORE_SPLIT) && !HvREHASH(hv))) {
+ /* Use the old HvKEYS(hv) > HvMAX(hv) condition to limit bucket
+ splits on a rehashed hash, as we're not going to split it again,
+ and if someone is lucky (evil) enough to get all the keys in one
+ list they could exhaust our memory as we repeatedly double the
+ number of buckets on every entry. Linear search feels a less worse
+ thing to do. */
+ hsplit(hv);
}
return &HeVAL(entry);
XPVHV* xhv;
char *key;
STRLEN klen;
- I32 i;
+ U32 n_links;
HE *entry;
HE **oentry;
bool is_utf8;
HvHASKFLAGS_on((SV*)hv);
}
- if (!hash) {
+ if (HvREHASH(hv)) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK, so that hv_iterkeysv can see it. */
+ flags |= HVhek_REHASH;
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash) {
if SvIsCOW_shared_hash(keysv) {
hash = SvUVX(keysv);
} else {
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
- i = 1;
+ n_links = 0;
entry = *oentry;
- for (; entry; i=0, entry = HeNEXT(entry)) {
+ for (; entry; ++n_links, entry = HeNEXT(entry)) {
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
if (HeKLEN(entry) != (I32)klen)
continue;
if ((HeKFLAGS(entry) ^ flags) & HVhek_UTF8)
continue;
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
xhv->xhv_placeholders--; /* yes, can store into placeholder slot */
else
SvREFCNT_dec(HeVAL(entry));
*oentry = entry;
xhv->xhv_keys++; /* HvKEYS(hv)++ */
- if (i) { /* initial entry? */
+ if (!n_links) { /* initial entry? */
xhv->xhv_fill++; /* HvFILL(hv)++ */
- if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */)
- hsplit(hv);
+ } else if ((xhv->xhv_keys > (IV)xhv->xhv_max)
+ || ((n_links > HV_MAX_LENGTH_BEFORE_SPLIT) && !HvREHASH(hv))) {
+ /* Use only the old HvKEYS(hv) > HvMAX(hv) condition to limit bucket
+ splits on a rehashed hash, as we're not going to split it again,
+ and if someone is lucky (evil) enough to get all the keys in one
+ list they could exhaust our memory as we repeatedly double the
+ number of buckets on every entry. Linear search feels a less worse
+ thing to do. */
+ hsplit(hv);
}
return entry;
k_flags |= HVhek_FREEKEY;
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
if (k_flags & HVhek_FREEKEY)
Safefree(key);
/* if placeholder is here, it's already been deleted.... */
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
{
if (SvREADONLY(hv))
return Nullsv; /* if still SvREADONLY, leave it deleted. */
sv = Nullsv;
else {
sv = sv_2mortal(HeVAL(entry));
- HeVAL(entry) = &PL_sv_undef;
+ HeVAL(entry) = &PL_sv_placeholder;
}
/*
* an error.
*/
if (SvREADONLY(hv)) {
- HeVAL(entry) = &PL_sv_undef;
+ HeVAL(entry) = &PL_sv_placeholder;
/* We'll be saving this slot, so the number of allocated keys
* doesn't go down, but the number placeholders goes up */
xhv->xhv_placeholders++; /* HvPLACEHOLDERS(hv)++ */
k_flags |= HVhek_FREEKEY;
}
- if (!hash)
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash) {
PERL_HASH(hash, key, klen);
+ }
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
Safefree(key);
/* if placeholder is here, it's already been deleted.... */
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
{
if (SvREADONLY(hv))
return Nullsv; /* if still SvREADONLY, leave it deleted. */
sv = Nullsv;
else {
sv = sv_2mortal(HeVAL(entry));
- HeVAL(entry) = &PL_sv_undef;
+ HeVAL(entry) = &PL_sv_placeholder;
}
/*
* an error.
*/
if (SvREADONLY(hv)) {
- HeVAL(entry) = &PL_sv_undef;
+ HeVAL(entry) = &PL_sv_placeholder;
/* We'll be saving this slot, so the number of allocated keys
* doesn't go down, but the number placeholders goes up */
xhv->xhv_placeholders++; /* HvPLACEHOLDERS(hv)++ */
k_flags |= HVhek_FREEKEY;
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
#ifdef DYNAMIC_ENV_FETCH
if (!xhv->xhv_array /* !HvARRAY(hv) */) entry = Null(HE*);
if (k_flags & HVhek_FREEKEY)
Safefree(key);
/* If we find the key, but the value is a placeholder, return false. */
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
return FALSE;
return TRUE;
if (key != keysave)
k_flags |= HVhek_FREEKEY;
}
- if (!hash)
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash)
PERL_HASH(hash, key, klen);
#ifdef DYNAMIC_ENV_FETCH
if (k_flags & HVhek_FREEKEY)
Safefree(key);
/* If we find the key, but the value is a placeholder, return false. */
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
return FALSE;
return TRUE;
}
register HE **bep;
register HE *entry;
register HE **oentry;
+ int longest_chain = 0;
+ int was_shared;
PL_nomemok = TRUE;
#if defined(STRANGE_MALLOC) || defined(MYMALLOC)
aep = (HE**)a;
for (i=0; i<oldsize; i++,aep++) {
+ int left_length = 0;
+ int right_length = 0;
+
if (!*aep) /* non-existent */
continue;
bep = aep+oldsize;
if (!*bep)
xhv->xhv_fill++; /* HvFILL(hv)++ */
*bep = entry;
+ right_length++;
continue;
}
- else
+ else {
oentry = &HeNEXT(entry);
+ left_length++;
+ }
}
if (!*aep) /* everything moved */
xhv->xhv_fill--; /* HvFILL(hv)-- */
+ /* I think we don't actually need to keep track of the longest length,
+ merely flag if anything is too long. But for the moment while
+ developing this code I'll track it. */
+ if (left_length > longest_chain)
+ longest_chain = left_length;
+ if (right_length > longest_chain)
+ longest_chain = right_length;
}
+
+
+ /* Pick your policy for "hashing isn't working" here: */
+ if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */
+ || HvREHASH(hv)) {
+ return;
+ }
+
+ if (hv == PL_strtab) {
+ /* Urg. Someone is doing something nasty to the string table.
+ Can't win. */
+ return;
+ }
+
+ /* Awooga. Awooga. Pathological data. */
+ /*PerlIO_printf(PerlIO_stderr(), "%p %d of %d with %d/%d buckets\n", hv,
+ longest_chain, HvTOTALKEYS(hv), HvFILL(hv), 1+HvMAX(hv));*/
+
+ ++newsize;
+ Newz(2, a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
+ was_shared = HvSHAREKEYS(hv);
+
+ xhv->xhv_fill = 0;
+ HvSHAREKEYS_off(hv);
+ HvREHASH_on(hv);
+
+ aep = (HE **) xhv->xhv_array;
+
+ for (i=0; i<newsize; i++,aep++) {
+ entry = *aep;
+ while (entry) {
+ /* We're going to trash this HE's next pointer when we chain it
+ into the new hash below, so store where we go next. */
+ HE *next = HeNEXT(entry);
+ UV hash;
+
+ /* Rehash it */
+ PERL_HASH_INTERNAL(hash, HeKEY(entry), HeKLEN(entry));
+
+ if (was_shared) {
+ /* Unshare it. */
+ HEK *new_hek
+ = save_hek_flags(HeKEY(entry), HeKLEN(entry),
+ hash, HeKFLAGS(entry));
+ unshare_hek (HeKEY_hek(entry));
+ HeKEY_hek(entry) = new_hek;
+ } else {
+ /* Not shared, so simply write the new hash in. */
+ HeHASH(entry) = hash;
+ }
+ /*PerlIO_printf(PerlIO_stderr(), "%d ", HeKFLAGS(entry));*/
+ HEK_REHASH_on(HeKEY_hek(entry));
+ /*PerlIO_printf(PerlIO_stderr(), "%d\n", HeKFLAGS(entry));*/
+
+ /* Copy oentry to the correct new chain. */
+ bep = ((HE**)a) + (hash & (I32) xhv->xhv_max);
+ if (!*bep)
+ xhv->xhv_fill++; /* HvFILL(hv)++ */
+ HeNEXT(entry) = *bep;
+ *bep = entry;
+
+ entry = next;
+ }
+ }
+ Safefree (xhv->xhv_array);
+ xhv->xhv_array = a; /* HvARRAY(hv) = a */
}
void
#ifndef NODEFAULT_SHAREKEYS
HvSHAREKEYS_on(hv); /* key-sharing on by default */
#endif
+
xhv->xhv_max = 7; /* HvMAX(hv) = 7 (start with 8 buckets) */
xhv->xhv_fill = 0; /* HvFILL(hv) = 0 */
xhv->xhv_pmroot = 0; /* HvPMROOT(hv) = 0 */
xhv = (XPVHV*)SvANY(hv);
- if(SvREADONLY(hv)) {
+ if (SvREADONLY(hv)) {
/* restricted hash: convert all keys to placeholders */
I32 i;
HE* entry;
- for (i=0; i< (I32) xhv->xhv_max; i++) {
+ for (i = 0; i <= (I32) xhv->xhv_max; i++) {
entry = ((HE**)xhv->xhv_array)[i];
for (; entry; entry = HeNEXT(entry)) {
/* not already placeholder */
- if (HeVAL(entry) != &PL_sv_undef) {
+ if (HeVAL(entry) != &PL_sv_placeholder) {
if (HeVAL(entry) && SvREADONLY(HeVAL(entry))) {
SV* keysv = hv_iterkeysv(entry);
Perl_croak(aTHX_
- "Attempt to delete readonly key '%_' from a restricted hash",
- keysv);
+ "Attempt to delete readonly key '%"SVf"' from a restricted hash",
+ keysv);
}
SvREFCNT_dec(HeVAL(entry));
- HeVAL(entry) = &PL_sv_undef;
+ HeVAL(entry) = &PL_sv_placeholder;
xhv->xhv_placeholders++; /* HvPLACEHOLDERS(hv)++ */
}
}
}
hfreeentries(hv);
- xhv->xhv_fill = 0; /* HvFILL(hv) = 0 */
- xhv->xhv_keys = 0; /* HvKEYS(hv) = 0 */
xhv->xhv_placeholders = 0; /* HvPLACEHOLDERS(hv) = 0 */
if (xhv->xhv_array /* HvARRAY(hv) */)
(void)memzero(xhv->xhv_array /* HvARRAY(hv) */,
mg_clear((SV*)hv);
HvHASKFLAGS_off(hv);
+ HvREHASH_off(hv);
}
STATIC void
riter = 0;
max = HvMAX(hv);
array = HvARRAY(hv);
+ /* make everyone else think the array is empty, so that the destructors
+ * called for freed entries can't recusively mess with us */
+ HvARRAY(hv) = Null(HE**);
+ HvFILL(hv) = 0;
+ ((XPVHV*) SvANY(hv))->xhv_keys = 0;
+
entry = array[0];
for (;;) {
if (entry) {
entry = array[riter];
}
}
+ HvARRAY(hv) = array;
(void)hv_iterinit(hv);
}
}
xhv->xhv_max = 7; /* HvMAX(hv) = 7 (it's a normal hash) */
xhv->xhv_array = 0; /* HvARRAY(hv) = 0 */
- xhv->xhv_fill = 0; /* HvFILL(hv) = 0 */
- xhv->xhv_keys = 0; /* HvKEYS(hv) = 0 */
xhv->xhv_placeholders = 0; /* HvPLACEHOLDERS(hv) = 0 */
if (SvRMAGICAL(hv))
The C<flags> value will normally be zero; if HV_ITERNEXT_WANTPLACEHOLDERS is
set the placeholders keys (for restricted hashes) will be returned in addition
to normal keys. By default placeholders are automatically skipped over.
-Currently a placeholder is implemented with a value that is literally
-<&Perl_sv_undef> (a regular C<undef> value is a normal read-write SV for which
-C<!SvOK> is false). Note that the implementation of placeholders and
+Currently a placeholder is implemented with a value that is
+C<&Perl_sv_placeholder>. Note that the implementation of placeholders and
restricted hashes may change, and the implementation currently is
insufficiently abstracted for any change to be tidy.
* Skip past any placeholders -- don't want to include them in
* any iteration.
*/
- while (entry && HeVAL(entry) == &PL_sv_undef) {
+ while (entry && HeVAL(entry) == &PL_sv_placeholder) {
entry = HeNEXT(entry);
}
}
if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) {
/* If we have an entry, but it's a placeholder, don't count it.
Try the next. */
- while (entry && HeVAL(entry) == &PL_sv_undef)
+ while (entry && HeVAL(entry) == &PL_sv_placeholder)
entry = HeNEXT(entry);
}
/* Will loop again if this linked list starts NULL
hv_free_ent(hv, oldentry);
}
+ /*if (HvREHASH(hv) && entry && !HeKREHASH(entry))
+ PerlIO_printf(PerlIO_stderr(), "Awooga %p %p\n", hv, entry);*/
+
xhv->xhv_eiter = entry; /* HvEITER(hv) = entry */
return entry;
}
sv = newSVpvn ((char*)as_utf8, utf8_len);
SvUTF8_on (sv);
Safefree (as_utf8); /* bytes_to_utf8() allocates a new string */
- } else {
+ } else if (flags & HVhek_REHASH) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK. This hv is using custom a hasing
+ algorithm. Hence we can't return a shared string scalar, as
+ that would contain the (wrong) hash value, and might get passed
+ into an hv routine with a regular hash */
+
+ sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek));
+ if (HEK_UTF8(hek))
+ SvUTF8_on (sv);
+ } else {
sv = newSVpvn_share(HEK_KEY(hek),
(HEK_UTF8(hek) ? -HEK_LEN(hek) : HEK_LEN(hek)),
HEK_HASH(hek));
if (!(Svp = hv_fetch(PL_strtab, str, len, FALSE)))
hv_store(PL_strtab, str, len, Nullsv, hash);
+
+ Can't rehash the shared string table, so not sure if it's worth
+ counting the number of entries in the linked list
*/
xhv = (XPVHV*)SvANY(PL_strtab);
/* assert(xhv_array != 0) */
xhv->xhv_keys++; /* HvKEYS(hv)++ */
if (i) { /* initial entry? */
xhv->xhv_fill++; /* HvFILL(hv)++ */
- if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */)
+ } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
hsplit(PL_strtab);
}
}