#include "EXTERN.h"
#define PERL_IN_HV_C
+#define PERL_HASH_INTERNAL_ACCESS
#include "perl.h"
+#define HV_MAX_LENGTH_BEFORE_SPLIT 14
+
STATIC HE*
S_new_he(pTHX)
{
he = HeNEXT(he);
del_HE(ohe);
}
+ PL_hv_fetch_ent_mh = Nullhe;
}
#if defined(USE_ITHREADS)
}
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ /* Yes, you do need this even though you are not "storing" because
+ you can flip the flags below if doing an lval lookup. (And that
+ was put in to give the semantics Andreas was expecting.) */
+ flags |= HVhek_REHASH;
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
/* entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
}
else
HeKFLAGS(entry) = flags;
- if (flags)
+ if (flags & HVhek_ENABLEHVKFLAGS)
HvHASKFLAGS_on(hv);
}
if (flags & HVhek_FREEKEY)
flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
}
- if (!hash) {
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ /* Yes, you do need this even though you are not "storing" because
+ you can flip the flags below if doing an lval lookup. (And that
+ was put in to give the semantics Andreas was expecting.) */
+ flags |= HVhek_REHASH;
+ } else if (!hash) {
if SvIsCOW_shared_hash(keysv) {
hash = SvUVX(keysv);
} else {
}
else
HeKFLAGS(entry) = flags;
- if (flags)
+ if (flags & HVhek_ENABLEHVKFLAGS)
HvHASKFLAGS_on(hv);
}
if (key != keysave)
register U32 hash, int flags)
{
register XPVHV* xhv;
- register I32 i;
+ register U32 n_links;
register HE *entry;
register HE **oentry;
if (flags)
HvHASKFLAGS_on((SV*)hv);
- if (!hash)
+ if (HvREHASH(hv)) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK, so that hv_iterkeysv can see it. */
+ flags |= HVhek_REHASH;
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash)
PERL_HASH(hash, key, klen);
if (!xhv->xhv_array /* !HvARRAY(hv) */)
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
- i = 1;
- for (entry = *oentry; entry; i=0, entry = HeNEXT(entry)) {
+ n_links = 0;
+
+ for (entry = *oentry; entry; ++n_links, entry = HeNEXT(entry)) {
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
if (HeKLEN(entry) != (I32)klen)
*oentry = entry;
xhv->xhv_keys++; /* HvKEYS(hv)++ */
- if (i) { /* initial entry? */
+ if (!n_links) { /* initial entry? */
xhv->xhv_fill++; /* HvFILL(hv)++ */
- } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
+ } else if ((xhv->xhv_keys > (IV)xhv->xhv_max)
+ || ((n_links > HV_MAX_LENGTH_BEFORE_SPLIT) && !HvREHASH(hv))) {
+ /* Use the old HvKEYS(hv) > HvMAX(hv) condition to limit bucket
+ splits on a rehashed hash, as we're not going to split it again,
+ and if someone is lucky (evil) enough to get all the keys in one
+ list they could exhaust our memory as we repeatedly double the
+ number of buckets on every entry. Linear search feels a less worse
+ thing to do. */
hsplit(hv);
}
XPVHV* xhv;
char *key;
STRLEN klen;
- I32 i;
+ U32 n_links;
HE *entry;
HE **oentry;
bool is_utf8;
HvHASKFLAGS_on((SV*)hv);
}
- if (!hash) {
+ if (HvREHASH(hv)) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK, so that hv_iterkeysv can see it. */
+ flags |= HVhek_REHASH;
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash) {
if SvIsCOW_shared_hash(keysv) {
hash = SvUVX(keysv);
} else {
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
- i = 1;
+ n_links = 0;
entry = *oentry;
- for (; entry; i=0, entry = HeNEXT(entry)) {
+ for (; entry; ++n_links, entry = HeNEXT(entry)) {
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
if (HeKLEN(entry) != (I32)klen)
*oentry = entry;
xhv->xhv_keys++; /* HvKEYS(hv)++ */
- if (i) { /* initial entry? */
+ if (!n_links) { /* initial entry? */
xhv->xhv_fill++; /* HvFILL(hv)++ */
- } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
- hsplit(hv);
+ } else if ((xhv->xhv_keys > (IV)xhv->xhv_max)
+ || ((n_links > HV_MAX_LENGTH_BEFORE_SPLIT) && !HvREHASH(hv))) {
+ /* Use only the old HvKEYS(hv) > HvMAX(hv) condition to limit bucket
+ splits on a rehashed hash, as we're not going to split it again,
+ and if someone is lucky (evil) enough to get all the keys in one
+ list they could exhaust our memory as we repeatedly double the
+ number of buckets on every entry. Linear search feels a less worse
+ thing to do. */
+ hsplit(hv);
}
return entry;
k_flags |= HVhek_FREEKEY;
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
k_flags |= HVhek_FREEKEY;
}
- if (!hash)
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash) {
PERL_HASH(hash, key, klen);
+ }
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
k_flags |= HVhek_FREEKEY;
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
#ifdef DYNAMIC_ENV_FETCH
if (!xhv->xhv_array /* !HvARRAY(hv) */) entry = Null(HE*);
if (key != keysave)
k_flags |= HVhek_FREEKEY;
}
- if (!hash)
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash)
PERL_HASH(hash, key, klen);
#ifdef DYNAMIC_ENV_FETCH
register HE **bep;
register HE *entry;
register HE **oentry;
+ int longest_chain = 0;
+ int was_shared;
PL_nomemok = TRUE;
#if defined(STRANGE_MALLOC) || defined(MYMALLOC)
aep = (HE**)a;
for (i=0; i<oldsize; i++,aep++) {
+ int left_length = 0;
+ int right_length = 0;
+
if (!*aep) /* non-existent */
continue;
bep = aep+oldsize;
if (!*bep)
xhv->xhv_fill++; /* HvFILL(hv)++ */
*bep = entry;
+ right_length++;
continue;
}
- else
+ else {
oentry = &HeNEXT(entry);
+ left_length++;
+ }
}
if (!*aep) /* everything moved */
xhv->xhv_fill--; /* HvFILL(hv)-- */
+ /* I think we don't actually need to keep track of the longest length,
+ merely flag if anything is too long. But for the moment while
+ developing this code I'll track it. */
+ if (left_length > longest_chain)
+ longest_chain = left_length;
+ if (right_length > longest_chain)
+ longest_chain = right_length;
}
+
+
+ /* Pick your policy for "hashing isn't working" here: */
+ if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */
+ || HvREHASH(hv)) {
+ return;
+ }
+
+ if (hv == PL_strtab) {
+ /* Urg. Someone is doing something nasty to the string table.
+ Can't win. */
+ return;
+ }
+
+ /* Awooga. Awooga. Pathological data. */
+ /*PerlIO_printf(PerlIO_stderr(), "%p %d of %d with %d/%d buckets\n", hv,
+ longest_chain, HvTOTALKEYS(hv), HvFILL(hv), 1+HvMAX(hv));*/
+
+ ++newsize;
+ Newz(2, a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
+ was_shared = HvSHAREKEYS(hv);
+
+ xhv->xhv_fill = 0;
+ HvSHAREKEYS_off(hv);
+ HvREHASH_on(hv);
+
+ aep = (HE **) xhv->xhv_array;
+
+ for (i=0; i<newsize; i++,aep++) {
+ entry = *aep;
+ while (entry) {
+ /* We're going to trash this HE's next pointer when we chain it
+ into the new hash below, so store where we go next. */
+ HE *next = HeNEXT(entry);
+ UV hash;
+
+ /* Rehash it */
+ PERL_HASH_INTERNAL(hash, HeKEY(entry), HeKLEN(entry));
+
+ if (was_shared) {
+ /* Unshare it. */
+ HEK *new_hek
+ = save_hek_flags(HeKEY(entry), HeKLEN(entry),
+ hash, HeKFLAGS(entry));
+ unshare_hek (HeKEY_hek(entry));
+ HeKEY_hek(entry) = new_hek;
+ } else {
+ /* Not shared, so simply write the new hash in. */
+ HeHASH(entry) = hash;
+ }
+ /*PerlIO_printf(PerlIO_stderr(), "%d ", HeKFLAGS(entry));*/
+ HEK_REHASH_on(HeKEY_hek(entry));
+ /*PerlIO_printf(PerlIO_stderr(), "%d\n", HeKFLAGS(entry));*/
+
+ /* Copy oentry to the correct new chain. */
+ bep = ((HE**)a) + (hash & (I32) xhv->xhv_max);
+ if (!*bep)
+ xhv->xhv_fill++; /* HvFILL(hv)++ */
+ HeNEXT(entry) = *bep;
+ *bep = entry;
+
+ entry = next;
+ }
+ }
+ Safefree (xhv->xhv_array);
+ xhv->xhv_array = a; /* HvARRAY(hv) = a */
}
void
#ifndef NODEFAULT_SHAREKEYS
HvSHAREKEYS_on(hv); /* key-sharing on by default */
#endif
+
xhv->xhv_max = 7; /* HvMAX(hv) = 7 (start with 8 buckets) */
xhv->xhv_fill = 0; /* HvFILL(hv) = 0 */
xhv->xhv_pmroot = 0; /* HvPMROOT(hv) = 0 */
mg_clear((SV*)hv);
HvHASKFLAGS_off(hv);
+ HvREHASH_off(hv);
}
STATIC void
hv_free_ent(hv, oldentry);
}
+ /*if (HvREHASH(hv) && entry && !HeKREHASH(entry))
+ PerlIO_printf(PerlIO_stderr(), "Awooga %p %p\n", hv, entry);*/
+
xhv->xhv_eiter = entry; /* HvEITER(hv) = entry */
return entry;
}
sv = newSVpvn ((char*)as_utf8, utf8_len);
SvUTF8_on (sv);
Safefree (as_utf8); /* bytes_to_utf8() allocates a new string */
- } else {
+ } else if (flags & HVhek_REHASH) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK. This hv is using custom a hasing
+ algorithm. Hence we can't return a shared string scalar, as
+ that would contain the (wrong) hash value, and might get passed
+ into an hv routine with a regular hash */
+
+ sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek));
+ if (HEK_UTF8(hek))
+ SvUTF8_on (sv);
+ } else {
sv = newSVpvn_share(HEK_KEY(hek),
(HEK_UTF8(hek) ? -HEK_LEN(hek) : HEK_LEN(hek)),
HEK_HASH(hek));
if (!(Svp = hv_fetch(PL_strtab, str, len, FALSE)))
hv_store(PL_strtab, str, len, Nullsv, hash);
+
+ Can't rehash the shared string table, so not sure if it's worth
+ counting the number of entries in the linked list
*/
xhv = (XPVHV*)SvANY(PL_strtab);
/* assert(xhv_array != 0) */
xhv->xhv_keys++; /* HvKEYS(hv)++ */
if (i) { /* initial entry? */
xhv->xhv_fill++; /* HvFILL(hv)++ */
- if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */)
+ } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
hsplit(PL_strtab);
}
}