X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/db4fbf16f81968637095420c2f968de1d2519030..e6965c14693b6cad1c65f3a588597285a0e525a2:/hv.c diff --git a/hv.c b/hv.c index 9c5670b..bb9cb27 100644 --- a/hv.c +++ b/hv.c @@ -18,7 +18,6 @@ /* =head1 Hash Manipulation Functions - A HV structure represents a Perl hash. It consists mainly of an array of pointers, each of which points to a linked list of HE structures. The array is indexed by the hash function of the key, so each linked list @@ -35,7 +34,8 @@ holds the key and hash value. #define PERL_HASH_INTERNAL_ACCESS #include "perl.h" -#define HV_MAX_LENGTH_BEFORE_SPLIT 14 +#define DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */ +#define HV_FILL_THRESHOLD 31 static const char S_strtab_error[] = "Cannot modify shared string table in hv_%s"; @@ -50,7 +50,6 @@ static const char S_strtab_error[] STATIC HE* S_new_he(pTHX) { - dVAR; HE* he; void ** const root = &PL_body_roots[HE_SVSLOT]; @@ -78,7 +77,7 @@ S_save_hek_flags(const char *str, I32 len, U32 hash, int flags) { const int flags_masked = flags & HVhek_MASK; char *k; - register HEK *hek; + HEK *hek; PERL_ARGS_ASSERT_SAVE_HEK_FLAGS; @@ -101,7 +100,6 @@ S_save_hek_flags(const char *str, I32 len, U32 hash, int flags) void Perl_free_tied_hv_pool(pTHX) { - dVAR; HE *he = PL_hv_fetch_ent_mh; while (he) { HE * const ohe = he; @@ -277,7 +275,10 @@ negative the key is assumed to be in UTF-8-encoded Unicode. Returns the SV which corresponds to the specified key in the hash. The absolute value of C is the length of the key. If C is negative the key is assumed to be in UTF-8-encoded Unicode. If -C is set then the fetch will be part of a store. Check that the +C is set then the fetch will be part of a store. This means that if +there is no value in the hash associated with the given key, then one is +created and a pointer to it is returned. The C it points to can be +assigned to. But always check that the return value is non-null before dereferencing it to an C. See L for more @@ -334,7 +335,7 @@ Perl_hv_common_key_len(pTHX_ HV *hv, const char *key, I32 klen_i32, void * Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, - int flags, int action, SV *val, register U32 hash) + int flags, int action, SV *val, U32 hash) { dVAR; XPVHV* xhv; @@ -344,6 +345,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, bool is_utf8; int masked_flags; const int return_svp = action & HV_FETCH_JUST_SV; + HEK *keysv_hek = NULL; if (!hv) return NULL; @@ -385,7 +387,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, if (SvIsCOW_shared_hash(keysv)) { flags = HVhek_KEYCANONICAL | (is_utf8 ? HVhek_UTF8 : 0); } else { - flags = 0; + flags = is_utf8 ? HVhek_UTF8 : 0; } } else { is_utf8 = ((flags & HVhek_UTF8) ? TRUE : FALSE); @@ -393,8 +395,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, if (action & HV_DELETE) { return (void *) hv_delete_common(hv, keysv, key, klen, - flags | (is_utf8 ? HVhek_UTF8 : 0), - action, hash); + flags, action, hash); } xhv = (XPVHV*)SvANY(hv); @@ -524,13 +525,13 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, bool needs_store; hv_magic_check (hv, &needs_copy, &needs_store); if (needs_copy) { - const bool save_taint = PL_tainted; + const bool save_taint = TAINT_get; if (keysv || is_utf8) { if (!keysv) { keysv = newSVpvn_utf8(key, klen, TRUE); } - if (PL_tainting) - PL_tainted = SvTAINTED(keysv); + if (TAINTING_get) + TAINT_set(SvTAINTED(keysv)); keysv = sv_2mortal(newSVsv(keysv)); mg_copy(MUTABLE_SV(hv), val, (char*)keysv, HEf_SVKEY); } else { @@ -538,6 +539,9 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } TAINT_IF(save_taint); +#ifdef NO_TAINT_SUPPORT + PERL_UNUSED_VAR(save_taint); +#endif if (!needs_store) { if (flags & HVhek_FREEKEY) Safefree(key); @@ -592,7 +596,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } } - if (is_utf8 & !(flags & HVhek_KEYCANONICAL)) { + if (is_utf8 && !(flags & HVhek_KEYCANONICAL)) { char * const keysave = (char *)key; key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8); if (is_utf8) @@ -611,18 +615,13 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } } - if (HvREHASH(hv) || (!hash && !(keysv && (SvIsCOW_shared_hash(keysv))))) - PERL_HASH_INTERNAL_(hash, key, klen, HvREHASH(hv)); + if (keysv && (SvIsCOW_shared_hash(keysv))) { + if (HvSHAREKEYS(hv)) + keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv)); + hash = SvSHARED_HASH(keysv); + } else if (!hash) - hash = SvSHARED_HASH(keysv); - - /* We don't have a pointer to the hv, so we have to replicate the - flag into every HEK, so that hv_iterkeysv can see it. - And yes, you do need this even though you are not "storing" because - you can flip the flags below if doing an lval lookup. (And that - was put in to give the semantics Andreas was expecting.) */ - if (HvREHASH(hv)) - flags |= HVhek_REHASH; + PERL_HASH(hash, key, klen); masked_flags = (flags & HVhek_MASK); @@ -633,16 +632,48 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, { entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; } + + if (!entry) + goto not_found; + + if (keysv_hek) { + /* keysv is actually a HEK in disguise, so we can match just by + * comparing the HEK pointers in the HE chain. There is a slight + * caveat: on something like "\x80", which has both plain and utf8 + * representations, perl's hashes do encoding-insensitive lookups, + * but preserve the encoding of the stored key. Thus a particular + * key could map to two different HEKs in PL_strtab. We only + * conclude 'not found' if all the flags are the same; otherwise + * we fall back to a full search (this should only happen in rare + * cases). + */ + int keysv_flags = HEK_FLAGS(keysv_hek); + HE *orig_entry = entry; + + for (; entry; entry = HeNEXT(entry)) { + HEK *hek = HeKEY_hek(entry); + if (hek == keysv_hek) + goto found; + if (HEK_FLAGS(hek) != keysv_flags) + break; /* need to do full match */ + } + if (!entry) + goto not_found; + /* failed on shortcut - do full search loop */ + entry = orig_entry; + } + for (; entry; entry = HeNEXT(entry)) { if (HeHASH(entry) != hash) /* strings can't be equal */ continue; if (HeKLEN(entry) != (I32)klen) continue; - if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */ + if (memNE(HeKEY(entry),key,klen)) /* is this it? */ continue; if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8) continue; + found: if (action & (HV_FETCH_LVALUE|HV_FETCH_ISSTORE)) { if (HeKFLAGS(entry) != masked_flags) { /* We match if HVhek_UTF8 bit in our flags and hash key's @@ -711,6 +742,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } return entry; } + + not_found: #ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */ if (!(action & HV_FETCH_ISSTORE) && SvRMAGICAL((const SV *)hv) @@ -750,7 +783,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, recursive call would call the key conversion routine again. However, as we replace the original key with the converted key, this would result in a double conversion, which would show - up as a bug if the conversion routine is not idempotent. */ + up as a bug if the conversion routine is not idempotent. + Hence the use of HV_DISABLE_UVAR_XKEY. */ return hv_common(hv, keysv, key, klen, flags, HV_FETCH_ISSTORE|HV_DISABLE_UVAR_XKEY|return_svp, val, hash); @@ -790,37 +824,83 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, else /* gotta do the real thing */ HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags); HeVAL(entry) = val; - HeNEXT(entry) = *oentry; - *oentry = entry; + + if (!*oentry && SvOOK(hv)) { + /* initial entry, and aux struct present. */ + struct xpvhv_aux *const aux = HvAUX(hv); + if (aux->xhv_fill_lazy) + ++aux->xhv_fill_lazy; + } + +#ifdef PERL_HASH_RANDOMIZE_KEYS + /* This logic semi-randomizes the insert order in a bucket. + * Either we insert into the top, or the slot below the top, + * making it harder to see if there is a collision. We also + * reset the iterator randomizer if there is one. + */ + if ( *oentry && PL_HASH_RAND_BITS_ENABLED) { + PL_hash_rand_bits++; + PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1); + if ( PL_hash_rand_bits & 1 ) { + HeNEXT(entry) = HeNEXT(*oentry); + HeNEXT(*oentry) = entry; + } else { + HeNEXT(entry) = *oentry; + *oentry = entry; + } + } else +#endif + { + HeNEXT(entry) = *oentry; + *oentry = entry; + } +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (SvOOK(hv)) { + /* Currently this makes various tests warn in annoying ways. + * So Silenced for now. - Yves | bogus end of comment =>* / + if (HvAUX(hv)->xhv_riter != -1) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), + "[TESTING] Inserting into a hash during each() traversal results in undefined behavior" + pTHX__FORMAT + pTHX__VALUE); + } + */ + if (PL_HASH_RAND_BITS_ENABLED) { + if (PL_HASH_RAND_BITS_ENABLED == 1) + PL_hash_rand_bits += (PTRV)entry + 1; /* we don't bother to use ptr_hash here */ + PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1); + } + HvAUX(hv)->xhv_rand= (U32)PL_hash_rand_bits; + } +#endif if (val == &PL_sv_placeholder) HvPLACEHOLDERS(hv)++; if (masked_flags & HVhek_ENABLEHVKFLAGS) HvHASKFLAGS_on(hv); - { - const HE *counter = HeNEXT(entry); - - xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ - if (!counter) { /* initial entry? */ - } else if (xhv->xhv_keys > xhv->xhv_max) { - /* Use only the old HvUSEDKEYS(hv) > HvMAX(hv) condition to limit - bucket splits on a rehashed hash, as we're not going to - split it again, and if someone is lucky (evil) enough to - get all the keys in one list they could exhaust our memory - as we repeatedly double the number of buckets on every - entry. Linear search feels a less worse thing to do. */ - hsplit(hv); - } else if(!HvREHASH(hv)) { - U32 n_links = 1; - - while ((counter = HeNEXT(counter))) - n_links++; - - if (n_links > HV_MAX_LENGTH_BEFORE_SPLIT) { - hsplit(hv); - } - } + xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ + if ( DO_HSPLIT(xhv) ) { + const STRLEN oldsize = xhv->xhv_max + 1; + const U32 items = (U32)HvPLACEHOLDERS_get(hv); + + if (items /* hash has placeholders */ + && !SvREADONLY(hv) /* but is not a restricted hash */) { + /* If this hash previously was a "restricted hash" and had + placeholders, but the "restricted" flag has been turned off, + then the placeholders no longer serve any useful purpose. + However, they have the downsides of taking up RAM, and adding + extra steps when finding used values. It's safe to clear them + at this point, even though Storable rebuilds restricted hashes by + putting in all the placeholders (first) before turning on the + readonly flag, because Storable always pre-splits the hash. + If we're lucky, then we may clear sufficient placeholders to + avoid needing to split the hash at all. */ + clear_placeholders(hv, items); + if (DO_HSPLIT(xhv)) + hsplit(hv, oldsize, oldsize * 2); + } else + hsplit(hv, oldsize, oldsize * 2); } if (return_svp) { @@ -853,7 +933,8 @@ S_hv_magic_check(HV *hv, bool *needs_copy, bool *needs_store) /* =for apidoc hv_scalar -Evaluates the hash in scalar context and returns the result. Handles magic when the hash is tied. +Evaluates the hash in scalar context and returns the result. Handles magic +when the hash is tied. =cut */ @@ -907,11 +988,17 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, int k_flags, I32 d_flags, U32 hash) { dVAR; - register XPVHV* xhv; - register HE *entry; - register HE **oentry; + XPVHV* xhv; + HE *entry; + HE **oentry; + HE **first_entry; bool is_utf8 = (k_flags & HVhek_UTF8) ? TRUE : FALSE; int masked_flags; + HEK *keysv_hek = NULL; + U8 mro_changes = 0; /* 1 = isa; 2 = package moved */ + SV *sv; + GV *gv = NULL; + HV *stash = NULL; if (SvRMAGICAL(hv)) { bool needs_copy; @@ -957,7 +1044,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, if (!HvARRAY(hv)) return NULL; - if (is_utf8) { + if (is_utf8 && !(k_flags & HVhek_KEYCANONICAL)) { const char * const keysave = key; key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8); @@ -976,30 +1063,60 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, HvHASKFLAGS_on(MUTABLE_SV(hv)); } - if (HvREHASH(hv) || (!hash && !(keysv && (SvIsCOW_shared_hash(keysv))))) - PERL_HASH_INTERNAL_(hash, key, klen, HvREHASH(hv)); + if (keysv && (SvIsCOW_shared_hash(keysv))) { + if (HvSHAREKEYS(hv)) + keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv)); + hash = SvSHARED_HASH(keysv); + } else if (!hash) - hash = SvSHARED_HASH(keysv); + PERL_HASH(hash, key, klen); masked_flags = (k_flags & HVhek_MASK); - oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; + first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; entry = *oentry; - for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { - SV *sv; - U8 mro_changes = 0; /* 1 = isa; 2 = package moved */ - GV *gv = NULL; - HV *stash = NULL; + if (!entry) + goto not_found; + + if (keysv_hek) { + /* keysv is actually a HEK in disguise, so we can match just by + * comparing the HEK pointers in the HE chain. There is a slight + * caveat: on something like "\x80", which has both plain and utf8 + * representations, perl's hashes do encoding-insensitive lookups, + * but preserve the encoding of the stored key. Thus a particular + * key could map to two different HEKs in PL_strtab. We only + * conclude 'not found' if all the flags are the same; otherwise + * we fall back to a full search (this should only happen in rare + * cases). + */ + int keysv_flags = HEK_FLAGS(keysv_hek); + + for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { + HEK *hek = HeKEY_hek(entry); + if (hek == keysv_hek) + goto found; + if (HEK_FLAGS(hek) != keysv_flags) + break; /* need to do full match */ + } + if (!entry) + goto not_found; + /* failed on shortcut - do full search loop */ + oentry = first_entry; + entry = *oentry; + } + + for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { if (HeHASH(entry) != hash) /* strings can't be equal */ continue; if (HeKLEN(entry) != (I32)klen) continue; - if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */ + if (memNE(HeKEY(entry),key,klen)) /* is this it? */ continue; if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8) continue; + found: if (hv == PL_strtab) { if (k_flags & HVhek_FREEKEY) Safefree(key); @@ -1012,8 +1129,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, Safefree(key); return NULL; } - if (SvREADONLY(hv) && HeVAL(entry) && SvREADONLY(HeVAL(entry)) - && !SvIsCOW(HeVAL(entry))) { + if (SvREADONLY(hv) && HeVAL(entry) && SvREADONLY(HeVAL(entry))) { hv_notallowed(k_flags, key, klen, "Attempt to delete readonly key '%"SVf"' from" " a restricted hash"); @@ -1071,6 +1187,12 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, HvPLACEHOLDERS(hv)++; else { *oentry = HeNEXT(entry); + if(!*first_entry && SvOOK(hv)) { + /* removed last entry, and aux struct present. */ + struct xpvhv_aux *const aux = HvAUX(hv); + if (aux->xhv_fill_lazy) + --aux->xhv_fill_lazy; + } if (SvOOK(hv) && entry == HvAUX(hv)->xhv_eiter /* HvEITER(hv) */) HvLAZYDEL_on(hv); else { @@ -1095,6 +1217,8 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, return sv; } + + not_found: if (SvREADONLY(hv)) { hv_notallowed(k_flags, key, klen, "Attempt to delete disallowed key '%"SVf"' from" @@ -1106,175 +1230,137 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, return NULL; } + STATIC void -S_hsplit(pTHX_ HV *hv) +S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize) { - dVAR; - register XPVHV* const xhv = (XPVHV*)SvANY(hv); - const I32 oldsize = (I32) xhv->xhv_max+1; /* HvMAX(hv)+1 (sick) */ - register I32 newsize = oldsize * 2; - register I32 i; + STRLEN i = 0; char *a = (char*) HvARRAY(hv); - register HE **aep; - int longest_chain = 0; - int was_shared; + HE **aep; - PERL_ARGS_ASSERT_HSPLIT; + bool do_aux= ( + /* already have an HvAUX(hv) so we have to move it */ + SvOOK(hv) || + /* no HvAUX() but array we are going to allocate is large enough + * there is no point in saving the space for the iterator, and + * speeds up later traversals. */ + ( ( hv != PL_strtab ) && ( newsize >= PERL_HV_ALLOC_AUX_SIZE ) ) + ); - /*PerlIO_printf(PerlIO_stderr(), "hsplit called for %p which had %d\n", - (void*)hv, (int) oldsize);*/ + PERL_ARGS_ASSERT_HSPLIT; - if (HvPLACEHOLDERS_get(hv) && !SvREADONLY(hv)) { - /* Can make this clear any placeholders first for non-restricted hashes, - even though Storable rebuilds restricted hashes by putting in all the - placeholders (first) before turning on the readonly flag, because - Storable always pre-splits the hash. */ - hv_clear_placeholders(hv); - } - PL_nomemok = TRUE; -#if defined(STRANGE_MALLOC) || defined(MYMALLOC) Renew(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize) - + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char); - if (!a) { - PL_nomemok = FALSE; - return; - } - if (SvOOK(hv)) { - Move(&a[oldsize * sizeof(HE*)], &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); - } -#else - Newx(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize) - + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char); + + (do_aux ? sizeof(struct xpvhv_aux) : 0), char); + PL_nomemok = FALSE; if (!a) { - PL_nomemok = FALSE; return; } - Copy(HvARRAY(hv), a, oldsize * sizeof(HE*), char); - if (SvOOK(hv)) { - Copy(HvAUX(hv), &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); + +#ifdef PERL_HASH_RANDOMIZE_KEYS + /* the idea of this is that we create a "random" value by hashing the address of + * the array, we then use the low bit to decide if we insert at the top, or insert + * second from top. After each such insert we rotate the hashed value. So we can + * use the same hashed value over and over, and in normal build environments use + * very few ops to do so. ROTL32() should produce a single machine operation. */ + if (PL_HASH_RAND_BITS_ENABLED) { + if (PL_HASH_RAND_BITS_ENABLED == 1) + PL_hash_rand_bits += ptr_hash((PTRV)a); + PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1); } - Safefree(HvARRAY(hv)); #endif - - PL_nomemok = FALSE; - Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); /* zero 2nd half*/ - xhv->xhv_max = --newsize; /* HvMAX(hv) = --newsize */ HvARRAY(hv) = (HE**) a; - aep = (HE**)a; + HvMAX(hv) = newsize - 1; + /* before we zero the newly added memory, we + * need to deal with the aux struct that may be there + * or have been allocated by us*/ + if (do_aux) { + struct xpvhv_aux *const dest + = (struct xpvhv_aux*) &a[newsize * sizeof(HE*)]; + if (SvOOK(hv)) { + /* alread have an aux, copy the old one in place. */ + Move(&a[oldsize * sizeof(HE*)], dest, 1, struct xpvhv_aux); + /* we reset the iterator's xhv_rand as well, so they get a totally new ordering */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + dest->xhv_rand = (U32)PL_hash_rand_bits; +#endif + /* For now, just reset the lazy fill counter. + It would be possible to update the counter in the code below + instead. */ + dest->xhv_fill_lazy = 0; + } else { + /* no existing aux structure, but we allocated space for one + * so initialize it properly. This unrolls hv_auxinit() a bit, + * since we have to do the realloc anyway. */ + /* first we set the iterator's xhv_rand so it can be copied into lastrand below */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + dest->xhv_rand = (U32)PL_hash_rand_bits; +#endif + /* this is the "non realloc" part of the hv_auxinit() */ + (void)hv_auxinit_internal(dest); + /* Turn on the OOK flag */ + SvOOK_on(hv); + } + } + /* now we can safely clear the second half */ + Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); /* zero 2nd half*/ + + if (!HvTOTALKEYS(hv)) /* skip rest if no entries */ + return; - for (i=0; i longest_chain) - longest_chain = left_length; - if (right_length > longest_chain) - longest_chain = right_length; - } - - - /* Pick your policy for "hashing isn't working" here: */ - if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */ - || HvREHASH(hv)) { - return; - } - - if (hv == PL_strtab) { - /* Urg. Someone is doing something nasty to the string table. - Can't win. */ - return; - } - - /* Awooga. Awooga. Pathological data. */ - /*PerlIO_printf(PerlIO_stderr(), "%p %d of %d with %d/%d buckets\n", (void*)hv, - longest_chain, HvTOTALKEYS(hv), HvFILL(hv), 1+HvMAX(hv));*/ - - ++newsize; - Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize) - + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char); - if (SvOOK(hv)) { - Copy(HvAUX(hv), &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); - } - - was_shared = HvSHAREKEYS(hv); - - HvSHAREKEYS_off(hv); - HvREHASH_on(hv); - - aep = HvARRAY(hv); - - for (i=0; ixhv_max); - HeNEXT(entry) = *bep; - *bep = entry; - - entry = next; - } - } - Safefree (HvARRAY(hv)); - HvARRAY(hv) = (HE **)a; + } while (i++ < oldsize); } void Perl_hv_ksplit(pTHX_ HV *hv, IV newmax) { - dVAR; - register XPVHV* xhv = (XPVHV*)SvANY(hv); + XPVHV* xhv = (XPVHV*)SvANY(hv); const I32 oldsize = (I32) xhv->xhv_max+1; /* HvMAX(hv)+1 (sick) */ - register I32 newsize; - register I32 i; - register char *a; - register HE **aep; + I32 newsize; + char *a; PERL_ARGS_ASSERT_HV_KSPLIT; @@ -1291,63 +1377,28 @@ Perl_hv_ksplit(pTHX_ HV *hv, IV newmax) a = (char *) HvARRAY(hv); if (a) { - PL_nomemok = TRUE; -#if defined(STRANGE_MALLOC) || defined(MYMALLOC) - Renew(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize) - + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char); - if (!a) { - PL_nomemok = FALSE; - return; - } - if (SvOOK(hv)) { - Copy(&a[oldsize * sizeof(HE*)], &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); - } -#else - Newx(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize) - + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char); - if (!a) { - PL_nomemok = FALSE; - return; - } - Copy(HvARRAY(hv), a, oldsize * sizeof(HE*), char); - if (SvOOK(hv)) { - Copy(HvAUX(hv), &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); - } - Safefree(HvARRAY(hv)); -#endif - PL_nomemok = FALSE; - Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); /* zero 2nd half*/ - } - else { - Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char); + hsplit(hv, oldsize, newsize); + } else { + Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char); + xhv->xhv_max = --newsize; + HvARRAY(hv) = (HE **) a; } - xhv->xhv_max = --newsize; /* HvMAX(hv) = --newsize */ - HvARRAY(hv) = (HE **) a; - if (!xhv->xhv_keys /* !HvTOTALKEYS(hv) */) /* skip rest if no entries */ - return; +} - aep = (HE**)a; - for (i=0; i PERL_HASH_DEFAULT_HvMAX && hv_max + 1 >= hv_keys * 2) \ + hv_max = hv_max / 2; \ + } \ + HvMAX(hv) = hv_max; \ +} STMT_END - if (!entry) /* non-existent */ - continue; - do { - register I32 j = (HeHASH(entry) & newsize); - - if (j != i) { - j -= i; - *oentry = HeNEXT(entry); - HeNEXT(entry) = aep[j]; - aep[j] = entry; - } - else - oentry = &HeNEXT(entry); - entry = *oentry; - } while (entry); - } -} HV * Perl_newHVhv(pTHX_ HV *ohv) @@ -1410,12 +1461,9 @@ Perl_newHVhv(pTHX_ HV *ohv) HE *entry; const I32 riter = HvRITER_get(ohv); HE * const eiter = HvEITER_get(ohv); - STRLEN hv_fill = HvFILL(ohv); + STRLEN hv_keys = HvTOTALKEYS(ohv); - /* Can we use fewer buckets? (hv_max is always 2^n-1) */ - while (hv_max && hv_max + 1 >= hv_fill * 2) - hv_max = hv_max / 2; - HvMAX(hv) = hv_max; + HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys); hv_iterinit(ohv); while ((entry = hv_iternext_flags(ohv, 0))) { @@ -1454,14 +1502,15 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) if (ohv) { STRLEN hv_max = HvMAX(ohv); - STRLEN hv_fill = HvFILL(ohv); + STRLEN hv_keys = HvTOTALKEYS(ohv); HE *entry; const I32 riter = HvRITER_get(ohv); HE * const eiter = HvEITER_get(ohv); - while (hv_max && hv_max + 1 >= hv_fill * 2) - hv_max = hv_max / 2; - HvMAX(hv) = hv_max; + ENTER; + SAVEFREESV(hv); + + HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys); hv_iterinit(ohv); while ((entry = hv_iternext_flags(ohv, 0))) { @@ -1475,27 +1524,28 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) else { (void)hv_common(hv, heksv, HeKEY(entry), HeKLEN(entry), HeKFLAGS(entry), HV_FETCH_ISSTORE|HV_FETCH_JUST_SV, sv, HeHASH(entry)); - SvREFCNT_dec(heksv); + SvREFCNT_dec_NN(heksv); } } HvRITER_set(ohv, riter); HvEITER_set(ohv, eiter); + + SvREFCNT_inc_simple_void_NN(hv); + LEAVE; } hv_magic(hv, NULL, PERL_MAGIC_hints); return hv; } +#undef HV_SET_MAX_ADJUSTED_FOR_KEYS /* like hv_free_ent, but returns the SV rather than freeing it */ STATIC SV* -S_hv_free_ent_ret(pTHX_ HV *hv, register HE *entry) +S_hv_free_ent_ret(pTHX_ HV *hv, HE *entry) { - dVAR; SV *val; PERL_ARGS_ASSERT_HV_FREE_ENT_RET; - if (!entry) - return NULL; val = HeVAL(entry); if (HeKLEN(entry) == HEf_SVKEY) { SvREFCNT_dec(HeKEY_sv(entry)); @@ -1511,9 +1561,8 @@ S_hv_free_ent_ret(pTHX_ HV *hv, register HE *entry) void -Perl_hv_free_ent(pTHX_ HV *hv, register HE *entry) +Perl_hv_free_ent(pTHX_ HV *hv, HE *entry) { - dVAR; SV *val; PERL_ARGS_ASSERT_HV_FREE_ENT; @@ -1526,10 +1575,8 @@ Perl_hv_free_ent(pTHX_ HV *hv, register HE *entry) void -Perl_hv_delayfree_ent(pTHX_ HV *hv, register HE *entry) +Perl_hv_delayfree_ent(pTHX_ HV *hv, HE *entry) { - dVAR; - PERL_ARGS_ASSERT_HV_DELAYFREE_ENT; if (!entry) @@ -1546,7 +1593,10 @@ Perl_hv_delayfree_ent(pTHX_ HV *hv, register HE *entry) =for apidoc hv_clear Frees the all the elements of a hash, leaving it empty. -The XS equivalent of %hash = (). See also L. +The XS equivalent of C<%hash = ()>. See also L. + +If any destructors are triggered as a result, the hv itself may +be freed. =cut */ @@ -1555,7 +1605,7 @@ void Perl_hv_clear(pTHX_ HV *hv) { dVAR; - register XPVHV* xhv; + XPVHV* xhv; if (!hv) return; @@ -1563,6 +1613,8 @@ Perl_hv_clear(pTHX_ HV *hv) xhv = (XPVHV*)SvANY(hv); + ENTER; + SAVEFREESV(SvREFCNT_inc_simple_NN(hv)); if (SvREADONLY(hv) && HvARRAY(hv) != NULL) { /* restricted hash: convert all keys to placeholders */ STRLEN i; @@ -1571,14 +1623,15 @@ Perl_hv_clear(pTHX_ HV *hv) for (; entry; entry = HeNEXT(entry)) { /* not already placeholder */ if (HeVAL(entry) != &PL_sv_placeholder) { - if (HeVAL(entry) && SvREADONLY(HeVAL(entry)) - && !SvIsCOW(HeVAL(entry))) { - SV* const keysv = hv_iterkeysv(entry); - Perl_croak(aTHX_ - "Attempt to delete readonly key '%"SVf"' from a restricted hash", - (void*)keysv); + if (HeVAL(entry)) { + if (SvREADONLY(HeVAL(entry))) { + SV* const keysv = hv_iterkeysv(entry); + Perl_croak_nocontext( + "Attempt to delete readonly key '%"SVf"' from a restricted hash", + (void*)keysv); + } + SvREFCNT_dec_NN(HeVAL(entry)); } - SvREFCNT_dec(HeVAL(entry)); HeVAL(entry) = &PL_sv_placeholder; HvPLACEHOLDERS(hv)++; } @@ -1593,13 +1646,13 @@ Perl_hv_clear(pTHX_ HV *hv) mg_clear(MUTABLE_SV(hv)); HvHASKFLAGS_off(hv); - HvREHASH_off(hv); } if (SvOOK(hv)) { if(HvENAME_get(hv)) mro_isa_changed_in(hv); HvEITER_set(hv, NULL); } + LEAVE; } /* @@ -1619,7 +1672,6 @@ See Hash::Util::lock_keys() for an example of its use. void Perl_hv_clear_placeholders(pTHX_ HV *hv) { - dVAR; const U32 items = (U32)HvPLACEHOLDERS_get(hv); PERL_ARGS_ASSERT_HV_CLEAR_PLACEHOLDERS; @@ -1659,8 +1711,10 @@ S_clear_placeholders(pTHX_ HV *hv, U32 items) if (--items == 0) { /* Finished. */ - HvTOTALKEYS(hv) -= (IV)HvPLACEHOLDERS_get(hv); - if (HvUSEDKEYS(hv) == 0) + I32 placeholders = HvPLACEHOLDERS_get(hv); + HvTOTALKEYS(hv) -= (IV)placeholders; + /* HvUSEDKEYS expanded */ + if ((HvTOTALKEYS(hv) - placeholders) == 0) HvHASKFLAGS_off(hv); HvPLACEHOLDERS_set(hv, 0); return; @@ -1672,7 +1726,7 @@ S_clear_placeholders(pTHX_ HV *hv, U32 items) } while (--i >= 0); /* You can't get here, hence assertion should always fail. */ assert (items == 0); - assert (0); + NOT_REACHED; /* NOTREACHED */ } STATIC void @@ -1711,19 +1765,29 @@ Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp) PERL_ARGS_ASSERT_HFREE_NEXT_ENTRY; - if (SvOOK(hv) && ((iter = HvAUX(hv))) - && ((entry = iter->xhv_eiter)) ) - { - /* the iterator may get resurrected after each - * destructor call, so check each time */ - if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */ - HvLAZYDEL_off(hv); - hv_free_ent(hv, entry); - /* warning: at this point HvARRAY may have been - * re-allocated, HvMAX changed etc */ - } - iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ - iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ + if (SvOOK(hv) && ((iter = HvAUX(hv)))) { + if ((entry = iter->xhv_eiter)) { + /* the iterator may get resurrected after each + * destructor call, so check each time */ + if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */ + HvLAZYDEL_off(hv); + hv_free_ent(hv, entry); + /* warning: at this point HvARRAY may have been + * re-allocated, HvMAX changed etc */ + } + iter = HvAUX(hv); /* may have been realloced */ + iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ + iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif + } + /* Reset any cached HvFILL() to "unknown". It's unlikely that anyone + will actually call HvFILL() on a hash under destruction, so it + seems pointless attempting to track the number of keys remaining. + But if they do, we want to reset it again. */ + if (iter->xhv_fill_lazy) + iter->xhv_fill_lazy = 0; } if (!((XPVHV*)SvANY(hv))->xhv_keys) @@ -1760,10 +1824,14 @@ Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp) /* =for apidoc hv_undef -Undefines the hash. The XS equivalent of undef(%hash). +Undefines the hash. The XS equivalent of C. As well as freeing all the elements of the hash (like hv_clear()), this also frees any auxiliary data and storage associated with the hash. + +If any destructors are triggered as a result, the hv itself may +be freed. + See also L. =cut @@ -1772,12 +1840,12 @@ See also L. void Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags) { - dVAR; - register XPVHV* xhv; - const char *name; + XPVHV* xhv; + bool save; if (!hv) return; + save = !!SvREFCNT(hv); DEBUG_A(Perl_hv_assert(aTHX_ hv)); xhv = (XPVHV*)SvANY(hv); @@ -1791,89 +1859,115 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags) if they will be freed anyway. */ /* note that the code following prior to hfreeentries is duplicated * in sv_clear(), and changes here should be done there too */ - if (PL_phase != PERL_PHASE_DESTRUCT && (name = HvNAME(hv))) { - if (PL_stashcache) - (void)hv_delete(PL_stashcache, name, - HEK_UTF8(HvNAME_HEK(hv)) ? -HvNAMELEN_get(hv) : HvNAMELEN_get(hv), - G_DISCARD - ); + if (PL_phase != PERL_PHASE_DESTRUCT && HvNAME(hv)) { + if (PL_stashcache) { + DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for '%" + HEKf"'\n", HEKfARG(HvNAME_HEK(hv)))); + (void)hv_deletehek(PL_stashcache, HvNAME_HEK(hv), G_DISCARD); + } hv_name_set(hv, NULL, 0, 0); } + if (save) { + ENTER; + SAVEFREESV(SvREFCNT_inc_simple_NN(hv)); + } hfreeentries(hv); if (SvOOK(hv)) { - struct xpvhv_aux * const aux = HvAUX(hv); struct mro_meta *meta; + const char *name; - if ((name = HvENAME_get(hv))) { + if (HvENAME_get(hv)) { if (PL_phase != PERL_PHASE_DESTRUCT) mro_isa_changed_in(hv); - if (PL_stashcache) - (void)hv_delete( - PL_stashcache, name, - HEK_UTF8(HvENAME_HEK(hv)) ? -HvENAMELEN_get(hv) : HvENAMELEN_get(hv), - G_DISCARD - ); + if (PL_stashcache) { + DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for effective name '%" + HEKf"'\n", HEKfARG(HvENAME_HEK(hv)))); + (void)hv_deletehek(PL_stashcache, HvENAME_HEK(hv), G_DISCARD); + } } /* If this call originated from sv_clear, then we must check for * effective names that need freeing, as well as the usual name. */ name = HvNAME(hv); - if (flags & HV_NAME_SETALL ? !!aux->xhv_name_u.xhvnameu_name : !!name) { - if (name && PL_stashcache) - (void)hv_delete(PL_stashcache, name, (HEK_UTF8(HvNAME_HEK(hv)) ? -HvNAMELEN_get(hv) : HvNAMELEN_get(hv)), G_DISCARD); + if (flags & HV_NAME_SETALL ? !!HvAUX(hv)->xhv_name_u.xhvnameu_name : !!name) { + if (name && PL_stashcache) { + DEBUG_o(Perl_deb(aTHX_ "hv_undef_flags clearing PL_stashcache for name '%" + HEKf"'\n", HEKfARG(HvNAME_HEK(hv)))); + (void)hv_deletehek(PL_stashcache, HvNAME_HEK(hv), G_DISCARD); + } hv_name_set(hv, NULL, 0, flags); } - if((meta = aux->xhv_mro_meta)) { + if((meta = HvAUX(hv)->xhv_mro_meta)) { if (meta->mro_linear_all) { - SvREFCNT_dec(MUTABLE_SV(meta->mro_linear_all)); - meta->mro_linear_all = NULL; - /* This is just acting as a shortcut pointer. */ - meta->mro_linear_current = NULL; - } else if (meta->mro_linear_current) { + SvREFCNT_dec_NN(meta->mro_linear_all); + /* mro_linear_current is just acting as a shortcut pointer, + hence the else. */ + } + else /* Only the current MRO is stored, so this owns the data. */ SvREFCNT_dec(meta->mro_linear_current); - meta->mro_linear_current = NULL; - } SvREFCNT_dec(meta->mro_nextmethod); SvREFCNT_dec(meta->isa); + SvREFCNT_dec(meta->super); Safefree(meta); - aux->xhv_mro_meta = NULL; + HvAUX(hv)->xhv_mro_meta = NULL; } - if (!aux->xhv_name_u.xhvnameu_name && ! aux->xhv_backreferences) + if (!HvAUX(hv)->xhv_name_u.xhvnameu_name && ! HvAUX(hv)->xhv_backreferences) SvFLAGS(hv) &= ~SVf_OOK; } if (!SvOOK(hv)) { Safefree(HvARRAY(hv)); - xhv->xhv_max = 7; /* HvMAX(hv) = 7 (it's a normal hash) */ + xhv->xhv_max = PERL_HASH_DEFAULT_HvMAX; /* HvMAX(hv) = 7 (it's a normal hash) */ HvARRAY(hv) = 0; } - HvPLACEHOLDERS_set(hv, 0); + /* if we're freeing the HV, the SvMAGIC field has been reused for + * other purposes, and so there can't be any placeholder magic */ + if (SvREFCNT(hv)) + HvPLACEHOLDERS_set(hv, 0); if (SvRMAGICAL(hv)) mg_clear(MUTABLE_SV(hv)); + if (save) LEAVE; } /* =for apidoc hv_fill -Returns the number of hash buckets that happen to be in use. This function is +Returns the number of hash buckets that +happen to be in use. This function is wrapped by the macro C. -Previously this value was stored in the HV structure, rather than being -calculated on demand. +Previously this value was always stored in the HV structure, which created an +overhead on every hash (and pretty much every object) for something that was +rarely used. Now we calculate it on demand the first +time that it is needed, and cache it if that calculation +is going to be costly to repeat. The cached +value is updated by insertions and deletions, but (currently) discarded if +the hash is split. =cut */ STRLEN -Perl_hv_fill(pTHX_ HV const *const hv) +Perl_hv_fill(pTHX_ HV *const hv) { STRLEN count = 0; HE **ents = HvARRAY(hv); + struct xpvhv_aux *aux = SvOOK(hv) ? HvAUX(hv) : NULL; PERL_ARGS_ASSERT_HV_FILL; + /* No keys implies no buckets used. + One key can only possibly mean one bucket used. */ + if (HvTOTALKEYS(hv) < 2) + return HvTOTALKEYS(hv); + +#ifndef DEBUGGING + if (aux && aux->xhv_fill_lazy) + return aux->xhv_fill_lazy; +#endif + if (ents) { HE *const *const last = ents + HvMAX(hv); count = last + 1 - ents; @@ -1883,37 +1977,105 @@ Perl_hv_fill(pTHX_ HV const *const hv) --count; } while (++ents <= last); } + if (aux) { +#ifdef DEBUGGING + if (aux->xhv_fill_lazy) + assert(aux->xhv_fill_lazy == count); +#endif + aux->xhv_fill_lazy = count; + } else if (HvMAX(hv) >= HV_FILL_THRESHOLD) { + aux = hv_auxinit(hv); + aux->xhv_fill_lazy = count; + } return count; } -static struct xpvhv_aux* -S_hv_auxinit(HV *hv) { - struct xpvhv_aux *iter; - char *array; - - PERL_ARGS_ASSERT_HV_AUXINIT; +/* hash a pointer to a U32 - Used in the hash traversal randomization + * and bucket order randomization code + * + * this code was derived from Sereal, which was derived from autobox. + */ - if (!HvARRAY(hv)) { - Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) - + sizeof(struct xpvhv_aux), char); - } else { - array = (char *) HvARRAY(hv); - Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) - + sizeof(struct xpvhv_aux), char); - } - HvARRAY(hv) = (HE**) array; - SvOOK_on(hv); - iter = HvAUX(hv); +PERL_STATIC_INLINE U32 S_ptr_hash(PTRV u) { +#if PTRSIZE == 8 + /* + * This is one of Thomas Wang's hash functions for 64-bit integers from: + * http://www.concentric.net/~Ttwang/tech/inthash.htm + */ + u = (~u) + (u << 18); + u = u ^ (u >> 31); + u = u * 21; + u = u ^ (u >> 11); + u = u + (u << 6); + u = u ^ (u >> 22); +#else + /* + * This is one of Bob Jenkins' hash functions for 32-bit integers + * from: http://burtleburtle.net/bob/hash/integer.html + */ + u = (u + 0x7ed55d16) + (u << 12); + u = (u ^ 0xc761c23c) ^ (u >> 19); + u = (u + 0x165667b1) + (u << 5); + u = (u + 0xd3a2646c) ^ (u << 9); + u = (u + 0xfd7046c5) + (u << 3); + u = (u ^ 0xb55a4f09) ^ (u >> 16); +#endif + return (U32)u; +} +static struct xpvhv_aux* +S_hv_auxinit_internal(struct xpvhv_aux *iter) { + PERL_ARGS_ASSERT_HV_AUXINIT_INTERNAL; iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif + iter->xhv_fill_lazy = 0; iter->xhv_name_u.xhvnameu_name = 0; iter->xhv_name_count = 0; iter->xhv_backreferences = 0; iter->xhv_mro_meta = NULL; + iter->xhv_aux_flags = 0; return iter; } + +static struct xpvhv_aux* +S_hv_auxinit(pTHX_ HV *hv) { + struct xpvhv_aux *iter; + char *array; + + PERL_ARGS_ASSERT_HV_AUXINIT; + + if (!SvOOK(hv)) { + if (!HvARRAY(hv)) { + Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) + + sizeof(struct xpvhv_aux), char); + } else { + array = (char *) HvARRAY(hv); + Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) + + sizeof(struct xpvhv_aux), char); + } + HvARRAY(hv) = (HE**)array; + SvOOK_on(hv); + iter = HvAUX(hv); +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (PL_HASH_RAND_BITS_ENABLED) { + /* mix in some new state to PL_hash_rand_bits to "randomize" the traversal order*/ + if (PL_HASH_RAND_BITS_ENABLED == 1) + PL_hash_rand_bits += ptr_hash((PTRV)array); + PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1); + } + iter->xhv_rand = (U32)PL_hash_rand_bits; +#endif + } else { + iter = HvAUX(hv); + } + + return hv_auxinit_internal(iter); +} + /* =for apidoc hv_iterinit @@ -1934,20 +2096,19 @@ Perl_hv_iterinit(pTHX_ HV *hv) { PERL_ARGS_ASSERT_HV_ITERINIT; - /* FIXME: Are we not NULL, or do we croak? Place bets now! */ - - if (!hv) - Perl_croak(aTHX_ "Bad hash"); - if (SvOOK(hv)) { - struct xpvhv_aux * const iter = HvAUX(hv); + struct xpvhv_aux * iter = HvAUX(hv); HE * const entry = iter->xhv_eiter; /* HvEITER(hv) */ if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */ HvLAZYDEL_off(hv); hv_free_ent(hv, entry); } + iter = HvAUX(hv); /* may have been reallocated */ iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif } else { hv_auxinit(hv); } @@ -1962,9 +2123,6 @@ Perl_hv_riter_p(pTHX_ HV *hv) { PERL_ARGS_ASSERT_HV_RITER_P; - if (!hv) - Perl_croak(aTHX_ "Bad hash"); - iter = SvOOK(hv) ? HvAUX(hv) : hv_auxinit(hv); return &(iter->xhv_riter); } @@ -1975,9 +2133,6 @@ Perl_hv_eiter_p(pTHX_ HV *hv) { PERL_ARGS_ASSERT_HV_EITER_P; - if (!hv) - Perl_croak(aTHX_ "Bad hash"); - iter = SvOOK(hv) ? HvAUX(hv) : hv_auxinit(hv); return &(iter->xhv_eiter); } @@ -1988,9 +2143,6 @@ Perl_hv_riter_set(pTHX_ HV *hv, I32 riter) { PERL_ARGS_ASSERT_HV_RITER_SET; - if (!hv) - Perl_croak(aTHX_ "Bad hash"); - if (SvOOK(hv)) { iter = HvAUX(hv); } else { @@ -2003,14 +2155,29 @@ Perl_hv_riter_set(pTHX_ HV *hv, I32 riter) { } void +Perl_hv_rand_set(pTHX_ HV *hv, U32 new_xhv_rand) { + struct xpvhv_aux *iter; + + PERL_ARGS_ASSERT_HV_RAND_SET; + +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (SvOOK(hv)) { + iter = HvAUX(hv); + } else { + iter = hv_auxinit(hv); + } + iter->xhv_rand = new_xhv_rand; +#else + Perl_croak(aTHX_ "This Perl has not been built with support for randomized hash key traversal but something called Perl_hv_rand_set()."); +#endif +} + +void Perl_hv_eiter_set(pTHX_ HV *hv, HE *eiter) { struct xpvhv_aux *iter; PERL_ARGS_ASSERT_HV_EITER_SET; - if (!hv) - Perl_croak(aTHX_ "Bad hash"); - if (SvOOK(hv)) { iter = HvAUX(hv); } else { @@ -2053,6 +2220,7 @@ Perl_hv_name_set(pTHX_ HV *hv, const char *name, U32 len, U32 flags) /* The first elem may be null. */ if(*name) unshare_hek_or_pvn(*name, 0, 0, 0); Safefree(name); + iter = HvAUX(hv); /* may been realloced */ spot = &iter->xhv_name_u.xhvnameu_name; iter->xhv_name_count = 0; } @@ -2074,6 +2242,7 @@ Perl_hv_name_set(pTHX_ HV *hv, const char *name, U32 len, U32 flags) } else if (flags & HV_NAME_SETALL) { unshare_hek_or_pvn(iter->xhv_name_u.xhvnameu_name, 0, 0, 0); + iter = HvAUX(hv); /* may been realloced */ spot = &iter->xhv_name_u.xhvnameu_name; } else { @@ -2145,10 +2314,12 @@ Perl_hv_ename_add(pTHX_ HV *hv, const char *name, U32 len, U32 flags) PERL_HASH(hash, name, len); if (aux->xhv_name_count) { - HEK ** const xhv_name = aux->xhv_name_u.xhvnameu_names; I32 count = aux->xhv_name_count; - HEK **hekp = xhv_name + (count < 0 ? -count : count); + HEK ** const xhv_name = aux->xhv_name_u.xhvnameu_names + (count<0); + HEK **hekp = xhv_name + (count < 0 ? -count - 1 : count); while (hekp-- > xhv_name) + { + assert(*hekp); if ( (HEK_UTF8(*hekp) || (flags & SVf_UTF8)) ? hek_eq_pvn_flags(aTHX_ *hekp, name, (I32)len, flags) @@ -2158,6 +2329,7 @@ Perl_hv_ename_add(pTHX_ HV *hv, const char *name, U32 len, U32 flags) aux->xhv_name_count = -count; return; } + } if (count < 0) aux->xhv_name_count--, count = -count; else aux->xhv_name_count++; Renew(aux->xhv_name_u.xhvnameu_names, count + 1, HEK *); @@ -2194,7 +2366,6 @@ This is called when a stash is deleted from the symbol table. void Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags) { - dVAR; struct xpvhv_aux *aux; PERL_ARGS_ASSERT_HV_ENAME_DELETE; @@ -2218,6 +2389,7 @@ Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags) : (HEK_LEN(*victim) == (I32)len && memEQ(HEK_KEY(*victim), name, len)) ) { unshare_hek_or_pvn(*victim, 0, 0, 0); + aux = HvAUX(hv); /* may been realloced */ if (count < 0) ++aux->xhv_name_count; else --aux->xhv_name_count; if ( @@ -2258,12 +2430,12 @@ Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags) AV ** Perl_hv_backreferences_p(pTHX_ HV *hv) { - struct xpvhv_aux * const iter = SvOOK(hv) ? HvAUX(hv) : hv_auxinit(hv); - PERL_ARGS_ASSERT_HV_BACKREFERENCES_P; - PERL_UNUSED_CONTEXT; - - return &(iter->xhv_backreferences); + /* See also Perl_sv_get_backrefs in sv.c where this logic is unrolled */ + { + struct xpvhv_aux * const iter = SvOOK(hv) ? HvAUX(hv) : hv_auxinit(hv); + return &(iter->xhv_backreferences); + } } void @@ -2281,7 +2453,7 @@ Perl_hv_kill_backrefs(pTHX_ HV *hv) { HvAUX(hv)->xhv_backreferences = 0; Perl_sv_kill_backrefs(aTHX_ MUTABLE_SV(hv), av); if (SvTYPE(av) == SVt_PVAV) - SvREFCNT_dec(av); + SvREFCNT_dec_NN(av); } } @@ -2305,9 +2477,9 @@ trigger the resource deallocation. Returns entries from a hash iterator. See C and C. The C value will normally be zero; if HV_ITERNEXT_WANTPLACEHOLDERS is set the placeholders keys (for restricted hashes) will be returned in addition -to normal keys. By default placeholders are automatically skipped over. +to normal keys. By default placeholders are automatically skipped over. Currently a placeholder is implemented with a value that is -C<&Perl_sv_placeholder>. Note that the implementation of placeholders and +C<&PL_sv_placeholder>. Note that the implementation of placeholders and restricted hashes may change, and the implementation currently is insufficiently abstracted for any change to be tidy. @@ -2318,22 +2490,19 @@ HE * Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) { dVAR; - register XPVHV* xhv; - register HE *entry; + XPVHV* xhv; + HE *entry; HE *oldentry; MAGIC* mg; struct xpvhv_aux *iter; PERL_ARGS_ASSERT_HV_ITERNEXT_FLAGS; - if (!hv) - Perl_croak(aTHX_ "Bad hash"); - xhv = (XPVHV*)SvANY(hv); if (!SvOOK(hv)) { /* Too many things (well, pp_each at least) merrily assume that you can - call iv_iternext without calling hv_iterinit, so we'll have to deal + call hv_iternext without calling hv_iterinit, so we'll have to deal with it. */ hv_iterinit(hv); } @@ -2346,6 +2515,7 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) if (entry) { sv_setsv(key, HeSVKEY_force(entry)); SvREFCNT_dec(HeSVKEY(entry)); /* get rid of previous key */ + HeSVKEY_set(entry, NULL); } else { char *k; @@ -2353,6 +2523,7 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) /* one HE per MAGICAL hash */ iter->xhv_eiter = entry = new_HE(); /* HvEITER(hv) = new_HE() */ + HvLAZYDEL_on(hv); /* make sure entry gets freed */ Zero(entry, 1, HE); Newxz(k, HEK_BASESIZE + sizeof(const SV *), char); hek = (HEK*)k; @@ -2368,7 +2539,9 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) SvREFCNT_dec(HeVAL(entry)); Safefree(HeKEY_hek(entry)); del_HE(entry); + iter = HvAUX(hv); /* may been realloced */ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ + HvLAZYDEL_off(hv); return NULL; } } @@ -2405,6 +2578,19 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) } } +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (iter->xhv_last_rand != iter->xhv_rand) { + if (iter->xhv_riter != -1) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), + "Use of each() on hash after insertion without resetting hash iterator results in undefined behavior" + pTHX__FORMAT + pTHX__VALUE); + } + iter = HvAUX(hv); /* may been realloced */ + iter->xhv_last_rand = iter->xhv_rand; + } +#endif + /* Skip the entire loop if the hash is empty. */ if ((flags & HV_ITERNEXT_WANTPLACEHOLDERS) ? HvTOTALKEYS(hv) : HvUSEDKEYS(hv)) { @@ -2415,9 +2601,12 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) if (iter->xhv_riter > (I32)xhv->xhv_max /* HvRITER(hv) > HvMAX(hv) */) { /* There is no next one. End of the hash. */ iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; /* reset xhv_last_rand so we can detect inserts during traversal */ +#endif break; } - entry = (HvARRAY(hv))[iter->xhv_riter]; + entry = (HvARRAY(hv))[ PERL_HASH_ITER_BUCKET(iter) & xhv->xhv_max ]; if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) { /* If we have an entry, but it's a placeholder, don't count it. @@ -2430,16 +2619,19 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) or if we run through it and find only placeholders. */ } } - else iter->xhv_riter = -1; + else { + iter->xhv_riter = -1; +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif + } if (oldentry && HvLAZYDEL(hv)) { /* was deleted earlier? */ HvLAZYDEL_off(hv); hv_free_ent(hv, oldentry); } - /*if (HvREHASH(hv) && entry && !HeKREHASH(entry)) - PerlIO_printf(PerlIO_stderr(), "Awooga %p %p\n", (void*)hv, (void*)entry);*/ - + iter = HvAUX(hv); /* may been realloced */ iter->xhv_eiter = entry; /* HvEITER(hv) = entry */ return entry; } @@ -2454,7 +2646,7 @@ C. */ char * -Perl_hv_iterkey(pTHX_ register HE *entry, I32 *retlen) +Perl_hv_iterkey(pTHX_ HE *entry, I32 *retlen) { PERL_ARGS_ASSERT_HV_ITERKEY; @@ -2482,7 +2674,7 @@ see C. */ SV * -Perl_hv_iterkeysv(pTHX_ register HE *entry) +Perl_hv_iterkeysv(pTHX_ HE *entry) { PERL_ARGS_ASSERT_HV_ITERKEYSV; @@ -2499,7 +2691,7 @@ C. */ SV * -Perl_hv_iterval(pTHX_ HV *hv, register HE *entry) +Perl_hv_iterval(pTHX_ HV *hv, HE *entry) { PERL_ARGS_ASSERT_HV_ITERVAL; @@ -2573,10 +2765,9 @@ Perl_unshare_hek(pTHX_ HEK *hek) STATIC void S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash) { - dVAR; - register XPVHV* xhv; + XPVHV* xhv; HE *entry; - register HE **oentry; + HE **oentry; bool is_utf8 = FALSE; int k_flags = 0; const char * const save = str; @@ -2662,7 +2853,7 @@ S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash) * len and hash must both be valid for str. */ HEK * -Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash) +Perl_share_hek(pTHX_ const char *str, I32 len, U32 hash) { bool is_utf8 = FALSE; int flags = 0; @@ -2684,6 +2875,7 @@ Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash) we should flag that it needs upgrading on keys or each. Also flag that we need share_hek_flags to free the string. */ if (str != save) { + dVAR; PERL_HASH(hash, str, len); flags |= HVhek_WASUTF8 | HVhek_FREEKEY; } @@ -2693,13 +2885,12 @@ Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash) } STATIC HEK * -S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags) +S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) { - dVAR; - register HE *entry; + HE *entry; const int flags_masked = flags & HVhek_MASK; const U32 hindex = hash & (I32) HvMAX(PL_strtab); - register XPVHV * const xhv = (XPVHV*)SvANY(PL_strtab); + XPVHV * const xhv = (XPVHV*)SvANY(PL_strtab); PERL_ARGS_ASSERT_SHARE_HEK_FLAGS; @@ -2763,8 +2954,9 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags) xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ if (!next) { /* initial entry? */ - } else if (xhv->xhv_keys > xhv->xhv_max /* HvUSEDKEYS(hv) > HvMAX(hv) */) { - hsplit(PL_strtab); + } else if ( DO_HSPLIT(xhv) ) { + const STRLEN oldsize = xhv->xhv_max + 1; + hsplit(PL_strtab, oldsize, oldsize * 2); } } @@ -2776,10 +2968,9 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags) return HeKEY_hek(entry); } -I32 * +SSize_t * Perl_hv_placeholders_p(pTHX_ HV *hv) { - dVAR; MAGIC *mg = mg_find((const SV *)hv, PERL_MAGIC_rhash); PERL_ARGS_ASSERT_HV_PLACEHOLDERS_P; @@ -2798,10 +2989,10 @@ Perl_hv_placeholders_p(pTHX_ HV *hv) I32 Perl_hv_placeholders_get(pTHX_ const HV *hv) { - dVAR; MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_rhash); PERL_ARGS_ASSERT_HV_PLACEHOLDERS_GET; + PERL_UNUSED_CONTEXT; return mg ? mg->mg_len : 0; } @@ -2809,7 +3000,6 @@ Perl_hv_placeholders_get(pTHX_ const HV *hv) void Perl_hv_placeholders_set(pTHX_ HV *hv, I32 ph) { - dVAR; MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_rhash); PERL_ARGS_ASSERT_HV_PLACEHOLDERS_SET; @@ -3005,12 +3195,12 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain, const char *keyend = keypv + keylen, *p; STRLEN nonascii_count = 0; for (p = keypv; p != keyend; p++) { - U8 c = (U8)*p; - if (c & 0x80) { - if (!((c & 0xfe) == 0xc2 && ++p != keyend && - (((U8)*p) & 0xc0) == 0x80)) + if (! UTF8_IS_INVARIANT(*p)) { + if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) { goto canonicalised_key; + } nonascii_count++; + p++; } } if (nonascii_count) { @@ -3022,8 +3212,13 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain, keypv = q; for (; p != keyend; p++, q++) { U8 c = (U8)*p; - *q = (char) - ((c & 0x80) ? ((c & 0x03) << 6) | (((U8)*++p) & 0x3f) : c); + if (UTF8_IS_INVARIANT(c)) { + *q = (char) c; + } + else { + p++; + *q = (char) TWO_BYTE_UTF8_TO_NATIVE(c, *p); + } } } flags &= ~REFCOUNTED_HE_KEY_UTF8; @@ -3046,14 +3241,13 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain, memEQ(HEK_KEY(chain->refcounted_he_hek), keypv, keylen) && utf8_flag == (HEK_FLAGS(chain->refcounted_he_hek) & HVhek_UTF8) #endif - ) - return - flags & REFCOUNTED_HE_EXISTS - ? (chain->refcounted_he_data[0] & HVrhek_typemask) - == HVrhek_delete - ? NULL - : &PL_sv_yes - : sv_2mortal(refcounted_he_value(chain)); + ) { + if (flags & REFCOUNTED_HE_EXISTS) + return (chain->refcounted_he_data[0] & HVrhek_typemask) + == HVrhek_delete + ? NULL : &PL_sv_yes; + return sv_2mortal(refcounted_he_value(chain)); + } } return flags & REFCOUNTED_HE_EXISTS ? NULL : &PL_sv_placeholder; } @@ -3176,12 +3370,12 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent, const char *keyend = keypv + keylen, *p; STRLEN nonascii_count = 0; for (p = keypv; p != keyend; p++) { - U8 c = (U8)*p; - if (c & 0x80) { - if (!((c & 0xfe) == 0xc2 && ++p != keyend && - (((U8)*p) & 0xc0) == 0x80)) + if (! UTF8_IS_INVARIANT(*p)) { + if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) { goto canonicalised_key; + } nonascii_count++; + p++; } } if (nonascii_count) { @@ -3193,8 +3387,13 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent, keypv = q; for (; p != keyend; p++, q++) { U8 c = (U8)*p; - *q = (char) - ((c & 0x80) ? ((c & 0x03) << 6) | (((U8)*++p) & 0x3f) : c); + if (UTF8_IS_INVARIANT(c)) { + *q = (char) c; + } + else { + p++; + *q = (char) TWO_BYTE_UTF8_TO_NATIVE(c, *p); + } } } flags &= ~REFCOUNTED_HE_KEY_UTF8; @@ -3299,7 +3498,9 @@ no action occurs in this case. void Perl_refcounted_he_free(pTHX_ struct refcounted_he *he) { +#ifdef USE_ITHREADS dVAR; +#endif PERL_UNUSED_CONTEXT; while (he) { @@ -3336,7 +3537,10 @@ to this function: no action occurs and a null pointer is returned. struct refcounted_he * Perl_refcounted_he_inc(pTHX_ struct refcounted_he *he) { +#ifdef USE_ITHREADS dVAR; +#endif + PERL_UNUSED_CONTEXT; if (he) { HINTS_REFCNT_LOCK; he->refcounted_he_refcnt++; @@ -3361,6 +3565,7 @@ Perl_cop_fetch_label(pTHX_ COP *const cop, STRLEN *len, U32 *flags) { struct refcounted_he *const chain = cop->cop_hints_hash; PERL_ARGS_ASSERT_COP_FETCH_LABEL; + PERL_UNUSED_CONTEXT; if (!chain) return NULL; @@ -3393,7 +3598,8 @@ Perl_cop_fetch_label(pTHX_ COP *const cop, STRLEN *len, U32 *flags) { /* =for apidoc cop_store_label -Save a label into a C. You need to set flags to C +Save a label into a C. +You need to set flags to C for a utf-8 label. =cut @@ -3495,8 +3701,8 @@ Perl_hv_assert(pTHX_ HV *hv) * Local variables: * c-indentation-style: bsd * c-basic-offset: 4 - * indent-tabs-mode: t + * indent-tabs-mode: nil * End: * - * ex: set ts=8 sts=4 sw=4 noet: + * ex: set ts=8 sts=4 sw=4 et: */