X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/0df056163a5c186d5ef583804a92195aeb095201..244e6e4e6778eaf8d73af45e27107cb7b38db256:/hv.c diff --git a/hv.c b/hv.c index 8b447c8..3cb0b07 100644 --- a/hv.c +++ b/hv.c @@ -36,6 +36,7 @@ holds the key and hash value. #include "perl.h" #define DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */ +#define HV_FILL_THRESHOLD 31 static const char S_strtab_error[] = "Cannot modify shared string table in hv_%s"; @@ -526,7 +527,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, bool needs_store; hv_magic_check (hv, &needs_copy, &needs_store); if (needs_copy) { - const bool save_taint = TAINT_get; /* Unused var warning under NO_TAINT_SUPPORT */ + const bool save_taint = TAINT_get; if (keysv || is_utf8) { if (!keysv) { keysv = newSVpvn_utf8(key, klen, TRUE); @@ -540,6 +541,9 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } TAINT_IF(save_taint); +#ifdef NO_TAINT_SUPPORT + PERL_UNUSED_VAR(save_taint); +#endif if (!needs_store) { if (flags & HVhek_FREEKEY) Safefree(key); @@ -746,7 +750,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, recursive call would call the key conversion routine again. However, as we replace the original key with the converted key, this would result in a double conversion, which would show - up as a bug if the conversion routine is not idempotent. */ + up as a bug if the conversion routine is not idempotent. + Hence the use of HV_DISABLE_UVAR_XKEY. */ return hv_common(hv, keysv, key, klen, flags, HV_FETCH_ISSTORE|HV_DISABLE_UVAR_XKEY|return_svp, val, hash); @@ -786,8 +791,55 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, else /* gotta do the real thing */ HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags); HeVAL(entry) = val; - HeNEXT(entry) = *oentry; - *oentry = entry; + + if (!*oentry && SvOOK(hv)) { + /* initial entry, and aux struct present. */ + struct xpvhv_aux *const aux = HvAUX(hv); + if (aux->xhv_fill_lazy) + ++aux->xhv_fill_lazy; + } + +#ifdef PERL_HASH_RANDOMIZE_KEYS + /* This logic semi-randomizes the insert order in a bucket. + * Either we insert into the top, or the slot below the top, + * making it harder to see if there is a collision. We also + * reset the iterator randomizer if there is one. + */ + if ( *oentry && PL_HASH_RAND_BITS_ENABLED) { + PL_hash_rand_bits++; + PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1); + if ( PL_hash_rand_bits & 1 ) { + HeNEXT(entry) = HeNEXT(*oentry); + HeNEXT(*oentry) = entry; + } else { + HeNEXT(entry) = *oentry; + *oentry = entry; + } + } else +#endif + { + HeNEXT(entry) = *oentry; + *oentry = entry; + } +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (SvOOK(hv)) { + /* Currently this makes various tests warn in annoying ways. + * So Silenced for now. - Yves | bogus end of comment =>* / + if (HvAUX(hv)->xhv_riter != -1) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), + "[TESTING] Inserting into a hash during each() traversal results in undefined behavior" + pTHX__FORMAT + pTHX__VALUE); + } + */ + if (PL_HASH_RAND_BITS_ENABLED) { + if (PL_HASH_RAND_BITS_ENABLED == 1) + PL_hash_rand_bits += (PTRV)entry + 1; /* we don't bother to use ptr_hash here */ + PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1); + } + HvAUX(hv)->xhv_rand= (U32)PL_hash_rand_bits; + } +#endif if (val == &PL_sv_placeholder) HvPLACEHOLDERS(hv)++; @@ -796,17 +848,26 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ if ( DO_HSPLIT(xhv) ) { - /* This logic was in S_hsplit, but as the shared string table can't - contain placeholders, and we are the only other caller of S_hsplit, - it could only trigger from this callsite. So move it here. */ - if (HvPLACEHOLDERS_get(hv) && !SvREADONLY(hv)) { - /* Can make this clear any placeholders first for non-restricted - hashes, even though Storable rebuilds restricted hashes by + const STRLEN oldsize = xhv->xhv_max + 1; + const U32 items = (U32)HvPLACEHOLDERS_get(hv); + + if (items /* hash has placeholders */ + && !SvREADONLY(hv) /* but is not a restricted hash */) { + /* If this hash previously was a "restricted hash" and had + placeholders, but the "restricted" flag has been turned off, + then the placeholders no longer serve any useful purpose. + However, they have the downsides of taking up RAM, and adding + extra steps when finding used values. It's safe to clear them + at this point, even though Storable rebuilds restricted hashes by putting in all the placeholders (first) before turning on the - readonly flag, because Storable always pre-splits the hash. */ - hv_clear_placeholders(hv); - } - hsplit(hv); + readonly flag, because Storable always pre-splits the hash. + If we're lucky, then we may clear sufficient placeholders to + avoid needing to split the hash at all. */ + clear_placeholders(hv, items); + if (DO_HSPLIT(xhv)) + hsplit(hv, oldsize, oldsize * 2); + } else + hsplit(hv, oldsize, oldsize * 2); } if (return_svp) { @@ -896,6 +957,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, XPVHV* xhv; HE *entry; HE **oentry; + HE *const *first_entry; bool is_utf8 = (k_flags & HVhek_UTF8) ? TRUE : FALSE; int masked_flags; @@ -971,7 +1033,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, masked_flags = (k_flags & HVhek_MASK); - oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; + first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; entry = *oentry; for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { SV *sv; @@ -1000,8 +1062,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, Safefree(key); return NULL; } - if (SvREADONLY(hv) && HeVAL(entry) && SvREADONLY(HeVAL(entry)) - && !SvIsCOW(HeVAL(entry))) { + if (SvREADONLY(hv) && HeVAL(entry) && SvREADONLY(HeVAL(entry))) { hv_notallowed(k_flags, key, klen, "Attempt to delete readonly key '%"SVf"' from" " a restricted hash"); @@ -1059,6 +1120,12 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, HvPLACEHOLDERS(hv)++; else { *oentry = HeNEXT(entry); + if(!*first_entry && SvOOK(hv)) { + /* removed last entry, and aux struct present. */ + struct xpvhv_aux *const aux = HvAUX(hv); + if (aux->xhv_fill_lazy) + --aux->xhv_fill_lazy; + } if (SvOOK(hv) && entry == HvAUX(hv)->xhv_eiter /* HvEITER(hv) */) HvLAZYDEL_on(hv); else { @@ -1095,13 +1162,10 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } STATIC void -S_hsplit(pTHX_ HV *hv) +S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize) { dVAR; - XPVHV* const xhv = (XPVHV*)SvANY(hv); - const I32 oldsize = (I32) xhv->xhv_max+1; /* HvMAX(hv)+1 (sick) */ - I32 newsize = oldsize * 2; - I32 i; + STRLEN i = 0; char *a = (char*) HvARRAY(hv); HE **aep; @@ -1117,17 +1181,43 @@ S_hsplit(pTHX_ HV *hv) PL_nomemok = FALSE; return; } +#ifdef PERL_HASH_RANDOMIZE_KEYS + /* the idea of this is that we create a "random" value by hashing the address of + * the array, we then use the low bit to decide if we insert at the top, or insert + * second from top. After each such insert we rotate the hashed value. So we can + * use the same hashed value over and over, and in normal build environments use + * very few ops to do so. ROTL32() should produce a single machine operation. */ + if (PL_HASH_RAND_BITS_ENABLED) { + if (PL_HASH_RAND_BITS_ENABLED == 1) + PL_hash_rand_bits += ptr_hash((PTRV)a); + PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1); + } +#endif + if (SvOOK(hv)) { - Move(&a[oldsize * sizeof(HE*)], &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); + struct xpvhv_aux *const dest + = (struct xpvhv_aux*) &a[newsize * sizeof(HE*)]; + Move(&a[oldsize * sizeof(HE*)], dest, 1, struct xpvhv_aux); + /* we reset the iterator's xhv_rand as well, so they get a totally new ordering */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + dest->xhv_rand = (U32)PL_hash_rand_bits; +#endif + /* For now, just reset the lazy fill counter. + It would be possible to update the counter in the code below + instead. */ + dest->xhv_fill_lazy = 0; } PL_nomemok = FALSE; Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); /* zero 2nd half*/ - xhv->xhv_max = --newsize; /* HvMAX(hv) = --newsize */ + HvMAX(hv) = --newsize; HvARRAY(hv) = (HE**) a; - aep = (HE**)a; - for (i=0; ixhv_max+1; /* HvMAX(hv)+1 (sick) */ I32 newsize; - I32 i; char *a; - HE **aep; PERL_ARGS_ASSERT_HV_KSPLIT; @@ -1176,53 +1284,29 @@ Perl_hv_ksplit(pTHX_ HV *hv, IV newmax) return; /* overflow detection */ a = (char *) HvARRAY(hv); - if (!a) { + if (a) { + hsplit(hv, oldsize, newsize); + } else { Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char); xhv->xhv_max = --newsize; HvARRAY(hv) = (HE **) a; - return; } +} - { - PL_nomemok = TRUE; - Renew(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize) - + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char); - if (!a) { - PL_nomemok = FALSE; - return; - } - if (SvOOK(hv)) { - Copy(&a[oldsize * sizeof(HE*)], &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); - } - PL_nomemok = FALSE; - Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); /* zero 2nd half*/ - } - xhv->xhv_max = --newsize; /* HvMAX(hv) = --newsize */ - HvARRAY(hv) = (HE **) a; - if (!xhv->xhv_keys /* !HvTOTALKEYS(hv) */) /* skip rest if no entries */ - return; - - aep = (HE**)a; - for (i=0; i PERL_HASH_DEFAULT_HvMAX && hv_max + 1 >= hv_keys * 2) \ + hv_max = hv_max / 2; \ + } \ + HvMAX(hv) = hv_max; \ +} STMT_END - if (j != i) { - *oentry = HeNEXT(entry); - HeNEXT(entry) = aep[j]; - aep[j] = entry; - } - else - oentry = &HeNEXT(entry); - entry = *oentry; - } while (entry); - } -} HV * Perl_newHVhv(pTHX_ HV *ohv) @@ -1285,12 +1369,9 @@ Perl_newHVhv(pTHX_ HV *ohv) HE *entry; const I32 riter = HvRITER_get(ohv); HE * const eiter = HvEITER_get(ohv); - STRLEN hv_fill = HvFILL(ohv); + STRLEN hv_keys = HvTOTALKEYS(ohv); - /* Can we use fewer buckets? (hv_max is always 2^n-1) */ - while (hv_max && hv_max + 1 >= hv_fill * 2) - hv_max = hv_max / 2; - HvMAX(hv) = hv_max; + HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys); hv_iterinit(ohv); while ((entry = hv_iternext_flags(ohv, 0))) { @@ -1329,7 +1410,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) if (ohv) { STRLEN hv_max = HvMAX(ohv); - STRLEN hv_fill = HvFILL(ohv); + STRLEN hv_keys = HvTOTALKEYS(ohv); HE *entry; const I32 riter = HvRITER_get(ohv); HE * const eiter = HvEITER_get(ohv); @@ -1337,9 +1418,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) ENTER; SAVEFREESV(hv); - while (hv_max && hv_max + 1 >= hv_fill * 2) - hv_max = hv_max / 2; - HvMAX(hv) = hv_max; + HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys); hv_iterinit(ohv); while ((entry = hv_iternext_flags(ohv, 0))) { @@ -1365,6 +1444,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) hv_magic(hv, NULL, PERL_MAGIC_hints); return hv; } +#undef HV_SET_MAX_ADJUSTED_FOR_KEYS /* like hv_free_ent, but returns the SV rather than freeing it */ STATIC SV* @@ -1456,7 +1536,7 @@ Perl_hv_clear(pTHX_ HV *hv) /* not already placeholder */ if (HeVAL(entry) != &PL_sv_placeholder) { if (HeVAL(entry)) { - if (SvREADONLY(HeVAL(entry)) && !SvIsCOW(HeVAL(entry))) { + if (SvREADONLY(HeVAL(entry))) { SV* const keysv = hv_iterkeysv(entry); Perl_croak_nocontext( "Attempt to delete readonly key '%"SVf"' from a restricted hash", @@ -1596,19 +1676,28 @@ Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp) PERL_ARGS_ASSERT_HFREE_NEXT_ENTRY; - if (SvOOK(hv) && ((iter = HvAUX(hv))) - && ((entry = iter->xhv_eiter)) ) - { - /* the iterator may get resurrected after each - * destructor call, so check each time */ - if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */ - HvLAZYDEL_off(hv); - hv_free_ent(hv, entry); - /* warning: at this point HvARRAY may have been - * re-allocated, HvMAX changed etc */ - } - iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ - iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ + if (SvOOK(hv) && ((iter = HvAUX(hv)))) { + if ((entry = iter->xhv_eiter)) { + /* the iterator may get resurrected after each + * destructor call, so check each time */ + if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */ + HvLAZYDEL_off(hv); + hv_free_ent(hv, entry); + /* warning: at this point HvARRAY may have been + * re-allocated, HvMAX changed etc */ + } + iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ + iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif + } + /* Reset any cached HvFILL() to "unknown". It's unlikely that anyone + will actually call HvFILL() on a hash under destruction, so it + seems pointless attempting to track the number of keys remaining. + But if they do, we want to reset it again. */ + if (iter->xhv_fill_lazy) + iter->xhv_fill_lazy = 0; } if (!((XPVHV*)SvANY(hv))->xhv_keys) @@ -1738,16 +1827,16 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags) SvREFCNT_dec(meta->mro_linear_current); SvREFCNT_dec(meta->mro_nextmethod); SvREFCNT_dec(meta->isa); + SvREFCNT_dec(meta->super); Safefree(meta); aux->xhv_mro_meta = NULL; } - SvREFCNT_dec(aux->xhv_super); if (!aux->xhv_name_u.xhvnameu_name && ! aux->xhv_backreferences) SvFLAGS(hv) &= ~SVf_OOK; } if (!SvOOK(hv)) { Safefree(HvARRAY(hv)); - xhv->xhv_max = 7; /* HvMAX(hv) = 7 (it's a normal hash) */ + xhv->xhv_max = PERL_HASH_DEFAULT_HvMAX; /* HvMAX(hv) = 7 (it's a normal hash) */ HvARRAY(hv) = 0; } /* if we're freeing the HV, the SvMAGIC field has been reused for @@ -1766,20 +1855,35 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags) Returns the number of hash buckets that happen to be in use. This function is wrapped by the macro C. -Previously this value was stored in the HV structure, rather than being -calculated on demand. +Previously this value was always stored in the HV structure, which created an +overhead on every hash (and pretty much every object) for something that was +rarely used. Now we calculate it on demand the first time that it is needed, +and cache it if that calculation is going to be costly to repeat. The cached +value is updated by insertions and deletions, but (currently) discarded if +the hash is split. =cut */ STRLEN -Perl_hv_fill(pTHX_ HV const *const hv) +Perl_hv_fill(pTHX_ HV *const hv) { STRLEN count = 0; HE **ents = HvARRAY(hv); + struct xpvhv_aux *aux = SvOOK(hv) ? HvAUX(hv) : NULL; PERL_ARGS_ASSERT_HV_FILL; + /* No keys implies no buckets used. + One key can only possibly mean one bucket used. */ + if (HvTOTALKEYS(hv) < 2) + return HvTOTALKEYS(hv); + +#ifndef DEBUGGING + if (aux && aux->xhv_fill_lazy) + return aux->xhv_fill_lazy; +#endif + if (ents) { HE *const *const last = ents + HvMAX(hv); count = last + 1 - ents; @@ -1789,35 +1893,95 @@ Perl_hv_fill(pTHX_ HV const *const hv) --count; } while (++ents <= last); } + if (aux) { +#ifdef DEBUGGING + if (aux->xhv_fill_lazy) + assert(aux->xhv_fill_lazy == count); +#endif + aux->xhv_fill_lazy = count; + } else if (HvMAX(hv) >= HV_FILL_THRESHOLD) { + aux = hv_auxinit(hv); + aux->xhv_fill_lazy = count; + } return count; } +/* hash a pointer to a U32 - Used in the hash traversal randomization + * and bucket order randomization code + * + * this code was derived from Sereal, which was derived from autobox. + */ + +PERL_STATIC_INLINE U32 S_ptr_hash(PTRV u) { +#if PTRSIZE == 8 + /* + * This is one of Thomas Wang's hash functions for 64-bit integers from: + * http://www.concentric.net/~Ttwang/tech/inthash.htm + */ + u = (~u) + (u << 18); + u = u ^ (u >> 31); + u = u * 21; + u = u ^ (u >> 11); + u = u + (u << 6); + u = u ^ (u >> 22); +#else + /* + * This is one of Bob Jenkins' hash functions for 32-bit integers + * from: http://burtleburtle.net/bob/hash/integer.html + */ + u = (u + 0x7ed55d16) + (u << 12); + u = (u ^ 0xc761c23c) ^ (u >> 19); + u = (u + 0x165667b1) + (u << 5); + u = (u + 0xd3a2646c) ^ (u << 9); + u = (u + 0xfd7046c5) + (u << 3); + u = (u ^ 0xb55a4f09) ^ (u >> 16); +#endif + return (U32)u; +} + + static struct xpvhv_aux* -S_hv_auxinit(HV *hv) { +S_hv_auxinit(pTHX_ HV *hv) { struct xpvhv_aux *iter; char *array; PERL_ARGS_ASSERT_HV_AUXINIT; - if (!HvARRAY(hv)) { - Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) - + sizeof(struct xpvhv_aux), char); + if (!SvOOK(hv)) { + if (!HvARRAY(hv)) { + Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) + + sizeof(struct xpvhv_aux), char); + } else { + array = (char *) HvARRAY(hv); + Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) + + sizeof(struct xpvhv_aux), char); + } + HvARRAY(hv) = (HE**)array; + SvOOK_on(hv); + iter = HvAUX(hv); +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (PL_HASH_RAND_BITS_ENABLED) { + /* mix in some new state to PL_hash_rand_bits to "randomize" the traversal order*/ + if (PL_HASH_RAND_BITS_ENABLED == 1) + PL_hash_rand_bits += ptr_hash((PTRV)array); + PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1); + } + iter->xhv_rand = (U32)PL_hash_rand_bits; +#endif } else { - array = (char *) HvARRAY(hv); - Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) - + sizeof(struct xpvhv_aux), char); + iter = HvAUX(hv); } - HvARRAY(hv) = (HE**) array; - SvOOK_on(hv); - iter = HvAUX(hv); iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif + iter->xhv_fill_lazy = 0; iter->xhv_name_u.xhvnameu_name = 0; iter->xhv_name_count = 0; iter->xhv_backreferences = 0; iter->xhv_mro_meta = NULL; - iter->xhv_super = NULL; return iter; } @@ -1855,6 +2019,9 @@ Perl_hv_iterinit(pTHX_ HV *hv) } iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif } else { hv_auxinit(hv); } @@ -1910,6 +2077,27 @@ Perl_hv_riter_set(pTHX_ HV *hv, I32 riter) { } void +Perl_hv_rand_set(pTHX_ HV *hv, U32 new_xhv_rand) { + struct xpvhv_aux *iter; + + PERL_ARGS_ASSERT_HV_RAND_SET; + +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (!hv) + Perl_croak(aTHX_ "Bad hash"); + + if (SvOOK(hv)) { + iter = HvAUX(hv); + } else { + iter = hv_auxinit(hv); + } + iter->xhv_rand = new_xhv_rand; +#else + Perl_croak(aTHX_ "This Perl has not been built with support for randomized hash key traversal but something called Perl_hv_rand_set()."); +#endif +} + +void Perl_hv_eiter_set(pTHX_ HV *hv, HE *eiter) { struct xpvhv_aux *iter; @@ -2315,6 +2503,18 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) } } +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (iter->xhv_last_rand != iter->xhv_rand) { + if (iter->xhv_riter != -1) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), + "Use of each() on hash after insertion without resetting hash iterator results in undefined behavior" + pTHX__FORMAT + pTHX__VALUE); + } + iter->xhv_last_rand = iter->xhv_rand; + } +#endif + /* Skip the entire loop if the hash is empty. */ if ((flags & HV_ITERNEXT_WANTPLACEHOLDERS) ? HvTOTALKEYS(hv) : HvUSEDKEYS(hv)) { @@ -2325,9 +2525,12 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) if (iter->xhv_riter > (I32)xhv->xhv_max /* HvRITER(hv) > HvMAX(hv) */) { /* There is no next one. End of the hash. */ iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; /* reset xhv_last_rand so we can detect inserts during traversal */ +#endif break; } - entry = (HvARRAY(hv))[iter->xhv_riter]; + entry = (HvARRAY(hv))[ PERL_HASH_ITER_BUCKET(iter) & xhv->xhv_max ]; if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) { /* If we have an entry, but it's a placeholder, don't count it. @@ -2340,7 +2543,12 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) or if we run through it and find only placeholders. */ } } - else iter->xhv_riter = -1; + else { + iter->xhv_riter = -1; +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif + } if (oldentry && HvLAZYDEL(hv)) { /* was deleted earlier? */ HvLAZYDEL_off(hv); @@ -2672,7 +2880,8 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ if (!next) { /* initial entry? */ } else if ( DO_HSPLIT(xhv) ) { - hsplit(PL_strtab); + const STRLEN oldsize = xhv->xhv_max + 1; + hsplit(PL_strtab, oldsize, oldsize * 2); } } @@ -2684,7 +2893,7 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) return HeKEY_hek(entry); } -I32 * +SSize_t * Perl_hv_placeholders_p(pTHX_ HV *hv) { dVAR; @@ -2913,12 +3122,12 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain, const char *keyend = keypv + keylen, *p; STRLEN nonascii_count = 0; for (p = keypv; p != keyend; p++) { - U8 c = (U8)*p; - if (c & 0x80) { - if (!((c & 0xfe) == 0xc2 && ++p != keyend && - (((U8)*p) & 0xc0) == 0x80)) + if (! UTF8_IS_INVARIANT(*p)) { + if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) { goto canonicalised_key; + } nonascii_count++; + p++; } } if (nonascii_count) { @@ -2930,8 +3139,13 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain, keypv = q; for (; p != keyend; p++, q++) { U8 c = (U8)*p; - *q = (char) - ((c & 0x80) ? ((c & 0x03) << 6) | (((U8)*++p) & 0x3f) : c); + if (UTF8_IS_INVARIANT(c)) { + *q = (char) c; + } + else { + p++; + *q = (char) TWO_BYTE_UTF8_TO_NATIVE(c, *p); + } } } flags &= ~REFCOUNTED_HE_KEY_UTF8; @@ -3083,12 +3297,12 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent, const char *keyend = keypv + keylen, *p; STRLEN nonascii_count = 0; for (p = keypv; p != keyend; p++) { - U8 c = (U8)*p; - if (c & 0x80) { - if (!((c & 0xfe) == 0xc2 && ++p != keyend && - (((U8)*p) & 0xc0) == 0x80)) + if (! UTF8_IS_INVARIANT(*p)) { + if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) { goto canonicalised_key; + } nonascii_count++; + p++; } } if (nonascii_count) { @@ -3100,8 +3314,13 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent, keypv = q; for (; p != keyend; p++, q++) { U8 c = (U8)*p; - *q = (char) - ((c & 0x80) ? ((c & 0x03) << 6) | (((U8)*++p) & 0x3f) : c); + if (UTF8_IS_INVARIANT(c)) { + *q = (char) c; + } + else { + p++; + *q = (char) TWO_BYTE_UTF8_TO_NATIVE(c, *p); + } } } flags &= ~REFCOUNTED_HE_KEY_UTF8;