X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/e8c10cf36903c9c33ff8323b6f1a05d27080bc4d..c8b79d5f6bcdd669165cd1439bf6145f0300e3d3:/hv.c diff --git a/hv.c b/hv.c index 9d619d0..6476f51 100644 --- a/hv.c +++ b/hv.c @@ -526,7 +526,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, bool needs_store; hv_magic_check (hv, &needs_copy, &needs_store); if (needs_copy) { - const bool save_taint = TAINT_get; /* Unused var warning under NO_TAINT_SUPPORT */ + const bool save_taint = TAINT_get; if (keysv || is_utf8) { if (!keysv) { keysv = newSVpvn_utf8(key, klen, TRUE); @@ -540,6 +540,9 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } TAINT_IF(save_taint); +#ifdef NO_TAINT_SUPPORT + PERL_UNUSED_VAR(save_taint); +#endif if (!needs_store) { if (flags & HVhek_FREEKEY) Safefree(key); @@ -786,8 +789,48 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, else /* gotta do the real thing */ HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags); HeVAL(entry) = val; - HeNEXT(entry) = *oentry; - *oentry = entry; + +#ifdef PERL_HASH_RANDOMIZE_KEYS + /* This logic semi-randomizes the insert order in a bucket. + * Either we insert into the top, or the slot below the top, + * making it harder to see if there is a collision. We also + * reset the iterator randomizer if there is one. + */ + if ( *oentry && PL_HASH_RAND_BITS_ENABLED) { + PL_hash_rand_bits++; + PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1); + if ( PL_hash_rand_bits & 1 ) { + HeNEXT(entry) = HeNEXT(*oentry); + HeNEXT(*oentry) = entry; + } else { + HeNEXT(entry) = *oentry; + *oentry = entry; + } + } else +#endif + { + HeNEXT(entry) = *oentry; + *oentry = entry; + } +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (SvOOK(hv)) { + /* Currently this makes various tests warn in annoying ways. + * So Silenced for now. - Yves | bogus end of comment =>* / + if (HvAUX(hv)->xhv_riter != -1) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), + "[TESTING] Inserting into a hash during each() traversal results in undefined behavior" + pTHX__FORMAT + pTHX__VALUE); + } + */ + if (PL_HASH_RAND_BITS_ENABLED) { + if (PL_HASH_RAND_BITS_ENABLED == 1) + PL_hash_rand_bits += (PTRV)entry + 1; /* we don't bother to use ptr_hash here */ + PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1); + } + HvAUX(hv)->xhv_rand= (U32)PL_hash_rand_bits; + } +#endif if (val == &PL_sv_placeholder) HvPLACEHOLDERS(hv)++; @@ -797,18 +840,25 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ if ( DO_HSPLIT(xhv) ) { const STRLEN oldsize = xhv->xhv_max + 1; - - /* This logic was in S_hsplit, but as the shared string table can't - contain placeholders, and we are the only other caller of S_hsplit, - it could only trigger from this callsite. So move it here. */ - if (HvPLACEHOLDERS_get(hv) && !SvREADONLY(hv)) { - /* Can make this clear any placeholders first for non-restricted - hashes, even though Storable rebuilds restricted hashes by + const U32 items = (U32)HvPLACEHOLDERS_get(hv); + + if (items /* hash has placeholders */ + && !SvREADONLY(hv) /* but is not a restricted hash */) { + /* If this hash previously was a "restricted hash" and had + placeholders, but the "restricted" flag has been turned off, + then the placeholders no longer serve any useful purpose. + However, they have the downsides of taking up RAM, and adding + extra steps when finding used values. It's safe to clear them + at this point, even though Storable rebuilds restricted hashes by putting in all the placeholders (first) before turning on the - readonly flag, because Storable always pre-splits the hash. */ - hv_clear_placeholders(hv); - } - hsplit(hv, oldsize, oldsize * 2); + readonly flag, because Storable always pre-splits the hash. + If we're lucky, then we may clear sufficient placeholders to + avoid needing to split the hash at all. */ + clear_placeholders(hv, items); + if (DO_HSPLIT(xhv)) + hsplit(hv, oldsize, oldsize * 2); + } else + hsplit(hv, oldsize, oldsize * 2); } if (return_svp) { @@ -1100,7 +1150,7 @@ STATIC void S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize) { dVAR; - STRLEN i; + STRLEN i = 0; char *a = (char*) HvARRAY(hv); HE **aep; @@ -1116,8 +1166,27 @@ S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize) PL_nomemok = FALSE; return; } +#ifdef PERL_HASH_RANDOMIZE_KEYS + /* the idea of this is that we create a "random" value by hashing the address of + * the array, we then use the low bit to decide if we insert at the top, or insert + * second from top. After each such insert we rotate the hashed value. So we can + * use the same hashed value over and over, and in normal build environments use + * very few ops to do so. ROTL32() should produce a single machine operation. */ + if (PL_HASH_RAND_BITS_ENABLED) { + if (PL_HASH_RAND_BITS_ENABLED == 1) + PL_hash_rand_bits += ptr_hash((PTRV)a); + PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1); + } +#endif + if (SvOOK(hv)) { - Move(&a[oldsize * sizeof(HE*)], &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); + struct xpvhv_aux *const dest + = (struct xpvhv_aux*) &a[newsize * sizeof(HE*)]; + Move(&a[oldsize * sizeof(HE*)], dest, 1, struct xpvhv_aux); + /* we reset the iterator's xhv_rand as well, so they get a totally new ordering */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + dest->xhv_rand = (U32)PL_hash_rand_bits; +#endif } PL_nomemok = FALSE; @@ -1129,7 +1198,7 @@ S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize) return; aep = (HE**)a; - for (i=0; i PERL_HASH_DEFAULT_HvMAX && hv_max + 1 >= hv_keys * 2) \ + hv_max = hv_max / 2; \ + } \ + HvMAX(hv) = hv_max; \ +} STMT_END + + HV * Perl_newHVhv(pTHX_ HV *ohv) { @@ -1243,12 +1350,9 @@ Perl_newHVhv(pTHX_ HV *ohv) HE *entry; const I32 riter = HvRITER_get(ohv); HE * const eiter = HvEITER_get(ohv); - STRLEN hv_fill = HvFILL(ohv); + STRLEN hv_keys = HvTOTALKEYS(ohv); - /* Can we use fewer buckets? (hv_max is always 2^n-1) */ - while (hv_max && hv_max + 1 >= hv_fill * 2) - hv_max = hv_max / 2; - HvMAX(hv) = hv_max; + HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys); hv_iterinit(ohv); while ((entry = hv_iternext_flags(ohv, 0))) { @@ -1287,7 +1391,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) if (ohv) { STRLEN hv_max = HvMAX(ohv); - STRLEN hv_fill = HvFILL(ohv); + STRLEN hv_keys = HvTOTALKEYS(ohv); HE *entry; const I32 riter = HvRITER_get(ohv); HE * const eiter = HvEITER_get(ohv); @@ -1295,9 +1399,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) ENTER; SAVEFREESV(hv); - while (hv_max && hv_max + 1 >= hv_fill * 2) - hv_max = hv_max / 2; - HvMAX(hv) = hv_max; + HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys); hv_iterinit(ohv); while ((entry = hv_iternext_flags(ohv, 0))) { @@ -1323,6 +1425,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) hv_magic(hv, NULL, PERL_MAGIC_hints); return hv; } +#undef HV_SET_MAX_ADJUSTED_FOR_KEYS /* like hv_free_ent, but returns the SV rather than freeing it */ STATIC SV* @@ -1567,6 +1670,9 @@ Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp) } iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif } if (!((XPVHV*)SvANY(hv))->xhv_keys) @@ -1705,7 +1811,7 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags) } if (!SvOOK(hv)) { Safefree(HvARRAY(hv)); - xhv->xhv_max = 7; /* HvMAX(hv) = 7 (it's a normal hash) */ + xhv->xhv_max = PERL_HASH_DEFAULT_HvMAX; /* HvMAX(hv) = 7 (it's a normal hash) */ HvARRAY(hv) = 0; } /* if we're freeing the HV, the SvMAGIC field has been reused for @@ -1750,27 +1856,77 @@ Perl_hv_fill(pTHX_ HV const *const hv) return count; } +/* hash a pointer to a U32 - Used in the hash traversal randomization + * and bucket order randomization code + * + * this code was derived from Sereal, which was derived from autobox. + */ + +PERL_STATIC_INLINE U32 S_ptr_hash(PTRV u) { +#if PTRSIZE == 8 + /* + * This is one of Thomas Wang's hash functions for 64-bit integers from: + * http://www.concentric.net/~Ttwang/tech/inthash.htm + */ + u = (~u) + (u << 18); + u = u ^ (u >> 31); + u = u * 21; + u = u ^ (u >> 11); + u = u + (u << 6); + u = u ^ (u >> 22); +#else + /* + * This is one of Bob Jenkins' hash functions for 32-bit integers + * from: http://burtleburtle.net/bob/hash/integer.html + */ + u = (u + 0x7ed55d16) + (u << 12); + u = (u ^ 0xc761c23c) ^ (u >> 19); + u = (u + 0x165667b1) + (u << 5); + u = (u + 0xd3a2646c) ^ (u << 9); + u = (u + 0xfd7046c5) + (u << 3); + u = (u ^ 0xb55a4f09) ^ (u >> 16); +#endif + return (U32)u; +} + + static struct xpvhv_aux* -S_hv_auxinit(HV *hv) { +S_hv_auxinit(pTHX_ HV *hv) { struct xpvhv_aux *iter; char *array; PERL_ARGS_ASSERT_HV_AUXINIT; - if (!HvARRAY(hv)) { - Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) - + sizeof(struct xpvhv_aux), char); + if (!SvOOK(hv)) { + if (!HvARRAY(hv)) { + Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) + + sizeof(struct xpvhv_aux), char); + } else { + array = (char *) HvARRAY(hv); + Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) + + sizeof(struct xpvhv_aux), char); + } + HvARRAY(hv) = (HE**)array; + SvOOK_on(hv); + iter = HvAUX(hv); +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (PL_HASH_RAND_BITS_ENABLED) { + /* mix in some new state to PL_hash_rand_bits to "randomize" the traversal order*/ + if (PL_HASH_RAND_BITS_ENABLED == 1) + PL_hash_rand_bits += ptr_hash((PTRV)array); + PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1); + } + iter->xhv_rand = (U32)PL_hash_rand_bits; +#endif } else { - array = (char *) HvARRAY(hv); - Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) - + sizeof(struct xpvhv_aux), char); + iter = HvAUX(hv); } - HvARRAY(hv) = (HE**) array; - SvOOK_on(hv); - iter = HvAUX(hv); iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif iter->xhv_name_u.xhvnameu_name = 0; iter->xhv_name_count = 0; iter->xhv_backreferences = 0; @@ -1813,6 +1969,9 @@ Perl_hv_iterinit(pTHX_ HV *hv) } iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif } else { hv_auxinit(hv); } @@ -1868,6 +2027,27 @@ Perl_hv_riter_set(pTHX_ HV *hv, I32 riter) { } void +Perl_hv_rand_set(pTHX_ HV *hv, U32 new_xhv_rand) { + struct xpvhv_aux *iter; + + PERL_ARGS_ASSERT_HV_RAND_SET; + +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (!hv) + Perl_croak(aTHX_ "Bad hash"); + + if (SvOOK(hv)) { + iter = HvAUX(hv); + } else { + iter = hv_auxinit(hv); + } + iter->xhv_rand = new_xhv_rand; +#else + Perl_croak(aTHX_ "This Perl has not been built with support for randomized hash key traversal but something called Perl_hv_rand_set()."); +#endif +} + +void Perl_hv_eiter_set(pTHX_ HV *hv, HE *eiter) { struct xpvhv_aux *iter; @@ -2273,6 +2453,18 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) } } +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (iter->xhv_last_rand != iter->xhv_rand) { + if (iter->xhv_riter != -1) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), + "Use of each() on hash after insertion without resetting hash iterator results in undefined behavior" + pTHX__FORMAT + pTHX__VALUE); + } + iter->xhv_last_rand = iter->xhv_rand; + } +#endif + /* Skip the entire loop if the hash is empty. */ if ((flags & HV_ITERNEXT_WANTPLACEHOLDERS) ? HvTOTALKEYS(hv) : HvUSEDKEYS(hv)) { @@ -2283,9 +2475,12 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) if (iter->xhv_riter > (I32)xhv->xhv_max /* HvRITER(hv) > HvMAX(hv) */) { /* There is no next one. End of the hash. */ iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; /* reset xhv_last_rand so we can detect inserts during traversal */ +#endif break; } - entry = (HvARRAY(hv))[iter->xhv_riter]; + entry = (HvARRAY(hv))[ PERL_HASH_ITER_BUCKET(iter) & xhv->xhv_max ]; if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) { /* If we have an entry, but it's a placeholder, don't count it. @@ -2298,7 +2493,12 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) or if we run through it and find only placeholders. */ } } - else iter->xhv_riter = -1; + else { + iter->xhv_riter = -1; +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif + } if (oldentry && HvLAZYDEL(hv)) { /* was deleted earlier? */ HvLAZYDEL_off(hv);