X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/5aaab254c02795622bdf42e348ad8473aa1fc643..c8b79d5f6bcdd669165cd1439bf6145f0300e3d3:/hv.c diff --git a/hv.c b/hv.c index 23130cc..6476f51 100644 --- a/hv.c +++ b/hv.c @@ -526,7 +526,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, bool needs_store; hv_magic_check (hv, &needs_copy, &needs_store); if (needs_copy) { - const bool save_taint = TAINT_get; /* Unused var warning under NO_TAINT_SUPPORT */ + const bool save_taint = TAINT_get; if (keysv || is_utf8) { if (!keysv) { keysv = newSVpvn_utf8(key, klen, TRUE); @@ -540,6 +540,9 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } TAINT_IF(save_taint); +#ifdef NO_TAINT_SUPPORT + PERL_UNUSED_VAR(save_taint); +#endif if (!needs_store) { if (flags & HVhek_FREEKEY) Safefree(key); @@ -786,8 +789,48 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, else /* gotta do the real thing */ HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags); HeVAL(entry) = val; - HeNEXT(entry) = *oentry; - *oentry = entry; + +#ifdef PERL_HASH_RANDOMIZE_KEYS + /* This logic semi-randomizes the insert order in a bucket. + * Either we insert into the top, or the slot below the top, + * making it harder to see if there is a collision. We also + * reset the iterator randomizer if there is one. + */ + if ( *oentry && PL_HASH_RAND_BITS_ENABLED) { + PL_hash_rand_bits++; + PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1); + if ( PL_hash_rand_bits & 1 ) { + HeNEXT(entry) = HeNEXT(*oentry); + HeNEXT(*oentry) = entry; + } else { + HeNEXT(entry) = *oentry; + *oentry = entry; + } + } else +#endif + { + HeNEXT(entry) = *oentry; + *oentry = entry; + } +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (SvOOK(hv)) { + /* Currently this makes various tests warn in annoying ways. + * So Silenced for now. - Yves | bogus end of comment =>* / + if (HvAUX(hv)->xhv_riter != -1) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), + "[TESTING] Inserting into a hash during each() traversal results in undefined behavior" + pTHX__FORMAT + pTHX__VALUE); + } + */ + if (PL_HASH_RAND_BITS_ENABLED) { + if (PL_HASH_RAND_BITS_ENABLED == 1) + PL_hash_rand_bits += (PTRV)entry + 1; /* we don't bother to use ptr_hash here */ + PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1); + } + HvAUX(hv)->xhv_rand= (U32)PL_hash_rand_bits; + } +#endif if (val == &PL_sv_placeholder) HvPLACEHOLDERS(hv)++; @@ -796,7 +839,26 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ if ( DO_HSPLIT(xhv) ) { - hsplit(hv); + const STRLEN oldsize = xhv->xhv_max + 1; + const U32 items = (U32)HvPLACEHOLDERS_get(hv); + + if (items /* hash has placeholders */ + && !SvREADONLY(hv) /* but is not a restricted hash */) { + /* If this hash previously was a "restricted hash" and had + placeholders, but the "restricted" flag has been turned off, + then the placeholders no longer serve any useful purpose. + However, they have the downsides of taking up RAM, and adding + extra steps when finding used values. It's safe to clear them + at this point, even though Storable rebuilds restricted hashes by + putting in all the placeholders (first) before turning on the + readonly flag, because Storable always pre-splits the hash. + If we're lucky, then we may clear sufficient placeholders to + avoid needing to split the hash at all. */ + clear_placeholders(hv, items); + if (DO_HSPLIT(xhv)) + hsplit(hv, oldsize, oldsize * 2); + } else + hsplit(hv, oldsize, oldsize * 2); } if (return_svp) { @@ -1085,13 +1147,10 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } STATIC void -S_hsplit(pTHX_ HV *hv) +S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize) { dVAR; - XPVHV* const xhv = (XPVHV*)SvANY(hv); - const I32 oldsize = (I32) xhv->xhv_max+1; /* HvMAX(hv)+1 (sick) */ - I32 newsize = oldsize * 2; - I32 i; + STRLEN i = 0; char *a = (char*) HvARRAY(hv); HE **aep; @@ -1100,68 +1159,87 @@ S_hsplit(pTHX_ HV *hv) /*PerlIO_printf(PerlIO_stderr(), "hsplit called for %p which had %d\n", (void*)hv, (int) oldsize);*/ - if (HvPLACEHOLDERS_get(hv) && !SvREADONLY(hv)) { - /* Can make this clear any placeholders first for non-restricted hashes, - even though Storable rebuilds restricted hashes by putting in all the - placeholders (first) before turning on the readonly flag, because - Storable always pre-splits the hash. */ - hv_clear_placeholders(hv); - } - PL_nomemok = TRUE; -#if defined(STRANGE_MALLOC) || defined(MYMALLOC) Renew(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize) + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char); if (!a) { PL_nomemok = FALSE; return; } - if (SvOOK(hv)) { - Move(&a[oldsize * sizeof(HE*)], &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); - } -#else - Newx(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize) - + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char); - if (!a) { - PL_nomemok = FALSE; - return; +#ifdef PERL_HASH_RANDOMIZE_KEYS + /* the idea of this is that we create a "random" value by hashing the address of + * the array, we then use the low bit to decide if we insert at the top, or insert + * second from top. After each such insert we rotate the hashed value. So we can + * use the same hashed value over and over, and in normal build environments use + * very few ops to do so. ROTL32() should produce a single machine operation. */ + if (PL_HASH_RAND_BITS_ENABLED) { + if (PL_HASH_RAND_BITS_ENABLED == 1) + PL_hash_rand_bits += ptr_hash((PTRV)a); + PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1); } - Copy(HvARRAY(hv), a, oldsize * sizeof(HE*), char); +#endif + if (SvOOK(hv)) { - Copy(HvAUX(hv), &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); - } - Safefree(HvARRAY(hv)); + struct xpvhv_aux *const dest + = (struct xpvhv_aux*) &a[newsize * sizeof(HE*)]; + Move(&a[oldsize * sizeof(HE*)], dest, 1, struct xpvhv_aux); + /* we reset the iterator's xhv_rand as well, so they get a totally new ordering */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + dest->xhv_rand = (U32)PL_hash_rand_bits; #endif + } PL_nomemok = FALSE; Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); /* zero 2nd half*/ - xhv->xhv_max = --newsize; /* HvMAX(hv) = --newsize */ + HvMAX(hv) = --newsize; HvARRAY(hv) = (HE**) a; - aep = (HE**)a; - for (i=0; ixhv_max+1; /* HvMAX(hv)+1 (sick) */ I32 newsize; - I32 i; char *a; - HE **aep; PERL_ARGS_ASSERT_HV_KSPLIT; @@ -1190,63 +1266,28 @@ Perl_hv_ksplit(pTHX_ HV *hv, IV newmax) a = (char *) HvARRAY(hv); if (a) { - PL_nomemok = TRUE; -#if defined(STRANGE_MALLOC) || defined(MYMALLOC) - Renew(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize) - + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char); - if (!a) { - PL_nomemok = FALSE; - return; - } - if (SvOOK(hv)) { - Copy(&a[oldsize * sizeof(HE*)], &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); - } -#else - Newx(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize) - + (SvOOK(hv) ? sizeof(struct xpvhv_aux) : 0), char); - if (!a) { - PL_nomemok = FALSE; - return; - } - Copy(HvARRAY(hv), a, oldsize * sizeof(HE*), char); - if (SvOOK(hv)) { - Copy(HvAUX(hv), &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux); - } - Safefree(HvARRAY(hv)); -#endif - PL_nomemok = FALSE; - Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); /* zero 2nd half*/ - } - else { - Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char); + hsplit(hv, oldsize, newsize); + } else { + Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char); + xhv->xhv_max = --newsize; + HvARRAY(hv) = (HE **) a; } - xhv->xhv_max = --newsize; /* HvMAX(hv) = --newsize */ - HvARRAY(hv) = (HE **) a; - if (!xhv->xhv_keys /* !HvTOTALKEYS(hv) */) /* skip rest if no entries */ - return; +} - aep = (HE**)a; - for (i=0; i PERL_HASH_DEFAULT_HvMAX && hv_max + 1 >= hv_keys * 2) \ + hv_max = hv_max / 2; \ + } \ + HvMAX(hv) = hv_max; \ +} STMT_END - if (!entry) /* non-existent */ - continue; - do { - I32 j = (HeHASH(entry) & newsize); - - if (j != i) { - j -= i; - *oentry = HeNEXT(entry); - HeNEXT(entry) = aep[j]; - aep[j] = entry; - } - else - oentry = &HeNEXT(entry); - entry = *oentry; - } while (entry); - } -} HV * Perl_newHVhv(pTHX_ HV *ohv) @@ -1309,12 +1350,9 @@ Perl_newHVhv(pTHX_ HV *ohv) HE *entry; const I32 riter = HvRITER_get(ohv); HE * const eiter = HvEITER_get(ohv); - STRLEN hv_fill = HvFILL(ohv); + STRLEN hv_keys = HvTOTALKEYS(ohv); - /* Can we use fewer buckets? (hv_max is always 2^n-1) */ - while (hv_max && hv_max + 1 >= hv_fill * 2) - hv_max = hv_max / 2; - HvMAX(hv) = hv_max; + HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys); hv_iterinit(ohv); while ((entry = hv_iternext_flags(ohv, 0))) { @@ -1353,7 +1391,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) if (ohv) { STRLEN hv_max = HvMAX(ohv); - STRLEN hv_fill = HvFILL(ohv); + STRLEN hv_keys = HvTOTALKEYS(ohv); HE *entry; const I32 riter = HvRITER_get(ohv); HE * const eiter = HvEITER_get(ohv); @@ -1361,9 +1399,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) ENTER; SAVEFREESV(hv); - while (hv_max && hv_max + 1 >= hv_fill * 2) - hv_max = hv_max / 2; - HvMAX(hv) = hv_max; + HV_SET_MAX_ADJUSTED_FOR_KEYS(hv,hv_max,hv_keys); hv_iterinit(ohv); while ((entry = hv_iternext_flags(ohv, 0))) { @@ -1377,7 +1413,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) else { (void)hv_common(hv, heksv, HeKEY(entry), HeKLEN(entry), HeKFLAGS(entry), HV_FETCH_ISSTORE|HV_FETCH_JUST_SV, sv, HeHASH(entry)); - SvREFCNT_dec(heksv); + SvREFCNT_dec_NN(heksv); } } HvRITER_set(ohv, riter); @@ -1389,6 +1425,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) hv_magic(hv, NULL, PERL_MAGIC_hints); return hv; } +#undef HV_SET_MAX_ADJUSTED_FOR_KEYS /* like hv_free_ent, but returns the SV rather than freeing it */ STATIC SV* @@ -1399,8 +1436,6 @@ S_hv_free_ent_ret(pTHX_ HV *hv, HE *entry) PERL_ARGS_ASSERT_HV_FREE_ENT_RET; - if (!entry) - return NULL; val = HeVAL(entry); if (HeKLEN(entry) == HEf_SVKEY) { SvREFCNT_dec(HeKEY_sv(entry)); @@ -1481,14 +1516,15 @@ Perl_hv_clear(pTHX_ HV *hv) for (; entry; entry = HeNEXT(entry)) { /* not already placeholder */ if (HeVAL(entry) != &PL_sv_placeholder) { - if (HeVAL(entry) && SvREADONLY(HeVAL(entry)) - && !SvIsCOW(HeVAL(entry))) { - SV* const keysv = hv_iterkeysv(entry); - Perl_croak(aTHX_ - "Attempt to delete readonly key '%"SVf"' from a restricted hash", - (void*)keysv); + if (HeVAL(entry)) { + if (SvREADONLY(HeVAL(entry)) && !SvIsCOW(HeVAL(entry))) { + SV* const keysv = hv_iterkeysv(entry); + Perl_croak_nocontext( + "Attempt to delete readonly key '%"SVf"' from a restricted hash", + (void*)keysv); + } + SvREFCNT_dec_NN(HeVAL(entry)); } - SvREFCNT_dec(HeVAL(entry)); HeVAL(entry) = &PL_sv_placeholder; HvPLACEHOLDERS(hv)++; } @@ -1634,6 +1670,9 @@ Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp) } iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif } if (!((XPVHV*)SvANY(hv))->xhv_keys) @@ -1753,16 +1792,14 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags) } if((meta = aux->xhv_mro_meta)) { if (meta->mro_linear_all) { - SvREFCNT_dec(MUTABLE_SV(meta->mro_linear_all)); - meta->mro_linear_all = NULL; - /* This is just acting as a shortcut pointer. */ - meta->mro_linear_current = NULL; - } else if (meta->mro_linear_current) { + SvREFCNT_dec_NN(meta->mro_linear_all); + /* mro_linear_current is just acting as a shortcut pointer, + hence the else. */ + } + else /* Only the current MRO is stored, so this owns the data. */ SvREFCNT_dec(meta->mro_linear_current); - meta->mro_linear_current = NULL; - } SvREFCNT_dec(meta->mro_nextmethod); SvREFCNT_dec(meta->isa); Safefree(meta); @@ -1774,7 +1811,7 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags) } if (!SvOOK(hv)) { Safefree(HvARRAY(hv)); - xhv->xhv_max = 7; /* HvMAX(hv) = 7 (it's a normal hash) */ + xhv->xhv_max = PERL_HASH_DEFAULT_HvMAX; /* HvMAX(hv) = 7 (it's a normal hash) */ HvARRAY(hv) = 0; } /* if we're freeing the HV, the SvMAGIC field has been reused for @@ -1819,27 +1856,77 @@ Perl_hv_fill(pTHX_ HV const *const hv) return count; } +/* hash a pointer to a U32 - Used in the hash traversal randomization + * and bucket order randomization code + * + * this code was derived from Sereal, which was derived from autobox. + */ + +PERL_STATIC_INLINE U32 S_ptr_hash(PTRV u) { +#if PTRSIZE == 8 + /* + * This is one of Thomas Wang's hash functions for 64-bit integers from: + * http://www.concentric.net/~Ttwang/tech/inthash.htm + */ + u = (~u) + (u << 18); + u = u ^ (u >> 31); + u = u * 21; + u = u ^ (u >> 11); + u = u + (u << 6); + u = u ^ (u >> 22); +#else + /* + * This is one of Bob Jenkins' hash functions for 32-bit integers + * from: http://burtleburtle.net/bob/hash/integer.html + */ + u = (u + 0x7ed55d16) + (u << 12); + u = (u ^ 0xc761c23c) ^ (u >> 19); + u = (u + 0x165667b1) + (u << 5); + u = (u + 0xd3a2646c) ^ (u << 9); + u = (u + 0xfd7046c5) + (u << 3); + u = (u ^ 0xb55a4f09) ^ (u >> 16); +#endif + return (U32)u; +} + + static struct xpvhv_aux* -S_hv_auxinit(HV *hv) { +S_hv_auxinit(pTHX_ HV *hv) { struct xpvhv_aux *iter; char *array; PERL_ARGS_ASSERT_HV_AUXINIT; - if (!HvARRAY(hv)) { - Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) - + sizeof(struct xpvhv_aux), char); + if (!SvOOK(hv)) { + if (!HvARRAY(hv)) { + Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) + + sizeof(struct xpvhv_aux), char); + } else { + array = (char *) HvARRAY(hv); + Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) + + sizeof(struct xpvhv_aux), char); + } + HvARRAY(hv) = (HE**)array; + SvOOK_on(hv); + iter = HvAUX(hv); +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (PL_HASH_RAND_BITS_ENABLED) { + /* mix in some new state to PL_hash_rand_bits to "randomize" the traversal order*/ + if (PL_HASH_RAND_BITS_ENABLED == 1) + PL_hash_rand_bits += ptr_hash((PTRV)array); + PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1); + } + iter->xhv_rand = (U32)PL_hash_rand_bits; +#endif } else { - array = (char *) HvARRAY(hv); - Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) - + sizeof(struct xpvhv_aux), char); + iter = HvAUX(hv); } - HvARRAY(hv) = (HE**) array; - SvOOK_on(hv); - iter = HvAUX(hv); iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif iter->xhv_name_u.xhvnameu_name = 0; iter->xhv_name_count = 0; iter->xhv_backreferences = 0; @@ -1882,6 +1969,9 @@ Perl_hv_iterinit(pTHX_ HV *hv) } iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif } else { hv_auxinit(hv); } @@ -1937,6 +2027,27 @@ Perl_hv_riter_set(pTHX_ HV *hv, I32 riter) { } void +Perl_hv_rand_set(pTHX_ HV *hv, U32 new_xhv_rand) { + struct xpvhv_aux *iter; + + PERL_ARGS_ASSERT_HV_RAND_SET; + +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (!hv) + Perl_croak(aTHX_ "Bad hash"); + + if (SvOOK(hv)) { + iter = HvAUX(hv); + } else { + iter = hv_auxinit(hv); + } + iter->xhv_rand = new_xhv_rand; +#else + Perl_croak(aTHX_ "This Perl has not been built with support for randomized hash key traversal but something called Perl_hv_rand_set()."); +#endif +} + +void Perl_hv_eiter_set(pTHX_ HV *hv, HE *eiter) { struct xpvhv_aux *iter; @@ -2215,7 +2326,7 @@ Perl_hv_kill_backrefs(pTHX_ HV *hv) { HvAUX(hv)->xhv_backreferences = 0; Perl_sv_kill_backrefs(aTHX_ MUTABLE_SV(hv), av); if (SvTYPE(av) == SVt_PVAV) - SvREFCNT_dec(av); + SvREFCNT_dec_NN(av); } } @@ -2342,6 +2453,18 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) } } +#ifdef PERL_HASH_RANDOMIZE_KEYS + if (iter->xhv_last_rand != iter->xhv_rand) { + if (iter->xhv_riter != -1) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), + "Use of each() on hash after insertion without resetting hash iterator results in undefined behavior" + pTHX__FORMAT + pTHX__VALUE); + } + iter->xhv_last_rand = iter->xhv_rand; + } +#endif + /* Skip the entire loop if the hash is empty. */ if ((flags & HV_ITERNEXT_WANTPLACEHOLDERS) ? HvTOTALKEYS(hv) : HvUSEDKEYS(hv)) { @@ -2352,9 +2475,12 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) if (iter->xhv_riter > (I32)xhv->xhv_max /* HvRITER(hv) > HvMAX(hv) */) { /* There is no next one. End of the hash. */ iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; /* reset xhv_last_rand so we can detect inserts during traversal */ +#endif break; } - entry = (HvARRAY(hv))[iter->xhv_riter]; + entry = (HvARRAY(hv))[ PERL_HASH_ITER_BUCKET(iter) & xhv->xhv_max ]; if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) { /* If we have an entry, but it's a placeholder, don't count it. @@ -2367,7 +2493,12 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) or if we run through it and find only placeholders. */ } } - else iter->xhv_riter = -1; + else { + iter->xhv_riter = -1; +#ifdef PERL_HASH_RANDOMIZE_KEYS + iter->xhv_last_rand = iter->xhv_rand; +#endif + } if (oldentry && HvLAZYDEL(hv)) { /* was deleted earlier? */ HvLAZYDEL_off(hv); @@ -2699,7 +2830,8 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ if (!next) { /* initial entry? */ } else if ( DO_HSPLIT(xhv) ) { - hsplit(PL_strtab); + const STRLEN oldsize = xhv->xhv_max + 1; + hsplit(PL_strtab, oldsize, oldsize * 2); } }