X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/b5a2311a24e9ebd29ae29518bfa5f0a488608b3a..791c0c418987d82206c2610a67dd7f6503f011eb:/hv.h diff --git a/hv.h b/hv.h index b89377b..607152c 100644 --- a/hv.h +++ b/hv.h @@ -8,6 +8,26 @@ * */ +/* These control hash traversal randomization and the environment variable PERL_PERTURB_KEYS. + * Currently disabling this functionality will break a few tests, but should otherwise work fine. + * See perlrun for more details. */ + +#if defined(PERL_PERTURB_KEYS_DISABLED) +# define PL_HASH_RAND_BITS_ENABLED 0 +# define PERL_HASH_ITER_BUCKET(iter) ((iter)->xhv_riter) +#else +# define PERL_HASH_RANDOMIZE_KEYS 1 +# if defined(PERL_PERTURB_KEYS_RANDOM) +# define PL_HASH_RAND_BITS_ENABLED 1 +# elif defined(PERL_PERTURB_KEYS_DETERMINISTIC) +# define PL_HASH_RAND_BITS_ENABLED 2 +# else +# define USE_PERL_PERTURB_KEYS 1 +# define PL_HASH_RAND_BITS_ENABLED PL_hash_rand_bits_enabled +# endif +# define PERL_HASH_ITER_BUCKET(iter) (((iter)->xhv_riter) ^ ((iter)->xhv_rand)) +#endif + /* entry in hash value chain */ struct he { /* Keep hent_next first in this structure, because sv_free_arenas take @@ -23,12 +43,17 @@ struct he { /* hash key -- defined separately for use as shared pointer */ struct hek { - U32 hek_hash; /* hash of key */ - I32 hek_len; /* length of hash key */ - char hek_key[1]; /* variable-length hash key */ + U32 hek_hash; /* computed hash of key */ + I32 hek_len; /* length of the hash key */ + /* Be careful! Sometimes we store a pointer in the hek_key + * buffer, which means it must be 8 byte aligned or things + * dont work on aligned platforms like HPUX + * Also beware, the last byte of the hek_key buffer is a + * hidden flags byte about the key. */ + char hek_key[1]; /* variable-length hash key */ /* the hash-key is \0-terminated */ /* after the \0 there is a byte for flags, such as whether the key - is UTF-8 */ + is UTF-8 or WAS-UTF-8, or an SV */ }; struct shared_he { @@ -38,7 +63,7 @@ struct shared_he { /* Subject to change. Don't access this directly. - Use the funcs in mro.c + Use the funcs in mro_core.c */ struct mro_alg { @@ -61,6 +86,9 @@ struct mro_meta { U32 pkg_gen; /* Bumps when local methods/@ISA change */ const struct mro_alg *mro_which; /* which mro alg is in use? */ HV *isa; /* Everything this class @ISA */ + HV *super; /* SUPER method cache */ + CV *destroy; /* DESTROY method if destroy_gen non-zero */ + U32 destroy_gen; /* Generation number of DESTROY cache */ }; #define MRO_GET_PRIVATE_DATA(smeta, which) \ @@ -91,9 +119,17 @@ struct xpvhv_aux { */ I32 xhv_name_count; struct mro_meta *xhv_mro_meta; - HV * xhv_super; /* SUPER method cache */ +#ifdef PERL_HASH_RANDOMIZE_KEYS + U32 xhv_rand; /* random value for hash traversal */ + U32 xhv_last_rand; /* last random value for hash traversal, + used to detect each() after insert for warnings */ +#endif + U32 xhv_aux_flags; /* assorted extra flags */ }; +#define HvAUXf_SCAN_STASH 0x1 /* stash is being scanned by gv_check */ +#define HvAUXf_NO_DEREF 0x2 /* @{}, %{} etc (and nomethod) not present */ + /* hash structure: */ /* This structure must match the beginning of struct xpvmg in sv.h. */ struct xpvhv { @@ -103,514 +139,17 @@ struct xpvhv { STRLEN xhv_max; /* subscript of last element of xhv_array */ }; -/* hash a key */ -/* The use of a temporary pointer and the casting games - * is needed to serve the dual purposes of - * (a) the hashed data being interpreted as "unsigned char" (new since 5.8, - * a "char" can be either signed or unsigned, depending on the compiler) - * (b) catering for old code that uses a "char" - * - * The "hash seed" feature was added in Perl 5.8.1 to perturb the results - * to avoid "algorithmic complexity attacks". - * - * If USE_HASH_SEED is defined, hash randomisation is done by default - * If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done - * only if the environment variable PERL_HASH_SEED is set. - * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed()) - */ -#ifndef PERL_HASH_SEED -# if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) -# define PERL_HASH_SEED PL_hash_seed -# else -# define PERL_HASH_SEED "PeRlHaShhAcKpErl" -# endif -#endif - -#define PERL_HASH_SEED_U32 *((U32*)PERL_HASH_SEED) -#define PERL_HASH_SEED_U64_1 (((U64*)PERL_HASH_SEED)[0]) -#define PERL_HASH_SEED_U64_2 (((U64*)PERL_HASH_SEED)[1]) - -/* legacy - only mod_perl should be doing this. */ -#ifdef PERL_HASH_INTERNAL_ACCESS -#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len) -#endif - -/* Uncomment one of the following lines to use an alternative hash algorithm. -#define PERL_HASH_FUNC_SDBM -#define PERL_HASH_FUNC_DJB2 -#define PERL_HASH_FUNC_SUPERFAST -#define PERL_HASH_FUNC_MURMUR3 -#define PERL_HASH_FUNC_SIPHASH -#define PERL_HASH_FUNC_ONE_AT_A_TIME -*/ - -#if !(defined(PERL_HASH_FUNC_SDBM) || defined(PERL_HASH_FUNC_DJB2) || defined(PERL_HASH_FUNC_SUPERFAST) || defined(PERL_HASH_FUNC_MURMUR3) || defined(PERL_HASH_FUNC_ONE_AT_A_TIME)) -#define PERL_HASH_FUNC_MURMUR3 -#endif - -#if defined(PERL_HASH_FUNC_SIPHASH) -#define PERL_HASH_FUNC "SIPHASH" -#define PERL_HASH_SEED_BYTES 16 - -/* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein. - * The authors claim it is relatively secure compared to the alternatives - * and that performance wise it is a suitable hash for languages like Perl. - * See: - * - * https://www.131002.net/siphash/ - * - * This implementation seems to perform slightly slower than one-at-a-time for - * short keys, but degrades slower for longer keys. Murmur Hash outperforms it - * regardless of keys size. - * - * It is 64 bit only. - */ - -#define PERL_HASH_NEEDS_TWO_SEEDS - -#ifndef U64 -#define U64 uint64_t -#endif - -#define ROTL(x,b) (U64)( ((x) << (b)) | ( (x) >> (64 - (b))) ) - -#define U32TO8_LE(p, v) \ - (p)[0] = (U8)((v) ); (p)[1] = (U8)((v) >> 8); \ - (p)[2] = (U8)((v) >> 16); (p)[3] = (U8)((v) >> 24); - -#define U64TO8_LE(p, v) \ - U32TO8_LE((p), (U32)((v) )); \ - U32TO8_LE((p) + 4, (U32)((v) >> 32)); - -#define U8TO64_LE(p) \ - (((U64)((p)[0]) ) | \ - ((U64)((p)[1]) << 8) | \ - ((U64)((p)[2]) << 16) | \ - ((U64)((p)[3]) << 24) | \ - ((U64)((p)[4]) << 32) | \ - ((U64)((p)[5]) << 40) | \ - ((U64)((p)[6]) << 48) | \ - ((U64)((p)[7]) << 56)) - -#define SIPROUND \ - do { \ - v0_PeRlHaSh += v1_PeRlHaSh; v1_PeRlHaSh=ROTL(v1_PeRlHaSh,13); v1_PeRlHaSh ^= v0_PeRlHaSh; v0_PeRlHaSh=ROTL(v0_PeRlHaSh,32); \ - v2_PeRlHaSh += v3_PeRlHaSh; v3_PeRlHaSh=ROTL(v3_PeRlHaSh,16); v3_PeRlHaSh ^= v2_PeRlHaSh; \ - v0_PeRlHaSh += v3_PeRlHaSh; v3_PeRlHaSh=ROTL(v3_PeRlHaSh,21); v3_PeRlHaSh ^= v0_PeRlHaSh; \ - v2_PeRlHaSh += v1_PeRlHaSh; v1_PeRlHaSh=ROTL(v1_PeRlHaSh,17); v1_PeRlHaSh ^= v2_PeRlHaSh; v2_PeRlHaSh=ROTL(v2_PeRlHaSh,32); \ - } while(0) - -/* SipHash-2-4 */ -#define PERL_HASH(hash,str,len) STMT_START { \ - const char * const strtmp_PeRlHaSh = (str); \ - const unsigned char *in_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \ - const U32 inlen_PeRlHaSh = (len); \ - /* "somepseudorandomlygeneratedbytes" */ \ - U64 v0_PeRlHaSh = 0x736f6d6570736575ULL; \ - U64 v1_PeRlHaSh = 0x646f72616e646f6dULL; \ - U64 v2_PeRlHaSh = 0x6c7967656e657261ULL; \ - U64 v3_PeRlHaSh = 0x7465646279746573ULL; \ -\ - U64 b_PeRlHaSh; \ - U64 k0_PeRlHaSh = PERL_HASH_SEED_U64_1; \ - U64 k1_PeRlHaSh = PERL_HASH_SEED_U64_2; \ - U64 m_PeRlHaSh; \ - const int left_PeRlHaSh = inlen_PeRlHaSh & 7; \ - const U8 *end_PeRlHaSh = in_PeRlHaSh + inlen_PeRlHaSh - left_PeRlHaSh; \ -\ - b_PeRlHaSh = ( ( U64 )(len) ) << 56; \ - v3_PeRlHaSh ^= k1_PeRlHaSh; \ - v2_PeRlHaSh ^= k0_PeRlHaSh; \ - v1_PeRlHaSh ^= k1_PeRlHaSh; \ - v0_PeRlHaSh ^= k0_PeRlHaSh; \ -\ - for ( ; in_PeRlHaSh != end_PeRlHaSh; in_PeRlHaSh += 8 ) \ - { \ - m_PeRlHaSh = U8TO64_LE( in_PeRlHaSh ); \ - v3_PeRlHaSh ^= m_PeRlHaSh; \ - SIPROUND; \ - SIPROUND; \ - v0_PeRlHaSh ^= m_PeRlHaSh; \ - } \ -\ - switch( left_PeRlHaSh ) \ - { \ - case 7: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 6] ) << 48; \ - case 6: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 5] ) << 40; \ - case 5: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 4] ) << 32; \ - case 4: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 3] ) << 24; \ - case 3: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 2] ) << 16; \ - case 2: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 1] ) << 8; \ - case 1: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 0] ); break; \ - case 0: break; \ - } \ -\ - v3_PeRlHaSh ^= b_PeRlHaSh; \ - SIPROUND; \ - SIPROUND; \ - v0_PeRlHaSh ^= b_PeRlHaSh; \ -\ - v2_PeRlHaSh ^= 0xff; \ - SIPROUND; \ - SIPROUND; \ - SIPROUND; \ - SIPROUND; \ - b_PeRlHaSh = v0_PeRlHaSh ^ v1_PeRlHaSh ^ v2_PeRlHaSh ^ v3_PeRlHaSh; \ - (hash)= (U32)(b_PeRlHaSh & U32_MAX); \ -} STMT_END - -#elif defined(PERL_HASH_FUNC_SUPERFAST) -#define PERL_HASH_FUNC "SUPERFAST" -/* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in - * (http://burtleburtle.net/bob/hash/doobs.html) - * It is by Paul Hsieh (c) 2004 and is analysed here - * http://www.azillionmonkeys.com/qed/hash.html - * license terms are here: - * http://www.azillionmonkeys.com/qed/weblicense.html - */ -#undef get16bits -#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ - || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) -#define get16bits(d) (*((const U16 *) (d))) -#endif - -#if !defined (get16bits) -#define get16bits(d) ((((const U8 *)(d))[1] << UINT32_C(8))\ - +((const U8 *)(d))[0]) -#endif -#define PERL_HASH(hash,str,len) \ - STMT_START { \ - register const char * const strtmp_PeRlHaSh = (str); \ - register const unsigned char *str_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \ - register U32 len_PeRlHaSh = (len); \ - register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ - register U32 tmp_PeRlHaSh; \ - register int rem_PeRlHaSh= len_PeRlHaSh & 3; \ - len_PeRlHaSh >>= 2; \ - \ - for (;len_PeRlHaSh > 0; len_PeRlHaSh--) { \ - hash_PeRlHaSh += get16bits (str_PeRlHaSh); \ - tmp_PeRlHaSh = (get16bits (str_PeRlHaSh+2) << 11) ^ hash_PeRlHaSh; \ - hash_PeRlHaSh = (hash_PeRlHaSh << 16) ^ tmp_PeRlHaSh; \ - str_PeRlHaSh += 2 * sizeof (U16); \ - hash_PeRlHaSh += hash_PeRlHaSh >> 11; \ - } \ - \ - /* Handle end cases */ \ - switch (rem_PeRlHaSh) { \ - case 3: hash_PeRlHaSh += get16bits (str_PeRlHaSh); \ - hash_PeRlHaSh ^= hash_PeRlHaSh << 16; \ - hash_PeRlHaSh ^= str_PeRlHaSh[sizeof (U16)] << 18; \ - hash_PeRlHaSh += hash_PeRlHaSh >> 11; \ - break; \ - case 2: hash_PeRlHaSh += get16bits (str_PeRlHaSh); \ - hash_PeRlHaSh ^= hash_PeRlHaSh << 11; \ - hash_PeRlHaSh += hash_PeRlHaSh >> 17; \ - break; \ - case 1: hash_PeRlHaSh += *str_PeRlHaSh; \ - hash_PeRlHaSh ^= hash_PeRlHaSh << 10; \ - hash_PeRlHaSh += hash_PeRlHaSh >> 1; \ - } \ - \ - /* Force "avalanching" of final 127 bits */ \ - hash_PeRlHaSh ^= hash_PeRlHaSh << 3; \ - hash_PeRlHaSh += hash_PeRlHaSh >> 5; \ - hash_PeRlHaSh ^= hash_PeRlHaSh << 4; \ - hash_PeRlHaSh += hash_PeRlHaSh >> 17; \ - hash_PeRlHaSh ^= hash_PeRlHaSh << 25; \ - (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh >> 6)); \ - } STMT_END - -#elif defined(PERL_HASH_FUNC_MURMUR3) -#define PERL_HASH_FUNC "MURMUR3" -#define PERL_HASH_SEED_BYTES 4 - -/*----------------------------------------------------------------------------- - * MurmurHash3 was written by Austin Appleby, and is placed in the public - * domain. - * - * This implementation was originally written by Shane Day, and is also public domain, - * and was modified to function as a macro similar to other perl hash functions by - * Yves Orton. - * - * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A) - * with support for progressive processing. - * - * If you want to understand the MurmurHash algorithm you would be much better - * off reading the original source. Just point your browser at: - * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp - * - * How does it work? - * - * We can only process entire 32 bit chunks of input, except for the very end - * that may be shorter. - * - * To handle endianess I simply use a macro that reads a U32 and define - * that macro to be a direct read on little endian machines, a read and swap - * on big endian machines, or a byte-by-byte read if the endianess is unknown. - */ - - -/*----------------------------------------------------------------------------- - * Endianess, misalignment capabilities and util macros - * - * The following 3 macros are defined in this section. The other macros defined - * are only needed to help derive these 3. - * - * MURMUR_READ_UINT32(x) Read a little endian unsigned 32-bit int - * MURMUR_UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries - * MURMUR_ROTL32(x,r) Rotate x left by r bits - */ - -/* Now find best way we can to READ_UINT32 */ -#if (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) && U32SIZE == 4 - /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */ - #define MURMUR_READ_UINT32(ptr) (*((U32*)(ptr))) -#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 - /* TODO: Add additional cases below where a compiler provided bswap32 is available */ - #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3)) - #define MURMUR_READ_UINT32(ptr) (__builtin_bswap32(*((U32*)(ptr)))) - #else - /* Without a known fast bswap32 we're just as well off doing this */ - #define MURMUR_READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) - #define MURMUR_UNALIGNED_SAFE - #endif -#else - /* Unknown endianess so last resort is to read individual bytes */ - #define MURMUR_READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) - - /* Since we're not doing word-reads we can skip the messing about with realignment */ - #define MURMUR_UNALIGNED_SAFE -#endif - -/* Find best way to ROTL32 */ -#if defined(_MSC_VER) - #include /* Microsoft put _rotl declaration in here */ - #define MURMUR_ROTL32(x,r) _rotl(x,r) -#else - /* gcc recognises this code and generates a rotate instruction for CPUs with one */ - #define MURMUR_ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r))) -#endif - - -/*----------------------------------------------------------------------------- - * Core murmurhash algorithm macros */ - -#define MURMUR_C1 (0xcc9e2d51) -#define MURMUR_C2 (0x1b873593) -#define MURMUR_C3 (0xe6546b64) -#define MURMUR_C4 (0x85ebca6b) -#define MURMUR_C5 (0xc2b2ae35) - -/* This is the main processing body of the algorithm. It operates - * on each full 32-bits of input. */ -#define MURMUR_DOBLOCK(h1, k1) STMT_START { \ - k1 *= MURMUR_C1; \ - k1 = MURMUR_ROTL32(k1,15); \ - k1 *= MURMUR_C2; \ - \ - h1 ^= k1; \ - h1 = MURMUR_ROTL32(h1,13); \ - h1 = h1 * 5 + MURMUR_C3; \ -} STMT_END - - -/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */ -/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */ -#define MURMUR_DOBYTES(cnt, h1, c, n, ptr, len) STMT_START { \ - int MURMUR_DOBYTES_i = cnt; \ - while(MURMUR_DOBYTES_i--) { \ - c = c>>8 | *ptr++<<24; \ - n++; len--; \ - if(n==4) { \ - MURMUR_DOBLOCK(h1, c); \ - n = 0; \ - } \ - } \ -} STMT_END - -/* process the last 1..3 bytes and finalize */ -#define MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length) STMT_START { \ - /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */\ - PeRlHaSh_len -= PeRlHaSh_len/4*4; \ - \ - /* Append any remaining bytes into carry */ \ - MURMUR_DOBYTES(PeRlHaSh_len, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_len); \ - \ - if (PeRlHaSh_bytes_in_carry) { \ - PeRlHaSh_k1 = PeRlHaSh_carry >> ( 4 - PeRlHaSh_bytes_in_carry ) * 8; \ - PeRlHaSh_k1 *= MURMUR_C1; \ - PeRlHaSh_k1 = MURMUR_ROTL32(PeRlHaSh_k1,15); \ - PeRlHaSh_k1 *= MURMUR_C2; \ - PeRlHaSh_h1 ^= PeRlHaSh_k1; \ - } \ - PeRlHaSh_h1 ^= PeRlHaSh_total_length; \ - \ - /* fmix */ \ - PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 16; \ - PeRlHaSh_h1 *= MURMUR_C4; \ - PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 13; \ - PeRlHaSh_h1 *= MURMUR_C5; \ - PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 16; \ - (hash)= PeRlHaSh_h1; \ -} STMT_END - -/* now we create the hash function */ - -#if defined(UNALIGNED_SAFE) -#define PERL_HASH(hash,str,len) STMT_START { \ - register const char * const s_PeRlHaSh_tmp = (str); \ - register const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \ - register I32 PeRlHaSh_len = len; \ - \ - U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32; \ - U32 PeRlHaSh_k1; \ - U32 PeRlHaSh_carry = 0; \ - \ - const unsigned char *PeRlHaSh_end; \ - \ - int PeRlHaSh_bytes_in_carry = 0; /* bytes in carry */ \ - I32 PeRlHaSh_total_length= PeRlHaSh_len; \ - \ - /* This CPU handles unaligned word access */ \ - /* Process 32-bit chunks */ \ - PeRlHaSh_end = PeRlHaSh_ptr + PeRlHaSh_len/4*4; \ - for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ - PeRlHaSh_k1 = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ - MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ - } \ - \ - MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length);\ - } STMT_END -#else -#define PERL_HASH(hash,str,len) STMT_START { \ - register const char * const s_PeRlHaSh_tmp = (str); \ - register const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \ - register I32 PeRlHaSh_len = len; \ - \ - U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32; \ - U32 PeRlHaSh_k1; \ - U32 PeRlHaSh_carry = 0; \ - \ - const unsigned char *PeRlHaSh_end; \ - \ - int PeRlHaSh_bytes_in_carry = 0; /* bytes in carry */ \ - I32 PeRlHaSh_total_length= PeRlHaSh_len; \ - \ - /* This CPU does not handle unaligned word access */ \ - \ - /* Consume enough so that the next data byte is word aligned */ \ - int PeRlHaSh_i = -(long)PeRlHaSh_ptr & 3; \ - if(PeRlHaSh_i && PeRlHaSh_i <= PeRlHaSh_len) { \ - MURMUR_DOBYTES(PeRlHaSh_i, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_len);\ - } \ - \ - /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ \ - PeRlHaSh_end = PeRlHaSh_ptr + PeRlHaSh_len/4*4; \ - switch(PeRlHaSh_bytes_in_carry) { /* how many bytes in carry */ \ - case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ \ - for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ - PeRlHaSh_k1 = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ - MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ - } \ - break; \ - case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ \ - for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ - PeRlHaSh_k1 = PeRlHaSh_carry>>24; \ - PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ - PeRlHaSh_k1 |= PeRlHaSh_carry<<8; \ - MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ - } \ - break; \ - case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ \ - for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ - PeRlHaSh_k1 = PeRlHaSh_carry>>16; \ - PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ - PeRlHaSh_k1 |= PeRlHaSh_carry<<16; \ - MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ - } \ - break; \ - case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ \ - for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \ - PeRlHaSh_k1 = PeRlHaSh_carry>>8; \ - PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \ - PeRlHaSh_k1 |= PeRlHaSh_carry<<24; \ - MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \ - } \ - } \ - \ - MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length);\ - } STMT_END -#endif - -#elif defined(PERL_HASH_FUNC_DJB2) -#define PERL_HASH_FUNC "DJB2" -#define PERL_HASH_SEED_BYTES 4 -#define PERL_HASH(hash,str,len) \ - STMT_START { \ - register const char * const s_PeRlHaSh_tmp = (str); \ - register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ - register I32 i_PeRlHaSh = len; \ - register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ - while (i_PeRlHaSh--) { \ - hash_PeRlHaSh = ((hash_PeRlHaSh << 5) + hash_PeRlHaSh) + *s_PeRlHaSh++; \ - } \ - (hash) = hash_PeRlHaSh;\ - } STMT_END - -#elif defined(PERL_HASH_FUNC_SDBM) -#define PERL_HASH_FUNC "SDBM" -#define PERL_HASH_SEED_BYTES 4 -#define PERL_HASH(hash,str,len) \ - STMT_START { \ - register const char * const s_PeRlHaSh_tmp = (str); \ - register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ - register I32 i_PeRlHaSh = len; \ - register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ - while (i_PeRlHaSh--) { \ - hash_PeRlHaSh = (hash_PeRlHaSh << 6) + (hash_PeRlHaSh << 16) - hash_PeRlHaSh + *s_PeRlHaSh++; \ - } \ - (hash) = hash_PeRlHaSh;\ - } STMT_END - -#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME) -/* DEFAULT/HISTORIC HASH FUNCTION */ -#define PERL_HASH_FUNC "ONE_AT_A_TIME" -#define PERL_HASH_SEED_BYTES 4 - -/* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins - * from requirements by Colin Plumb. - * (http://burtleburtle.net/bob/hash/doobs.html) */ -#define PERL_HASH(hash,str,len) \ - STMT_START { \ - register const char * const s_PeRlHaSh_tmp = (str); \ - register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ - register I32 i_PeRlHaSh = len; \ - register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ - while (i_PeRlHaSh--) { \ - hash_PeRlHaSh += (U8)*s_PeRlHaSh++; \ - hash_PeRlHaSh += (hash_PeRlHaSh << 10); \ - hash_PeRlHaSh ^= (hash_PeRlHaSh >> 6); \ - } \ - hash_PeRlHaSh += (hash_PeRlHaSh << 3); \ - hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \ - (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \ - } STMT_END -#endif -#ifndef PERL_HASH -#error "No hash function defined!" -#endif /* =head1 Hash Manipulation Functions -=for apidoc AmU||HEf_SVKEY +=for apidoc AmnU||HEf_SVKEY This flag, used in the length slot of hash entries and magic structures, specifies the structure contains an C pointer where a C pointer -is to be expected. (For information only--not to be used). +is to be expected. (For information only--not to be used). =head1 Handy Values -=for apidoc AmU||Nullhv +=for apidoc ADmnU||Nullhv Null HV pointer. (deprecated - use C<(HV *)NULL> instead) @@ -618,20 +157,20 @@ Null HV pointer. =head1 Hash Manipulation Functions =for apidoc Am|char*|HvNAME|HV* stash -Returns the package name of a stash, or NULL if C isn't a stash. -See C, C. +Returns the package name of a stash, or C if C isn't a stash. +See C>, C>. =for apidoc Am|STRLEN|HvNAMELEN|HV *stash Returns the length of the stash's name. =for apidoc Am|unsigned char|HvNAMEUTF8|HV *stash -Returns true if the name is in UTF8 encoding. +Returns true if the name is in UTF-8 encoding. =for apidoc Am|char*|HvENAME|HV* stash -Returns the effective name of a stash, or NULL if there is none. The +Returns the effective name of a stash, or NULL if there is none. The effective name represents a location in the symbol table where this stash -resides. It is updated automatically when packages are aliased or deleted. -A stash that is no longer in the symbol table has no effective name. This +resides. It is updated automatically when packages are aliased or deleted. +A stash that is no longer in the symbol table has no effective name. This name is preferable to C for use in MRO linearisations and isa caches. @@ -639,10 +178,10 @@ caches. Returns the length of the stash's effective name. =for apidoc Am|unsigned char|HvENAMEUTF8|HV *stash -Returns true if the effective name is in UTF8 encoding. +Returns true if the effective name is in UTF-8 encoding. =for apidoc Am|void*|HeKEY|HE* he -Returns the actual pointer stored in the key slot of the hash entry. The +Returns the actual pointer stored in the key slot of the hash entry. The pointer may be either C or C, depending on the value of C. Can be assigned to. The C or C macros are usually preferable for finding the value of a key. @@ -650,11 +189,12 @@ usually preferable for finding the value of a key. =for apidoc Am|STRLEN|HeKLEN|HE* he If this is negative, and amounts to C, it indicates the entry holds an C key. Otherwise, holds the actual length of the key. Can -be assigned to. The C macro is usually preferable for finding key +be assigned to. The C macro is usually preferable for finding key lengths. =for apidoc Am|SV*|HeVAL|HE* he -Returns the value slot (type C) stored in the hash entry. Can be assigned +Returns the value slot (type C) +stored in the hash entry. Can be assigned to. SV *foo= HeVAL(hv); @@ -672,14 +212,14 @@ not care about what the length of the key is, you may use the global variable C, though this is rather less efficient than using a local variable. Remember though, that hash keys in perl are free to contain embedded nulls, so using C or similar is not a good way to find -the length of hash keys. This is very similar to the C macro -described elsewhere in this document. See also C. +the length of hash keys. This is very similar to the C macro +described elsewhere in this document. See also C>. If you are using C to get values to pass to C to create a new SV, you should consider using C as it is more efficient. -=for apidoc Am|char*|HeUTF8|HE* he +=for apidoc Am|U32|HeUTF8|HE* he Returns whether the C value returned by C is encoded in UTF-8, doing any necessary dereferencing of possibly C keys. The value returned will be 0 or non-0, not necessarily 1 (or even a value with any low bits set), @@ -702,6 +242,20 @@ C. =cut */ +#define PERL_HASH_DEFAULT_HvMAX 7 + +/* During hsplit(), if HvMAX(hv)+1 (the new bucket count) is >= this value, + * we preallocate the HvAUX() struct. + * The assumption being that we are using so much space anyway we might + * as well allocate the extra bytes and speed up later keys() + * or each() operations. We don't do this to small hashes as we assume + * that a) it will be easy/fast to resize them to add the iterator, and b) that + * many of them will be objects which won't be traversed. Larger hashes however + * will take longer to extend, and the size of the aux struct is swamped by the + * overall length of the bucket array. + * */ +#define PERL_HV_ALLOC_AUX_SIZE (1 << 9) + /* these hash entry flags ride on hent_klen (for use only in magic/tied HVs) */ #define HEf_SVKEY -2 /* hent_key is an SV* */ @@ -709,7 +263,17 @@ C. # define Nullhv Null(HV*) #endif #define HvARRAY(hv) ((hv)->sv_u.svu_hash) -#define HvFILL(hv) Perl_hv_fill(aTHX_ (const HV *)(hv)) + +/* + +=for apidoc Am|STRLEN|HvFILL|HV *const hv + +See L. + +=cut + +*/ +#define HvFILL(hv) Perl_hv_fill(aTHX_ MUTABLE_HV(hv)) #define HvMAX(hv) ((XPVHV*) SvANY(hv))->xhv_max /* This quite intentionally does no flag checking first. That's your responsibility. */ @@ -720,6 +284,9 @@ C. #define HvEITER_set(hv,e) Perl_hv_eiter_set(aTHX_ MUTABLE_HV(hv), e) #define HvRITER_get(hv) (SvOOK(hv) ? HvAUX(hv)->xhv_riter : -1) #define HvEITER_get(hv) (SvOOK(hv) ? HvAUX(hv)->xhv_eiter : NULL) +#define HvRAND_get(hv) (SvOOK(hv) ? HvAUX(hv)->xhv_rand : 0) +#define HvLASTRAND_get(hv) (SvOOK(hv) ? HvAUX(hv)->xhv_last_rand : 0) + #define HvNAME(hv) HvNAME_get(hv) #define HvNAMELEN(hv) HvNAMELEN_get(hv) #define HvENAME(hv) HvENAME_get(hv) @@ -768,7 +335,7 @@ C. ((SvOOK(hv) && HvAUX(hv)->xhv_name_u.xhvnameu_name && HvAUX(hv)->xhv_name_count != -1) \ ? HEK_UTF8(HvENAME_HEK_NN(hv)) : 0) -/* the number of keys (including any placeholders) */ +/* the number of keys (including any placeholders) - NOT PART OF THE API */ #define XHvTOTALKEYS(xhv) ((xhv)->xhv_keys) /* @@ -832,7 +399,9 @@ C. ((HeKLEN(he) == HEf_SVKEY) ? \ HeKEY_sv(he) : \ newSVpvn_flags(HeKEY(he), \ - HeKLEN(he), SVs_TEMP)) : \ + HeKLEN(he), \ + SVs_TEMP | \ + ( HeKUTF8(he) ? SVf_UTF8 : 0 ))) : \ &PL_sv_undef) #define HeSVKEY_set(he,sv) ((HeKLEN(he) = HEf_SVKEY), (HeKEY_sv(he) = sv)) @@ -848,7 +417,8 @@ C. #define HVhek_UTF8 0x01 /* Key is utf8 encoded. */ #define HVhek_WASUTF8 0x02 /* Key is bytes here, but was supplied as utf8. */ #define HVhek_UNSHARED 0x08 /* This key isn't a shared hash key. */ -#define HVhek_FREEKEY 0x100 /* Internal flag to say key is malloc()ed. */ +/* the following flags are options for functions, they are not stored in heks */ +#define HVhek_FREEKEY 0x100 /* Internal flag to say key is Newx()ed. */ #define HVhek_PLACEHOLD 0x200 /* Internal flag to create placeholder. * (may change, but Storable is a core module) */ #define HVhek_KEYCANONICAL 0x400 /* Internal flag - key is in canonical form. @@ -901,8 +471,7 @@ C. (val), (hash))) #define hv_exists_ent(hv, keysv, hash) \ - (hv_common((hv), (keysv), NULL, 0, 0, HV_FETCH_ISEXISTS, 0, (hash)) \ - ? TRUE : FALSE) + cBOOL(hv_common((hv), (keysv), NULL, 0, 0, HV_FETCH_ISEXISTS, 0, (hash))) #define hv_fetch_ent(hv, keysv, lval, hash) \ ((HE *) hv_common((hv), (keysv), NULL, 0, 0, \ ((lval) ? HV_FETCH_LVALUE : 0), NULL, (hash))) @@ -920,9 +489,10 @@ C. (HV_FETCH_ISSTORE|HV_FETCH_JUST_SV), \ (val), (hash))) + + #define hv_exists(hv, key, klen) \ - (hv_common_key_len((hv), (key), (klen), HV_FETCH_ISEXISTS, NULL, 0) \ - ? TRUE : FALSE) + cBOOL(hv_common_key_len((hv), (key), (klen), HV_FETCH_ISEXISTS, NULL, 0)) #define hv_fetch(hv, key, klen, lval) \ ((SV**) hv_common_key_len((hv), (key), (klen), (lval) \ @@ -933,6 +503,40 @@ C. (MUTABLE_SV(hv_common_key_len((hv), (key), (klen), \ (flags) | HV_DELETE, NULL, 0))) +/* Provide 's' suffix subs for constant strings (and avoid needing to count + * chars). See STR_WITH_LEN in handy.h - because these are macros we cant use + * STR_WITH_LEN to do the work, we have to unroll it. */ +#define hv_existss(hv, key) \ + hv_exists((hv), ("" key ""), (sizeof(key)-1)) + +#define hv_fetchs(hv, key, lval) \ + hv_fetch((hv), ("" key ""), (sizeof(key)-1), (lval)) + +#define hv_deletes(hv, key, flags) \ + hv_delete((hv), ("" key ""), (sizeof(key)-1), (flags)) + +#define hv_name_sets(hv, name, flags) \ + hv_name_set((hv),("" name ""),(sizeof(name)-1), flags) + +#define hv_stores(hv, key, val) \ + hv_store((hv), ("" key ""), (sizeof(key)-1), (val), 0) + +#ifdef PERL_CORE +# define hv_storehek(hv, hek, val) \ + hv_common((hv), NULL, HEK_KEY(hek), HEK_LEN(hek), HEK_UTF8(hek), \ + HV_FETCH_ISSTORE|HV_FETCH_JUST_SV, (val), HEK_HASH(hek)) +# define hv_fetchhek(hv, hek, lval) \ + ((SV **) \ + hv_common((hv), NULL, HEK_KEY(hek), HEK_LEN(hek), HEK_UTF8(hek), \ + (lval) \ + ? (HV_FETCH_JUST_SV | HV_FETCH_LVALUE) \ + : HV_FETCH_JUST_SV, \ + NULL, HEK_HASH(hek))) +# define hv_deletehek(hv, hek, flags) \ + hv_common((hv), NULL, HEK_KEY(hek), HEK_LEN(hek), HEK_UTF8(hek), \ + (flags)|HV_DELETE, NULL, HEK_HASH(hek)) +#endif + /* This refcounted he structure is used for storing the hints used for lexical pragmas. Without threads, it's basically struct he + refcount. With threads, life gets more complex as the structure needs to be shared @@ -971,10 +575,10 @@ struct refcounted_he { }; /* -=for apidoc m|SV *|refcounted_he_fetch_pvs|const struct refcounted_he *chain|const char *key|U32 flags +=for apidoc m|SV *|refcounted_he_fetch_pvs|const struct refcounted_he *chain|"key"|U32 flags -Like L, but takes a literal string instead of -a string/length pair, and no precomputed hash. +Like L, but takes a literal string +instead of a string/length pair, and no precomputed hash. =cut */ @@ -983,10 +587,10 @@ a string/length pair, and no precomputed hash. Perl_refcounted_he_fetch_pvn(aTHX_ chain, STR_WITH_LEN(key), 0, flags) /* -=for apidoc m|struct refcounted_he *|refcounted_he_new_pvs|struct refcounted_he *parent|const char *key|SV *value|U32 flags +=for apidoc m|struct refcounted_he *|refcounted_he_new_pvs|struct refcounted_he *parent|"key"|SV *value|U32 flags -Like L, but takes a literal string instead of -a string/length pair, and no precomputed hash. +Like L, but takes a literal string +instead of a string/length pair, and no precomputed hash. =cut */ @@ -1056,12 +660,8 @@ Creates a new HV. The reference count is set to 1. #define newHV() MUTABLE_HV(newSV_type(SVt_PVHV)) +#include "hv_func.h" + /* - * Local variables: - * c-indentation-style: bsd - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * * ex: set ts=8 sts=4 sw=4 et: */