X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/b5a2311a24e9ebd29ae29518bfa5f0a488608b3a..247a7f40cd18f049b85ee008433addbd3069717a:/hv.h?ds=sidebyside diff --git a/hv.h b/hv.h index b89377b..3ee2399 100644 --- a/hv.h +++ b/hv.h @@ -129,6 +129,7 @@ struct xpvhv { #define PERL_HASH_SEED_U32 *((U32*)PERL_HASH_SEED) #define PERL_HASH_SEED_U64_1 (((U64*)PERL_HASH_SEED)[0]) #define PERL_HASH_SEED_U64_2 (((U64*)PERL_HASH_SEED)[1]) +#define PERL_HASH_SEED_U16_x(idx) (((U16*)PERL_HASH_SEED)[idx]) /* legacy - only mod_perl should be doing this. */ #ifdef PERL_HASH_INTERNAL_ACCESS @@ -142,13 +143,77 @@ struct xpvhv { #define PERL_HASH_FUNC_MURMUR3 #define PERL_HASH_FUNC_SIPHASH #define PERL_HASH_FUNC_ONE_AT_A_TIME +#define PERL_HASH_FUNC_ONE_AT_A_TIME_OLD +#define PERL_HASH_FUNC_BUZZHASH16 */ -#if !(defined(PERL_HASH_FUNC_SDBM) || defined(PERL_HASH_FUNC_DJB2) || defined(PERL_HASH_FUNC_SUPERFAST) || defined(PERL_HASH_FUNC_MURMUR3) || defined(PERL_HASH_FUNC_ONE_AT_A_TIME)) -#define PERL_HASH_FUNC_MURMUR3 +#if !( 0 \ + || defined(PERL_HASH_FUNC_SDBM) \ + || defined(PERL_HASH_FUNC_DJB2) \ + || defined(PERL_HASH_FUNC_SUPERFAST) \ + || defined(PERL_HASH_FUNC_MURMUR3) \ + || defined(PERL_HASH_FUNC_ONE_AT_A_TIME) \ + || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD) \ + || defined(PERL_HASH_FUNC_BUZZHASH16) \ + ) +#ifdef U64 +#define PERL_HASH_FUNC_SIPHASH +#else +#define PERL_HASH_FUNC_ONE_AT_A_TIME +#endif +#endif + +#if defined(PERL_HASH_FUNC_BUZZHASH16) +/* "BUZZHASH16" + * + * I whacked this together while just playing around. + * + * The idea is that instead of hashing the actual string input we use the + * bytes of the string as an index into a table of randomly generated + * 16 bit values. + * + * A left rotate is used to "mix" in previous bits as we go, and I borrowed + * the avalanche function from one-at-a-time for the final step. A lookup + * into the table based on the lower 8 bits of the length combined with + * the length itself is used as an itializer. + * + * The resulting hash value has no actual bits fed in from the string so + * I would guess it is pretty secure, although I am not a cryptographer + * and have no idea for sure. Nor has it been rigorously tested. On the + * other hand it is reasonably fast, and seems to produce reasonable + * distributions. + * + * Yves Orton + */ + + +#define PERL_HASH_FUNC "BUZZHASH16" +#define PERL_HASH_SEED_BYTES 512 /* 2 bytes per octet value, 2 * 256 */ +/* Find best way to ROTL32 */ +#if defined(_MSC_VER) + #include /* Microsoft put _rotl declaration in here */ + #define BUZZHASH_ROTL32(x,r) _rotl(x,r) +#else + /* gcc recognises this code and generates a rotate instruction for CPUs with one */ + #define BUZZHASH_ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r))) #endif -#if defined(PERL_HASH_FUNC_SIPHASH) +#define PERL_HASH(hash,str,len) \ + STMT_START { \ + const char * const s_PeRlHaSh_tmp = (str); \ + const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ + const unsigned char *end_PeRlHaSh = (const unsigned char *)s_PeRlHaSh + len; \ + U32 hash_PeRlHaSh = (PERL_HASH_SEED_U16_x(len & 0xff) << 16) + len; \ + while (s_PeRlHaSh < end_PeRlHaSh) { \ + hash_PeRlHaSh ^= PERL_HASH_SEED_U16_x((U8)*s_PeRlHaSh++); \ + hash_PeRlHaSh += BUZZHASH_ROTL32(hash_PeRlHaSh,11); \ + } \ + hash_PeRlHaSh += (hash_PeRlHaSh << 3); \ + hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \ + (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \ + } STMT_END + +#elif defined(PERL_HASH_FUNC_SIPHASH) #define PERL_HASH_FUNC "SIPHASH" #define PERL_HASH_SEED_BYTES 16 @@ -261,6 +326,7 @@ struct xpvhv { #elif defined(PERL_HASH_FUNC_SUPERFAST) #define PERL_HASH_FUNC "SUPERFAST" +#define PERL_HASH_SEED_BYTES 4 /* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in * (http://burtleburtle.net/bob/hash/doobs.html) * It is by Paul Hsieh (c) 2004 and is analysed here @@ -280,12 +346,12 @@ struct xpvhv { #endif #define PERL_HASH(hash,str,len) \ STMT_START { \ - register const char * const strtmp_PeRlHaSh = (str); \ - register const unsigned char *str_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \ - register U32 len_PeRlHaSh = (len); \ - register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ - register U32 tmp_PeRlHaSh; \ - register int rem_PeRlHaSh= len_PeRlHaSh & 3; \ + const char * const strtmp_PeRlHaSh = (str); \ + const unsigned char *str_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \ + U32 len_PeRlHaSh = (len); \ + U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ + U32 tmp_PeRlHaSh; \ + int rem_PeRlHaSh= len_PeRlHaSh & 3; \ len_PeRlHaSh >>= 2; \ \ for (;len_PeRlHaSh > 0; len_PeRlHaSh--) { \ @@ -459,9 +525,9 @@ struct xpvhv { #if defined(UNALIGNED_SAFE) #define PERL_HASH(hash,str,len) STMT_START { \ - register const char * const s_PeRlHaSh_tmp = (str); \ - register const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \ - register I32 PeRlHaSh_len = len; \ + const char * const s_PeRlHaSh_tmp = (str); \ + const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \ + I32 PeRlHaSh_len = len; \ \ U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32; \ U32 PeRlHaSh_k1; \ @@ -484,9 +550,9 @@ struct xpvhv { } STMT_END #else #define PERL_HASH(hash,str,len) STMT_START { \ - register const char * const s_PeRlHaSh_tmp = (str); \ - register const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \ - register I32 PeRlHaSh_len = len; \ + const char * const s_PeRlHaSh_tmp = (str); \ + const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \ + I32 PeRlHaSh_len = len; \ \ U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32; \ U32 PeRlHaSh_k1; \ @@ -548,10 +614,10 @@ struct xpvhv { #define PERL_HASH_SEED_BYTES 4 #define PERL_HASH(hash,str,len) \ STMT_START { \ - register const char * const s_PeRlHaSh_tmp = (str); \ - register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ - register I32 i_PeRlHaSh = len; \ - register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ + const char * const s_PeRlHaSh_tmp = (str); \ + const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ + I32 i_PeRlHaSh = len; \ + U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ while (i_PeRlHaSh--) { \ hash_PeRlHaSh = ((hash_PeRlHaSh << 5) + hash_PeRlHaSh) + *s_PeRlHaSh++; \ } \ @@ -563,31 +629,40 @@ struct xpvhv { #define PERL_HASH_SEED_BYTES 4 #define PERL_HASH(hash,str,len) \ STMT_START { \ - register const char * const s_PeRlHaSh_tmp = (str); \ - register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ - register I32 i_PeRlHaSh = len; \ - register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ + const char * const s_PeRlHaSh_tmp = (str); \ + const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ + I32 i_PeRlHaSh = len; \ + U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ while (i_PeRlHaSh--) { \ hash_PeRlHaSh = (hash_PeRlHaSh << 6) + (hash_PeRlHaSh << 16) - hash_PeRlHaSh + *s_PeRlHaSh++; \ } \ (hash) = hash_PeRlHaSh;\ } STMT_END -#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME) -/* DEFAULT/HISTORIC HASH FUNCTION */ -#define PERL_HASH_FUNC "ONE_AT_A_TIME" +#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME) || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD) + #define PERL_HASH_SEED_BYTES 4 +#ifdef PERL_HASH_FUNC_ONE_AT_A_TIME +/* new version, add the length to the seed so that adding characters changes the "seed" being used. */ +#define PERL_HASH_FUNC "ONE_AT_A_TIME" +#define MIX_SEED_AND_LEN(seed,len) (seed + len) +#else +/* old version, just use the seed. - not recommended */ +#define PERL_HASH_FUNC "ONE_AT_A_TIME_OLD" +#define MIX_SEED_AND_LEN(seed,len) (seed) +#endif + /* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins * from requirements by Colin Plumb. * (http://burtleburtle.net/bob/hash/doobs.html) */ #define PERL_HASH(hash,str,len) \ STMT_START { \ - register const char * const s_PeRlHaSh_tmp = (str); \ - register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ - register I32 i_PeRlHaSh = len; \ - register U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len; \ - while (i_PeRlHaSh--) { \ + const char * const s_PeRlHaSh_tmp = (str); \ + const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ + const unsigned char *end_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp + (len); \ + U32 hash_PeRlHaSh = MIX_SEED_AND_LEN(PERL_HASH_SEED_U32, len); \ + while (s_PeRlHaSh < end_PeRlHaSh) { \ hash_PeRlHaSh += (U8)*s_PeRlHaSh++; \ hash_PeRlHaSh += (hash_PeRlHaSh << 10); \ hash_PeRlHaSh ^= (hash_PeRlHaSh >> 6); \