2 *--------------------------------------------------------------------------------------
3 * The "hash seed" feature was added in Perl 5.8.1 to perturb the results
4 * to avoid "algorithmic complexity attacks".
6 * If USE_HASH_SEED is defined, hash randomisation is done by default
7 * If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done
8 * only if the environment variable PERL_HASH_SEED is set.
9 * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed())
12 #ifndef PERL_SEEN_HV_FUNC_H /* compile once */
13 #define PERL_SEEN_HV_FUNC_H
16 || defined(PERL_HASH_FUNC_SIPHASH) \
17 || defined(PERL_HASH_FUNC_SDBM) \
18 || defined(PERL_HASH_FUNC_DJB2) \
19 || defined(PERL_HASH_FUNC_SUPERFAST) \
20 || defined(PERL_HASH_FUNC_MURMUR3) \
21 || defined(PERL_HASH_FUNC_ONE_AT_A_TIME) \
22 || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_HARD) \
23 || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD) \
24 || defined(PERL_HASH_FUNC_MURMUR_HASH_64A) \
25 || defined(PERL_HASH_FUNC_MURMUR_HASH_64B) \
27 #define PERL_HASH_FUNC_ONE_AT_A_TIME_HARD
30 #if defined(PERL_HASH_FUNC_SIPHASH)
31 # define PERL_HASH_FUNC "SIPHASH_2_4"
32 # define PERL_HASH_SEED_BYTES 16
33 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_2_4((seed),(U8*)(str),(len))
34 #elif defined(PERL_HASH_FUNC_SUPERFAST)
35 # define PERL_HASH_FUNC "SUPERFAST"
36 # define PERL_HASH_SEED_BYTES 4
37 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_superfast((seed),(U8*)(str),(len))
38 #elif defined(PERL_HASH_FUNC_MURMUR3)
39 # define PERL_HASH_FUNC "MURMUR3"
40 # define PERL_HASH_SEED_BYTES 4
41 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_murmur3((seed),(U8*)(str),(len))
42 #elif defined(PERL_HASH_FUNC_DJB2)
43 # define PERL_HASH_FUNC "DJB2"
44 # define PERL_HASH_SEED_BYTES 4
45 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_djb2((seed),(U8*)(str),(len))
46 #elif defined(PERL_HASH_FUNC_SDBM)
47 # define PERL_HASH_FUNC "SDBM"
48 # define PERL_HASH_SEED_BYTES 4
49 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_sdbm((seed),(U8*)(str),(len))
50 #elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME_HARD)
51 # define PERL_HASH_FUNC "ONE_AT_A_TIME_HARD"
52 # define PERL_HASH_SEED_BYTES 8
53 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_one_at_a_time_hard((seed),(U8*)(str),(len))
54 #elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME)
55 # define PERL_HASH_FUNC "ONE_AT_A_TIME"
56 # define PERL_HASH_SEED_BYTES 4
57 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_one_at_a_time((seed),(U8*)(str),(len))
58 #elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD)
59 # define PERL_HASH_FUNC "ONE_AT_A_TIME_OLD"
60 # define PERL_HASH_SEED_BYTES 4
61 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_old_one_at_a_time((seed),(U8*)(str),(len))
62 #elif defined(PERL_HASH_FUNC_MURMUR_HASH_64A)
63 # define PERL_HASH_FUNC "MURMUR_HASH_64A"
64 # define PERL_HASH_SEED_BYTES 8
65 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_murmur_hash_64a((seed),(U8*)(str),(len))
66 #elif defined(PERL_HASH_FUNC_MURMUR_HASH_64B)
67 # define PERL_HASH_FUNC "MURMUR_HASH_64B"
68 # define PERL_HASH_SEED_BYTES 8
69 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_murmur_hash_64b((seed),(U8*)(str),(len))
72 #ifndef PERL_HASH_WITH_SEED
73 #error "No hash function defined!"
75 #ifndef PERL_HASH_SEED_BYTES
76 #error "PERL_HASH_SEED_BYTES not defined"
78 #ifndef PERL_HASH_FUNC
79 #error "PERL_HASH_FUNC not defined"
82 #ifndef PERL_HASH_SEED
83 # if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT)
84 # define PERL_HASH_SEED PL_hash_seed
85 # elif PERL_HASH_SEED_BYTES == 4
86 # define PERL_HASH_SEED ((const U8 *)"PeRl")
87 # elif PERL_HASH_SEED_BYTES == 8
88 # define PERL_HASH_SEED ((const U8 *)"PeRlHaSh")
89 # elif PERL_HASH_SEED_BYTES == 16
90 # define PERL_HASH_SEED ((const U8 *)"PeRlHaShhAcKpErl")
92 # error "No PERL_HASH_SEED definition for " PERL_HASH_FUNC
96 #define PERL_HASH(hash,str,len) PERL_HASH_WITH_SEED(PERL_HASH_SEED,hash,str,len)
98 /*-----------------------------------------------------------------------------
99 * Endianess, misalignment capabilities and util macros
101 * The following 3 macros are defined in this section. The other macros defined
102 * are only needed to help derive these 3.
104 * U8TO32_LE(x) Read a little endian unsigned 32-bit int
105 * UNALIGNED_SAFE Defined if unaligned access is safe
106 * ROTL32(x,r) Rotate x left by r bits
109 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
110 || defined(_MSC_VER) || defined (__TURBOC__)
111 #define U8TO16_LE(d) (*((const U16 *) (d)))
114 #if !defined (U8TO16_LE)
115 #define U8TO16_LE(d) ((((const U8 *)(d))[1] << 8)\
116 +((const U8 *)(d))[0])
119 #if (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) && U32SIZE == 4
120 /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
121 #define U8TO32_LE(ptr) (*((U32*)(ptr)))
122 #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
123 /* TODO: Add additional cases below where a compiler provided bswap32 is available */
124 #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
125 #define U8TO32_LE(ptr) (__builtin_bswap32(*((U32*)(ptr))))
127 /* Without a known fast bswap32 we're just as well off doing this */
128 #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
129 #define UNALIGNED_SAFE
132 /* Unknown endianess so last resort is to read individual bytes */
133 #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
134 /* Since we're not doing word-reads we can skip the messing about with realignment */
135 #define UNALIGNED_SAFE
140 /* This probably isn't going to work, but failing with a compiler error due to
141 lack of uint64_t is no worse than failing right now with an #error. */
146 /* Find best way to ROTL32/ROTL64 */
147 #if defined(_MSC_VER)
148 #include <stdlib.h> /* Microsoft put _rotl declaration in here */
149 #define ROTL32(x,r) _rotl(x,r)
151 #define ROTL64(x,r) _rotl64(x,r)
154 /* gcc recognises this code and generates a rotate instruction for CPUs with one */
155 #define ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r)))
157 #define ROTL64(x,r) (((U64)x << r) | ((U64)x >> (64 - r)))
163 #define ROTL_UV(x,r) ROTL64(x,r)
165 #define ROTL_UV(x,r) ROTL32(x,r)
168 /* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein.
169 * The authors claim it is relatively secure compared to the alternatives
170 * and that performance wise it is a suitable hash for languages like Perl.
173 * https://www.131002.net/siphash/
175 * This implementation seems to perform slightly slower than one-at-a-time for
176 * short keys, but degrades slower for longer keys. Murmur Hash outperforms it
177 * regardless of keys size.
184 #define U8TO64_LE(p) \
185 (((U64)((p)[0]) ) | \
186 ((U64)((p)[1]) << 8) | \
187 ((U64)((p)[2]) << 16) | \
188 ((U64)((p)[3]) << 24) | \
189 ((U64)((p)[4]) << 32) | \
190 ((U64)((p)[5]) << 40) | \
191 ((U64)((p)[6]) << 48) | \
192 ((U64)((p)[7]) << 56))
196 v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \
197 v2 += v3; v3=ROTL64(v3,16); v3 ^= v2; \
198 v0 += v3; v3=ROTL64(v3,21); v3 ^= v0; \
199 v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \
204 PERL_STATIC_INLINE U32
205 S_perl_hash_siphash_2_4(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) {
206 /* "somepseudorandomlygeneratedbytes" */
207 U64 v0 = UINT64_C(0x736f6d6570736575);
208 U64 v1 = UINT64_C(0x646f72616e646f6d);
209 U64 v2 = UINT64_C(0x6c7967656e657261);
210 U64 v3 = UINT64_C(0x7465646279746573);
213 U64 k0 = ((U64*)seed)[0];
214 U64 k1 = ((U64*)seed)[1];
216 const int left = inlen & 7;
217 const U8 *end = in + inlen - left;
219 b = ( ( U64 )(inlen) ) << 56;
225 for ( ; in != end; in += 8 )
236 case 7: b |= ( ( U64 )in[ 6] ) << 48;
237 case 6: b |= ( ( U64 )in[ 5] ) << 40;
238 case 5: b |= ( ( U64 )in[ 4] ) << 32;
239 case 4: b |= ( ( U64 )in[ 3] ) << 24;
240 case 3: b |= ( ( U64 )in[ 2] ) << 16;
241 case 2: b |= ( ( U64 )in[ 1] ) << 8;
242 case 1: b |= ( ( U64 )in[ 0] ); break;
256 b = v0 ^ v1 ^ v2 ^ v3;
257 return (U32)(b & U32_MAX);
259 #endif /* defined(HAS_QUAD) */
261 /* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in
262 * (http://burtleburtle.net/bob/hash/doobs.html)
263 * It is by Paul Hsieh (c) 2004 and is analysed here
264 * http://www.azillionmonkeys.com/qed/hash.html
265 * license terms are here:
266 * http://www.azillionmonkeys.com/qed/weblicense.html
270 PERL_STATIC_INLINE U32
271 S_perl_hash_superfast(const unsigned char * const seed, const unsigned char *str, STRLEN len) {
272 U32 hash = *((U32*)seed) + (U32)len;
277 for (;len > 0; len--) {
278 hash += U8TO16_LE (str);
279 tmp = (U8TO16_LE (str+2) << 11) ^ hash;
280 hash = (hash << 16) ^ tmp;
281 str += 2 * sizeof (U16);
285 /* Handle end cases */
287 case 3: hash += U8TO16_LE (str);
289 hash ^= str[sizeof (U16)] << 18;
292 case 2: hash += U8TO16_LE (str);
296 case 1: hash += *str;
300 /* Force "avalanching" of final 127 bits */
306 return (hash + (hash >> 6));
310 /*-----------------------------------------------------------------------------
311 * MurmurHash3 was written by Austin Appleby, and is placed in the public
314 * This implementation was originally written by Shane Day, and is also public domain,
315 * and was modified to function as a macro similar to other perl hash functions by
318 * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
319 * with support for progressive processing.
321 * If you want to understand the MurmurHash algorithm you would be much better
322 * off reading the original source. Just point your browser at:
323 * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
327 * We can only process entire 32 bit chunks of input, except for the very end
328 * that may be shorter.
330 * To handle endianess I simply use a macro that reads a U32 and define
331 * that macro to be a direct read on little endian machines, a read and swap
332 * on big endian machines, or a byte-by-byte read if the endianess is unknown.
336 /*-----------------------------------------------------------------------------
337 * Core murmurhash algorithm macros */
339 #define MURMUR_C1 (0xcc9e2d51)
340 #define MURMUR_C2 (0x1b873593)
341 #define MURMUR_C3 (0xe6546b64)
342 #define MURMUR_C4 (0x85ebca6b)
343 #define MURMUR_C5 (0xc2b2ae35)
345 /* This is the main processing body of the algorithm. It operates
346 * on each full 32-bits of input. */
347 #define MURMUR_DOBLOCK(h1, k1) STMT_START { \
349 k1 = ROTL32(k1,15); \
353 h1 = ROTL32(h1,13); \
354 h1 = h1 * 5 + MURMUR_C3; \
358 /* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
359 /* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
360 #define MURMUR_DOBYTES(cnt, h1, c, n, ptr, len) STMT_START { \
361 int MURMUR_DOBYTES_i = cnt; \
362 while(MURMUR_DOBYTES_i--) { \
363 c = c>>8 | *ptr++<<24; \
366 MURMUR_DOBLOCK(h1, c); \
373 /* now we create the hash function */
374 PERL_STATIC_INLINE U32
375 S_perl_hash_murmur3(const unsigned char * const seed, const unsigned char *ptr, STRLEN len) {
376 U32 h1 = *((U32*)seed);
380 const unsigned char *end;
381 int bytes_in_carry = 0; /* bytes in carry */
382 I32 total_length= (I32)len;
384 #if defined(UNALIGNED_SAFE)
385 /* Handle carry: commented out as its only used in incremental mode - it never fires for us
388 MURMUR_DOBYTES(i, h1, carry, bytes_in_carry, ptr, len);
392 /* This CPU handles unaligned word access */
393 /* Process 32-bit chunks */
395 for( ; ptr < end ; ptr+=4) {
397 MURMUR_DOBLOCK(h1, k1);
400 /* This CPU does not handle unaligned word access */
402 /* Consume enough so that the next data byte is word aligned */
403 STRLEN i = -PTR2IV(ptr) & 3;
405 MURMUR_DOBYTES((int)i, h1, carry, bytes_in_carry, ptr, len);
408 /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
410 switch(bytes_in_carry) { /* how many bytes in carry */
411 case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */
412 for( ; ptr < end ; ptr+=4) {
414 MURMUR_DOBLOCK(h1, k1);
417 case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */
418 for( ; ptr < end ; ptr+=4) {
420 carry = U8TO32_LE(ptr);
422 MURMUR_DOBLOCK(h1, k1);
425 case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */
426 for( ; ptr < end ; ptr+=4) {
428 carry = U8TO32_LE(ptr);
430 MURMUR_DOBLOCK(h1, k1);
433 case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */
434 for( ; ptr < end ; ptr+=4) {
436 carry = U8TO32_LE(ptr);
438 MURMUR_DOBLOCK(h1, k1);
442 /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */
445 /* Append any remaining bytes into carry */
446 MURMUR_DOBYTES((int)len, h1, carry, bytes_in_carry, ptr, len);
448 if (bytes_in_carry) {
449 k1 = carry >> ( 4 - bytes_in_carry ) * 8;
467 PERL_STATIC_INLINE U32
468 S_perl_hash_djb2(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
469 const unsigned char * const end = (const unsigned char *)str + len;
470 U32 hash = *((U32*)seed) + (U32)len;
472 hash = ((hash << 5) + hash) + *str++;
477 PERL_STATIC_INLINE U32
478 S_perl_hash_sdbm(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
479 const unsigned char * const end = (const unsigned char *)str + len;
480 U32 hash = *((U32*)seed) + (U32)len;
482 hash = (hash << 6) + (hash << 16) - hash + *str++;
487 /* - ONE_AT_A_TIME_HARD is the 5.17+ recommend ONE_AT_A_TIME algorithm
488 * - ONE_AT_A_TIME_OLD is the unmodified 5.16 and older algorithm
489 * - ONE_AT_A_TIME is a 5.17+ tweak of ONE_AT_A_TIME_OLD to
490 * prevent strings of only \0 but different lengths from colliding
492 * Security-wise, from best to worst,
493 * ONE_AT_A_TIME_HARD > ONE_AT_A_TIME > ONE_AT_A_TIME_OLD
494 * There is a big drop-off in security between ONE_AT_A_TIME_HARD and
498 /* This is the "One-at-a-Time" algorithm by Bob Jenkins
499 * from requirements by Colin Plumb.
500 * (http://burtleburtle.net/bob/hash/doobs.html)
501 * With seed/len tweak.
503 PERL_STATIC_INLINE U32
504 S_perl_hash_one_at_a_time(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
505 const unsigned char * const end = (const unsigned char *)str + len;
506 U32 hash = *((U32*)seed) + (U32)len;
509 hash += (hash << 10);
513 hash ^= (hash >> 11);
514 return (hash + (hash << 15));
517 /* Derived from "One-at-a-Time" algorithm by Bob Jenkins */
518 PERL_STATIC_INLINE U32
519 S_perl_hash_one_at_a_time_hard(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
520 const unsigned char * const end = (const unsigned char *)str + len;
521 U32 hash = *((U32*)seed) + (U32)len;
524 hash += (hash << 10);
529 hash += (hash << 10);
533 hash += (hash << 10);
537 hash += (hash << 10);
541 hash += (hash << 10);
545 hash += (hash << 10);
549 hash ^= (hash >> 11);
550 return (hash + (hash << 15));
553 PERL_STATIC_INLINE U32
554 S_perl_hash_old_one_at_a_time(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
555 const unsigned char * const end = (const unsigned char *)str + len;
556 U32 hash = *((U32*)seed);
559 hash += (hash << 10);
563 hash ^= (hash >> 11);
564 return (hash + (hash << 15));
567 #ifdef PERL_HASH_FUNC_MURMUR_HASH_64A
568 /* This code is from Austin Appleby and is in the public domain.
569 Altered by Yves Orton to match Perl's hash interface, and to
570 return a 32 bit hash.
572 Note uses unaligned 64 bit loads - will NOT work on machines with
573 strict alignment requirements.
575 Also this code may not be suitable for big-endian machines.
578 /* a 64 bit hash where we only use the low 32 bits */
579 PERL_STATIC_INLINE U32
580 S_perl_hash_murmur_hash_64a (const unsigned char * const seed, const unsigned char *str, const STRLEN len)
582 const U64 m = UINT64_C(0xc6a4a7935bd1e995);
584 U64 h = *((U64*)seed) ^ len;
585 const U64 * data = (const U64 *)str;
586 const U64 * end = data + (len/8);
587 const unsigned char * data2;
601 data2 = (const unsigned char *)data;
605 case 7: h ^= (U64)(data2[6]) << 48; /* fallthrough */
606 case 6: h ^= (U64)(data2[5]) << 40; /* fallthrough */
607 case 5: h ^= (U64)(data2[4]) << 32; /* fallthrough */
608 case 4: h ^= (U64)(data2[3]) << 24; /* fallthrough */
609 case 3: h ^= (U64)(data2[2]) << 16; /* fallthrough */
610 case 2: h ^= (U64)(data2[1]) << 8; /* fallthrough */
611 case 1: h ^= (U64)(data2[0]); /* fallthrough */
620 return h & 0xFFFFFFFF;
625 #ifdef PERL_HASH_FUNC_MURMUR_HASH_64B
626 /* This code is from Austin Appleby and is in the public domain.
627 Altered by Yves Orton to match Perl's hash interface and return
630 Note uses unaligned 32 bit loads - will NOT work on machines with
631 strict alignment requirements.
633 Also this code may not be suitable for big-endian machines.
636 /* a 64-bit hash for 32-bit platforms where we only use the low 32 bits */
637 PERL_STATIC_INLINE U32
638 S_perl_hash_murmur_hash_64b (const unsigned char * const seed, const unsigned char *str, STRLEN len)
640 const U32 m = 0x5bd1e995;
643 U32 h1 = ((U32 *)seed)[0] ^ len;
644 U32 h2 = ((U32 *)seed)[1];
646 const U32 * data = (const U32 *)str;
652 k1 *= m; k1 ^= k1 >> r; k1 *= m;
657 k2 *= m; k2 ^= k2 >> r; k2 *= m;
665 k1 *= m; k1 ^= k1 >> r; k1 *= m;
672 case 3: h2 ^= ((unsigned char*)data)[2] << 16; /* fallthrough */
673 case 2: h2 ^= ((unsigned char*)data)[1] << 8; /* fallthrough */
674 case 1: h2 ^= ((unsigned char*)data)[0]; /* fallthrough */
678 h1 ^= h2 >> 18; h1 *= m;
679 h2 ^= h1 >> 22; h2 *= m;
681 The following code has been removed as it is unused
682 when only the low 32 bits are used. -- Yves
684 h1 ^= h2 >> 17; h1 *= m;
695 /* legacy - only mod_perl should be doing this. */
696 #ifdef PERL_HASH_INTERNAL_ACCESS
697 #define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len)
700 #endif /*compile once*/
703 * ex: set ts=8 sts=4 sw=4 et: