2 *--------------------------------------------------------------------------------------
3 * The "hash seed" feature was added in Perl 5.8.1 to perturb the results
4 * to avoid "algorithmic complexity attacks".
6 * If USE_HASH_SEED is defined, hash randomisation is done by default
7 * If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done
8 * only if the environment variable PERL_HASH_SEED is set.
9 * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed())
12 #ifndef PERL_SEEN_HV_FUNC_H /* compile once */
13 #define PERL_SEEN_HV_FUNC_H
16 || defined(PERL_HASH_FUNC_SIPHASH) \
17 || defined(PERL_HASH_FUNC_SDBM) \
18 || defined(PERL_HASH_FUNC_DJB2) \
19 || defined(PERL_HASH_FUNC_SUPERFAST) \
20 || defined(PERL_HASH_FUNC_MURMUR3) \
21 || defined(PERL_HASH_FUNC_ONE_AT_A_TIME) \
22 || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_HARD) \
23 || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD) \
24 || defined(PERL_HASH_FUNC_MURMUR_HASH_64A) \
25 || defined(PERL_HASH_FUNC_MURMUR_HASH_64B) \
27 #define PERL_HASH_FUNC_ONE_AT_A_TIME_HARD
30 #if defined(PERL_HASH_FUNC_SIPHASH)
31 # define PERL_HASH_FUNC "SIPHASH_2_4"
32 # define PERL_HASH_SEED_BYTES 16
33 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_2_4((seed),(U8*)(str),(len))
34 #elif defined(PERL_HASH_FUNC_SUPERFAST)
35 # define PERL_HASH_FUNC "SUPERFAST"
36 # define PERL_HASH_SEED_BYTES 4
37 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_superfast((seed),(U8*)(str),(len))
38 #elif defined(PERL_HASH_FUNC_MURMUR3)
39 # define PERL_HASH_FUNC "MURMUR3"
40 # define PERL_HASH_SEED_BYTES 4
41 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_murmur3((seed),(U8*)(str),(len))
42 #elif defined(PERL_HASH_FUNC_DJB2)
43 # define PERL_HASH_FUNC "DJB2"
44 # define PERL_HASH_SEED_BYTES 4
45 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_djb2((seed),(U8*)(str),(len))
46 #elif defined(PERL_HASH_FUNC_SDBM)
47 # define PERL_HASH_FUNC "SDBM"
48 # define PERL_HASH_SEED_BYTES 4
49 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_sdbm((seed),(U8*)(str),(len))
50 #elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME_HARD)
51 # define PERL_HASH_FUNC "ONE_AT_A_TIME_HARD"
52 # define PERL_HASH_SEED_BYTES 8
53 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_one_at_a_time_hard((seed),(U8*)(str),(len))
54 #elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME)
55 # define PERL_HASH_FUNC "ONE_AT_A_TIME"
56 # define PERL_HASH_SEED_BYTES 4
57 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_one_at_a_time((seed),(U8*)(str),(len))
58 #elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD)
59 # define PERL_HASH_FUNC "ONE_AT_A_TIME_OLD"
60 # define PERL_HASH_SEED_BYTES 4
61 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_old_one_at_a_time((seed),(U8*)(str),(len))
62 #elif defined(PERL_HASH_FUNC_MURMUR_HASH_64A)
63 # define PERL_HASH_FUNC "MURMUR_HASH_64A"
64 # define PERL_HASH_SEED_BYTES 8
65 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_murmur_hash_64a((seed),(U8*)(str),(len))
66 #elif defined(PERL_HASH_FUNC_MURMUR_HASH_64B)
67 # define PERL_HASH_FUNC "MURMUR_HASH_64B"
68 # define PERL_HASH_SEED_BYTES 8
69 # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_murmur_hash_64b((seed),(U8*)(str),(len))
72 #ifndef PERL_HASH_WITH_SEED
73 #error "No hash function defined!"
75 #ifndef PERL_HASH_SEED_BYTES
76 #error "PERL_HASH_SEED_BYTES not defined"
78 #ifndef PERL_HASH_FUNC
79 #error "PERL_HASH_FUNC not defined"
82 #ifndef PERL_HASH_SEED
83 # if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT)
84 # define PERL_HASH_SEED PL_hash_seed
85 # elif PERL_HASH_SEED_BYTES == 4
86 # define PERL_HASH_SEED "PeRl"
87 # elif PERL_HASH_SEED_BYTES == 16
88 # define PERL_HASH_SEED "PeRlHaShhAcKpErl"
90 # error "No PERL_HASH_SEED definition for " PERL_HASH_FUNC
94 #define PERL_HASH(hash,str,len) PERL_HASH_WITH_SEED(PERL_HASH_SEED,hash,str,len)
96 /*-----------------------------------------------------------------------------
97 * Endianess, misalignment capabilities and util macros
99 * The following 3 macros are defined in this section. The other macros defined
100 * are only needed to help derive these 3.
102 * U8TO32_LE(x) Read a little endian unsigned 32-bit int
103 * UNALIGNED_SAFE Defined if unaligned access is safe
104 * ROTL32(x,r) Rotate x left by r bits
107 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
108 || defined(_MSC_VER) || defined (__TURBOC__)
109 #define U8TO16_LE(d) (*((const U16 *) (d)))
112 #if !defined (U8TO16_LE)
113 #define U8TO16_LE(d) ((((const U8 *)(d))[1] << 8)\
114 +((const U8 *)(d))[0])
117 #if (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) && U32SIZE == 4
118 /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
119 #define U8TO32_LE(ptr) (*((U32*)(ptr)))
120 #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
121 /* TODO: Add additional cases below where a compiler provided bswap32 is available */
122 #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
123 #define U8TO32_LE(ptr) (__builtin_bswap32(*((U32*)(ptr))))
125 /* Without a known fast bswap32 we're just as well off doing this */
126 #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
127 #define UNALIGNED_SAFE
130 /* Unknown endianess so last resort is to read individual bytes */
131 #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
132 /* Since we're not doing word-reads we can skip the messing about with realignment */
133 #define UNALIGNED_SAFE
138 /* This probably isn't going to work, but failing with a compiler error due to
139 lack of uint64_t is no worse than failing right now with an #error. */
144 /* Find best way to ROTL32/ROTL64 */
145 #if defined(_MSC_VER)
146 #include <stdlib.h> /* Microsoft put _rotl declaration in here */
147 #define ROTL32(x,r) _rotl(x,r)
149 #define ROTL64(x,r) _rotl64(x,r)
152 /* gcc recognises this code and generates a rotate instruction for CPUs with one */
153 #define ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r)))
155 #define ROTL64(x,r) (((U64)x << r) | ((U64)x >> (64 - r)))
161 #define ROTL_UV(x,r) ROTL64(x,r)
163 #define ROTL_UV(x,r) ROTL32(x,r)
166 /* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein.
167 * The authors claim it is relatively secure compared to the alternatives
168 * and that performance wise it is a suitable hash for languages like Perl.
171 * https://www.131002.net/siphash/
173 * This implementation seems to perform slightly slower than one-at-a-time for
174 * short keys, but degrades slower for longer keys. Murmur Hash outperforms it
175 * regardless of keys size.
182 #define U8TO64_LE(p) \
183 (((U64)((p)[0]) ) | \
184 ((U64)((p)[1]) << 8) | \
185 ((U64)((p)[2]) << 16) | \
186 ((U64)((p)[3]) << 24) | \
187 ((U64)((p)[4]) << 32) | \
188 ((U64)((p)[5]) << 40) | \
189 ((U64)((p)[6]) << 48) | \
190 ((U64)((p)[7]) << 56))
194 v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \
195 v2 += v3; v3=ROTL64(v3,16); v3 ^= v2; \
196 v0 += v3; v3=ROTL64(v3,21); v3 ^= v0; \
197 v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \
202 PERL_STATIC_INLINE U32
203 S_perl_hash_siphash_2_4(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) {
204 /* "somepseudorandomlygeneratedbytes" */
205 U64 v0 = UINT64_C(0x736f6d6570736575);
206 U64 v1 = UINT64_C(0x646f72616e646f6d);
207 U64 v2 = UINT64_C(0x6c7967656e657261);
208 U64 v3 = UINT64_C(0x7465646279746573);
211 U64 k0 = ((U64*)seed)[0];
212 U64 k1 = ((U64*)seed)[1];
214 const int left = inlen & 7;
215 const U8 *end = in + inlen - left;
217 b = ( ( U64 )(inlen) ) << 56;
223 for ( ; in != end; in += 8 )
234 case 7: b |= ( ( U64 )in[ 6] ) << 48;
235 case 6: b |= ( ( U64 )in[ 5] ) << 40;
236 case 5: b |= ( ( U64 )in[ 4] ) << 32;
237 case 4: b |= ( ( U64 )in[ 3] ) << 24;
238 case 3: b |= ( ( U64 )in[ 2] ) << 16;
239 case 2: b |= ( ( U64 )in[ 1] ) << 8;
240 case 1: b |= ( ( U64 )in[ 0] ); break;
254 b = v0 ^ v1 ^ v2 ^ v3;
255 return (U32)(b & U32_MAX);
257 #endif /* defined(HAS_QUAD) */
259 /* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in
260 * (http://burtleburtle.net/bob/hash/doobs.html)
261 * It is by Paul Hsieh (c) 2004 and is analysed here
262 * http://www.azillionmonkeys.com/qed/hash.html
263 * license terms are here:
264 * http://www.azillionmonkeys.com/qed/weblicense.html
268 PERL_STATIC_INLINE U32
269 S_perl_hash_superfast(const unsigned char * const seed, const unsigned char *str, STRLEN len) {
270 U32 hash = *((U32*)seed) + (U32)len;
275 for (;len > 0; len--) {
276 hash += U8TO16_LE (str);
277 tmp = (U8TO16_LE (str+2) << 11) ^ hash;
278 hash = (hash << 16) ^ tmp;
279 str += 2 * sizeof (U16);
283 /* Handle end cases */
285 case 3: hash += U8TO16_LE (str);
287 hash ^= str[sizeof (U16)] << 18;
290 case 2: hash += U8TO16_LE (str);
294 case 1: hash += *str;
298 /* Force "avalanching" of final 127 bits */
304 return (hash + (hash >> 6));
308 /*-----------------------------------------------------------------------------
309 * MurmurHash3 was written by Austin Appleby, and is placed in the public
312 * This implementation was originally written by Shane Day, and is also public domain,
313 * and was modified to function as a macro similar to other perl hash functions by
316 * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
317 * with support for progressive processing.
319 * If you want to understand the MurmurHash algorithm you would be much better
320 * off reading the original source. Just point your browser at:
321 * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
325 * We can only process entire 32 bit chunks of input, except for the very end
326 * that may be shorter.
328 * To handle endianess I simply use a macro that reads a U32 and define
329 * that macro to be a direct read on little endian machines, a read and swap
330 * on big endian machines, or a byte-by-byte read if the endianess is unknown.
334 /*-----------------------------------------------------------------------------
335 * Core murmurhash algorithm macros */
337 #define MURMUR_C1 (0xcc9e2d51)
338 #define MURMUR_C2 (0x1b873593)
339 #define MURMUR_C3 (0xe6546b64)
340 #define MURMUR_C4 (0x85ebca6b)
341 #define MURMUR_C5 (0xc2b2ae35)
343 /* This is the main processing body of the algorithm. It operates
344 * on each full 32-bits of input. */
345 #define MURMUR_DOBLOCK(h1, k1) STMT_START { \
347 k1 = ROTL32(k1,15); \
351 h1 = ROTL32(h1,13); \
352 h1 = h1 * 5 + MURMUR_C3; \
356 /* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
357 /* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
358 #define MURMUR_DOBYTES(cnt, h1, c, n, ptr, len) STMT_START { \
359 int MURMUR_DOBYTES_i = cnt; \
360 while(MURMUR_DOBYTES_i--) { \
361 c = c>>8 | *ptr++<<24; \
364 MURMUR_DOBLOCK(h1, c); \
371 /* now we create the hash function */
372 PERL_STATIC_INLINE U32
373 S_perl_hash_murmur3(const unsigned char * const seed, const unsigned char *ptr, STRLEN len) {
374 U32 h1 = *((U32*)seed);
378 const unsigned char *end;
379 int bytes_in_carry = 0; /* bytes in carry */
380 I32 total_length= (I32)len;
382 #if defined(UNALIGNED_SAFE)
383 /* Handle carry: commented out as its only used in incremental mode - it never fires for us
386 MURMUR_DOBYTES(i, h1, carry, bytes_in_carry, ptr, len);
390 /* This CPU handles unaligned word access */
391 /* Process 32-bit chunks */
393 for( ; ptr < end ; ptr+=4) {
395 MURMUR_DOBLOCK(h1, k1);
398 /* This CPU does not handle unaligned word access */
400 /* Consume enough so that the next data byte is word aligned */
401 STRLEN i = -PTR2IV(ptr) & 3;
403 MURMUR_DOBYTES((int)i, h1, carry, bytes_in_carry, ptr, len);
406 /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
408 switch(bytes_in_carry) { /* how many bytes in carry */
409 case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */
410 for( ; ptr < end ; ptr+=4) {
412 MURMUR_DOBLOCK(h1, k1);
415 case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */
416 for( ; ptr < end ; ptr+=4) {
418 carry = U8TO32_LE(ptr);
420 MURMUR_DOBLOCK(h1, k1);
423 case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */
424 for( ; ptr < end ; ptr+=4) {
426 carry = U8TO32_LE(ptr);
428 MURMUR_DOBLOCK(h1, k1);
431 case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */
432 for( ; ptr < end ; ptr+=4) {
434 carry = U8TO32_LE(ptr);
436 MURMUR_DOBLOCK(h1, k1);
440 /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */
443 /* Append any remaining bytes into carry */
444 MURMUR_DOBYTES((int)len, h1, carry, bytes_in_carry, ptr, len);
446 if (bytes_in_carry) {
447 k1 = carry >> ( 4 - bytes_in_carry ) * 8;
465 PERL_STATIC_INLINE U32
466 S_perl_hash_djb2(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
467 const unsigned char * const end = (const unsigned char *)str + len;
468 U32 hash = *((U32*)seed) + (U32)len;
470 hash = ((hash << 5) + hash) + *str++;
475 PERL_STATIC_INLINE U32
476 S_perl_hash_sdbm(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
477 const unsigned char * const end = (const unsigned char *)str + len;
478 U32 hash = *((U32*)seed) + (U32)len;
480 hash = (hash << 6) + (hash << 16) - hash + *str++;
485 /* - ONE_AT_A_TIME_HARD is the 5.17+ recommend ONE_AT_A_TIME algorithm
486 * - ONE_AT_A_TIME_OLD is the unmodified 5.16 and older algorithm
487 * - ONE_AT_A_TIME is a 5.17+ tweak of ONE_AT_A_TIME_OLD to
488 * prevent strings of only \0 but different lengths from colliding
490 * Security-wise, from best to worst,
491 * ONE_AT_A_TIME_HARD > ONE_AT_A_TIME > ONE_AT_A_TIME_OLD
492 * There is a big drop-off in security between ONE_AT_A_TIME_HARD and
496 /* This is the "One-at-a-Time" algorithm by Bob Jenkins
497 * from requirements by Colin Plumb.
498 * (http://burtleburtle.net/bob/hash/doobs.html)
499 * With seed/len tweak.
501 PERL_STATIC_INLINE U32
502 S_perl_hash_one_at_a_time(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
503 const unsigned char * const end = (const unsigned char *)str + len;
504 U32 hash = *((U32*)seed) + (U32)len;
507 hash += (hash << 10);
511 hash ^= (hash >> 11);
512 return (hash + (hash << 15));
515 /* Derived from "One-at-a-Time" algorithm by Bob Jenkins */
516 PERL_STATIC_INLINE U32
517 S_perl_hash_one_at_a_time_hard(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
518 const unsigned char * const end = (const unsigned char *)str + len;
519 U32 hash = *((U32*)seed) + (U32)len;
522 hash += (hash << 10);
527 hash += (hash << 10);
531 hash += (hash << 10);
535 hash += (hash << 10);
539 hash += (hash << 10);
543 hash += (hash << 10);
547 hash ^= (hash >> 11);
548 return (hash + (hash << 15));
551 PERL_STATIC_INLINE U32
552 S_perl_hash_old_one_at_a_time(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
553 const unsigned char * const end = (const unsigned char *)str + len;
554 U32 hash = *((U32*)seed);
557 hash += (hash << 10);
561 hash ^= (hash >> 11);
562 return (hash + (hash << 15));
565 #ifdef PERL_HASH_FUNC_MURMUR_HASH_64A
566 /* This code is from Austin Appleby and is in the public domain.
567 Altered by Yves Orton to match Perl's hash interface, and to
568 return a 32 bit hash.
570 Note uses unaligned 64 bit loads - will NOT work on machines with
571 strict alignment requirements.
573 Also this code may not be suitable for big-endian machines.
576 /* a 64 bit hash where we only use the low 32 bits */
577 PERL_STATIC_INLINE U32
578 S_perl_hash_murmur_hash_64a (const unsigned char * const seed, const unsigned char *str, const STRLEN len)
580 const U64 m = UINT64_C(0xc6a4a7935bd1e995);
582 U64 h = *((U64*)seed) ^ len;
583 const U64 * data = (const U64 *)str;
584 const U64 * end = data + (len/8);
585 const unsigned char * data2;
599 data2 = (const unsigned char *)data;
603 case 7: h ^= (U64)(data2[6]) << 48; /* fallthrough */
604 case 6: h ^= (U64)(data2[5]) << 40; /* fallthrough */
605 case 5: h ^= (U64)(data2[4]) << 32; /* fallthrough */
606 case 4: h ^= (U64)(data2[3]) << 24; /* fallthrough */
607 case 3: h ^= (U64)(data2[2]) << 16; /* fallthrough */
608 case 2: h ^= (U64)(data2[1]) << 8; /* fallthrough */
609 case 1: h ^= (U64)(data2[0]); /* fallthrough */
618 return h & 0xFFFFFFFF;
623 #ifdef PERL_HASH_FUNC_MURMUR_HASH_64B
624 /* This code is from Austin Appleby and is in the public domain.
625 Altered by Yves Orton to match Perl's hash interface and return
628 Note uses unaligned 32 bit loads - will NOT work on machines with
629 strict alignment requirements.
631 Also this code may not be suitable for big-endian machines.
634 /* a 64-bit hash for 32-bit platforms where we only use the low 32 bits */
635 PERL_STATIC_INLINE U32
636 S_perl_hash_murmur_hash_64b (const unsigned char * const seed, const unsigned char *str, STRLEN len)
638 const U32 m = 0x5bd1e995;
641 U32 h1 = ((U32 *)seed)[0] ^ len;
642 U32 h2 = ((U32 *)seed)[1];
644 const U32 * data = (const U32 *)str;
650 k1 *= m; k1 ^= k1 >> r; k1 *= m;
655 k2 *= m; k2 ^= k2 >> r; k2 *= m;
663 k1 *= m; k1 ^= k1 >> r; k1 *= m;
670 case 3: h2 ^= ((unsigned char*)data)[2] << 16; /* fallthrough */
671 case 2: h2 ^= ((unsigned char*)data)[1] << 8; /* fallthrough */
672 case 1: h2 ^= ((unsigned char*)data)[0]; /* fallthrough */
676 h1 ^= h2 >> 18; h1 *= m;
677 h2 ^= h1 >> 22; h2 *= m;
679 The following code has been removed as it is unused
680 when only the low 32 bits are used. -- Yves
682 h1 ^= h2 >> 17; h1 *= m;
693 /* legacy - only mod_perl should be doing this. */
694 #ifdef PERL_HASH_INTERNAL_ACCESS
695 #define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len)
698 #endif /*compile once*/
701 * ex: set ts=8 sts=4 sw=4 et: