This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Split out hash functions into new file and turn into inline static functions
[perl5.git] / hv_func.h
CommitLineData
4d3a042d
YO
1/* hash a key
2 *--------------------------------------------------------------------------------------
3 * The "hash seed" feature was added in Perl 5.8.1 to perturb the results
4 * to avoid "algorithmic complexity attacks".
5 *
6 * If USE_HASH_SEED is defined, hash randomisation is done by default
7 * If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done
8 * only if the environment variable PERL_HASH_SEED is set.
9 * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed())
10 */
11
12#ifndef PERL_SEEN_HV_FUNC_H /* compile once */
13#define PERL_SEEN_HV_FUNC_H
14
15#if !( 0 \
16 || defined(PERL_HASH_FUNC_SDBM) \
17 || defined(PERL_HASH_FUNC_DJB2) \
18 || defined(PERL_HASH_FUNC_SUPERFAST) \
19 || defined(PERL_HASH_FUNC_MURMUR3) \
20 || defined(PERL_HASH_FUNC_ONE_AT_A_TIME) \
21 || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD) \
22 )
23#ifdef HAS_QUAD
24#define PERL_HASH_FUNC_SIPHASH
25#else
26#define PERL_HASH_FUNC_ONE_AT_A_TIME
27#endif
28#endif
29
30#if defined(PERL_HASH_FUNC_SIPHASH)
31# define PERL_HASH_FUNC "SIPHASH_2_4"
32# define PERL_HASH_SEED_BYTES 16
33# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_siphash_2_4(PERL_HASH_SEED,(U8*)(str),(len))
34#elif defined(PERL_HASH_FUNC_SUPERFAST)
35# define PERL_HASH_FUNC "SUPERFAST"
36# define PERL_HASH_SEED_BYTES 4
37# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_superfast(PERL_HASH_SEED,(U8*)(str),(len))
38#elif defined(PERL_HASH_FUNC_MURMUR3)
39# define PERL_HASH_FUNC "MURMUR3"
40# define PERL_HASH_SEED_BYTES 4
41# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_murmur3(PERL_HASH_SEED,(U8*)(str),(len))
42#elif defined(PERL_HASH_FUNC_DJB2)
43# define PERL_HASH_FUNC "DJB2"
44# define PERL_HASH_SEED_BYTES 4
45# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_djb2(PERL_HASH_SEED,(U8*)(str),(len))
46#elif defined(PERL_HASH_FUNC_SDBM)
47# define PERL_HASH_FUNC "SDBM"
48# define PERL_HASH_SEED_BYTES 4
49# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_sdbm(PERL_HASH_SEED,(U8*)(str),(len))
50#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME)
51# define PERL_HASH_FUNC "ONE_AT_A_TIME"
52# define PERL_HASH_SEED_BYTES 4
53# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_one_at_a_time(PERL_HASH_SEED,(U8*)(str),(len))
54#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD)
55# define PERL_HASH_FUNC "ONE_AT_A_TIME_OLD"
56# define PERL_HASH_SEED_BYTES 4
57# define PERL_HASH(hash,str,len) (hash)= S_perl_hash_old_one_at_a_time(PERL_HASH_SEED,(U8*)(str),(len))
58#endif
59
60#ifndef PERL_HASH
61#error "No hash function defined!"
62#endif
63#ifndef PERL_HASH_SEED_BYTES
64#error "PERL_HASH_SEED_BYTES not defined"
65#endif
66#ifndef PERL_HASH_FUNC
67#error "PERL_HASH_FUNC not defined"
68#endif
69
70#ifndef PERL_HASH_SEED
71# if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT)
72# define PERL_HASH_SEED PL_hash_seed
73# elif PERL_HASH_SEED_BYTES == 4
74# define PERL_HASH_SEED "PeRl"
75# elif PERL_HASH_SEED_BYTES == 16
76# define PERL_HASH_SEED "PeRlHaShhAcKpErl"
77# else
78# error "No PERL_HASH_SEED definition for " PERL_HASH_FUNC
79# endif
80#endif
81
82/*-----------------------------------------------------------------------------
83 * Endianess, misalignment capabilities and util macros
84 *
85 * The following 3 macros are defined in this section. The other macros defined
86 * are only needed to help derive these 3.
87 *
88 * U8TO32_LE(x) Read a little endian unsigned 32-bit int
89 * UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries
90 * ROTL32(x,r) Rotate x left by r bits
91 */
92
93#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
94 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
95#define U8TO16_LE(d) (*((const U16 *) (d)))
96#endif
97
98#if !defined (U8TO16_LE)
99#define U8TO16_LE(d) ((((const U8 *)(d))[1] << 8)\
100 +((const U8 *)(d))[0])
101#endif
102
103
104/* Now find best way we can to READ_UINT32 */
105#if (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) && U32SIZE == 4
106 /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
107 #define U8TO32_LE(ptr) (*((U32*)(ptr)))
108#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
109 /* TODO: Add additional cases below where a compiler provided bswap32 is available */
110 #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
111 #define U8TO32_LE(ptr) (__builtin_bswap32(*((U32*)(ptr))))
112 #else
113 /* Without a known fast bswap32 we're just as well off doing this */
114 #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
115 #define UNALIGNED_SAFE
116 #endif
117#else
118 /* Unknown endianess so last resort is to read individual bytes */
119 #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
120 /* Since we're not doing word-reads we can skip the messing about with realignment */
121 #define UNALIGNED_SAFE
122#endif
123
124/* Find best way to ROTL32 */
125#if defined(_MSC_VER)
126 #include <stdlib.h> /* Microsoft put _rotl declaration in here */
127 #define ROTL32(x,r) _rotl(x,r)
128#else
129 /* gcc recognises this code and generates a rotate instruction for CPUs with one */
130 #define ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r)))
131#endif
132
133
134/* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein.
135 * The authors claim it is relatively secure compared to the alternatives
136 * and that performance wise it is a suitable hash for languages like Perl.
137 * See:
138 *
139 * https://www.131002.net/siphash/
140 *
141 * This implementation seems to perform slightly slower than one-at-a-time for
142 * short keys, but degrades slower for longer keys. Murmur Hash outperforms it
143 * regardless of keys size.
144 *
145 * It is 64 bit only.
146 */
147
148#ifdef HAS_QUAD
149
150#ifndef U64TYPE
151/* This probably isn't going to work, but failing with a compiler error due to
152 lack of uint64_t is no worse than failing right now with an #error. */
153#define U64TYPE uint64_t
154#endif
155
156
157#define ROTL64(x,b) (U64TYPE)( ((x) << (b)) | ( (x) >> (64 - (b))) )
158
159#define U8TO64_LE(p) \
160 (((U64TYPE)((p)[0]) ) | \
161 ((U64TYPE)((p)[1]) << 8) | \
162 ((U64TYPE)((p)[2]) << 16) | \
163 ((U64TYPE)((p)[3]) << 24) | \
164 ((U64TYPE)((p)[4]) << 32) | \
165 ((U64TYPE)((p)[5]) << 40) | \
166 ((U64TYPE)((p)[6]) << 48) | \
167 ((U64TYPE)((p)[7]) << 56))
168
169#define SIPROUND \
170 do { \
171 v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \
172 v2 += v3; v3=ROTL64(v3,16); v3 ^= v2; \
173 v0 += v3; v3=ROTL64(v3,21); v3 ^= v0; \
174 v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \
175 } while(0)
176
177/* SipHash-2-4 */
178
179PERL_STATIC_INLINE U32
180S_perl_hash_siphash_2_4(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) {
181 /* "somepseudorandomlygeneratedbytes" */
182 U64TYPE v0 = 0x736f6d6570736575ULL;
183 U64TYPE v1 = 0x646f72616e646f6dULL;
184 U64TYPE v2 = 0x6c7967656e657261ULL;
185 U64TYPE v3 = 0x7465646279746573ULL;
186
187 U64TYPE b;
188 U64TYPE k0 = ((U64TYPE*)seed)[0];
189 U64TYPE k1 = ((U64TYPE*)seed)[1];
190 U64TYPE m;
191 const int left = inlen & 7;
192 const U8 *end = in + inlen - left;
193
194 b = ( ( U64TYPE )(inlen) ) << 56;
195 v3 ^= k1;
196 v2 ^= k0;
197 v1 ^= k1;
198 v0 ^= k0;
199
200 for ( ; in != end; in += 8 )
201 {
202 m = U8TO64_LE( in );
203 v3 ^= m;
204 SIPROUND;
205 SIPROUND;
206 v0 ^= m;
207 }
208
209 switch( left )
210 {
211 case 7: b |= ( ( U64TYPE )in[ 6] ) << 48;
212 case 6: b |= ( ( U64TYPE )in[ 5] ) << 40;
213 case 5: b |= ( ( U64TYPE )in[ 4] ) << 32;
214 case 4: b |= ( ( U64TYPE )in[ 3] ) << 24;
215 case 3: b |= ( ( U64TYPE )in[ 2] ) << 16;
216 case 2: b |= ( ( U64TYPE )in[ 1] ) << 8;
217 case 1: b |= ( ( U64TYPE )in[ 0] ); break;
218 case 0: break;
219 }
220
221 v3 ^= b;
222 SIPROUND;
223 SIPROUND;
224 v0 ^= b;
225
226 v2 ^= 0xff;
227 SIPROUND;
228 SIPROUND;
229 SIPROUND;
230 SIPROUND;
231 b = v0 ^ v1 ^ v2 ^ v3;
232 return (U32)(b & U32_MAX);
233}
234#endif /* defined(HAS_QUAD) */
235
236/* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in
237 * (http://burtleburtle.net/bob/hash/doobs.html)
238 * It is by Paul Hsieh (c) 2004 and is analysed here
239 * http://www.azillionmonkeys.com/qed/hash.html
240 * license terms are here:
241 * http://www.azillionmonkeys.com/qed/weblicense.html
242 */
243
244
245PERL_STATIC_INLINE U32
246S_perl_hash_superfast(const unsigned char * const seed, const unsigned char *str, STRLEN len) {
247 U32 hash = *((U32*)seed) + len;
248 U32 tmp;
249 int rem= len & 3;
250 len >>= 2;
251
252 for (;len > 0; len--) {
253 hash += U8TO16_LE (str);
254 tmp = (U8TO16_LE (str+2) << 11) ^ hash;
255 hash = (hash << 16) ^ tmp;
256 str += 2 * sizeof (U16);
257 hash += hash >> 11;
258 }
259
260 /* Handle end cases */
261 switch (rem) { \
262 case 3: hash += U8TO16_LE (str);
263 hash ^= hash << 16;
264 hash ^= str[sizeof (U16)] << 18;
265 hash += hash >> 11;
266 break;
267 case 2: hash += U8TO16_LE (str);
268 hash ^= hash << 11;
269 hash += hash >> 17;
270 break;
271 case 1: hash += *str;
272 hash ^= hash << 10;
273 hash += hash >> 1;
274 }
275 /* Force "avalanching" of final 127 bits */
276 hash ^= hash << 3;
277 hash += hash >> 5;
278 hash ^= hash << 4;
279 hash += hash >> 17;
280 hash ^= hash << 25;
281 return (hash + (hash >> 6));
282}
283
284
285/*-----------------------------------------------------------------------------
286 * MurmurHash3 was written by Austin Appleby, and is placed in the public
287 * domain.
288 *
289 * This implementation was originally written by Shane Day, and is also public domain,
290 * and was modified to function as a macro similar to other perl hash functions by
291 * Yves Orton.
292 *
293 * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
294 * with support for progressive processing.
295 *
296 * If you want to understand the MurmurHash algorithm you would be much better
297 * off reading the original source. Just point your browser at:
298 * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
299 *
300 * How does it work?
301 *
302 * We can only process entire 32 bit chunks of input, except for the very end
303 * that may be shorter.
304 *
305 * To handle endianess I simply use a macro that reads a U32 and define
306 * that macro to be a direct read on little endian machines, a read and swap
307 * on big endian machines, or a byte-by-byte read if the endianess is unknown.
308 */
309
310
311/*-----------------------------------------------------------------------------
312 * Core murmurhash algorithm macros */
313
314#define MURMUR_C1 (0xcc9e2d51)
315#define MURMUR_C2 (0x1b873593)
316#define MURMUR_C3 (0xe6546b64)
317#define MURMUR_C4 (0x85ebca6b)
318#define MURMUR_C5 (0xc2b2ae35)
319
320/* This is the main processing body of the algorithm. It operates
321 * on each full 32-bits of input. */
322#define MURMUR_DOBLOCK(h1, k1) STMT_START { \
323 k1 *= MURMUR_C1; \
324 k1 = ROTL32(k1,15); \
325 k1 *= MURMUR_C2; \
326 \
327 h1 ^= k1; \
328 h1 = ROTL32(h1,13); \
329 h1 = h1 * 5 + MURMUR_C3; \
330} STMT_END
331
332
333/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
334/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
335#define MURMUR_DOBYTES(cnt, h1, c, n, ptr, len) STMT_START { \
336 int MURMUR_DOBYTES_i = cnt; \
337 while(MURMUR_DOBYTES_i--) { \
338 c = c>>8 | *ptr++<<24; \
339 n++; len--; \
340 if(n==4) { \
341 MURMUR_DOBLOCK(h1, c); \
342 n = 0; \
343 } \
344 } \
345} STMT_END
346
347
348/* now we create the hash function */
349PERL_STATIC_INLINE U32
350S_perl_hash_murmur3(const unsigned char * const seed, const unsigned char *ptr, STRLEN len) {
351 U32 h1 = *((U32*)seed);
352 U32 k1;
353 U32 carry = 0;
354
355 const unsigned char *end;
356 int bytes_in_carry = 0; /* bytes in carry */
357 I32 total_length= len;
358
359#if defined(UNALIGNED_SAFE)
360 /* Handle carry: commented out as its only used in incremental mode - it never fires for us
361 int i = (4-n) & 3;
362 if(i && i <= len) {
363 MURMUR_DOBYTES(i, h1, carry, bytes_in_carry, ptr, len);
364 }
365 */
366
367 /* This CPU handles unaligned word access */
368 /* Process 32-bit chunks */
369 end = ptr + len/4*4;
370 for( ; ptr < end ; ptr+=4) {
371 k1 = U8TO32_LE(ptr);
372 MURMUR_DOBLOCK(h1, k1);
373 }
374#else
375 /* This CPU does not handle unaligned word access */
376
377 /* Consume enough so that the next data byte is word aligned */
378 int i = -(long)ptr & 3;
379 if(i && i <= len) {
380 MURMUR_DOBYTES(i, h1, carry, bytes_in_carry, ptr, len);
381 }
382
383 /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
384 end = ptr + len/4*4;
385 switch(bytes_in_carry) { /* how many bytes in carry */
386 case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */
387 for( ; ptr < end ; ptr+=4) {
388 k1 = U8TO32_LE(ptr);
389 MURMUR_DOBLOCK(h1, k1);
390 }
391 break;
392 case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */
393 for( ; ptr < end ; ptr+=4) {
394 k1 = carry>>24;
395 carry = U8TO32_LE(ptr);
396 k1 |= carry<<8;
397 MURMUR_DOBLOCK(h1, k1);
398 }
399 break;
400 case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */
401 for( ; ptr < end ; ptr+=4) {
402 k1 = carry>>16;
403 carry = U8TO32_LE(ptr);
404 k1 |= carry<<16;
405 MURMUR_DOBLOCK(h1, k1);
406 }
407 break;
408 case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */
409 for( ; ptr < end ; ptr+=4) {
410 k1 = carry>>8;
411 carry = U8TO32_LE(ptr);
412 k1 |= carry<<24;
413 MURMUR_DOBLOCK(h1, k1);
414 }
415 }
416#endif
417 /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */
418 len -= len/4*4;
419
420 /* Append any remaining bytes into carry */
421 MURMUR_DOBYTES(len, h1, carry, bytes_in_carry, ptr, len);
422
423 if (bytes_in_carry) {
424 k1 = carry >> ( 4 - bytes_in_carry ) * 8;
425 k1 *= MURMUR_C1;
426 k1 = ROTL32(k1,15);
427 k1 *= MURMUR_C2;
428 h1 ^= k1;
429 }
430 h1 ^= total_length;
431
432 /* fmix */
433 h1 ^= h1 >> 16;
434 h1 *= MURMUR_C4;
435 h1 ^= h1 >> 13;
436 h1 *= MURMUR_C5;
437 h1 ^= h1 >> 16;
438 return h1;
439}
440
441
442PERL_STATIC_INLINE U32
443S_perl_hash_djb2(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
444 const unsigned char * const end = (const unsigned char *)str + len;
445 U32 hash = *((U32*)seed + len);
446 while (str < end) {
447 hash = ((hash << 5) + hash) + *str++;
448 }
449 return hash;
450}
451
452PERL_STATIC_INLINE U32
453S_perl_hash_sdbm(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
454 const unsigned char * const end = (const unsigned char *)str + len;
455 U32 hash = *((U32*)seed + len);
456 while (str < end) {
457 hash = (hash << 6) + (hash << 16) - hash + *str++;
458 }
459 return hash;
460}
461
462
463/* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins
464 * from requirements by Colin Plumb.
465 * (http://burtleburtle.net/bob/hash/doobs.html) */
466PERL_STATIC_INLINE U32
467S_perl_hash_one_at_a_time(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
468 const unsigned char * const end = (const unsigned char *)str + len;
469 U32 hash = *((U32*)seed) + len;
470 while (str < end) {
471 hash += *str++;
472 hash += (hash << 10);
473 hash ^= (hash >> 6);
474 }
475 hash += (hash << 3);
476 hash ^= (hash >> 11);
477 return (hash + (hash << 15));
478}
479
480PERL_STATIC_INLINE U32
481S_perl_hash_old_one_at_a_time(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
482 const unsigned char * const end = (const unsigned char *)str + len;
483 U32 hash = *((U32*)seed);
484 while (str < end) {
485 hash += *str++;
486 hash += (hash << 10);
487 hash ^= (hash >> 6);
488 }
489 hash += (hash << 3);
490 hash ^= (hash >> 11);
491 return (hash + (hash << 15));
492}
493
494/* legacy - only mod_perl should be doing this. */
495#ifdef PERL_HASH_INTERNAL_ACCESS
496#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len)
497#endif
498
499#endif /*compile once*/
500
501/*
502 * Local variables:
503 * c-indentation-style: bsd
504 * c-basic-offset: 4
505 * indent-tabs-mode: nil
506 * End:
507 *
508 * ex: set ts=8 sts=4 sw=4 et:
509 */