This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perldelta for bf4a926a2937
[perl5.git] / hv_func.h
CommitLineData
4d3a042d
YO
1/* hash a key
2 *--------------------------------------------------------------------------------------
3 * The "hash seed" feature was added in Perl 5.8.1 to perturb the results
4 * to avoid "algorithmic complexity attacks".
5 *
6 * If USE_HASH_SEED is defined, hash randomisation is done by default
7 * If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done
8 * only if the environment variable PERL_HASH_SEED is set.
9 * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed())
10 */
11
12#ifndef PERL_SEEN_HV_FUNC_H /* compile once */
13#define PERL_SEEN_HV_FUNC_H
14
15#if !( 0 \
e6b54db6 16 || defined(PERL_HASH_FUNC_SIPHASH) \
6b026047
YO
17 || defined(PERL_HASH_FUNC_SIPHASH13) \
18 || defined(PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13) \
b1300a73 19 || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_HARD) \
4d3a042d 20 )
c9b6887c 21#if IVSIZE == 8
6b026047
YO
22#define PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13
23#else
b1300a73 24#define PERL_HASH_FUNC_ONE_AT_A_TIME_HARD
4d3a042d 25#endif
6b026047 26#endif
4d3a042d
YO
27
28#if defined(PERL_HASH_FUNC_SIPHASH)
29# define PERL_HASH_FUNC "SIPHASH_2_4"
30# define PERL_HASH_SEED_BYTES 16
3d53a8ea 31# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_2_4((seed),(U8*)(str),(len))
6b026047
YO
32#elif defined(PERL_HASH_FUNC_SIPHASH13)
33# define PERL_HASH_FUNC "SIPHASH_1_3"
34# define PERL_HASH_SEED_BYTES 16
35# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_1_3((seed),(U8*)(str),(len))
36#elif defined(PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13)
37# define PERL_HASH_FUNC "HYBRID_OAATHU_SIPHASH_1_3"
38# define PERL_HASH_SEED_BYTES 24
39# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_oaathu_siphash_1_3((seed),(U8*)(str),(len))
b1300a73
YO
40#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME_HARD)
41# define PERL_HASH_FUNC "ONE_AT_A_TIME_HARD"
42# define PERL_HASH_SEED_BYTES 8
3d53a8ea 43# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_one_at_a_time_hard((seed),(U8*)(str),(len))
4d3a042d
YO
44#endif
45
3d53a8ea 46#ifndef PERL_HASH_WITH_SEED
4d3a042d
YO
47#error "No hash function defined!"
48#endif
49#ifndef PERL_HASH_SEED_BYTES
50#error "PERL_HASH_SEED_BYTES not defined"
51#endif
52#ifndef PERL_HASH_FUNC
53#error "PERL_HASH_FUNC not defined"
54#endif
55
56#ifndef PERL_HASH_SEED
57# if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT)
58# define PERL_HASH_SEED PL_hash_seed
59# elif PERL_HASH_SEED_BYTES == 4
25c1b134 60# define PERL_HASH_SEED ((const U8 *)"PeRl")
c2d4ebc6 61# elif PERL_HASH_SEED_BYTES == 8
25c1b134 62# define PERL_HASH_SEED ((const U8 *)"PeRlHaSh")
4d3a042d 63# elif PERL_HASH_SEED_BYTES == 16
25c1b134 64# define PERL_HASH_SEED ((const U8 *)"PeRlHaShhAcKpErl")
4d3a042d
YO
65# else
66# error "No PERL_HASH_SEED definition for " PERL_HASH_FUNC
67# endif
68#endif
69
3d53a8ea
YO
70#define PERL_HASH(hash,str,len) PERL_HASH_WITH_SEED(PERL_HASH_SEED,hash,str,len)
71
236a7029
YO
72/* legacy - only mod_perl should be doing this. */
73#ifdef PERL_HASH_INTERNAL_ACCESS
74#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len)
75#endif
76
4d3a042d
YO
77/*-----------------------------------------------------------------------------
78 * Endianess, misalignment capabilities and util macros
79 *
80 * The following 3 macros are defined in this section. The other macros defined
81 * are only needed to help derive these 3.
82 *
83 * U8TO32_LE(x) Read a little endian unsigned 32-bit int
8757ed9b 84 * UNALIGNED_SAFE Defined if unaligned access is safe
4d3a042d
YO
85 * ROTL32(x,r) Rotate x left by r bits
86 */
87
88#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
0a35fd8a 89 || defined(_MSC_VER) || defined (__TURBOC__)
4d3a042d
YO
90#define U8TO16_LE(d) (*((const U16 *) (d)))
91#endif
92
93#if !defined (U8TO16_LE)
94#define U8TO16_LE(d) ((((const U8 *)(d))[1] << 8)\
95 +((const U8 *)(d))[0])
96#endif
97
4d3a042d
YO
98#if (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) && U32SIZE == 4
99 /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
463ddf34 100 #define U8TO32_LE(ptr) (*((const U32*)(ptr)))
4d3a042d
YO
101#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
102 /* TODO: Add additional cases below where a compiler provided bswap32 is available */
103 #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
104 #define U8TO32_LE(ptr) (__builtin_bswap32(*((U32*)(ptr))))
105 #else
106 /* Without a known fast bswap32 we're just as well off doing this */
107 #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
108 #define UNALIGNED_SAFE
109 #endif
110#else
111 /* Unknown endianess so last resort is to read individual bytes */
112 #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
113 /* Since we're not doing word-reads we can skip the messing about with realignment */
114 #define UNALIGNED_SAFE
115#endif
116
b716320d
YO
117#ifdef HAS_QUAD
118#ifndef U64TYPE
119/* This probably isn't going to work, but failing with a compiler error due to
120 lack of uint64_t is no worse than failing right now with an #error. */
702c92eb 121#define U64 uint64_t
b716320d
YO
122#endif
123#endif
124
125/* Find best way to ROTL32/ROTL64 */
4d3a042d
YO
126#if defined(_MSC_VER)
127 #include <stdlib.h> /* Microsoft put _rotl declaration in here */
128 #define ROTL32(x,r) _rotl(x,r)
b716320d
YO
129 #ifdef HAS_QUAD
130 #define ROTL64(x,r) _rotl64(x,r)
131 #endif
4d3a042d
YO
132#else
133 /* gcc recognises this code and generates a rotate instruction for CPUs with one */
134 #define ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r)))
b716320d 135 #ifdef HAS_QUAD
702c92eb 136 #define ROTL64(x,r) (((U64)x << r) | ((U64)x >> (64 - r)))
b716320d 137 #endif
4d3a042d
YO
138#endif
139
140
b716320d
YO
141#ifdef UV_IS_QUAD
142#define ROTL_UV(x,r) ROTL64(x,r)
143#else
144#define ROTL_UV(x,r) ROTL32(x,r)
145#endif
146
4d3a042d
YO
147/* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein.
148 * The authors claim it is relatively secure compared to the alternatives
149 * and that performance wise it is a suitable hash for languages like Perl.
150 * See:
151 *
152 * https://www.131002.net/siphash/
153 *
154 * This implementation seems to perform slightly slower than one-at-a-time for
155 * short keys, but degrades slower for longer keys. Murmur Hash outperforms it
156 * regardless of keys size.
157 *
158 * It is 64 bit only.
159 */
160
161#ifdef HAS_QUAD
162
4d3a042d 163#define U8TO64_LE(p) \
702c92eb
JH
164 (((U64)((p)[0]) ) | \
165 ((U64)((p)[1]) << 8) | \
166 ((U64)((p)[2]) << 16) | \
167 ((U64)((p)[3]) << 24) | \
168 ((U64)((p)[4]) << 32) | \
169 ((U64)((p)[5]) << 40) | \
170 ((U64)((p)[6]) << 48) | \
171 ((U64)((p)[7]) << 56))
4d3a042d
YO
172
173#define SIPROUND \
6b026047 174 STMT_START { \
4d3a042d
YO
175 v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \
176 v2 += v3; v3=ROTL64(v3,16); v3 ^= v2; \
177 v0 += v3; v3=ROTL64(v3,21); v3 ^= v0; \
178 v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \
6b026047 179 } STMT_END
4d3a042d
YO
180
181/* SipHash-2-4 */
182
6b026047
YO
183
184#define PERL_SIPHASH_FNC(FNC,SIP_ROUNDS,SIP_FINAL_ROUNDS) \
185PERL_STATIC_INLINE U32 \
186FNC(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) { \
187 /* "somepseudorandomlygeneratedbytes" */ \
188 U64 v0 = UINT64_C(0x736f6d6570736575); \
189 U64 v1 = UINT64_C(0x646f72616e646f6d); \
190 U64 v2 = UINT64_C(0x6c7967656e657261); \
191 U64 v3 = UINT64_C(0x7465646279746573); \
192 \
193 U64 b; \
194 U64 k0 = ((const U64*)seed)[0]; \
195 U64 k1 = ((const U64*)seed)[1]; \
196 U64 m; \
197 const int left = inlen & 7; \
198 const U8 *end = in + inlen - left; \
199 \
200 b = ( ( U64 )(inlen) ) << 56; \
201 v3 ^= k1; \
202 v2 ^= k0; \
203 v1 ^= k1; \
204 v0 ^= k0; \
205 \
206 for ( ; in != end; in += 8 ) \
207 { \
208 m = U8TO64_LE( in ); \
209 v3 ^= m; \
210 \
211 SIP_ROUNDS; \
212 \
213 v0 ^= m; \
214 } \
215 \
216 switch( left ) \
217 { \
218 case 7: b |= ( ( U64 )in[ 6] ) << 48; \
219 case 6: b |= ( ( U64 )in[ 5] ) << 40; \
220 case 5: b |= ( ( U64 )in[ 4] ) << 32; \
221 case 4: b |= ( ( U64 )in[ 3] ) << 24; \
222 case 3: b |= ( ( U64 )in[ 2] ) << 16; \
223 case 2: b |= ( ( U64 )in[ 1] ) << 8; \
224 case 1: b |= ( ( U64 )in[ 0] ); break; \
225 case 0: break; \
226 } \
227 \
228 v3 ^= b; \
229 \
230 SIP_ROUNDS; \
231 \
232 v0 ^= b; \
233 \
234 v2 ^= 0xff; \
235 \
236 SIP_FINAL_ROUNDS \
237 \
238 b = v0 ^ v1 ^ v2 ^ v3; \
239 return (U32)(b & U32_MAX); \
4d3a042d 240}
6b026047
YO
241
242PERL_SIPHASH_FNC(
243 S_perl_hash_siphash_1_3
244 ,SIPROUND;
245 ,SIPROUND;SIPROUND;SIPROUND;
246)
247
248PERL_SIPHASH_FNC(
249 S_perl_hash_siphash_2_4
250 ,SIPROUND;SIPROUND;
251 ,SIPROUND;SIPROUND;SIPROUND;SIPROUND;
252)
253
4d3a042d
YO
254#endif /* defined(HAS_QUAD) */
255
236a7029 256/* - ONE_AT_A_TIME_HARD is the 5.17+ recommend ONE_AT_A_TIME variant */
6b026047 257
236a7029 258/* This is derived from the "One-at-a-Time" algorithm by Bob Jenkins
4d3a042d 259 * from requirements by Colin Plumb.
b1300a73 260 * (http://burtleburtle.net/bob/hash/doobs.html)
236a7029
YO
261 * Modified by Yves Orton to increase security for Perl 5.17 and later.
262 */
b1300a73
YO
263PERL_STATIC_INLINE U32
264S_perl_hash_one_at_a_time_hard(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
265 const unsigned char * const end = (const unsigned char *)str + len;
463ddf34 266 U32 hash = *((const U32*)seed) + (U32)len;
b1300a73
YO
267
268 while (str < end) {
269 hash += (hash << 10);
270 hash ^= (hash >> 6);
271 hash += *str++;
272 }
273
274 hash += (hash << 10);
275 hash ^= (hash >> 6);
276 hash += seed[4];
277
278 hash += (hash << 10);
279 hash ^= (hash >> 6);
280 hash += seed[5];
281
282 hash += (hash << 10);
283 hash ^= (hash >> 6);
284 hash += seed[6];
285
286 hash += (hash << 10);
287 hash ^= (hash >> 6);
288 hash += seed[7];
289
290 hash += (hash << 10);
291 hash ^= (hash >> 6);
292
293 hash += (hash << 3);
294 hash ^= (hash >> 11);
295 return (hash + (hash << 15));
296}
297
6b026047 298#ifdef HAS_QUAD
236a7029
YO
299
300/* Hybrid hash function
301 *
302 * For short strings, 16 bytes or shorter, we use an optimised variant
6b026047 303 * of One At A Time Hard, and for longer strings, we use siphash_1_3.
236a7029
YO
304 *
305 * The optimisation of One At A Time Hard means we read the key in
306 * reverse from normal, but by doing so we avoid the loop overhead.
6b026047
YO
307 */
308PERL_STATIC_INLINE U32
309S_perl_hash_oaathu_siphash_1_3(const unsigned char * const seed, const unsigned char *str, const STRLEN len) {
310 U32 hash = *((const U32*)seed) + (U32)len;
311 switch (len) {
312 case 16:
313 hash += (hash << 10);
314 hash ^= (hash >> 6);
315 hash += str[15];
316 case 15:
317 hash += (hash << 10);
318 hash ^= (hash >> 6);
319 hash += str[14];
320 case 14:
321 hash += (hash << 10);
322 hash ^= (hash >> 6);
323 hash += str[13];
324 case 13:
325 hash += (hash << 10);
326 hash ^= (hash >> 6);
327 hash += str[12];
328 case 12:
329 hash += (hash << 10);
330 hash ^= (hash >> 6);
331 hash += str[11];
332 case 11:
333 hash += (hash << 10);
334 hash ^= (hash >> 6);
335 hash += str[10];
336 case 10:
337 hash += (hash << 10);
338 hash ^= (hash >> 6);
339 hash += str[9];
340 case 9:
341 hash += (hash << 10);
342 hash ^= (hash >> 6);
343 hash += str[8];
344 case 8:
345 hash += (hash << 10);
346 hash ^= (hash >> 6);
347 hash += str[7];
348 case 7:
349 hash += (hash << 10);
350 hash ^= (hash >> 6);
351 hash += str[6];
352 case 6:
353 hash += (hash << 10);
354 hash ^= (hash >> 6);
355 hash += str[5];
356 case 5:
357 hash += (hash << 10);
358 hash ^= (hash >> 6);
359 hash += str[4];
360 case 4:
361 hash += (hash << 10);
362 hash ^= (hash >> 6);
363 hash += str[3];
364 case 3:
365 hash += (hash << 10);
366 hash ^= (hash >> 6);
367 hash += str[2];
368 case 2:
369 hash += (hash << 10);
370 hash ^= (hash >> 6);
371 hash += str[1];
372 case 1:
373 hash += (hash << 10);
374 hash ^= (hash >> 6);
375 hash += str[0];
376 case 0:
377 hash += (hash << 10);
378 hash ^= (hash >> 6);
379 hash += seed[4];
380 hash += (hash << 10);
381 hash ^= (hash >> 6);
382 hash += seed[5];
383 hash += (hash << 10);
384 hash ^= (hash >> 6);
385 hash += seed[6];
386 hash += (hash << 10);
387 hash ^= (hash >> 6);
388 hash += seed[7];
389 hash += (hash << 10);
390 hash ^= (hash >> 6);
391
392 hash += (hash << 3);
393 hash ^= (hash >> 11);
394 return (hash + (hash << 15));
395 }
396 return S_perl_hash_siphash_1_3(seed+8, str, len);
397}
398#endif /* defined(HAS_QUAD) */
399
4d3a042d
YO
400
401#endif /*compile once*/
402
403/*
4d3a042d
YO
404 * ex: set ts=8 sts=4 sw=4 et:
405 */