Commit | Line | Data |
---|---|---|
4d3a042d YO |
1 | /* hash a key |
2 | *-------------------------------------------------------------------------------------- | |
3 | * The "hash seed" feature was added in Perl 5.8.1 to perturb the results | |
4 | * to avoid "algorithmic complexity attacks". | |
5 | * | |
6 | * If USE_HASH_SEED is defined, hash randomisation is done by default | |
7 | * If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done | |
8 | * only if the environment variable PERL_HASH_SEED is set. | |
9 | * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed()) | |
10 | */ | |
11 | ||
12 | #ifndef PERL_SEEN_HV_FUNC_H /* compile once */ | |
13 | #define PERL_SEEN_HV_FUNC_H | |
14 | ||
15 | #if !( 0 \ | |
e6b54db6 | 16 | || defined(PERL_HASH_FUNC_SIPHASH) \ |
6b026047 YO |
17 | || defined(PERL_HASH_FUNC_SIPHASH13) \ |
18 | || defined(PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13) \ | |
b1300a73 | 19 | || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_HARD) \ |
4d3a042d | 20 | ) |
c9b6887c | 21 | #if IVSIZE == 8 |
6b026047 YO |
22 | #define PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13 |
23 | #else | |
b1300a73 | 24 | #define PERL_HASH_FUNC_ONE_AT_A_TIME_HARD |
4d3a042d | 25 | #endif |
6b026047 | 26 | #endif |
4d3a042d YO |
27 | |
28 | #if defined(PERL_HASH_FUNC_SIPHASH) | |
29 | # define PERL_HASH_FUNC "SIPHASH_2_4" | |
30 | # define PERL_HASH_SEED_BYTES 16 | |
3d53a8ea | 31 | # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_2_4((seed),(U8*)(str),(len)) |
6b026047 YO |
32 | #elif defined(PERL_HASH_FUNC_SIPHASH13) |
33 | # define PERL_HASH_FUNC "SIPHASH_1_3" | |
34 | # define PERL_HASH_SEED_BYTES 16 | |
35 | # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_1_3((seed),(U8*)(str),(len)) | |
36 | #elif defined(PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13) | |
37 | # define PERL_HASH_FUNC "HYBRID_OAATHU_SIPHASH_1_3" | |
38 | # define PERL_HASH_SEED_BYTES 24 | |
39 | # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_oaathu_siphash_1_3((seed),(U8*)(str),(len)) | |
b1300a73 YO |
40 | #elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME_HARD) |
41 | # define PERL_HASH_FUNC "ONE_AT_A_TIME_HARD" | |
42 | # define PERL_HASH_SEED_BYTES 8 | |
3d53a8ea | 43 | # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_one_at_a_time_hard((seed),(U8*)(str),(len)) |
4d3a042d YO |
44 | #endif |
45 | ||
3d53a8ea | 46 | #ifndef PERL_HASH_WITH_SEED |
4d3a042d YO |
47 | #error "No hash function defined!" |
48 | #endif | |
49 | #ifndef PERL_HASH_SEED_BYTES | |
50 | #error "PERL_HASH_SEED_BYTES not defined" | |
51 | #endif | |
52 | #ifndef PERL_HASH_FUNC | |
53 | #error "PERL_HASH_FUNC not defined" | |
54 | #endif | |
55 | ||
56 | #ifndef PERL_HASH_SEED | |
57 | # if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) | |
58 | # define PERL_HASH_SEED PL_hash_seed | |
59 | # elif PERL_HASH_SEED_BYTES == 4 | |
25c1b134 | 60 | # define PERL_HASH_SEED ((const U8 *)"PeRl") |
c2d4ebc6 | 61 | # elif PERL_HASH_SEED_BYTES == 8 |
25c1b134 | 62 | # define PERL_HASH_SEED ((const U8 *)"PeRlHaSh") |
4d3a042d | 63 | # elif PERL_HASH_SEED_BYTES == 16 |
25c1b134 | 64 | # define PERL_HASH_SEED ((const U8 *)"PeRlHaShhAcKpErl") |
4d3a042d YO |
65 | # else |
66 | # error "No PERL_HASH_SEED definition for " PERL_HASH_FUNC | |
67 | # endif | |
68 | #endif | |
69 | ||
3d53a8ea YO |
70 | #define PERL_HASH(hash,str,len) PERL_HASH_WITH_SEED(PERL_HASH_SEED,hash,str,len) |
71 | ||
236a7029 YO |
72 | /* legacy - only mod_perl should be doing this. */ |
73 | #ifdef PERL_HASH_INTERNAL_ACCESS | |
74 | #define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len) | |
75 | #endif | |
76 | ||
4d3a042d YO |
77 | /*----------------------------------------------------------------------------- |
78 | * Endianess, misalignment capabilities and util macros | |
79 | * | |
80 | * The following 3 macros are defined in this section. The other macros defined | |
81 | * are only needed to help derive these 3. | |
82 | * | |
83 | * U8TO32_LE(x) Read a little endian unsigned 32-bit int | |
8757ed9b | 84 | * UNALIGNED_SAFE Defined if unaligned access is safe |
4d3a042d YO |
85 | * ROTL32(x,r) Rotate x left by r bits |
86 | */ | |
87 | ||
88 | #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ | |
0a35fd8a | 89 | || defined(_MSC_VER) || defined (__TURBOC__) |
4d3a042d YO |
90 | #define U8TO16_LE(d) (*((const U16 *) (d))) |
91 | #endif | |
92 | ||
93 | #if !defined (U8TO16_LE) | |
94 | #define U8TO16_LE(d) ((((const U8 *)(d))[1] << 8)\ | |
95 | +((const U8 *)(d))[0]) | |
96 | #endif | |
97 | ||
4d3a042d YO |
98 | #if (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) && U32SIZE == 4 |
99 | /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */ | |
463ddf34 | 100 | #define U8TO32_LE(ptr) (*((const U32*)(ptr))) |
4d3a042d YO |
101 | #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 |
102 | /* TODO: Add additional cases below where a compiler provided bswap32 is available */ | |
103 | #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3)) | |
104 | #define U8TO32_LE(ptr) (__builtin_bswap32(*((U32*)(ptr)))) | |
105 | #else | |
106 | /* Without a known fast bswap32 we're just as well off doing this */ | |
107 | #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) | |
108 | #define UNALIGNED_SAFE | |
109 | #endif | |
110 | #else | |
111 | /* Unknown endianess so last resort is to read individual bytes */ | |
112 | #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) | |
113 | /* Since we're not doing word-reads we can skip the messing about with realignment */ | |
114 | #define UNALIGNED_SAFE | |
115 | #endif | |
116 | ||
b716320d YO |
117 | #ifdef HAS_QUAD |
118 | #ifndef U64TYPE | |
119 | /* This probably isn't going to work, but failing with a compiler error due to | |
120 | lack of uint64_t is no worse than failing right now with an #error. */ | |
702c92eb | 121 | #define U64 uint64_t |
b716320d YO |
122 | #endif |
123 | #endif | |
124 | ||
125 | /* Find best way to ROTL32/ROTL64 */ | |
4d3a042d YO |
126 | #if defined(_MSC_VER) |
127 | #include <stdlib.h> /* Microsoft put _rotl declaration in here */ | |
128 | #define ROTL32(x,r) _rotl(x,r) | |
b716320d YO |
129 | #ifdef HAS_QUAD |
130 | #define ROTL64(x,r) _rotl64(x,r) | |
131 | #endif | |
4d3a042d YO |
132 | #else |
133 | /* gcc recognises this code and generates a rotate instruction for CPUs with one */ | |
134 | #define ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r))) | |
b716320d | 135 | #ifdef HAS_QUAD |
702c92eb | 136 | #define ROTL64(x,r) (((U64)x << r) | ((U64)x >> (64 - r))) |
b716320d | 137 | #endif |
4d3a042d YO |
138 | #endif |
139 | ||
140 | ||
b716320d YO |
141 | #ifdef UV_IS_QUAD |
142 | #define ROTL_UV(x,r) ROTL64(x,r) | |
143 | #else | |
144 | #define ROTL_UV(x,r) ROTL32(x,r) | |
145 | #endif | |
146 | ||
4d3a042d YO |
147 | /* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein. |
148 | * The authors claim it is relatively secure compared to the alternatives | |
149 | * and that performance wise it is a suitable hash for languages like Perl. | |
150 | * See: | |
151 | * | |
152 | * https://www.131002.net/siphash/ | |
153 | * | |
154 | * This implementation seems to perform slightly slower than one-at-a-time for | |
155 | * short keys, but degrades slower for longer keys. Murmur Hash outperforms it | |
156 | * regardless of keys size. | |
157 | * | |
158 | * It is 64 bit only. | |
159 | */ | |
160 | ||
161 | #ifdef HAS_QUAD | |
162 | ||
4d3a042d | 163 | #define U8TO64_LE(p) \ |
702c92eb JH |
164 | (((U64)((p)[0]) ) | \ |
165 | ((U64)((p)[1]) << 8) | \ | |
166 | ((U64)((p)[2]) << 16) | \ | |
167 | ((U64)((p)[3]) << 24) | \ | |
168 | ((U64)((p)[4]) << 32) | \ | |
169 | ((U64)((p)[5]) << 40) | \ | |
170 | ((U64)((p)[6]) << 48) | \ | |
171 | ((U64)((p)[7]) << 56)) | |
4d3a042d YO |
172 | |
173 | #define SIPROUND \ | |
6b026047 | 174 | STMT_START { \ |
4d3a042d YO |
175 | v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \ |
176 | v2 += v3; v3=ROTL64(v3,16); v3 ^= v2; \ | |
177 | v0 += v3; v3=ROTL64(v3,21); v3 ^= v0; \ | |
178 | v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \ | |
6b026047 | 179 | } STMT_END |
4d3a042d YO |
180 | |
181 | /* SipHash-2-4 */ | |
182 | ||
6b026047 YO |
183 | |
184 | #define PERL_SIPHASH_FNC(FNC,SIP_ROUNDS,SIP_FINAL_ROUNDS) \ | |
185 | PERL_STATIC_INLINE U32 \ | |
186 | FNC(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) { \ | |
187 | /* "somepseudorandomlygeneratedbytes" */ \ | |
188 | U64 v0 = UINT64_C(0x736f6d6570736575); \ | |
189 | U64 v1 = UINT64_C(0x646f72616e646f6d); \ | |
190 | U64 v2 = UINT64_C(0x6c7967656e657261); \ | |
191 | U64 v3 = UINT64_C(0x7465646279746573); \ | |
192 | \ | |
193 | U64 b; \ | |
194 | U64 k0 = ((const U64*)seed)[0]; \ | |
195 | U64 k1 = ((const U64*)seed)[1]; \ | |
196 | U64 m; \ | |
197 | const int left = inlen & 7; \ | |
198 | const U8 *end = in + inlen - left; \ | |
199 | \ | |
200 | b = ( ( U64 )(inlen) ) << 56; \ | |
201 | v3 ^= k1; \ | |
202 | v2 ^= k0; \ | |
203 | v1 ^= k1; \ | |
204 | v0 ^= k0; \ | |
205 | \ | |
206 | for ( ; in != end; in += 8 ) \ | |
207 | { \ | |
208 | m = U8TO64_LE( in ); \ | |
209 | v3 ^= m; \ | |
210 | \ | |
211 | SIP_ROUNDS; \ | |
212 | \ | |
213 | v0 ^= m; \ | |
214 | } \ | |
215 | \ | |
216 | switch( left ) \ | |
217 | { \ | |
218 | case 7: b |= ( ( U64 )in[ 6] ) << 48; \ | |
219 | case 6: b |= ( ( U64 )in[ 5] ) << 40; \ | |
220 | case 5: b |= ( ( U64 )in[ 4] ) << 32; \ | |
221 | case 4: b |= ( ( U64 )in[ 3] ) << 24; \ | |
222 | case 3: b |= ( ( U64 )in[ 2] ) << 16; \ | |
223 | case 2: b |= ( ( U64 )in[ 1] ) << 8; \ | |
224 | case 1: b |= ( ( U64 )in[ 0] ); break; \ | |
225 | case 0: break; \ | |
226 | } \ | |
227 | \ | |
228 | v3 ^= b; \ | |
229 | \ | |
230 | SIP_ROUNDS; \ | |
231 | \ | |
232 | v0 ^= b; \ | |
233 | \ | |
234 | v2 ^= 0xff; \ | |
235 | \ | |
236 | SIP_FINAL_ROUNDS \ | |
237 | \ | |
238 | b = v0 ^ v1 ^ v2 ^ v3; \ | |
239 | return (U32)(b & U32_MAX); \ | |
4d3a042d | 240 | } |
6b026047 YO |
241 | |
242 | PERL_SIPHASH_FNC( | |
243 | S_perl_hash_siphash_1_3 | |
244 | ,SIPROUND; | |
245 | ,SIPROUND;SIPROUND;SIPROUND; | |
246 | ) | |
247 | ||
248 | PERL_SIPHASH_FNC( | |
249 | S_perl_hash_siphash_2_4 | |
250 | ,SIPROUND;SIPROUND; | |
251 | ,SIPROUND;SIPROUND;SIPROUND;SIPROUND; | |
252 | ) | |
253 | ||
4d3a042d YO |
254 | #endif /* defined(HAS_QUAD) */ |
255 | ||
236a7029 | 256 | /* - ONE_AT_A_TIME_HARD is the 5.17+ recommend ONE_AT_A_TIME variant */ |
6b026047 | 257 | |
236a7029 | 258 | /* This is derived from the "One-at-a-Time" algorithm by Bob Jenkins |
4d3a042d | 259 | * from requirements by Colin Plumb. |
b1300a73 | 260 | * (http://burtleburtle.net/bob/hash/doobs.html) |
236a7029 YO |
261 | * Modified by Yves Orton to increase security for Perl 5.17 and later. |
262 | */ | |
b1300a73 YO |
263 | PERL_STATIC_INLINE U32 |
264 | S_perl_hash_one_at_a_time_hard(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { | |
265 | const unsigned char * const end = (const unsigned char *)str + len; | |
463ddf34 | 266 | U32 hash = *((const U32*)seed) + (U32)len; |
b1300a73 YO |
267 | |
268 | while (str < end) { | |
269 | hash += (hash << 10); | |
270 | hash ^= (hash >> 6); | |
271 | hash += *str++; | |
272 | } | |
273 | ||
274 | hash += (hash << 10); | |
275 | hash ^= (hash >> 6); | |
276 | hash += seed[4]; | |
277 | ||
278 | hash += (hash << 10); | |
279 | hash ^= (hash >> 6); | |
280 | hash += seed[5]; | |
281 | ||
282 | hash += (hash << 10); | |
283 | hash ^= (hash >> 6); | |
284 | hash += seed[6]; | |
285 | ||
286 | hash += (hash << 10); | |
287 | hash ^= (hash >> 6); | |
288 | hash += seed[7]; | |
289 | ||
290 | hash += (hash << 10); | |
291 | hash ^= (hash >> 6); | |
292 | ||
293 | hash += (hash << 3); | |
294 | hash ^= (hash >> 11); | |
295 | return (hash + (hash << 15)); | |
296 | } | |
297 | ||
6b026047 | 298 | #ifdef HAS_QUAD |
236a7029 YO |
299 | |
300 | /* Hybrid hash function | |
301 | * | |
302 | * For short strings, 16 bytes or shorter, we use an optimised variant | |
6b026047 | 303 | * of One At A Time Hard, and for longer strings, we use siphash_1_3. |
236a7029 YO |
304 | * |
305 | * The optimisation of One At A Time Hard means we read the key in | |
306 | * reverse from normal, but by doing so we avoid the loop overhead. | |
6b026047 YO |
307 | */ |
308 | PERL_STATIC_INLINE U32 | |
309 | S_perl_hash_oaathu_siphash_1_3(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { | |
310 | U32 hash = *((const U32*)seed) + (U32)len; | |
311 | switch (len) { | |
312 | case 16: | |
313 | hash += (hash << 10); | |
314 | hash ^= (hash >> 6); | |
315 | hash += str[15]; | |
316 | case 15: | |
317 | hash += (hash << 10); | |
318 | hash ^= (hash >> 6); | |
319 | hash += str[14]; | |
320 | case 14: | |
321 | hash += (hash << 10); | |
322 | hash ^= (hash >> 6); | |
323 | hash += str[13]; | |
324 | case 13: | |
325 | hash += (hash << 10); | |
326 | hash ^= (hash >> 6); | |
327 | hash += str[12]; | |
328 | case 12: | |
329 | hash += (hash << 10); | |
330 | hash ^= (hash >> 6); | |
331 | hash += str[11]; | |
332 | case 11: | |
333 | hash += (hash << 10); | |
334 | hash ^= (hash >> 6); | |
335 | hash += str[10]; | |
336 | case 10: | |
337 | hash += (hash << 10); | |
338 | hash ^= (hash >> 6); | |
339 | hash += str[9]; | |
340 | case 9: | |
341 | hash += (hash << 10); | |
342 | hash ^= (hash >> 6); | |
343 | hash += str[8]; | |
344 | case 8: | |
345 | hash += (hash << 10); | |
346 | hash ^= (hash >> 6); | |
347 | hash += str[7]; | |
348 | case 7: | |
349 | hash += (hash << 10); | |
350 | hash ^= (hash >> 6); | |
351 | hash += str[6]; | |
352 | case 6: | |
353 | hash += (hash << 10); | |
354 | hash ^= (hash >> 6); | |
355 | hash += str[5]; | |
356 | case 5: | |
357 | hash += (hash << 10); | |
358 | hash ^= (hash >> 6); | |
359 | hash += str[4]; | |
360 | case 4: | |
361 | hash += (hash << 10); | |
362 | hash ^= (hash >> 6); | |
363 | hash += str[3]; | |
364 | case 3: | |
365 | hash += (hash << 10); | |
366 | hash ^= (hash >> 6); | |
367 | hash += str[2]; | |
368 | case 2: | |
369 | hash += (hash << 10); | |
370 | hash ^= (hash >> 6); | |
371 | hash += str[1]; | |
372 | case 1: | |
373 | hash += (hash << 10); | |
374 | hash ^= (hash >> 6); | |
375 | hash += str[0]; | |
376 | case 0: | |
377 | hash += (hash << 10); | |
378 | hash ^= (hash >> 6); | |
379 | hash += seed[4]; | |
380 | hash += (hash << 10); | |
381 | hash ^= (hash >> 6); | |
382 | hash += seed[5]; | |
383 | hash += (hash << 10); | |
384 | hash ^= (hash >> 6); | |
385 | hash += seed[6]; | |
386 | hash += (hash << 10); | |
387 | hash ^= (hash >> 6); | |
388 | hash += seed[7]; | |
389 | hash += (hash << 10); | |
390 | hash ^= (hash >> 6); | |
391 | ||
392 | hash += (hash << 3); | |
393 | hash ^= (hash >> 11); | |
394 | return (hash + (hash << 15)); | |
395 | } | |
396 | return S_perl_hash_siphash_1_3(seed+8, str, len); | |
397 | } | |
398 | #endif /* defined(HAS_QUAD) */ | |
399 | ||
4d3a042d YO |
400 | |
401 | #endif /*compile once*/ | |
402 | ||
403 | /* | |
4d3a042d YO |
404 | * ex: set ts=8 sts=4 sw=4 et: |
405 | */ |