| 1 | /* hash a key |
| 2 | *-------------------------------------------------------------------------------------- |
| 3 | * The "hash seed" feature was added in Perl 5.8.1 to perturb the results |
| 4 | * to avoid "algorithmic complexity attacks". |
| 5 | * |
| 6 | * If USE_HASH_SEED is defined, hash randomisation is done by default |
| 7 | * If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done |
| 8 | * only if the environment variable PERL_HASH_SEED is set. |
| 9 | * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed()) |
| 10 | */ |
| 11 | |
| 12 | #ifndef PERL_SEEN_HV_FUNC_H /* compile once */ |
| 13 | #define PERL_SEEN_HV_FUNC_H |
| 14 | |
| 15 | #if !( 0 \ |
| 16 | || defined(PERL_HASH_FUNC_SIPHASH) \ |
| 17 | || defined(PERL_HASH_FUNC_SIPHASH13) \ |
| 18 | || defined(PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13) \ |
| 19 | || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_HARD) \ |
| 20 | ) |
| 21 | #if IVSIZE == 8 |
| 22 | #define PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13 |
| 23 | #else |
| 24 | #define PERL_HASH_FUNC_ONE_AT_A_TIME_HARD |
| 25 | #endif |
| 26 | #endif |
| 27 | |
| 28 | #if defined(PERL_HASH_FUNC_SIPHASH) |
| 29 | # define PERL_HASH_FUNC "SIPHASH_2_4" |
| 30 | # define PERL_HASH_SEED_BYTES 16 |
| 31 | # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_2_4((seed),(U8*)(str),(len)) |
| 32 | #elif defined(PERL_HASH_FUNC_SIPHASH13) |
| 33 | # define PERL_HASH_FUNC "SIPHASH_1_3" |
| 34 | # define PERL_HASH_SEED_BYTES 16 |
| 35 | # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_1_3((seed),(U8*)(str),(len)) |
| 36 | #elif defined(PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13) |
| 37 | # define PERL_HASH_FUNC "HYBRID_OAATHU_SIPHASH_1_3" |
| 38 | # define PERL_HASH_SEED_BYTES 24 |
| 39 | # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_oaathu_siphash_1_3((seed),(U8*)(str),(len)) |
| 40 | #elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME_HARD) |
| 41 | # define PERL_HASH_FUNC "ONE_AT_A_TIME_HARD" |
| 42 | # define PERL_HASH_SEED_BYTES 8 |
| 43 | # define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_one_at_a_time_hard((seed),(U8*)(str),(len)) |
| 44 | #endif |
| 45 | |
| 46 | #ifndef PERL_HASH_WITH_SEED |
| 47 | #error "No hash function defined!" |
| 48 | #endif |
| 49 | #ifndef PERL_HASH_SEED_BYTES |
| 50 | #error "PERL_HASH_SEED_BYTES not defined" |
| 51 | #endif |
| 52 | #ifndef PERL_HASH_FUNC |
| 53 | #error "PERL_HASH_FUNC not defined" |
| 54 | #endif |
| 55 | |
| 56 | #ifndef PERL_HASH_SEED |
| 57 | # if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) |
| 58 | # define PERL_HASH_SEED PL_hash_seed |
| 59 | # elif PERL_HASH_SEED_BYTES == 4 |
| 60 | # define PERL_HASH_SEED ((const U8 *)"PeRl") |
| 61 | # elif PERL_HASH_SEED_BYTES == 8 |
| 62 | # define PERL_HASH_SEED ((const U8 *)"PeRlHaSh") |
| 63 | # elif PERL_HASH_SEED_BYTES == 16 |
| 64 | # define PERL_HASH_SEED ((const U8 *)"PeRlHaShhAcKpErl") |
| 65 | # else |
| 66 | # error "No PERL_HASH_SEED definition for " PERL_HASH_FUNC |
| 67 | # endif |
| 68 | #endif |
| 69 | |
| 70 | #define PERL_HASH(hash,str,len) PERL_HASH_WITH_SEED(PERL_HASH_SEED,hash,str,len) |
| 71 | |
| 72 | /* legacy - only mod_perl should be doing this. */ |
| 73 | #ifdef PERL_HASH_INTERNAL_ACCESS |
| 74 | #define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len) |
| 75 | #endif |
| 76 | |
| 77 | /*----------------------------------------------------------------------------- |
| 78 | * Endianess, misalignment capabilities and util macros |
| 79 | * |
| 80 | * The following 3 macros are defined in this section. The other macros defined |
| 81 | * are only needed to help derive these 3. |
| 82 | * |
| 83 | * U8TO32_LE(x) Read a little endian unsigned 32-bit int |
| 84 | * UNALIGNED_SAFE Defined if unaligned access is safe |
| 85 | * ROTL32(x,r) Rotate x left by r bits |
| 86 | */ |
| 87 | |
| 88 | #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ |
| 89 | || defined(_MSC_VER) || defined (__TURBOC__) |
| 90 | #define U8TO16_LE(d) (*((const U16 *) (d))) |
| 91 | #endif |
| 92 | |
| 93 | #if !defined (U8TO16_LE) |
| 94 | #define U8TO16_LE(d) ((((const U8 *)(d))[1] << 8)\ |
| 95 | +((const U8 *)(d))[0]) |
| 96 | #endif |
| 97 | |
| 98 | #if (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) && U32SIZE == 4 |
| 99 | /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */ |
| 100 | #define U8TO32_LE(ptr) (*((const U32*)(ptr))) |
| 101 | #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 |
| 102 | /* TODO: Add additional cases below where a compiler provided bswap32 is available */ |
| 103 | #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3)) |
| 104 | #define U8TO32_LE(ptr) (__builtin_bswap32(*((U32*)(ptr)))) |
| 105 | #else |
| 106 | /* Without a known fast bswap32 we're just as well off doing this */ |
| 107 | #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) |
| 108 | #define UNALIGNED_SAFE |
| 109 | #endif |
| 110 | #else |
| 111 | /* Unknown endianess so last resort is to read individual bytes */ |
| 112 | #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) |
| 113 | /* Since we're not doing word-reads we can skip the messing about with realignment */ |
| 114 | #define UNALIGNED_SAFE |
| 115 | #endif |
| 116 | |
| 117 | #ifdef HAS_QUAD |
| 118 | #ifndef U64TYPE |
| 119 | /* This probably isn't going to work, but failing with a compiler error due to |
| 120 | lack of uint64_t is no worse than failing right now with an #error. */ |
| 121 | #define U64 uint64_t |
| 122 | #endif |
| 123 | #endif |
| 124 | |
| 125 | /* Find best way to ROTL32/ROTL64 */ |
| 126 | #if defined(_MSC_VER) |
| 127 | #include <stdlib.h> /* Microsoft put _rotl declaration in here */ |
| 128 | #define ROTL32(x,r) _rotl(x,r) |
| 129 | #ifdef HAS_QUAD |
| 130 | #define ROTL64(x,r) _rotl64(x,r) |
| 131 | #endif |
| 132 | #else |
| 133 | /* gcc recognises this code and generates a rotate instruction for CPUs with one */ |
| 134 | #define ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r))) |
| 135 | #ifdef HAS_QUAD |
| 136 | #define ROTL64(x,r) (((U64)x << r) | ((U64)x >> (64 - r))) |
| 137 | #endif |
| 138 | #endif |
| 139 | |
| 140 | |
| 141 | #ifdef UV_IS_QUAD |
| 142 | #define ROTL_UV(x,r) ROTL64(x,r) |
| 143 | #else |
| 144 | #define ROTL_UV(x,r) ROTL32(x,r) |
| 145 | #endif |
| 146 | |
| 147 | /* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein. |
| 148 | * The authors claim it is relatively secure compared to the alternatives |
| 149 | * and that performance wise it is a suitable hash for languages like Perl. |
| 150 | * See: |
| 151 | * |
| 152 | * https://www.131002.net/siphash/ |
| 153 | * |
| 154 | * This implementation seems to perform slightly slower than one-at-a-time for |
| 155 | * short keys, but degrades slower for longer keys. Murmur Hash outperforms it |
| 156 | * regardless of keys size. |
| 157 | * |
| 158 | * It is 64 bit only. |
| 159 | */ |
| 160 | |
| 161 | #ifdef HAS_QUAD |
| 162 | |
| 163 | #define U8TO64_LE(p) \ |
| 164 | (((U64)((p)[0]) ) | \ |
| 165 | ((U64)((p)[1]) << 8) | \ |
| 166 | ((U64)((p)[2]) << 16) | \ |
| 167 | ((U64)((p)[3]) << 24) | \ |
| 168 | ((U64)((p)[4]) << 32) | \ |
| 169 | ((U64)((p)[5]) << 40) | \ |
| 170 | ((U64)((p)[6]) << 48) | \ |
| 171 | ((U64)((p)[7]) << 56)) |
| 172 | |
| 173 | #define SIPROUND \ |
| 174 | STMT_START { \ |
| 175 | v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \ |
| 176 | v2 += v3; v3=ROTL64(v3,16); v3 ^= v2; \ |
| 177 | v0 += v3; v3=ROTL64(v3,21); v3 ^= v0; \ |
| 178 | v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \ |
| 179 | } STMT_END |
| 180 | |
| 181 | /* SipHash-2-4 */ |
| 182 | |
| 183 | |
| 184 | #define PERL_SIPHASH_FNC(FNC,SIP_ROUNDS,SIP_FINAL_ROUNDS) \ |
| 185 | PERL_STATIC_INLINE U32 \ |
| 186 | FNC(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) { \ |
| 187 | /* "somepseudorandomlygeneratedbytes" */ \ |
| 188 | U64 v0 = UINT64_C(0x736f6d6570736575); \ |
| 189 | U64 v1 = UINT64_C(0x646f72616e646f6d); \ |
| 190 | U64 v2 = UINT64_C(0x6c7967656e657261); \ |
| 191 | U64 v3 = UINT64_C(0x7465646279746573); \ |
| 192 | \ |
| 193 | U64 b; \ |
| 194 | U64 k0 = ((const U64*)seed)[0]; \ |
| 195 | U64 k1 = ((const U64*)seed)[1]; \ |
| 196 | U64 m; \ |
| 197 | const int left = inlen & 7; \ |
| 198 | const U8 *end = in + inlen - left; \ |
| 199 | \ |
| 200 | b = ( ( U64 )(inlen) ) << 56; \ |
| 201 | v3 ^= k1; \ |
| 202 | v2 ^= k0; \ |
| 203 | v1 ^= k1; \ |
| 204 | v0 ^= k0; \ |
| 205 | \ |
| 206 | for ( ; in != end; in += 8 ) \ |
| 207 | { \ |
| 208 | m = U8TO64_LE( in ); \ |
| 209 | v3 ^= m; \ |
| 210 | \ |
| 211 | SIP_ROUNDS; \ |
| 212 | \ |
| 213 | v0 ^= m; \ |
| 214 | } \ |
| 215 | \ |
| 216 | switch( left ) \ |
| 217 | { \ |
| 218 | case 7: b |= ( ( U64 )in[ 6] ) << 48; \ |
| 219 | case 6: b |= ( ( U64 )in[ 5] ) << 40; \ |
| 220 | case 5: b |= ( ( U64 )in[ 4] ) << 32; \ |
| 221 | case 4: b |= ( ( U64 )in[ 3] ) << 24; \ |
| 222 | case 3: b |= ( ( U64 )in[ 2] ) << 16; \ |
| 223 | case 2: b |= ( ( U64 )in[ 1] ) << 8; \ |
| 224 | case 1: b |= ( ( U64 )in[ 0] ); break; \ |
| 225 | case 0: break; \ |
| 226 | } \ |
| 227 | \ |
| 228 | v3 ^= b; \ |
| 229 | \ |
| 230 | SIP_ROUNDS; \ |
| 231 | \ |
| 232 | v0 ^= b; \ |
| 233 | \ |
| 234 | v2 ^= 0xff; \ |
| 235 | \ |
| 236 | SIP_FINAL_ROUNDS \ |
| 237 | \ |
| 238 | b = v0 ^ v1 ^ v2 ^ v3; \ |
| 239 | return (U32)(b & U32_MAX); \ |
| 240 | } |
| 241 | |
| 242 | PERL_SIPHASH_FNC( |
| 243 | S_perl_hash_siphash_1_3 |
| 244 | ,SIPROUND; |
| 245 | ,SIPROUND;SIPROUND;SIPROUND; |
| 246 | ) |
| 247 | |
| 248 | PERL_SIPHASH_FNC( |
| 249 | S_perl_hash_siphash_2_4 |
| 250 | ,SIPROUND;SIPROUND; |
| 251 | ,SIPROUND;SIPROUND;SIPROUND;SIPROUND; |
| 252 | ) |
| 253 | |
| 254 | #endif /* defined(HAS_QUAD) */ |
| 255 | |
| 256 | /* - ONE_AT_A_TIME_HARD is the 5.17+ recommend ONE_AT_A_TIME variant */ |
| 257 | |
| 258 | /* This is derived from the "One-at-a-Time" algorithm by Bob Jenkins |
| 259 | * from requirements by Colin Plumb. |
| 260 | * (http://burtleburtle.net/bob/hash/doobs.html) |
| 261 | * Modified by Yves Orton to increase security for Perl 5.17 and later. |
| 262 | */ |
| 263 | PERL_STATIC_INLINE U32 |
| 264 | S_perl_hash_one_at_a_time_hard(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { |
| 265 | const unsigned char * const end = (const unsigned char *)str + len; |
| 266 | U32 hash = *((const U32*)seed) + (U32)len; |
| 267 | |
| 268 | while (str < end) { |
| 269 | hash += (hash << 10); |
| 270 | hash ^= (hash >> 6); |
| 271 | hash += *str++; |
| 272 | } |
| 273 | |
| 274 | hash += (hash << 10); |
| 275 | hash ^= (hash >> 6); |
| 276 | hash += seed[4]; |
| 277 | |
| 278 | hash += (hash << 10); |
| 279 | hash ^= (hash >> 6); |
| 280 | hash += seed[5]; |
| 281 | |
| 282 | hash += (hash << 10); |
| 283 | hash ^= (hash >> 6); |
| 284 | hash += seed[6]; |
| 285 | |
| 286 | hash += (hash << 10); |
| 287 | hash ^= (hash >> 6); |
| 288 | hash += seed[7]; |
| 289 | |
| 290 | hash += (hash << 10); |
| 291 | hash ^= (hash >> 6); |
| 292 | |
| 293 | hash += (hash << 3); |
| 294 | hash ^= (hash >> 11); |
| 295 | return (hash + (hash << 15)); |
| 296 | } |
| 297 | |
| 298 | #ifdef HAS_QUAD |
| 299 | |
| 300 | /* Hybrid hash function |
| 301 | * |
| 302 | * For short strings, 16 bytes or shorter, we use an optimised variant |
| 303 | * of One At A Time Hard, and for longer strings, we use siphash_1_3. |
| 304 | * |
| 305 | * The optimisation of One At A Time Hard means we read the key in |
| 306 | * reverse from normal, but by doing so we avoid the loop overhead. |
| 307 | */ |
| 308 | PERL_STATIC_INLINE U32 |
| 309 | S_perl_hash_oaathu_siphash_1_3(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { |
| 310 | U32 hash = *((const U32*)seed) + (U32)len; |
| 311 | switch (len) { |
| 312 | case 16: |
| 313 | hash += (hash << 10); |
| 314 | hash ^= (hash >> 6); |
| 315 | hash += str[15]; |
| 316 | case 15: |
| 317 | hash += (hash << 10); |
| 318 | hash ^= (hash >> 6); |
| 319 | hash += str[14]; |
| 320 | case 14: |
| 321 | hash += (hash << 10); |
| 322 | hash ^= (hash >> 6); |
| 323 | hash += str[13]; |
| 324 | case 13: |
| 325 | hash += (hash << 10); |
| 326 | hash ^= (hash >> 6); |
| 327 | hash += str[12]; |
| 328 | case 12: |
| 329 | hash += (hash << 10); |
| 330 | hash ^= (hash >> 6); |
| 331 | hash += str[11]; |
| 332 | case 11: |
| 333 | hash += (hash << 10); |
| 334 | hash ^= (hash >> 6); |
| 335 | hash += str[10]; |
| 336 | case 10: |
| 337 | hash += (hash << 10); |
| 338 | hash ^= (hash >> 6); |
| 339 | hash += str[9]; |
| 340 | case 9: |
| 341 | hash += (hash << 10); |
| 342 | hash ^= (hash >> 6); |
| 343 | hash += str[8]; |
| 344 | case 8: |
| 345 | hash += (hash << 10); |
| 346 | hash ^= (hash >> 6); |
| 347 | hash += str[7]; |
| 348 | case 7: |
| 349 | hash += (hash << 10); |
| 350 | hash ^= (hash >> 6); |
| 351 | hash += str[6]; |
| 352 | case 6: |
| 353 | hash += (hash << 10); |
| 354 | hash ^= (hash >> 6); |
| 355 | hash += str[5]; |
| 356 | case 5: |
| 357 | hash += (hash << 10); |
| 358 | hash ^= (hash >> 6); |
| 359 | hash += str[4]; |
| 360 | case 4: |
| 361 | hash += (hash << 10); |
| 362 | hash ^= (hash >> 6); |
| 363 | hash += str[3]; |
| 364 | case 3: |
| 365 | hash += (hash << 10); |
| 366 | hash ^= (hash >> 6); |
| 367 | hash += str[2]; |
| 368 | case 2: |
| 369 | hash += (hash << 10); |
| 370 | hash ^= (hash >> 6); |
| 371 | hash += str[1]; |
| 372 | case 1: |
| 373 | hash += (hash << 10); |
| 374 | hash ^= (hash >> 6); |
| 375 | hash += str[0]; |
| 376 | case 0: |
| 377 | hash += (hash << 10); |
| 378 | hash ^= (hash >> 6); |
| 379 | hash += seed[4]; |
| 380 | hash += (hash << 10); |
| 381 | hash ^= (hash >> 6); |
| 382 | hash += seed[5]; |
| 383 | hash += (hash << 10); |
| 384 | hash ^= (hash >> 6); |
| 385 | hash += seed[6]; |
| 386 | hash += (hash << 10); |
| 387 | hash ^= (hash >> 6); |
| 388 | hash += seed[7]; |
| 389 | hash += (hash << 10); |
| 390 | hash ^= (hash >> 6); |
| 391 | |
| 392 | hash += (hash << 3); |
| 393 | hash ^= (hash >> 11); |
| 394 | return (hash + (hash << 15)); |
| 395 | } |
| 396 | return S_perl_hash_siphash_1_3(seed+8, str, len); |
| 397 | } |
| 398 | #endif /* defined(HAS_QUAD) */ |
| 399 | |
| 400 | |
| 401 | #endif /*compile once*/ |
| 402 | |
| 403 | /* |
| 404 | * ex: set ts=8 sts=4 sw=4 et: |
| 405 | */ |