#ifndef STADTX_HASH_H
#define STADTX_HASH_H

#ifndef DEBUG_STADTX_HASH
#define DEBUG_STADTX_HASH 0
#endif

#ifndef PERL_SEEN_HV_FUNC_H

#if !defined(U64)
#include <stdint.h>
#define U64 uint64_t
#endif

#if !defined(U32)
#define U32 uint32_t
#endif

#if !defined(U8)
#define U8 unsigned char
#endif

#if !defined(U16)
#define U16 uint16_t
#endif

#ifndef STRLEN
#define STRLEN int
#endif

#endif

#ifndef STADTX_STATIC_INLINE
#ifdef PERL_STATIC_INLINE
#define STADTX_STATIC_INLINE PERL_STATIC_INLINE
#else
#define STADTX_STATIC_INLINE static inline
#endif
#endif

#ifndef STMT_START
#define STMT_START do
#define STMT_END while(0)
#endif

/* Find best way to ROTL32/ROTL64 */
#if defined(_MSC_VER)
#include <stdlib.h>  /* Microsoft put the _rotl declarations in here */
#define ROTL32(x,r) _rotl(x,r)
#define ROTR32(x,r) _rotr(x,r)
#define ROTL64(x,r) _rotl64(x,r)
#define ROTR64(x,r) _rotr64(x,r)
#else
/* gcc recognises this code and generates a rotate instruction for CPUs with one */
#define ROTL32(x,r) (((U32)(x) << (r)) | ((U32)(x) >> (32 - (r))))
#define ROTR32(x,r) (((U32)(x) << (32 - (r))) | ((U32)(x) >> (r)))
#define ROTL64(x,r) ( ( (U64)(x) << (r) ) | ( (U64)(x) >> ( 64 - (r) ) ) )
#define ROTR64(x,r) ( ( (U64)(x) << ( 64 - (r) ) ) | ( (U64)(x) >> (r) ) )
#endif
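
/* The hash body below reads the key through U8TO64_LE/U8TO32_LE/U8TO16_LE,
 * which are not defined anywhere else in this header.  The definitions
 * below are a minimal fallback sketch, not the upstream selection logic:
 * they assume a little-endian target and use memcpy so that unaligned
 * reads stay well-defined.  A big-endian build that wants the canonical
 * hash values would need to assemble each value byte by byte instead, and
 * a build that already provides these macros keeps its own.  The helper
 * names stadtx_u8to64_le/u8to32_le/u8to16_le are local to this fallback. */
#if !defined(U8TO64_LE) || !defined(U8TO32_LE) || !defined(U8TO16_LE)
#include <string.h>
#endif

#ifndef U8TO64_LE
STADTX_STATIC_INLINE U64 stadtx_u8to64_le(const U8 *ptr) {
    U64 v;
    memcpy(&v, ptr, sizeof(v)); /* assumes a little-endian host */
    return v;
}
#define U8TO64_LE(ptr) stadtx_u8to64_le(ptr)
#endif

#ifndef U8TO32_LE
STADTX_STATIC_INLINE U32 stadtx_u8to32_le(const U8 *ptr) {
    U32 v;
    memcpy(&v, ptr, sizeof(v)); /* assumes a little-endian host */
    return v;
}
#define U8TO32_LE(ptr) stadtx_u8to32_le(ptr)
#endif

#ifndef U8TO16_LE
STADTX_STATIC_INLINE U16 stadtx_u8to16_le(const U8 *ptr) {
    U16 v;
    memcpy(&v, ptr, sizeof(v)); /* assumes a little-endian host */
    return v;
}
#define U8TO16_LE(ptr) stadtx_u8to16_le(ptr)
#endif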

/* Do a Marsaglia xor-shift permutation followed by a
 * multiply by a prime (presumably large) and another
 * Marsaglia xor-shift permutation.
 * One of these thoroughly changes the bits of the input.
 * Two of these with different primes pass the Strict Avalanche
 * Criterion in all the tests I did.
 *
 * Note that v cannot end up zero after a scramble64 unless it
 * was zero in the first place: every step (an xor-shift, or a
 * multiply by an odd prime) is a bijection mod 2^64 that maps
 * zero to zero, so nonzero input stays nonzero.
 */
#define STADTX_SCRAMBLE64(v,prime) STMT_START { \
    v ^= (v >> 13);                             \
    v ^= (v << 35);                             \
    v ^= (v >> 30);                             \
    v *= prime;                                 \
    v ^= (v >> 19);                             \
    v ^= (v << 15);                             \
    v ^= (v >> 46);                             \
} STMT_END
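
/* A small, hedged self-check illustrating the note above; the guard macro
 * STADTX_SCRAMBLE64_DEMO is illustrative only and not part of the original
 * header.  Each xor-shift is an invertible linear map over GF(2) and the
 * multiply is by an odd constant, so the whole scramble is a bijection
 * mod 2^64 whose only preimage of zero is zero. */
#ifdef STADTX_SCRAMBLE64_DEMO
#include <assert.h>
STADTX_STATIC_INLINE void stadtx_scramble64_demo(void) {
    U64 z = 0;
    U64 v = 1;
    STADTX_SCRAMBLE64(z, UINT64_C(0x801178846e899d17));
    STADTX_SCRAMBLE64(v, UINT64_C(0x801178846e899d17));
    assert(z == 0); /* zero stays zero ... */
    assert(v != 0); /* ... and nonzero input cannot collapse to zero */
}
#endif /* STADTX_SCRAMBLE64_DEMO */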

STADTX_STATIC_INLINE void stadtx_seed_state (
    const U8 *seed_ch,
    U8 *state_ch
) {
    const U64 *seed= (const U64 *)seed_ch;
    U64 *state= (U64 *)state_ch;
    /* first we xor two different constants into each word of the seed,
     * which guarantees that
     * a) at least one of state[0] and state[2] is nonzero,
     * b) at least one of state[1] and state[3] is nonzero,
     * c) state[0] and state[2] are different,
     * d) state[1] and state[3] are different,
     * e) the replacement value for any zero word is totally different
     *    from the seed value (IOW, if seed[0] is 0x43f6a8885a308d31UL
     *    then state[0] becomes 0, which is then replaced with 1, which
     *    is totally different). */
    /* hex expansion of pi, skipping first two digits. pi= 3.2[43f6...] */
    /* pi value in hex from here:
     * http://turner.faculty.swau.edu/mathematics/materialslibrary/pi/pibases.html */
    state[0]= seed[0] ^ UINT64_C(0x43f6a8885a308d31);
    state[1]= seed[1] ^ UINT64_C(0x3198a2e03707344a);
    state[2]= seed[0] ^ UINT64_C(0x4093822299f31d00);
    state[3]= seed[1] ^ UINT64_C(0x82efa98ec4e6c894);
    if (!state[0]) state[0]=1;
    if (!state[1]) state[1]=2;
    if (!state[2]) state[2]=4;
    if (!state[3]) state[3]=8;
    /* and now for good measure we double scramble all four -
     * a double scramble guarantees a complete avalanche of all the
     * bits in the seed - IOW, by the time we are hashing, the
     * four state words should be completely different and utterly
     * unrecognizable from the input seed bits */
    STADTX_SCRAMBLE64(state[0],UINT64_C(0x801178846e899d17));
    STADTX_SCRAMBLE64(state[0],UINT64_C(0xdd51e5d1c9a5a151));
    STADTX_SCRAMBLE64(state[1],UINT64_C(0x93a7d6c8c62e4835));
    STADTX_SCRAMBLE64(state[1],UINT64_C(0x803340f36895c2b5));
    STADTX_SCRAMBLE64(state[2],UINT64_C(0xbea9344eb7565eeb));
    STADTX_SCRAMBLE64(state[2],UINT64_C(0xcd95d1e509b995cd));
    STADTX_SCRAMBLE64(state[3],UINT64_C(0x9999791977e30c13));
    STADTX_SCRAMBLE64(state[3],UINT64_C(0xaab8b6b05abfc6cd));
}

#define STADTX_K0_U64 UINT64_C(0xb89b0f8e1655514f)
#define STADTX_K1_U64 UINT64_C(0x8c6f736011bd5127)
#define STADTX_K2_U64 UINT64_C(0x8f29bd94edce7b39)
#define STADTX_K3_U64 UINT64_C(0x9c1b8e1e9628323f)

#define STADTX_K2_U32 0x802910e3
#define STADTX_K3_U32 0x819b13af
#define STADTX_K4_U32 0x91cb27e5
#define STADTX_K5_U32 0xc1a269c1
STADTX_STATIC_INLINE U64 stadtx_hash_with_state(
    const U8 *state_ch,
    const U8 *key,
    const STRLEN key_len
) {
    const U64 *state= (const U64 *)state_ch;
    STRLEN len = key_len;
    U64 v0= state[0] ^ ((key_len+1) * STADTX_K0_U64);
    U64 v1= state[1] ^ ((key_len+2) * STADTX_K1_U64);
    if (len < 32) {
        switch(len >> 3) {
            case 3:
                v0 += U8TO64_LE(key) * STADTX_K3_U64;
                v0= ROTR64(v0, 17) ^ v1;
                v1= ROTR64(v1, 53) + v0;
                key += 8;
                /* FALLTHROUGH */
            case 2:
                v0 += U8TO64_LE(key) * STADTX_K3_U64;
                v0= ROTR64(v0, 17) ^ v1;
                v1= ROTR64(v1, 53) + v0;
                key += 8;
                /* FALLTHROUGH */
            case 1:
                v0 += U8TO64_LE(key) * STADTX_K3_U64;
                v0= ROTR64(v0, 17) ^ v1;
                v1= ROTR64(v1, 53) + v0;
                key += 8;
                /* FALLTHROUGH */
            case 0:
            default: break;
        }
        switch ( len & 0x7 ) {
            case 7: v0 += (U64)key[6] << 32;
            /* FALLTHROUGH */
            case 6: v1 += (U64)key[5] << 48;
            /* FALLTHROUGH */
            case 5: v0 += (U64)key[4] << 16;
            /* FALLTHROUGH */
            case 4: v1 += (U64)U8TO32_LE(key);
                    break;
            case 3: v0 += (U64)key[2] << 48;
            /* FALLTHROUGH */
            case 2: v1 += (U64)U8TO16_LE(key);
                    break;
            case 1: v0 += (U64)key[0];
            /* FALLTHROUGH */
            case 0: v1 = ROTL64(v1, 32) ^ 0xFF;
                    break;
        }
        v1 ^= v0;
        v0 = ROTR64(v0,33) + v1;
        v1 = ROTL64(v1,17) ^ v0;
        v0 = ROTL64(v0,43) + v1;
        v1 = ROTL64(v1,31) - v0;
        v0 = ROTL64(v0,13) ^ v1;
        v1 -= v0;
        v0 = ROTL64(v0,41) + v1;
        v1 = ROTL64(v1,37) ^ v0;
        v0 = ROTR64(v0,39) + v1;
        v1 = ROTR64(v1,15) + v0;
        v0 = ROTL64(v0,15) ^ v1;
        v1 = ROTR64(v1, 5);
        return v0 ^ v1;
    } else {
        U64 v2= state[2] ^ ((key_len+3) * STADTX_K2_U64);
        U64 v3= state[3] ^ ((key_len+4) * STADTX_K3_U64);

        do {
            v0 += (U64)U8TO64_LE(key+ 0) * STADTX_K2_U32; v0= ROTL64(v0,57) ^ v3;
            v1 += (U64)U8TO64_LE(key+ 8) * STADTX_K3_U32; v1= ROTL64(v1,63) ^ v2;
            v2 += (U64)U8TO64_LE(key+16) * STADTX_K4_U32; v2= ROTR64(v2,47) + v0;
            v3 += (U64)U8TO64_LE(key+24) * STADTX_K5_U32; v3= ROTR64(v3,11) - v1;
            key += 32;
            len -= 32;
        } while ( len >= 32 );

        switch ( len >> 3 ) {
            case 3: v0 += ((U64)U8TO64_LE(key) * STADTX_K2_U32); key += 8; v0= ROTL64(v0,57) ^ v3;
            /* FALLTHROUGH */
            case 2: v1 += ((U64)U8TO64_LE(key) * STADTX_K3_U32); key += 8; v1= ROTL64(v1,63) ^ v2;
            /* FALLTHROUGH */
            case 1: v2 += ((U64)U8TO64_LE(key) * STADTX_K4_U32); key += 8; v2= ROTR64(v2,47) + v0;
            /* FALLTHROUGH */
            case 0: v3 = ROTR64(v3,11) - v1;
            /* FALLTHROUGH */
        }
        v0 ^= (len+1) * STADTX_K3_U64;
        switch ( len & 0x7 ) {
            case 7: v1 += (U64)key[6];
            /* FALLTHROUGH */
            case 6: v2 += (U64)U8TO16_LE(key+4);
                    v3 += (U64)U8TO32_LE(key);
                    break;
            case 5: v1 += (U64)key[4];
            /* FALLTHROUGH */
            case 4: v2 += (U64)U8TO32_LE(key);
                    break;
            case 3: v3 += (U64)key[2];
            /* FALLTHROUGH */
            case 2: v1 += (U64)U8TO16_LE(key);
                    break;
            case 1: v2 += (U64)key[0];
            /* FALLTHROUGH */
            case 0: v3 = ROTL64(v3, 32) ^ 0xFF;
                    break;
        }

        v1 -= v2;
        v0 = ROTR64(v0,19);
        v1 -= v0;
        v1 = ROTR64(v1,53);
        v3 ^= v1;
        v0 -= v3;
        v3 = ROTL64(v3,43);
        v0 += v3;
        v0 = ROTR64(v0, 3);
        v3 -= v0;
        v2 = ROTR64(v2,43) - v3;
        v2 = ROTL64(v2,55) ^ v0;
        v1 -= v2;
        v3 = ROTR64(v3, 7) - v2;
        v2 = ROTR64(v2,31);
        v3 += v2;
        v2 -= v1;
        v3 = ROTR64(v3,39);
        v2 ^= v3;
        v3 = ROTR64(v3,17) ^ v2;
        v1 += v3;
        v1 = ROTR64(v1, 9);
        v2 ^= v1;
        v2 = ROTL64(v2,24);
        v3 ^= v2;
        v3 = ROTR64(v3,59);
        v0 = ROTR64(v0, 1) - v1;

        return v0 ^ v1 ^ v2 ^ v3;
    }
}

STADTX_STATIC_INLINE U64 stadtx_hash(
    const U8 *seed_ch,
    const U8 *key,
    const STRLEN key_len
) {
    U64 state[4];
    stadtx_seed_state(seed_ch,(U8*)state);
    return stadtx_hash_with_state((U8*)state,key,key_len);
}
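
/* Usage sketch (not part of the original header): a 16-byte seed is
 * expanded by stadtx_seed_state() into a 32-byte state, which can then be
 * reused across many stadtx_hash_with_state() calls; stadtx_hash() is the
 * one-shot wrapper that does both steps for a single key.  The guard macro
 * STADTX_HASH_EXAMPLE and the seed values below are illustrative only;
 * define the guard in a test translation unit to compile this main(). */
#ifdef STADTX_HASH_EXAMPLE
#include <stdio.h>
#include <string.h>
int main(void) {
    /* Declare the seed and state as U64 arrays so they are suitably
     * aligned for the U64* casts done inside the hash functions. */
    U64 seed[2] = { UINT64_C(0x0123456789abcdef), UINT64_C(0xfedcba9876543210) };
    U64 state[4];
    const char *msg = "hello, world";
    const STRLEN msg_len = (STRLEN)strlen(msg);
    U64 h1, h2;

    /* One-shot: seed + key in a single call. */
    h1 = stadtx_hash((const U8 *)seed, (const U8 *)msg, msg_len);

    /* Seed once, then hash any number of keys with the expanded state. */
    stadtx_seed_state((const U8 *)seed, (U8 *)state);
    h2 = stadtx_hash_with_state((const U8 *)state, (const U8 *)msg, msg_len);

    printf("one-shot:   %016llx\n", (unsigned long long)h1);
    printf("with state: %016llx\n", (unsigned long long)h2);
    return h1 == h2 ? 0 : 1; /* both paths must agree */
}
#endif /* STADTX_HASH_EXAMPLE */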

#endif