/* stadtx_hash.h
 *
 * StadtX hash function.
 * Mirrored from the Perl 5 development sources:
 * https://github.com/perl/perl5 (file: stadtx_hash.h)
 */
#ifndef STADTX_HASH_H
#define STADTX_HASH_H

/* Debug instrumentation is off unless the builder asks for it. */
#ifndef DEBUG_STADTX_HASH
#define DEBUG_STADTX_HASH 0
#endif

/* When built inside Perl, hv_func.h has already defined the U8/U16/U32/U64
 * and STRLEN types. Standalone builds fall back to <stdint.h> equivalents. */
#ifndef PERL_SEEN_HV_FUNC_H

#if !defined(U64)
    #include <stdint.h>
    #define U64 uint64_t
#endif

#if !defined(U32)
    #define U32 uint32_t
#endif

#if !defined(U8)
    #define U8 unsigned char
#endif

#if !defined(U16)
    #define U16 uint16_t
#endif

/* NOTE(review): standalone fallback for STRLEN is plain int, so key
 * lengths above INT_MAX are not supported outside of Perl. */
#ifndef STRLEN
#define STRLEN int
#endif

#endif

/* Prefer Perl's notion of a static inline function when available. */
#ifndef STADTX_STATIC_INLINE
#ifdef PERL_STATIC_INLINE
#define STADTX_STATIC_INLINE PERL_STATIC_INLINE
#else
#define STADTX_STATIC_INLINE static inline
#endif
#endif

/* Multi-statement macro wrappers (Perl defines these as do/while(0)). */
#ifndef STMT_START
#define STMT_START do
#define STMT_END while(0)
#endif


#ifndef STADTX_ALLOW_UNALIGNED_AND_LITTLE_ENDIAN
/* STADTX_ALLOW_UNALIGNED_AND_LITTLE_ENDIAN only matters if nothing has defined U8TO64_LE etc,
 * and when built with Perl these should be defined before this file is loaded.
 */
#ifdef U32_ALIGNMENT_REQUIRED
#define STADTX_ALLOW_UNALIGNED_AND_LITTLE_ENDIAN 0
#else
#define STADTX_ALLOW_UNALIGNED_AND_LITTLE_ENDIAN 1
#endif
#endif

/* Little-endian byte loaders. The fast variants read through a cast
 * pointer in a single (possibly unaligned) load; the fallback variants
 * assemble the value byte by byte and work on any alignment/endianness.
 * NOTE(review): the cast-pointer loads are type-punned reads and are not
 * strictly conforming C; they are only enabled when the platform gate
 * above (no U32_ALIGNMENT_REQUIRED) says they are safe. */
#ifndef U8TO64_LE
#if STADTX_ALLOW_UNALIGNED_AND_LITTLE_ENDIAN
#define U8TO64_LE(ptr)  (*((const U64 *)(ptr)))
#else
#define U8TO64_LE(ptr)  (\
    (U64)(ptr)[7] << 56 | \
    (U64)(ptr)[6] << 48 | \
    (U64)(ptr)[5] << 40 | \
    (U64)(ptr)[4] << 32 | \
    (U64)(ptr)[3] << 24 | \
    (U64)(ptr)[2] << 16 | \
    (U64)(ptr)[1] << 8  | \
    (U64)(ptr)[0]         \
)
#endif
#endif

#ifndef U8TO32_LE
#if STADTX_ALLOW_UNALIGNED_AND_LITTLE_ENDIAN
#define U8TO32_LE(ptr)  (*((const U32 *)(ptr)))
#else
#define U8TO32_LE(ptr)  (\
    (U32)(ptr)[3] << 24 | \
    (U32)(ptr)[2] << 16 | \
    (U32)(ptr)[1] << 8  | \
    (U32)(ptr)[0]         \
)
#endif
#endif

#ifndef U8TO16_LE
#if STADTX_ALLOW_UNALIGNED_AND_LITTLE_ENDIAN
#define U8TO16_LE(ptr)  (*((const U16 *)(ptr)))
#else
#define U8TO16_LE(ptr)  (\
    (U16)(ptr)[1] << 8  | \
    (U16)(ptr)[0]         \
)
#endif
#endif


/* Find best way to ROTL32/ROTL64 */
#if defined(_MSC_VER)
  #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
  #define ROTL32(x,r)  _rotl(x,r)
  #define ROTR32(x,r)  _rotr(x,r)
  #define ROTL64(x,r)  _rotl64(x,r)
  #define ROTR64(x,r)  _rotr64(x,r)
#else
  /* gcc recognises this code and generates a rotate instruction for CPUs with one */
  /* NOTE(review): the shift-based forms invoke undefined behaviour when
   * r is 0 or >= the type width; every rotation count used in this file
   * is a constant in 1..63, so the forms are safe here. */
  #define ROTL32(x,r)  (((U32)(x) << (r)) | ((U32)(x) >> (32 - (r))))
  #define ROTR32(x,r)  (((U32)(x) << (32 - (r))) | ((U32)(x) >> (r)))
  #define ROTL64(x,r)  ( ( (U64)(x) << (r) ) | ( (U64)(x) >> ( 64 - (r) ) ) )
  #define ROTR64(x,r)  ( ( (U64)(x) << ( 64 - (r) ) ) | ( (U64)(x) >> (r) ) )
#endif


/* do a marsaglia xor-shift permutation followed by a
 * multiply by a prime (presumably large) and another
 * marsaglia xor-shift permutation.
 * One of these thoroughly changes the bits of the input.
 * Two of these with different primes passes the Strict Avalanche Criteria
 * in all the tests I did.
 *
 * Note that v cannot end up zero after a scramble64 unless it
 * was zero in the first place.
 *
 * Contract: v must be a side-effect-free U64 lvalue (it is evaluated
 * many times). prime is parenthesized in the expansion so any expression
 * may be passed safely (macro-hygiene fix; previously it was expanded
 * bare in "v *= prime").
 */
#define STADTX_SCRAMBLE64(v,prime) STMT_START {  \
    v ^= (v >> 13);                              \
    v ^= (v << 35);                              \
    v ^= (v >> 30);                              \
    v *= (prime);                                \
    v ^= (v >> 19);                              \
    v ^= (v << 15);                              \
    v ^= (v >> 46);                              \
} STMT_END


/* Derive the 32-byte internal hash state from a 16-byte seed.
 *
 * seed_ch  - points at 16 bytes of seed material, read as two U64 words
 * state_ch - points at 32 bytes of output, written as four U64 words
 *
 * NOTE(review): both buffers are accessed through U64* casts, so callers
 * must supply U64-aligned storage on alignment-sensitive platforms
 * (stadtx_hash below uses a U64[4] local, which is fine).
 */
STADTX_STATIC_INLINE void stadtx_seed_state (
    const U8 *seed_ch,
    U8 *state_ch
) {
    const U64 *seed= (const U64 *)seed_ch;
    U64 *state= (U64 *)state_ch;
    /* first we apply two masks to each word of the seed, this means that
     * a) at least one of state[0] and state[2] is nonzero,
     * b) at least one of state[1] and state[3] is nonzero
     * c) that state[0] and state[2] are different
     * d) that state[1] and state[3] are different
     * e) that the replacement value for any zero's is a totally different from the seed value.
     *    (iow, if seed[0] is 0x43f6a8885a308d31UL then state[0] becomes 0, which is the replaced
     *    with 1, which is totally different.). */
    /* hex expansion of pi, skipping first two digits. pi= 3.2[43f6...]*/
    /* pi value in hex from here:
     * http://turner.faculty.swau.edu/mathematics/materialslibrary/pi/pibases.html*/
    state[0]= seed[0] ^ 0x43f6a8885a308d31UL;
    state[1]= seed[1] ^ 0x3198a2e03707344aUL;
    state[2]= seed[0] ^ 0x4093822299f31d00UL;
    state[3]= seed[1] ^ 0x82efa98ec4e6c894UL;
    if (!state[0]) state[0]=1;
    if (!state[1]) state[1]=2;
    if (!state[2]) state[2]=4;
    if (!state[3]) state[3]=8;
    /* and now for good measure we double scramble all four -
     * a double scramble guarantees a complete avalanche of all the
     * bits in the seed - IOW, by the time we are hashing the
     * four state vectors should be completely different and utterly
     * uncognizable from the input seed bits */
    STADTX_SCRAMBLE64(state[0],0x801178846e899d17UL);
    STADTX_SCRAMBLE64(state[0],0xdd51e5d1c9a5a151UL);
    STADTX_SCRAMBLE64(state[1],0x93a7d6c8c62e4835UL);
    STADTX_SCRAMBLE64(state[1],0x803340f36895c2b5UL);
    STADTX_SCRAMBLE64(state[2],0xbea9344eb7565eebUL);
    STADTX_SCRAMBLE64(state[2],0xcd95d1e509b995cdUL);
    STADTX_SCRAMBLE64(state[3],0x9999791977e30c13UL);
    STADTX_SCRAMBLE64(state[3],0xaab8b6b05abfc6cdUL);
}

/* Odd 64-bit multiplicative constants used to perturb the state by the
 * key length, and 32-bit constants used as per-word multipliers in the
 * bulk-mixing loop below. */
#define STADTX_K0_U64 0xb89b0f8e1655514fUL
#define STADTX_K1_U64 0x8c6f736011bd5127UL
#define STADTX_K2_U64 0x8f29bd94edce7b39UL
#define STADTX_K3_U64 0x9c1b8e1e9628323fUL

#define STADTX_K2_U32 0x802910e3
#define STADTX_K3_U32 0x819b13af
#define STADTX_K4_U32 0x91cb27e5
#define STADTX_K5_U32 0xc1a269c1

185STADTX_STATIC_INLINE U64 stadtx_hash_with_state(
186 const U8 *state_ch,
187 const U8 *key,
188 const STRLEN key_len
189) {
190 U64 *state= (U64 *)state_ch;
9995b99e 191 STRLEN len = key_len;
9d5e3f1a
YO
192 U64 v0= state[0] ^ ((key_len+1) * STADTX_K0_U64);
193 U64 v1= state[1] ^ ((key_len+2) * STADTX_K1_U64);
194 if (len < 32) {
195 switch(len >> 3) {
196 case 3:
197 v0 += U8TO64_LE(key) * STADTX_K3_U64;
198 v0= ROTR64(v0, 17) ^ v1;
199 v1= ROTR64(v1, 53) + v0;
200 key += 8;
201 case 2:
202 v0 += U8TO64_LE(key) * STADTX_K3_U64;
203 v0= ROTR64(v0, 17) ^ v1;
204 v1= ROTR64(v1, 53) + v0;
205 key += 8;
206 case 1:
207 v0 += U8TO64_LE(key) * STADTX_K3_U64;
208 v0= ROTR64(v0, 17) ^ v1;
209 v1= ROTR64(v1, 53) + v0;
210 key += 8;
211 case 0:
212 default: break;
213 }
214 switch ( len & 0x7 ) {
215 case 7: v0 += (U64)key[6] << 32;
216 case 6: v1 += (U64)key[5] << 48;
217 case 5: v0 += (U64)key[4] << 16;
218 case 4: v1 += (U64)U8TO32_LE(key);
219 break;
220 case 3: v0 += (U64)key[2] << 48;
221 case 2: v1 += (U64)U8TO16_LE(key);
222 break;
223 case 1: v0 += (U64)key[0];
224 case 0: v1 = ROTL64(v1, 32) ^ 0xFF;
225 break;
226 }
227 v1 ^= v0;
228 v0 = ROTR64(v0,33) + v1;
229 v1 = ROTL64(v1,17) ^ v0;
230 v0 = ROTL64(v0,43) + v1;
231 v1 = ROTL64(v1,31) - v0;
232 v0 = ROTL64(v0,13) ^ v1;
233 v1 -= v0;
234 v0 = ROTL64(v0,41) + v1;
235 v1 = ROTL64(v1,37) ^ v0;
236 v0 = ROTR64(v0,39) + v1;
237 v1 = ROTR64(v1,15) + v0;
238 v0 = ROTL64(v0,15) ^ v1;
239 v1 = ROTR64(v1, 5);
240 return v0 ^ v1;
241 } else {
242 U64 v2= state[2] ^ ((key_len+3) * STADTX_K2_U64);
243 U64 v3= state[3] ^ ((key_len+4) * STADTX_K3_U64);
244
245 do {
246 v0 += (U64)U8TO64_LE(key+ 0) * STADTX_K2_U32; v0= ROTL64(v0,57) ^ v3;
247 v1 += (U64)U8TO64_LE(key+ 8) * STADTX_K3_U32; v1= ROTL64(v1,63) ^ v2;
248 v2 += (U64)U8TO64_LE(key+16) * STADTX_K4_U32; v2= ROTR64(v2,47) + v0;
249 v3 += (U64)U8TO64_LE(key+24) * STADTX_K5_U32; v3= ROTR64(v3,11) - v1;
250 key += 32;
251 len -= 32;
252 } while ( len >= 32 );
253
254 switch ( len >> 3 ) {
255 case 3: v0 += ((U64)U8TO64_LE(key) * STADTX_K2_U32); key += 8; v0= ROTL64(v0,57) ^ v3;
256 case 2: v1 += ((U64)U8TO64_LE(key) * STADTX_K3_U32); key += 8; v1= ROTL64(v1,63) ^ v2;
257 case 1: v2 += ((U64)U8TO64_LE(key) * STADTX_K4_U32); key += 8; v2= ROTR64(v2,47) + v0;
258 case 0: v3 = ROTR64(v3,11) - v1;
259 }
260 v0 ^= (len+1) * STADTX_K3_U64;
261 switch ( len & 0x7 ) {
262 case 7: v1 += (U64)key[6];
263 case 6: v2 += (U64)U8TO16_LE(key+4);
264 v3 += (U64)U8TO32_LE(key);
265 break;
266 case 5: v1 += (U64)key[4];
267 case 4: v2 += (U64)U8TO32_LE(key);
268 break;
269 case 3: v3 += (U64)key[2];
270 case 2: v1 += (U64)U8TO16_LE(key);
271 break;
272 case 1: v2 += (U64)key[0];
273 case 0: v3 = ROTL64(v3, 32) ^ 0xFF;
274 break;
275 }
276
277 v1 -= v2;
278 v0 = ROTR64(v0,19);
279 v1 -= v0;
280 v1 = ROTR64(v1,53);
281 v3 ^= v1;
282 v0 -= v3;
283 v3 = ROTL64(v3,43);
284 v0 += v3;
285 v0 = ROTR64(v0, 3);
286 v3 -= v0;
287 v2 = ROTR64(v2,43) - v3;
288 v2 = ROTL64(v2,55) ^ v0;
289 v1 -= v2;
290 v3 = ROTR64(v3, 7) - v2;
291 v2 = ROTR64(v2,31);
292 v3 += v2;
293 v2 -= v1;
294 v3 = ROTR64(v3,39);
295 v2 ^= v3;
296 v3 = ROTR64(v3,17) ^ v2;
297 v1 += v3;
298 v1 = ROTR64(v1, 9);
299 v2 ^= v1;
300 v2 = ROTL64(v2,24);
301 v3 ^= v2;
302 v3 = ROTR64(v3,59);
303 v0 = ROTR64(v0, 1) - v1;
304
305 return v0 ^ v1 ^ v2 ^ v3;
306 }
307}
308
309STADTX_STATIC_INLINE U64 stadtx_hash(
310 const U8 *seed_ch,
311 const U8 *key,
312 const STRLEN key_len
313) {
314 U64 state[4];
315 stadtx_seed_state(seed_ch,(U8*)state);
316 return stadtx_hash_with_state((U8*)state,key,key_len);
317}
318
319#endif