This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Replace a loop with memchr
[perl5.git] / hv_func.h
1 /* hash a key
2  *--------------------------------------------------------------------------------------
3  * The "hash seed" feature was added in Perl 5.8.1 to perturb the results
4  * to avoid "algorithmic complexity attacks".
5  *
6  * If USE_HASH_SEED is defined, hash randomisation is done by default
7  * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed())
8  */
9 #ifndef PERL_SEEN_HV_FUNC_H /* compile once */
10 #define PERL_SEEN_HV_FUNC_H
11 #include "hv_macro.h"
12
13 #if !( 0 \
14         || defined(PERL_HASH_FUNC_SIPHASH) \
15         || defined(PERL_HASH_FUNC_SIPHASH13) \
16         || defined(PERL_HASH_FUNC_STADTX) \
17         || defined(PERL_HASH_FUNC_ZAPHOD32) \
18     )
19 #   ifdef CAN64BITHASH
20 #       define PERL_HASH_FUNC_STADTX
21 #   else
22 #       define PERL_HASH_FUNC_ZAPHOD32
23 #   endif
24 #endif
25
26 #ifndef PERL_HASH_USE_SBOX32_ALSO
27 #define PERL_HASH_USE_SBOX32_ALSO 1
28 #endif
29
30 #ifndef SBOX32_MAX_LEN
31 #define SBOX32_MAX_LEN 24
32 #endif
33
34 /* this must be after the SBOX32_MAX_LEN define */
35 #include "sbox32_hash.h"
36
37 #if defined(PERL_HASH_FUNC_SIPHASH)
38 # define __PERL_HASH_FUNC "SIPHASH_2_4"
39 # define __PERL_HASH_SEED_BYTES 16
40 # define __PERL_HASH_STATE_BYTES 32
41 # define __PERL_HASH_SEED_STATE(seed,state) S_perl_siphash_seed_state(seed,state)
42 # define __PERL_HASH_WITH_STATE(state,str,len) S_perl_hash_siphash_2_4_with_state((state),(U8*)(str),(len))
43 #elif defined(PERL_HASH_FUNC_SIPHASH13)
44 # define __PERL_HASH_FUNC "SIPHASH_1_3"
45 # define __PERL_HASH_SEED_BYTES 16
46 # define __PERL_HASH_STATE_BYTES 32
47 # define __PERL_HASH_SEED_STATE(seed,state) S_perl_siphash_seed_state(seed,state)
48 # define __PERL_HASH_WITH_STATE(state,str,len) S_perl_hash_siphash_1_3_with_state((state),(U8*)(str),(len))
49 #elif defined(PERL_HASH_FUNC_STADTX)
50 # define __PERL_HASH_FUNC "STADTX"
51 # define __PERL_HASH_SEED_BYTES 16
52 # define __PERL_HASH_STATE_BYTES 32
53 # define __PERL_HASH_SEED_STATE(seed,state) stadtx_seed_state(seed,state)
54 # define __PERL_HASH_WITH_STATE(state,str,len) (U32)stadtx_hash_with_state((state),(U8*)(str),(len))
55 # include "stadtx_hash.h"
56 #elif defined(PERL_HASH_FUNC_ZAPHOD32)
57 # define __PERL_HASH_FUNC "ZAPHOD32"
58 # define __PERL_HASH_SEED_BYTES 12
59 # define __PERL_HASH_STATE_BYTES 12
60 # define __PERL_HASH_SEED_STATE(seed,state) zaphod32_seed_state(seed,state)
61 # define __PERL_HASH_WITH_STATE(state,str,len) (U32)zaphod32_hash_with_state((state),(U8*)(str),(len))
62 # include "zaphod32_hash.h"
63 #endif
64
65 #ifndef __PERL_HASH_WITH_STATE
66 #error "No hash function defined!"
67 #endif
68 #ifndef __PERL_HASH_SEED_BYTES
69 #error "__PERL_HASH_SEED_BYTES not defined"
70 #endif
71 #ifndef __PERL_HASH_FUNC
72 #error "__PERL_HASH_FUNC not defined"
73 #endif
74
75
76 #if PERL_HASH_USE_SBOX32_ALSO != 1
77 # define _PERL_HASH_FUNC                        __PERL_HASH_FUNC
78 # define _PERL_HASH_SEED_BYTES                  __PERL_HASH_SEED_BYTES
79 # define _PERL_HASH_STATE_BYTES                 __PERL_HASH_STATE_BYTES
80 # define _PERL_HASH_SEED_STATE(seed,state)      __PERL_HASH_SEED_STATE(seed,state)
81 # define _PERL_HASH_WITH_STATE(state,str,len)   __PERL_HASH_WITH_STATE(state,str,len)
82 #else
83
84 #define _PERL_HASH_FUNC         "SBOX32_WITH_" __PERL_HASH_FUNC
85
86 #define _PERL_HASH_SEED_BYTES   ( __PERL_HASH_SEED_BYTES + (int)( 3 * sizeof(U32) ) )
87
88 #define _PERL_HASH_STATE_BYTES  \
89     ( __PERL_HASH_STATE_BYTES + ( ( 1 + ( 256 * SBOX32_MAX_LEN ) ) * sizeof(U32) ) )
90
91 #define _PERL_HASH_SEED_STATE(seed,state) STMT_START {                                      \
92     __PERL_HASH_SEED_STATE(seed,state);                                                     \
93     sbox32_seed_state96(seed + __PERL_HASH_SEED_BYTES, state + __PERL_HASH_STATE_BYTES);    \
94 } STMT_END
95
96 #define _PERL_HASH_WITH_STATE(state,str,len)                                            \
97     (LIKELY(len <= SBOX32_MAX_LEN)                                                      \
98         ? sbox32_hash_with_state((state + __PERL_HASH_STATE_BYTES),(U8*)(str),(len))    \
99         : __PERL_HASH_WITH_STATE((state),(str),(len)))
100
101 #endif
102
103 PERL_STATIC_INLINE
104 U32 S_perl_hash_with_seed(const U8 * const seed, const U8 * const str, const STRLEN len)
105 {
106     U8 state[_PERL_HASH_STATE_BYTES];
107     _PERL_HASH_SEED_STATE(seed,state);
108     return _PERL_HASH_WITH_STATE(state,str,len);
109 }
110
111 #define PERL_HASH_WITH_SEED(seed,hash,str,len) \
112     (hash) = S_perl_hash_with_seed((const U8 *) seed, (const U8 *) str,len)
113 #define PERL_HASH_WITH_STATE(state,hash,str,len) \
114     (hash) = _PERL_HASH_WITH_STATE((state),(U8*)(str),(len))
115 #define PERL_HASH_SEED_STATE(seed,state) _PERL_HASH_SEED_STATE(seed,state)
116 #define PERL_HASH_SEED_BYTES _PERL_HASH_SEED_BYTES
117 #define PERL_HASH_STATE_BYTES _PERL_HASH_STATE_BYTES
118 #define PERL_HASH_FUNC        _PERL_HASH_FUNC
119
120 #ifdef PERL_USE_SINGLE_CHAR_HASH_CACHE
121 #define PERL_HASH(state,str,len) \
122     (hash) = ((len) < 2 ? ( (len) == 0 ? PL_hash_chars[256] : PL_hash_chars[(U8)(str)[0]] ) \
123                        : _PERL_HASH_WITH_STATE(PL_hash_state,(U8*)(str),(len)))
124 #else
125 #define PERL_HASH(hash,str,len) \
126     PERL_HASH_WITH_STATE(PL_hash_state,hash,(U8*)(str),(len))
127 #endif
128
129 /* Setup the hash seed, either we do things dynamically at start up,
130  * including reading from the environment, or we randomly setup the
131  * seed. The seed will be passed into the PERL_HASH_SEED_STATE() function
132  * defined for the configuration defined for this perl, which will then
133  * initialze whatever state it might need later in hashing. */
134
135 #ifndef PERL_HASH_SEED
136 #   if defined(USE_HASH_SEED)
137 #       define PERL_HASH_SEED PL_hash_seed
138 #   else
139        /* this is a 512 bit seed, which should be more than enough for the
140         * configuration of any of our hash functions (with or without sbox).
141         * If you actually use a hard coded seed, you are strongly encouraged
142         * to replace this with something else of the correct length
143         * for the hash function you are using (24-32 bytes depending on build
144         * options). Repeat, you are *STRONGLY* encouraged not to use the value
145         * provided here.
146         */
147 #       define PERL_HASH_SEED \
148            ((const U8 *)"A long string of pseudorandomly "  \
149                         "chosen bytes for hashing in Perl")
150 #   endif
151 #endif
152
153 /* legacy - only mod_perl should be doing this.  */
154 #ifdef PERL_HASH_INTERNAL_ACCESS
155 #define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len)
156 #endif
157
158 /* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein.
159  * The authors claim it is relatively secure compared to the alternatives
160  * and that performance wise it is a suitable hash for languages like Perl.
161  * See:
162  *
163  * https://www.131002.net/siphash/
164  *
165  * This implementation seems to perform slightly slower than one-at-a-time for
166  * short keys, but degrades slower for longer keys. Murmur Hash outperforms it
167  * regardless of keys size.
168  *
169  * It is 64 bit only.
170  */
171
172 #ifdef CAN64BITHASH
173
174 #define SIPROUND            \
175   STMT_START {              \
176     v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \
177     v2 += v3; v3=ROTL64(v3,16); v3 ^= v2;     \
178     v0 += v3; v3=ROTL64(v3,21); v3 ^= v0;     \
179     v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \
180   } STMT_END
181
182 #define SIPHASH_SEED_STATE(key,v0,v1,v2,v3) \
183 do {                                    \
184     v0 = v2 = U8TO64_LE(key + 0);       \
185     v1 = v3 = U8TO64_LE(key + 8);       \
186   /* "somepseudorandomlygeneratedbytes" */  \
187     v0 ^= UINT64_C(0x736f6d6570736575);  \
188     v1 ^= UINT64_C(0x646f72616e646f6d);      \
189     v2 ^= UINT64_C(0x6c7967656e657261);      \
190     v3 ^= UINT64_C(0x7465646279746573);      \
191 } while (0)
192
193 PERL_STATIC_INLINE
194 void S_perl_siphash_seed_state(const unsigned char * const seed_buf, unsigned char * state_buf) {
195     U64 *v= (U64*) state_buf;
196     SIPHASH_SEED_STATE(seed_buf, v[0],v[1],v[2],v[3]);
197 }
198
199 #define PERL_SIPHASH_FNC(FNC,SIP_ROUNDS,SIP_FINAL_ROUNDS) \
200 PERL_STATIC_INLINE U32 \
201 FNC ## _with_state \
202   (const unsigned char * const state, const unsigned char *in, const STRLEN inlen) \
203 {                                           \
204   const int left = inlen & 7;               \
205   const U8 *end = in + inlen - left;        \
206                                             \
207   U64 b = ( ( U64 )(inlen) ) << 56;         \
208   U64 m;                                    \
209   U64 v0 = U8TO64_LE(state);                \
210   U64 v1 = U8TO64_LE(state+8);              \
211   U64 v2 = U8TO64_LE(state+16);             \
212   U64 v3 = U8TO64_LE(state+24);             \
213                                             \
214   for ( ; in != end; in += 8 )              \
215   {                                         \
216     m = U8TO64_LE( in );                    \
217     v3 ^= m;                                \
218                                             \
219     SIP_ROUNDS;                             \
220                                             \
221     v0 ^= m;                                \
222   }                                         \
223                                             \
224   switch( left )                            \
225   {                                         \
226   case 7: b |= ( ( U64 )in[ 6] )  << 48;    \
227   case 6: b |= ( ( U64 )in[ 5] )  << 40;    \
228   case 5: b |= ( ( U64 )in[ 4] )  << 32;    \
229   case 4: b |= ( ( U64 )in[ 3] )  << 24;    \
230   case 3: b |= ( ( U64 )in[ 2] )  << 16;    \
231   case 2: b |= ( ( U64 )in[ 1] )  <<  8;    \
232   case 1: b |= ( ( U64 )in[ 0] ); break;    \
233   case 0: break;                            \
234   }                                         \
235                                             \
236   v3 ^= b;                                  \
237                                             \
238   SIP_ROUNDS;                               \
239                                             \
240   v0 ^= b;                                  \
241                                             \
242   v2 ^= 0xff;                               \
243                                             \
244   SIP_FINAL_ROUNDS                          \
245                                             \
246   b = v0 ^ v1 ^ v2  ^ v3;                   \
247   return (U32)(b & U32_MAX);                \
248 }                                           \
249                                             \
250 PERL_STATIC_INLINE U32                      \
251 FNC (const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) \
252 {                                                                   \
253     U64 state[4];                                                   \
254     SIPHASH_SEED_STATE(seed,state[0],state[1],state[2],state[3]);   \
255     return FNC ## _with_state((U8*)state,in,inlen);                 \
256 }
257
258
259 PERL_SIPHASH_FNC(
260     S_perl_hash_siphash_1_3
261     ,SIPROUND;
262     ,SIPROUND;SIPROUND;SIPROUND;
263 )
264
265 PERL_SIPHASH_FNC(
266     S_perl_hash_siphash_2_4
267     ,SIPROUND;SIPROUND;
268     ,SIPROUND;SIPROUND;SIPROUND;SIPROUND;
269 )
270 #endif /* defined(CAN64BITHASH) */
271
272
273 #endif /*compile once*/
274
275 /*
276  * ex: set ts=8 sts=4 sw=4 et:
277  */