This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Standardize removal of escapes in unixify.
[perl5.git] / hv.h
CommitLineData
a0d0e21e 1/* hv.h
79072805 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1996, 1997, 1998, 1999,
62a1a1ef 4 * 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, by Larry Wall and others
79072805
LW
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
79072805
LW
9 */
10
5cbe4eec 11/* entry in hash value chain */
79072805 12struct he {
6f51351d
NC
13 /* Keep hent_next first in this structure, because sv_free_arenas takes
14 advantage of this to share code between the he arenas and the SV
15 body arenas */
5cbe4eec
MLF
16 HE *hent_next; /* next entry in chain */
17 HEK *hent_hek; /* hash key */
de616631
NC
18 union {
19 SV *hent_val; /* scalar value that was hashed */
20 Size_t hent_refcount; /* references for this shared hash key */
21 } he_valu;
bbce6d69 22};
23
5cbe4eec 24/* hash key -- defined separately for use as shared pointer */
ff68c719 25struct hek {
5cbe4eec
MLF
26 U32 hek_hash; /* hash of key */
27 I32 hek_len; /* length of hash key */
28 char hek_key[1]; /* variable-length hash key */
e05949c7 29 /* the hash-key is \0-terminated */
1e54db1a
JH
30 /* after the \0 there is a byte for flags, such as whether the key
31 is UTF-8 */
79072805
LW
32};
33
cbae3960
NC
34struct shared_he {
35 struct he shared_he_he;
36 struct hek shared_he_hek;
37};
bfcb3514
NC
38
39/* Subject to change.
40 Don't access this directly.
e1a479c5 41 Use the funcs in mro.c
bfcb3514 42*/
e1a479c5 43
b2685f0c
NC
44struct mro_alg {
45 AV *(*resolve)(pTHX_ HV* stash, U32 level);
46 const char *name;
47 U16 length;
48 U16 kflags; /* For the hash API - set HVhek_UTF8 if name is UTF-8 */
49 U32 hash; /* or 0 */
50};
e1a479c5
BB
51
52struct mro_meta {
9953ff72
NC
53 /* a hash holding the different MROs private data. */
54 HV *mro_linear_all;
3a6fa573
NC
55 /* a pointer directly to the current MROs private data. If mro_linear_all
56 is NULL, this owns the SV reference, else it is just a pointer to a
57 value stored in and owned by mro_linear_all. */
58 SV *mro_linear_current;
dd69841b
BB
59 HV *mro_nextmethod; /* next::method caching */
60 U32 cache_gen; /* Bumping this invalidates our method cache */
70cd14a1 61 U32 pkg_gen; /* Bumps when local methods/@ISA change */
3d76853f 62 const struct mro_alg *mro_which; /* which mro alg is in use? */
a49ba3fc 63 HV *isa; /* Everything this class @ISA */
e1a479c5
BB
64};
65
123892d9
NC
66#define MRO_GET_PRIVATE_DATA(smeta, which) \
67 (((smeta)->mro_which && (which) == (smeta)->mro_which) \
3a6fa573 68 ? (smeta)->mro_linear_current \
123892d9
NC
69 : Perl_mro_get_private_data(aTHX_ (smeta), (which)))
70
e1a479c5
BB
71/* Subject to change.
72 Don't access this directly.
73*/
74
15d9236d
NC
75union _xhvnameu {
76 HEK *xhvnameu_name; /* When xhv_name_count is 0 */
77 HEK **xhvnameu_names; /* When xhv_name_count is non-0 */
78};
79
bfcb3514 80struct xpvhv_aux {
15d9236d 81 union _xhvnameu xhv_name_u; /* name, if a symbol table */
86f55936 82 AV *xhv_backreferences; /* back references for weak references */
bfcb3514
NC
83 HE *xhv_eiter; /* current entry of iterator */
84 I32 xhv_riter; /* current root of iterator */
7dc86639 85
15d9236d 86/* Concerning xhv_name_count: When non-zero, xhv_name_u contains a pointer
78b79c77
FC
87 * to an array of HEK pointers, this being the length. The first element is
88 * the name of the stash, which may be NULL. If xhv_name_count is positive,
89 * then *xhv_name is one of the effective names. If xhv_name_count is nega-
15d9236d 90 * tive, then xhv_name_u.xhvnameu_names[1] is the first effective name.
78b79c77
FC
91 */
92 I32 xhv_name_count;
bc85b3a1 93 struct mro_meta *xhv_mro_meta;
aae43805 94 HV * xhv_super; /* SUPER method cache */
78b79c77 95};
bfcb3514 96
5cbe4eec 97/* hash structure: */
6ee623d5 98/* This structure must match the beginning of struct xpvmg in sv.h. */
79072805 99struct xpvhv {
6e128786
NC
100 HV* xmg_stash; /* class package */
101 union _xmgu xmg_u;
359164a0 102 STRLEN xhv_keys; /* total keys, including placeholders */
c8e503bf 103 STRLEN xhv_max; /* subscript of last element of xhv_array */
79072805
LW
104};
105
5cbe4eec 106/* hash a key */
5afd6d42
JH
107/* The use of a temporary pointer and the casting games
108 * is needed to serve the dual purposes of
109 * (a) the hashed data being interpreted as "unsigned char" (new since 5.8,
df805c3e 110 * a "char" can be either signed or unsigned, depending on the compiler)
5afd6d42 111 * (b) catering for old code that uses a "char"
830b38bd 112 *
504f80c1
JH
113 * The "hash seed" feature was added in Perl 5.8.1 to perturb the results
114 * to avoid "algorithmic complexity attacks".
830b38bd
JH
115 *
116 * If USE_HASH_SEED is defined, hash randomisation is done by default
117 * If USE_HASH_SEED_EXPLICIT is defined, hash randomisation is done
118 * only if the environment variable PERL_HASH_SEED is set.
7dc86639 119 * (see also perl.c:perl_parse() and S_init_tls_and_interp() and util.c:get_hash_seed())
5afd6d42 120 */
830b38bd
JH
121#ifndef PERL_HASH_SEED
122# if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT)
7dc86639 123# define PERL_HASH_SEED PL_hash_seed
830b38bd 124# else
7dc86639 125# define PERL_HASH_SEED "PeRlHaShhAcKpErl"
830b38bd 126# endif
504f80c1 127#endif
f8d50d94 128
7dc86639
YO
129#define PERL_HASH_SEED_U32 *((U32*)PERL_HASH_SEED)
130#define PERL_HASH_SEED_U64_1 (((U64*)PERL_HASH_SEED)[0])
131#define PERL_HASH_SEED_U64_2 (((U64*)PERL_HASH_SEED)[1])
4886dc4f 132#define PERL_HASH_SEED_U16_x(idx) (((U16*)PERL_HASH_SEED)[idx])
bf6bd887 133
7dc86639 134/* legacy - only mod_perl should be doing this. */
3d78eb94 135#ifdef PERL_HASH_INTERNAL_ACCESS
7dc86639
YO
136#define PERL_HASH_INTERNAL(hash,str,len) PERL_HASH(hash,str,len)
137#endif
138
139/* Uncomment one of the following lines to use an alternative hash algorithm.
140#define PERL_HASH_FUNC_SDBM
141#define PERL_HASH_FUNC_DJB2
142#define PERL_HASH_FUNC_SUPERFAST
143#define PERL_HASH_FUNC_MURMUR3
144#define PERL_HASH_FUNC_SIPHASH
145#define PERL_HASH_FUNC_ONE_AT_A_TIME
8c9e24c7 146#define PERL_HASH_FUNC_ONE_AT_A_TIME_OLD
4886dc4f 147#define PERL_HASH_FUNC_BUZZHASH16
7dc86639
YO
148*/
149
8c9e24c7
YO
150#if !( 0 \
151 || defined(PERL_HASH_FUNC_SDBM) \
152 || defined(PERL_HASH_FUNC_DJB2) \
153 || defined(PERL_HASH_FUNC_SUPERFAST) \
154 || defined(PERL_HASH_FUNC_MURMUR3) \
155 || defined(PERL_HASH_FUNC_ONE_AT_A_TIME) \
156 || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD) \
157 || defined(PERL_HASH_FUNC_BUZZHASH16) \
158 )
3db6cbfc
YO
159#ifdef U64
160#define PERL_HASH_FUNC_SIPHASH
161#else
162#define PERL_HASH_FUNC_ONE_AT_A_TIME
163#endif
7dc86639
YO
164#endif
165
4886dc4f
YO
166#if defined(PERL_HASH_FUNC_BUZZHASH16)
167/* "BUZZHASH16"
168 *
169 * I whacked this together while just playing around.
170 *
171 * The idea is that instead of hashing the actual string input we use the
172 * bytes of the string as an index into a table of randomly generated
173 * 16 bit values.
174 *
175 * A left rotate is used to "mix" in previous bits as we go, and I borrowed
176 * the avalanche function from one-at-a-time for the final step. A lookup
177 * into the table based on the lower 8 bits of the length combined with
178 * the length itself is used as an initializer.
179 *
180 * The resulting hash value has no actual bits fed in from the string so
181 * I would guess it is pretty secure, although I am not a cryptographer
182 * and have no idea for sure. Nor has it been rigorously tested. On the
183 * other hand it is reasonably fast, and seems to produce reasonable
184 * distributions.
185 *
186 * Yves Orton
187 */
188
189
190#define PERL_HASH_FUNC "BUZZHASH16"
191#define PERL_HASH_SEED_BYTES 512 /* 2 bytes per octet value, 2 * 256 */
/* Find best way to ROTL32.
 *
 * BUZZHASH_ROTL32(x,r) rotates the 32-bit value x left by r bits.
 * r must be in the range 1..31: the portable fallback shifts right by
 * (32 - r), and a full-width shift is undefined in C.
 */
#if defined(_MSC_VER)
  #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
  #define BUZZHASH_ROTL32(x,r)  _rotl((x),(r))
#else
  /* gcc recognises this code and generates a rotate instruction for CPUs with one */
  /* Both arguments are parenthesised so that an expression argument such as
   * (a | b) is rotated as a whole instead of being split by precedence. */
  #define BUZZHASH_ROTL32(x,r)  (((U32)(x) << (r)) | ((U32)(x) >> (32 - (r))))
#endif
200
/* PERL_HASH(hash,str,len) -- BUZZHASH16 variant.
 *
 * Hashes the len bytes starting at str and assigns the 32-bit result to
 * hash.  str and len are each evaluated exactly once, so arbitrary
 * expressions may be passed for them.
 *
 * The seed is read as a table of U16 values through PERL_HASH_SEED_U16_x().
 * The initial state is the entry selected by the low 8 bits of the length,
 * moved into the upper half-word, plus the length.  Each input byte then
 * selects a table entry that is XORed in, followed by a rotate-and-add.
 * The last three steps are the one-at-a-time avalanche.
 */
#define PERL_HASH(hash,str,len) \
     STMT_START { \
        const char * const s_PeRlHaSh_tmp = (str); \
        const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
        const STRLEN len_PeRlHaSh = (len); \
        const unsigned char * const end_PeRlHaSh = s_PeRlHaSh + len_PeRlHaSh; \
        /* widen to U32 before shifting: a U16 promotes to signed int, and \
         * shifting a set bit 15 into the sign bit would be undefined */ \
        U32 hash_PeRlHaSh = (U32)(((U32)PERL_HASH_SEED_U16_x(len_PeRlHaSh & 0xff) << 16) \
                                  + len_PeRlHaSh); \
        while (s_PeRlHaSh < end_PeRlHaSh) { \
            hash_PeRlHaSh ^= PERL_HASH_SEED_U16_x((U8)*s_PeRlHaSh++); \
            hash_PeRlHaSh += BUZZHASH_ROTL32(hash_PeRlHaSh,11); \
        } \
        hash_PeRlHaSh += (hash_PeRlHaSh << 3); \
        hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \
        (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \
    } STMT_END
215
216#elif defined(PERL_HASH_FUNC_SIPHASH)
7dc86639
YO
217#define PERL_HASH_FUNC "SIPHASH"
218#define PERL_HASH_SEED_BYTES 16
219
220/* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein.
221 * The authors claim it is relatively secure compared to the alternatives
222 * and that performance wise it is a suitable hash for languages like Perl.
223 * See:
224 *
225 * https://www.131002.net/siphash/
226 *
227 * This implementation seems to perform slightly slower than one-at-a-time for
228 * short keys, but degrades slower for longer keys. Murmur Hash outperforms it
229 * regardless of keys size.
230 *
231 * It is 64 bit only.
232 */
233
234#define PERL_HASH_NEEDS_TWO_SEEDS
235
236#ifndef U64
237#define U64 uint64_t
238#endif
239
240#define ROTL(x,b) (U64)( ((x) << (b)) | ( (x) >> (64 - (b))) )
241
242#define U32TO8_LE(p, v) \
243 (p)[0] = (U8)((v) ); (p)[1] = (U8)((v) >> 8); \
244 (p)[2] = (U8)((v) >> 16); (p)[3] = (U8)((v) >> 24);
245
246#define U64TO8_LE(p, v) \
247 U32TO8_LE((p), (U32)((v) )); \
248 U32TO8_LE((p) + 4, (U32)((v) >> 32));
249
250#define U8TO64_LE(p) \
251 (((U64)((p)[0]) ) | \
252 ((U64)((p)[1]) << 8) | \
253 ((U64)((p)[2]) << 16) | \
254 ((U64)((p)[3]) << 24) | \
255 ((U64)((p)[4]) << 32) | \
256 ((U64)((p)[5]) << 40) | \
257 ((U64)((p)[6]) << 48) | \
258 ((U64)((p)[7]) << 56))
259
260#define SIPROUND \
261 do { \
262 v0_PeRlHaSh += v1_PeRlHaSh; v1_PeRlHaSh=ROTL(v1_PeRlHaSh,13); v1_PeRlHaSh ^= v0_PeRlHaSh; v0_PeRlHaSh=ROTL(v0_PeRlHaSh,32); \
263 v2_PeRlHaSh += v3_PeRlHaSh; v3_PeRlHaSh=ROTL(v3_PeRlHaSh,16); v3_PeRlHaSh ^= v2_PeRlHaSh; \
264 v0_PeRlHaSh += v3_PeRlHaSh; v3_PeRlHaSh=ROTL(v3_PeRlHaSh,21); v3_PeRlHaSh ^= v0_PeRlHaSh; \
265 v2_PeRlHaSh += v1_PeRlHaSh; v1_PeRlHaSh=ROTL(v1_PeRlHaSh,17); v1_PeRlHaSh ^= v2_PeRlHaSh; v2_PeRlHaSh=ROTL(v2_PeRlHaSh,32); \
266 } while(0)
267
/* SipHash-2-4
 *
 * PERL_HASH(hash,str,len) hashes the len bytes at str with SipHash-2-4,
 * keyed by the two 64-bit seed words PERL_HASH_SEED_U64_1 and
 * PERL_HASH_SEED_U64_2, and stores the low 32 bits of the 64-bit result
 * in hash.  str and len are each evaluated exactly once.
 *
 * The v0..v3 locals must keep these exact names: SIPROUND refers to them
 * directly.
 */
#define PERL_HASH(hash,str,len) STMT_START { \
    const char * const strtmp_PeRlHaSh = (str); \
    const unsigned char *in_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \
    const U32 inlen_PeRlHaSh = (len); \
    /* "somepseudorandomlygeneratedbytes" */ \
    U64 v0_PeRlHaSh = 0x736f6d6570736575ULL; \
    U64 v1_PeRlHaSh = 0x646f72616e646f6dULL; \
    U64 v2_PeRlHaSh = 0x6c7967656e657261ULL; \
    U64 v3_PeRlHaSh = 0x7465646279746573ULL; \
\
    U64 b_PeRlHaSh; \
    U64 k0_PeRlHaSh = PERL_HASH_SEED_U64_1; \
    U64 k1_PeRlHaSh = PERL_HASH_SEED_U64_2; \
    U64 m_PeRlHaSh; \
    /* number of trailing bytes that do not fill a whole 8-byte word */ \
    const int left_PeRlHaSh = inlen_PeRlHaSh & 7; \
    const U8 *end_PeRlHaSh = in_PeRlHaSh + inlen_PeRlHaSh - left_PeRlHaSh; \
\
    /* the final word carries the length (mod 256) in its top byte; use \
     * the captured length so that len is not evaluated a second time */ \
    b_PeRlHaSh = ( ( U64 )inlen_PeRlHaSh ) << 56; \
    v3_PeRlHaSh ^= k1_PeRlHaSh; \
    v2_PeRlHaSh ^= k0_PeRlHaSh; \
    v1_PeRlHaSh ^= k1_PeRlHaSh; \
    v0_PeRlHaSh ^= k0_PeRlHaSh; \
\
    /* compression: two rounds per 64-bit little-endian input word */ \
    for ( ; in_PeRlHaSh != end_PeRlHaSh; in_PeRlHaSh += 8 ) \
    { \
        m_PeRlHaSh = U8TO64_LE( in_PeRlHaSh ); \
        v3_PeRlHaSh ^= m_PeRlHaSh; \
        SIPROUND; \
        SIPROUND; \
        v0_PeRlHaSh ^= m_PeRlHaSh; \
    } \
\
    /* gather the 0..7 leftover bytes into the final word */ \
    switch( left_PeRlHaSh ) \
    { \
    case 7: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 6] )  << 48; /* FALLTHROUGH */ \
    case 6: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 5] )  << 40; /* FALLTHROUGH */ \
    case 5: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 4] )  << 32; /* FALLTHROUGH */ \
    case 4: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 3] )  << 24; /* FALLTHROUGH */ \
    case 3: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 2] )  << 16; /* FALLTHROUGH */ \
    case 2: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 1] )  <<  8; /* FALLTHROUGH */ \
    case 1: b_PeRlHaSh |= ( ( U64 )in_PeRlHaSh[ 0] ); break; \
    case 0: break; \
    } \
\
    v3_PeRlHaSh ^= b_PeRlHaSh; \
    SIPROUND; \
    SIPROUND; \
    v0_PeRlHaSh ^= b_PeRlHaSh; \
\
    /* finalization: four rounds */ \
    v2_PeRlHaSh ^= 0xff; \
    SIPROUND; \
    SIPROUND; \
    SIPROUND; \
    SIPROUND; \
    b_PeRlHaSh = v0_PeRlHaSh ^ v1_PeRlHaSh ^ v2_PeRlHaSh  ^ v3_PeRlHaSh; \
    (hash)= (U32)(b_PeRlHaSh & U32_MAX); \
} STMT_END
326
327#elif defined(PERL_HASH_FUNC_SUPERFAST)
328#define PERL_HASH_FUNC "SUPERFAST"
c3a88658 329#define PERL_HASH_SEED_BYTES 4
7dc86639
YO
330/* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in
331 * (http://burtleburtle.net/bob/hash/doobs.html)
332 * It is by Paul Hsieh (c) 2004 and is analysed here
333 * http://www.azillionmonkeys.com/qed/hash.html
334 * license terms are here:
335 * http://www.azillionmonkeys.com/qed/weblicense.html
336 */
/* get16bits(d): read two bytes at d as a little-endian 16-bit value.
 * On the compilers/CPUs listed below a direct 16-bit load is used;
 * everywhere else the value is assembled from the two bytes. */
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const U16 *) (d)))
#endif

#if !defined (get16bits)
#define get16bits(d) ((((const U8 *)(d))[1] << UINT32_C(8))\
                      +((const U8 *)(d))[0])
#endif

/* PERL_HASH(hash,str,len) -- "SuperFastHash" variant.
 *
 * Hashes the len bytes at str, starting from PERL_HASH_SEED_U32 XORed with
 * the length, and stores the 32-bit result in hash.  str and len are each
 * evaluated exactly once.
 *
 * The main loop consumes four bytes per iteration as two 16-bit reads; the
 * switch afterwards handles the 1..3 bytes that are left over.
 */
#define PERL_HASH(hash,str,len) \
     STMT_START { \
        const char * const strtmp_PeRlHaSh = (str); \
        const unsigned char *str_PeRlHaSh = (const unsigned char *)strtmp_PeRlHaSh; \
        U32 len_PeRlHaSh = (len); \
        /* mix in the captured length, not the raw macro argument */ \
        U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ len_PeRlHaSh; \
        U32 tmp_PeRlHaSh; \
        const int rem_PeRlHaSh = len_PeRlHaSh & 3; \
        len_PeRlHaSh >>= 2; \
                            \
        for (;len_PeRlHaSh > 0; len_PeRlHaSh--) { \
            hash_PeRlHaSh  += get16bits (str_PeRlHaSh); \
            tmp_PeRlHaSh    = (get16bits (str_PeRlHaSh+2) << 11) ^ hash_PeRlHaSh; \
            hash_PeRlHaSh   = (hash_PeRlHaSh << 16) ^ tmp_PeRlHaSh; \
            str_PeRlHaSh   += 2 * sizeof (U16); \
            hash_PeRlHaSh  += hash_PeRlHaSh >> 11; \
        } \
        \
        /* Handle end cases */ \
        switch (rem_PeRlHaSh) { \
            case 3: hash_PeRlHaSh += get16bits (str_PeRlHaSh); \
                    hash_PeRlHaSh ^= hash_PeRlHaSh << 16; \
                    hash_PeRlHaSh ^= str_PeRlHaSh[sizeof (U16)] << 18; \
                    hash_PeRlHaSh += hash_PeRlHaSh >> 11; \
                    break; \
            case 2: hash_PeRlHaSh += get16bits (str_PeRlHaSh); \
                    hash_PeRlHaSh ^= hash_PeRlHaSh << 11; \
                    hash_PeRlHaSh += hash_PeRlHaSh >> 17; \
                    break; \
            case 1: hash_PeRlHaSh += *str_PeRlHaSh; \
                    hash_PeRlHaSh ^= hash_PeRlHaSh << 10; \
                    hash_PeRlHaSh += hash_PeRlHaSh >> 1; \
                    break; \
            default: /* length was a multiple of four: nothing left over */ \
                    break; \
        } \
        \
        /* Force "avalanching" of final 127 bits */ \
        hash_PeRlHaSh ^= hash_PeRlHaSh << 3; \
        hash_PeRlHaSh += hash_PeRlHaSh >> 5; \
        hash_PeRlHaSh ^= hash_PeRlHaSh << 4; \
        hash_PeRlHaSh += hash_PeRlHaSh >> 17; \
        hash_PeRlHaSh ^= hash_PeRlHaSh << 25; \
        (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh >> 6)); \
    } STMT_END
389
390#elif defined(PERL_HASH_FUNC_MURMUR3)
391#define PERL_HASH_FUNC "MURMUR3"
392#define PERL_HASH_SEED_BYTES 4
393
394/*-----------------------------------------------------------------------------
395 * MurmurHash3 was written by Austin Appleby, and is placed in the public
396 * domain.
397 *
398 * This implementation was originally written by Shane Day, and is also public domain,
399 * and was modified to function as a macro similar to other perl hash functions by
400 * Yves Orton.
401 *
402 * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
403 * with support for progressive processing.
404 *
405 * If you want to understand the MurmurHash algorithm you would be much better
406 * off reading the original source. Just point your browser at:
407 * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
408 *
409 * How does it work?
410 *
411 * We can only process entire 32 bit chunks of input, except for the very end
412 * that may be shorter.
413 *
414 * To handle endianness I simply use a macro that reads a U32 and define
415 * that macro to be a direct read on little endian machines, a read and swap
416 * on big endian machines, or a byte-by-byte read if the endianness is unknown.
417 */
418
419
420/*-----------------------------------------------------------------------------
421 * Endianness, misalignment capabilities and util macros
422 *
423 * The following 3 macros are defined in this section. The other macros defined
424 * are only needed to help derive these 3.
425 *
426 * MURMUR_READ_UINT32(x) Read a little endian unsigned 32-bit int
427 * MURMUR_UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries
428 * MURMUR_ROTL32(x,r) Rotate x left by r bits
429 */
f8d50d94 430
/* Now find best way we can to READ_UINT32.
 *
 * MURMUR_READ_UINT32(ptr) yields the 32-bit little-endian value stored in
 * the four bytes at ptr.  MURMUR_UNALIGNED_SAFE is defined only by the
 * byte-by-byte variants, which never perform a word-sized load.
 *
 * In the byte-by-byte variants ptr must point to unsigned bytes.  Each byte
 * is widened to U32 before it is shifted (an unsigned char promotes to
 * signed int, and shifting a byte >= 0x80 left by 24 would overflow it),
 * and ptr is parenthesised so that pointer expressions such as (p + 1)
 * can be passed.
 */
#if (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) && U32SIZE == 4
  /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
  #define MURMUR_READ_UINT32(ptr)   (*((U32*)(ptr)))
#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
  /* TODO: Add additional cases below where a compiler provided bswap32 is available */
  #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
    #define MURMUR_READ_UINT32(ptr)   (__builtin_bswap32(*((U32*)(ptr))))
  #else
    /* Without a known fast bswap32 we're just as well off doing this */
    #define MURMUR_READ_UINT32(ptr)   ( (U32)(ptr)[0]         \
                                      | ((U32)(ptr)[1] <<  8) \
                                      | ((U32)(ptr)[2] << 16) \
                                      | ((U32)(ptr)[3] << 24) )
    #define MURMUR_UNALIGNED_SAFE
  #endif
#else
  /* Unknown endianness so last resort is to read individual bytes */
  #define MURMUR_READ_UINT32(ptr)   ( (U32)(ptr)[0]         \
                                    | ((U32)(ptr)[1] <<  8) \
                                    | ((U32)(ptr)[2] << 16) \
                                    | ((U32)(ptr)[3] << 24) )

  /* Since we're not doing word-reads we can skip the messing about with realignment */
  #define MURMUR_UNALIGNED_SAFE
#endif
451
/* Find best way to ROTL32.
 *
 * MURMUR_ROTL32(x,r) rotates the 32-bit value x left by r bits.
 * r must be in the range 1..31: the portable fallback shifts right by
 * (32 - r), and a full-width shift is undefined in C.
 */
#if defined(_MSC_VER)
  #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
  #define MURMUR_ROTL32(x,r)  _rotl((x),(r))
#else
  /* gcc recognises this code and generates a rotate instruction for CPUs with one */
  /* Both arguments are parenthesised so that an expression argument such as
   * (a | b) is rotated as a whole instead of being split by precedence. */
  #define MURMUR_ROTL32(x,r)  (((U32)(x) << (r)) | ((U32)(x) >> (32 - (r))))
#endif
460
461
462/*-----------------------------------------------------------------------------
463 * Core murmurhash algorithm macros */
464
465#define MURMUR_C1 (0xcc9e2d51)
466#define MURMUR_C2 (0x1b873593)
467#define MURMUR_C3 (0xe6546b64)
468#define MURMUR_C4 (0x85ebca6b)
469#define MURMUR_C5 (0xc2b2ae35)
470
471/* This is the main processing body of the algorithm. It operates
472 * on each full 32-bits of input. */
473#define MURMUR_DOBLOCK(h1, k1) STMT_START { \
474 k1 *= MURMUR_C1; \
475 k1 = MURMUR_ROTL32(k1,15); \
476 k1 *= MURMUR_C2; \
477 \
478 h1 ^= k1; \
479 h1 = MURMUR_ROTL32(h1,13); \
480 h1 = h1 * 5 + MURMUR_C3; \
481} STMT_END
482
483
/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
/* h1, c, n, ptr and len must be plain variable names: they are modified in
 * place.  cnt is copied into a local counter before the loop starts, so it
 * may be the same variable as len.  Each new byte enters at the top of the
 * carry while the older bytes move down; when four bytes have accumulated
 * the carry is fed to MURMUR_DOBLOCK and the count restarts at zero. */
#define MURMUR_DOBYTES(cnt, h1, c, n, ptr, len) STMT_START { \
    int MURMUR_DOBYTES_i = (cnt); \
    while(MURMUR_DOBYTES_i--) { \
        /* widen the byte to U32 first: shifting a promoted int left by 24 \
         * would overflow for bytes >= 0x80 */ \
        c = c>>8 | (U32)*ptr++<<24; \
        n++; len--; \
        if(n==4) { \
            MURMUR_DOBLOCK(h1, c); \
            n = 0; \
        } \
    } \
} STMT_END
497
498/* process the last 1..3 bytes and finalize */
499#define MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length) STMT_START { \
500 /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */\
501 PeRlHaSh_len -= PeRlHaSh_len/4*4; \
502 \
503 /* Append any remaining bytes into carry */ \
504 MURMUR_DOBYTES(PeRlHaSh_len, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_len); \
505 \
506 if (PeRlHaSh_bytes_in_carry) { \
507 PeRlHaSh_k1 = PeRlHaSh_carry >> ( 4 - PeRlHaSh_bytes_in_carry ) * 8; \
508 PeRlHaSh_k1 *= MURMUR_C1; \
509 PeRlHaSh_k1 = MURMUR_ROTL32(PeRlHaSh_k1,15); \
510 PeRlHaSh_k1 *= MURMUR_C2; \
511 PeRlHaSh_h1 ^= PeRlHaSh_k1; \
512 } \
513 PeRlHaSh_h1 ^= PeRlHaSh_total_length; \
514 \
515 /* fmix */ \
516 PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 16; \
517 PeRlHaSh_h1 *= MURMUR_C4; \
518 PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 13; \
519 PeRlHaSh_h1 *= MURMUR_C5; \
520 PeRlHaSh_h1 ^= PeRlHaSh_h1 >> 16; \
521 (hash)= PeRlHaSh_h1; \
522} STMT_END
523
524/* now we create the hash function */
525
526#if defined(UNALIGNED_SAFE)
527#define PERL_HASH(hash,str,len) STMT_START { \
5aaab254
KW
528 const char * const s_PeRlHaSh_tmp = (str); \
529 const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \
530 I32 PeRlHaSh_len = len; \
7dc86639
YO
531 \
532 U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32; \
533 U32 PeRlHaSh_k1; \
534 U32 PeRlHaSh_carry = 0; \
535 \
536 const unsigned char *PeRlHaSh_end; \
537 \
538 int PeRlHaSh_bytes_in_carry = 0; /* bytes in carry */ \
539 I32 PeRlHaSh_total_length= PeRlHaSh_len; \
540 \
541 /* This CPU handles unaligned word access */ \
542 /* Process 32-bit chunks */ \
543 PeRlHaSh_end = PeRlHaSh_ptr + PeRlHaSh_len/4*4; \
544 for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
545 PeRlHaSh_k1 = MURMUR_READ_UINT32(PeRlHaSh_ptr); \
546 MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \
547 } \
548 \
549 MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length);\
550 } STMT_END
551#else
552#define PERL_HASH(hash,str,len) STMT_START { \
5aaab254
KW
553 const char * const s_PeRlHaSh_tmp = (str); \
554 const unsigned char *PeRlHaSh_ptr = (const unsigned char *)s_PeRlHaSh_tmp; \
555 I32 PeRlHaSh_len = len; \
7dc86639
YO
556 \
557 U32 PeRlHaSh_h1 = PERL_HASH_SEED_U32; \
558 U32 PeRlHaSh_k1; \
559 U32 PeRlHaSh_carry = 0; \
560 \
561 const unsigned char *PeRlHaSh_end; \
562 \
563 int PeRlHaSh_bytes_in_carry = 0; /* bytes in carry */ \
564 I32 PeRlHaSh_total_length= PeRlHaSh_len; \
565 \
566 /* This CPU does not handle unaligned word access */ \
567 \
568 /* Consume enough so that the next data byte is word aligned */ \
569 int PeRlHaSh_i = -(long)PeRlHaSh_ptr & 3; \
570 if(PeRlHaSh_i && PeRlHaSh_i <= PeRlHaSh_len) { \
571 MURMUR_DOBYTES(PeRlHaSh_i, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_len);\
572 } \
573 \
574 /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ \
575 PeRlHaSh_end = PeRlHaSh_ptr + PeRlHaSh_len/4*4; \
576 switch(PeRlHaSh_bytes_in_carry) { /* how many bytes in carry */ \
577 case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ \
578 for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
579 PeRlHaSh_k1 = MURMUR_READ_UINT32(PeRlHaSh_ptr); \
580 MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \
581 } \
582 break; \
583 case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ \
584 for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
585 PeRlHaSh_k1 = PeRlHaSh_carry>>24; \
586 PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \
587 PeRlHaSh_k1 |= PeRlHaSh_carry<<8; \
588 MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \
589 } \
590 break; \
591 case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ \
592 for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
593 PeRlHaSh_k1 = PeRlHaSh_carry>>16; \
594 PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \
595 PeRlHaSh_k1 |= PeRlHaSh_carry<<16; \
596 MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \
597 } \
598 break; \
599 case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ \
600 for( ; PeRlHaSh_ptr < PeRlHaSh_end ; PeRlHaSh_ptr+=4) { \
601 PeRlHaSh_k1 = PeRlHaSh_carry>>8; \
602 PeRlHaSh_carry = MURMUR_READ_UINT32(PeRlHaSh_ptr); \
603 PeRlHaSh_k1 |= PeRlHaSh_carry<<24; \
604 MURMUR_DOBLOCK(PeRlHaSh_h1, PeRlHaSh_k1); \
605 } \
606 } \
607 \
608 MURMUR_FINALIZE(hash, PeRlHaSh_len, PeRlHaSh_k1, PeRlHaSh_h1, PeRlHaSh_carry, PeRlHaSh_bytes_in_carry, PeRlHaSh_ptr, PeRlHaSh_total_length);\
609 } STMT_END
610#endif
611
612#elif defined(PERL_HASH_FUNC_DJB2)
613#define PERL_HASH_FUNC "DJB2"
614#define PERL_HASH_SEED_BYTES 4
/* PERL_HASH(hash,str,len) -- DJB2 variant.
 *
 * Starts from PERL_HASH_SEED_U32 XORed with the length and, for every input
 * byte, computes state * 33 + byte (written as shift-and-add).  The result
 * is stored in hash.  str and len are each evaluated exactly once.
 */
#define PERL_HASH(hash,str,len) \
     STMT_START { \
        const char * const s_PeRlHaSh_tmp = (str); \
        const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
        I32 i_PeRlHaSh = (len); \
        /* mix in the captured length rather than re-expanding len */ \
        U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ (U32)i_PeRlHaSh; \
        while (i_PeRlHaSh--) { \
            hash_PeRlHaSh = ((hash_PeRlHaSh << 5) + hash_PeRlHaSh) + *s_PeRlHaSh++; \
        } \
        (hash) = hash_PeRlHaSh;\
    } STMT_END
626
627#elif defined(PERL_HASH_FUNC_SDBM)
628#define PERL_HASH_FUNC "SDBM"
629#define PERL_HASH_SEED_BYTES 4
/* PERL_HASH(hash,str,len) -- SDBM variant.
 *
 * Starts from PERL_HASH_SEED_U32 XORed with the length and, for every input
 * byte, computes state * 65599 + byte (written as shifts and a subtract).
 * The result is stored in hash.  str and len are each evaluated exactly
 * once.
 */
#define PERL_HASH(hash,str,len) \
     STMT_START { \
        const char * const s_PeRlHaSh_tmp = (str); \
        const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
        I32 i_PeRlHaSh = (len); \
        /* mix in the captured length rather than re-expanding len */ \
        U32 hash_PeRlHaSh = PERL_HASH_SEED_U32 ^ (U32)i_PeRlHaSh; \
        while (i_PeRlHaSh--) { \
            hash_PeRlHaSh = (hash_PeRlHaSh << 6) + (hash_PeRlHaSh << 16) - hash_PeRlHaSh + *s_PeRlHaSh++; \
        } \
        (hash) = hash_PeRlHaSh;\
    } STMT_END
641
8c9e24c7
YO
642#elif defined(PERL_HASH_FUNC_ONE_AT_A_TIME) || defined(PERL_HASH_FUNC_ONE_AT_A_TIME_OLD)
643
7dc86639
YO
644#define PERL_HASH_SEED_BYTES 4
645
8c9e24c7
YO
/* MIX_SEED_AND_LEN(seed,len) produces the initial hash state.  Its
 * arguments are parenthesised so that expression arguments keep their
 * meaning inside the expansion. */
#ifdef PERL_HASH_FUNC_ONE_AT_A_TIME
/* new version, add the length to the seed so that adding characters changes the "seed" being used. */
#define PERL_HASH_FUNC "ONE_AT_A_TIME"
#define MIX_SEED_AND_LEN(seed,len) ((seed) + (len))
#else
/* old version, just use the seed. - not recommended */
#define PERL_HASH_FUNC "ONE_AT_A_TIME_OLD"
#define MIX_SEED_AND_LEN(seed,len) (seed)
#endif

/* FYI: This is the "One-at-a-Time" algorithm by Bob Jenkins
 * from requirements by Colin Plumb.
 * (http://burtleburtle.net/bob/hash/doobs.html) */
/* PERL_HASH(hash,str,len) hashes the len bytes at str, starting from
 * MIX_SEED_AND_LEN(PERL_HASH_SEED_U32, len), and stores the 32-bit result
 * in hash.  str and len are each evaluated exactly once. */
#define PERL_HASH(hash,str,len) \
     STMT_START { \
        const char * const s_PeRlHaSh_tmp = (str); \
        const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
        const STRLEN len_PeRlHaSh = (len); \
        const unsigned char * const end_PeRlHaSh = s_PeRlHaSh + len_PeRlHaSh; \
        U32 hash_PeRlHaSh = (U32)MIX_SEED_AND_LEN(PERL_HASH_SEED_U32, len_PeRlHaSh); \
        while (s_PeRlHaSh < end_PeRlHaSh) { \
            hash_PeRlHaSh += (U8)*s_PeRlHaSh++; \
            hash_PeRlHaSh += (hash_PeRlHaSh << 10); \
            hash_PeRlHaSh ^= (hash_PeRlHaSh >> 6); \
        } \
        hash_PeRlHaSh += (hash_PeRlHaSh << 3); \
        hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \
        (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \
    } STMT_END
7dc86639
YO
674#endif
675#ifndef PERL_HASH
676#error "No hash function defined!"
677#endif
954c1994 678/*
ccfc67b7
JH
679=head1 Hash Manipulation Functions
680
954c1994
GS
681=for apidoc AmU||HEf_SVKEY
682This flag, used in the length slot of hash entries and magic structures,
d1be9408 683specifies the structure contains an C<SV*> pointer where a C<char*> pointer
954c1994
GS
684is to be expected. (For information only--not to be used).
685
ccfc67b7
JH
686=head1 Handy Values
687
954c1994
GS
688=for apidoc AmU||Nullhv
689Null HV pointer.
690
3ae1b226
NC
691(deprecated - use C<(HV *)NULL> instead)
692
ccfc67b7
JH
693=head1 Hash Manipulation Functions
694
954c1994 695=for apidoc Am|char*|HvNAME|HV* stash
9282b5fd
SH
696Returns the package name of a stash, or NULL if C<stash> isn't a stash.
697See C<SvSTASH>, C<CvSTASH>.
954c1994 698
d9021235
FC
699=for apidoc Am|STRLEN|HvNAMELEN|HV *stash
700Returns the length of the stash's name.
701
702=for apidoc Am|unsigned char|HvNAMEUTF8|HV *stash
703Returns true if the name is in UTF8 encoding.
704
bc56db2a
FC
705=for apidoc Am|char*|HvENAME|HV* stash
706Returns the effective name of a stash, or NULL if there is none. The
707effective name represents a location in the symbol table where this stash
708resides. It is updated automatically when packages are aliased or deleted.
709A stash that is no longer in the symbol table has no effective name. This
710name is preferable to C<HvNAME> for use in MRO linearisations and isa
711caches.
712
d9021235
FC
713=for apidoc Am|STRLEN|HvENAMELEN|HV *stash
714Returns the length of the stash's effective name.
715
716=for apidoc Am|unsigned char|HvENAMEUTF8|HV *stash
717Returns true if the effective name is in UTF8 encoding.
718
954c1994
GS
719=for apidoc Am|void*|HeKEY|HE* he
720Returns the actual pointer stored in the key slot of the hash entry. The
721pointer may be either C<char*> or C<SV*>, depending on the value of
722C<HeKLEN()>. Can be assigned to. The C<HePV()> or C<HeSVKEY()> macros are
723usually preferable for finding the value of a key.
724
725=for apidoc Am|STRLEN|HeKLEN|HE* he
726If this is negative, and amounts to C<HEf_SVKEY>, it indicates the entry
727holds an C<SV*> key. Otherwise, holds the actual length of the key. Can
728be assigned to. The C<HePV()> macro is usually preferable for finding key
729lengths.
730
731=for apidoc Am|SV*|HeVAL|HE* he
566a4718
YO
732Returns the value slot (type C<SV*>) stored in the hash entry. Can be assigned
733to.
734
735 SV *foo= HeVAL(he);
736 HeVAL(he)= sv;
737
954c1994
GS
738
739=for apidoc Am|U32|HeHASH|HE* he
740Returns the computed hash stored in the hash entry.
741
742=for apidoc Am|char*|HePV|HE* he|STRLEN len
743Returns the key slot of the hash entry as a C<char*> value, doing any
744necessary dereferencing of possibly C<SV*> keys. The length of the string
745is placed in C<len> (this is a macro, so do I<not> use C<&len>). If you do
746not care about what the length of the key is, you may use the global
747variable C<PL_na>, though this is rather less efficient than using a local
748variable. Remember though, that hash keys in perl are free to contain
749embedded nulls, so using C<strlen()> or similar is not a good way to find
750the length of hash keys. This is very similar to the C<SvPV()> macro
289d3c6a
NC
751described elsewhere in this document. See also C<HeUTF8>.
752
753If you are using C<HePV> to get values to pass to C<newSVpvn()> to create a
754new SV, you should consider using C<newSVhek(HeKEY_hek(he))> as it is more
755efficient.
756
cca4e9fa 757=for apidoc Am|U32|HeUTF8|HE* he
289d3c6a
NC
758Returns whether the C<char *> value returned by C<HePV> is encoded in UTF-8,
759doing any necessary dereferencing of possibly C<SV*> keys. The value returned
62a1a1ef 760will be 0 or non-0, not necessarily 1 (or even a value with any low bits set),
289d3c6a
NC
761so B<do not> blindly assign this to a C<bool> variable, as C<bool> may be a
762typedef for C<char>.
954c1994
GS
763
764=for apidoc Am|SV*|HeSVKEY|HE* he
a0714e2c 765Returns the key as an C<SV*>, or C<NULL> if the hash entry does not
954c1994
GS
766contain an C<SV*> key.
767
768=for apidoc Am|SV*|HeSVKEY_force|HE* he
769Returns the key as an C<SV*>. Will create and return a temporary mortal
770C<SV*> if the hash entry contains only a C<char*> key.
771
772=for apidoc Am|SV*|HeSVKEY_set|HE* he|SV* sv
773Sets the key to a given C<SV*>, taking care to set the appropriate flags to
774indicate the presence of an C<SV*> key, and returns the same
775C<SV*>.
776
777=cut
778*/
bf6bd887 779
bf5b86ae 780/* these hash entry flags ride on hent_klen (for use only in magic/tied HVs) */
d1be9408 781#define HEf_SVKEY -2 /* hent_key is an SV* */
bf6bd887 782
3ae1b226
NC
783#ifndef PERL_CORE
784# define Nullhv Null(HV*)
785#endif
43e6e717 786#define HvARRAY(hv) ((hv)->sv_u.svu_hash)
4d0fbddd 787#define HvFILL(hv) Perl_hv_fill(aTHX_ (const HV *)(hv))
463ee0b2 788#define HvMAX(hv) ((XPVHV*) SvANY(hv))->xhv_max
b79f7545
NC
789/* This quite intentionally does no flag checking first. That's your
790 responsibility. */
791#define HvAUX(hv) ((struct xpvhv_aux*)&(HvARRAY(hv)[HvMAX(hv)+1]))
dbebbdb4
NC
792#define HvRITER(hv) (*Perl_hv_riter_p(aTHX_ MUTABLE_HV(hv)))
793#define HvEITER(hv) (*Perl_hv_eiter_p(aTHX_ MUTABLE_HV(hv)))
794#define HvRITER_set(hv,r) Perl_hv_riter_set(aTHX_ MUTABLE_HV(hv), r)
795#define HvEITER_set(hv,e) Perl_hv_eiter_set(aTHX_ MUTABLE_HV(hv), e)
b79f7545 796#define HvRITER_get(hv) (SvOOK(hv) ? HvAUX(hv)->xhv_riter : -1)
cef6ea9d 797#define HvEITER_get(hv) (SvOOK(hv) ? HvAUX(hv)->xhv_eiter : NULL)
7423f6db 798#define HvNAME(hv) HvNAME_get(hv)
f2462604 799#define HvNAMELEN(hv) HvNAMELEN_get(hv)
bc56db2a 800#define HvENAME(hv) HvENAME_get(hv)
f2462604 801#define HvENAMELEN(hv) HvENAMELEN_get(hv)

/* Checking that hv is a valid package stash is the caller's
 * responsibility.  Yields HvAUX(hv)->xhv_mro_meta when that pointer is
 * non-null, otherwise the result of Perl_mro_meta_init() on hv.  The
 * argument is expanded more than once. */
#define HvMROMETA(hv)					\
    (HvAUX(hv)->xhv_mro_meta				\
	? HvAUX(hv)->xhv_mro_meta			\
	: Perl_mro_meta_init(aTHX_ hv))
2c7f4b87 808
/* Name HEK for callers that have already established that the aux
 * structure is usable: no SvOOK() or NULL test is made here.  A non-zero
 * xhv_name_count selects the first element of xhvnameu_names; a zero
 * count selects the single xhvnameu_name. */
#define HvNAME_HEK_NN(hv)					\
    (HvAUX(hv)->xhv_name_count					\
	? *HvAUX(hv)->xhv_name_u.xhvnameu_names			\
	: HvAUX(hv)->xhv_name_u.xhvnameu_name)

/* This macro may go away without notice. */
#define HvNAME_HEK(hv)						\
    (SvOOK(hv) && HvAUX(hv)->xhv_name_u.xhvnameu_name		\
	? HvNAME_HEK_NN(hv)					\
	: NULL)

/* Checked accessors.  Each one yields its fallback (NULL or 0) unless
 * SvOOK(hv) is true, the name union is non-null and HvNAME_HEK_NN(hv) is
 * non-null; otherwise it reads the key, length or UTF-8 flag from that
 * HEK. */
#define HvNAME_get(hv)						\
    ((SvOOK(hv) && HvAUX(hv)->xhv_name_u.xhvnameu_name		\
		&& HvNAME_HEK_NN(hv))				\
	? HEK_KEY(HvNAME_HEK_NN(hv)) : NULL)
#define HvNAMELEN_get(hv)					\
    ((SvOOK(hv) && HvAUX(hv)->xhv_name_u.xhvnameu_name		\
		&& HvNAME_HEK_NN(hv))				\
	? HEK_LEN(HvNAME_HEK_NN(hv)) : 0)
#define HvNAMEUTF8(hv)						\
    ((SvOOK(hv) && HvAUX(hv)->xhv_name_u.xhvnameu_name		\
		&& HvNAME_HEK_NN(hv))				\
	? HEK_UTF8(HvNAME_HEK_NN(hv)) : 0)
/* Effective-name HEK, without any SvOOK() check.  Selection by
 * xhv_name_count:
 *     > 0   -> xhvnameu_names[0]
 *     < -1  -> xhvnameu_names[1]
 *     == -1 -> NULL
 *     == 0  -> the single xhvnameu_name
 */
#define HvENAME_HEK_NN(hv)						       \
    (HvAUX(hv)->xhv_name_count > 0					       \
	? HvAUX(hv)->xhv_name_u.xhvnameu_names[0]			       \
	: HvAUX(hv)->xhv_name_count < -1				       \
	    ? HvAUX(hv)->xhv_name_u.xhvnameu_names[1]			       \
	    : HvAUX(hv)->xhv_name_count == -1				       \
		? NULL							       \
		: HvAUX(hv)->xhv_name_u.xhvnameu_name)

#define HvENAME_HEK(hv)							       \
    (SvOOK(hv) && HvAUX(hv)->xhv_name_u.xhvnameu_name			       \
	? HvENAME_HEK_NN(hv)						       \
	: NULL)

/* Checked accessors.  Each yields its fallback (NULL or 0) unless
 * SvOOK(hv) is true, the name union is non-null and xhv_name_count is
 * not -1 (the one count for which HvENAME_HEK_NN() gives NULL). */
#define HvENAME_get(hv)							       \
    ((SvOOK(hv) && HvAUX(hv)->xhv_name_u.xhvnameu_name			       \
		&& HvAUX(hv)->xhv_name_count != -1)			       \
	? HEK_KEY(HvENAME_HEK_NN(hv)) : NULL)
#define HvENAMELEN_get(hv)						       \
    ((SvOOK(hv) && HvAUX(hv)->xhv_name_u.xhvnameu_name			       \
		&& HvAUX(hv)->xhv_name_count != -1)			       \
	? HEK_LEN(HvENAME_HEK_NN(hv)) : 0)
#define HvENAMEUTF8(hv)							       \
    ((SvOOK(hv) && HvAUX(hv)->xhv_name_u.xhvnameu_name			       \
		&& HvAUX(hv)->xhv_name_count != -1)			       \
	? HEK_UTF8(HvENAME_HEK_NN(hv)) : 0)
a0d0e21e 845
/* The number of keys, including any placeholders, read straight from the
 * XPVHV body. */
#define XHvTOTALKEYS(xhv)	((xhv)->xhv_keys)

/*
 * HvKEYS gets the number of keys that actually exist(), and is provided
 * for backwards compatibility with old XS code.  The core uses
 * HvUSEDKEYS (keys, excluding placeholders) and HvTOTALKEYS (keys,
 * including placeholders).
 */
#define HvTOTALKEYS(hv)		XHvTOTALKEYS((XPVHV*) SvANY(hv))
#define HvUSEDKEYS(hv)		(HvTOTALKEYS(hv) - HvPLACEHOLDERS_get(hv))
#define HvKEYS(hv)		HvUSEDKEYS(hv)
/* Placeholder count access.
 *
 * HvPLACEHOLDERS(hv)        lvalue obtained through Perl_hv_placeholders_p()
 * HvPLACEHOLDERS_get(hv)    0 when SvMAGIC(hv) is false, otherwise the
 *                           result of Perl_hv_placeholders_get()
 * HvPLACEHOLDERS_set(hv,p)  forwards to Perl_hv_placeholders_set()
 *
 * The argument of HvPLACEHOLDERS_get is parenthesised inside the cast so
 * that an expression argument (pointer arithmetic, ?:, ...) is cast as a
 * whole rather than only its first operand.  hv is expanded twice there,
 * so it must not have side effects. */
#define HvPLACEHOLDERS(hv)	(*Perl_hv_placeholders_p(aTHX_ MUTABLE_HV(hv)))
#define HvPLACEHOLDERS_get(hv)	\
    (SvMAGIC(hv) ? Perl_hv_placeholders_get(aTHX_ (const HV *)(hv)) : 0)
#define HvPLACEHOLDERS_set(hv,p)	\
    Perl_hv_placeholders_set(aTHX_ MUTABLE_HV(hv), p)
8aacddc1 860
/* Test, set and clear the SVphv_SHAREKEYS bit in SvFLAGS(hv). */
#define HvSHAREKEYS(hv)		(SvFLAGS(hv) & SVphv_SHAREKEYS)
#define HvSHAREKEYS_on(hv)	(SvFLAGS(hv) |= SVphv_SHAREKEYS)
#define HvSHAREKEYS_off(hv)	(SvFLAGS(hv) &= ~SVphv_SHAREKEYS)
864
/* This is an optimisation flag.  It won't be set if all hash keys have a
 * 0 flag.  Currently the only flags relate to utf8, hence it won't be set
 * if all keys are 8 bit only.  It will be set if any key is utf8
 * (including 8 bit keys that were entered as utf8, and need upgrading
 * when retrieved during iteration).  It may still be set when there are
 * no longer any utf8 keys.
 * See HVhek_ENABLEHVKFLAGS for the trigger.
 */
#define HvHASKFLAGS(hv)		(SvFLAGS(hv) & SVphv_HASKFLAGS)
#define HvHASKFLAGS_on(hv)	(SvFLAGS(hv) |= SVphv_HASKFLAGS)
#define HvHASKFLAGS_off(hv)	(SvFLAGS(hv) &= ~SVphv_HASKFLAGS)
574c8022 876
/* Test, set and clear the SVphv_LAZYDEL bit in SvFLAGS(hv). */
#define HvLAZYDEL(hv)		(SvFLAGS(hv) & SVphv_LAZYDEL)
#define HvLAZYDEL_on(hv)	(SvFLAGS(hv) |= SVphv_LAZYDEL)
#define HvLAZYDEL_off(hv)	(SvFLAGS(hv) &= ~SVphv_LAZYDEL)
880
/* Null HE* constant; only provided when PERL_CORE is not defined. */
#if !defined(PERL_CORE)
#  define Nullhe Null(HE*)
#endif
/* Field accessors for a hash entry.  The HeK*() forms reach through the
 * entry's HEK (HeKEY_hek) using the matching HEK_*() macro. */
#define HeNEXT(he)		(he)->hent_next
#define HeKEY_hek(he)		(he)->hent_hek
#define HeVAL(he)		(he)->he_valu.hent_val

#define HeKEY(he)		HEK_KEY(HeKEY_hek(he))
#define HeKLEN(he)		HEK_LEN(HeKEY_hek(he))
#define HeHASH(he)		HEK_HASH(HeKEY_hek(he))
#define HeKFLAGS(he)		HEK_FLAGS(HeKEY_hek(he))
#define HeKUTF8(he)		HEK_UTF8(HeKEY_hek(he))
#define HeKWASUTF8(he)		HEK_WASUTF8(HeKEY_hek(he))

/* The key bytes reinterpreted as holding an SV*. */
#define HeKEY_sv(he)		(*(SV**)HeKEY(he))

/* Key length, negated when the key's UTF-8 flag is set. */
#define HeKLEN_UTF8(he)		(HeKUTF8(he) ? -HeKLEN(he) : HeKLEN(he))

/* HePV and HeUTF8 branch on whether the key length equals HEf_SVKEY:
 * if so they defer to SvPV()/SvUTF8() on the stored SV, otherwise they
 * use the plain key.  HePV assigns the length to lp, which must therefore
 * be an lvalue. */
#define HePV(he,lp)							\
    ((HeKLEN(he) == HEf_SVKEY)						\
	? SvPV(HeKEY_sv(he),lp)						\
	: ((lp = HeKLEN(he)), HeKEY(he)))
#define HeUTF8(he)							\
    ((HeKLEN(he) == HEf_SVKEY)						\
	? SvUTF8(HeKEY_sv(he))						\
	: (U32)HeKUTF8(he))
1e422769 901
/* The stored SV* key, or NULL unless the key pointer is non-null and the
 * key length equals HEf_SVKEY. */
#define HeSVKEY(he)							\
    ((HeKEY(he) && HeKLEN(he) == HEf_SVKEY)				\
	? HeKEY_sv(he)							\
	: NULL)

/* Always yields an SV*: the stored SV when the key length equals
 * HEf_SVKEY, otherwise a new SV built from the key bytes with
 * newSVpvn_flags(..., SVs_TEMP); &PL_sv_undef when the key pointer is
 * null. */
#define HeSVKEY_force(he)						\
    (HeKEY(he)								\
	? ((HeKLEN(he) == HEf_SVKEY)					\
	    ? HeKEY_sv(he)						\
	    : newSVpvn_flags(HeKEY(he), HeKLEN(he), SVs_TEMP))		\
	: &PL_sv_undef)

/* Marks the key length as HEf_SVKEY, stores sv in the key slot, and
 * yields the stored value. */
#define HeSVKEY_set(he,sv)	((HeKLEN(he) = HEf_SVKEY), (HeKEY_sv(he) = sv))
bbce6d69 913
/* Null HEK* constant; only provided when PERL_CORE is not defined. */
#if !defined(PERL_CORE)
#  define Nullhek Null(HEK*)
#endif

/* Offset of the key bytes within a HEK. */
#define HEK_BASESIZE		STRUCT_OFFSET(HEK, hek_key[0])

/* Field accessors for a HEK. */
#define HEK_HASH(hek)		(hek)->hek_hash
#define HEK_LEN(hek)		(hek)->hek_len
#define HEK_KEY(hek)		(hek)->hek_key

/* The flags byte sits at index HEK_LEN(hek)+1 of the key buffer, i.e.
 * one byte past the position at index HEK_LEN(hek). */
#define HEK_FLAGS(hek)	(*((unsigned char *)(HEK_KEY(hek))+HEK_LEN(hek)+1))

#define HVhek_UTF8	0x01 /* Key is utf8 encoded. */
#define HVhek_WASUTF8	0x02 /* Key is bytes here, but was supplied as utf8. */
#define HVhek_UNSHARED	0x08 /* This key isn't a shared hash key. */
#define HVhek_FREEKEY	0x100 /* Internal flag to say key is malloc()ed.  */
#define HVhek_PLACEHOLD	0x200 /* Internal flag to create placeholder.
                               * (may change, but Storable is a core module) */
#define HVhek_KEYCANONICAL 0x400 /* Internal flag - key is in canonical form.
				    If the string is UTF-8, it cannot be
				    converted to bytes. */
#define HVhek_MASK	0xFF

/* Every bit of HVhek_MASK except HVhek_UNSHARED. */
#define HVhek_ENABLEHVKFLAGS	(HVhek_MASK & ~(HVhek_UNSHARED))

/* Test, set and clear individual bits of the flags byte. */
#define HEK_UTF8(hek)		(HEK_FLAGS(hek) & HVhek_UTF8)
#define HEK_UTF8_on(hek)	(HEK_FLAGS(hek) |= HVhek_UTF8)
#define HEK_UTF8_off(hek)	(HEK_FLAGS(hek) &= ~HVhek_UTF8)
#define HEK_WASUTF8(hek)	(HEK_FLAGS(hek) & HVhek_WASUTF8)
#define HEK_WASUTF8_on(hek)	(HEK_FLAGS(hek) |= HVhek_WASUTF8)
#define HEK_WASUTF8_off(hek)	(HEK_FLAGS(hek) &= ~HVhek_WASUTF8)
d18c6117 942
/* Calculate the HV array allocation, in bytes, for `size` buckets. */
#ifdef PERL_USE_LARGE_HV_ALLOC
/* Below 64 buckets ask for exactly size pointers; from 64 upwards ask for
 * twice that, less MALLOC_OVERHEAD bytes. */
#  define MALLOC_OVERHEAD 16
#  define PERL_HV_ARRAY_ALLOC_BYTES(size)			\
	(((size) < 64)						\
	 ? (size) * sizeof(HE*)					\
	 : (size) * sizeof(HE*) * 2 - MALLOC_OVERHEAD)
#else
/* Default to allocating the correct size - default to assuming that
   malloc() is not broken and is efficient at allocating blocks sized at
   powers-of-two.
*/
#  define PERL_HV_ARRAY_ALLOC_BYTES(size) ((size) * sizeof(HE*))
#endif
37d85e3a 956
/* Flags for hv_iternext_flags. */
#define HV_ITERNEXT_WANTPLACEHOLDERS	0x01	/* Don't skip placeholders.  */

/* Convenience forms that supply the default trailing arguments. */
#define hv_iternext(hv)		hv_iternext_flags(hv, 0)
#define hv_magic(hv, gv, how)	\
    sv_magic(MUTABLE_SV(hv), MUTABLE_SV(gv), how, NULL, 0)
#define hv_undef(hv)		Perl_hv_undef_flags(aTHX_ hv, 0)
7a7b9979 963
/* Key pointer of the HEK produced by share_hek() for (pv, len, hash). */
#define Perl_sharepvn(pv, len, hash)	HEK_KEY(share_hek(pv, len, hash))
#define sharepvn(pv, len, hash)		Perl_sharepvn(pv, len, hash)
bfcb3514 966
/* Add a reference to the struct shared_he that contains hek, and yield
 * hek.
 *
 * hek must point at the shared_he_hek member of a struct shared_he: the
 * macro steps back STRUCT_OFFSET(struct shared_he, shared_he_hek) bytes
 * from it to reach the enclosing structure and pre-increments that
 * structure's shared_he_he.he_valu.hent_refcount.
 *
 * The argument is parenthesised at both uses so that an expression
 * argument is cast and returned as a whole.  It is still expanded twice,
 * so it must not have side effects. */
#define share_hek_hek(hek)						\
    (++(((struct shared_he *)(((char *)(hek))				\
			      - STRUCT_OFFSET(struct shared_he,		\
					      shared_he_hek)))		\
	->shared_he_he.he_valu.hent_refcount),				\
     (hek))
973
/* SV-keyed front ends to hv_common().  Each passes keysv as the key and
 * NULL/0/0 for the string key, its length and the key flags; they differ
 * only in the action bits and in how the result is cast. */
#define hv_store_ent(hv, keysv, val, hash)				\
    ((HE *) hv_common((hv), (keysv), NULL, 0, 0,			\
		      HV_FETCH_ISSTORE, (val), (hash)))

#define hv_exists_ent(hv, keysv, hash)					\
    (hv_common((hv), (keysv), NULL, 0, 0,				\
	       HV_FETCH_ISEXISTS, 0, (hash))				\
	? TRUE : FALSE)

#define hv_fetch_ent(hv, keysv, lval, hash)				\
    ((HE *) hv_common((hv), (keysv), NULL, 0, 0,			\
		      ((lval) ? HV_FETCH_LVALUE : 0),			\
		      NULL, (hash)))

#define hv_delete_ent(hv, key, flags, hash)				\
    (MUTABLE_SV(hv_common((hv), (key), NULL, 0, 0,			\
			  (flags) | HV_DELETE, NULL, (hash))))

/* String-keyed front ends.  hv_store_flags calls hv_common() directly so
 * that it can pass explicit key flags; the others go through
 * hv_common_key_len().  All but hv_exists and hv_delete add
 * HV_FETCH_JUST_SV to the action and cast the result to SV**. */
#define hv_store_flags(hv, key, klen, val, hash, flags)			\
    ((SV**) hv_common((hv), NULL, (key), (klen), (flags),		\
		      (HV_FETCH_ISSTORE|HV_FETCH_JUST_SV),		\
		      (val), (hash)))

#define hv_store(hv, key, klen, val, hash)				\
    ((SV**) hv_common_key_len((hv), (key), (klen),			\
			      (HV_FETCH_ISSTORE|HV_FETCH_JUST_SV),	\
			      (val), (hash)))

#define hv_exists(hv, key, klen)					\
    (hv_common_key_len((hv), (key), (klen),				\
		       HV_FETCH_ISEXISTS, NULL, 0)			\
	? TRUE : FALSE)

#define hv_fetch(hv, key, klen, lval)					\
    ((SV**) hv_common_key_len((hv), (key), (klen),			\
			      (lval)					\
				? (HV_FETCH_JUST_SV | HV_FETCH_LVALUE)	\
				: HV_FETCH_JUST_SV,			\
			      NULL, 0))

#define hv_delete(hv, key, klen, flags)					\
    (MUTABLE_SV(hv_common_key_len((hv), (key), (klen),			\
				  (flags) | HV_DELETE, NULL, 0)))
/* This refcounted he structure is used for storing the hints used for
   lexical pragmas.  Without threads, it's basically struct he + refcount.
   With threads, life gets more complex as the structure needs to be
   shared between threads (because it hangs from OPs, which are shared),
   hence the alternate definition and mutex.  */

/* Forward declaration only; the members are defined under PERL_CORE. */
struct refcounted_he;
1018
/* Flags for the refcounted_he API.  REFCOUNTED_HE_EXISTS is only
 * available when PERL_CORE is defined. */
#define REFCOUNTED_HE_KEY_UTF8		0x00000001
#if defined(PERL_CORE)
#  define REFCOUNTED_HE_EXISTS		0x00000002
#endif
20439bc7 1024
71ad1b0c
NC
1025#ifdef PERL_CORE
1026
b6bbf3fa 1027/* Gosh. This really isn't a good name any longer. */
71ad1b0c
NC
1028struct refcounted_he {
1029 struct refcounted_he *refcounted_he_next; /* next entry in chain */
cbb1fbea 1030#ifdef USE_ITHREADS
b6bbf3fa
NC
1031 U32 refcounted_he_hash;
1032 U32 refcounted_he_keylen;
cbb1fbea 1033#else
71ad1b0c 1034 HEK *refcounted_he_hek; /* hint key */
cbb1fbea 1035#endif
b6bbf3fa
NC
1036 union {
1037 IV refcounted_he_u_iv;
1038 UV refcounted_he_u_uv;
1039 STRLEN refcounted_he_u_len;
44ebaf21 1040 void *refcounted_he_u_ptr; /* Might be useful in future */
b6bbf3fa 1041 } refcounted_he_val;
9bd87817 1042 U32 refcounted_he_refcnt; /* reference count */
b6bbf3fa
NC
1043 /* First byte is flags. Then NUL-terminated value. Then for ithreads,
1044 non-NUL terminated key. */
1045 char refcounted_he_data[1];
71ad1b0c
NC
1046};
1047
/*
=for apidoc m|SV *|refcounted_he_fetch_pvs|const struct refcounted_he *chain|const char *key|U32 flags

Like L</refcounted_he_fetch_pvn>, but takes a literal string instead of
a string/length pair, and no precomputed hash.

=cut
*/

/* key goes through STR_WITH_LEN(); 0 is passed in the hash position. */
#define refcounted_he_fetch_pvs(chain, key, flags)			\
    Perl_refcounted_he_fetch_pvn(aTHX_ chain, STR_WITH_LEN(key),	\
				 0, flags)

/*
=for apidoc m|struct refcounted_he *|refcounted_he_new_pvs|struct refcounted_he *parent|const char *key|SV *value|U32 flags

Like L</refcounted_he_new_pvn>, but takes a literal string instead of
a string/length pair, and no precomputed hash.

=cut
*/

/* key goes through STR_WITH_LEN(); 0 is passed in the hash position. */
#define refcounted_he_new_pvs(parent, key, value, flags)		\
    Perl_refcounted_he_new_pvn(aTHX_ parent, STR_WITH_LEN(key),		\
			       0, value, flags)
1071
/* Flag bits are HVhek_UTF8, HVhek_WASUTF8, then a value-type code held
 * in the bits selected by HVrhek_typemask: */
#define HVrhek_undef	0x00 /* Value is undef. */
#define HVrhek_delete	0x10 /* Value is placeholder - signifies delete. */
#define HVrhek_IV	0x20 /* Value is IV. */
#define HVrhek_UV	0x30 /* Value is UV. */
#define HVrhek_PV	0x40 /* Value is a (byte) string. */
#define HVrhek_PV_UTF8	0x50 /* Value is a (utf8) string. */
/* Two spare. As these have to live in the optree, you can't store anything
   interpreter specific, such as SVs. :-( */
#define HVrhek_typemask 0x70
1082
1083#ifdef USE_ITHREADS
/* A big expression to find the key offset.
 *
 * Yields a pointer into chain->refcounted_he_data: one byte in (past the
 * leading flags byte), plus a further refcounted_he_u_len + 1 bytes when
 * (flags & 0x60) == 0x40.  That test matches type codes 0x40 and 0x50,
 * the values of HVrhek_PV and HVrhek_PV_UTF8.
 *
 * chain is parenthesised at every use so that any pointer-valued
 * expression may be passed; it is expanded up to three times, so it must
 * not have side effects. */
#define REF_HE_KEY(chain)						\
	(((((chain)->refcounted_he_data[0] & 0x60) == 0x40)		\
	    ? (chain)->refcounted_he_val.refcounted_he_u_len + 1 : 0)	\
	 + 1 + (chain)->refcounted_he_data)
1089#endif
b6bbf3fa 1090
/* Lock/unlock PL_hints_mutex when USE_ITHREADS is defined; expand to NOOP
 * otherwise. */
#  if defined(USE_ITHREADS)
#    define HINTS_REFCNT_LOCK		MUTEX_LOCK(&PL_hints_mutex)
#    define HINTS_REFCNT_UNLOCK		MUTEX_UNLOCK(&PL_hints_mutex)
#  else
#    define HINTS_REFCNT_LOCK		NOOP
#    define HINTS_REFCNT_UNLOCK		NOOP
#  endif
1098#endif
1099
/* Initialise/destroy PL_hints_mutex when USE_ITHREADS is defined; expand
 * to NOOP otherwise. */
#if defined(USE_ITHREADS)
#  define HINTS_REFCNT_INIT	MUTEX_INIT(&PL_hints_mutex)
#  define HINTS_REFCNT_TERM	MUTEX_DESTROY(&PL_hints_mutex)
#else
#  define HINTS_REFCNT_INIT	NOOP
#  define HINTS_REFCNT_TERM	NOOP
#endif
1107
/* Hash actions
 * Passed in PERL_MAGIC_uvar calls
 */
#define HV_DISABLE_UVAR_XKEY	0x01
/* We need to ensure that these don't clash with G_DISCARD, which is 2, as
   it is documented as being passed to hv_delete().  */
#define HV_FETCH_ISSTORE	0x04
#define HV_FETCH_ISEXISTS	0x08
#define HV_FETCH_LVALUE		0x10
#define HV_FETCH_JUST_SV	0x20
#define HV_DELETE		0x40
#define HV_FETCH_EMPTY_HE	0x80	/* Leave HeVAL null. */

/* Must not conflict with HVhek_UTF8 */
#define HV_NAME_SETALL		0x02
/*
=for apidoc newHV

Creates a new HV.  The reference count is set to 1.

=cut
*/

/* Implemented as newSV_type(SVt_PVHV), converted with MUTABLE_HV(). */
#define newHV()		MUTABLE_HV(newSV_type(SVt_PVHV))
78ac7dd9
NC
1133
1134/*
bfcb3514
NC
1135 * Local variables:
1136 * c-indentation-style: bsd
1137 * c-basic-offset: 4
14d04a33 1138 * indent-tabs-mode: nil
bfcb3514
NC
1139 * End:
1140 *
14d04a33 1141 * ex: set ts=8 sts=4 sw=4 et:
bfcb3514 1142 */