This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
document Hash::Util::hash_traversal_mask()
[perl5.git] / ext / Hash-Util / lib / Hash / Util.pm
CommitLineData
96c33d98
YO
1package Hash::Util;
2
3require 5.007003;
4use strict;
5use Carp;
6use warnings;
1e6ffe56 7no warnings 'uninitialized';
96c33d98
YO
8use warnings::register;
9use Scalar::Util qw(reftype);
10
11require Exporter;
12our @ISA = qw(Exporter);
13our @EXPORT_OK = qw(
1e73acc8
AS
14 fieldhash fieldhashes
15
96c33d98
YO
16 all_keys
17 lock_keys unlock_keys
18 lock_value unlock_value
19 lock_hash unlock_hash
08579537
JK
20 lock_keys_plus
21 hash_locked hash_unlocked
22 hashref_locked hashref_unlocked
96c33d98
YO
23 hidden_keys legal_keys
24
25 lock_ref_keys unlock_ref_keys
26 lock_ref_value unlock_ref_value
27 lock_hashref unlock_hashref
08579537 28 lock_ref_keys_plus
96c33d98
YO
29 hidden_ref_keys legal_ref_keys
30
77a92142 31 hash_seed hash_value hv_store
32 bucket_stats bucket_info bucket_array
5114d263 33 lock_hash_recurse unlock_hash_recurse
4950784d
YO
34
35 hash_traversal_mask
96c33d98 36 );
6a5b4183 37our $VERSION = '0.16';
b58f046e
NC
38require XSLoader;
39XSLoader::load();
96c33d98 40
1e73acc8
AS
41sub import {
42 my $class = shift;
43 if ( grep /fieldhash/, @_ ) {
44 require Hash::Util::FieldHash;
45 Hash::Util::FieldHash->import(':all'); # for re-export
46 }
47 unshift @_, $class;
48 goto &Exporter::import;
49}
50
96c33d98
YO
51
52=head1 NAME
53
54Hash::Util - A selection of general-utility hash subroutines
55
56=head1 SYNOPSIS
57
1e73acc8
AS
58 # Restricted hashes
59
96c33d98 60 use Hash::Util qw(
08579537
JK
61 fieldhash fieldhashes
62
63 all_keys
96c33d98
YO
64 lock_keys unlock_keys
65 lock_value unlock_value
66 lock_hash unlock_hash
08579537
JK
67 lock_keys_plus
68 hash_locked hash_unlocked
69 hashref_locked hashref_unlocked
96c33d98 70 hidden_keys legal_keys
08579537
JK
71
72 lock_ref_keys unlock_ref_keys
73 lock_ref_value unlock_ref_value
74 lock_hashref unlock_hashref
75 lock_ref_keys_plus
76 hidden_ref_keys legal_ref_keys
77
77a92142 78 hash_seed hash_value hv_store
79 bucket_stats bucket_info bucket_array
5114d263 80 lock_hash_recurse unlock_hash_recurse
4950784d
YO
81
82 hash_traversal_mask
96c33d98
YO
83 );
84
85 %hash = (foo => 42, bar => 23);
86 # Ways to restrict a hash
87 lock_keys(%hash);
88 lock_keys(%hash, @keyset);
89 lock_keys_plus(%hash, @additional_keys);
90
72712bfd
RGS
91 # Ways to inspect the properties of a restricted hash
92 my @legal = legal_keys(%hash);
93 my @hidden = hidden_keys(%hash);
94 my $ref = all_keys(%hash,@keys,@hidden);
95 my $is_locked = hash_locked(%hash);
96c33d98 96
72712bfd 97 # Remove restrictions on the hash
96c33d98
YO
98 unlock_keys(%hash);
99
72712bfd 100 # Lock individual values in a hash
96c33d98
YO
101 lock_value (%hash, 'foo');
102 unlock_value(%hash, 'foo');
103
72712bfd 104 # Ways to change the restrictions on both keys and values
96c33d98
YO
105 lock_hash (%hash);
106 unlock_hash(%hash);
107
108 my $hashes_are_randomised = hash_seed() != 0;
109
77a92142 110 my $int_hash_value = hash_value( 'string' );
111
4950784d
YO
112 my $mask= hash_traversal_mask(%hash);
113
114 hash_traversal_mask(%hash,1234);
115
96c33d98
YO
116=head1 DESCRIPTION
117
d74d639b
AS
118C<Hash::Util> and C<Hash::Util::FieldHash> contain special functions
119for manipulating hashes that don't really warrant a keyword.
1e73acc8 120
b299c47c
RGS
121C<Hash::Util> contains a set of functions that support
122L<restricted hashes|/"Restricted hashes">. These are described in
d74d639b
AS
123this document. C<Hash::Util::FieldHash> contains an (unrelated)
124set of functions that support the use of hashes in
125I<inside-out classes>, described in L<Hash::Util::FieldHash>.
1e73acc8 126
d74d639b 127By default C<Hash::Util> does not export anything.
1e73acc8 128
96c33d98
YO
129=head2 Restricted hashes
130
1315.8.0 introduces the ability to restrict a hash to a certain set of
132keys. No keys outside of this set can be added. It also introduces
133the ability to lock an individual key so it cannot be deleted and the
134ability to ensure that an individual value cannot be changed.
135
136This is intended to largely replace the deprecated pseudo-hashes.
137
138=over 4
139
140=item B<lock_keys>
141
142=item B<unlock_keys>
143
144 lock_keys(%hash);
145 lock_keys(%hash, @keys);
146
147Restricts the given %hash's set of keys to @keys. If @keys is not
148given it restricts it to its current keyset. No more keys can be
149added. delete() and exists() will still work, but will not alter
150the set of allowed keys. B<Note>: the current implementation prevents
151the hash from being bless()ed while it is in a locked state. Any attempt
152to do so will raise an exception. Of course you can still bless()
153the hash before you call lock_keys() so this shouldn't be a problem.
154
155 unlock_keys(%hash);
156
157Removes the restriction on the %hash's keyset.
158
5114d263
JK
159B<Note> that if any of the values of the hash have been locked they will not
160be unlocked after this sub executes.
96c33d98
YO
161
162Both routines return a reference to the hash operated on.
163
164=cut
165
166sub lock_ref_keys {
167 my($hash, @keys) = @_;
168
169 Internals::hv_clear_placeholders %$hash;
170 if( @keys ) {
171 my %keys = map { ($_ => 1) } @keys;
172 my %original_keys = map { ($_ => 1) } keys %$hash;
173 foreach my $k (keys %original_keys) {
174 croak "Hash has key '$k' which is not in the new key set"
175 unless $keys{$k};
176 }
177
178 foreach my $k (@keys) {
179 $hash->{$k} = undef unless exists $hash->{$k};
180 }
181 Internals::SvREADONLY %$hash, 1;
182
183 foreach my $k (@keys) {
184 delete $hash->{$k} unless $original_keys{$k};
185 }
186 }
187 else {
188 Internals::SvREADONLY %$hash, 1;
189 }
190
191 return $hash;
192}
193
194sub unlock_ref_keys {
195 my $hash = shift;
196
197 Internals::SvREADONLY %$hash, 0;
198 return $hash;
199}
200
201sub lock_keys (\%;@) { lock_ref_keys(@_) }
202sub unlock_keys (\%) { unlock_ref_keys(@_) }
203
204=item B<lock_keys_plus>
205
206 lock_keys_plus(%hash,@additional_keys)
207
208Similar to C<lock_keys()>, with the difference being that the optional key list
209specifies keys that may or may not be already in the hash. Essentially this is
210an easier way to say
211
212 lock_keys(%hash,@additional_keys,keys %hash);
213
214Returns a reference to %hash
215
216=cut
217
218
219sub lock_ref_keys_plus {
77a92142 220 my ($hash,@keys) = @_;
96c33d98
YO
221 my @delete;
222 Internals::hv_clear_placeholders(%$hash);
223 foreach my $key (@keys) {
224 unless (exists($hash->{$key})) {
225 $hash->{$key}=undef;
226 push @delete,$key;
227 }
228 }
229 Internals::SvREADONLY(%$hash,1);
230 delete @{$hash}{@delete};
231 return $hash
232}
233
234sub lock_keys_plus(\%;@) { lock_ref_keys_plus(@_) }
235
236
237=item B<lock_value>
238
239=item B<unlock_value>
240
241 lock_value (%hash, $key);
242 unlock_value(%hash, $key);
243
244Locks and unlocks the value for an individual key of a hash. The value of a
245locked key cannot be changed.
246
247Unless %hash has already been locked the key/value could be deleted
248regardless of this setting.
249
250Returns a reference to the %hash.
251
252=cut
253
254sub lock_ref_value {
255 my($hash, $key) = @_;
256 # I'm doubtful about this warning, as it seems not to be true.
257 # Marking a value in the hash as RO is useful, regardless
258 # of the status of the hash itself.
259 carp "Cannot usefully lock values in an unlocked hash"
260 if !Internals::SvREADONLY(%$hash) && warnings::enabled;
261 Internals::SvREADONLY $hash->{$key}, 1;
262 return $hash
263}
264
265sub unlock_ref_value {
266 my($hash, $key) = @_;
267 Internals::SvREADONLY $hash->{$key}, 0;
268 return $hash
269}
270
271sub lock_value (\%$) { lock_ref_value(@_) }
272sub unlock_value (\%$) { unlock_ref_value(@_) }
273
274
275=item B<lock_hash>
276
277=item B<unlock_hash>
278
279 lock_hash(%hash);
280
72712bfd 281lock_hash() locks an entire hash, making all keys and values read-only.
96c33d98
YO
282No value can be changed, no keys can be added or deleted.
283
284 unlock_hash(%hash);
285
286unlock_hash() does the opposite of lock_hash(). All keys and values
287are made writable. All values can be changed and keys can be added
288and deleted.
289
290Returns a reference to the %hash.
291
292=cut
293
294sub lock_hashref {
295 my $hash = shift;
296
297 lock_ref_keys($hash);
298
299 foreach my $value (values %$hash) {
300 Internals::SvREADONLY($value,1);
301 }
302
303 return $hash;
304}
305
306sub unlock_hashref {
307 my $hash = shift;
308
309 foreach my $value (values %$hash) {
310 Internals::SvREADONLY($value, 0);
311 }
312
313 unlock_ref_keys($hash);
314
315 return $hash;
316}
317
318sub lock_hash (\%) { lock_hashref(@_) }
319sub unlock_hash (\%) { unlock_hashref(@_) }
320
321=item B<lock_hash_recurse>
322
323=item B<unlock_hash_recurse>
324
325 lock_hash_recurse(%hash);
326
327lock_hash() locks an entire hash and any hashes it references recursively,
72712bfd 328making all keys and values read-only. No value can be changed, no keys can
96c33d98
YO
329be added or deleted.
330
5114d263
JK
331This method B<only> recurses into hashes that are referenced by another hash.
332Thus a Hash of Hashes (HoH) will all be restricted, but a Hash of Arrays of
333Hashes (HoAoH) will only have the top hash restricted.
96c33d98
YO
334
335 unlock_hash_recurse(%hash);
336
337unlock_hash_recurse() does the opposite of lock_hash_recurse(). All keys and
338values are made writable. All values can be changed and keys can be added
339and deleted. Identical recursion restrictions apply as to lock_hash_recurse().
340
341Returns a reference to the %hash.
342
343=cut
344
345sub lock_hashref_recurse {
346 my $hash = shift;
347
348 lock_ref_keys($hash);
349 foreach my $value (values %$hash) {
2793cb42
CO
350 my $type = reftype($value);
351 if (defined($type) and $type eq 'HASH') {
96c33d98
YO
352 lock_hashref_recurse($value);
353 }
354 Internals::SvREADONLY($value,1);
355 }
356 return $hash
357}
358
359sub unlock_hashref_recurse {
360 my $hash = shift;
361
362 foreach my $value (values %$hash) {
2793cb42
CO
363 my $type = reftype($value);
364 if (defined($type) and $type eq 'HASH') {
96c33d98
YO
365 unlock_hashref_recurse($value);
366 }
367 Internals::SvREADONLY($value,1);
368 }
369 unlock_ref_keys($hash);
370 return $hash;
371}
372
373sub lock_hash_recurse (\%) { lock_hashref_recurse(@_) }
374sub unlock_hash_recurse (\%) { unlock_hashref_recurse(@_) }
375
5114d263
JK
376=item B<hashref_locked>
377
08579537
JK
378=item B<hash_locked>
379
5114d263 380 hashref_locked(\%hash) and print "Hash is locked!\n";
08579537
JK
381 hash_locked(%hash) and print "Hash is locked!\n";
382
383Returns true if the hash and its keys are locked.
384
385=cut
386
387sub hashref_locked {
388 my $hash=shift;
5114d263 389 Internals::SvREADONLY(%$hash);
08579537
JK
390}
391
392sub hash_locked(\%) { hashref_locked(@_) }
96c33d98 393
5114d263
JK
394=item B<hashref_unlocked>
395
96c33d98
YO
396=item B<hash_unlocked>
397
5114d263 398 hashref_unlocked(\%hash) and print "Hash is unlocked!\n";
96c33d98
YO
399 hash_unlocked(%hash) and print "Hash is unlocked!\n";
400
401Returns true if the hash and its keys are unlocked.
402
403=cut
404
405sub hashref_unlocked {
406 my $hash=shift;
5114d263 407 !Internals::SvREADONLY(%$hash);
96c33d98
YO
408}
409
410sub hash_unlocked(\%) { hashref_unlocked(@_) }
411
412=for demerphqs_editor
413sub legal_ref_keys{}
414sub hidden_ref_keys{}
415sub all_keys{}
416
417=cut
418
419sub legal_keys(\%) { legal_ref_keys(@_) }
420sub hidden_keys(\%){ hidden_ref_keys(@_) }
421
3a6a06e3 422=item B<legal_keys>
96c33d98 423
72712bfd 424 my @keys = legal_keys(%hash);
96c33d98 425
72712bfd 426Returns the list of the keys that are legal in a restricted hash.
96c33d98
YO
427In the case of an unrestricted hash this is identical to calling
428keys(%hash).
429
430=item B<hidden_keys>
431
72712bfd 432 my @keys = hidden_keys(%hash);
96c33d98 433
72712bfd 434Returns the list of the keys that are legal in a restricted hash but
96c33d98
YO
435do not have a value associated to them. Thus if 'foo' is a
436"hidden" key of the %hash it will return false for both C<defined>
437and C<exists> tests.
438
439In the case of an unrestricted hash this will return an empty list.
440
441B<NOTE> this is an experimental feature that is heavily dependent
442on the current implementation of restricted hashes. Should the
72712bfd 443implementation change, this routine may become meaningless, in which
96c33d98
YO
444case it will return an empty list.
445
446=item B<all_keys>
447
448 all_keys(%hash,@keys,@hidden);
449
450Populates the arrays @keys with the all the keys that would pass
451an C<exists> tests, and populates @hidden with the remaining legal
452keys that have not been utilized.
453
454Returns a reference to the hash.
455
72712bfd 456In the case of an unrestricted hash this will be equivalent to
96c33d98 457
72712bfd
RGS
458 $ref = do {
459 @keys = keys %hash;
460 @hidden = ();
461 \%hash
462 };
96c33d98
YO
463
464B<NOTE> this is an experimental feature that is heavily dependent
465on the current implementation of restricted hashes. Should the
466implementation change this routine may become meaningless in which
467case it will behave identically to how it would behave on an
72712bfd 468unrestricted hash.
96c33d98
YO
469
470=item B<hash_seed>
471
472 my $hash_seed = hash_seed();
473
7dc86639 474hash_seed() returns the seed bytes used to randomise hash ordering.
96c33d98
YO
475
476B<Note that the hash seed is sensitive information>: by knowing it one
477can craft a denial-of-service attack against Perl code, even remotely,
478see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
479B<Do not disclose the hash seed> to people who don't need to know it.
480See also L<perlrun/PERL_HASH_SEED_DEBUG>.
481
7dc86639
YO
482Prior to Perl 5.17.6 this function returned a UV, it now returns a string,
483which may be of nearly any size as determined by the hash function your
484Perl has been built with. Possible sizes may be but are not limited to
4854 bytes (for most hash algorithms) and 16 bytes (for siphash).
486
487=item B<hash_value>
488
489 my $hash_value = hash_value($string);
490
77a92142 491hash_value() returns the current perl's internal hash value for a given
7dc86639
YO
492string.
493
494Returns a 32 bit integer representing the hash value of the string passed
495in. This value is only reliable for the lifetime of the process. It may
496be different depending on invocation, environment variables, perl version,
497architectures, and build options.
498
499B<Note that the hash value of a given string is sensitive information>:
500by knowing it one can deduce the hash seed which in turn can allow one to
501craft a denial-of-service attack against Perl code, even remotely,
502see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
503B<Do not disclose the hash value of a string> to people who don't need to
504know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>.
505
506=item B<bucket_info>
507
508Return a set of basic information about a hash.
509
9fc1b446 510 my ($keys, $buckets, $used, @length_counts)= bucket_info($hash);
7dc86639
YO
511
512Fields are as follows:
513
514 0: Number of keys in the hash
515 1: Number of buckets in the hash
516 2: Number of used buckets in the hash
517 rest : list of counts, Kth element is the number of buckets
518 with K keys in it.
519
520See also bucket_stats() and bucket_array().
521
522=item B<bucket_stats>
523
524Returns a list of statistics about a hash.
525
526 my ($keys, buckets, $used, $utilization_ratio, $collision_pct,
77a92142 527 $mean, $stddev, @length_counts) = bucket_info($hashref);
7dc86639
YO
528
529
530Fields are as follows:
531
532
533 0: Number of keys in the hash
534 1: Number of buckets in the hash
535 2: Number of used buckets in the hash
a740dcb9
YO
536 3: Hash Quality Score
537 4: Percent of buckets used
538 5: Percent of keys which are in collision
539 6: Average bucket length
540 7: Standard Deviation of bucket lengths.
7dc86639
YO
541 rest : list of counts, Kth element is the number of buckets
542 with K keys in it.
543
544See also bucket_info() and bucket_array().
545
a740dcb9
YO
546Note that Hash Quality Score would be 1 for an ideal hash, numbers
547close to and below 1 indicate good hashing, and number significantly
548above indicate a poor score. In practice it should be around 0.95 to 1.05.
549It is defined as:
550
551 $score= sum( $count[$length] * ($length * ($length + 1) / 2) )
552 /
553 ( ( $keys / 2 * $buckets ) *
554 ( $keys + ( 2 * $buckets ) - 1 ) )
555
556The formula is from the Red Dragon book (reformulated to use the data available)
557and is documented at L<http://www.strchr.com/hash_functions>
558
7dc86639
YO
559=item B<bucket_array>
560
561 my $array= bucket_array(\%hash);
562
563Returns a packed representation of the bucket array associated with a hash. Each element
564of the array is either an integer K, in which case it represents K empty buckets, or
565a reference to another array which contains the keys that are in that bucket.
566
567B<Note that the information returned by bucket_array is sensitive information>:
77a92142 568by knowing it one can directly attack perl's hash function which in turn may allow
7dc86639
YO
569one to craft a denial-of-service attack against Perl code, even remotely,
570see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
77a92142 571B<Do not disclose the output of this function> to people who don't need to
7dc86639
YO
572know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. This function is provided strictly
573for debugging and diagnostics purposes only, it is hard to imagine a reason why it
574would be used in production code.
575
96c33d98
YO
576=cut
577
7dc86639
YO
578
579sub bucket_stats {
77a92142 580 my ($hash) = @_;
581 my ($keys, $buckets, $used, @length_counts) = bucket_info($hash);
7dc86639 582 my $sum;
a740dcb9
YO
583 my $score;
584 for (0 .. $#length_counts) {
585 $sum += ($length_counts[$_] * $_);
586 $score += $length_counts[$_] * ( $_ * ($_ + 1 ) / 2 );
587 }
588 $score = $score /
589 (( $keys / (2 * $buckets )) * ( $keys + ( 2 * $buckets ) - 1 ))
590 if $keys;
7dc86639
YO
591 my $mean= $sum/$buckets;
592 $sum= 0;
593 $sum += ($length_counts[$_] * (($_-$mean)**2)) for 0 .. $#length_counts;
594
595 my $stddev= sqrt($sum/$buckets);
a740dcb9 596 return $keys, $buckets, $used, $keys ? ($score, $used/$buckets, ($keys-$used)/$keys, $mean, $stddev, @length_counts) : ();
96c33d98
YO
597}
598
599=item B<hv_store>
600
72712bfd 601 my $sv = 0;
96c33d98 602 hv_store(%hash,$key,$sv) or die "Failed to alias!";
72712bfd 603 $hash{$key} = 1;
96c33d98
YO
604 print $sv; # prints 1
605
606Stores an alias to a variable in a hash instead of copying the value.
607
4950784d
YO
608=item B<hash_traversal_mask>
609
610As of Perl 5.18 every hash its own hash traversal order, and this order
611changes every time a new element is inserted in the hash. This functionality
612is provided by maintaining an unsigned integer mask (U32) which is xor'ed
613with the actual bucket id during a traverse of the hash buckets using keys(),
614values() or each().
615
616You can use this subroutine to get and set the traversal mask for a specific
617hash. Setting the mask ensures that a given hash will produce the same key
618order. B<Note> that this does B<not> guarantee that *two* hashes will produce
619the same key order for the same hash seed and traversal mask, items that
620collide into one bucket may have different orders regardless of this setting.
621
96c33d98
YO
622=back
623
624=head2 Operating on references to hashes.
625
72712bfd 626Most subroutines documented in this module have equivalent versions
96c33d98
YO
627that operate on references to hashes instead of native hashes.
628The following is a list of these subs. They are identical except
629in name and in that instead of taking a %hash they take a $hashref,
630and additionally are not prototyped.
631
632=over 4
633
634=item lock_ref_keys
635
636=item unlock_ref_keys
637
638=item lock_ref_keys_plus
639
640=item lock_ref_value
641
642=item unlock_ref_value
643
644=item lock_hashref
645
646=item unlock_hashref
647
648=item lock_hashref_recurse
649
650=item unlock_hashref_recurse
651
652=item hash_ref_unlocked
653
654=item legal_ref_keys
655
656=item hidden_ref_keys
657
658=back
659
660=head1 CAVEATS
661
662Note that the trapping of the restricted operations is not atomic:
663for example
664
665 eval { %hash = (illegal_key => 1) }
666
667leaves the C<%hash> empty rather than with its original contents.
668
669=head1 BUGS
670
671The interface exposed by this module is very close to the current
72712bfd 672implementation of restricted hashes. Over time it is expected that
96c33d98
YO
673this behavior will be extended and the interface abstracted further.
674
675=head1 AUTHOR
676
677Michael G Schwern <schwern@pobox.com> on top of code by Nick
678Ing-Simmons and Jeffrey Friedl.
679
680hv_store() is from Array::RefElem, Copyright 2000 Gisle Aas.
681
682Additional code by Yves Orton.
683
684=head1 SEE ALSO
685
d74d639b
AS
686L<Scalar::Util>, L<List::Util> and L<perlsec/"Algorithmic Complexity Attacks">.
687
688L<Hash::Util::FieldHash>.
96c33d98
YO
689
690=cut
691
6921;