7 no warnings 'uninitialized';
8 use warnings::register;
9 use Scalar::Util qw(reftype);
12 our @ISA = qw(Exporter);
18 lock_value unlock_value
21 hash_locked hash_unlocked
22 hashref_locked hashref_unlocked
23 hidden_keys legal_keys
25 lock_ref_keys unlock_ref_keys
26 lock_ref_value unlock_ref_value
27 lock_hashref unlock_hashref
29 hidden_ref_keys legal_ref_keys
31 hash_seed hash_value hv_store
32 bucket_stats bucket_stats_formatted bucket_info bucket_array
33 lock_hash_recurse unlock_hash_recurse
37 our $VERSION = '0.17';
43 if ( grep /fieldhash/, @_ ) {
44 require Hash::Util::FieldHash;
45 Hash::Util::FieldHash->import(':all'); # for re-export
48 goto &Exporter::import;
54 Hash::Util - A selection of general-utility hash subroutines
65 lock_value unlock_value
68 hash_locked hash_unlocked
69 hashref_locked hashref_unlocked
70 hidden_keys legal_keys
72 lock_ref_keys unlock_ref_keys
73 lock_ref_value unlock_ref_value
74 lock_hashref unlock_hashref
76 hidden_ref_keys legal_ref_keys
78 hash_seed hash_value hv_store
79 bucket_stats bucket_info bucket_array
80 lock_hash_recurse unlock_hash_recurse
85 %hash = (foo => 42, bar => 23);
86 # Ways to restrict a hash
88 lock_keys(%hash, @keyset);
89 lock_keys_plus(%hash, @additional_keys);
91 # Ways to inspect the properties of a restricted hash
92 my @legal = legal_keys(%hash);
93 my @hidden = hidden_keys(%hash);
94 my $ref = all_keys(%hash,@keys,@hidden);
95 my $is_locked = hash_locked(%hash);
97 # Remove restrictions on the hash
100 # Lock individual values in a hash
101 lock_value (%hash, 'foo');
102 unlock_value(%hash, 'foo');
104 # Ways to change the restrictions on both keys and values
108 my $hashes_are_randomised = hash_seed() != 0;
110 my $int_hash_value = hash_value( 'string' );
112 my $mask= hash_traversal_mask(%hash);
114 hash_traversal_mask(%hash,1234);
118 C<Hash::Util> and C<Hash::Util::FieldHash> contain special functions
119 for manipulating hashes that don't really warrant a keyword.
121 C<Hash::Util> contains a set of functions that support
122 L<restricted hashes|/"Restricted hashes">. These are described in
123 this document. C<Hash::Util::FieldHash> contains an (unrelated)
124 set of functions that support the use of hashes in
125 I<inside-out classes>, described in L<Hash::Util::FieldHash>.
127 By default C<Hash::Util> does not export anything.
129 =head2 Restricted hashes
131 5.8.0 introduces the ability to restrict a hash to a certain set of
132 keys. No keys outside of this set can be added. It also introduces
133 the ability to lock an individual key so it cannot be deleted and the
134 ability to ensure that an individual value cannot be changed.
136 This is intended to largely replace the deprecated pseudo-hashes.
145 lock_keys(%hash, @keys);
147 Restricts the given %hash's set of keys to @keys. If @keys is not
148 given it restricts it to its current keyset. No more keys can be
149 added. delete() and exists() will still work, but will not alter
150 the set of allowed keys. B<Note>: the current implementation prevents
151 the hash from being bless()ed while it is in a locked state. Any attempt
152 to do so will raise an exception. Of course you can still bless()
153 the hash before you call lock_keys() so this shouldn't be a problem.
157 Removes the restriction on the %hash's keyset.
159 B<Note> that if any of the values of the hash have been locked they will not
160 be unlocked after this sub executes.
162 Both routines return a reference to the hash operated on.
167 my($hash, @keys) = @_;
169 Internals::hv_clear_placeholders %$hash;
171 my %keys = map { ($_ => 1) } @keys;
172 my %original_keys = map { ($_ => 1) } keys %$hash;
173 foreach my $k (keys %original_keys) {
174 croak "Hash has key '$k' which is not in the new key set"
178 foreach my $k (@keys) {
179 $hash->{$k} = undef unless exists $hash->{$k};
181 Internals::SvREADONLY %$hash, 1;
183 foreach my $k (@keys) {
184 delete $hash->{$k} unless $original_keys{$k};
188 Internals::SvREADONLY %$hash, 1;
194 sub unlock_ref_keys {
197 Internals::SvREADONLY %$hash, 0;
201 sub lock_keys (\%;@) { lock_ref_keys(@_) }
202 sub unlock_keys (\%) { unlock_ref_keys(@_) }
204 =item B<lock_keys_plus>
206 lock_keys_plus(%hash,@additional_keys)
208 Similar to C<lock_keys()>, with the difference being that the optional key list
209 specifies keys that may or may not be already in the hash. Essentially this is
212 lock_keys(%hash,@additional_keys,keys %hash);
214 Returns a reference to %hash
219 sub lock_ref_keys_plus {
220 my ($hash,@keys) = @_;
222 Internals::hv_clear_placeholders(%$hash);
223 foreach my $key (@keys) {
224 unless (exists($hash->{$key})) {
229 Internals::SvREADONLY(%$hash,1);
230 delete @{$hash}{@delete};
234 sub lock_keys_plus(\%;@) { lock_ref_keys_plus(@_) }
239 =item B<unlock_value>
241 lock_value (%hash, $key);
242 unlock_value(%hash, $key);
244 Locks and unlocks the value for an individual key of a hash. The value of a
245 locked key cannot be changed.
247 Unless %hash has already been locked the key/value could be deleted
248 regardless of this setting.
250 Returns a reference to the %hash.
255 my($hash, $key) = @_;
256 # I'm doubtful about this warning, as it seems not to be true.
257 # Marking a value in the hash as RO is useful, regardless
258 # of the status of the hash itself.
259 carp "Cannot usefully lock values in an unlocked hash"
260 if !Internals::SvREADONLY(%$hash) && warnings::enabled;
261 Internals::SvREADONLY $hash->{$key}, 1;
265 sub unlock_ref_value {
266 my($hash, $key) = @_;
267 Internals::SvREADONLY $hash->{$key}, 0;
271 sub lock_value (\%$) { lock_ref_value(@_) }
272 sub unlock_value (\%$) { unlock_ref_value(@_) }
281 lock_hash() locks an entire hash, making all keys and values read-only.
282 No value can be changed, no keys can be added or deleted.
286 unlock_hash() does the opposite of lock_hash(). All keys and values
287 are made writable. All values can be changed and keys can be added
290 Returns a reference to the %hash.
297 lock_ref_keys($hash);
299 foreach my $value (values %$hash) {
300 Internals::SvREADONLY($value,1);
309 foreach my $value (values %$hash) {
310 Internals::SvREADONLY($value, 0);
313 unlock_ref_keys($hash);
318 sub lock_hash (\%) { lock_hashref(@_) }
319 sub unlock_hash (\%) { unlock_hashref(@_) }
321 =item B<lock_hash_recurse>
323 =item B<unlock_hash_recurse>
325 lock_hash_recurse(%hash);
327 lock_hash() locks an entire hash and any hashes it references recursively,
328 making all keys and values read-only. No value can be changed, no keys can
331 This method B<only> recurses into hashes that are referenced by another hash.
332 Thus a Hash of Hashes (HoH) will all be restricted, but a Hash of Arrays of
333 Hashes (HoAoH) will only have the top hash restricted.
335 unlock_hash_recurse(%hash);
337 unlock_hash_recurse() does the opposite of lock_hash_recurse(). All keys and
338 values are made writable. All values can be changed and keys can be added
339 and deleted. Identical recursion restrictions apply as to lock_hash_recurse().
341 Returns a reference to the %hash.
345 sub lock_hashref_recurse {
348 lock_ref_keys($hash);
349 foreach my $value (values %$hash) {
350 my $type = reftype($value);
351 if (defined($type) and $type eq 'HASH') {
352 lock_hashref_recurse($value);
354 Internals::SvREADONLY($value,1);
359 sub unlock_hashref_recurse {
362 foreach my $value (values %$hash) {
363 my $type = reftype($value);
364 if (defined($type) and $type eq 'HASH') {
365 unlock_hashref_recurse($value);
367 Internals::SvREADONLY($value,1);
369 unlock_ref_keys($hash);
373 sub lock_hash_recurse (\%) { lock_hashref_recurse(@_) }
374 sub unlock_hash_recurse (\%) { unlock_hashref_recurse(@_) }
376 =item B<hashref_locked>
380 hashref_locked(\%hash) and print "Hash is locked!\n";
381 hash_locked(%hash) and print "Hash is locked!\n";
383 Returns true if the hash and its keys are locked.
389 Internals::SvREADONLY(%$hash);
392 sub hash_locked(\%) { hashref_locked(@_) }
394 =item B<hashref_unlocked>
396 =item B<hash_unlocked>
398 hashref_unlocked(\%hash) and print "Hash is unlocked!\n";
399 hash_unlocked(%hash) and print "Hash is unlocked!\n";
401 Returns true if the hash and its keys are unlocked.
405 sub hashref_unlocked {
407 !Internals::SvREADONLY(%$hash);
410 sub hash_unlocked(\%) { hashref_unlocked(@_) }
412 =for demerphqs_editor
414 sub hidden_ref_keys{}
419 sub legal_keys(\%) { legal_ref_keys(@_) }
420 sub hidden_keys(\%){ hidden_ref_keys(@_) }
424 my @keys = legal_keys(%hash);
426 Returns the list of the keys that are legal in a restricted hash.
427 In the case of an unrestricted hash this is identical to calling
432 my @keys = hidden_keys(%hash);
434 Returns the list of the keys that are legal in a restricted hash but
435 do not have a value associated to them. Thus if 'foo' is a
436 "hidden" key of the %hash it will return false for both C<defined>
439 In the case of an unrestricted hash this will return an empty list.
441 B<NOTE> this is an experimental feature that is heavily dependent
442 on the current implementation of restricted hashes. Should the
443 implementation change, this routine may become meaningless, in which
444 case it will return an empty list.
448 all_keys(%hash,@keys,@hidden);
450 Populates the arrays @keys with the all the keys that would pass
451 an C<exists> tests, and populates @hidden with the remaining legal
452 keys that have not been utilized.
454 Returns a reference to the hash.
456 In the case of an unrestricted hash this will be equivalent to
464 B<NOTE> this is an experimental feature that is heavily dependent
465 on the current implementation of restricted hashes. Should the
466 implementation change this routine may become meaningless in which
467 case it will behave identically to how it would behave on an
472 my $hash_seed = hash_seed();
474 hash_seed() returns the seed bytes used to randomise hash ordering.
476 B<Note that the hash seed is sensitive information>: by knowing it one
477 can craft a denial-of-service attack against Perl code, even remotely,
478 see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
479 B<Do not disclose the hash seed> to people who don't need to know it.
480 See also L<perlrun/PERL_HASH_SEED_DEBUG>.
482 Prior to Perl 5.17.6 this function returned a UV, it now returns a string,
483 which may be of nearly any size as determined by the hash function your
484 Perl has been built with. Possible sizes may be but are not limited to
485 4 bytes (for most hash algorithms) and 16 bytes (for siphash).
489 my $hash_value = hash_value($string);
491 hash_value() returns the current perl's internal hash value for a given
494 Returns a 32 bit integer representing the hash value of the string passed
495 in. This value is only reliable for the lifetime of the process. It may
496 be different depending on invocation, environment variables, perl version,
497 architectures, and build options.
499 B<Note that the hash value of a given string is sensitive information>:
500 by knowing it one can deduce the hash seed which in turn can allow one to
501 craft a denial-of-service attack against Perl code, even remotely,
502 see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
503 B<Do not disclose the hash value of a string> to people who don't need to
504 know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>.
508 Return a set of basic information about a hash.
510 my ($keys, $buckets, $used, @length_counts)= bucket_info($hash);
512 Fields are as follows:
514 0: Number of keys in the hash
515 1: Number of buckets in the hash
516 2: Number of used buckets in the hash
517 rest : list of counts, Kth element is the number of buckets
520 See also bucket_stats() and bucket_array().
522 =item B<bucket_stats>
524 Returns a list of statistics about a hash.
526 my ($keys, $buckets, $used, $quality, $utilization_ratio,
527 $collision_pct, $mean, $stddev, @length_counts)
528 = bucket_stats($hashref);
530 Fields are as follows:
532 0: Number of keys in the hash
533 1: Number of buckets in the hash
534 2: Number of used buckets in the hash
535 3: Hash Quality Score
536 4: Percent of buckets used
537 5: Percent of keys which are in collision
538 6: Mean bucket length of occupied buckets
539 7: Standard Deviation of bucket lengths of occupied buckets
540 rest : list of counts, Kth element is the number of buckets
543 See also bucket_info() and bucket_array().
545 Note that Hash Quality Score would be 1 for an ideal hash, numbers
546 close to and below 1 indicate good hashing, and number significantly
547 above indicate a poor score. In practice it should be around 0.95 to 1.05.
550 $score= sum( $count[$length] * ($length * ($length + 1) / 2) )
552 ( ( $keys / 2 * $buckets ) *
553 ( $keys + ( 2 * $buckets ) - 1 ) )
555 The formula is from the Red Dragon book (reformulated to use the data available)
556 and is documented at L<http://www.strchr.com/hash_functions>
558 =item B<bucket_array>
560 my $array= bucket_array(\%hash);
562 Returns a packed representation of the bucket array associated with a hash. Each element
563 of the array is either an integer K, in which case it represents K empty buckets, or
564 a reference to another array which contains the keys that are in that bucket.
566 B<Note that the information returned by bucket_array is sensitive information>:
567 by knowing it one can directly attack perl's hash function which in turn may allow
568 one to craft a denial-of-service attack against Perl code, even remotely,
569 see L<perlsec/"Algorithmic Complexity Attacks"> for more information.
570 B<Do not disclose the output of this function> to people who don't need to
571 know it. See also L<perlrun/PERL_HASH_SEED_DEBUG>. This function is provided strictly
572 for debugging and diagnostics purposes only, it is hard to imagine a reason why it
573 would be used in production code.
580 my ($keys, $buckets, $used, @length_counts) = bucket_info($hash);
583 for (1 .. $#length_counts) {
584 $sum += ($length_counts[$_] * $_);
585 $score += $length_counts[$_] * ( $_ * ($_ + 1 ) / 2 );
588 (( $keys / (2 * $buckets )) * ( $keys + ( 2 * $buckets ) - 1 ))
590 my ($mean, $stddev)= (0, 0);
594 $sum += ($length_counts[$_] * (($_-$mean)**2)) for 1 .. $#length_counts;
596 $stddev= sqrt($sum/$used);
598 return $keys, $buckets, $used, $keys ? ($score, $used/$buckets, ($keys-$used)/$keys, $mean, $stddev, @length_counts) : ();
601 =item B<bucket_stats_formatted>
603 print bucket_stats_formatted($hashref);
605 Return a formatted report of the information returned by bucket_stats().
606 An example report looks like this:
608 Keys: 50 Buckets: 33/64 Quality-Score: 1.01 (Good)
609 Utilized Buckets: 51.56% Optimal: 78.12% Keys In Collision: 34.00%
610 Chain Length - mean: 1.52 stddev: 0.66
611 Buckets 64 [0000000000000000000000000000000111111111111111111122222222222333]
612 Len 0 Pct: 48.44 [###############################]
613 Len 1 Pct: 29.69 [###################]
614 Len 2 Pct: 17.19 [###########]
615 Len 3 Pct: 4.69 [###]
616 Keys 50 [11111111111111111111111111111111122222222222222333]
617 Pos 1 Pct: 66.00 [#################################]
618 Pos 2 Pct: 28.00 [##############]
619 Pos 3 Pct: 6.00 [###]
621 The first set of stats gives some summary statistical information,
622 including the quality score translated into "Good", "Poor" and "Bad",
623 (score<=1.05, score<=1.2, score>1.2). See the documentation in
624 bucket_stats() for more details.
626 The two sets of barcharts give stats and a visual indication of performance
629 The first gives data on bucket chain lengths and provides insight on how
630 much work a fetch *miss* will take. In this case we have to inspect every item
631 in a bucket before we can be sure the item is not in the list. The performance
632 for an insert is equivalent to this case, as is a delete where the item
635 The second gives data on how many keys are at each depth in the chain, and
636 gives an idea of how much work a fetch *hit* will take. The performance for
637 an update or delete of an item in the hash is equivalent to this case.
639 Note that these statistics are summary only. Actual performance will depend
640 on real hit/miss ratios accessing the hash. If you are concerned by hit ratios
641 you are recommended to "oversize" your hash by using something like:
643 keys(%hash)= keys(%hash) << $k;
645 With $k chosen carefully, and likely to be a small number like 1 or 2. In
646 theory the larger the bucket array the less chance of collision.
651 sub _bucket_stats_formatted_bars {
652 my ($total, $ary, $start_idx, $title, $row_title)= @_;
655 my $max_width= $total > 64 ? 64 : $total;
656 my $bar_width= $max_width / $total;
660 for my $idx ($start_idx .. $#$ary) {
661 $str .= $idx x sprintf("%.0f", ($ary->[$idx] * $bar_width));
664 $str= "-" x $max_width;
666 $return .= sprintf "%-7s %6d [%s]\n",$title, $total, $str;
668 foreach my $idx ($start_idx .. $#$ary) {
669 $return .= sprintf "%-.3s %3d %6.2f%% %6d [%s]\n",
672 $ary->[$idx] / $total * 100,
674 "#" x sprintf("%.0f", ($ary->[$idx] * $bar_width)),
680 sub bucket_stats_formatted {
682 my ($keys, $buckets, $used, $score, $utilization_ratio, $collision_pct,
683 $mean, $stddev, @length_counts) = bucket_stats($hashref);
685 my $return= sprintf "Keys: %d Buckets: %d/%d Quality-Score: %.2f (%s)\n"
686 . "Utilized Buckets: %.2f%% Optimal: %.2f%% Keys In Collision: %.2f%%\n"
687 . "Chain Length - mean: %.2f stddev: %.2f\n",
688 $keys, $used, $buckets, $score, $score <= 1.05 ? "Good" : $score < 1.2 ? "Poor" : "Bad",
689 $utilization_ratio * 100,
690 $keys/$buckets * 100,
691 $collision_pct * 100,
695 $key_depth[$_]= $length_counts[$_] + ( $key_depth[$_+1] || 0 )
696 for reverse 1 .. $#length_counts;
699 $return .= _bucket_stats_formatted_bars($buckets, \@length_counts, 0, "Buckets", "Len");
700 $return .= _bucket_stats_formatted_bars($keys, \@key_depth, 1, "Keys", "Pos");
708 hv_store(%hash,$key,$sv) or die "Failed to alias!";
710 print $sv; # prints 1
712 Stores an alias to a variable in a hash instead of copying the value.
714 =item B<hash_traversal_mask>
716 As of Perl 5.18 every hash has its own hash traversal order, and this order
717 changes every time a new element is inserted into the hash. This functionality
718 is provided by maintaining an unsigned integer mask (U32) which is xor'ed
719 with the actual bucket id during a traversal of the hash buckets using keys(),
722 You can use this subroutine to get and set the traversal mask for a specific
723 hash. Setting the mask ensures that a given hash will produce the same key
724 order. B<Note> that this does B<not> guarantee that B<two> hashes will produce
725 the same key order for the same hash seed and traversal mask, items that
726 collide into one bucket may have different orders regardless of this setting.
730 =head2 Operating on references to hashes.
732 Most subroutines documented in this module have equivalent versions
733 that operate on references to hashes instead of native hashes.
734 The following is a list of these subs. They are identical except
735 in name and in that instead of taking a %hash they take a $hashref,
736 and additionally are not prototyped.
742 =item unlock_ref_keys
744 =item lock_ref_keys_plus
748 =item unlock_ref_value
754 =item lock_hashref_recurse
756 =item unlock_hashref_recurse
758 =item hash_ref_unlocked
762 =item hidden_ref_keys
768 Note that the trapping of the restricted operations is not atomic:
771 eval { %hash = (illegal_key => 1) }
773 leaves the C<%hash> empty rather than with its original contents.
777 The interface exposed by this module is very close to the current
778 implementation of restricted hashes. Over time it is expected that
779 this behavior will be extended and the interface abstracted further.
783 Michael G Schwern <schwern@pobox.com> on top of code by Nick
784 Ing-Simmons and Jeffrey Friedl.
786 hv_store() is from Array::RefElem, Copyright 2000 Gisle Aas.
788 Additional code by Yves Orton.
792 L<Scalar::Util>, L<List::Util> and L<perlsec/"Algorithmic Complexity Attacks">.
794 L<Hash::Util::FieldHash>.