| 1 | #!./perl -w |
| 2 | |
| 3 | BEGIN { |
| 4 | if ($ENV{PERL_CORE}) { |
| 5 | chdir 't' if -d 't'; |
| 6 | @INC = '../lib'; |
| 7 | } |
| 8 | } |
| 9 | |
| 10 | use Test::More; |
| 11 | |
| 12 | use strict; |
| 13 | use Hash::Util::FieldHash qw( :all); |
| 14 | |
| 15 | no warnings 'misc'; |
| 16 | |
| 17 | plan tests => 5; |
| 18 | |
| 19 | fieldhash my %h; |
| 20 | |
| 21 | ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on"); |
| 22 | |
| 23 | foreach (1..10) { |
| 24 | $h{"\0"x$_}++; |
| 25 | } |
| 26 | |
| 27 | ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash"); |
| 28 | |
| 29 | foreach (11..20) { |
| 30 | $h{"\0"x$_}++; |
| 31 | } |
| 32 | |
| 33 | ok (Internals::HvREHASH(%h), "20 entries triggers rehash"); |
| 34 | |
| 35 | |
| 36 | |
| 37 | |
| 38 | # second part using an emulation of the PERL_HASH in perl, mounting an |
| 39 | # attack on a prepopulated hash. This is also useful if you need normal |
| 40 | # keys which don't contain \0 -- suitable for stashes |
| 41 | |
| 42 | use constant MASK_U32 => 2**32; |
| 43 | use constant HASH_SEED => 0; |
| 44 | use constant THRESHOLD => 14; |
| 45 | use constant START => "a"; |
| 46 | |
| 47 | # some initial hash data |
| 48 | fieldhash my %h2; |
| 49 | %h2 = map {$_ => 1} 'a'..'cc'; |
| 50 | |
| 51 | ok (!Internals::HvREHASH(%h2), |
| 52 | "starting with pre-populated non-pathalogical hash (rehash flag if off)"); |
| 53 | |
| 54 | my @keys = get_keys(\%h2); |
| 55 | $h2{$_}++ for @keys; |
| 56 | ok (Internals::HvREHASH(%h2), |
| 57 | scalar(@keys) . " colliding into the same bucket keys are triggerring rehash"); |
| 58 | |
| 59 | sub get_keys { |
| 60 | my $hr = shift; |
| 61 | |
| 62 | # the minimum of bits required to mount the attack on a hash |
| 63 | my $min_bits = log(THRESHOLD)/log(2); |
| 64 | |
| 65 | # if the hash has already been populated with a significant amount |
| 66 | # of entries the number of mask bits can be higher |
| 67 | my $keys = scalar keys %$hr; |
| 68 | my $bits = $keys ? log($keys)/log(2) : 0; |
| 69 | $bits = $min_bits if $min_bits > $bits; |
| 70 | |
| 71 | $bits = int($bits) < $bits ? int($bits) + 1 : int($bits); |
| 72 | # need to add 2 bits to cover the internal split cases |
| 73 | $bits += 2; |
| 74 | my $mask = 2**$bits-1; |
| 75 | print "# using mask: $mask ($bits)\n"; |
| 76 | |
| 77 | my @keys; |
| 78 | my $s = START; |
| 79 | my $c = 0; |
| 80 | # get 2 keys on top of the THRESHOLD |
| 81 | my $hash; |
| 82 | while (@keys < THRESHOLD+2) { |
| 83 | # next if exists $hash->{$s}; |
| 84 | $hash = hash($s); |
| 85 | next unless ($hash & $mask) == 0; |
| 86 | $c++; |
| 87 | printf "# %2d: %5s, %10s\n", $c, $s, $hash; |
| 88 | push @keys, $s; |
| 89 | } continue { |
| 90 | $s++; |
| 91 | } |
| 92 | |
| 93 | return @keys; |
| 94 | } |
| 95 | |
| 96 | |
| 97 | # trying to provide the fastest equivalent of C macro's PERL_HASH in |
| 98 | # Perl - the main complication is that it uses U32 integer, which we |
| 99 | # can't do it perl, without doing some tricks |
| 100 | sub hash { |
| 101 | my $s = shift; |
| 102 | my @c = split //, $s; |
| 103 | my $u = HASH_SEED; |
| 104 | for (@c) { |
| 105 | # (A % M) + (B % M) == (A + B) % M |
| 106 | # This works because '+' produces a NV, which is big enough to hold |
| 107 | # the intermidiate result. We only need the % before any "^" and "&" |
| 108 | # to get the result in the range for an I32. |
| 109 | # and << doesn't work on NV, so using 1 << 10 |
| 110 | $u += ord; |
| 111 | $u += $u * (1 << 10); $u %= MASK_U32; |
| 112 | $u ^= $u >> 6; |
| 113 | } |
| 114 | $u += $u << 3; $u %= MASK_U32; |
| 115 | $u ^= $u >> 11; $u %= MASK_U32; |
| 116 | $u += $u << 15; $u %= MASK_U32; |
| 117 | $u; |
| 118 | } |