perl5.git.perl.org Git - perl5.git/blame_incremental - ext/Hash/Util/FieldHash/t/10

... / ...

Commit	Line	Data
	1	#!./perl -w
	2
	3	BEGIN {
	4	if ($ENV{PERL_CORE}) {
	5	chdir 't' if -d 't';
	6	@INC = '../lib';
	7	}
	8	}
	9
	10	use Test::More;
	11
	12	use strict;
	13	use Hash::Util::FieldHash qw( :all);
	14
	15	no warnings 'misc';
	16
	17	plan tests => 5;
	18
	19	fieldhash my %h;
	20
	21	ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on");
	22
	23	foreach (1..10) {
	24	$h{"\0"x$_}++;
	25	}
	26
	27	ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash");
	28
	29	foreach (11..20) {
	30	$h{"\0"x$_}++;
	31	}
	32
	33	ok (Internals::HvREHASH(%h), "20 entries triggers rehash");
	34
	35
	36
	37
	38	# second part using an emulation of the PERL_HASH in perl, mounting an
	39	# attack on a prepopulated hash. This is also useful if you need normal
	40	# keys which don't contain \0 -- suitable for stashes
	41
	42	use constant MASK_U32 => 2**32;
	43	use constant HASH_SEED => 0;
	44	use constant THRESHOLD => 14;
	45	use constant START => "a";
	46
	47	# some initial hash data
	48	fieldhash my %h2;
	49	%h2 = map {$_ => 1} 'a'..'cc';
	50
	51	ok (!Internals::HvREHASH(%h2),
	52	"starting with pre-populated non-pathalogical hash (rehash flag if off)");
	53
	54	my @keys = get_keys(\%h2);
	55	$h2{$_}++ for @keys;
	56	ok (Internals::HvREHASH(%h2),
	57	scalar(@keys) . " colliding into the same bucket keys are triggerring rehash");
	58
	59	sub get_keys {
	60	my $hr = shift;
	61
	62	# the minimum of bits required to mount the attack on a hash
	63	my $min_bits = log(THRESHOLD)/log(2);
	64
	65	# if the hash has already been populated with a significant amount
	66	# of entries the number of mask bits can be higher
	67	my $keys = scalar keys %$hr;
	68	my $bits = $keys ? log($keys)/log(2) : 0;
	69	$bits = $min_bits if $min_bits > $bits;
	70
	71	$bits = int($bits) < $bits ? int($bits) + 1 : int($bits);
	72	# need to add 2 bits to cover the internal split cases
	73	$bits += 2;
	74	my $mask = 2**$bits-1;
	75	print "# using mask: $mask ($bits)\n";
	76
	77	my @keys;
	78	my $s = START;
	79	my $c = 0;
	80	# get 2 keys on top of the THRESHOLD
	81	my $hash;
	82	while (@keys < THRESHOLD+2) {
	83	# next if exists $hash->{$s};
	84	$hash = hash($s);
	85	next unless ($hash & $mask) == 0;
	86	$c++;
	87	printf "# %2d: %5s, %10s\n", $c, $s, $hash;
	88	push @keys, $s;
	89	} continue {
	90	$s++;
	91	}
	92
	93	return @keys;
	94	}
	95
	96
	97	# trying to provide the fastest equivalent of C macro's PERL_HASH in
	98	# Perl - the main complication is that it uses U32 integer, which we
	99	# can't do it perl, without doing some tricks
	100	sub hash {
	101	my $s = shift;
	102	my @c = split //, $s;
	103	my $u = HASH_SEED;
	104	for (@c) {
	105	# (A % M) + (B % M) == (A + B) % M
	106	# This works because '+' produces a NV, which is big enough to hold
	107	# the intermidiate result. We only need the % before any "^" and "&"
	108	# to get the result in the range for an I32.
	109	# and << doesn't work on NV, so using 1 << 10
	110	$u += ord;
	111	$u += $u * (1 << 10); $u %= MASK_U32;
	112	$u ^= $u >> 6;
	113	}
	114	$u += $u << 3; $u %= MASK_U32;
	115	$u ^= $u >> 11; $u %= MASK_U32;
	116	$u += $u << 15; $u %= MASK_U32;
	117	$u;
	118	}