=head1 INHERITANCE
Benchmark inherits from no other class, except of course
-for Exporter.
+from Exporter.
=head1 CAVEATS
clearcache clearallcache disablecache enablecache);
%EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ;
-$VERSION = 1.17;
+$VERSION = 1.23;
# --- ':hireswallclock' special handling
sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; }
sub iters { $_[0]->[5] ; }
+# return the sum of various times: which ones depending on $style
+
+sub elapsed {
+ my ($self, $style) = @_;
+ $style = "" unless defined $style;
+
+ return $self->cpu_c if $style eq 'nop';
+ return $self->cpu_p if $style eq 'noc';
+ return $self->cpu_a;
+}
+
$_Usage{timediff} = <<'USAGE';
usage: $result_diff = timediff($result1, $result2);
$r,$pu,$ps,$pt) if $style eq 'noc';
$s = sprintf("$w wallclock secs (%$f cusr + %$f csys = %$f CPU)",
$r,$cu,$cs,$ct) if $style eq 'nop';
- my $elapsed = do {
- if ($style eq 'nop') {$cu+$cs}
- elsif ($style eq 'noc') {$pu+$ps}
- else {$cu+$cs+$pu+$ps}
- };
+ my $elapsed = $tr->elapsed($style);
$s .= sprintf(" @ %$f/s (n=$n)",$n/($elapsed)) if $n && $elapsed;
$s;
}
# getting a too low initial $n in the initial, 'find the minimum' loop
# in &countit. This, in turn, can reduce the number of calls to
# &runloop a lot, and thus reduce additive errors.
+ #
+ # Note that its possible for the act of reading the system clock to
+ # burn lots of system CPU while we burn very little user clock in the
+ # busy loop, which can cause the loop to run for a very long wall time.
+ # So gradually ramp up the duration of the loop. See RT #122003
+ #
my $tbase = Benchmark->new(0)->[1];
- while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ;
+ my $limit = 1;
+ while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {
+ for (my $i=0; $i < $limit; $i++) { my $x = $i / 1.5 } # burn user CPU
+ $limit *= 1.1;
+ }
$subref->();
$t1 = Benchmark->new($n);
$td = &timediff($t1, $t0);
# First find the minimum $n that gives a significant timing.
my $zeros=0;
for ($n = 1; ; $n *= 2 ) {
+ my $t0 = Benchmark->new(0);
my $td = timeit($n, $code);
+ my $t1 = Benchmark->new(0);
$tc = $td->[1] + $td->[2];
if ( $tc <= 0 and $n > 1024 ) {
- ++$zeros > 16
- and die "Timing is consistently zero in estimation loop, cannot benchmark. N=$n\n";
+ my $d = timediff($t1, $t0);
+ # note that $d is the total CPU time taken to call timeit(),
+ # while $tc is the difference in CPU secs between the empty run
+ # and the code run. If the code is trivial, its possible
+ # for $d to get large while $tc is still zero (or slightly
+ # negative). Bail out once timeit() starts taking more than a
+ # few seconds without noticeable difference.
+ if ($d->[1] + $d->[2] > 8
+ || ++$zeros > 16)
+ {
+ die "Timing is consistently zero in estimation loop, cannot benchmark. N=$n\n";
+ }
} else {
$zeros = 0;
}
$n = $forn if defined $forn;
+ if ($t->elapsed($style) < 0) {
+ # due to clock granularity and variable CPU speed and load,
+ # on quick code with a small number of loops, it's possible for
+ # the empty loop to appear to take longer than the real loop
+ # (e.g. 1 tick versus 0 ticks). This leads to a negative elapsed
+ # time. In this case, floor it at zero, to stop bizarre results.
+ print " (warning: too few iterations for a reliable count)\n";
+ $t->[$_] = 0 for 1..4;
+ }
+
# A conservative warning to spot very silly tests.
# Don't assume that your benchmark is ok simply because
# you don't get this warning!
my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
for (@vals) {
+ # recreate the pre-flattened Benchmark object
+ my $tmp_bm = bless [ @{$_}[1..$#$_] ];
+ my $elapsed = $tmp_bm->elapsed($style);
# The epsilon fudge here is to prevent div by 0. Since clock
# resolutions are much larger, it's below the noise floor.
- my $elapsed = do {
- if ($style eq 'nop') {$_->[4]+$_->[5]}
- elsif ($style eq 'noc') {$_->[2]+$_->[3]}
- else {$_->[2]+$_->[3]+$_->[4]+$_->[5]}
- };
my $rate = $_->[6]/(($elapsed)+0.000000000000001);
$_->[7] = $rate;
}