This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
2 more VMS test tweaks
[perl5.git] / lib / Benchmark.pm
CommitLineData
a0d0e21e
LW
1package Benchmark;
2
f06db76b
AD
3=head1 NAME
4
8a4f6ac2 5Benchmark - benchmark running times of Perl code
431d98c2 6
f06db76b
AD
7=head1 SYNOPSIS
8
9 timethis ($count, "code");
10
523cc92b 11 # Use Perl code in strings...
f06db76b
AD
12 timethese($count, {
13 'Name1' => '...code1...',
14 'Name2' => '...code2...',
15 });
16
523cc92b
CS
17 # ... or use subroutine references.
18 timethese($count, {
19 'Name1' => sub { ...code1... },
20 'Name2' => sub { ...code2... },
21 });
22
431d98c2
BS
23 # cmpthese can be used both ways as well
24 cmpthese($count, {
25 'Name1' => '...code1...',
26 'Name2' => '...code2...',
27 });
28
29 cmpthese($count, {
30 'Name1' => sub { ...code1... },
31 'Name2' => sub { ...code2... },
32 });
33
34 # ...or in two stages
35 $results = timethese($count,
36 {
37 'Name1' => sub { ...code1... },
38 'Name2' => sub { ...code2... },
39 },
40 'none'
41 );
42 cmpthese( $results ) ;
43
f06db76b
AD
44 $t = timeit($count, '...other code...');
45 print "$count loops of other code took:",timestr($t),"\n";
46
431d98c2
BS
47 $t = countit($time, '...other code...');
48 $count = $t->iters ;
49 print "$count loops of other code took:",timestr($t),"\n";
50
f06db76b
AD
51=head1 DESCRIPTION
52
53The Benchmark module encapsulates a number of routines to help you
54figure out how long it takes to execute some code.
55
8a4f6ac2
GS
56timethis - run a chunk of code several times
57
58timethese - run several chunks of code several times
59
60cmpthese - print results of timethese as a comparison chart
61
62timeit - run a chunk of code and see how long it goes
63
64countit - see how many times a chunk of code runs in a given time
65
66
f06db76b
AD
67=head2 Methods
68
69=over 10
70
71=item new
72
73Returns the current time. Example:
74
75 use Benchmark;
76 $t0 = new Benchmark;
77 # ... your code here ...
78 $t1 = new Benchmark;
79 $td = timediff($t1, $t0);
a24a9dfe 80 print "the code took:",timestr($td),"\n";
f06db76b
AD
81
82=item debug
83
84Enables or disables debugging by setting the C<$Benchmark::debug> flag:
85
523cc92b 86 debug Benchmark 1;
f06db76b 87 $t = timeit(10, ' 5 ** $Global ');
523cc92b 88 debug Benchmark 0;
f06db76b 89
431d98c2
BS
90=item iters
91
92Returns the number of iterations.
93
f06db76b
AD
94=back
95
96=head2 Standard Exports
97
523cc92b 98The following routines will be exported into your namespace
f06db76b
AD
99if you use the Benchmark module:
100
101=over 10
102
103=item timeit(COUNT, CODE)
104
523cc92b
CS
105Arguments: COUNT is the number of times to run the loop, and CODE is
106the code to run. CODE may be either a code reference or a string to
107be eval'd; either way it will be run in the caller's package.
108
109Returns: a Benchmark object.
110
111=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
112
113Time COUNT iterations of CODE. CODE may be a string to eval or a
114code reference; either way the CODE will run in the caller's package.
115Results will be printed to STDOUT as TITLE followed by the times.
116TITLE defaults to "timethis COUNT" if none is provided. STYLE
117determines the format of the output, as described for timestr() below.
118
6ee623d5
GS
119The COUNT can be zero or negative: this means the I<minimum number of
120CPU seconds> to run. A zero signifies the default of 3 seconds. For
121example to run at least for 10 seconds:
122
123 timethis(-10, $code)
124
125or to run two pieces of code for at least 3 seconds each:
126
127 timethese(0, { test1 => '...', test2 => '...'})
128
129CPU seconds is, in UNIX terms, the user time plus the system time of
130the process itself, as opposed to the real (wallclock) time and the
131time spent by the child processes. Less than 0.1 seconds is not
132accepted (-0.01 as the count, for example, will cause a fatal runtime
133exception).
134
135Note that the CPU seconds is the B<minimum> time: CPU scheduling and
136other operating system factors may complicate the attempt so that a
137little bit more time is spent. The benchmark output will, however,
138also tell the number of C<$code> runs/second, which should be a more
139interesting number than the actually spent seconds.
140
141Returns a Benchmark object.
142
523cc92b 143=item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
f06db76b 144
523cc92b
CS
145The CODEHASHREF is a reference to a hash containing names as keys
146and either a string to eval or a code reference for each value.
147For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
148call
f06db76b 149
523cc92b 150 timethis(COUNT, VALUE, KEY, STYLE)
f06db76b 151
1d2dff63
GS
152The routines are called in string comparison order of KEY.
153
154The COUNT can be zero or negative, see timethis().
6ee623d5 155
3c6312e9
BS
156Returns a reference to a hash of Benchmark objects, keyed by name.
157
523cc92b 158=item timediff ( T1, T2 )
f06db76b 159
523cc92b
CS
160Returns the difference between two Benchmark times as a Benchmark
161object suitable for passing to timestr().
f06db76b 162
6ee623d5 163=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
f06db76b 164
523cc92b
CS
165Returns a string that formats the times in the TIMEDIFF object in
166the requested STYLE. TIMEDIFF is expected to be a Benchmark object
167similar to that returned by timediff().
168
3c6312e9
BS
169STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
170each of the 5 times available ('wallclock' time, user time, system time,
523cc92b
CS
171user time of children, and system time of children). 'noc' shows all
172except the two children times. 'nop' shows only wallclock and the
173two children times. 'auto' (the default) will act as 'all' unless
174the children times are both zero, in which case it acts as 'noc'.
3c6312e9 175'none' prevents output.
523cc92b
CS
176
177FORMAT is the L<printf(3)>-style format specifier (without the
178leading '%') to use to print the times. It defaults to '5.2f'.
f06db76b
AD
179
180=back
181
182=head2 Optional Exports
183
184The following routines will be exported into your namespace
185if you specifically ask that they be imported:
186
187=over 10
188
523cc92b
CS
189=item clearcache ( COUNT )
190
191Clear the cached time for COUNT rounds of the null loop.
192
193=item clearallcache ( )
f06db76b 194
523cc92b 195Clear all cached times.
f06db76b 196
ac8eabc1
JH
197=item cmpthese ( COUNT, CODEHASHREF, [ STYLE ] )
198
199=item cmpthese ( RESULTSHASHREF )
200
201Optionally calls timethese(), then outputs comparison chart. This
277427cf 202chart is sorted from slowest to fastest, and shows the percent
ac8eabc1
JH
203speed difference between each pair of tests. Can also be passed
204the data structure that timethese() returns:
205
206 $results = timethese( .... );
207 cmpthese( $results );
208
277427cf 209Returns the data structure returned by timethese() (or passed in).
ac8eabc1
JH
210
211=item countit(TIME, CODE)
212
213Arguments: TIME is the minimum length of time to run CODE for, and CODE is
214the code to run. CODE may be either a code reference or a string to
215be eval'd; either way it will be run in the caller's package.
216
217TIME is I<not> negative. countit() will run the loop many times to
218calculate the speed of CODE before running it for TIME. The actual
219time run for will usually be greater than TIME due to system clock
220resolution, so it's best to look at the number of iterations divided
221by the times that you are concerned with, not just the iterations.
222
223Returns: a Benchmark object.
224
523cc92b 225=item disablecache ( )
f06db76b 226
523cc92b
CS
227Disable caching of timings for the null loop. This will force Benchmark
228to recalculate these timings for each new piece of code timed.
229
230=item enablecache ( )
231
232Enable caching of timings for the null loop. The time taken for COUNT
233rounds of the null loop will be calculated only once for each
234different COUNT used.
f06db76b 235
ac8eabc1
JH
236=item timesum ( T1, T2 )
237
238Returns the sum of two Benchmark times as a Benchmark object suitable
239for passing to timestr().
240
f06db76b
AD
241=back
242
243=head1 NOTES
244
245The data is stored as a list of values from the time and times
523cc92b 246functions:
f06db76b 247
431d98c2 248 ($real, $user, $system, $children_user, $children_system, $iters)
f06db76b
AD
249
250in seconds for the whole loop (not divided by the number of rounds).
251
252The timing is done using time(3) and times(3).
253
254Code is executed in the caller's package.
255
f06db76b
AD
256The time of the null loop (a loop with the same
257number of rounds but empty loop body) is subtracted
258from the time of the real loop.
259
3c6312e9 260The null loop times can be cached, the key being the
f06db76b
AD
261number of rounds. The caching can be controlled using
262calls like these:
263
523cc92b 264 clearcache($key);
f06db76b
AD
265 clearallcache();
266
523cc92b 267 disablecache();
f06db76b
AD
268 enablecache();
269
3c6312e9
BS
270Caching is off by default, as it can (usually slightly) decrease
271accuracy and does not usually noticeably affect runtimes.
272
54e82ce5
GS
273=head1 EXAMPLES
274
275For example,
276
277 use Benchmark;$x=3;cmpthese(-5,{a=>sub{$x*$x},b=>sub{$x**2}})
278
279outputs something like this:
280
281 Benchmark: running a, b, each for at least 5 CPU seconds...
282 a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743)
283 b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452)
284 Rate b a
285 b 1574945/s -- -59%
286 a 3835056/s 144% --
287
288while
289
290 use Benchmark;
291 $x=3;
292 $r=timethese(-5,{a=>sub{$x*$x},b=>sub{$x**2}},'none');
293 cmpthese($r);
294
295outputs something like this:
296
297 Rate b a
298 b 1559428/s -- -62%
299 a 4152037/s 166% --
300
301
f06db76b
AD
302=head1 INHERITANCE
303
304Benchmark inherits from no other class, except of course
305for Exporter.
306
307=head1 CAVEATS
308
80eab818 309Comparing eval'd strings with code references will give you
431d98c2 310inaccurate results: a code reference will show a slightly slower
80eab818
CS
311execution time than the equivalent eval'd string.
312
f06db76b
AD
313The real time timing is done using time(2) and
314the granularity is therefore only one second.
315
316Short tests may produce negative figures because perl
523cc92b
CS
317can appear to take longer to execute the empty loop
318than a short test; try:
f06db76b
AD
319
320 timethis(100,'1');
321
322The system time of the null loop might be slightly
323more than the system time of the loop with the actual
a24a9dfe 324code and therefore the difference might end up being E<lt> 0.
f06db76b 325
8a4f6ac2
GS
326=head1 SEE ALSO
327
328L<Devel::DProf> - a Perl code profiler
329
f06db76b
AD
330=head1 AUTHORS
331
5aabfad6 332Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
f06db76b
AD
333
334=head1 MODIFICATION HISTORY
335
336September 8th, 1994; by Tim Bunce.
337
523cc92b
CS
338March 28th, 1997; by Hugo van der Sanden: added support for code
339references and the already documented 'debug' method; revamped
340documentation.
f06db76b 341
6ee623d5
GS
342April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
343functionality.
344
3c6312e9
BS
345September, 1999; by Barrie Slaymaker: math fixes and accuracy and
346efficiency tweaks. Added cmpthese(). A result is now returned from
431d98c2 347timethese(). Exposed countit() (was runfor()).
3c6312e9 348
523cc92b 349=cut
a0d0e21e 350
3f943bd9
GS
351# evaluate something in a clean lexical environment
# _doeval(STRING): string-evals its first argument and returns the result.
# The body deliberately declares no lexicals of its own, so the evaluated
# code sees none from this sub.
352sub _doeval { eval shift }
353
354#
355# put any lexicals at file scope AFTER here
356#
357
4aa0a1f7 358use Carp;
a0d0e21e
LW
359use Exporter;
# Benchmark inherits from Exporter.  The class name is quoted so it is a
# plain string rather than a bareword.
@ISA = ('Exporter');

# Routines exported by default.
@EXPORT = qw(timeit timethis timethese timediff timestr);

# Routines exported only when explicitly requested.
@EXPORT_OK = qw(timesum cmpthese countit
                clearcache clearallcache disablecache enablecache);

$VERSION = 1.01;
8a4f6ac2 366
a0d0e21e
LW
init();

# Reset every package-level setting to its default value: debugging off,
# default output format and style, and the null-loop cache disabled and
# emptied.
sub init {
    $debug        = 0;
    $min_count    = 4;
    $min_cpu      = 0.4;
    $defaultfmt   = '5.2f';
    $defaultstyle = 'auto';
    # The cache can cause a slight loss of sys time accuracy. If a
    # user does many tests (>10) with *very* large counts (>10000)
    # or works on a very slow machine the cache may be useful.
    # Called with explicit empty argument lists so the caller's @_ is
    # not implicitly forwarded.
    disablecache();
    clearallcache();
}
381
523cc92b
CS
382sub debug { $debug = ($_[1] != 0); }
383
bba8fca5
BS
# The cache needs two branches: 's' for strings and 'c' for code.  The
# empty loop is different in these two cases.

# Forget the cached null-loop timings (both branches) for the given count.
sub clearcache {
    my $count = $_[0];
    delete $cache{$count . 'c'};
    delete $cache{$count . 's'};
}

# Forget every cached null-loop timing.
sub clearallcache {
    %cache = ();
}

# Turn on reuse of cached null-loop timings.
sub enablecache {
    $cache = 1;
}

# Turn off reuse of cached null-loop timings.
sub disablecache {
    $cache = 0;
}
390
a0d0e21e
LW
391# --- Functions to process the 'time' data type
392
6ee623d5
GS
# Constructor.  Captures the current clocks and returns them as a blessed
# array:
#   [ time(), <the values returned by times()>, ITERS ]
# ITERS is the single optional argument after the invocant; it defaults
# to 0.
sub new {
    my @stamp = (time, times);
    push @stamp, (@_ == 2 ? $_[1] : 0);
    print "new=@stamp\n" if $debug;
    bless \@stamp, __PACKAGE__;
}
a0d0e21e
LW
396
# Accessors over a timing array laid out as
#   [ real, user, system, children user, children system, iterations ].

# User + system time of the process itself.
sub cpu_p {
    my $t = $_[0];
    $t->[1] + $t->[2];
}

# User + system time of the children.
sub cpu_c {
    my $t = $_[0];
    $t->[3] + $t->[4];
}

# All four CPU times added together.
sub cpu_a {
    my $t = $_[0];
    $t->[1] + $t->[2] + $t->[3] + $t->[4];
}

# Wallclock time.
sub real {
    my $t = $_[0];
    $t->[0];
}

# Number of iterations.
sub iters {
    my $t = $_[0];
    $t->[5];
}
a0d0e21e 402
# timediff(T1, T2): element-wise T1 - T2, over as many elements as T1 has.
# Returns the differences as a new blessed array.
sub timediff {
    my ($later, $earlier) = @_;
    my @delta = map { $later->[$_] - $earlier->[$_] } 0 .. @$later - 1;
    bless \@delta;
}
411
705cc255
TB
# timesum(T1, T2): element-wise T1 + T2, over as many elements as T1 has.
# Returns the sums as a new blessed array.
sub timesum {
    my ($first, $second) = @_;
    my @total = map { $first->[$_] + $second->[$_] } 0 .. @$first - 1;
    bless \@total;
}
420
# timestr(TIMEDIFF, [STYLE, [FORMAT]]): render a timing object as text.
#
# TIMEDIFF must be an object providing cpu_p/cpu_c/cpu_a and holding six
# values (a warning is issued otherwise).  STYLE falls back to
# $defaultstyle when false and FORMAT to $defaultfmt when undefined.
# 'auto' resolves to 'all' when the children CPU time is positive and to
# 'noc' otherwise.  Any style other than 'all', 'noc' or 'nop' yields the
# raw values followed by the style name.  When both the iteration count
# and the process CPU time are non-zero, the rate and count are appended.
sub timestr {
    my ($tr, $style, $f) = @_;
    my @t = @$tr;
    warn "bad time value (@t)" unless @t==6;
    my ($r, $pu, $ps, $cu, $cs, $n) = @t;
    my ($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);
    $f = $defaultfmt unless defined $f;

    # format a time in the required style, other formats may be added here
    $style ||= $defaultstyle;
    if ($style eq 'auto') {
        $style = ($ct > 0) ? 'all' : 'noc';
    }

    my $s;
    if ($style eq 'all') {
        $s = sprintf("%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
                     $r, $pu, $ps, $cu, $cs, $tt);
    }
    elsif ($style eq 'noc') {
        $s = sprintf("%2d wallclock secs (%$f usr + %$f sys = %$f CPU)",
                     $r, $pu, $ps, $pt);
    }
    elsif ($style eq 'nop') {
        $s = sprintf("%2d wallclock secs (%$f cusr + %$f csys = %$f CPU)",
                     $r, $cu, $cs, $ct);
    }
    else {
        $s = "@t $style";    # default for unknown style
    }

    if ($n && $pu+$ps) {
        $s .= sprintf(" @ %$f/s (n=$n)", $n / ( $pu + $ps ));
    }
    $s;
}
523cc92b
CS
441
# timedebug(MSG, T): when debugging is enabled, print MSG followed by the
# formatted time T to STDERR.  Does nothing otherwise.
sub timedebug {
    my ($label, $time) = @_;
    if ($debug) {
        print STDERR "$label", timestr($time), "\n";
    }
}
446
a0d0e21e
LW
447# --- Functions implementing low-level support for timing loops
448
# runloop(N, CODE): run CODE N times and return the elapsed times as the
# difference of two Benchmark snapshots taken around the loop.
# CODE is either a code reference or a string of Perl source.
# Croaks on a negative N or when the generated loop does not compile;
# confesses when CODE is undefined.
449sub runloop {
450 my($n, $c) = @_;
4aa0a1f7
AD
451
452 $n+=0; # force numeric now, so garbage won't creep into the eval
523cc92b
CS
453 croak "negative loopcount $n" if $n<0;
454 confess "Usage: runloop(number, [string | coderef])" unless defined $c;
a0d0e21e
LW
455 my($t0, $t1, $td); # before, after, difference
456
457 # find package of caller so we can execute code there
# (walk up the call stack until a frame from a different package is found)
523cc92b
CS
458 my($curpack) = caller(0);
459 my($i, $pack)= 0;
a0d0e21e
LW
460 while (($pack) = caller(++$i)) {
461 last if $pack ne $curpack;
462 }
463
3f943bd9
GS
# Build the source of an anonymous sub that runs the loop, then compile it.
464 my ($subcode, $subref);
465 if (ref $c eq 'CODE') {
# Compiled with a plain eval here, so "&$c" in the generated source refers
# to the lexical $c declared above.
466 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
467 $subref = eval $subcode;
468 }
469 else {
# String code is pasted into the loop body and compiled through _doeval().
470 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
471 $subref = _doeval($subcode);
472 }
4aa0a1f7 473 croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
523cc92b 474 print STDERR "runloop $n '$subcode'\n" if $debug;
a0d0e21e 475
3c6312e9
BS
476 # Wait for the user timer to tick. This makes the error range more like
477 # -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This
478 # may not seem important, but it significantly reduces the chances of
479 # getting a too low initial $n in the initial, 'find the minimum' loop
431d98c2 480 # in &countit. This, in turn, can reduce the number of calls to
bba8fca5
BS
481 # &runloop a lot, and thus reduce additive errors.
482 my $tbase = Benchmark->new(0)->[1];
277427cf 483 while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ;
a0d0e21e 484 &$subref;
# The "after" snapshot records $n as its iteration count; the "before"
# snapshot records 0, so the difference carries $n iterations.
6ee623d5 485 $t1 = Benchmark->new($n);
a0d0e21e 486 $td = &timediff($t1, $t0);
a0d0e21e
LW
487 timedebug("runloop:",$td);
488 $td;
489}
490
491
# timeit(COUNT, CODE): time COUNT iterations of CODE and return the result
# with the time of an equivalent empty ("null") loop subtracted.
#
# CODE is a code reference or a string.  The null loop uses an empty sub
# for code references and an empty string for strings.  Its timing is
# stored in %cache under COUNT plus a 'c'/'s' suffix, and is reused only
# while $cache is true.
sub timeit {
    my ($n, $code) = @_;
    my ($wn, $wc, $wd);

    # print, not printf: $code is arbitrary text and may contain '%',
    # which printf would treat as a format directive.
    print STDERR "timeit $n $code\n" if $debug;

    my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
    if ($cache && exists $cache{$cache_key} ) {
        $wn = $cache{$cache_key};
    }
    else {
        $wn = runloop($n, ref( $code ) ? sub { undef } : '' );
        # Can't let our baseline have any iterations, or they get subtracted
        # out of the result.
        $wn->[5] = 0;
        $cache{$cache_key} = $wn;
    }

    $wc = runloop($n, $code);

    $wd = timediff($wc, $wn);
    timedebug("timeit: ",$wc);
    timedebug(" - ",$wn);
    timedebug(" = ",$wd);

    $wd;
}
517
6ee623d5
GS
518
# Settings for the "run for at least this long" mode.
my $default_for = 3;      # time used when the caller passes 0 or undef
my $min_for     = 0.1;    # shortest time limit accepted by countit()

# countit(TIME, CODE): run CODE repeatedly until at least TIME seconds of
# user + system time have been accumulated.  Returns a blessed array
#   [ real, user, system, children user, children system, iterations ]
# holding the totals over all "for real" runs.
#
# An undefined or zero TIME selects $default_for; a negative TIME is
# negated.  Dies when the resulting limit is below $min_for.
sub countit {
    my ( $tmax, $code ) = @_;

    if ( !defined $tmax || $tmax == 0 ) {
        $tmax = $default_for;
    }
    elsif ( $tmax < 0 ) {
        $tmax = -$tmax;
    }

    die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
        if $tmax < $min_for;

    my ($n, $tc);

    # First find the minimum $n that gives a significant timing, doubling
    # the count after every attempt that is still too short.
    $n = 1;
    while (1) {
        my $probe = timeit($n, $code);
        $tc = $probe->[1] + $probe->[2];
        last if $tc > 0.1;
        $n *= 2;
    }

    my $nmin = $n;

    # Get $n high enough that we can guess the final $n with some accuracy.
    my $tpra = 0.1 * $tmax;    # Target/time practice.
    until ( $tc >= $tpra ) {
        # The 5% fudge is to keep us from iterating again all
        # that often (this speeds overall responsiveness when $tmax is big
        # and we guess a little low).  This does not noticeably affect
        # accuracy since we're not counting these times.
        $n = int( $tpra * 1.05 * $n / $tc );    # Linear approximation.
        my $probe = timeit($n, $code);
        my $new_tc = $probe->[1] + $probe->[2];
        # Make sure we are making progress.
        my $floor = 1.2 * $tc;
        $tc = ($new_tc > $floor) ? $new_tc : $floor;
    }

    # Now, do the 'for real' timing(s), repeating until we exceed
    # the max.  @sum accumulates the first five fields of each result:
    # real, user, system, children user, children system.
    my $ntot = 0;
    my @sum  = (0, 0.0, 0.0, 0.0, 0.0);
    my $ttot = 0.0;

    # The 5% fudge is because $n is often a few % low even for routines
    # with stable times and avoiding extra timeit()s is nice for
    # accuracy's sake.
    $n = int( $n * ( 1.05 * $tmax / $tc ) );

    while (1) {
        my $td = timeit($n, $code);
        $ntot += $n;
        $sum[$_] += $td->[$_] for 0 .. 4;
        $ttot = $sum[1] + $sum[2];
        last if $ttot >= $tmax;

        # Not there yet: estimate how many more iterations are needed,
        # never dropping below the minimum found in the first phase.
        $ttot = 0.01 if $ttot < 0.01;
        my $r = $tmax / $ttot - 1;    # Linear approximation.
        $n = int( $r * $ntot );
        $n = $nmin if $n < $nmin;
    }

    return bless [ @sum, $ntot ];
}
594
a0d0e21e
LW
595# --- Functions implementing high-level time-then-print utilities
596
6ee623d5
GS
# n_to_for(N): translate a loop count into a time limit.
# Zero selects $default_for, a negative N gives its absolute value, and a
# positive N returns undef.
sub n_to_for {
    my $n = shift;
    if ( $n == 0 ) {
        return $default_for;
    }
    if ( $n < 0 ) {
        return -$n;
    }
    return undef;
}
601
a0d0e21e
LW
# timethis(COUNT, CODE, [TITLE, [STYLE]]): time CODE and print the result.
#
# A positive COUNT must be an integer (croaks otherwise) and is the number
# of iterations handed to timeit().  Any other COUNT is converted to a
# time limit by n_to_for() and handed to countit().  TITLE defaults to
# "timethis COUNT" or "timethis for TIME" respectively.  Unless STYLE is
# 'none', the title and the formatted times are printed.  Returns the
# timing object.
sub timethis{
    my($n, $code, $title, $style) = @_;
    my($t, $forn);

    if ( $n > 0 ) {
        croak "non-integer loopcount $n, stopped" if int($n)<$n;
        $t = timeit($n, $code);
        $title = "timethis $n" unless defined $title;
    }
    else {
        # Lexical, so the time limit does not leak into a package global.
        my $fort = n_to_for( $n );
        $t = countit( $fort, $code );
        $title = "timethis for $fort" unless defined $title;
        $forn = $t->[-1];    # iterations actually performed
    }
    local $| = 1;
    $style = "" unless defined $style;
    printf("%10s: ", $title) unless $style eq 'none';
    print timestr($t, $style, $defaultfmt),"\n" unless $style eq 'none';

    # In timed mode, judge reliability by the iterations actually run.
    $n = $forn if defined $forn;

    # A conservative warning to spot very silly tests.
    # Don't assume that your benchmark is ok simply because
    # you don't get this warning!
    print " (warning: too few iterations for a reliable count)\n"
        if $n < $min_count
        || ($t->real < 1 && $n < 1000)
        || $t->cpu_a < $min_cpu;
    $t;
}
632
a0d0e21e
LW
# timethese(COUNT, CODEHASHREF, [STYLE]): run timethis() on every entry of
# CODEHASHREF, in sorted key order, using the key as the title.
#
# Dies with a usage message unless the second argument is a hash
# reference, and croaks on a positive non-integer COUNT.  Unless STYLE is
# 'none', a one-line summary of what is about to run is printed first.
# Returns a reference to a hash mapping each name to its timethis()
# result.
sub timethese{
    my($n, $alt, $style) = @_;
    die "usage: timethese(count, { 'Name1'=>'code1', ... })\n"
        unless ref $alt eq 'HASH';
    my @names = sort keys %$alt;
    $style = "" unless defined $style;
    my $verbose = $style ne 'none';

    print "Benchmark: " if $verbose;
    if ( $n > 0 ) {
        croak "non-integer loopcount $n, stopped" if int($n)<$n;
        print "timing $n iterations of" if $verbose;
    }
    else {
        print "running" if $verbose;
    }
    print " ", join(', ',@names) if $verbose;
    unless ( $n > 0 ) {
        my $for = n_to_for( $n );
        print ", each for at least $for CPU seconds" if $verbose;
    }
    print "...\n" if $verbose;

    # we could save the results in an array and produce a summary here
    # sum, min, max, avg etc etc
    my %results;
    foreach my $name (@names) {
        $results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
    }

    return \%results;
}
662
3c6312e9
BS
# cmpthese(COUNT, CODEHASHREF, [STYLE])
# cmpthese(RESULTSHASHREF)
#
# Print a comparison chart of benchmark results and return the results
# hash reference.  When the first argument is not a reference, all
# arguments are passed to timethese() to obtain the results first.  If the
# third argument is 'none', the results are returned without printing.
#
# The chart has one row per test, sorted by ascending rate: the test name,
# its rate (or seconds per iteration when the median rate is not above 1),
# and its percentage difference against every test.  An empty results
# hash produces just the header row.
sub cmpthese{
    my $results = ref $_[0] ? $_[0] : timethese( @_ );

    return $results
        if defined $_[2] && $_[2] eq 'none';

    # Flatten in to an array of arrays with the name as the first field
    my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;

    for (@vals) {
        # The epsilon fudge here is to prevent div by 0.  Since clock
        # resolutions are much larger, it's below the noise floor.
        # Fields: [6] = iterations, [2] + [3] = user + system time.
        my $rate = $_->[6] / ( $_->[2] + $_->[3] + 0.000000000000001 );
        $_->[7] = $rate;
    }

    # Sort by rate
    @vals = sort { $a->[7] <=> $b->[7] } @vals;

    # If more than half of the rates are greater than one...
    # (Guarded: with no results, $#vals is -1 and -1 >> 1 is a huge
    # unsigned index that must not be used.)
    my $display_as_rate = @vals ? ( $vals[$#vals>>1]->[7] > 1 ) : 0;

    my @rows;
    my @col_widths;

    my @top_row = (
        '',
        $display_as_rate ? 'Rate' : 's/iter',
        map { $_->[0] } @vals
    );

    push @rows, \@top_row;
    @col_widths = map { length( $_ ) } @top_row;

    # Build the data rows
    # We leave the last column in even though it never has any data.  Perhaps
    # it should go away.  Also, perhaps a style for a single column of
    # percentages might be nice.
    for my $row_val ( @vals ) {
        my @row;

        # Column 0 = test name
        push @row, $row_val->[0];
        $col_widths[0] = length( $row_val->[0] )
            if length( $row_val->[0] ) > $col_widths[0];

        # Column 1 = performance
        my $row_rate = $row_val->[7];

        # We assume that we'll never get a 0 rate.
        my $perf = $display_as_rate ? $row_rate : 1 / $row_rate;

        # Only give a few decimal places before switching to sci. notation,
        # since the results aren't usually that accurate anyway.
        my $format =
              $perf >= 100 ? "%0.0f"
            : $perf >= 10  ? "%0.1f"
            : $perf >= 1   ? "%0.2f"
            : $perf >= 0.1 ? "%0.3f"
            :                "%0.2e";

        $format .= "/s"
            if $display_as_rate;
        my $perf_str = sprintf( $format, $perf );
        push @row, $perf_str;
        $col_widths[1] = length( $perf_str )
            if length( $perf_str ) > $col_widths[1];

        # Columns 2..N = performance ratios
        for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
            my $col_val = $vals[$col_num];
            my $out;
            if ( $col_val->[0] eq $row_val->[0] ) {
                $out = "--";
            }
            else {
                my $col_rate = $col_val->[7];
                $out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
            }
            push @row, $out;
            $col_widths[$col_num+2] = length( $out )
                if length( $out ) > $col_widths[$col_num+2];

            # A little weirdness to set the first column width properly
            $col_widths[$col_num+2] = length( $col_val->[0] )
                if length( $col_val->[0] ) > $col_widths[$col_num+2];
        }
        push @rows, \@row;
    }

    # Equalize column widths in the chart as much as possible without
    # exceeding 80 characters.  This does not use or affect cols 0 or 1.
    # Nothing to equalize when there are no ratio columns.
    my @sorted_width_refs =
        sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];

    if ( @sorted_width_refs ) {
        my $max_width = ${$sorted_width_refs[-1]};

        my $total = @col_widths - 1 ;
        for ( @col_widths ) { $total += $_ }

        STRETCHER:
        while ( $total < 80 ) {
            my $min_width = ${$sorted_width_refs[0]};
            last
                if $min_width == $max_width;
            for ( @sorted_width_refs ) {
                last
                    if $$_ > $min_width;
                ++$$_;
                ++$total;
                last STRETCHER
                    if $total >= 80;
            }
        }
    }

    # Dump the output
    my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
    substr( $format, 1, 0 ) = '-';    # left-justify the name column
    for ( @rows ) {
        printf $format, @$_;
    }

    return $results;
}
796
797
a0d0e21e 7981;