This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[PATCH] Attack of the C loops, round 1
[perl5.git] / lib / Benchmark.pm
CommitLineData
a0d0e21e
LW
1package Benchmark;
2
f06db76b
AD
3=head1 NAME
4
8a4f6ac2 5Benchmark - benchmark running times of Perl code
431d98c2 6
f06db76b
AD
7=head1 SYNOPSIS
8
9 timethis ($count, "code");
10
523cc92b 11 # Use Perl code in strings...
f06db76b
AD
12 timethese($count, {
13 'Name1' => '...code1...',
14 'Name2' => '...code2...',
15 });
16
523cc92b
CS
17 # ... or use subroutine references.
18 timethese($count, {
19 'Name1' => sub { ...code1... },
20 'Name2' => sub { ...code2... },
21 });
22
431d98c2
BS
23 # cmpthese can be used both ways as well
24 cmpthese($count, {
25 'Name1' => '...code1...',
26 'Name2' => '...code2...',
27 });
28
29 cmpthese($count, {
30 'Name1' => sub { ...code1... },
31 'Name2' => sub { ...code2... },
32 });
33
34 # ...or in two stages
35 $results = timethese($count,
36 {
37 'Name1' => sub { ...code1... },
38 'Name2' => sub { ...code2... },
39 },
40 'none'
41 );
42 cmpthese( $results ) ;
43
f06db76b
AD
44 $t = timeit($count, '...other code...')
45 print "$count loops of other code took:",timestr($t),"\n";
46
431d98c2
BS
47 $t = countit($time, '...other code...')
48 $count = $t->iters ;
49 print "$count loops of other code took:",timestr($t),"\n";
50
f06db76b
AD
51=head1 DESCRIPTION
52
53The Benchmark module encapsulates a number of routines to help you
54figure out how long it takes to execute some code.
55
8a4f6ac2
GS
56timethis - run a chunk of code several times
57
58timethese - run several chunks of code several times
59
60cmpthese - print results of timethese as a comparison chart
61
62timeit - run a chunk of code and see how long it goes
63
64countit - see how many times a chunk of code runs in a given time
65
66
f06db76b
AD
67=head2 Methods
68
69=over 10
70
71=item new
72
73Returns the current time. Example:
74
75 use Benchmark;
76 $t0 = new Benchmark;
77 # ... your code here ...
78 $t1 = new Benchmark;
79 $td = timediff($t1, $t0);
a24a9dfe 80 print "the code took:",timestr($td),"\n";
f06db76b
AD
81
82=item debug
83
84Enables or disable debugging by setting the C<$Benchmark::Debug> flag:
85
523cc92b 86 debug Benchmark 1;
f06db76b 87 $t = timeit(10, ' 5 ** $Global ');
523cc92b 88 debug Benchmark 0;
f06db76b 89
431d98c2
BS
90=item iters
91
92Returns the number of iterations.
93
f06db76b
AD
94=back
95
96=head2 Standard Exports
97
523cc92b 98The following routines will be exported into your namespace
f06db76b
AD
99if you use the Benchmark module:
100
101=over 10
102
103=item timeit(COUNT, CODE)
104
523cc92b
CS
105Arguments: COUNT is the number of times to run the loop, and CODE is
106the code to run. CODE may be either a code reference or a string to
107be eval'd; either way it will be run in the caller's package.
108
109Returns: a Benchmark object.
110
111=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
112
113Time COUNT iterations of CODE. CODE may be a string to eval or a
114code reference; either way the CODE will run in the caller's package.
115Results will be printed to STDOUT as TITLE followed by the times.
116TITLE defaults to "timethis COUNT" if none is provided. STYLE
117determines the format of the output, as described for timestr() below.
118
6ee623d5
GS
119The COUNT can be zero or negative: this means the I<minimum number of
120CPU seconds> to run. A zero signifies the default of 3 seconds. For
121example to run at least for 10 seconds:
122
123 timethis(-10, $code)
124
125or to run two pieces of code tests for at least 3 seconds:
126
127 timethese(0, { test1 => '...', test2 => '...'})
128
129CPU seconds is, in UNIX terms, the user time plus the system time of
130the process itself, as opposed to the real (wallclock) time and the
131time spent by the child processes. Less than 0.1 seconds is not
132accepted (-0.01 as the count, for example, will cause a fatal runtime
133exception).
134
135Note that the CPU seconds is the B<minimum> time: CPU scheduling and
136other operating system factors may complicate the attempt so that a
137little bit more time is spent. The benchmark output will, however,
138also tell the number of C<$code> runs/second, which should be a more
139interesting number than the actually spent seconds.
140
141Returns a Benchmark object.
142
523cc92b 143=item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
f06db76b 144
523cc92b
CS
145The CODEHASHREF is a reference to a hash containing names as keys
146and either a string to eval or a code reference for each value.
147For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
148call
f06db76b 149
523cc92b 150 timethis(COUNT, VALUE, KEY, STYLE)
f06db76b 151
1d2dff63
GS
152The routines are called in string comparison order of KEY.
153
154The COUNT can be zero or negative, see timethis().
6ee623d5 155
3c6312e9
BS
156Returns a hash of Benchmark objects, keyed by name.
157
523cc92b 158=item timediff ( T1, T2 )
f06db76b 159
523cc92b
CS
160Returns the difference between two Benchmark times as a Benchmark
161object suitable for passing to timestr().
f06db76b 162
6ee623d5 163=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
f06db76b 164
523cc92b
CS
165Returns a string that formats the times in the TIMEDIFF object in
166the requested STYLE. TIMEDIFF is expected to be a Benchmark object
167similar to that returned by timediff().
168
3c6312e9
BS
169STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
170each of the 5 times available ('wallclock' time, user time, system time,
523cc92b
CS
171user time of children, and system time of children). 'noc' shows all
172except the two children times. 'nop' shows only wallclock and the
173two children times. 'auto' (the default) will act as 'all' unless
174the children times are both zero, in which case it acts as 'noc'.
3c6312e9 175'none' prevents output.
523cc92b
CS
176
177FORMAT is the L<printf(3)>-style format specifier (without the
178leading '%') to use to print the times. It defaults to '5.2f'.
f06db76b
AD
179
180=back
181
182=head2 Optional Exports
183
184The following routines will be exported into your namespace
185if you specifically ask that they be imported:
186
187=over 10
188
523cc92b
CS
189=item clearcache ( COUNT )
190
191Clear the cached time for COUNT rounds of the null loop.
192
193=item clearallcache ( )
f06db76b 194
523cc92b 195Clear all cached times.
f06db76b 196
ac8eabc1
JH
197=item cmpthese ( COUT, CODEHASHREF, [ STYLE ] )
198
d1083c7a 199=item cmpthese ( RESULTSHASHREF, [ STYLE ] )
ac8eabc1 200
d1083c7a 201Optionally calls timethese(), then outputs comparison chart. This:
ac8eabc1 202
d1083c7a
BS
203 cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
204
205outputs a chart like:
206
207 Rate b a
208 b 2831802/s -- -61%
209 a 7208959/s 155% --
210
211This chart is sorted from slowest to fastest, and shows the percent speed
212difference between each pair of tests.
213
214c<cmpthese> can also be passed the data structure that timethese() returns:
215
216 $results = timethese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
ac8eabc1
JH
217 cmpthese( $results );
218
d1083c7a
BS
219in case you want to see both sets of results.
220
221Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the
222above chart, including labels. This:
223
224 my $rows = cmpthese( -1, { a => '++$i', b => '$i *= 2' }, "none" );
225
226returns a data structure like:
227
228 [
229 [ '', 'Rate', 'b', 'a' ],
230 [ 'b', '2885232/s', '--', '-59%' ],
231 [ 'a', '7099126/s', '146%', '--' ],
232 ]
233
234B<NOTE>: This result value differs from previous versions, which returned
235the C<timethese()> result structure. If you want that, just use the two
236statement C<timethese>...C<cmpthese> idiom shown above.
237
238Incidently, note the variance in the result values between the two examples;
239this is typical of benchmarking. If this were a real benchmark, you would
240probably want to run a lot more iterations.
ac8eabc1
JH
241
242=item countit(TIME, CODE)
243
244Arguments: TIME is the minimum length of time to run CODE for, and CODE is
245the code to run. CODE may be either a code reference or a string to
246be eval'd; either way it will be run in the caller's package.
247
248TIME is I<not> negative. countit() will run the loop many times to
249calculate the speed of CODE before running it for TIME. The actual
250time run for will usually be greater than TIME due to system clock
251resolution, so it's best to look at the number of iterations divided
252by the times that you are concerned with, not just the iterations.
253
254Returns: a Benchmark object.
255
523cc92b 256=item disablecache ( )
f06db76b 257
523cc92b
CS
258Disable caching of timings for the null loop. This will force Benchmark
259to recalculate these timings for each new piece of code timed.
260
261=item enablecache ( )
262
263Enable caching of timings for the null loop. The time taken for COUNT
264rounds of the null loop will be calculated only once for each
265different COUNT used.
f06db76b 266
ac8eabc1
JH
267=item timesum ( T1, T2 )
268
269Returns the sum of two Benchmark times as a Benchmark object suitable
270for passing to timestr().
271
f06db76b
AD
272=back
273
274=head1 NOTES
275
276The data is stored as a list of values from the time and times
523cc92b 277functions:
f06db76b 278
431d98c2 279 ($real, $user, $system, $children_user, $children_system, $iters)
f06db76b
AD
280
281in seconds for the whole loop (not divided by the number of rounds).
282
283The timing is done using time(3) and times(3).
284
285Code is executed in the caller's package.
286
f06db76b
AD
287The time of the null loop (a loop with the same
288number of rounds but empty loop body) is subtracted
289from the time of the real loop.
290
3c6312e9 291The null loop times can be cached, the key being the
f06db76b
AD
292number of rounds. The caching can be controlled using
293calls like these:
294
523cc92b 295 clearcache($key);
f06db76b
AD
296 clearallcache();
297
523cc92b 298 disablecache();
f06db76b
AD
299 enablecache();
300
3c6312e9
BS
301Caching is off by default, as it can (usually slightly) decrease
302accuracy and does not usually noticably affect runtimes.
303
54e82ce5
GS
304=head1 EXAMPLES
305
306For example,
307
308 use Benchmark;$x=3;cmpthese(-5,{a=>sub{$x*$x},b=>sub{$x**2}})
309
310outputs something like this:
311
312 Benchmark: running a, b, each for at least 5 CPU seconds...
313 a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743)
314 b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452)
315 Rate b a
316 b 1574945/s -- -59%
317 a 3835056/s 144% --
318
319while
320
321 use Benchmark;
322 $x=3;
323 $r=timethese(-5,{a=>sub{$x*$x},b=>sub{$x**2}},'none');
324 cmpthese($r);
325
326outputs something like this:
327
328 Rate b a
329 b 1559428/s -- -62%
330 a 4152037/s 166% --
331
332
f06db76b
AD
333=head1 INHERITANCE
334
335Benchmark inherits from no other class, except of course
336for Exporter.
337
338=head1 CAVEATS
339
80eab818 340Comparing eval'd strings with code references will give you
431d98c2 341inaccurate results: a code reference will show a slightly slower
80eab818
CS
342execution time than the equivalent eval'd string.
343
f06db76b
AD
344The real time timing is done using time(2) and
345the granularity is therefore only one second.
346
347Short tests may produce negative figures because perl
523cc92b
CS
348can appear to take longer to execute the empty loop
349than a short test; try:
f06db76b
AD
350
351 timethis(100,'1');
352
353The system time of the null loop might be slightly
354more than the system time of the loop with the actual
a24a9dfe 355code and therefore the difference might end up being E<lt> 0.
f06db76b 356
8a4f6ac2
GS
357=head1 SEE ALSO
358
359L<Devel::DProf> - a Perl code profiler
360
f06db76b
AD
361=head1 AUTHORS
362
5aabfad6 363Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
f06db76b
AD
364
365=head1 MODIFICATION HISTORY
366
367September 8th, 1994; by Tim Bunce.
368
523cc92b
CS
369March 28th, 1997; by Hugo van der Sanden: added support for code
370references and the already documented 'debug' method; revamped
371documentation.
f06db76b 372
6ee623d5
GS
373April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
374functionality.
375
3c6312e9
BS
376September, 1999; by Barrie Slaymaker: math fixes and accuracy and
377efficiency tweaks. Added cmpthese(). A result is now returned from
431d98c2 378timethese(). Exposed countit() (was runfor()).
3c6312e9 379
523cc92b 380=cut
a0d0e21e 381
3f943bd9
GS
382# evaluate something in a clean lexical environment
383sub _doeval { eval shift }
384
385#
386# put any lexicals at file scope AFTER here
387#
388
4aa0a1f7 389use Carp;
a0d0e21e
LW
390use Exporter;
391@ISA=(Exporter);
ac8eabc1
JH
392@EXPORT=qw(timeit timethis timethese timediff timestr);
393@EXPORT_OK=qw(timesum cmpthese countit
394 clearcache clearallcache disablecache enablecache);
a0d0e21e 395
d6a466d7 396$VERSION = 1.01;
8a4f6ac2 397
a0d0e21e
LW
398&init;
399
400sub init {
401 $debug = 0;
402 $min_count = 4;
403 $min_cpu = 0.4;
404 $defaultfmt = '5.2f';
405 $defaultstyle = 'auto';
406 # The cache can cause a slight loss of sys time accuracy. If a
407 # user does many tests (>10) with *very* large counts (>10000)
408 # or works on a very slow machine the cache may be useful.
409 &disablecache;
410 &clearallcache;
411}
412
523cc92b
CS
413sub debug { $debug = ($_[1] != 0); }
414
bba8fca5
BS
415# The cache needs two branches: 's' for strings and 'c' for code. The
416# emtpy loop is different in these two cases.
417sub clearcache { delete $cache{"$_[0]c"}; delete $cache{"$_[0]s"}; }
a0d0e21e
LW
418sub clearallcache { %cache = (); }
419sub enablecache { $cache = 1; }
420sub disablecache { $cache = 0; }
421
a0d0e21e
LW
422# --- Functions to process the 'time' data type
423
6ee623d5
GS
424sub new { my @t = (time, times, @_ == 2 ? $_[1] : 0);
425 print "new=@t\n" if $debug;
426 bless \@t; }
a0d0e21e
LW
427
428sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps ; }
429sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $cu+$cs ; }
430sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; }
431sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; }
431d98c2 432sub iters { $_[0]->[5] ; }
a0d0e21e 433
523cc92b 434sub timediff {
a0d0e21e 435 my($a, $b) = @_;
523cc92b 436 my @r;
3f943bd9 437 for (my $i=0; $i < @$a; ++$i) {
a0d0e21e
LW
438 push(@r, $a->[$i] - $b->[$i]);
439 }
440 bless \@r;
441}
442
705cc255
TB
443sub timesum {
444 my($a, $b) = @_;
445 my @r;
446 for (my $i=0; $i < @$a; ++$i) {
447 push(@r, $a->[$i] + $b->[$i]);
448 }
449 bless \@r;
450}
451
523cc92b 452sub timestr {
a0d0e21e 453 my($tr, $style, $f) = @_;
523cc92b 454 my @t = @$tr;
6ee623d5
GS
455 warn "bad time value (@t)" unless @t==6;
456 my($r, $pu, $ps, $cu, $cs, $n) = @t;
ce9550df 457 my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);
523cc92b 458 $f = $defaultfmt unless defined $f;
a0d0e21e 459 # format a time in the required style, other formats may be added here
80eab818 460 $style ||= $defaultstyle;
523cc92b
CS
461 $style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto';
462 my $s = "@t $style"; # default for unknown style
7be077a2 463 $s=sprintf("%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
ce9550df 464 $r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all';
7be077a2
GS
465 $s=sprintf("%2d wallclock secs (%$f usr + %$f sys = %$f CPU)",
466 $r,$pu,$ps,$pt) if $style eq 'noc';
467 $s=sprintf("%2d wallclock secs (%$f cusr + %$f csys = %$f CPU)",
468 $r,$cu,$cs,$ct) if $style eq 'nop';
cc31225e 469 $s .= sprintf(" @ %$f/s (n=$n)", $n / ( $pu + $ps )) if $n && $pu+$ps;
a0d0e21e
LW
470 $s;
471}
523cc92b
CS
472
473sub timedebug {
a0d0e21e 474 my($msg, $t) = @_;
523cc92b 475 print STDERR "$msg",timestr($t),"\n" if $debug;
a0d0e21e
LW
476}
477
a0d0e21e
LW
478# --- Functions implementing low-level support for timing loops
479
480sub runloop {
481 my($n, $c) = @_;
4aa0a1f7
AD
482
483 $n+=0; # force numeric now, so garbage won't creep into the eval
523cc92b
CS
484 croak "negative loopcount $n" if $n<0;
485 confess "Usage: runloop(number, [string | coderef])" unless defined $c;
a0d0e21e
LW
486 my($t0, $t1, $td); # before, after, difference
487
488 # find package of caller so we can execute code there
523cc92b
CS
489 my($curpack) = caller(0);
490 my($i, $pack)= 0;
a0d0e21e
LW
491 while (($pack) = caller(++$i)) {
492 last if $pack ne $curpack;
493 }
494
3f943bd9
GS
495 my ($subcode, $subref);
496 if (ref $c eq 'CODE') {
497 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
498 $subref = eval $subcode;
499 }
500 else {
501 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
502 $subref = _doeval($subcode);
503 }
4aa0a1f7 504 croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
523cc92b 505 print STDERR "runloop $n '$subcode'\n" if $debug;
a0d0e21e 506
3c6312e9
BS
507 # Wait for the user timer to tick. This makes the error range more like
508 # -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This
509 # may not seem important, but it significantly reduces the chances of
510 # getting a too low initial $n in the initial, 'find the minimum' loop
431d98c2 511 # in &countit. This, in turn, can reduce the number of calls to
bba8fca5
BS
512 # &runloop a lot, and thus reduce additive errors.
513 my $tbase = Benchmark->new(0)->[1];
277427cf 514 while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ;
a0d0e21e 515 &$subref;
6ee623d5 516 $t1 = Benchmark->new($n);
a0d0e21e 517 $td = &timediff($t1, $t0);
a0d0e21e
LW
518 timedebug("runloop:",$td);
519 $td;
520}
521
522
523sub timeit {
524 my($n, $code) = @_;
525 my($wn, $wc, $wd);
526
527 printf STDERR "timeit $n $code\n" if $debug;
3c6312e9 528 my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
bba8fca5
BS
529 if ($cache && exists $cache{$cache_key} ) {
530 $wn = $cache{$cache_key};
523cc92b 531 } else {
bba8fca5 532 $wn = &runloop($n, ref( $code ) ? sub { undef } : '' );
3c6312e9
BS
533 # Can't let our baseline have any iterations, or they get subtracted
534 # out of the result.
535 $wn->[5] = 0;
bba8fca5 536 $cache{$cache_key} = $wn;
a0d0e21e
LW
537 }
538
539 $wc = &runloop($n, $code);
540
541 $wd = timediff($wc, $wn);
a0d0e21e
LW
542 timedebug("timeit: ",$wc);
543 timedebug(" - ",$wn);
544 timedebug(" = ",$wd);
545
546 $wd;
547}
548
6ee623d5
GS
549
550my $default_for = 3;
551my $min_for = 0.1;
552
3c6312e9 553
431d98c2
BS
554sub countit {
555 my ( $tmax, $code ) = @_;
6ee623d5
GS
556
557 if ( not defined $tmax or $tmax == 0 ) {
558 $tmax = $default_for;
559 } elsif ( $tmax < 0 ) {
560 $tmax = -$tmax;
561 }
562
431d98c2 563 die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
6ee623d5
GS
564 if $tmax < $min_for;
565
3c6312e9 566 my ($n, $tc);
6ee623d5 567
bba8fca5 568 # First find the minimum $n that gives a significant timing.
3c6312e9
BS
569 for ($n = 1; ; $n *= 2 ) {
570 my $td = timeit($n, $code);
571 $tc = $td->[1] + $td->[2];
572 last if $tc > 0.1;
573 }
6ee623d5 574
3c6312e9
BS
575 my $nmin = $n;
576
577 # Get $n high enough that we can guess the final $n with some accuracy.
578 my $tpra = 0.1 * $tmax; # Target/time practice.
579 while ( $tc < $tpra ) {
580 # The 5% fudge is to keep us from iterating again all
581 # that often (this speeds overall responsiveness when $tmax is big
582 # and we guess a little low). This does not noticably affect
583 # accuracy since we're not couting these times.
584 $n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation.
585 my $td = timeit($n, $code);
c5d57293
A
586 my $new_tc = $td->[1] + $td->[2];
587 # Make sure we are making progress.
588 $tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc;
6ee623d5
GS
589 }
590
3c6312e9
BS
591 # Now, do the 'for real' timing(s), repeating until we exceed
592 # the max.
593 my $ntot = 0;
594 my $rtot = 0;
595 my $utot = 0.0;
596 my $stot = 0.0;
597 my $cutot = 0.0;
598 my $cstot = 0.0;
599 my $ttot = 0.0;
600
601 # The 5% fudge is because $n is often a few % low even for routines
602 # with stable times and avoiding extra timeit()s is nice for
603 # accuracy's sake.
604 $n = int( $n * ( 1.05 * $tmax / $tc ) );
605
606 while () {
607 my $td = timeit($n, $code);
608 $ntot += $n;
609 $rtot += $td->[0];
610 $utot += $td->[1];
611 $stot += $td->[2];
6ee623d5
GS
612 $cutot += $td->[3];
613 $cstot += $td->[4];
3c6312e9
BS
614 $ttot = $utot + $stot;
615 last if $ttot >= $tmax;
6ee623d5 616
c5d57293 617 $ttot = 0.01 if $ttot < 0.01;
3c6312e9 618 my $r = $tmax / $ttot - 1; # Linear approximation.
bba8fca5 619 $n = int( $r * $ntot );
6ee623d5 620 $n = $nmin if $n < $nmin;
6ee623d5
GS
621 }
622
623 return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ];
624}
625
a0d0e21e
LW
626# --- Functions implementing high-level time-then-print utilities
627
6ee623d5
GS
628sub n_to_for {
629 my $n = shift;
630 return $n == 0 ? $default_for : $n < 0 ? -$n : undef;
631}
632
a0d0e21e
LW
633sub timethis{
634 my($n, $code, $title, $style) = @_;
6ee623d5
GS
635 my($t, $for, $forn);
636
637 if ( $n > 0 ) {
638 croak "non-integer loopcount $n, stopped" if int($n)<$n;
639 $t = timeit($n, $code);
640 $title = "timethis $n" unless defined $title;
641 } else {
642 $fort = n_to_for( $n );
431d98c2 643 $t = countit( $fort, $code );
6ee623d5
GS
644 $title = "timethis for $fort" unless defined $title;
645 $forn = $t->[-1];
646 }
523cc92b 647 local $| = 1;
523cc92b 648 $style = "" unless defined $style;
3c6312e9
BS
649 printf("%10s: ", $title) unless $style eq 'none';
650 print timestr($t, $style, $defaultfmt),"\n" unless $style eq 'none';
6ee623d5
GS
651
652 $n = $forn if defined $forn;
523cc92b 653
a0d0e21e
LW
654 # A conservative warning to spot very silly tests.
655 # Don't assume that your benchmark is ok simply because
656 # you don't get this warning!
657 print " (warning: too few iterations for a reliable count)\n"
523cc92b 658 if $n < $min_count
a0d0e21e 659 || ($t->real < 1 && $n < 1000)
523cc92b 660 || $t->cpu_a < $min_cpu;
a0d0e21e
LW
661 $t;
662}
663
a0d0e21e
LW
664sub timethese{
665 my($n, $alt, $style) = @_;
666 die "usage: timethese(count, { 'Name1'=>'code1', ... }\n"
667 unless ref $alt eq HASH;
523cc92b
CS
668 my @names = sort keys %$alt;
669 $style = "" unless defined $style;
3c6312e9 670 print "Benchmark: " unless $style eq 'none';
6ee623d5
GS
671 if ( $n > 0 ) {
672 croak "non-integer loopcount $n, stopped" if int($n)<$n;
3c6312e9 673 print "timing $n iterations of" unless $style eq 'none';
6ee623d5 674 } else {
3c6312e9 675 print "running" unless $style eq 'none';
6ee623d5 676 }
3c6312e9 677 print " ", join(', ',@names) unless $style eq 'none';
6ee623d5
GS
678 unless ( $n > 0 ) {
679 my $for = n_to_for( $n );
3c6312e9 680 print ", each for at least $for CPU seconds" unless $style eq 'none';
6ee623d5 681 }
3c6312e9 682 print "...\n" unless $style eq 'none';
523cc92b
CS
683
684 # we could save the results in an array and produce a summary here
a0d0e21e 685 # sum, min, max, avg etc etc
3c6312e9 686 my %results;
4dbb2df9 687 foreach my $name (@names) {
3c6312e9 688 $results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
4dbb2df9 689 }
3c6312e9
BS
690
691 return \%results;
a0d0e21e
LW
692}
693
3c6312e9 694sub cmpthese{
d1083c7a 695 my ($results, $style) = ref $_[0] ? @_ : ( timethese( @_[0,1] ), $_[2] ) ;
3c6312e9 696
d1083c7a 697 $style = "" unless defined $style;
3c6312e9
BS
698
699 # Flatten in to an array of arrays with the name as the first field
700 my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
701
702 for (@vals) {
703 # The epsilon fudge here is to prevent div by 0. Since clock
704 # resolutions are much larger, it's below the noise floor.
705 my $rate = $_->[6] / ( $_->[2] + $_->[3] + 0.000000000000001 );
706 $_->[7] = $rate;
707 }
708
709 # Sort by rate
710 @vals = sort { $a->[7] <=> $b->[7] } @vals;
711
712 # If more than half of the rates are greater than one...
713 my $display_as_rate = $vals[$#vals>>1]->[7] > 1;
714
715 my @rows;
716 my @col_widths;
717
718 my @top_row = (
719 '',
720 $display_as_rate ? 'Rate' : 's/iter',
721 map { $_->[0] } @vals
722 );
723
724 push @rows, \@top_row;
725 @col_widths = map { length( $_ ) } @top_row;
726
727 # Build the data rows
728 # We leave the last column in even though it never has any data. Perhaps
729 # it should go away. Also, perhaps a style for a single column of
730 # percentages might be nice.
731 for my $row_val ( @vals ) {
732 my @row;
733
734 # Column 0 = test name
735 push @row, $row_val->[0];
736 $col_widths[0] = length( $row_val->[0] )
737 if length( $row_val->[0] ) > $col_widths[0];
738
739 # Column 1 = performance
740 my $row_rate = $row_val->[7];
741
742 # We assume that we'll never get a 0 rate.
743 my $a = $display_as_rate ? $row_rate : 1 / $row_rate;
744
745 # Only give a few decimal places before switching to sci. notation,
746 # since the results aren't usually that accurate anyway.
747 my $format =
748 $a >= 100 ?
749 "%0.0f" :
750 $a >= 10 ?
751 "%0.1f" :
752 $a >= 1 ?
753 "%0.2f" :
754 $a >= 0.1 ?
755 "%0.3f" :
756 "%0.2e";
757
758 $format .= "/s"
759 if $display_as_rate;
760 # Using $b here due to optimizing bug in _58 through _61
761 my $b = sprintf( $format, $a );
762 push @row, $b;
763 $col_widths[1] = length( $b )
764 if length( $b ) > $col_widths[1];
765
766 # Columns 2..N = performance ratios
767 my $skip_rest = 0;
768 for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
769 my $col_val = $vals[$col_num];
770 my $out;
771 if ( $skip_rest ) {
772 $out = '';
773 }
774 elsif ( $col_val->[0] eq $row_val->[0] ) {
775 $out = "--";
776 # $skip_rest = 1;
777 }
778 else {
779 my $col_rate = $col_val->[7];
780 $out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
781 }
782 push @row, $out;
783 $col_widths[$col_num+2] = length( $out )
784 if length( $out ) > $col_widths[$col_num+2];
785
786 # A little wierdness to set the first column width properly
787 $col_widths[$col_num+2] = length( $col_val->[0] )
788 if length( $col_val->[0] ) > $col_widths[$col_num+2];
789 }
790 push @rows, \@row;
791 }
792
d1083c7a
BS
793 return \@rows if $style eq "none";
794
3c6312e9
BS
795 # Equalize column widths in the chart as much as possible without
796 # exceeding 80 characters. This does not use or affect cols 0 or 1.
797 my @sorted_width_refs =
798 sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];
799 my $max_width = ${$sorted_width_refs[-1]};
800
277427cf 801 my $total = @col_widths - 1 ;
3c6312e9
BS
802 for ( @col_widths ) { $total += $_ }
803
804 STRETCHER:
805 while ( $total < 80 ) {
806 my $min_width = ${$sorted_width_refs[0]};
807 last
808 if $min_width == $max_width;
809 for ( @sorted_width_refs ) {
810 last
811 if $$_ > $min_width;
812 ++$$_;
813 ++$total;
814 last STRETCHER
815 if $total >= 80;
816 }
817 }
818
819 # Dump the output
820 my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
821 substr( $format, 1, 0 ) = '-';
822 for ( @rows ) {
823 printf $format, @$_;
824 }
825
d1083c7a 826 return \@rows ;
3c6312e9
BS
827}
828
829
a0d0e21e 8301;