This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Doc patches: assorted minor nits
[perl5.git] / lib / Benchmark.pm
CommitLineData
a0d0e21e
LW
1package Benchmark;
2
f06db76b
AD
3=head1 NAME
4
8a4f6ac2 5Benchmark - benchmark running times of Perl code
431d98c2 6
f06db76b
AD
7=head1 SYNOPSIS
8
f36484b0
BS
9 use Benchmark qw(:all) ;
10
f06db76b
AD
11 timethis ($count, "code");
12
523cc92b 13 # Use Perl code in strings...
f06db76b
AD
14 timethese($count, {
15 'Name1' => '...code1...',
16 'Name2' => '...code2...',
17 });
18
523cc92b
CS
19 # ... or use subroutine references.
20 timethese($count, {
21 'Name1' => sub { ...code1... },
22 'Name2' => sub { ...code2... },
23 });
24
431d98c2
BS
25 # cmpthese can be used both ways as well
26 cmpthese($count, {
27 'Name1' => '...code1...',
28 'Name2' => '...code2...',
29 });
30
31 cmpthese($count, {
32 'Name1' => sub { ...code1... },
33 'Name2' => sub { ...code2... },
34 });
35
36 # ...or in two stages
37 $results = timethese($count,
38 {
39 'Name1' => sub { ...code1... },
40 'Name2' => sub { ...code2... },
41 },
42 'none'
43 );
44 cmpthese( $results ) ;
45
f06db76b
AD
46 $t = timeit($count, '...other code...')
47 print "$count loops of other code took:",timestr($t),"\n";
48
431d98c2
BS
49 $t = countit($time, '...other code...')
50 $count = $t->iters ;
51 print "$count loops of other code took:",timestr($t),"\n";
52
f06db76b
AD
53=head1 DESCRIPTION
54
55The Benchmark module encapsulates a number of routines to help you
56figure out how long it takes to execute some code.
57
8a4f6ac2
GS
58timethis - run a chunk of code several times
59
60timethese - run several chunks of code several times
61
62cmpthese - print results of timethese as a comparison chart
63
64timeit - run a chunk of code and see how long it goes
65
66countit - see how many times a chunk of code runs in a given time
67
68
f06db76b
AD
69=head2 Methods
70
71=over 10
72
73=item new
74
75Returns the current time. Example:
76
77 use Benchmark;
78 $t0 = new Benchmark;
79 # ... your code here ...
80 $t1 = new Benchmark;
81 $td = timediff($t1, $t0);
a24a9dfe 82 print "the code took:",timestr($td),"\n";
f06db76b
AD
83
84=item debug
85
86Enables or disable debugging by setting the C<$Benchmark::Debug> flag:
87
523cc92b 88 debug Benchmark 1;
f06db76b 89 $t = timeit(10, ' 5 ** $Global ');
523cc92b 90 debug Benchmark 0;
f06db76b 91
431d98c2
BS
92=item iters
93
94Returns the number of iterations.
95
f06db76b
AD
96=back
97
98=head2 Standard Exports
99
523cc92b 100The following routines will be exported into your namespace
f06db76b
AD
101if you use the Benchmark module:
102
103=over 10
104
105=item timeit(COUNT, CODE)
106
523cc92b
CS
107Arguments: COUNT is the number of times to run the loop, and CODE is
108the code to run. CODE may be either a code reference or a string to
109be eval'd; either way it will be run in the caller's package.
110
111Returns: a Benchmark object.
112
113=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
114
115Time COUNT iterations of CODE. CODE may be a string to eval or a
116code reference; either way the CODE will run in the caller's package.
117Results will be printed to STDOUT as TITLE followed by the times.
118TITLE defaults to "timethis COUNT" if none is provided. STYLE
119determines the format of the output, as described for timestr() below.
120
6ee623d5
GS
121The COUNT can be zero or negative: this means the I<minimum number of
122CPU seconds> to run. A zero signifies the default of 3 seconds. For
123example to run at least for 10 seconds:
124
125 timethis(-10, $code)
126
127or to run two pieces of code tests for at least 3 seconds:
128
129 timethese(0, { test1 => '...', test2 => '...'})
130
131CPU seconds is, in UNIX terms, the user time plus the system time of
132the process itself, as opposed to the real (wallclock) time and the
133time spent by the child processes. Less than 0.1 seconds is not
134accepted (-0.01 as the count, for example, will cause a fatal runtime
135exception).
136
137Note that the CPU seconds is the B<minimum> time: CPU scheduling and
138other operating system factors may complicate the attempt so that a
139little bit more time is spent. The benchmark output will, however,
140also tell the number of C<$code> runs/second, which should be a more
141interesting number than the actually spent seconds.
142
143Returns a Benchmark object.
144
523cc92b 145=item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
f06db76b 146
523cc92b
CS
147The CODEHASHREF is a reference to a hash containing names as keys
148and either a string to eval or a code reference for each value.
149For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
150call
f06db76b 151
523cc92b 152 timethis(COUNT, VALUE, KEY, STYLE)
f06db76b 153
1d2dff63
GS
154The routines are called in string comparison order of KEY.
155
156The COUNT can be zero or negative, see timethis().
6ee623d5 157
3c6312e9
BS
158Returns a hash of Benchmark objects, keyed by name.
159
523cc92b 160=item timediff ( T1, T2 )
f06db76b 161
523cc92b
CS
162Returns the difference between two Benchmark times as a Benchmark
163object suitable for passing to timestr().
f06db76b 164
6ee623d5 165=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
f06db76b 166
523cc92b
CS
167Returns a string that formats the times in the TIMEDIFF object in
168the requested STYLE. TIMEDIFF is expected to be a Benchmark object
169similar to that returned by timediff().
170
3c6312e9
BS
171STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
172each of the 5 times available ('wallclock' time, user time, system time,
523cc92b
CS
173user time of children, and system time of children). 'noc' shows all
174except the two children times. 'nop' shows only wallclock and the
175two children times. 'auto' (the default) will act as 'all' unless
176the children times are both zero, in which case it acts as 'noc'.
3c6312e9 177'none' prevents output.
523cc92b
CS
178
179FORMAT is the L<printf(3)>-style format specifier (without the
180leading '%') to use to print the times. It defaults to '5.2f'.
f06db76b
AD
181
182=back
183
184=head2 Optional Exports
185
186The following routines will be exported into your namespace
187if you specifically ask that they be imported:
188
189=over 10
190
523cc92b
CS
191=item clearcache ( COUNT )
192
193Clear the cached time for COUNT rounds of the null loop.
194
195=item clearallcache ( )
f06db76b 196
523cc92b 197Clear all cached times.
f06db76b 198
ac8eabc1
JH
199=item cmpthese ( COUT, CODEHASHREF, [ STYLE ] )
200
d1083c7a 201=item cmpthese ( RESULTSHASHREF, [ STYLE ] )
ac8eabc1 202
d1083c7a 203Optionally calls timethese(), then outputs comparison chart. This:
ac8eabc1 204
d1083c7a
BS
205 cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
206
207outputs a chart like:
208
209 Rate b a
210 b 2831802/s -- -61%
211 a 7208959/s 155% --
212
213This chart is sorted from slowest to fastest, and shows the percent speed
214difference between each pair of tests.
215
216c<cmpthese> can also be passed the data structure that timethese() returns:
217
218 $results = timethese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
ac8eabc1
JH
219 cmpthese( $results );
220
d1083c7a
BS
221in case you want to see both sets of results.
222
223Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the
224above chart, including labels. This:
225
226 my $rows = cmpthese( -1, { a => '++$i', b => '$i *= 2' }, "none" );
227
228returns a data structure like:
229
230 [
231 [ '', 'Rate', 'b', 'a' ],
232 [ 'b', '2885232/s', '--', '-59%' ],
233 [ 'a', '7099126/s', '146%', '--' ],
234 ]
235
236B<NOTE>: This result value differs from previous versions, which returned
237the C<timethese()> result structure. If you want that, just use the two
238statement C<timethese>...C<cmpthese> idiom shown above.
239
240Incidently, note the variance in the result values between the two examples;
241this is typical of benchmarking. If this were a real benchmark, you would
242probably want to run a lot more iterations.
ac8eabc1
JH
243
244=item countit(TIME, CODE)
245
246Arguments: TIME is the minimum length of time to run CODE for, and CODE is
247the code to run. CODE may be either a code reference or a string to
248be eval'd; either way it will be run in the caller's package.
249
250TIME is I<not> negative. countit() will run the loop many times to
251calculate the speed of CODE before running it for TIME. The actual
252time run for will usually be greater than TIME due to system clock
253resolution, so it's best to look at the number of iterations divided
254by the times that you are concerned with, not just the iterations.
255
256Returns: a Benchmark object.
257
523cc92b 258=item disablecache ( )
f06db76b 259
523cc92b
CS
260Disable caching of timings for the null loop. This will force Benchmark
261to recalculate these timings for each new piece of code timed.
262
263=item enablecache ( )
264
265Enable caching of timings for the null loop. The time taken for COUNT
266rounds of the null loop will be calculated only once for each
267different COUNT used.
f06db76b 268
ac8eabc1
JH
269=item timesum ( T1, T2 )
270
271Returns the sum of two Benchmark times as a Benchmark object suitable
272for passing to timestr().
273
f06db76b
AD
274=back
275
276=head1 NOTES
277
278The data is stored as a list of values from the time and times
523cc92b 279functions:
f06db76b 280
431d98c2 281 ($real, $user, $system, $children_user, $children_system, $iters)
f06db76b
AD
282
283in seconds for the whole loop (not divided by the number of rounds).
284
285The timing is done using time(3) and times(3).
286
287Code is executed in the caller's package.
288
f06db76b
AD
289The time of the null loop (a loop with the same
290number of rounds but empty loop body) is subtracted
291from the time of the real loop.
292
3c6312e9 293The null loop times can be cached, the key being the
f06db76b
AD
294number of rounds. The caching can be controlled using
295calls like these:
296
523cc92b 297 clearcache($key);
f06db76b
AD
298 clearallcache();
299
523cc92b 300 disablecache();
f06db76b
AD
301 enablecache();
302
3c6312e9
BS
303Caching is off by default, as it can (usually slightly) decrease
304accuracy and does not usually noticably affect runtimes.
305
54e82ce5
GS
306=head1 EXAMPLES
307
308For example,
309
14393033
BS
310 use Benchmark qw( cmpthese ) ;
311 $x = 3;
312 cmpthese( -5, {
313 a => sub{$x*$x},
314 b => sub{$x**2},
315 } );
54e82ce5
GS
316
317outputs something like this:
318
319 Benchmark: running a, b, each for at least 5 CPU seconds...
14393033
BS
320 Rate b a
321 b 1559428/s -- -62%
322 a 4152037/s 166% --
323
54e82ce5
GS
324
325while
326
14393033
BS
327 use Benchmark qw( timethese cmpthese ) ;
328 $x = 3;
329 $r = timethese( -5, {
330 a => sub{$x*$x},
331 b => sub{$x**2},
332 } );
333 cmpthese $r;
54e82ce5
GS
334
335outputs something like this:
336
14393033
BS
337 Benchmark: running a, b, each for at least 5 CPU seconds...
338 a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743)
339 b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452)
340 Rate b a
341 b 1574945/s -- -59%
342 a 3835056/s 144% --
54e82ce5
GS
343
344
f06db76b
AD
345=head1 INHERITANCE
346
347Benchmark inherits from no other class, except of course
348for Exporter.
349
350=head1 CAVEATS
351
80eab818 352Comparing eval'd strings with code references will give you
431d98c2 353inaccurate results: a code reference will show a slightly slower
80eab818
CS
354execution time than the equivalent eval'd string.
355
f06db76b
AD
356The real time timing is done using time(2) and
357the granularity is therefore only one second.
358
359Short tests may produce negative figures because perl
523cc92b
CS
360can appear to take longer to execute the empty loop
361than a short test; try:
f06db76b
AD
362
363 timethis(100,'1');
364
365The system time of the null loop might be slightly
366more than the system time of the loop with the actual
a24a9dfe 367code and therefore the difference might end up being E<lt> 0.
f06db76b 368
8a4f6ac2
GS
369=head1 SEE ALSO
370
371L<Devel::DProf> - a Perl code profiler
372
f06db76b
AD
373=head1 AUTHORS
374
5aabfad6 375Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
f06db76b
AD
376
377=head1 MODIFICATION HISTORY
378
379September 8th, 1994; by Tim Bunce.
380
523cc92b
CS
381March 28th, 1997; by Hugo van der Sanden: added support for code
382references and the already documented 'debug' method; revamped
383documentation.
f06db76b 384
6ee623d5
GS
385April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
386functionality.
387
3c6312e9
BS
388September, 1999; by Barrie Slaymaker: math fixes and accuracy and
389efficiency tweaks. Added cmpthese(). A result is now returned from
431d98c2 390timethese(). Exposed countit() (was runfor()).
3c6312e9 391
0e74ff8e
JH
392December, 2001; by Nicholas Clark: make timestr() recognise the style 'none'
393and return an empty string. If cmpthese is calling timethese, make it pass the
394style in. (so that 'none' will suppress output). Make sub new dump its
395debugging output to STDERR, to be consistent with everything else.
396All bugs found while writing a regression test.
397
523cc92b 398=cut
a0d0e21e 399
3f943bd9
GS
400# evaluate something in a clean lexical environment
401sub _doeval { eval shift }
402
403#
404# put any lexicals at file scope AFTER here
405#
406
4aa0a1f7 407use Carp;
a0d0e21e
LW
408use Exporter;
409@ISA=(Exporter);
ac8eabc1
JH
410@EXPORT=qw(timeit timethis timethese timediff timestr);
411@EXPORT_OK=qw(timesum cmpthese countit
412 clearcache clearallcache disablecache enablecache);
f36484b0 413%EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ;
a0d0e21e 414
0e74ff8e 415$VERSION = 1.04;
8a4f6ac2 416
a0d0e21e
LW
417&init;
418
419sub init {
420 $debug = 0;
421 $min_count = 4;
422 $min_cpu = 0.4;
423 $defaultfmt = '5.2f';
424 $defaultstyle = 'auto';
425 # The cache can cause a slight loss of sys time accuracy. If a
426 # user does many tests (>10) with *very* large counts (>10000)
427 # or works on a very slow machine the cache may be useful.
428 &disablecache;
429 &clearallcache;
430}
431
523cc92b
CS
432sub debug { $debug = ($_[1] != 0); }
433
bba8fca5
BS
434# The cache needs two branches: 's' for strings and 'c' for code. The
435# emtpy loop is different in these two cases.
436sub clearcache { delete $cache{"$_[0]c"}; delete $cache{"$_[0]s"}; }
a0d0e21e
LW
437sub clearallcache { %cache = (); }
438sub enablecache { $cache = 1; }
439sub disablecache { $cache = 0; }
440
a0d0e21e
LW
441# --- Functions to process the 'time' data type
442
6ee623d5 443sub new { my @t = (time, times, @_ == 2 ? $_[1] : 0);
0e74ff8e 444 print STDERR "new=@t\n" if $debug;
6ee623d5 445 bless \@t; }
a0d0e21e
LW
446
447sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps ; }
448sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $cu+$cs ; }
449sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; }
450sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; }
431d98c2 451sub iters { $_[0]->[5] ; }
a0d0e21e 452
523cc92b 453sub timediff {
a0d0e21e 454 my($a, $b) = @_;
523cc92b 455 my @r;
3f943bd9 456 for (my $i=0; $i < @$a; ++$i) {
a0d0e21e
LW
457 push(@r, $a->[$i] - $b->[$i]);
458 }
459 bless \@r;
460}
461
705cc255
TB
462sub timesum {
463 my($a, $b) = @_;
464 my @r;
465 for (my $i=0; $i < @$a; ++$i) {
466 push(@r, $a->[$i] + $b->[$i]);
467 }
468 bless \@r;
469}
470
523cc92b 471sub timestr {
a0d0e21e 472 my($tr, $style, $f) = @_;
523cc92b 473 my @t = @$tr;
6ee623d5
GS
474 warn "bad time value (@t)" unless @t==6;
475 my($r, $pu, $ps, $cu, $cs, $n) = @t;
ce9550df 476 my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);
523cc92b 477 $f = $defaultfmt unless defined $f;
a0d0e21e 478 # format a time in the required style, other formats may be added here
80eab818 479 $style ||= $defaultstyle;
0e74ff8e 480 return '' if $style eq 'none';
523cc92b
CS
481 $style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto';
482 my $s = "@t $style"; # default for unknown style
7be077a2 483 $s=sprintf("%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
ce9550df 484 $r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all';
7be077a2
GS
485 $s=sprintf("%2d wallclock secs (%$f usr + %$f sys = %$f CPU)",
486 $r,$pu,$ps,$pt) if $style eq 'noc';
487 $s=sprintf("%2d wallclock secs (%$f cusr + %$f csys = %$f CPU)",
488 $r,$cu,$cs,$ct) if $style eq 'nop';
cc31225e 489 $s .= sprintf(" @ %$f/s (n=$n)", $n / ( $pu + $ps )) if $n && $pu+$ps;
a0d0e21e
LW
490 $s;
491}
523cc92b
CS
492
493sub timedebug {
a0d0e21e 494 my($msg, $t) = @_;
523cc92b 495 print STDERR "$msg",timestr($t),"\n" if $debug;
a0d0e21e
LW
496}
497
a0d0e21e
LW
498# --- Functions implementing low-level support for timing loops
499
500sub runloop {
501 my($n, $c) = @_;
4aa0a1f7
AD
502
503 $n+=0; # force numeric now, so garbage won't creep into the eval
523cc92b
CS
504 croak "negative loopcount $n" if $n<0;
505 confess "Usage: runloop(number, [string | coderef])" unless defined $c;
a0d0e21e
LW
506 my($t0, $t1, $td); # before, after, difference
507
508 # find package of caller so we can execute code there
523cc92b
CS
509 my($curpack) = caller(0);
510 my($i, $pack)= 0;
a0d0e21e
LW
511 while (($pack) = caller(++$i)) {
512 last if $pack ne $curpack;
513 }
514
3f943bd9
GS
515 my ($subcode, $subref);
516 if (ref $c eq 'CODE') {
517 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
518 $subref = eval $subcode;
519 }
520 else {
521 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
522 $subref = _doeval($subcode);
523 }
4aa0a1f7 524 croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
523cc92b 525 print STDERR "runloop $n '$subcode'\n" if $debug;
a0d0e21e 526
3c6312e9
BS
527 # Wait for the user timer to tick. This makes the error range more like
528 # -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This
529 # may not seem important, but it significantly reduces the chances of
530 # getting a too low initial $n in the initial, 'find the minimum' loop
431d98c2 531 # in &countit. This, in turn, can reduce the number of calls to
bba8fca5
BS
532 # &runloop a lot, and thus reduce additive errors.
533 my $tbase = Benchmark->new(0)->[1];
277427cf 534 while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ;
a0d0e21e 535 &$subref;
6ee623d5 536 $t1 = Benchmark->new($n);
a0d0e21e 537 $td = &timediff($t1, $t0);
a0d0e21e
LW
538 timedebug("runloop:",$td);
539 $td;
540}
541
542
543sub timeit {
544 my($n, $code) = @_;
545 my($wn, $wc, $wd);
546
547 printf STDERR "timeit $n $code\n" if $debug;
3c6312e9 548 my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
bba8fca5
BS
549 if ($cache && exists $cache{$cache_key} ) {
550 $wn = $cache{$cache_key};
523cc92b 551 } else {
6bf773bc 552 $wn = &runloop($n, ref( $code ) ? sub { } : '' );
3c6312e9
BS
553 # Can't let our baseline have any iterations, or they get subtracted
554 # out of the result.
555 $wn->[5] = 0;
bba8fca5 556 $cache{$cache_key} = $wn;
a0d0e21e
LW
557 }
558
559 $wc = &runloop($n, $code);
560
561 $wd = timediff($wc, $wn);
a0d0e21e
LW
562 timedebug("timeit: ",$wc);
563 timedebug(" - ",$wn);
564 timedebug(" = ",$wd);
565
566 $wd;
567}
568
6ee623d5
GS
569
570my $default_for = 3;
571my $min_for = 0.1;
572
3c6312e9 573
431d98c2
BS
574sub countit {
575 my ( $tmax, $code ) = @_;
6ee623d5
GS
576
577 if ( not defined $tmax or $tmax == 0 ) {
578 $tmax = $default_for;
579 } elsif ( $tmax < 0 ) {
580 $tmax = -$tmax;
581 }
582
431d98c2 583 die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
6ee623d5
GS
584 if $tmax < $min_for;
585
3c6312e9 586 my ($n, $tc);
6ee623d5 587
bba8fca5 588 # First find the minimum $n that gives a significant timing.
3c6312e9
BS
589 for ($n = 1; ; $n *= 2 ) {
590 my $td = timeit($n, $code);
591 $tc = $td->[1] + $td->[2];
592 last if $tc > 0.1;
593 }
6ee623d5 594
3c6312e9
BS
595 my $nmin = $n;
596
597 # Get $n high enough that we can guess the final $n with some accuracy.
598 my $tpra = 0.1 * $tmax; # Target/time practice.
599 while ( $tc < $tpra ) {
600 # The 5% fudge is to keep us from iterating again all
601 # that often (this speeds overall responsiveness when $tmax is big
602 # and we guess a little low). This does not noticably affect
603 # accuracy since we're not couting these times.
604 $n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation.
605 my $td = timeit($n, $code);
c5d57293
A
606 my $new_tc = $td->[1] + $td->[2];
607 # Make sure we are making progress.
608 $tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc;
6ee623d5
GS
609 }
610
3c6312e9
BS
611 # Now, do the 'for real' timing(s), repeating until we exceed
612 # the max.
613 my $ntot = 0;
614 my $rtot = 0;
615 my $utot = 0.0;
616 my $stot = 0.0;
617 my $cutot = 0.0;
618 my $cstot = 0.0;
619 my $ttot = 0.0;
620
621 # The 5% fudge is because $n is often a few % low even for routines
622 # with stable times and avoiding extra timeit()s is nice for
623 # accuracy's sake.
624 $n = int( $n * ( 1.05 * $tmax / $tc ) );
625
626 while () {
627 my $td = timeit($n, $code);
628 $ntot += $n;
629 $rtot += $td->[0];
630 $utot += $td->[1];
631 $stot += $td->[2];
6ee623d5
GS
632 $cutot += $td->[3];
633 $cstot += $td->[4];
3c6312e9
BS
634 $ttot = $utot + $stot;
635 last if $ttot >= $tmax;
6ee623d5 636
c5d57293 637 $ttot = 0.01 if $ttot < 0.01;
3c6312e9 638 my $r = $tmax / $ttot - 1; # Linear approximation.
bba8fca5 639 $n = int( $r * $ntot );
6ee623d5 640 $n = $nmin if $n < $nmin;
6ee623d5
GS
641 }
642
643 return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ];
644}
645
a0d0e21e
LW
646# --- Functions implementing high-level time-then-print utilities
647
6ee623d5
GS
648sub n_to_for {
649 my $n = shift;
650 return $n == 0 ? $default_for : $n < 0 ? -$n : undef;
651}
652
a0d0e21e
LW
653sub timethis{
654 my($n, $code, $title, $style) = @_;
6ee623d5
GS
655 my($t, $for, $forn);
656
657 if ( $n > 0 ) {
658 croak "non-integer loopcount $n, stopped" if int($n)<$n;
659 $t = timeit($n, $code);
660 $title = "timethis $n" unless defined $title;
661 } else {
662 $fort = n_to_for( $n );
431d98c2 663 $t = countit( $fort, $code );
6ee623d5
GS
664 $title = "timethis for $fort" unless defined $title;
665 $forn = $t->[-1];
666 }
523cc92b 667 local $| = 1;
523cc92b 668 $style = "" unless defined $style;
3c6312e9
BS
669 printf("%10s: ", $title) unless $style eq 'none';
670 print timestr($t, $style, $defaultfmt),"\n" unless $style eq 'none';
6ee623d5
GS
671
672 $n = $forn if defined $forn;
523cc92b 673
a0d0e21e
LW
674 # A conservative warning to spot very silly tests.
675 # Don't assume that your benchmark is ok simply because
676 # you don't get this warning!
677 print " (warning: too few iterations for a reliable count)\n"
523cc92b 678 if $n < $min_count
a0d0e21e 679 || ($t->real < 1 && $n < 1000)
523cc92b 680 || $t->cpu_a < $min_cpu;
a0d0e21e
LW
681 $t;
682}
683
a0d0e21e
LW
684sub timethese{
685 my($n, $alt, $style) = @_;
686 die "usage: timethese(count, { 'Name1'=>'code1', ... }\n"
687 unless ref $alt eq HASH;
523cc92b
CS
688 my @names = sort keys %$alt;
689 $style = "" unless defined $style;
3c6312e9 690 print "Benchmark: " unless $style eq 'none';
6ee623d5
GS
691 if ( $n > 0 ) {
692 croak "non-integer loopcount $n, stopped" if int($n)<$n;
3c6312e9 693 print "timing $n iterations of" unless $style eq 'none';
6ee623d5 694 } else {
3c6312e9 695 print "running" unless $style eq 'none';
6ee623d5 696 }
3c6312e9 697 print " ", join(', ',@names) unless $style eq 'none';
6ee623d5
GS
698 unless ( $n > 0 ) {
699 my $for = n_to_for( $n );
df7779cf
T
700 print ", each" if $n > 1 && $style ne 'none';
701 print " for at least $for CPU seconds" unless $style eq 'none';
6ee623d5 702 }
3c6312e9 703 print "...\n" unless $style eq 'none';
523cc92b
CS
704
705 # we could save the results in an array and produce a summary here
a0d0e21e 706 # sum, min, max, avg etc etc
3c6312e9 707 my %results;
4dbb2df9 708 foreach my $name (@names) {
3c6312e9 709 $results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
4dbb2df9 710 }
3c6312e9
BS
711
712 return \%results;
a0d0e21e
LW
713}
714
3c6312e9 715sub cmpthese{
0e74ff8e 716 my ($results, $style) = ref $_[0] ? @_ : ( timethese( @_[0,1,2] ), $_[2] ) ;
3c6312e9 717
d1083c7a 718 $style = "" unless defined $style;
3c6312e9
BS
719
720 # Flatten in to an array of arrays with the name as the first field
721 my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
722
723 for (@vals) {
724 # The epsilon fudge here is to prevent div by 0. Since clock
725 # resolutions are much larger, it's below the noise floor.
726 my $rate = $_->[6] / ( $_->[2] + $_->[3] + 0.000000000000001 );
727 $_->[7] = $rate;
728 }
729
730 # Sort by rate
731 @vals = sort { $a->[7] <=> $b->[7] } @vals;
732
733 # If more than half of the rates are greater than one...
734 my $display_as_rate = $vals[$#vals>>1]->[7] > 1;
735
736 my @rows;
737 my @col_widths;
738
739 my @top_row = (
740 '',
741 $display_as_rate ? 'Rate' : 's/iter',
742 map { $_->[0] } @vals
743 );
744
745 push @rows, \@top_row;
746 @col_widths = map { length( $_ ) } @top_row;
747
748 # Build the data rows
749 # We leave the last column in even though it never has any data. Perhaps
750 # it should go away. Also, perhaps a style for a single column of
751 # percentages might be nice.
752 for my $row_val ( @vals ) {
753 my @row;
754
755 # Column 0 = test name
756 push @row, $row_val->[0];
757 $col_widths[0] = length( $row_val->[0] )
758 if length( $row_val->[0] ) > $col_widths[0];
759
760 # Column 1 = performance
761 my $row_rate = $row_val->[7];
762
763 # We assume that we'll never get a 0 rate.
764 my $a = $display_as_rate ? $row_rate : 1 / $row_rate;
765
766 # Only give a few decimal places before switching to sci. notation,
767 # since the results aren't usually that accurate anyway.
768 my $format =
769 $a >= 100 ?
770 "%0.0f" :
771 $a >= 10 ?
772 "%0.1f" :
773 $a >= 1 ?
774 "%0.2f" :
775 $a >= 0.1 ?
776 "%0.3f" :
777 "%0.2e";
778
779 $format .= "/s"
780 if $display_as_rate;
781 # Using $b here due to optimizing bug in _58 through _61
782 my $b = sprintf( $format, $a );
783 push @row, $b;
784 $col_widths[1] = length( $b )
785 if length( $b ) > $col_widths[1];
786
787 # Columns 2..N = performance ratios
788 my $skip_rest = 0;
789 for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
790 my $col_val = $vals[$col_num];
791 my $out;
792 if ( $skip_rest ) {
793 $out = '';
794 }
795 elsif ( $col_val->[0] eq $row_val->[0] ) {
796 $out = "--";
797 # $skip_rest = 1;
798 }
799 else {
800 my $col_rate = $col_val->[7];
801 $out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
802 }
803 push @row, $out;
804 $col_widths[$col_num+2] = length( $out )
805 if length( $out ) > $col_widths[$col_num+2];
806
807 # A little wierdness to set the first column width properly
808 $col_widths[$col_num+2] = length( $col_val->[0] )
809 if length( $col_val->[0] ) > $col_widths[$col_num+2];
810 }
811 push @rows, \@row;
812 }
813
d1083c7a
BS
814 return \@rows if $style eq "none";
815
3c6312e9
BS
816 # Equalize column widths in the chart as much as possible without
817 # exceeding 80 characters. This does not use or affect cols 0 or 1.
818 my @sorted_width_refs =
819 sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];
820 my $max_width = ${$sorted_width_refs[-1]};
821
277427cf 822 my $total = @col_widths - 1 ;
3c6312e9
BS
823 for ( @col_widths ) { $total += $_ }
824
825 STRETCHER:
826 while ( $total < 80 ) {
827 my $min_width = ${$sorted_width_refs[0]};
828 last
829 if $min_width == $max_width;
830 for ( @sorted_width_refs ) {
831 last
832 if $$_ > $min_width;
833 ++$$_;
834 ++$total;
835 last STRETCHER
836 if $total >= 80;
837 }
838 }
839
840 # Dump the output
841 my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
842 substr( $format, 1, 0 ) = '-';
843 for ( @rows ) {
844 printf $format, @$_;
845 }
846
d1083c7a 847 return \@rows ;
3c6312e9
BS
848}
849
850
a0d0e21e 8511;