This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [perl #23207] persistant sideffect involving bitwise xor and substr
[perl5.git] / lib / Benchmark.pm
CommitLineData
a0d0e21e
LW
1package Benchmark;
2
f06db76b
AD
3=head1 NAME
4
8a4f6ac2 5Benchmark - benchmark running times of Perl code
431d98c2 6
f06db76b
AD
7=head1 SYNOPSIS
8
f36484b0
BS
9 use Benchmark qw(:all) ;
10
f06db76b
AD
11 timethis ($count, "code");
12
523cc92b 13 # Use Perl code in strings...
f06db76b
AD
14 timethese($count, {
15 'Name1' => '...code1...',
16 'Name2' => '...code2...',
17 });
18
523cc92b
CS
19 # ... or use subroutine references.
20 timethese($count, {
21 'Name1' => sub { ...code1... },
22 'Name2' => sub { ...code2... },
23 });
24
431d98c2
BS
25 # cmpthese can be used both ways as well
26 cmpthese($count, {
27 'Name1' => '...code1...',
28 'Name2' => '...code2...',
29 });
30
31 cmpthese($count, {
32 'Name1' => sub { ...code1... },
33 'Name2' => sub { ...code2... },
34 });
35
36 # ...or in two stages
37 $results = timethese($count,
38 {
39 'Name1' => sub { ...code1... },
40 'Name2' => sub { ...code2... },
41 },
42 'none'
43 );
44 cmpthese( $results ) ;
45
f06db76b
AD
46 $t = timeit($count, '...other code...')
47 print "$count loops of other code took:",timestr($t),"\n";
48
431d98c2
BS
49 $t = countit($time, '...other code...')
50 $count = $t->iters ;
51 print "$count loops of other code took:",timestr($t),"\n";
52
e3d6de9a
JH
53 # enable hires wallclock timing if possible
54 use Benchmark ':hireswallclock';
55
f06db76b
AD
56=head1 DESCRIPTION
57
58The Benchmark module encapsulates a number of routines to help you
59figure out how long it takes to execute some code.
60
8a4f6ac2
GS
61timethis - run a chunk of code several times
62
63timethese - run several chunks of code several times
64
65cmpthese - print results of timethese as a comparison chart
66
67timeit - run a chunk of code and see how long it goes
68
69countit - see how many times a chunk of code runs in a given time
70
71
f06db76b
AD
72=head2 Methods
73
74=over 10
75
76=item new
77
78Returns the current time. Example:
79
80 use Benchmark;
81 $t0 = new Benchmark;
82 # ... your code here ...
83 $t1 = new Benchmark;
84 $td = timediff($t1, $t0);
a24a9dfe 85 print "the code took:",timestr($td),"\n";
f06db76b
AD
86
87=item debug
88
89Enables or disable debugging by setting the C<$Benchmark::Debug> flag:
90
523cc92b 91 debug Benchmark 1;
f06db76b 92 $t = timeit(10, ' 5 ** $Global ');
523cc92b 93 debug Benchmark 0;
f06db76b 94
431d98c2
BS
95=item iters
96
97Returns the number of iterations.
98
f06db76b
AD
99=back
100
101=head2 Standard Exports
102
523cc92b 103The following routines will be exported into your namespace
f06db76b
AD
104if you use the Benchmark module:
105
106=over 10
107
108=item timeit(COUNT, CODE)
109
523cc92b
CS
110Arguments: COUNT is the number of times to run the loop, and CODE is
111the code to run. CODE may be either a code reference or a string to
112be eval'd; either way it will be run in the caller's package.
113
114Returns: a Benchmark object.
115
116=item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
117
118Time COUNT iterations of CODE. CODE may be a string to eval or a
119code reference; either way the CODE will run in the caller's package.
120Results will be printed to STDOUT as TITLE followed by the times.
121TITLE defaults to "timethis COUNT" if none is provided. STYLE
122determines the format of the output, as described for timestr() below.
123
6ee623d5
GS
124The COUNT can be zero or negative: this means the I<minimum number of
125CPU seconds> to run. A zero signifies the default of 3 seconds. For
126example to run at least for 10 seconds:
127
128 timethis(-10, $code)
129
130or to run two pieces of code tests for at least 3 seconds:
131
132 timethese(0, { test1 => '...', test2 => '...'})
133
134CPU seconds is, in UNIX terms, the user time plus the system time of
135the process itself, as opposed to the real (wallclock) time and the
136time spent by the child processes. Less than 0.1 seconds is not
137accepted (-0.01 as the count, for example, will cause a fatal runtime
138exception).
139
140Note that the CPU seconds is the B<minimum> time: CPU scheduling and
141other operating system factors may complicate the attempt so that a
142little bit more time is spent. The benchmark output will, however,
143also tell the number of C<$code> runs/second, which should be a more
144interesting number than the actually spent seconds.
145
146Returns a Benchmark object.
147
523cc92b 148=item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
f06db76b 149
523cc92b
CS
150The CODEHASHREF is a reference to a hash containing names as keys
151and either a string to eval or a code reference for each value.
152For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
153call
f06db76b 154
523cc92b 155 timethis(COUNT, VALUE, KEY, STYLE)
f06db76b 156
1d2dff63
GS
157The routines are called in string comparison order of KEY.
158
159The COUNT can be zero or negative, see timethis().
6ee623d5 160
3c6312e9
BS
161Returns a hash of Benchmark objects, keyed by name.
162
523cc92b 163=item timediff ( T1, T2 )
f06db76b 164
523cc92b
CS
165Returns the difference between two Benchmark times as a Benchmark
166object suitable for passing to timestr().
f06db76b 167
6ee623d5 168=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
f06db76b 169
523cc92b
CS
170Returns a string that formats the times in the TIMEDIFF object in
171the requested STYLE. TIMEDIFF is expected to be a Benchmark object
172similar to that returned by timediff().
173
3c6312e9
BS
174STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
175each of the 5 times available ('wallclock' time, user time, system time,
523cc92b
CS
176user time of children, and system time of children). 'noc' shows all
177except the two children times. 'nop' shows only wallclock and the
178two children times. 'auto' (the default) will act as 'all' unless
179the children times are both zero, in which case it acts as 'noc'.
3c6312e9 180'none' prevents output.
523cc92b
CS
181
182FORMAT is the L<printf(3)>-style format specifier (without the
183leading '%') to use to print the times. It defaults to '5.2f'.
f06db76b
AD
184
185=back
186
187=head2 Optional Exports
188
189The following routines will be exported into your namespace
190if you specifically ask that they be imported:
191
192=over 10
193
523cc92b
CS
194=item clearcache ( COUNT )
195
196Clear the cached time for COUNT rounds of the null loop.
197
198=item clearallcache ( )
f06db76b 199
523cc92b 200Clear all cached times.
f06db76b 201
8962dfd6 202=item cmpthese ( COUNT, CODEHASHREF, [ STYLE ] )
ac8eabc1 203
d1083c7a 204=item cmpthese ( RESULTSHASHREF, [ STYLE ] )
ac8eabc1 205
d1083c7a 206Optionally calls timethese(), then outputs comparison chart. This:
ac8eabc1 207
d1083c7a
BS
208 cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
209
210outputs a chart like:
211
212 Rate b a
213 b 2831802/s -- -61%
214 a 7208959/s 155% --
215
216This chart is sorted from slowest to fastest, and shows the percent speed
217difference between each pair of tests.
218
219c<cmpthese> can also be passed the data structure that timethese() returns:
220
221 $results = timethese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
ac8eabc1
JH
222 cmpthese( $results );
223
d1083c7a
BS
224in case you want to see both sets of results.
225
226Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the
227above chart, including labels. This:
228
229 my $rows = cmpthese( -1, { a => '++$i', b => '$i *= 2' }, "none" );
230
231returns a data structure like:
232
233 [
234 [ '', 'Rate', 'b', 'a' ],
235 [ 'b', '2885232/s', '--', '-59%' ],
236 [ 'a', '7099126/s', '146%', '--' ],
237 ]
238
239B<NOTE>: This result value differs from previous versions, which returned
240the C<timethese()> result structure. If you want that, just use the two
241statement C<timethese>...C<cmpthese> idiom shown above.
242
243Incidently, note the variance in the result values between the two examples;
244this is typical of benchmarking. If this were a real benchmark, you would
245probably want to run a lot more iterations.
ac8eabc1
JH
246
247=item countit(TIME, CODE)
248
249Arguments: TIME is the minimum length of time to run CODE for, and CODE is
250the code to run. CODE may be either a code reference or a string to
251be eval'd; either way it will be run in the caller's package.
252
253TIME is I<not> negative. countit() will run the loop many times to
254calculate the speed of CODE before running it for TIME. The actual
255time run for will usually be greater than TIME due to system clock
256resolution, so it's best to look at the number of iterations divided
257by the times that you are concerned with, not just the iterations.
258
259Returns: a Benchmark object.
260
523cc92b 261=item disablecache ( )
f06db76b 262
523cc92b
CS
263Disable caching of timings for the null loop. This will force Benchmark
264to recalculate these timings for each new piece of code timed.
265
266=item enablecache ( )
267
268Enable caching of timings for the null loop. The time taken for COUNT
269rounds of the null loop will be calculated only once for each
270different COUNT used.
f06db76b 271
ac8eabc1
JH
272=item timesum ( T1, T2 )
273
274Returns the sum of two Benchmark times as a Benchmark object suitable
275for passing to timestr().
276
f06db76b
AD
277=back
278
e3d6de9a
JH
279=head2 :hireswallclock
280
281If the Time::HiRes module has been installed, you can specify the
282special tag C<:hireswallclock> for Benchmark (if Time::HiRes is not
283available, the tag will be silently ignored). This tag will cause the
284wallclock time to be measured in microseconds, instead of integer
702fa71c
HS
285seconds. Note though that the speed computations are still conducted
286in CPU time, not wallclock time.
e3d6de9a 287
f06db76b
AD
288=head1 NOTES
289
290The data is stored as a list of values from the time and times
523cc92b 291functions:
f06db76b 292
431d98c2 293 ($real, $user, $system, $children_user, $children_system, $iters)
f06db76b
AD
294
295in seconds for the whole loop (not divided by the number of rounds).
296
297The timing is done using time(3) and times(3).
298
299Code is executed in the caller's package.
300
f06db76b
AD
301The time of the null loop (a loop with the same
302number of rounds but empty loop body) is subtracted
303from the time of the real loop.
304
3c6312e9 305The null loop times can be cached, the key being the
f06db76b
AD
306number of rounds. The caching can be controlled using
307calls like these:
308
523cc92b 309 clearcache($key);
f06db76b
AD
310 clearallcache();
311
523cc92b 312 disablecache();
f06db76b
AD
313 enablecache();
314
3c6312e9
BS
315Caching is off by default, as it can (usually slightly) decrease
316accuracy and does not usually noticably affect runtimes.
317
54e82ce5
GS
318=head1 EXAMPLES
319
320For example,
321
14393033
BS
322 use Benchmark qw( cmpthese ) ;
323 $x = 3;
324 cmpthese( -5, {
325 a => sub{$x*$x},
326 b => sub{$x**2},
327 } );
54e82ce5
GS
328
329outputs something like this:
330
331 Benchmark: running a, b, each for at least 5 CPU seconds...
14393033
BS
332 Rate b a
333 b 1559428/s -- -62%
334 a 4152037/s 166% --
335
54e82ce5
GS
336
337while
338
14393033
BS
339 use Benchmark qw( timethese cmpthese ) ;
340 $x = 3;
341 $r = timethese( -5, {
342 a => sub{$x*$x},
343 b => sub{$x**2},
344 } );
345 cmpthese $r;
54e82ce5
GS
346
347outputs something like this:
348
14393033
BS
349 Benchmark: running a, b, each for at least 5 CPU seconds...
350 a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743)
351 b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452)
352 Rate b a
353 b 1574945/s -- -59%
354 a 3835056/s 144% --
54e82ce5
GS
355
356
f06db76b
AD
357=head1 INHERITANCE
358
359Benchmark inherits from no other class, except of course
360for Exporter.
361
362=head1 CAVEATS
363
80eab818 364Comparing eval'd strings with code references will give you
431d98c2 365inaccurate results: a code reference will show a slightly slower
80eab818
CS
366execution time than the equivalent eval'd string.
367
f06db76b
AD
368The real time timing is done using time(2) and
369the granularity is therefore only one second.
370
371Short tests may produce negative figures because perl
523cc92b
CS
372can appear to take longer to execute the empty loop
373than a short test; try:
f06db76b
AD
374
375 timethis(100,'1');
376
377The system time of the null loop might be slightly
378more than the system time of the loop with the actual
a24a9dfe 379code and therefore the difference might end up being E<lt> 0.
f06db76b 380
8a4f6ac2
GS
381=head1 SEE ALSO
382
383L<Devel::DProf> - a Perl code profiler
384
f06db76b
AD
385=head1 AUTHORS
386
5aabfad6 387Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
f06db76b
AD
388
389=head1 MODIFICATION HISTORY
390
391September 8th, 1994; by Tim Bunce.
392
523cc92b
CS
393March 28th, 1997; by Hugo van der Sanden: added support for code
394references and the already documented 'debug' method; revamped
395documentation.
f06db76b 396
6ee623d5
GS
397April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
398functionality.
399
3c6312e9
BS
400September, 1999; by Barrie Slaymaker: math fixes and accuracy and
401efficiency tweaks. Added cmpthese(). A result is now returned from
431d98c2 402timethese(). Exposed countit() (was runfor()).
3c6312e9 403
0e74ff8e
JH
404December, 2001; by Nicholas Clark: make timestr() recognise the style 'none'
405and return an empty string. If cmpthese is calling timethese, make it pass the
406style in. (so that 'none' will suppress output). Make sub new dump its
407debugging output to STDERR, to be consistent with everything else.
408All bugs found while writing a regression test.
409
e3d6de9a
JH
410September, 2002; by Jarkko Hietaniemi: add ':hireswallclock' special tag.
411
523cc92b 412=cut
a0d0e21e 413
3f943bd9
GS
414# evaluate something in a clean lexical environment
415sub _doeval { eval shift }
416
417#
418# put any lexicals at file scope AFTER here
419#
420
4aa0a1f7 421use Carp;
a0d0e21e
LW
422use Exporter;
423@ISA=(Exporter);
ac8eabc1
JH
424@EXPORT=qw(timeit timethis timethese timediff timestr);
425@EXPORT_OK=qw(timesum cmpthese countit
426 clearcache clearallcache disablecache enablecache);
f36484b0 427%EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ;
a0d0e21e 428
e3d6de9a
JH
429$VERSION = 1.0501;
430
431# --- ':hireswallclock' special handling
432
433my $hirestime;
434
435sub mytime () { time }
8a4f6ac2 436
a0d0e21e
LW
437&init;
438
e3d6de9a
JH
439sub BEGIN {
440 if (eval 'require Time::HiRes') {
441 import Time::HiRes qw(time);
442 $hirestime = \&Time::HiRes::time;
443 }
444}
445
446sub import {
447 my $class = shift;
448 if (grep { $_ eq ":hireswallclock" } @_) {
449 @_ = grep { $_ ne ":hireswallclock" } @_;
450 *mytime = $hirestime if defined $hirestime;
451 }
452 Benchmark->export_to_level(1, $class, @_);
453}
454
a0d0e21e
LW
455sub init {
456 $debug = 0;
457 $min_count = 4;
458 $min_cpu = 0.4;
459 $defaultfmt = '5.2f';
460 $defaultstyle = 'auto';
461 # The cache can cause a slight loss of sys time accuracy. If a
462 # user does many tests (>10) with *very* large counts (>10000)
463 # or works on a very slow machine the cache may be useful.
464 &disablecache;
465 &clearallcache;
466}
467
523cc92b
CS
468sub debug { $debug = ($_[1] != 0); }
469
bba8fca5
BS
470# The cache needs two branches: 's' for strings and 'c' for code. The
471# emtpy loop is different in these two cases.
472sub clearcache { delete $cache{"$_[0]c"}; delete $cache{"$_[0]s"}; }
a0d0e21e
LW
473sub clearallcache { %cache = (); }
474sub enablecache { $cache = 1; }
475sub disablecache { $cache = 0; }
476
a0d0e21e
LW
477# --- Functions to process the 'time' data type
478
e3d6de9a 479sub new { my @t = (mytime, times, @_ == 2 ? $_[1] : 0);
0e74ff8e 480 print STDERR "new=@t\n" if $debug;
6ee623d5 481 bless \@t; }
a0d0e21e
LW
482
483sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps ; }
484sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $cu+$cs ; }
485sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; }
486sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; }
431d98c2 487sub iters { $_[0]->[5] ; }
a0d0e21e 488
523cc92b 489sub timediff {
a0d0e21e 490 my($a, $b) = @_;
523cc92b 491 my @r;
3f943bd9 492 for (my $i=0; $i < @$a; ++$i) {
a0d0e21e
LW
493 push(@r, $a->[$i] - $b->[$i]);
494 }
495 bless \@r;
496}
497
705cc255
TB
498sub timesum {
499 my($a, $b) = @_;
500 my @r;
501 for (my $i=0; $i < @$a; ++$i) {
502 push(@r, $a->[$i] + $b->[$i]);
503 }
504 bless \@r;
505}
506
523cc92b 507sub timestr {
a0d0e21e 508 my($tr, $style, $f) = @_;
523cc92b 509 my @t = @$tr;
6ee623d5
GS
510 warn "bad time value (@t)" unless @t==6;
511 my($r, $pu, $ps, $cu, $cs, $n) = @t;
ce9550df 512 my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);
523cc92b 513 $f = $defaultfmt unless defined $f;
a0d0e21e 514 # format a time in the required style, other formats may be added here
80eab818 515 $style ||= $defaultstyle;
0e74ff8e 516 return '' if $style eq 'none';
523cc92b
CS
517 $style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto';
518 my $s = "@t $style"; # default for unknown style
e3d6de9a
JH
519 my $w = $hirestime ? "%2g" : "%2d";
520 $s=sprintf("$w wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
ce9550df 521 $r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all';
e3d6de9a 522 $s=sprintf("$w wallclock secs (%$f usr + %$f sys = %$f CPU)",
7be077a2 523 $r,$pu,$ps,$pt) if $style eq 'noc';
e3d6de9a 524 $s=sprintf("$w wallclock secs (%$f cusr + %$f csys = %$f CPU)",
7be077a2 525 $r,$cu,$cs,$ct) if $style eq 'nop';
cc31225e 526 $s .= sprintf(" @ %$f/s (n=$n)", $n / ( $pu + $ps )) if $n && $pu+$ps;
a0d0e21e
LW
527 $s;
528}
523cc92b
CS
529
530sub timedebug {
a0d0e21e 531 my($msg, $t) = @_;
523cc92b 532 print STDERR "$msg",timestr($t),"\n" if $debug;
a0d0e21e
LW
533}
534
a0d0e21e
LW
535# --- Functions implementing low-level support for timing loops
536
537sub runloop {
538 my($n, $c) = @_;
4aa0a1f7
AD
539
540 $n+=0; # force numeric now, so garbage won't creep into the eval
523cc92b
CS
541 croak "negative loopcount $n" if $n<0;
542 confess "Usage: runloop(number, [string | coderef])" unless defined $c;
a0d0e21e
LW
543 my($t0, $t1, $td); # before, after, difference
544
545 # find package of caller so we can execute code there
523cc92b
CS
546 my($curpack) = caller(0);
547 my($i, $pack)= 0;
a0d0e21e
LW
548 while (($pack) = caller(++$i)) {
549 last if $pack ne $curpack;
550 }
551
3f943bd9
GS
552 my ($subcode, $subref);
553 if (ref $c eq 'CODE') {
554 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
555 $subref = eval $subcode;
556 }
557 else {
558 $subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
559 $subref = _doeval($subcode);
560 }
4aa0a1f7 561 croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
523cc92b 562 print STDERR "runloop $n '$subcode'\n" if $debug;
a0d0e21e 563
3c6312e9
BS
564 # Wait for the user timer to tick. This makes the error range more like
565 # -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This
566 # may not seem important, but it significantly reduces the chances of
567 # getting a too low initial $n in the initial, 'find the minimum' loop
431d98c2 568 # in &countit. This, in turn, can reduce the number of calls to
bba8fca5
BS
569 # &runloop a lot, and thus reduce additive errors.
570 my $tbase = Benchmark->new(0)->[1];
277427cf 571 while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ;
a0d0e21e 572 &$subref;
6ee623d5 573 $t1 = Benchmark->new($n);
a0d0e21e 574 $td = &timediff($t1, $t0);
a0d0e21e
LW
575 timedebug("runloop:",$td);
576 $td;
577}
578
579
580sub timeit {
581 my($n, $code) = @_;
582 my($wn, $wc, $wd);
583
584 printf STDERR "timeit $n $code\n" if $debug;
3c6312e9 585 my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
bba8fca5
BS
586 if ($cache && exists $cache{$cache_key} ) {
587 $wn = $cache{$cache_key};
523cc92b 588 } else {
6bf773bc 589 $wn = &runloop($n, ref( $code ) ? sub { } : '' );
3c6312e9
BS
590 # Can't let our baseline have any iterations, or they get subtracted
591 # out of the result.
592 $wn->[5] = 0;
bba8fca5 593 $cache{$cache_key} = $wn;
a0d0e21e
LW
594 }
595
596 $wc = &runloop($n, $code);
597
598 $wd = timediff($wc, $wn);
a0d0e21e
LW
599 timedebug("timeit: ",$wc);
600 timedebug(" - ",$wn);
601 timedebug(" = ",$wd);
602
603 $wd;
604}
605
6ee623d5
GS
606
607my $default_for = 3;
608my $min_for = 0.1;
609
3c6312e9 610
431d98c2
BS
611sub countit {
612 my ( $tmax, $code ) = @_;
6ee623d5
GS
613
614 if ( not defined $tmax or $tmax == 0 ) {
615 $tmax = $default_for;
616 } elsif ( $tmax < 0 ) {
617 $tmax = -$tmax;
618 }
619
431d98c2 620 die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
6ee623d5
GS
621 if $tmax < $min_for;
622
3c6312e9 623 my ($n, $tc);
6ee623d5 624
bba8fca5 625 # First find the minimum $n that gives a significant timing.
3c6312e9
BS
626 for ($n = 1; ; $n *= 2 ) {
627 my $td = timeit($n, $code);
628 $tc = $td->[1] + $td->[2];
629 last if $tc > 0.1;
630 }
6ee623d5 631
3c6312e9
BS
632 my $nmin = $n;
633
634 # Get $n high enough that we can guess the final $n with some accuracy.
635 my $tpra = 0.1 * $tmax; # Target/time practice.
636 while ( $tc < $tpra ) {
637 # The 5% fudge is to keep us from iterating again all
638 # that often (this speeds overall responsiveness when $tmax is big
639 # and we guess a little low). This does not noticably affect
640 # accuracy since we're not couting these times.
641 $n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation.
642 my $td = timeit($n, $code);
c5d57293
A
643 my $new_tc = $td->[1] + $td->[2];
644 # Make sure we are making progress.
645 $tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc;
6ee623d5
GS
646 }
647
3c6312e9
BS
648 # Now, do the 'for real' timing(s), repeating until we exceed
649 # the max.
650 my $ntot = 0;
651 my $rtot = 0;
652 my $utot = 0.0;
653 my $stot = 0.0;
654 my $cutot = 0.0;
655 my $cstot = 0.0;
656 my $ttot = 0.0;
657
658 # The 5% fudge is because $n is often a few % low even for routines
659 # with stable times and avoiding extra timeit()s is nice for
660 # accuracy's sake.
661 $n = int( $n * ( 1.05 * $tmax / $tc ) );
662
663 while () {
664 my $td = timeit($n, $code);
665 $ntot += $n;
666 $rtot += $td->[0];
667 $utot += $td->[1];
668 $stot += $td->[2];
6ee623d5
GS
669 $cutot += $td->[3];
670 $cstot += $td->[4];
3c6312e9
BS
671 $ttot = $utot + $stot;
672 last if $ttot >= $tmax;
6ee623d5 673
c5d57293 674 $ttot = 0.01 if $ttot < 0.01;
3c6312e9 675 my $r = $tmax / $ttot - 1; # Linear approximation.
bba8fca5 676 $n = int( $r * $ntot );
6ee623d5 677 $n = $nmin if $n < $nmin;
6ee623d5
GS
678 }
679
680 return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ];
681}
682
a0d0e21e
LW
683# --- Functions implementing high-level time-then-print utilities
684
6ee623d5
GS
685sub n_to_for {
686 my $n = shift;
687 return $n == 0 ? $default_for : $n < 0 ? -$n : undef;
688}
689
a0d0e21e
LW
690sub timethis{
691 my($n, $code, $title, $style) = @_;
6ee623d5
GS
692 my($t, $for, $forn);
693
694 if ( $n > 0 ) {
695 croak "non-integer loopcount $n, stopped" if int($n)<$n;
696 $t = timeit($n, $code);
697 $title = "timethis $n" unless defined $title;
698 } else {
699 $fort = n_to_for( $n );
431d98c2 700 $t = countit( $fort, $code );
6ee623d5
GS
701 $title = "timethis for $fort" unless defined $title;
702 $forn = $t->[-1];
703 }
523cc92b 704 local $| = 1;
523cc92b 705 $style = "" unless defined $style;
3c6312e9
BS
706 printf("%10s: ", $title) unless $style eq 'none';
707 print timestr($t, $style, $defaultfmt),"\n" unless $style eq 'none';
6ee623d5
GS
708
709 $n = $forn if defined $forn;
523cc92b 710
a0d0e21e
LW
711 # A conservative warning to spot very silly tests.
712 # Don't assume that your benchmark is ok simply because
713 # you don't get this warning!
714 print " (warning: too few iterations for a reliable count)\n"
523cc92b 715 if $n < $min_count
a0d0e21e 716 || ($t->real < 1 && $n < 1000)
523cc92b 717 || $t->cpu_a < $min_cpu;
a0d0e21e
LW
718 $t;
719}
720
a0d0e21e
LW
721sub timethese{
722 my($n, $alt, $style) = @_;
723 die "usage: timethese(count, { 'Name1'=>'code1', ... }\n"
724 unless ref $alt eq HASH;
523cc92b
CS
725 my @names = sort keys %$alt;
726 $style = "" unless defined $style;
3c6312e9 727 print "Benchmark: " unless $style eq 'none';
6ee623d5
GS
728 if ( $n > 0 ) {
729 croak "non-integer loopcount $n, stopped" if int($n)<$n;
3c6312e9 730 print "timing $n iterations of" unless $style eq 'none';
6ee623d5 731 } else {
3c6312e9 732 print "running" unless $style eq 'none';
6ee623d5 733 }
3c6312e9 734 print " ", join(', ',@names) unless $style eq 'none';
6ee623d5
GS
735 unless ( $n > 0 ) {
736 my $for = n_to_for( $n );
df7779cf
T
737 print ", each" if $n > 1 && $style ne 'none';
738 print " for at least $for CPU seconds" unless $style eq 'none';
6ee623d5 739 }
3c6312e9 740 print "...\n" unless $style eq 'none';
523cc92b
CS
741
742 # we could save the results in an array and produce a summary here
a0d0e21e 743 # sum, min, max, avg etc etc
3c6312e9 744 my %results;
4dbb2df9 745 foreach my $name (@names) {
3c6312e9 746 $results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
4dbb2df9 747 }
3c6312e9
BS
748
749 return \%results;
a0d0e21e
LW
750}
751
3c6312e9 752sub cmpthese{
8962dfd6
A
753 my ($results, $style) =
754 ref $_ [0] ? @_
755 : (timethese (@_ [0, 1], @_ > 2 ? $_ [2] : "none"), $_ [2]);
3c6312e9 756
d1083c7a 757 $style = "" unless defined $style;
3c6312e9
BS
758
759 # Flatten in to an array of arrays with the name as the first field
760 my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
761
762 for (@vals) {
763 # The epsilon fudge here is to prevent div by 0. Since clock
764 # resolutions are much larger, it's below the noise floor.
765 my $rate = $_->[6] / ( $_->[2] + $_->[3] + 0.000000000000001 );
766 $_->[7] = $rate;
767 }
768
769 # Sort by rate
770 @vals = sort { $a->[7] <=> $b->[7] } @vals;
771
772 # If more than half of the rates are greater than one...
773 my $display_as_rate = $vals[$#vals>>1]->[7] > 1;
774
775 my @rows;
776 my @col_widths;
777
778 my @top_row = (
779 '',
780 $display_as_rate ? 'Rate' : 's/iter',
781 map { $_->[0] } @vals
782 );
783
784 push @rows, \@top_row;
785 @col_widths = map { length( $_ ) } @top_row;
786
787 # Build the data rows
788 # We leave the last column in even though it never has any data. Perhaps
789 # it should go away. Also, perhaps a style for a single column of
790 # percentages might be nice.
791 for my $row_val ( @vals ) {
792 my @row;
793
794 # Column 0 = test name
795 push @row, $row_val->[0];
796 $col_widths[0] = length( $row_val->[0] )
797 if length( $row_val->[0] ) > $col_widths[0];
798
799 # Column 1 = performance
800 my $row_rate = $row_val->[7];
801
802 # We assume that we'll never get a 0 rate.
803 my $a = $display_as_rate ? $row_rate : 1 / $row_rate;
804
805 # Only give a few decimal places before switching to sci. notation,
806 # since the results aren't usually that accurate anyway.
807 my $format =
808 $a >= 100 ?
809 "%0.0f" :
810 $a >= 10 ?
811 "%0.1f" :
812 $a >= 1 ?
813 "%0.2f" :
814 $a >= 0.1 ?
815 "%0.3f" :
816 "%0.2e";
817
818 $format .= "/s"
819 if $display_as_rate;
820 # Using $b here due to optimizing bug in _58 through _61
821 my $b = sprintf( $format, $a );
822 push @row, $b;
823 $col_widths[1] = length( $b )
824 if length( $b ) > $col_widths[1];
825
826 # Columns 2..N = performance ratios
827 my $skip_rest = 0;
828 for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
829 my $col_val = $vals[$col_num];
830 my $out;
831 if ( $skip_rest ) {
832 $out = '';
833 }
834 elsif ( $col_val->[0] eq $row_val->[0] ) {
835 $out = "--";
836 # $skip_rest = 1;
837 }
838 else {
839 my $col_rate = $col_val->[7];
840 $out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
841 }
842 push @row, $out;
843 $col_widths[$col_num+2] = length( $out )
844 if length( $out ) > $col_widths[$col_num+2];
845
846 # A little wierdness to set the first column width properly
847 $col_widths[$col_num+2] = length( $col_val->[0] )
848 if length( $col_val->[0] ) > $col_widths[$col_num+2];
849 }
850 push @rows, \@row;
851 }
852
d1083c7a
BS
853 return \@rows if $style eq "none";
854
3c6312e9
BS
855 # Equalize column widths in the chart as much as possible without
856 # exceeding 80 characters. This does not use or affect cols 0 or 1.
857 my @sorted_width_refs =
858 sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];
859 my $max_width = ${$sorted_width_refs[-1]};
860
277427cf 861 my $total = @col_widths - 1 ;
3c6312e9
BS
862 for ( @col_widths ) { $total += $_ }
863
864 STRETCHER:
865 while ( $total < 80 ) {
866 my $min_width = ${$sorted_width_refs[0]};
867 last
868 if $min_width == $max_width;
869 for ( @sorted_width_refs ) {
870 last
871 if $$_ > $min_width;
872 ++$$_;
873 ++$total;
874 last STRETCHER
875 if $total >= 80;
876 }
877 }
878
879 # Dump the output
880 my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
881 substr( $format, 1, 0 ) = '-';
882 for ( @rows ) {
883 printf $format, @$_;
884 }
885
d1083c7a 886 return \@rows ;
3c6312e9
BS
887}
888
889
a0d0e21e 8901;