lib/Benchmark.pm

   1 package Benchmark;
   2
   3 use strict;
   4
   5
   6 =head1 NAME
   7
   8 Benchmark - benchmark running times of Perl code
   9
  10 =head1 SYNOPSIS
  11
  12     use Benchmark qw(:all) ;
  13
  14     timethis ($count, "code");
  15
  16     # Use Perl code in strings...
  17     timethese($count, {
  18         'Name1' => '...code1...',
  19         'Name2' => '...code2...',
  20     });
  21
  22     # ... or use subroutine references.
  23     timethese($count, {
  24         'Name1' => sub { ...code1... },
  25         'Name2' => sub { ...code2... },
  26     });
  27
  28     # cmpthese can be used both ways as well
  29     cmpthese($count, {
  30         'Name1' => '...code1...',
  31         'Name2' => '...code2...',
  32     });
  33
  34     cmpthese($count, {
  35         'Name1' => sub { ...code1... },
  36         'Name2' => sub { ...code2... },
  37     });
  38
  39     # ...or in two stages
  40     $results = timethese($count,
  41         {
  42             'Name1' => sub { ...code1... },
  43             'Name2' => sub { ...code2... },
  44         },
  45         'none'
  46     );
  47     cmpthese( $results ) ;
  48
  49     $t = timeit($count, '...other code...')
  50     print "$count loops of other code took:",timestr($t),"\n";
  51
  52     $t = countit($time, '...other code...')
  53     $count = $t->iters ;
  54     print "$count loops of other code took:",timestr($t),"\n";
  55
  56     # enable hires wallclock timing if possible
  57     use Benchmark ':hireswallclock';
  58
  59 =head1 DESCRIPTION
  60
  61 The Benchmark module encapsulates a number of routines to help you
  62 figure out how long it takes to execute some code.
  63
  64 timethis - run a chunk of code several times
  65
  66 timethese - run several chunks of code several times
  67
  68 cmpthese - print results of timethese as a comparison chart
  69
  70 timeit - run a chunk of code and see how long it goes
  71
  72 countit - see how many times a chunk of code runs in a given time
  73
  74
  75 =head2 Methods
  76
  77 =over 10
  78
  79 =item new
  80
  81 Returns the current time.   Example:
  82
  83     use Benchmark;
  84     $t0 = Benchmark->new;
  85     # ... your code here ...
  86     $t1 = Benchmark->new;
  87     $td = timediff($t1, $t0);
  88     print "the code took:",timestr($td),"\n";
  89
  90 =item debug
  91
  92 Enables or disable debugging by setting the C<$Benchmark::Debug> flag:
  93
  94     Benchmark->debug(1);
  95     $t = timeit(10, ' 5 ** $Global ');
  96     Benchmark->debug(0);
  97
  98 =item iters
  99
 100 Returns the number of iterations.
 101
 102 =back
 103
 104 =head2 Standard Exports
 105
 106 The following routines will be exported into your namespace
 107 if you use the Benchmark module:
 108
 109 =over 10
 110
 111 =item timeit(COUNT, CODE)
 112
 113 Arguments: COUNT is the number of times to run the loop, and CODE is
 114 the code to run.  CODE may be either a code reference or a string to
 115 be eval'd; either way it will be run in the caller's package.
 116
 117 Returns: a Benchmark object.
 118
 119 =item timethis ( COUNT, CODE, [ TITLE, [ STYLE ]] )
 120
 121 Time COUNT iterations of CODE. CODE may be a string to eval or a
 122 code reference; either way the CODE will run in the caller's package.
 123 Results will be printed to STDOUT as TITLE followed by the times.
 124 TITLE defaults to "timethis COUNT" if none is provided. STYLE
 125 determines the format of the output, as described for timestr() below.
 126
 127 The COUNT can be zero or negative: this means the I<minimum number of
 128 CPU seconds> to run.  A zero signifies the default of 3 seconds.  For
 129 example to run at least for 10 seconds:
 130
 131         timethis(-10, $code)
 132
 133 or to run two pieces of code tests for at least 3 seconds:
 134
 135         timethese(0, { test1 => '...', test2 => '...'})
 136
 137 CPU seconds is, in UNIX terms, the user time plus the system time of
 138 the process itself, as opposed to the real (wallclock) time and the
 139 time spent by the child processes.  Less than 0.1 seconds is not
 140 accepted (-0.01 as the count, for example, will cause a fatal runtime
 141 exception).
 142
 143 Note that the CPU seconds is the B<minimum> time: CPU scheduling and
 144 other operating system factors may complicate the attempt so that a
 145 little bit more time is spent.  The benchmark output will, however,
 146 also tell the number of C<$code> runs/second, which should be a more
 147 interesting number than the actually spent seconds.
 148
 149 Returns a Benchmark object.
 150
 151 =item timethese ( COUNT, CODEHASHREF, [ STYLE ] )
 152
 153 The CODEHASHREF is a reference to a hash containing names as keys
 154 and either a string to eval or a code reference for each value.
 155 For each (KEY, VALUE) pair in the CODEHASHREF, this routine will
 156 call
 157
 158         timethis(COUNT, VALUE, KEY, STYLE)
 159
 160 The routines are called in string comparison order of KEY.
 161
 162 The COUNT can be zero or negative, see timethis().
 163
 164 Returns a hash reference of Benchmark objects, keyed by name.
 165
 166 =item timediff ( T1, T2 )
 167
 168 Returns the difference between two Benchmark times as a Benchmark
 169 object suitable for passing to timestr().
 170
 171 =item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
 172
 173 Returns a string that formats the times in the TIMEDIFF object in
 174 the requested STYLE. TIMEDIFF is expected to be a Benchmark object
 175 similar to that returned by timediff().
 176
 177 STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
 178 each of the 5 times available ('wallclock' time, user time, system time,
 179 user time of children, and system time of children). 'noc' shows all
 180 except the two children times. 'nop' shows only wallclock and the
 181 two children times. 'auto' (the default) will act as 'all' unless
 182 the children times are both zero, in which case it acts as 'noc'.
 183 'none' prevents output.
 184
 185 FORMAT is the L<printf(3)>-style format specifier (without the
 186 leading '%') to use to print the times. It defaults to '5.2f'.
 187
 188 =back
 189
 190 =head2 Optional Exports
 191
 192 The following routines will be exported into your namespace
 193 if you specifically ask that they be imported:
 194
 195 =over 10
 196
 197 =item clearcache ( COUNT )
 198
 199 Clear the cached time for COUNT rounds of the null loop.
 200
 201 =item clearallcache ( )
 202
 203 Clear all cached times.
 204
 205 =item cmpthese ( COUNT, CODEHASHREF, [ STYLE ] )
 206
 207 =item cmpthese ( RESULTSHASHREF, [ STYLE ] )
 208
 209 Optionally calls timethese(), then outputs comparison chart.  This:
 210
 211     cmpthese( -1, { a => "++\$i", b => "\$i *= 2" } ) ;
 212
 213 outputs a chart like:
 214
 215            Rate    b    a
 216     b 2831802/s   -- -61%
 217     a 7208959/s 155%   --
 218
 219 This chart is sorted from slowest to fastest, and shows the percent speed
 220 difference between each pair of tests.
 221
 222 C<cmpthese> can also be passed the data structure that timethese() returns:
 223
 224     $results = timethese( -1,
 225         { a => "++\$i", b => "\$i *= 2" } ) ;
 226     cmpthese( $results );
 227
 228 in case you want to see both sets of results.
 229 If the first argument is an unblessed hash reference,
 230 that is RESULTSHASHREF; otherwise that is COUNT.
 231
 232 Returns a reference to an ARRAY of rows, each row is an ARRAY of cells from the
 233 above chart, including labels. This:
 234
 235     my $rows = cmpthese( -1,
 236         { a => '++$i', b => '$i *= 2' }, "none" );
 237
 238 returns a data structure like:
 239
 240     [
 241         [ '',       'Rate',   'b',    'a' ],
 242         [ 'b', '2885232/s',  '--', '-59%' ],
 243         [ 'a', '7099126/s', '146%',  '--' ],
 244     ]
 245
 246 B<NOTE>: This result value differs from previous versions, which returned
 247 the C<timethese()> result structure.  If you want that, just use the two
 248 statement C<timethese>...C<cmpthese> idiom shown above.
 249
 250 Incidentally, note the variance in the result values between the two examples;
 251 this is typical of benchmarking.  If this were a real benchmark, you would
 252 probably want to run a lot more iterations.
 253
 254 =item countit(TIME, CODE)
 255
 256 Arguments: TIME is the minimum length of time to run CODE for, and CODE is
 257 the code to run.  CODE may be either a code reference or a string to
 258 be eval'd; either way it will be run in the caller's package.
 259
 260 TIME is I<not> negative.  countit() will run the loop many times to
 261 calculate the speed of CODE before running it for TIME.  The actual
 262 time run for will usually be greater than TIME due to system clock
 263 resolution, so it's best to look at the number of iterations divided
 264 by the times that you are concerned with, not just the iterations.
 265
 266 Returns: a Benchmark object.
 267
 268 =item disablecache ( )
 269
 270 Disable caching of timings for the null loop. This will force Benchmark
 271 to recalculate these timings for each new piece of code timed.
 272
 273 =item enablecache ( )
 274
 275 Enable caching of timings for the null loop. The time taken for COUNT
 276 rounds of the null loop will be calculated only once for each
 277 different COUNT used.
 278
 279 =item timesum ( T1, T2 )
 280
 281 Returns the sum of two Benchmark times as a Benchmark object suitable
 282 for passing to timestr().
 283
 284 =back
 285
 286 =head2 :hireswallclock
 287
 288 If the Time::HiRes module has been installed, you can specify the
 289 special tag C<:hireswallclock> for Benchmark (if Time::HiRes is not
 290 available, the tag will be silently ignored).  This tag will cause the
 291 wallclock time to be measured in microseconds, instead of integer
 292 seconds.  Note though that the speed computations are still conducted
 293 in CPU time, not wallclock time.
 294
 295 =head1 NOTES
 296
 297 The data is stored as a list of values from the time and times
 298 functions:
 299
 300       ($real, $user, $system, $children_user, $children_system, $iters)
 301
 302 in seconds for the whole loop (not divided by the number of rounds).
 303
 304 The timing is done using time(3) and times(3).
 305
 306 Code is executed in the caller's package.
 307
 308 The time of the null loop (a loop with the same
 309 number of rounds but empty loop body) is subtracted
 310 from the time of the real loop.
 311
 312 The null loop times can be cached, the key being the
 313 number of rounds. The caching can be controlled using
 314 calls like these:
 315
 316     clearcache($key);
 317     clearallcache();
 318
 319     disablecache();
 320     enablecache();
 321
 322 Caching is off by default, as it can (usually slightly) decrease
 323 accuracy and does not usually noticeably affect runtimes.
 324
 325 =head1 EXAMPLES
 326
 327 For example,
 328
 329     use Benchmark qw( cmpthese ) ;
 330     $x = 3;
 331     cmpthese( -5, {
 332         a => sub{$x*$x},
 333         b => sub{$x**2},
 334     } );
 335
 336 outputs something like this:
 337
 338    Benchmark: running a, b, each for at least 5 CPU seconds...
 339           Rate    b    a
 340    b 1559428/s   -- -62%
 341    a 4152037/s 166%   --
 342
 343
 344 while
 345
 346     use Benchmark qw( timethese cmpthese ) ;
 347     $x = 3;
 348     $r = timethese( -5, {
 349         a => sub{$x*$x},
 350         b => sub{$x**2},
 351     } );
 352     cmpthese $r;
 353
 354 outputs something like this:
 355
 356     Benchmark: running a, b, each for at least 5 CPU seconds...
 357              a: 10 wallclock secs ( 5.14 usr +  0.13 sys =  5.27 CPU) @ 3835055.60/s (n=20210743)
 358              b:  5 wallclock secs ( 5.41 usr +  0.00 sys =  5.41 CPU) @ 1574944.92/s (n=8520452)
 359            Rate    b    a
 360     b 1574945/s   -- -59%
 361     a 3835056/s 144%   --
 362
 363
 364 =head1 INHERITANCE
 365
 366 Benchmark inherits from no other class, except of course
 367 for Exporter.
 368
 369 =head1 CAVEATS
 370
 371 Comparing eval'd strings with code references will give you
 372 inaccurate results: a code reference will show a slightly slower
 373 execution time than the equivalent eval'd string.
 374
 375 The real time timing is done using time(2) and
 376 the granularity is therefore only one second.
 377
 378 Short tests may produce negative figures because perl
 379 can appear to take longer to execute the empty loop
 380 than a short test; try:
 381
 382     timethis(100,'1');
 383
 384 The system time of the null loop might be slightly
 385 more than the system time of the loop with the actual
 386 code and therefore the difference might end up being E<lt> 0.
 387
 388 =head1 SEE ALSO
 389
 390 L<Devel::NYTProf> - a Perl code profiler
 391
 392 =head1 AUTHORS
 393
 394 Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
 395
 396 =head1 MODIFICATION HISTORY
 397
 398 September 8th, 1994; by Tim Bunce.
 399
 400 March 28th, 1997; by Hugo van der Sanden: added support for code
 401 references and the already documented 'debug' method; revamped
 402 documentation.
 403
 404 April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
 405 functionality.
 406
 407 September, 1999; by Barrie Slaymaker: math fixes and accuracy and
 408 efficiency tweaks.  Added cmpthese().  A result is now returned from
 409 timethese().  Exposed countit() (was runfor()).
 410
 411 December, 2001; by Nicholas Clark: make timestr() recognise the style 'none'
 412 and return an empty string. If cmpthese is calling timethese, make it pass the
 413 style in. (so that 'none' will suppress output). Make sub new dump its
 414 debugging output to STDERR, to be consistent with everything else.
 415 All bugs found while writing a regression test.
 416
 417 September, 2002; by Jarkko Hietaniemi: add ':hireswallclock' special tag.
 418
 419 February, 2004; by Chia-liang Kao: make cmpthese and timestr use time
 420 statistics for children instead of parent when the style is 'nop'.
 421
 422 November, 2007; by Christophe Grosjean: make cmpthese and timestr compute
 423 time consistently with style argument, default is 'all' not 'noc' any more.
 424
 425 =cut
 426
 427 # evaluate something in a clean lexical environment
 428 sub _doeval { no strict;  eval shift }
 429
 430 #
 431 # put any lexicals at file scope AFTER here
 432 #
 433
 434 use Carp;
 435 use Exporter;
 436
 437 our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS, $VERSION);
 438
 439 @ISA=qw(Exporter);
 440 @EXPORT=qw(timeit timethis timethese timediff timestr);
 441 @EXPORT_OK=qw(timesum cmpthese countit
 442               clearcache clearallcache disablecache enablecache);
 443 %EXPORT_TAGS=( all => [ @EXPORT, @EXPORT_OK ] ) ;
 444
 445 $VERSION = 1.17;
 446
 447 # --- ':hireswallclock' special handling
 448
 449 my $hirestime;
 450
 451 sub mytime () { time }
 452
 453 init();
 454
 455 sub BEGIN {
 456     if (eval 'require Time::HiRes') {
 457         import Time::HiRes qw(time);
 458         $hirestime = \&Time::HiRes::time;
 459     }
 460 }
 461
 462 sub import {
 463     my $class = shift;
 464     if (grep { $_ eq ":hireswallclock" } @_) {
 465         @_ = grep { $_ ne ":hireswallclock" } @_;
 466         local $^W=0;
 467         *mytime = $hirestime if defined $hirestime;
 468     }
 469     Benchmark->export_to_level(1, $class, @_);
 470 }
 471
 472 our($Debug, $Min_Count, $Min_CPU, $Default_Format, $Default_Style,
 473     %_Usage, %Cache, $Do_Cache);
 474
 475 sub init {
 476     $Debug = 0;
 477     $Min_Count = 4;
 478     $Min_CPU   = 0.4;
 479     $Default_Format = '5.2f';
 480     $Default_Style = 'auto';
 481     # The cache can cause a slight loss of sys time accuracy. If a
 482     # user does many tests (>10) with *very* large counts (>10000)
 483     # or works on a very slow machine the cache may be useful.
 484     disablecache();
 485     clearallcache();
 486 }
 487
 488 sub debug { $Debug = ($_[1] != 0); }
 489
 490 sub usage {
 491     my $calling_sub = (caller(1))[3];
 492     $calling_sub =~ s/^Benchmark:://;
 493     return $_Usage{$calling_sub} || '';
 494 }
 495
 496 # The cache needs two branches: 's' for strings and 'c' for code.  The
 497 # empty loop is different in these two cases.
 498
 499 $_Usage{clearcache} = <<'USAGE';
 500 usage: clearcache($count);
 501 USAGE
 502
 503 sub clearcache    {
 504     die usage unless @_ == 1;
 505     delete $Cache{"$_[0]c"}; delete $Cache{"$_[0]s"};
 506 }
 507
 508 $_Usage{clearallcache} = <<'USAGE';
 509 usage: clearallcache();
 510 USAGE
 511
 512 sub clearallcache {
 513     die usage if @_;
 514     %Cache = ();
 515 }
 516
 517 $_Usage{enablecache} = <<'USAGE';
 518 usage: enablecache();
 519 USAGE
 520
 521 sub enablecache   {
 522     die usage if @_;
 523     $Do_Cache = 1;
 524 }
 525
 526 $_Usage{disablecache} = <<'USAGE';
 527 usage: disablecache();
 528 USAGE
 529
 530 sub disablecache  {
 531     die usage if @_;
 532     $Do_Cache = 0;
 533 }
 534
 535
 536 # --- Functions to process the 'time' data type
 537
 538 sub new { my @t = (mytime, times, @_ == 2 ? $_[1] : 0);
 539           print STDERR "new=@t\n" if $Debug;
 540           bless \@t; }
 541
 542 sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps         ; }
 543 sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]};         $cu+$cs ; }
 544 sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; }
 545 sub real  { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r              ; }
 546 sub iters { $_[0]->[5] ; }
 547
 548
 549 $_Usage{timediff} = <<'USAGE';
 550 usage: $result_diff = timediff($result1, $result2);
 551 USAGE
 552
 553 sub timediff {
 554     my($a, $b) = @_;
 555
 556     die usage unless ref $a and ref $b;
 557
 558     my @r;
 559     for (my $i=0; $i < @$a; ++$i) {
 560         push(@r, $a->[$i] - $b->[$i]);
 561     }
 562     #die "Bad timediff(): ($r[1] + $r[2]) <= 0 (@$a[1,2]|@$b[1,2])\n"
 563     #        if ($r[1] + $r[2]) < 0;
 564     bless \@r;
 565 }
 566
 567 $_Usage{timesum} = <<'USAGE';
 568 usage: $sum = timesum($result1, $result2);
 569 USAGE
 570
 571 sub timesum {
 572     my($a, $b) = @_;
 573
 574     die usage unless ref $a and ref $b;
 575
 576     my @r;
 577     for (my $i=0; $i < @$a; ++$i) {
 578         push(@r, $a->[$i] + $b->[$i]);
 579     }
 580     bless \@r;
 581 }
 582
 583
 584 $_Usage{timestr} = <<'USAGE';
 585 usage: $formatted_result = timestr($result1);
 586 USAGE
 587
 588 sub timestr {
 589     my($tr, $style, $f) = @_;
 590
 591     die usage unless ref $tr;
 592
 593     my @t = @$tr;
 594     warn "bad time value (@t)" unless @t==6;
 595     my($r, $pu, $ps, $cu, $cs, $n) = @t;
 596     my($pt, $ct, $tt) = ($tr->cpu_p, $tr->cpu_c, $tr->cpu_a);
 597     $f = $Default_Format unless defined $f;
 598     # format a time in the required style, other formats may be added here
 599     $style ||= $Default_Style;
 600     return '' if $style eq 'none';
 601     $style = ($ct>0) ? 'all' : 'noc' if $style eq 'auto';
 602     my $s = "@t $style"; # default for unknown style
 603     my $w = $hirestime ? "%2g" : "%2d";
 604     $s = sprintf("$w wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
 605                             $r,$pu,$ps,$cu,$cs,$tt) if $style eq 'all';
 606     $s = sprintf("$w wallclock secs (%$f usr + %$f sys = %$f CPU)",
 607                             $r,$pu,$ps,$pt) if $style eq 'noc';
 608     $s = sprintf("$w wallclock secs (%$f cusr + %$f csys = %$f CPU)",
 609                             $r,$cu,$cs,$ct) if $style eq 'nop';
 610     my $elapsed = do {
 611         if ($style eq 'nop') {$cu+$cs}
 612         elsif ($style eq 'noc') {$pu+$ps}
 613         else {$cu+$cs+$pu+$ps}
 614     };
 615     $s .= sprintf(" @ %$f/s (n=$n)",$n/($elapsed)) if $n && $elapsed;
 616     $s;
 617 }
 618
 619 sub timedebug {
 620     my($msg, $t) = @_;
 621     print STDERR "$msg",timestr($t),"\n" if $Debug;
 622 }
 623
 624 # --- Functions implementing low-level support for timing loops
 625
 626 $_Usage{runloop} = <<'USAGE';
 627 usage: runloop($number, [$string | $coderef])
 628 USAGE
 629
 630 sub runloop {
 631     my($n, $c) = @_;
 632
 633     $n+=0; # force numeric now, so garbage won't creep into the eval
 634     croak "negative loopcount $n" if $n<0;
 635     confess usage unless defined $c;
 636     my($t0, $t1, $td); # before, after, difference
 637
 638     # find package of caller so we can execute code there
 639     my($curpack) = caller(0);
 640     my($i, $pack)= 0;
 641     while (($pack) = caller(++$i)) {
 642         last if $pack ne $curpack;
 643     }
 644
 645     my ($subcode, $subref);
 646     if (ref $c eq 'CODE') {
 647         $subcode = "sub { for (1 .. $n) { local \$_; package $pack; &\$c; } }";
 648         $subref  = eval $subcode;
 649     }
 650     else {
 651         $subcode = "sub { for (1 .. $n) { local \$_; package $pack; $c;} }";
 652         $subref  = _doeval($subcode);
 653     }
 654     croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
 655     print STDERR "runloop $n '$subcode'\n" if $Debug;
 656
 657     # Wait for the user timer to tick.  This makes the error range more like
 658     # -0.01, +0.  If we don't wait, then it's more like -0.01, +0.01.  This
 659     # may not seem important, but it significantly reduces the chances of
 660     # getting a too low initial $n in the initial, 'find the minimum' loop
 661     # in &countit.  This, in turn, can reduce the number of calls to
 662     # &runloop a lot, and thus reduce additive errors.
 663     my $tbase = Benchmark->new(0)->[1];
 664     while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ;
 665     $subref->();
 666     $t1 = Benchmark->new($n);
 667     $td = &timediff($t1, $t0);
 668     timedebug("runloop:",$td);
 669     $td;
 670 }
 671
 672 $_Usage{timeit} = <<'USAGE';
 673 usage: $result = timeit($count, 'code' );        or
 674        $result = timeit($count, sub { code } );
 675 USAGE
 676
 677 sub timeit {
 678     my($n, $code) = @_;
 679     my($wn, $wc, $wd);
 680
 681     die usage unless defined $code and
 682                      (!ref $code or ref $code eq 'CODE');
 683
 684     printf STDERR "timeit $n $code\n" if $Debug;
 685     my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
 686     if ($Do_Cache && exists $Cache{$cache_key} ) {
 687         $wn = $Cache{$cache_key};
 688     } else {
 689         $wn = &runloop($n, ref( $code ) ? sub { } : '' );
 690         # Can't let our baseline have any iterations, or they get subtracted
 691         # out of the result.
 692         $wn->[5] = 0;
 693         $Cache{$cache_key} = $wn;
 694     }
 695
 696     $wc = &runloop($n, $code);
 697
 698     $wd = timediff($wc, $wn);
 699     timedebug("timeit: ",$wc);
 700     timedebug("      - ",$wn);
 701     timedebug("      = ",$wd);
 702
 703     $wd;
 704 }
 705
 706
 707 my $default_for = 3;
 708 my $min_for     = 0.1;
 709
 710
 711 $_Usage{countit} = <<'USAGE';
 712 usage: $result = countit($time, 'code' );        or
 713        $result = countit($time, sub { code } );
 714 USAGE
 715
 716 sub countit {
 717     my ( $tmax, $code ) = @_;
 718
 719     die usage unless @_;
 720
 721     if ( not defined $tmax or $tmax == 0 ) {
 722         $tmax = $default_for;
 723     } elsif ( $tmax < 0 ) {
 724         $tmax = -$tmax;
 725     }
 726
 727     die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
 728         if $tmax < $min_for;
 729
 730     my ($n, $tc);
 731
 732     # First find the minimum $n that gives a significant timing.
 733     my $zeros=0;
 734     for ($n = 1; ; $n *= 2 ) {
 735         my $td = timeit($n, $code);
 736         $tc = $td->[1] + $td->[2];
 737         if ( $tc <= 0 and $n > 1024 ) {
 738             ++$zeros > 16
 739                 and die "Timing is consistently zero in estimation loop, cannot benchmark. N=$n\n";
 740         } else {
 741             $zeros = 0;
 742         }
 743         last if $tc > 0.1;
 744     }
 745
 746     my $nmin = $n;
 747
 748     # Get $n high enough that we can guess the final $n with some accuracy.
 749     my $tpra = 0.1 * $tmax; # Target/time practice.
 750     while ( $tc < $tpra ) {
 751         # The 5% fudge is to keep us from iterating again all
 752         # that often (this speeds overall responsiveness when $tmax is big
 753         # and we guess a little low).  This does not noticeably affect
 754         # accuracy since we're not counting these times.
 755         $n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation.
 756         my $td = timeit($n, $code);
 757         my $new_tc = $td->[1] + $td->[2];
 758         # Make sure we are making progress.
 759         $tc = $new_tc > 1.2 * $tc ? $new_tc : 1.2 * $tc;
 760     }
 761
 762     # Now, do the 'for real' timing(s), repeating until we exceed
 763     # the max.
 764     my $ntot  = 0;
 765     my $rtot  = 0;
 766     my $utot  = 0.0;
 767     my $stot  = 0.0;
 768     my $cutot = 0.0;
 769     my $cstot = 0.0;
 770     my $ttot  = 0.0;
 771
 772     # The 5% fudge is because $n is often a few % low even for routines
 773     # with stable times and avoiding extra timeit()s is nice for
 774     # accuracy's sake.
 775     $n = int( $n * ( 1.05 * $tmax / $tc ) );
 776     $zeros=0;
 777     while () {
 778         my $td = timeit($n, $code);
 779         $ntot  += $n;
 780         $rtot  += $td->[0];
 781         $utot  += $td->[1];
 782         $stot  += $td->[2];
 783         $cutot += $td->[3];
 784         $cstot += $td->[4];
 785         $ttot = $utot + $stot;
 786         last if $ttot >= $tmax;
 787         if ( $ttot <= 0 ) {
 788             ++$zeros > 16
 789                 and die "Timing is consistently zero, cannot benchmark. N=$n\n";
 790         } else {
 791             $zeros = 0;
 792         }
 793         $ttot = 0.01 if $ttot < 0.01;
 794         my $r = $tmax / $ttot - 1; # Linear approximation.
 795         $n = int( $r * $ntot );
 796         $n = $nmin if $n < $nmin;
 797     }
 798
 799     return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ];
 800 }
 801
 802 # --- Functions implementing high-level time-then-print utilities
 803
 804 sub n_to_for {
 805     my $n = shift;
 806     return $n == 0 ? $default_for : $n < 0 ? -$n : undef;
 807 }
 808
 809 $_Usage{timethis} = <<'USAGE';
 810 usage: $result = timethis($time, 'code' );        or
 811        $result = timethis($time, sub { code } );
 812 USAGE
 813
 814 sub timethis{
 815     my($n, $code, $title, $style) = @_;
 816     my($t, $forn);
 817
 818     die usage unless defined $code and
 819                      (!ref $code or ref $code eq 'CODE');
 820
 821     if ( $n > 0 ) {
 822         croak "non-integer loopcount $n, stopped" if int($n)<$n;
 823         $t = timeit($n, $code);
 824         $title = "timethis $n" unless defined $title;
 825     } else {
 826         my $fort  = n_to_for( $n );
 827         $t     = countit( $fort, $code );
 828         $title = "timethis for $fort" unless defined $title;
 829         $forn  = $t->[-1];
 830     }
 831     local $| = 1;
 832     $style = "" unless defined $style;
 833     printf("%10s: ", $title) unless $style eq 'none';
 834     print timestr($t, $style, $Default_Format),"\n" unless $style eq 'none';
 835
 836     $n = $forn if defined $forn;
 837
 838     # A conservative warning to spot very silly tests.
 839     # Don't assume that your benchmark is ok simply because
 840     # you don't get this warning!
 841     print "            (warning: too few iterations for a reliable count)\n"
 842         if     $n < $Min_Count
 843             || ($t->real < 1 && $n < 1000)
 844             || $t->cpu_a < $Min_CPU;
 845     $t;
 846 }
 847
 848
 849 $_Usage{timethese} = <<'USAGE';
 850 usage: timethese($count, { Name1 => 'code1', ... });        or
 851        timethese($count, { Name1 => sub { code1 }, ... });
 852 USAGE
 853
 854 sub timethese{
 855     my($n, $alt, $style) = @_;
 856     die usage unless ref $alt eq 'HASH';
 857
 858     my @names = sort keys %$alt;
 859     $style = "" unless defined $style;
 860     print "Benchmark: " unless $style eq 'none';
 861     if ( $n > 0 ) {
 862         croak "non-integer loopcount $n, stopped" if int($n)<$n;
 863         print "timing $n iterations of" unless $style eq 'none';
 864     } else {
 865         print "running" unless $style eq 'none';
 866     }
 867     print " ", join(', ',@names) unless $style eq 'none';
 868     unless ( $n > 0 ) {
 869         my $for = n_to_for( $n );
 870         print ", each" if $n > 1 && $style ne 'none';
 871         print " for at least $for CPU seconds" unless $style eq 'none';
 872     }
 873     print "...\n" unless $style eq 'none';
 874
 875     # we could save the results in an array and produce a summary here
 876     # sum, min, max, avg etc etc
 877     my %results;
 878     foreach my $name (@names) {
 879         $results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
 880     }
 881
 882     return \%results;
 883 }
 884
 885
 886 $_Usage{cmpthese} = <<'USAGE';
 887 usage: cmpthese($count, { Name1 => 'code1', ... });        or
 888        cmpthese($count, { Name1 => sub { code1 }, ... });  or
 889        cmpthese($result, $style);
 890 USAGE
 891
 892 sub cmpthese{
 893     my ($results, $style);
 894
 895     # $count can be a blessed object.
 896     if ( ref $_[0] eq 'HASH' ) {
 897         ($results, $style) = @_;
 898     }
 899     else {
 900         my($count, $code) = @_[0,1];
 901         $style = $_[2] if defined $_[2];
 902
 903         die usage unless ref $code eq 'HASH';
 904
 905         $results = timethese($count, $code, ($style || "none"));
 906     }
 907
 908     $style = "" unless defined $style;
 909
 910     # Flatten in to an array of arrays with the name as the first field
 911     my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
 912
 913     for (@vals) {
 914         # The epsilon fudge here is to prevent div by 0.  Since clock
 915         # resolutions are much larger, it's below the noise floor.
 916         my $elapsed = do {
 917             if ($style eq 'nop') {$_->[4]+$_->[5]}
 918             elsif ($style eq 'noc') {$_->[2]+$_->[3]}
 919             else {$_->[2]+$_->[3]+$_->[4]+$_->[5]}
 920         };
 921         my $rate = $_->[6]/(($elapsed)+0.000000000000001);
 922         $_->[7] = $rate;
 923     }
 924
 925     # Sort by rate
 926     @vals = sort { $a->[7] <=> $b->[7] } @vals;
 927
 928     # If more than half of the rates are greater than one...
 929     my $display_as_rate = @vals ? ($vals[$#vals>>1]->[7] > 1) : 0;
 930
 931     my @rows;
 932     my @col_widths;
 933
 934     my @top_row = (
 935         '',
 936         $display_as_rate ? 'Rate' : 's/iter',
 937         map { $_->[0] } @vals
 938     );
 939
 940     push @rows, \@top_row;
 941     @col_widths = map { length( $_ ) } @top_row;
 942
 943     # Build the data rows
 944     # We leave the last column in even though it never has any data.  Perhaps
 945     # it should go away.  Also, perhaps a style for a single column of
 946     # percentages might be nice.
 947     for my $row_val ( @vals ) {
 948         my @row;
 949
 950         # Column 0 = test name
 951         push @row, $row_val->[0];
 952         $col_widths[0] = length( $row_val->[0] )
 953             if length( $row_val->[0] ) > $col_widths[0];
 954
 955         # Column 1 = performance
 956         my $row_rate = $row_val->[7];
 957
 958         # We assume that we'll never get a 0 rate.
 959         my $rate = $display_as_rate ? $row_rate : 1 / $row_rate;
 960
 961         # Only give a few decimal places before switching to sci. notation,
 962         # since the results aren't usually that accurate anyway.
 963         my $format =
 964            $rate >= 100 ?
 965                "%0.0f" :
 966            $rate >= 10 ?
 967                "%0.1f" :
 968            $rate >= 1 ?
 969                "%0.2f" :
 970            $rate >= 0.1 ?
 971                "%0.3f" :
 972                "%0.2e";
 973
 974         $format .= "/s"
 975             if $display_as_rate;
 976
 977         my $formatted_rate = sprintf( $format, $rate );
 978         push @row, $formatted_rate;
 979         $col_widths[1] = length( $formatted_rate )
 980             if length( $formatted_rate ) > $col_widths[1];
 981
 982         # Columns 2..N = performance ratios
 983         my $skip_rest = 0;
 984         for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
 985             my $col_val = $vals[$col_num];
 986             my $out;
 987             if ( $skip_rest ) {
 988                 $out = '';
 989             }
 990             elsif ( $col_val->[0] eq $row_val->[0] ) {
 991                 $out = "--";
 992                 # $skip_rest = 1;
 993             }
 994             else {
 995                 my $col_rate = $col_val->[7];
 996                 $out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
 997             }
 998             push @row, $out;
 999             $col_widths[$col_num+2] = length( $out )
1000                 if length( $out ) > $col_widths[$col_num+2];
1001
1002             # A little weirdness to set the first column width properly
1003             $col_widths[$col_num+2] = length( $col_val->[0] )
1004                 if length( $col_val->[0] ) > $col_widths[$col_num+2];
1005         }
1006         push @rows, \@row;
1007     }
1008
1009     return \@rows if $style eq "none";
1010
1011     # Equalize column widths in the chart as much as possible without
1012     # exceeding 80 characters.  This does not use or affect cols 0 or 1.
1013     my @sorted_width_refs =
1014        sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];
1015     my $max_width = ${$sorted_width_refs[-1]};
1016
1017     my $total = @col_widths - 1 ;
1018     for ( @col_widths ) { $total += $_ }
1019
1020     STRETCHER:
1021     while ( $total < 80 ) {
1022         my $min_width = ${$sorted_width_refs[0]};
1023         last
1024            if $min_width == $max_width;
1025         for ( @sorted_width_refs ) {
1026             last
1027                 if $$_ > $min_width;
1028             ++$$_;
1029             ++$total;
1030             last STRETCHER
1031                 if $total >= 80;
1032         }
1033     }
1034
1035     # Dump the output
1036     my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
1037     substr( $format, 1, 0 ) = '-';
1038     for ( @rows ) {
1039         printf $format, @$_;
1040     }
1041
1042     return \@rows ;
1043 }
1044
1045
1046 1;