Porting/bench.pl

   1 #!/usr/bin/perl
   2 #
   3 # A tool for analysing the performance of the code snippets found in
   4 # t/perf/benchmarks or similar
   5
   6
   7 =head1 NAME
   8
   9 bench.pl - Compare the performance of perl code snippets across multiple
  10 perls.
  11
  12 =head1 SYNOPSIS
  13
  14     # Basic: run the tests in t/perf/benchmarks against two or
  15     # more perls
  16
  17     bench.pl [options] -- perlA[=labelA] perlB[=labelB] ...
  18
  19     # run the tests against same perlA 2x, with and without extra
  20     # options
  21
  22     bench.pl [options] -- perlA=fast PerlA=slow -Mstrict -Dpsltoc
  23
  24     # Run bench.pl's own built-in sanity tests
  25
  26     bench.pl --action=selftest
  27
  28 =head1 DESCRIPTION
  29
  30 By default, F<bench.pl> will run code snippets found in
  31 F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
  32 how many instruction reads, data writes, branches, cache misses, etc. that
  33 one execution of the snippet uses. It will run them against two or more
  34 perl executables and show how much each test has gotten better or worse.
  35
  36 It is modelled on the F<perlbench> tool, but since it measures instruction
  37 reads etc., rather than timings, it is much more precise and reproducible.
  38 It is also considerably faster, and is capable of running tests in
  39 parallel (with C<-j>). Rather than  displaying a single relative
  40 percentage per test/perl combination, it displays values for 13 different
  41 measurements, such as instruction reads, conditional branch misses etc.
  42
  43 There are options to write the raw data to a file, and to read it back.
  44 This means that you can view the same run data in different views with
  45 different selection and sort options.
  46
  47 The optional C<=label> after each perl executable is used in the display
  48 output.
  49
  50 =head1 OPTIONS
  51
  52 =over 4
  53
  54 =item *
  55
  56 --action=I<foo>
  57
  58 What action to perform. The default is  I<grind>, which runs the benchmarks
  59 using I<cachegrind> as the back end. The only other action at the moment is
  60 I<selftest>, which runs some basic sanity checks and produces TAP output.
  61
  62 =item *
  63
  64 --average
  65
  66 Only display the overall average, rather than the results for each
  67 individual test.
  68
  69 =item *
  70
  71 --benchfile=I<foo>
  72
  73 The path of the file which contains the benchmarks (F<t/perf/benchmarks>
  74 by default).
  75
  76 =item *
  77
  78 --bisect=I<field,minval,maxval>
  79
  80 Run a single test against one perl and exit with a zero status if the
  81 named field is in the specified range; exit 1 otherwise. It will complain
  82 if more than one test or perl has been specified. It is intended to be
  83 called as part of a bisect run, to determine when something changed.
  84 For example,
  85
  86     bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
  87         ./miniperl
  88
  89 might be called from bisect to find when the number of instruction reads
  90 for test I<foo> falls outside the range 100..105.
  91
  92 =item *
  93
  94 --compact=<Iperl>
  95
  96 Display the results for a single perl executable in a compact form.
  97 Which perl to display is specified in the same manner as C<--norm>.
  98
  99 =item *
 100
 101 --debug
 102
 103 Enable verbose debugging output.
 104
 105 =item *
 106
 107 --fields=I<a,b,c>
 108
 109 Display only the specified fields; for example,
 110
 111     --fields=Ir,Ir_m,Ir_mm
 112
 113 If only one field is selected, the output is in more compact form.
 114
 115 =item *
 116
 117 --grindargs=I<foo>
 118
 119 Optional command-line arguments to pass to cachegrind invocations.
 120
 121 =item *
 122
 123 ---help
 124
 125 Display basic usage information.
 126
 127 =item *
 128
 129 -j I<N>
 130 --jobs=I<N>
 131
 132 Run I<N> jobs in parallel (default 1). This determines how many cachegrind
 133 process will running at a time, and should generally be set to the number
 134 of CPUs available.
 135
 136 =item *
 137
 138 --norm=I<foo>
 139
 140 Specify which perl column in the output to treat as the 100% norm.
 141 It may be a column number (0..N-1) or a perl executable name or label.
 142 It defaults to the leftmost column.
 143
 144 =item *
 145
 146 --perlargs=I<foo>
 147
 148 Optional command-line arguments to pass to each perl-under-test
 149 (perlA, perlB in synopsis) For example, C<--perlargs=-Ilib>.
 150
 151 =item *
 152
 153 --raw
 154
 155 Display raw data counts rather than percentages in the outputs. This
 156 allows you to see the exact number of intruction reads, branch misses etc.
 157 for each test/perl combination. It also causes the C<AVERAGE> display
 158 per field to be calculated based on the average of each tests's count
 159 rather than average of each percentage. This means that tests with very
 160 high counts will dominate.
 161
 162 =item *
 163
 164 --sort=I<field:perl>
 165
 166 Order the tests in the output based on the value of I<field> in the
 167 column I<perl>. The I<perl> value is as per C<--norm>. For example
 168
 169     bench.pl --sort=Dw:perl-5.20.0 \
 170         perl-5.16.0 perl-5.18.0 perl-5.20.0
 171
 172 =item *
 173
 174 -r I<file>
 175 --read=I<file>
 176
 177 Read in saved data from a previous C<--write> run from the specified file.
 178
 179 Requires C<JSON::PP> to be available.
 180
 181 =item *
 182
 183 --tests=I<FOO>
 184
 185 Specify a subset of tests to run (or in the case of C<--read>, to display).
 186 It may be either a comma-separated list of test names, or a regular
 187 expression. For example
 188
 189     --tests=expr::assign::scalar_lex,expr::assign::2list_lex
 190     --tests=/^expr::/
 191
 192 =item *
 193
 194 --verbose
 195
 196 Display progress information.
 197
 198 =item *
 199
 200 -w I<file>
 201 --write=I<file>
 202
 203 Save the raw data to the specified file. It can be read back later with
 204 C<--read>.
 205
 206 Requires C<JSON::PP> to be available.
 207
 208 =back
 209
 210 =cut
 211
 212
 213
 214 use 5.010000;
 215 use warnings;
 216 use strict;
 217 use Getopt::Long qw(:config no_auto_abbrev require_order);
 218 use IPC::Open2 ();
 219 use IO::Select;
 220 use IO::File;
 221 use POSIX ":sys_wait_h";
 222
 223 # The version of the file format used to save data. We refuse to process
 224 # the file if the integer component differs.
 225
 226 my $FORMAT_VERSION = 1.0;
 227
 228 # The fields we know about
 229
 230 my %VALID_FIELDS = map { $_ => 1 }
 231     qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
 232
 233 sub usage {
 234     die <<EOF;
 235 usage: $0 [options] -- perl[=label] ...
 236   --action=foo       What action to perform [default: grind].
 237   --average          Only display average, not individual test results.
 238   --benchfile=foo    File containing the benchmarks;
 239                        [default: t/perf/benchmarks].
 240   --bisect=f,min,max run a single test against one perl and exit with a
 241                        zero status if the named field is in the specified
 242                        range; exit 1 otherwise.
 243   --compact=perl     Display the results of a single perl in compact form.
 244                      Which perl specified like --norm
 245   --debug            Enable verbose debugging output.
 246   --fields=a,b,c     Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
 247   --grindargs=foo    Optional command-line args to pass to cachegrind.
 248   --help             Display this help.
 249   -j|--jobs=N        Run N jobs in parallel [default 1].
 250   --norm=perl        Which perl column to treat as 100%; may be a column
 251                        number (0..N-1) or a perl executable name or label;
 252                        [default: 0].
 253   --perlargs=foo     Optional command-line args to pass to each perl to run.
 254   --raw              Display raw data counts rather than percentages.
 255   --sort=field:perl  Sort the tests based on the value of 'field' in the
 256                        column 'perl'. The perl value is as per --norm.
 257   -r|--read=file     Read in previously saved data from the specified file.
 258   --tests=FOO        Select only the specified tests from the benchmarks file;
 259                        FOO may be either of the form 'foo,bar' or '/regex/';
 260                        [default: all tests].
 261   --verbose          Display progress information.
 262   -w|--write=file    Save the raw data to the specified file.
 263
 264 --action is one of:
 265     grind            run the code under cachegrind
 266     selftest         perform a selftest; produce TAP output
 267
 268 The command line ends with one or more specified perl executables,
 269 which will be searched for in the current \$PATH. Each binary name may
 270 have an optional =LABEL appended, which will be used rather than the
 271 executable name in output. E.g.
 272
 273     perl-5.20.1=PRE-BUGFIX  perl-5.20.1-new=POST-BUGFIX
 274 EOF
 275 }
 276
 277 my %OPTS = (
 278     action    => 'grind',
 279     average   => 0,
 280     benchfile => 't/perf/benchmarks',
 281     bisect    => undef,
 282     compact   => undef,
 283     debug     => 0,
 284     grindargs => '',
 285     fields    => undef,
 286     jobs      => 1,
 287     norm      => 0,
 288     perlargs  => '',
 289     raw       => 0,
 290     read      => undef,
 291     sort      => undef,
 292     tests     => undef,
 293     verbose   => 0,
 294     write     => undef,
 295 );
 296
 297
 298 # process command-line args and call top-level action
 299
 300 {
 301     GetOptions(
 302         'action=s'    => \$OPTS{action},
 303         'average'     => \$OPTS{average},
 304         'benchfile=s' => \$OPTS{benchfile},
 305         'bisect=s'    => \$OPTS{bisect},
 306         'compact=s'   => \$OPTS{compact},
 307         'debug'       => \$OPTS{debug},
 308         'grindargs=s' => \$OPTS{grindargs},
 309         'help'        => \$OPTS{help},
 310         'fields=s'    => \$OPTS{fields},
 311         'jobs|j=i'    => \$OPTS{jobs},
 312         'norm=s'      => \$OPTS{norm},
 313         'perlargs=s'  => \$OPTS{perlargs},
 314         'raw'         => \$OPTS{raw},
 315         'read|r=s'    => \$OPTS{read},
 316         'sort=s'      => \$OPTS{sort},
 317         'tests=s'     => \$OPTS{tests},
 318         'verbose'     => \$OPTS{verbose},
 319         'write|w=s'   => \$OPTS{write},
 320     ) or usage;
 321
 322     usage if $OPTS{help};
 323
 324
 325     if (defined $OPTS{read} and defined $OPTS{write}) {
 326         die "Error: can't specify both --read and --write options\n";
 327     }
 328
 329     if (defined $OPTS{read} or defined $OPTS{write}) {
 330         # fail early if it's not present
 331         require JSON::PP;
 332     }
 333
 334     if (defined $OPTS{fields}) {
 335         my @f = split /,/, $OPTS{fields};
 336         for (@f) {
 337             die "Error: --fields: unknown field '$_'\n"
 338                 unless $VALID_FIELDS{$_};
 339         }
 340         my %f = map { $_ => 1 } @f;
 341         $OPTS{fields} = \%f;
 342     }
 343
 344     my %valid_actions = qw(grind 1 selftest 1);
 345     unless ($valid_actions{$OPTS{action}}) {
 346         die "Error: unrecognised action '$OPTS{action}'\n"
 347           . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
 348     }
 349
 350     if (defined $OPTS{sort}) {
 351         my @s = split /:/, $OPTS{sort};
 352         if (@s != 2) {
 353             die "Error: --sort argument should be of the form field:perl: "
 354               . "'$OPTS{sort}'\n";
 355         }
 356         my ($field, $perl) = @s;
 357         die "Error: --sort: unknown field '$field\n"
 358             unless $VALID_FIELDS{$field};
 359         # the 'perl' value will be validated later, after we have processed
 360         # the perls
 361         $OPTS{'sort-field'} = $field;
 362         $OPTS{'sort-perl'}  = $perl;
 363     }
 364
 365     if ($OPTS{action} eq 'selftest') {
 366         if (@ARGV) {
 367             die "Error: no perl executables may be specified with --read\n"
 368         }
 369     }
 370     elsif (defined $OPTS{bisect}) {
 371         die "Error: exactly one perl executable must be specified for bisect\n"
 372                                                 unless @ARGV == 1;
 373         die "Error: Can't specify both --bisect and --read\n"
 374                                                 if defined $OPTS{read};
 375         die "Error: Can't specify both --bisect and --write\n"
 376                                                 if defined $OPTS{write};
 377     }
 378     elsif (defined $OPTS{read}) {
 379         if (@ARGV) {
 380             die "Error: no perl executables may be specified with --read\n"
 381         }
 382     }
 383     elsif ($OPTS{raw}) {
 384         unless (@ARGV) {
 385             die "Error: at least one perl executable must be specified\n";
 386         }
 387     }
 388     else {
 389         unless (@ARGV >= 2) {
 390             die "Error: at least two perl executables must be specified\n";
 391         }
 392     }
 393
 394     if ($OPTS{action} eq 'grind') {
 395         do_grind(\@ARGV);
 396     }
 397     elsif ($OPTS{action} eq 'selftest') {
 398         do_selftest();
 399     }
 400 }
 401 exit 0;
 402
 403
 404 # Given a hash ref keyed by test names, filter it by deleting unwanted
 405 # tests, based on $OPTS{tests}.
 406
 407 sub filter_tests {
 408     my ($tests) = @_;
 409
 410     my $opt = $OPTS{tests};
 411     return unless defined $opt;
 412
 413     my @tests;
 414
 415     if ($opt =~ m{^/}) {
 416         $opt =~ s{^/(.+)/$}{$1}
 417             or die "Error: --tests regex must be of the form /.../\n";
 418         for (keys %$tests) {
 419             delete $tests->{$_} unless /$opt/;
 420         }
 421     }
 422     else {
 423         my %t;
 424         for (split /,/, $opt) {
 425             die "Error: no such test found: '$_'\n"
 426                 . ($OPTS{verbose} ? "  have: @{[ sort keys %$tests ]}\n" : "")
 427                 unless exists $tests->{$_};
 428             $t{$_} = 1;
 429         }
 430         for (keys %$tests) {
 431             delete $tests->{$_} unless exists $t{$_};
 432         }
 433     }
 434 }
 435
 436
 437 # Read in the test file, and filter out any tests excluded by $OPTS{tests}
 438 # return a hash ref { testname => { test }, ... }
 439 # and an array ref of the original test names order,
 440
 441 sub read_tests_file {
 442     my ($file) = @_;
 443
 444     my $ta = do $file;
 445     unless ($ta) {
 446         die "Error: can't parse '$file': $@\n" if $@;
 447         die "Error: can't read '$file': $!\n";
 448     }
 449
 450     my @orig_order;
 451     for (my $i=0; $i < @$ta; $i += 2) {
 452         push @orig_order, $ta->[$i];
 453     }
 454
 455     my $t = { @$ta };
 456     filter_tests($t);
 457     return $t, \@orig_order;
 458 }
 459
 460
 461 # Process the perl/column argument of options like --norm and --sort.
 462 # Return the index of the matching perl.
 463
 464 sub select_a_perl {
 465     my ($perl, $perls, $who) = @_;
 466
 467     if ($perl =~ /^[0-9]$/) {
 468         die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
 469                                         unless $perl < @$perls;
 470         return $perl;
 471     }
 472     else {
 473         my @perl = grep    $perls->[$_][0] eq $perl
 474                         || $perls->[$_][1] eq $perl,
 475                         0..$#$perls;
 476         die "Error: $who: unrecognised perl '$perl'\n"
 477                                         unless @perl;
 478         die "Error: $who: ambiguous perl '$perl'\n"
 479                                         if @perl > 1;
 480         return $perl[0];
 481     }
 482 }
 483
 484
 485 # Validate the list of perl=label (+ cmdline options) on the command line.
 486 # Return a list of [ exe, label, cmdline-options ] tuples, ie PUTs
 487
 488 sub process_puts {
 489     my @res_puts; # returned, each item is [ perlexe, label, @putargs ]
 490     my %seen;
 491     my @putargs; # collect not-perls into args per PUT
 492
 493     for my $p (reverse @_) {
 494         push @putargs, $p and next if $p =~ /^-/; # not-perl, dont send to qx//
 495
 496         my ($perl, $label) = split /=/, $p, 2;
 497         $label //= $perl;
 498         $label = $perl.$label if $label =~ /^\+/;
 499         die "$label cannot be used on 2 different PUTs\n" if $seen{$label}++;
 500
 501         my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
 502         if ($r eq "ok\n") {
 503             push @res_puts, [ $perl, $label, reverse @putargs ];
 504             @putargs = ();
 505             warn "Added Perl-Under-Test: [ @{[@{$res_puts[-1]}]} ]\n"
 506                 if $OPTS{verbose};
 507         } else {
 508             warn "putargs: @putargs + $p, a not-perl: $r\n"
 509                 if $OPTS{verbose};
 510             push @putargs, $p; # not-perl
 511         }
 512     }
 513     return reverse @res_puts;
 514 }
 515
 516
 517
 518 # Return a string containing perl test code wrapped in a loop
 519 # that runs $ARGV[0] times
 520
 521 sub make_perl_prog {
 522     my ($test, $desc, $setup, $code) = @_;
 523
 524     return <<EOF;
 525 # $desc
 526 package $test;
 527 BEGIN { srand(0) }
 528 $setup;
 529 for my \$__loop__ (1..\$ARGV[0]) {
 530     $code;
 531 }
 532 EOF
 533 }
 534
 535
 536 # Parse the output from cachegrind. Return a hash ref.
 537 # See do_selftest() for examples of the output format.
 538
 539 sub parse_cachegrind {
 540     my ($output, $id, $perl) = @_;
 541
 542     my %res;
 543
 544     my @lines = split /\n/, $output;
 545     for (@lines) {
 546         unless (s/(==\d+==)|(--\d+--) //) {
 547             die "Error: while executing $id:\n"
 548               . "unexpected code or cachegrind output:\n$_\n";
 549         }
 550         if (/I   refs:\s+([\d,]+)/) {
 551             $res{Ir} = $1;
 552         }
 553         elsif (/I1  misses:\s+([\d,]+)/) {
 554             $res{Ir_m1} = $1;
 555         }
 556         elsif (/LLi misses:\s+([\d,]+)/) {
 557             $res{Ir_mm} = $1;
 558         }
 559         elsif (/D   refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
 560             @res{qw(Dr Dw)} = ($1,$2);
 561         }
 562         elsif (/D1  misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
 563             @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
 564         }
 565         elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
 566             @res{qw(Dr_mm Dw_mm)} = ($1,$2);
 567         }
 568         elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
 569             @res{qw(COND IND)} = ($1,$2);
 570         }
 571         elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
 572             @res{qw(COND_m IND_m)} = ($1,$2);
 573         }
 574     }
 575
 576     for my $field (keys %VALID_FIELDS) {
 577         die "Error: can't parse '$field' field from cachegrind output:\n$output"
 578             unless exists $res{$field};
 579         $res{$field} =~ s/,//g;
 580     }
 581
 582     return \%res;
 583 }
 584
 585
 586 # Handle the 'grind' action
 587
 588 sub do_grind {
 589     my ($perl_args) = @_; # the residue of @ARGV after option processing
 590
 591     my ($loop_counts, $perls, $results, $tests, $order);
 592     my ($bisect_field, $bisect_min, $bisect_max);
 593
 594     if (defined $OPTS{bisect}) {
 595         ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
 596         die "Error: --bisect option must be of form 'field,integer,integer'\n"
 597             unless
 598                     defined $bisect_max
 599                 and $bisect_min =~ /^[0-9]+$/
 600                 and $bisect_max =~ /^[0-9]+$/;
 601
 602         die "Error: unrecognised field '$bisect_field' in --bisect option\n"
 603             unless $VALID_FIELDS{$bisect_field};
 604
 605         die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
 606             if $bisect_min > $bisect_max;
 607     }
 608
 609     if (defined $OPTS{read}) {
 610         open my $in, '<:encoding(UTF-8)', $OPTS{read}
 611             or die " Error: can't open $OPTS{read} for reading: $!\n";
 612         my $data = do { local $/; <$in> };
 613         close $in;
 614
 615         my $hash = JSON::PP::decode_json($data);
 616         if (int($FORMAT_VERSION) < int($hash->{version})) {
 617             die "Error: unsupported version $hash->{version} in file"
 618               . "'$OPTS{read}' (too new)\n";
 619         }
 620         ($loop_counts, $perls, $results, $tests, $order) =
 621             @$hash{qw(loop_counts perls results tests order)};
 622
 623         filter_tests($results);
 624         filter_tests($tests);
 625
 626         if (!$order) {
 627             $order = [ sort keys %$tests ];
 628         }
 629     }
 630     else {
 631         # How many times to execute the loop for the two trials. The lower
 632         # value is intended to do the loop enough times that branch
 633         # prediction has taken hold; the higher loop allows us to see the
 634         # branch misses after that
 635         $loop_counts = [10, 20];
 636
 637         ($tests, $order) = read_tests_file($OPTS{benchfile});
 638         die "Error: only a single test may be specified with --bisect\n"
 639             if defined $OPTS{bisect} and keys %$tests != 1;
 640
 641         $perls = [ process_puts(@$perl_args) ];
 642
 643
 644         $results = grind_run($tests, $order, $perls, $loop_counts);
 645     }
 646
 647     # now that we have a list of perls, use it to process the
 648     # 'perl' component of the --norm and --sort args
 649
 650     $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
 651     if (defined $OPTS{'sort-perl'}) {
 652         $OPTS{'sort-perl'} =
 653                 select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
 654     }
 655
 656     if (defined $OPTS{'compact'}) {
 657         $OPTS{'compact'} =
 658                 select_a_perl($OPTS{'compact'}, $perls, "--compact");
 659     }
 660     if (defined $OPTS{write}) {
 661         my $json = JSON::PP::encode_json({
 662                     version      => $FORMAT_VERSION,
 663                     loop_counts  => $loop_counts,
 664                     perls        => $perls,
 665                     results      => $results,
 666                     tests        => $tests,
 667                     order        => $order,
 668                 });
 669
 670         open my $out, '>:encoding(UTF-8)', $OPTS{write}
 671             or die " Error: can't open $OPTS{write} for writing: $!\n";
 672         print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
 673         close $out       or die "Error: closing file '$OPTS{write}': $!\n";
 674     }
 675     else {
 676         my ($processed, $averages) =
 677                     grind_process($results, $perls, $loop_counts);
 678
 679         if (defined $OPTS{bisect}) {
 680             my @r = values %$results;
 681             die "Panic: expected exactly one test result in bisect\n"
 682                                                             if @r != 1;
 683             @r = values %{$r[0]};
 684             die "Panic: expected exactly one perl result in bisect\n"
 685                                                             if @r != 1;
 686             my $c = $r[0]{$bisect_field};
 687             die "Panic: no result in bisect for field '$bisect_field'\n"
 688                                                             unless defined $c;
 689             exit 0 if $bisect_min <= $c and $c <= $bisect_max;
 690             exit 1;
 691         }
 692         elsif (defined $OPTS{compact}) {
 693             grind_print_compact($processed, $averages, $OPTS{compact},
 694                                 $perls, $tests, $order);
 695         }
 696         else {
 697             grind_print($processed, $averages, $perls, $tests, $order);
 698         }
 699     }
 700 }
 701
 702
 703 # Run cachegrind for every test/perl combo.
 704 # It may run several processes in parallel when -j is specified.
 705 # Return a hash ref suitable for input to grind_process()
 706
 707 sub grind_run {
 708     my ($tests, $order, $perls, $counts) = @_;
 709
 710     # Build a list of all the jobs to run
 711
 712     my @jobs;
 713
 714     for my $test (grep $tests->{$_}, @$order) {
 715
 716         # Create two test progs: one with an empty loop and one with code.
 717         # Note that the empty loop is actually '{1;}' rather than '{}';
 718         # this causes the loop to have a single nextstate rather than a
 719         # stub op, so more closely matches the active loop; e.g.:
 720         #   {1;}    => nextstate;                       unstack
 721         #   {$x=1;} => nextstate; const; gvsv; sassign; unstack
 722         my @prog = (
 723             make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
 724             make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
 725         );
 726
 727         for my $p (@$perls) {
 728             my ($perl, $label, @putargs) = @$p;
 729
 730             # Run both the empty loop and the active loop
 731             # $counts->[0] and $counts->[1] times.
 732
 733             for my $i (0,1) {
 734                 for my $j (0,1) {
 735                     my $cmd = "PERL_HASH_SEED=0 "
 736                             . "valgrind --tool=cachegrind  --branch-sim=yes "
 737                             . "--cachegrind-out-file=/dev/null "
 738                             . "$OPTS{grindargs} "
 739                             . "$perl $OPTS{perlargs} @putargs - $counts->[$j] 2>&1";
 740                     # for debugging and error messages
 741                     my $id = "$test/$label "
 742                         . ($i ? "active" : "empty") . "/"
 743                         . ($j ? "long"   : "short") . " loop";
 744
 745                     push @jobs, {
 746                         test   => $test,
 747                         perl   => $perl,
 748                         plabel => $label,
 749                         cmd    => $cmd,
 750                         prog   => $prog[$i],
 751                         active => $i,
 752                         loopix => $j,
 753                         id     => $id,
 754                     };
 755                 }
 756             }
 757         }
 758     }
 759
 760     # Execute each cachegrind and store the results in %results.
 761
 762     local $SIG{PIPE} = 'IGNORE';
 763
 764     my $max_jobs = $OPTS{jobs};
 765     my $running  = 0; # count of executing jobs
 766     my %pids;         # map pids to jobs
 767     my %fds;          # map fds  to jobs
 768     my %results;
 769     my $select = IO::Select->new();
 770
 771     while (@jobs or $running) {
 772
 773         if ($OPTS{debug}) {
 774             printf "Main loop: pending=%d running=%d\n",
 775                 scalar(@jobs), $running;
 776         }
 777
 778         # Start new jobs
 779
 780         while (@jobs && $running < $max_jobs) {
 781             my $job = shift @jobs;
 782             my ($id, $cmd) =@$job{qw(id cmd)};
 783
 784             my ($in, $out, $pid);
 785             warn "Starting $id\n" if $OPTS{verbose};
 786             eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
 787                 or die "Error: while starting cachegrind subprocess"
 788                    ." for $id:\n$@";
 789             $running++;
 790             $pids{$pid}    = $job;
 791             $fds{"$out"}   = $job;
 792             $job->{out_fd} = $out;
 793             $job->{output} = '';
 794             $job->{pid}    = $pid;
 795
 796             $out->blocking(0);
 797             $select->add($out);
 798
 799             if ($OPTS{debug}) {
 800                 print "Started pid $pid for $id\n";
 801             }
 802
 803             # Note:
 804             # In principle we should write to $in in the main select loop,
 805             # since it may block. In reality,
 806             #  a) the code we write to the perl process's stdin is likely
 807             #     to be less than the OS's pipe buffer size;
 808             #  b) by the time the perl process has read in all its stdin,
 809             #     the only output it should have generated is a few lines
 810             #     of cachegrind output preamble.
 811             # If these assumptions change, then perform the following print
 812             # in the select loop instead.
 813
 814             print $in $job->{prog};
 815             close $in;
 816         }
 817
 818         # Get output of running jobs
 819
 820         if ($OPTS{debug}) {
 821             printf "Select: waiting on (%s)\n",
 822                 join ', ', sort { $a <=> $b } map $fds{$_}{pid},
 823                             $select->handles;
 824         }
 825
 826         my @ready = $select->can_read;
 827
 828         if ($OPTS{debug}) {
 829             printf "Select: pids (%s) ready\n",
 830                 join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
 831         }
 832
 833         unless (@ready) {
 834             die "Panic: select returned no file handles\n";
 835         }
 836
 837         for my $fd (@ready) {
 838             my $j = $fds{"$fd"};
 839             my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
 840             unless (defined $r) {
 841                 die "Panic: Read from process running $j->{id} gave:\n$!";
 842             }
 843             next if $r;
 844
 845             # EOF
 846
 847             if ($OPTS{debug}) {
 848                 print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
 849             }
 850
 851             $select->remove($j->{out_fd});
 852             close($j->{out_fd})
 853                 or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
 854             $running--;
 855             delete $fds{"$j->{out_fd}"};
 856             my $output = $j->{output};
 857
 858             if ($OPTS{debug}) {
 859                 my $p = $j->{prog};
 860                 $p =~ s/^/    : /mg;
 861                 my $o = $output;
 862                 $o =~ s/^/    : /mg;
 863
 864                 print "\n$j->{id}/\nCommand: $j->{cmd}\n"
 865                     . "Input:\n$p"
 866                     . "Output\n$o";
 867             }
 868
 869             $results{$j->{test}}{$j->{plabel}}[$j->{active}][$j->{loopix}]
 870                     = parse_cachegrind($output, $j->{id}, $j->{perl});
 871         }
 872
 873         # Reap finished jobs
 874
 875         while (1) {
 876             my $kid = waitpid(-1, WNOHANG);
 877             my $ret = $?;
 878             last if $kid <= 0;
 879
 880             unless (exists $pids{$kid}) {
 881                 die "Panic: reaped unexpected child $kid";
 882             }
 883             my $j = $pids{$kid};
 884             if ($ret) {
 885                 die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
 886                     . "with the following output\n:$j->{output}\n";
 887             }
 888             delete $pids{$kid};
 889         }
 890     }
 891
 892     return \%results;
 893 }
 894
 895
 896
 897
 898 # grind_process(): process the data that has been extracted from
 899 # cachgegrind's output.
 900 #
 901 # $res is of the form ->{benchmark_name}{perl_name}[active][count]{field_name},
 902 # where active is 0 or 1 indicating an empty or active loop,
 903 # count is 0 or 1 indicating a short or long loop. E.g.
 904 #
 905 #    $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][10]{Dw_mm}
 906 #
 907 # The $res data structure is modified in-place by this sub.
 908 #
 909 # $perls is [ [ perl-exe, perl-label], .... ].
 910 #
 911 # $counts is [ N, M ] indicating the counts for the short and long loops.
 912 #
 913 #
 914 # return \%output, \%averages, where
 915 #
 916 # $output{benchmark_name}{perl_name}{field_name} = N
 917 # $averages{perl_name}{field_name} = M
 918 #
 919 # where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
 920 # M is the average raw count over all tests ($OPTS{raw}), or
 921 # 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
 922
 923 sub grind_process {
 924     my ($res, $perls, $counts) = @_;
 925
 926     # Process the four results for each test/perf combo:
 927     # Convert
 928     #    $res->{benchmark_name}{perl_name}[active][count]{field_name} = n
 929     # to
 930     #    $res->{benchmark_name}{perl_name}{field_name} = averaged_n
 931     #
 932     # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
 933     #                     empty loops, eliminating startup time
 934     # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
 935     #                     active loops, eliminating startup time
 936     # (the two startup times may be different because different code
 937     # is being compiled); the difference of the two results above
 938     # divided by the count difference is the time to execute the
 939     # active code once, eliminating both startup and loop overhead.
 940
 941     for my $tests (values %$res) {
 942         for my $r (values %$tests) {
 943             my $r2;
 944             for (keys %{$r->[0][0]}) {
 945                 my $n = (  ($r->[1][1]{$_} - $r->[1][0]{$_})
 946                          - ($r->[0][1]{$_} - $r->[0][0]{$_})
 947                         ) / ($counts->[1] - $counts->[0]);
 948                 $r2->{$_} = $n;
 949             }
 950             $r = $r2;
 951         }
 952     }
 953
 954     my %totals;
 955     my %counts;
 956     my %data;
 957
 958     my $perl_norm = $perls->[$OPTS{norm}][1]; # the label of the reference perl
 959
 960     for my $test_name (keys %$res) {
 961         my $res1 = $res->{$test_name};
 962         my $res2_norm = $res1->{$perl_norm};
 963         for my $perl (keys %$res1) {
 964             my $res2 = $res1->{$perl};
 965             for my $field (keys %$res2) {
 966                 my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});
 967
 968                 if ($OPTS{raw}) {
 969                     # Avoid annoying '-0.0' displays. Ideally this number
 970                     # should never be negative, but fluctuations in
 971                     # startup etc can theoretically make this happen
 972                     $q = 0 if ($q <= 0 && $q > -0.1);
 973                     $totals{$perl}{$field} += $q;
 974                     $counts{$perl}{$field}++;
 975                     $data{$test_name}{$perl}{$field} = $q;
 976                     next;
 977                 }
 978
 979                 # $p and $q are notionally integer counts, but
 980                 # due to variations in startup etc, it's possible for a
 981                 # count which is supposedly zero to be calculated as a
 982                 # small positive or negative value.
 983                 # In this case, set it to zero. Further below we
 984                 # special-case zeros to avoid division by zero errors etc.
 985
 986                 $p = 0.0 if $p < 0.01;
 987                 $q = 0.0 if $q < 0.01;
 988
 989                 if ($p == 0.0 && $q == 0.0) {
 990                     # Both perls gave a count of zero, so no change:
 991                     # treat as 100%
 992                     $totals{$perl}{$field} += 1;
 993                     $counts{$perl}{$field}++;
 994                     $data{$test_name}{$perl}{$field} = 1;
 995                 }
 996                 elsif ($p == 0.0 || $q == 0.0) {
 997                     # If either count is zero, there were too few events
 998                     # to give a meaningful ratio (and we will end up with
 999                     # division by zero if we try). Mark the result undef,
1000                     # indicating that it shouldn't be displayed; and skip
1001                     # adding to the average
1002                     $data{$test_name}{$perl}{$field} = undef;
1003                 }
1004                 else {
1005                     # For averages, we record q/p rather than p/q.
1006                     # Consider a test where perl_norm took 1000 cycles
1007                     # and perlN took 800 cycles. For the individual
1008                     # results we display p/q, or 1.25; i.e. a quarter
1009                     # quicker. For the averages, we instead sum all
1010                     # the 0.8's, which gives the total cycles required to
1011                     # execute all tests, with all tests given equal
1012                     # weight. Later we reciprocate the final result,
1013                     # i.e. 1/(sum(qi/pi)/n)
1014
1015                     $totals{$perl}{$field} += $q/$p;
1016                     $counts{$perl}{$field}++;
1017                     $data{$test_name}{$perl}{$field} = $p/$q;
1018                 }
1019             }
1020         }
1021     }
1022
1023     # Calculate averages based on %totals and %counts accumulated earlier.
1024
1025     my %averages;
1026     for my $perl (keys %totals) {
1027         my $t = $totals{$perl};
1028         for my $field (keys %$t) {
1029             $averages{$perl}{$field} = $OPTS{raw}
1030                 ? $t->{$field} / $counts{$perl}{$field}
1031                   # reciprocal - see comments above
1032                 : $counts{$perl}{$field} / $t->{$field};
1033         }
1034     }
1035
1036     return \%data, \%averages;
1037 }
1038
1039
1040
1041 # print a standard blurb at the start of the grind display
1042
1043 sub grind_blurb {
1044     my ($perls) = @_;
1045
1046     print <<EOF;
1047 Key:
1048     Ir   Instruction read
1049     Dr   Data read
1050     Dw   Data write
1051     COND conditional branches
1052     IND  indirect branches
1053     _m   branch predict miss
1054     _m1  level 1 cache miss
1055     _mm  last cache (e.g. L3) miss
1056     -    indeterminate percentage (e.g. 1/0)
1057
1058 EOF
1059
1060     if ($OPTS{raw}) {
1061         print "The numbers represent raw counts per loop iteration.\n";
1062     }
1063     else {
1064         print <<EOF;
1065 The numbers represent relative counts per loop iteration, compared to
1066 $perls->[$OPTS{norm}][1] at 100.0%.
1067 Higher is better: for example, using half as many instructions gives 200%,
1068 while using twice as many gives 50%.
1069 EOF
1070     }
1071 }
1072
1073
1074 # return a sorted list of the test names, plus 'AVERAGE'
1075
1076 sub sorted_test_names {
1077     my ($results, $order, $perls) = @_;
1078
1079     my @names;
1080     unless ($OPTS{average}) {
1081         if (defined $OPTS{'sort-field'}) {
1082             my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
1083             my $perl = $perls->[$perlix][0];
1084             @names = sort
1085                 {
1086                         $results->{$a}{$perl}{$field}
1087                     <=> $results->{$b}{$perl}{$field}
1088                 }
1089                 keys %$results;
1090         }
1091         else {
1092             @names = grep $results->{$_}, @$order;
1093         }
1094     }
1095
1096     # No point in displaying average for only one test.
1097     push @names,  'AVERAGE' unless @names == 1;
1098     @names;
1099 }
1100
1101
1102 # grind_print(): display the tabulated results of all the cachegrinds.
1103 #
1104 # Arguments are of the form:
1105 #    $results->{benchmark_name}{perl_name}{field_name} = N
1106 #    $averages->{perl_name}{field_name} = M
1107 #    $perls = [ [ perl-exe, perl-label ], ... ]
1108 #    $tests->{test_name}{desc => ..., ...}
1109
1110 sub grind_print {
1111     my ($results, $averages, $perls, $tests, $order) = @_;
1112
1113     my @perl_names = map $_->[0], @$perls;
1114     my @perl_labels = map $_->[1], @$perls;
1115     my %perl_labels;
1116     $perl_labels{$_->[0]} = $_->[1] for @$perls;
1117
1118     my $field_label_width = 6;
1119     # Calculate the width to display for each column.
1120     my $min_width = $OPTS{raw} ? 8 : 6;
1121     my @widths = map { length($_) < $min_width ? $min_width : length($_) }
1122                         @perl_labels;
1123
1124     # Print standard header.
1125     grind_blurb($perls);
1126
1127     my @test_names = sorted_test_names($results, $order, $perls);
1128
1129     # If only a single field is to be displayed, use a more compact
1130     # format with only a single line of output per test.
1131
1132     my $one_field = defined $OPTS{fields} &&  keys(%{$OPTS{fields}}) == 1;
1133
1134     if ($one_field) {
1135         print "Results for field " . (keys(%{$OPTS{fields}}))[0] . ".\n";
1136
1137         # The first column will now contain test names rather than
1138         # field names; Calculate the max width.
1139
1140         $field_label_width = 0;
1141         for (@test_names) {
1142             $field_label_width = length if length > $field_label_width;
1143         }
1144
1145         # Print the perl executables header.
1146
1147         print "\n";
1148         for my $i (0,1) {
1149             print " " x $field_label_width;
1150             for (0..$#widths) {
1151                 printf " %*s", $widths[$_],
1152                     $i ? ('-' x$widths[$_]) :  $perl_labels[$_];
1153             }
1154             print "\n";
1155         }
1156     }
1157
1158     # Dump the results for each test.
1159
1160     for my $test_name (@test_names) {
1161         my $doing_ave = ($test_name eq 'AVERAGE');
1162         my $res1 = $doing_ave ? $averages : $results->{$test_name};
1163
1164         unless ($one_field) {
1165             print "\n$test_name";
1166             print "\n$tests->{$test_name}{desc}" unless $doing_ave;
1167             print "\n\n";
1168
1169             # Print the perl executables header.
1170             for my $i (0,1) {
1171                 print " " x $field_label_width;
1172                 for (0..$#widths) {
1173                     printf " %*s", $widths[$_],
1174                         $i ? ('-' x$widths[$_]) :  $perl_labels[$_];
1175                 }
1176                 print "\n";
1177             }
1178         }
1179
1180         for my $field (qw(Ir Dr Dw COND IND
1181                           N
1182                           COND_m IND_m
1183                           N
1184                           Ir_m1 Dr_m1 Dw_m1
1185                           N
1186                           Ir_mm Dr_mm Dw_mm
1187                       ))
1188         {
1189             next if $OPTS{fields} and ! exists $OPTS{fields}{$field};
1190
1191             if ($field eq 'N') {
1192                 print "\n";
1193                 next;
1194             }
1195
1196             if ($one_field) {
1197                 printf "%-*s", $field_label_width, $test_name;
1198             }
1199             else {
1200                 printf "%*s", $field_label_width, $field;
1201             }
1202
1203             for my $i (0..$#widths) {
1204                 my $res2 = $res1->{$perl_labels[$i]};
1205                 my $p = $res2->{$field};
1206                 if (!defined $p) {
1207                     printf " %*s", $widths[$i], '-';
1208                 }
1209                 elsif ($OPTS{raw}) {
1210                     printf " %*.1f", $widths[$i], $p;
1211                 }
1212                 else {
1213                     printf " %*.2f", $widths[$i], $p * 100;
1214                 }
1215             }
1216             print "\n";
1217         }
1218     }
1219 }
1220
1221
1222
1223 # grind_print_compact(): like grind_print(), but display a single perl
1224 # in a compact form. Has an additional arg, $which_perl, which specifies
1225 # which perl to display.
1226 #
1227 # Arguments are of the form:
1228 #    $results->{benchmark_name}{perl_name}{field_name} = N
1229 #    $averages->{perl_name}{field_name} = M
1230 #    $perls = [ [ perl-exe, perl-label ], ... ]
1231 #    $tests->{test_name}{desc => ..., ...}
1232
1233 sub grind_print_compact {
1234     my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;
1235
1236
1237     # the width to display for each column.
1238     my $width = $OPTS{raw} ? 7 : 6;
1239
1240     # Print standard header.
1241     grind_blurb($perls);
1242
1243     print "\nResults for $perls->[$which_perl][1]\n\n";
1244
1245     my @test_names = sorted_test_names($results, $order, $perls);
1246
1247     # Dump the results for each test.
1248
1249      my @fields = qw( Ir Dr Dw
1250                       COND IND
1251                       COND_m IND_m
1252                       Ir_m1 Dr_m1 Dw_m1
1253                       Ir_mm Dr_mm Dw_mm
1254                     );
1255     if ($OPTS{fields}) {
1256         @fields = grep exists $OPTS{fields}{$_}, @fields;
1257     }
1258
1259     printf " %*s", $width, $_      for @fields;
1260     print "\n";
1261     printf " %*s", $width, '------' for @fields;
1262     print "\n";
1263
1264     for my $test_name (@test_names) {
1265         my $doing_ave = ($test_name eq 'AVERAGE');
1266         my $res = $doing_ave ? $averages : $results->{$test_name};
1267         $res = $res->{$perls->[$which_perl][0]};
1268
1269         for my $field (@fields) {
1270             my $p = $res->{$field};
1271             if (!defined $p) {
1272                 printf " %*s", $width, '-';
1273             }
1274             elsif ($OPTS{raw}) {
1275                 printf " %*.1f", $width, $p;
1276             }
1277             else {
1278                 printf " %*.2f", $width, $p * 100;
1279             }
1280
1281         }
1282
1283         print "  $test_name\n";
1284     }
1285 }
1286
1287
1288 # do_selftest(): check that we can parse known cachegrind()
1289 # output formats. If the output of cachegrind changes, add a *new*
1290 # test here; keep the old tests to make sure we continue to parse
1291 # old cachegrinds
1292
1293 sub do_selftest {
1294
1295     my @tests = (
1296         'standard',
1297         <<'EOF',
1298 ==32350== Cachegrind, a cache and branch-prediction profiler
1299 ==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
1300 ==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
1301 ==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
1302 ==32350==
1303 --32350-- warning: L3 cache found, using its data for the LL simulation.
1304 ==32350==
1305 ==32350== I   refs:      1,124,055
1306 ==32350== I1  misses:        5,573
1307 ==32350== LLi misses:        3,338
1308 ==32350== I1  miss rate:      0.49%
1309 ==32350== LLi miss rate:      0.29%
1310 ==32350==
1311 ==32350== D   refs:        404,275  (259,191 rd   + 145,084 wr)
1312 ==32350== D1  misses:        9,608  (  6,098 rd   +   3,510 wr)
1313 ==32350== LLd misses:        5,794  (  2,781 rd   +   3,013 wr)
1314 ==32350== D1  miss rate:       2.3% (    2.3%     +     2.4%  )
1315 ==32350== LLd miss rate:       1.4% (    1.0%     +     2.0%  )
1316 ==32350==
1317 ==32350== LL refs:          15,181  ( 11,671 rd   +   3,510 wr)
1318 ==32350== LL misses:         9,132  (  6,119 rd   +   3,013 wr)
1319 ==32350== LL miss rate:        0.5% (    0.4%     +     2.0%  )
1320 ==32350==
1321 ==32350== Branches:        202,372  (197,050 cond +   5,322 ind)
1322 ==32350== Mispredicts:      19,153  ( 17,742 cond +   1,411 ind)
1323 ==32350== Mispred rate:        9.4% (    9.0%     +    26.5%   )
1324 EOF
1325         {
1326             COND    =>  197050,
1327             COND_m  =>   17742,
1328             Dr      =>  259191,
1329             Dr_m1   =>    6098,
1330             Dr_mm   =>    2781,
1331             Dw      =>  145084,
1332             Dw_m1   =>    3510,
1333             Dw_mm   =>    3013,
1334             IND     =>    5322,
1335             IND_m   =>    1411,
1336             Ir      => 1124055,
1337             Ir_m1   =>    5573,
1338             Ir_mm   =>    3338,
1339         },
1340     );
1341
1342     for ('t', '.') {
1343         last if require "$_/test.pl";
1344     }
1345     plan(@tests / 3 * keys %VALID_FIELDS);
1346
1347     while (@tests) {
1348         my $desc     = shift @tests;
1349         my $output   = shift @tests;
1350         my $expected = shift @tests;
1351         my $p = parse_cachegrind($output);
1352         for (sort keys %VALID_FIELDS) {
1353             is($p->{$_}, $expected->{$_}, "$desc, $_");
1354         }
1355     }
1356 }