Porting/bench.pl

   1 #!/usr/bin/perl
   2 #
   3 # A tool for analysing the performance of the code snippets found in
   4 # t/perf/benchmarks or similar
   5
   6
   7 =head1 NAME
   8
   9 bench.pl - Compare the performance of perl code snippets across multiple
  10 perls.
  11
  12 =head1 SYNOPSIS
  13
  14     # Basic: run the tests in t/perf/benchmarks against two or
  15     # more perls
  16
  17     bench.pl [options] -- perlA[=labelA] perlB[=labelB] ...
  18
  19     # run the tests against same perlA 2x, with and without extra
  20     # options
  21
  22     bench.pl [options] -- perlA=fast PerlA=slow -Mstrict -Dpsltoc
  23
  24     # Run bench.pl's own built-in sanity tests
  25
  26     bench.pl --action=selftest
  27
  28     # Run bench on blead, which is then modified and timed again
  29
  30     bench.pl [options] --write=blead.time -- ./perl=blead
  31     # hack hack hack
  32     bench.pl --read=blead.time -- ./perl=hacked
  33
  34     # You can also combine --read with --write
  35     bench.pl --read=blead.time --write=last.time -- ./perl=hacked
  36
  37 =head1 DESCRIPTION
  38
  39 By default, F<bench.pl> will run code snippets found in
  40 F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
  41 how many instruction reads, data writes, branches, cache misses, etc. that
  42 one execution of the snippet uses. It will run them against two or more
  43 perl executables and show how much each test has gotten better or worse.
  44
  45 It is modelled on the F<perlbench> tool, but since it measures instruction
  46 reads etc., rather than timings, it is much more precise and reproducible.
  47 It is also considerably faster, and is capable of running tests in
  48 parallel (with C<-j>). Rather than  displaying a single relative
  49 percentage per test/perl combination, it displays values for 13 different
  50 measurements, such as instruction reads, conditional branch misses etc.
  51
  52 There are options to write the raw data to a file, and to read it back.
  53 This means that you can view the same run data in different views with
  54 different selection and sort options. You can also use this mechanism
  55 to save the results of timing one perl, and then read it back while timing
  56 a modification, so that you dont have rerun the same tests on the same
  57 perl over and over, or have two perls built at the same time.
  58
  59 The optional C<=label> after each perl executable is used in the display
  60 output. If you are doing a two step benchmark then you should provide
  61 a label for at least the "base" perl.
  62
  63 =head1 OPTIONS
  64
  65 =over 4
  66
  67 =item *
  68
  69 --action=I<foo>
  70
  71 What action to perform. The default is  I<grind>, which runs the benchmarks
  72 using I<cachegrind> as the back end. The only other action at the moment is
  73 I<selftest>, which runs some basic sanity checks and produces TAP output.
  74
  75 =item *
  76
  77 --average
  78
  79 Only display the overall average, rather than the results for each
  80 individual test.
  81
  82 =item *
  83
  84 --benchfile=I<foo>
  85
  86 The path of the file which contains the benchmarks (F<t/perf/benchmarks>
  87 by default).
  88
  89 =item *
  90
  91 --bisect=I<field,minval,maxval>
  92
  93 Run a single test against one perl and exit with a zero status if the
  94 named field is in the specified range; exit 1 otherwise. It will complain
  95 if more than one test or perl has been specified. It is intended to be
  96 called as part of a bisect run, to determine when something changed.
  97 For example,
  98
  99     bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
 100         ./miniperl
 101
 102 might be called from bisect to find when the number of instruction reads
 103 for test I<foo> falls outside the range 100..105.
 104
 105 =item *
 106
 107 --compact=<Iperl>
 108
 109 Display the results for a single perl executable in a compact form.
 110 Which perl to display is specified in the same manner as C<--norm>.
 111
 112 =item *
 113
 114 --debug
 115
 116 Enable verbose debugging output.
 117
 118 =item *
 119
 120 --fields=I<a,b,c>
 121
 122 Display only the specified fields; for example,
 123
 124     --fields=Ir,Ir_m,Ir_mm
 125
 126 If only one field is selected, the output is in more compact form.
 127
 128 =item *
 129
 130 --grindargs=I<foo>
 131
 132 Optional command-line arguments to pass to all cachegrind invocations.
 133
 134 This option is appended to those which bench.pl uses for its own
 135 purposes; so it can be used to override them (see --debug output
 136 below), and can also be 'abused' to add redirects into the valgrind
 137 command invocation.
 138
 139 For example, this writes PERL_MEM_LOG activity to foobar.$$, because
 140 3>foobar.$$ redirects fd 3, then perl under PERL_MEM_LOG writes to fd 3.
 141
 142  $ perl Porting/bench.pl --jobs=2 --verbose --debug \
 143     --tests=call::sub::amp_empty \
 144     \
 145     --grindargs='--cachegrind-out-file=junk.$$ 3>foobar.$$' \
 146     -- \
 147     perl5.24.0  perl5.24.0:+memlog:PERL_MEM_LOG=3mst
 148
 149 for the +memlog tests, this executes as: (shown via --debug, then prettyfied)
 150
 151   Command: PERL_HASH_SEED=0 PERL_MEM_LOG=3mst
 152     valgrind --tool=cachegrind  --branch-sim=yes
 153     --cachegrind-out-file=/dev/null --cachegrind-out-file=junk.$$
 154     3>foobar.$$ perl5.24.0  - 10 2>&1
 155
 156 The result is that a set of junk.$$ files containing raw cachegrind
 157 output are written, and foobar.$$ contains the expected memlog output.
 158
 159 Notes:
 160
 161 Theres no obvious utility for those junk.$$ and foobar.$$ files, but
 162 you can have them anyway.
 163
 164 The 3 in PERL_MEM_LOG=3mst is needed because the output would
 165 otherwize go to STDERR, and cause parse_cachegrind() to reject the
 166 test and die.
 167
 168 The --grindargs redirect is needed to capture the memlog output;
 169 without it, the memlog output is written to fd3, around
 170 parse_cachegrind and effectively into /dev/null
 171
 172 PERL_MEM_LOG is expensive when used.
 173
 174 call::sub::amp_empty
 175 &foo function call with no args or body
 176
 177        perl5.24.0 perl5.24.0+memlog
 178        ---------- -----------------
 179     Ir      394.0          543477.5
 180     Dr      161.0          146814.1
 181     Dw       72.0          122304.6
 182   COND       58.0           66796.4
 183    IND        5.0            5537.7
 184
 185 COND_m        0.0            6743.1
 186  IND_m        5.0            1490.2
 187
 188  Ir_m1        0.0             683.7
 189  Dr_m1        0.0              65.9
 190  Dw_m1        0.0               8.5
 191
 192  Ir_mm        0.0              11.6
 193  Dr_mm        0.0              10.6
 194  Dw_mm        0.0               4.7
 195
 196
 197 =item *
 198
 199 ---help
 200
 201 Display basic usage information.
 202
 203 =item *
 204
 205 -j I<N>
 206 --jobs=I<N>
 207
 208 Run I<N> jobs in parallel (default 1). This determines how many cachegrind
 209 process will running at a time, and should generally be set to the number
 210 of CPUs available.
 211
 212 =item *
 213
 214 --norm=I<foo>
 215
 216 Specify which perl column in the output to treat as the 100% norm.
 217 It may be a column number (0..N-1) or a perl executable name or label.
 218 It defaults to the leftmost column.
 219
 220 =item *
 221
 222 --perlargs=I<foo>
 223
 224 Optional command-line arguments to pass to each perl-under-test
 225 (perlA, perlB in synopsis) For example, C<--perlargs=-Ilib>.
 226
 227 =item *
 228
 229 --raw
 230
 231 Display raw data counts rather than percentages in the outputs. This
 232 allows you to see the exact number of intruction reads, branch misses etc.
 233 for each test/perl combination. It also causes the C<AVERAGE> display
 234 per field to be calculated based on the average of each tests's count
 235 rather than average of each percentage. This means that tests with very
 236 high counts will dominate.
 237
 238 =item *
 239
 240 --sort=I<field:perl>
 241
 242 Order the tests in the output based on the value of I<field> in the
 243 column I<perl>. The I<perl> value is as per C<--norm>. For example
 244
 245     bench.pl --sort=Dw:perl-5.20.0 \
 246         perl-5.16.0 perl-5.18.0 perl-5.20.0
 247
 248 =item *
 249
 250 -r I<file>
 251 --read=I<file>
 252
 253 Read in saved data from a previous C<--write> run from the specified file.
 254
 255 Requires C<JSON::PP> to be available.
 256
 257 =item *
 258
 259 --tests=I<FOO>
 260
 261 Specify a subset of tests to run (or in the case of C<--read>, to display).
 262 It may be either a comma-separated list of test names, or a regular
 263 expression. For example
 264
 265     --tests=expr::assign::scalar_lex,expr::assign::2list_lex
 266     --tests=/^expr::/
 267
 268 =item *
 269
 270 --verbose
 271
 272 Display progress information.
 273
 274 =item *
 275
 276 -w I<file>
 277 --write=I<file>
 278
 279 Save the raw data to the specified file. It can be read back later with
 280 C<--read>. If combined with C<--read> then the output file will be
 281 the merge of the file read and any additional perls added on the command
 282 line.
 283
 284 Requires C<JSON::PP> to be available.
 285
 286 =back
 287
 288 =cut
 289
 290
 291
 292 use 5.010000;
 293 use warnings;
 294 use strict;
 295 use Getopt::Long qw(:config no_auto_abbrev require_order);
 296 use IPC::Open2 ();
 297 use IO::Select;
 298 use IO::File;
 299 use POSIX ":sys_wait_h";
 300
 301 # The version of the file format used to save data. We refuse to process
 302 # the file if the integer component differs.
 303
 304 my $FORMAT_VERSION = 1.0;
 305
 306 # The fields we know about
 307
 308 my %VALID_FIELDS = map { $_ => 1 }
 309     qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
 310
 311 sub usage {
 312     die <<EOF;
 313 usage: $0 [options] -- perl[=label] ...
 314   --action=foo       What action to perform [default: grind].
 315   --average          Only display average, not individual test results.
 316   --benchfile=foo    File containing the benchmarks;
 317                        [default: t/perf/benchmarks].
 318   --bisect=f,min,max run a single test against one perl and exit with a
 319                        zero status if the named field is in the specified
 320                        range; exit 1 otherwise.
 321   --compact=perl     Display the results of a single perl in compact form.
 322                      Which perl specified like --norm
 323   --debug            Enable verbose debugging output.
 324   --fields=a,b,c     Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
 325   --grindargs=foo    Optional command-line args to pass to cachegrind.
 326   --help             Display this help.
 327   -j|--jobs=N        Run N jobs in parallel [default 1].
 328   --norm=perl        Which perl column to treat as 100%; may be a column
 329                        number (0..N-1) or a perl executable name or label;
 330                        [default: 0].
 331   --perlargs=foo     Optional command-line args to pass to each perl to run.
 332   --raw              Display raw data counts rather than percentages.
 333   --show             Show results even though we are going to write results.
 334   --sort=field:perl  Sort the tests based on the value of 'field' in the
 335                        column 'perl'. The perl value is as per --norm.
 336   -r|--read=file     Read in previously saved data from the specified file.
 337   --tests=FOO        Select only the specified tests from the benchmarks file;
 338                        FOO may be either of the form 'foo,bar' or '/regex/';
 339                        [default: all tests].
 340   --verbose          Display progress information.
 341   -w|--write=file    Save the raw data to the specified file.
 342
 343 --action is one of:
 344     grind            run the code under cachegrind
 345     selftest         perform a selftest; produce TAP output
 346
 347 The command line ends with one or more specified perl executables,
 348 which will be searched for in the current \$PATH. Each binary name may
 349 have an optional =LABEL appended, which will be used rather than the
 350 executable name in output. E.g.
 351
 352     perl-5.20.1=PRE-BUGFIX  perl-5.20.1-new=POST-BUGFIX
 353 EOF
 354 }
 355
 356 my %OPTS = (
 357     action    => 'grind',
 358     average   => 0,
 359     benchfile => 't/perf/benchmarks',
 360     bisect    => undef,
 361     compact   => undef,
 362     debug     => 0,
 363     grindargs => '',
 364     fields    => undef,
 365     jobs      => 1,
 366     norm      => 0,
 367     perlargs  => '',
 368     raw       => 0,
 369     read      => undef,
 370     show      => 0,
 371     sort      => undef,
 372     tests     => undef,
 373     verbose   => 0,
 374     write     => undef,
 375 );
 376
 377
 378 # process command-line args and call top-level action
 379
 380 {
 381     GetOptions(
 382         'action=s'    => \$OPTS{action},
 383         'average'     => \$OPTS{average},
 384         'benchfile=s' => \$OPTS{benchfile},
 385         'bisect=s'    => \$OPTS{bisect},
 386         'compact=s'   => \$OPTS{compact},
 387         'debug'       => \$OPTS{debug},
 388         'grindargs=s' => \$OPTS{grindargs},
 389         'help'        => \$OPTS{help},
 390         'fields=s'    => \$OPTS{fields},
 391         'jobs|j=i'    => \$OPTS{jobs},
 392         'norm=s'      => \$OPTS{norm},
 393         'perlargs=s'  => \$OPTS{perlargs},
 394         'raw'         => \$OPTS{raw},
 395         'read|r=s@'   => \$OPTS{read},
 396         'show!'       => \$OPTS{show},
 397         'sort=s'      => \$OPTS{sort},
 398         'tests=s'     => \$OPTS{tests},
 399         'verbose'     => \$OPTS{verbose},
 400         'write|w=s'   => \$OPTS{write},
 401     ) or usage;
 402
 403     usage if $OPTS{help};
 404
 405
 406     if (defined $OPTS{read} or defined $OPTS{write}) {
 407         # fail early if it's not present
 408         require JSON::PP;
 409     }
 410
 411     if (defined $OPTS{fields}) {
 412         my @f = split /,/, $OPTS{fields};
 413         for (@f) {
 414             die "Error: --fields: unknown field '$_'\n"
 415                 unless $VALID_FIELDS{$_};
 416         }
 417         my %f = map { $_ => 1 } @f;
 418         $OPTS{fields} = \%f;
 419     }
 420
 421     my %valid_actions = qw(grind 1 selftest 1);
 422     unless ($valid_actions{$OPTS{action}}) {
 423         die "Error: unrecognised action '$OPTS{action}'\n"
 424           . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
 425     }
 426
 427     if (defined $OPTS{sort}) {
 428         my @s = split /:/, $OPTS{sort};
 429         if (@s != 2) {
 430             die "Error: --sort argument should be of the form field:perl: "
 431               . "'$OPTS{sort}'\n";
 432         }
 433         my ($field, $perl) = @s;
 434         die "Error: --sort: unknown field '$field\n"
 435             unless $VALID_FIELDS{$field};
 436         # the 'perl' value will be validated later, after we have processed
 437         # the perls
 438         $OPTS{'sort-field'} = $field;
 439         $OPTS{'sort-perl'}  = $perl;
 440     }
 441
 442     if ($OPTS{action} eq 'selftest') {
 443         if (@ARGV) {
 444             die "Error: no perl executables may be specified with --read\n"
 445         }
 446     }
 447     elsif (defined $OPTS{bisect}) {
 448         die "Error: exactly one perl executable must be specified for bisect\n"
 449                                                 unless @ARGV == 1;
 450         die "Error: Can't specify both --bisect and --read\n"
 451                                                 if defined $OPTS{read};
 452         die "Error: Can't specify both --bisect and --write\n"
 453                                                 if defined $OPTS{write};
 454     }
 455
 456     if ($OPTS{action} eq 'grind') {
 457         do_grind(\@ARGV);
 458     }
 459     elsif ($OPTS{action} eq 'selftest') {
 460         do_selftest();
 461     }
 462 }
 463 exit 0;
 464
 465
 466 # Given a hash ref keyed by test names, filter it by deleting unwanted
 467 # tests, based on $OPTS{tests}.
 468
 469 sub filter_tests {
 470     my ($tests) = @_;
 471
 472     my $opt = $OPTS{tests};
 473     return unless defined $opt;
 474
 475     my @tests;
 476
 477     if ($opt =~ m{^/}) {
 478         $opt =~ s{^/(.+)/$}{$1}
 479             or die "Error: --tests regex must be of the form /.../\n";
 480         for (keys %$tests) {
 481             delete $tests->{$_} unless /$opt/;
 482         }
 483     }
 484     else {
 485         my %t;
 486         for (split /,/, $opt) {
 487             die "Error: no such test found: '$_'\n"
 488                 . ($OPTS{verbose} ? "  have: @{[ sort keys %$tests ]}\n" : "")
 489                 unless exists $tests->{$_};
 490             $t{$_} = 1;
 491         }
 492         for (keys %$tests) {
 493             delete $tests->{$_} unless exists $t{$_};
 494         }
 495     }
 496     die "Error: no tests to run\n" unless %$tests;
 497 }
 498
 499
 500 # Read in the test file, and filter out any tests excluded by $OPTS{tests}
 501 # return a hash ref { testname => { test }, ... }
 502 # and an array ref of the original test names order,
 503
 504 sub read_tests_file {
 505     my ($file) = @_;
 506
 507     my $ta;
 508     {
 509         local @INC = ('.');
 510         $ta = do $file;
 511     }
 512     unless ($ta) {
 513         die "Error: can't parse '$file': $@\n" if $@;
 514         die "Error: can't read '$file': $!\n";
 515     }
 516
 517     my @orig_order;
 518     for (my $i=0; $i < @$ta; $i += 2) {
 519         push @orig_order, $ta->[$i];
 520     }
 521
 522     my $t = { @$ta };
 523     filter_tests($t);
 524     return $t, \@orig_order;
 525 }
 526
 527
 528 # Process the perl/column argument of options like --norm and --sort.
 529 # Return the index of the matching perl.
 530
 531 sub select_a_perl {
 532     my ($perl, $perls, $who) = @_;
 533     $perls||=[];
 534     if ($perl =~ /^[0-9]$/) {
 535         die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
 536                                         unless $perl < @$perls;
 537         return $perl;
 538     }
 539     else {
 540         my @perl = grep    $perls->[$_][0] eq $perl
 541                         || $perls->[$_][1] eq $perl,
 542                         0..$#$perls;
 543         die "Error: $who: unrecognised perl '$perl'\n"
 544                                         unless @perl;
 545         die "Error: $who: ambiguous perl '$perl'\n"
 546                                         if @perl > 1;
 547         return $perl[0];
 548     }
 549 }
 550
 551
 552 # Validate the list of perl=label (+ cmdline options) on the command line.
 553 # Return a list of [ exe, label, cmdline-options ] tuples, i.e.
 554 # 'perl-under-test's (PUTs)
 555
 556 sub process_puts {
 557     my $read_perls= shift;
 558     my @res_puts; # returned, each item is [ perlexe, label, @putargs ]
 559     my %seen= map { $_->[1] => 1 } @$read_perls;
 560     my @putargs; # collect not-perls into args per PUT
 561
 562     for my $p (reverse @_) {
 563         push @putargs, $p and next if $p =~ /^-/; # not-perl, dont send to qx//
 564
 565         my ($perl, $label, $env) = split /[=:,]/, $p, 3;
 566         $label //= $perl;
 567         $label = $perl.$label if $label =~ /^\+/;
 568         die "$label cannot be used on 2 different perls under test\n" if $seen{$label}++;
 569
 570         my %env;
 571         if ($env) {
 572             %env = split /[=,]/, $env;
 573         }
 574         my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
 575         if ($r eq "ok\n") {
 576             push @res_puts, [ $perl, $label, \%env, reverse @putargs ];
 577             @putargs = ();
 578             warn "Added Perl-Under-Test: [ @{[@{$res_puts[-1]}]} ]\n"
 579                 if $OPTS{verbose};
 580         } else {
 581             warn "perl-under-test args: @putargs + a not-perl: $p $r\n"
 582                 if $OPTS{verbose};
 583             push @putargs, $p; # not-perl
 584         }
 585     }
 586     return reverse @res_puts;
 587 }
 588
 589
 590
 591 # Return a string containing perl test code wrapped in a loop
 592 # that runs $ARGV[0] times
 593
 594 sub make_perl_prog {
 595     my ($test, $desc, $setup, $code) = @_;
 596
 597     return <<EOF;
 598 # $desc
 599 package $test;
 600 BEGIN { srand(0) }
 601 $setup;
 602 for my \$__loop__ (1..\$ARGV[0]) {
 603     $code;
 604 }
 605 EOF
 606 }
 607
 608
 609 # Parse the output from cachegrind. Return a hash ref.
 610 # See do_selftest() for examples of the output format.
 611
 612 sub parse_cachegrind {
 613     my ($output, $id, $perl) = @_;
 614
 615     my %res;
 616
 617     my @lines = split /\n/, $output;
 618     for (@lines) {
 619         unless (s/(==\d+==)|(--\d+--) //) {
 620             die "Error: while executing $id:\n"
 621               . "unexpected code or cachegrind output:\n$_\n";
 622         }
 623         if (/I   refs:\s+([\d,]+)/) {
 624             $res{Ir} = $1;
 625         }
 626         elsif (/I1  misses:\s+([\d,]+)/) {
 627             $res{Ir_m1} = $1;
 628         }
 629         elsif (/LLi misses:\s+([\d,]+)/) {
 630             $res{Ir_mm} = $1;
 631         }
 632         elsif (/D   refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
 633             @res{qw(Dr Dw)} = ($1,$2);
 634         }
 635         elsif (/D1  misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
 636             @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
 637         }
 638         elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
 639             @res{qw(Dr_mm Dw_mm)} = ($1,$2);
 640         }
 641         elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
 642             @res{qw(COND IND)} = ($1,$2);
 643         }
 644         elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
 645             @res{qw(COND_m IND_m)} = ($1,$2);
 646         }
 647     }
 648
 649     for my $field (keys %VALID_FIELDS) {
 650         die "Error: can't parse '$field' field from cachegrind output:\n$output"
 651             unless exists $res{$field};
 652         $res{$field} =~ s/,//g;
 653     }
 654
 655     return \%res;
 656 }
 657
 658
 659 # Handle the 'grind' action
 660
 661 sub do_grind {
 662     my ($perl_args) = @_; # the residue of @ARGV after option processing
 663
 664     my ($loop_counts, $perls, $results, $tests, $order);
 665     my ($bisect_field, $bisect_min, $bisect_max);
 666
 667     if (defined $OPTS{bisect}) {
 668         ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
 669         die "Error: --bisect option must be of form 'field,integer,integer'\n"
 670             unless
 671                     defined $bisect_max
 672                 and $bisect_min =~ /^[0-9]+$/
 673                 and $bisect_max =~ /^[0-9]+$/;
 674
 675         die "Error: unrecognised field '$bisect_field' in --bisect option\n"
 676             unless $VALID_FIELDS{$bisect_field};
 677
 678         die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
 679             if $bisect_min > $bisect_max;
 680     }
 681
 682     foreach my $file (@{$OPTS{read}}) {
 683         open my $in, '<:encoding(UTF-8)', $file
 684             or die " Error: can't open '$file' for reading: $!\n";
 685         my $data = do { local $/; <$in> };
 686         close $in;
 687
 688         my $hash = JSON::PP::decode_json($data);
 689         if (int($FORMAT_VERSION) < int($hash->{version})) {
 690             die "Error: unsupported version $hash->{version} in file"
 691               . "'$file' (too new)\n";
 692         }
 693         my ($read_loop_counts, $read_perls, $read_results, $read_tests, $read_order) =
 694             @$hash{qw(loop_counts perls results tests order)};
 695         filter_tests($read_results);
 696         filter_tests($read_tests);
 697         if (!$read_order) {
 698             $order = [ sort keys %$read_tests ];
 699         }
 700         if (!$loop_counts) {
 701             ($loop_counts, $perls, $results, $tests, $order) =
 702                 ($read_loop_counts, $read_perls, $read_results, $read_tests, $read_order);
 703             filter_tests($results);
 704             filter_tests($tests);
 705             if (!$order) {
 706                 $order = [ sort keys %$tests ];
 707             }
 708         } else {
 709             my @have_keys= sort keys %$read_tests;
 710             my @want_keys= sort keys %$tests;
 711
 712             if ("@have_keys" ne "@want_keys" or
 713                 "@$read_loop_counts" ne "@$loop_counts")
 714             {
 715                 die "tests run aren't the same, cant merge read files";
 716             }
 717
 718             push @$perls, @{$hash->{perls}};
 719             foreach my $test (keys %{$hash->{results}}) {
 720                 foreach my $perl (keys %{$hash->{results}{$test}}) {
 721                     $results->{$test}{$perl}= $hash->{results}{$test}{$perl};
 722                 }
 723             }
 724         }
 725     }
 726
 727     if (@$perl_args) {
 728         unless ($loop_counts) {
 729             # How many times to execute the loop for the two trials. The lower
 730             # value is intended to do the loop enough times that branch
 731             # prediction has taken hold; the higher loop allows us to see the
 732             # branch misses after that
 733             $loop_counts = [10, 20];
 734
 735             ($tests, $order) = read_tests_file($OPTS{benchfile});
 736             die "Error: only a single test may be specified with --bisect\n"
 737                 if defined $OPTS{bisect} and keys %$tests != 1;
 738         }
 739
 740         my @run_perls= process_puts($perls, @$perl_args);
 741         push @$perls, @run_perls;
 742         die "Error: Not enough perls to run a report, and --write not specified.\n"
 743             if @$perls < 2 and !$OPTS{write};
 744         $results = grind_run($tests, $order, \@run_perls, $loop_counts, $results);
 745     }
 746
 747     if (!$perls or !@$perls) {
 748         die "Error: nothing to do: no perls to run, no data to read.\n";
 749     }
 750     # now that we have a list of perls, use it to process the
 751     # 'perl' component of the --norm and --sort args
 752
 753     $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
 754     if (defined $OPTS{'sort-perl'}) {
 755         $OPTS{'sort-perl'} =
 756                 select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
 757     }
 758
 759     if (defined $OPTS{'compact'}) {
 760         $OPTS{'compact'} =
 761                 select_a_perl($OPTS{'compact'}, $perls, "--compact");
 762     }
 763     if (defined $OPTS{write}) {
 764         my $json = JSON::PP::encode_json({
 765                     version      => $FORMAT_VERSION,
 766                     loop_counts  => $loop_counts,
 767                     perls        => $perls,
 768                     results      => $results,
 769                     tests        => $tests,
 770                     order        => $order,
 771                 });
 772
 773         open my $out, '>:encoding(UTF-8)', $OPTS{write}
 774             or die "Error: can't open '$OPTS{write}' for writing: $!\n";
 775         print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
 776         close $out       or die "Error: closing file '$OPTS{write}': $!\n";
 777     }
 778     if (!$OPTS{write} or $OPTS{show}) {
 779         if (@$perls < 2) {
 780             die "Error: need more than one perl to do a report.\n";
 781         }
 782         my ($processed, $averages) =
 783                     grind_process($results, $perls, $loop_counts);
 784
 785         if (defined $OPTS{bisect}) {
 786             my @r = values %$results;
 787             die "Panic: expected exactly one test result in bisect\n"
 788                                                             if @r != 1;
 789             @r = values %{$r[0]};
 790             die "Panic: expected exactly one perl result in bisect\n"
 791                                                             if @r != 1;
 792             my $c = $r[0]{$bisect_field};
 793             die "Panic: no result in bisect for field '$bisect_field'\n"
 794                                                             unless defined $c;
 795             exit 0 if $bisect_min <= $c and $c <= $bisect_max;
 796             exit 1;
 797         }
 798         elsif (defined $OPTS{compact}) {
 799             grind_print_compact($processed, $averages, $OPTS{compact},
 800                                 $perls, $tests, $order);
 801         }
 802         else {
 803             grind_print($processed, $averages, $perls, $tests, $order);
 804         }
 805     }
 806 }
 807
 808
 809 # Run cachegrind for every test/perl combo.
 810 # It may run several processes in parallel when -j is specified.
 811 # Return a hash ref suitable for input to grind_process()
 812
 813 sub grind_run {
 814     my ($tests, $order, $perls, $counts, $results) = @_;
 815
 816     # Build a list of all the jobs to run
 817
 818     my @jobs;
 819
 820     for my $test (grep $tests->{$_}, @$order) {
 821
 822         # Create two test progs: one with an empty loop and one with code.
 823         # Note that the empty loop is actually '{1;}' rather than '{}';
 824         # this causes the loop to have a single nextstate rather than a
 825         # stub op, so more closely matches the active loop; e.g.:
 826         #   {1;}    => nextstate;                       unstack
 827         #   {$x=1;} => nextstate; const; gvsv; sassign; unstack
 828         my @prog = (
 829             make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
 830             make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
 831         );
 832
 833         for my $p (@$perls) {
 834             my ($perl, $label, $env, @putargs) = @$p;
 835
 836             # Run both the empty loop and the active loop
 837             # $counts->[0] and $counts->[1] times.
 838
 839             for my $i (0,1) {
 840                 for my $j (0,1) {
 841                     my $envstr = '';
 842                     if (ref $env) {
 843                         $envstr .= "$_=$env->{$_} " for sort keys %$env;
 844                     }
 845                     my $cmd = "PERL_HASH_SEED=0 $envstr"
 846                             . "valgrind --tool=cachegrind  --branch-sim=yes "
 847                             . "--cachegrind-out-file=/dev/null "
 848                             . "$OPTS{grindargs} "
 849                             . "$perl $OPTS{perlargs} @putargs - $counts->[$j] 2>&1";
 850                     # for debugging and error messages
 851                     my $id = "$test/$label "
 852                         . ($i ? "active" : "empty") . "/"
 853                         . ($j ? "long"   : "short") . " loop";
 854
 855                     push @jobs, {
 856                         test   => $test,
 857                         perl   => $perl,
 858                         plabel => $label,
 859                         cmd    => $cmd,
 860                         prog   => $prog[$i],
 861                         active => $i,
 862                         loopix => $j,
 863                         id     => $id,
 864                     };
 865                 }
 866             }
 867         }
 868     }
 869
 870     # Execute each cachegrind and store the results in %results.
 871
 872     local $SIG{PIPE} = 'IGNORE';
 873
 874     my $max_jobs = $OPTS{jobs};
 875     my $running  = 0; # count of executing jobs
 876     my %pids;         # map pids to jobs
 877     my %fds;          # map fds  to jobs
 878     my $select = IO::Select->new();
 879
 880     while (@jobs or $running) {
 881
 882         if ($OPTS{debug}) {
 883             printf "Main loop: pending=%d running=%d\n",
 884                 scalar(@jobs), $running;
 885         }
 886
 887         # Start new jobs
 888
 889         while (@jobs && $running < $max_jobs) {
 890             my $job = shift @jobs;
 891             my ($id, $cmd) =@$job{qw(id cmd)};
 892
 893             my ($in, $out, $pid);
 894             warn "Starting $id\n" if $OPTS{verbose};
 895             eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
 896                 or die "Error: while starting cachegrind subprocess"
 897                    ." for $id:\n$@";
 898             $running++;
 899             $pids{$pid}    = $job;
 900             $fds{"$out"}   = $job;
 901             $job->{out_fd} = $out;
 902             $job->{output} = '';
 903             $job->{pid}    = $pid;
 904
 905             $out->blocking(0);
 906             $select->add($out);
 907
 908             if ($OPTS{debug}) {
 909                 print "Started pid $pid for $id\n";
 910             }
 911
 912             # Note:
 913             # In principle we should write to $in in the main select loop,
 914             # since it may block. In reality,
 915             #  a) the code we write to the perl process's stdin is likely
 916             #     to be less than the OS's pipe buffer size;
 917             #  b) by the time the perl process has read in all its stdin,
 918             #     the only output it should have generated is a few lines
 919             #     of cachegrind output preamble.
 920             # If these assumptions change, then perform the following print
 921             # in the select loop instead.
 922
 923             print $in $job->{prog};
 924             close $in;
 925         }
 926
 927         # Get output of running jobs
 928
 929         if ($OPTS{debug}) {
 930             printf "Select: waiting on (%s)\n",
 931                 join ', ', sort { $a <=> $b } map $fds{$_}{pid},
 932                             $select->handles;
 933         }
 934
 935         my @ready = $select->can_read;
 936
 937         if ($OPTS{debug}) {
 938             printf "Select: pids (%s) ready\n",
 939                 join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
 940         }
 941
 942         unless (@ready) {
 943             die "Panic: select returned no file handles\n";
 944         }
 945
 946         for my $fd (@ready) {
 947             my $j = $fds{"$fd"};
 948             my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
 949             unless (defined $r) {
 950                 die "Panic: Read from process running $j->{id} gave:\n$!";
 951             }
 952             next if $r;
 953
 954             # EOF
 955
 956             if ($OPTS{debug}) {
 957                 print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
 958             }
 959
 960             $select->remove($j->{out_fd});
 961             close($j->{out_fd})
 962                 or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
 963             $running--;
 964             delete $fds{"$j->{out_fd}"};
 965             my $output = $j->{output};
 966
 967             if ($OPTS{debug}) {
 968                 my $p = $j->{prog};
 969                 $p =~ s/^/    : /mg;
 970                 my $o = $output;
 971                 $o =~ s/^/    : /mg;
 972
 973                 print "\n$j->{id}/\nCommand: $j->{cmd}\n"
 974                     . "Input:\n$p"
 975                     . "Output\n$o";
 976             }
 977
 978             $results->{$j->{test}}{$j->{plabel}}[$j->{active}][$j->{loopix}]
 979                     = parse_cachegrind($output, $j->{id}, $j->{perl});
 980         }
 981
 982         # Reap finished jobs
 983
 984         while (1) {
 985             my $kid = waitpid(-1, WNOHANG);
 986             my $ret = $?;
 987             last if $kid <= 0;
 988
 989             unless (exists $pids{$kid}) {
 990                 die "Panic: reaped unexpected child $kid";
 991             }
 992             my $j = $pids{$kid};
 993             if ($ret) {
 994                 die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
 995                     . "with the following output\n:$j->{output}\n";
 996             }
 997             delete $pids{$kid};
 998         }
 999     }
1000
1001     return $results;
1002 }
1003
1004
1005
1006
1007 # grind_process(): process the data that has been extracted from
1008 # cachgegrind's output.
1009 #
1010 # $res is of the form ->{benchmark_name}{perl_name}[active][count]{field_name},
1011 # where active is 0 or 1 indicating an empty or active loop,
1012 # count is 0 or 1 indicating a short or long loop. E.g.
1013 #
1014 #    $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][10]{Dw_mm}
1015 #
1016 # The $res data structure is modified in-place by this sub.
1017 #
1018 # $perls is [ [ perl-exe, perl-label], .... ].
1019 #
1020 # $counts is [ N, M ] indicating the counts for the short and long loops.
1021 #
1022 #
1023 # return \%output, \%averages, where
1024 #
1025 # $output{benchmark_name}{perl_name}{field_name} = N
1026 # $averages{perl_name}{field_name} = M
1027 #
1028 # where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
1029 # M is the average raw count over all tests ($OPTS{raw}), or
1030 # 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
1031
1032 sub grind_process {
1033     my ($res, $perls, $counts) = @_;
1034
1035     # Process the four results for each test/perf combo:
1036     # Convert
1037     #    $res->{benchmark_name}{perl_name}[active][count]{field_name} = n
1038     # to
1039     #    $res->{benchmark_name}{perl_name}{field_name} = averaged_n
1040     #
1041     # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
1042     #                     empty loops, eliminating startup time
1043     # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
1044     #                     active loops, eliminating startup time
1045     # (the two startup times may be different because different code
1046     # is being compiled); the difference of the two results above
1047     # divided by the count difference is the time to execute the
1048     # active code once, eliminating both startup and loop overhead.
1049
1050     for my $tests (values %$res) {
1051         for my $r (values %$tests) {
1052             my $r2;
1053             for (keys %{$r->[0][0]}) {
1054                 my $n = (  ($r->[1][1]{$_} - $r->[1][0]{$_})
1055                          - ($r->[0][1]{$_} - $r->[0][0]{$_})
1056                         ) / ($counts->[1] - $counts->[0]);
1057                 $r2->{$_} = $n;
1058             }
1059             $r = $r2;
1060         }
1061     }
1062
1063     my %totals;
1064     my %counts;
1065     my %data;
1066
1067     my $perl_norm = $perls->[$OPTS{norm}][1]; # the label of the reference perl
1068
1069     for my $test_name (keys %$res) {
1070         my $res1 = $res->{$test_name};
1071         my $res2_norm = $res1->{$perl_norm};
1072         for my $perl (keys %$res1) {
1073             my $res2 = $res1->{$perl};
1074             for my $field (keys %$res2) {
1075                 my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});
1076
1077                 if ($OPTS{raw}) {
1078                     # Avoid annoying '-0.0' displays. Ideally this number
1079                     # should never be negative, but fluctuations in
1080                     # startup etc can theoretically make this happen
1081                     $q = 0 if ($q <= 0 && $q > -0.1);
1082                     $totals{$perl}{$field} += $q;
1083                     $counts{$perl}{$field}++;
1084                     $data{$test_name}{$perl}{$field} = $q;
1085                     next;
1086                 }
1087
1088                 # $p and $q are notionally integer counts, but
1089                 # due to variations in startup etc, it's possible for a
1090                 # count which is supposedly zero to be calculated as a
1091                 # small positive or negative value.
1092                 # In this case, set it to zero. Further below we
1093                 # special-case zeros to avoid division by zero errors etc.
1094
1095                 $p = 0.0 if $p < 0.01;
1096                 $q = 0.0 if $q < 0.01;
1097
1098                 if ($p == 0.0 && $q == 0.0) {
1099                     # Both perls gave a count of zero, so no change:
1100                     # treat as 100%
1101                     $totals{$perl}{$field} += 1;
1102                     $counts{$perl}{$field}++;
1103                     $data{$test_name}{$perl}{$field} = 1;
1104                 }
1105                 elsif ($p == 0.0 || $q == 0.0) {
1106                     # If either count is zero, there were too few events
1107                     # to give a meaningful ratio (and we will end up with
1108                     # division by zero if we try). Mark the result undef,
1109                     # indicating that it shouldn't be displayed; and skip
1110                     # adding to the average
1111                     $data{$test_name}{$perl}{$field} = undef;
1112                 }
1113                 else {
1114                     # For averages, we record q/p rather than p/q.
1115                     # Consider a test where perl_norm took 1000 cycles
1116                     # and perlN took 800 cycles. For the individual
1117                     # results we display p/q, or 1.25; i.e. a quarter
1118                     # quicker. For the averages, we instead sum all
1119                     # the 0.8's, which gives the total cycles required to
1120                     # execute all tests, with all tests given equal
1121                     # weight. Later we reciprocate the final result,
1122                     # i.e. 1/(sum(qi/pi)/n)
1123
1124                     $totals{$perl}{$field} += $q/$p;
1125                     $counts{$perl}{$field}++;
1126                     $data{$test_name}{$perl}{$field} = $p/$q;
1127                 }
1128             }
1129         }
1130     }
1131
1132     # Calculate averages based on %totals and %counts accumulated earlier.
1133
1134     my %averages;
1135     for my $perl (keys %totals) {
1136         my $t = $totals{$perl};
1137         for my $field (keys %$t) {
1138             $averages{$perl}{$field} = $OPTS{raw}
1139                 ? $t->{$field} / $counts{$perl}{$field}
1140                   # reciprocal - see comments above
1141                 : $counts{$perl}{$field} / $t->{$field};
1142         }
1143     }
1144
1145     return \%data, \%averages;
1146 }
1147
1148
1149
1150 # print a standard blurb at the start of the grind display
1151
1152 sub grind_blurb {
1153     my ($perls) = @_;
1154
1155     print <<EOF;
1156 Key:
1157     Ir   Instruction read
1158     Dr   Data read
1159     Dw   Data write
1160     COND conditional branches
1161     IND  indirect branches
1162     _m   branch predict miss
1163     _m1  level 1 cache miss
1164     _mm  last cache (e.g. L3) miss
1165     -    indeterminate percentage (e.g. 1/0)
1166
1167 EOF
1168
1169     if ($OPTS{raw}) {
1170         print "The numbers represent raw counts per loop iteration.\n";
1171     }
1172     else {
1173         print <<EOF;
1174 The numbers represent relative counts per loop iteration, compared to
1175 $perls->[$OPTS{norm}][1] at 100.0%.
1176 Higher is better: for example, using half as many instructions gives 200%,
1177 while using twice as many gives 50%.
1178 EOF
1179     }
1180 }
1181
1182
1183 # return a sorted list of the test names, plus 'AVERAGE'
1184
1185 sub sorted_test_names {
1186     my ($results, $order, $perls) = @_;
1187
1188     my @names;
1189     unless ($OPTS{average}) {
1190         if (defined $OPTS{'sort-field'}) {
1191             my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
1192             my $perl = $perls->[$perlix][1];
1193             @names = sort
1194                 {
1195                         $results->{$a}{$perl}{$field}
1196                     <=> $results->{$b}{$perl}{$field}
1197                 }
1198                 keys %$results;
1199         }
1200         else {
1201             @names = grep $results->{$_}, @$order;
1202         }
1203     }
1204
1205     # No point in displaying average for only one test.
1206     push @names,  'AVERAGE' unless @names == 1;
1207     @names;
1208 }
1209
1210
1211 # grind_print(): display the tabulated results of all the cachegrinds.
1212 #
1213 # Arguments are of the form:
1214 #    $results->{benchmark_name}{perl_name}{field_name} = N
1215 #    $averages->{perl_name}{field_name} = M
1216 #    $perls = [ [ perl-exe, perl-label ], ... ]
1217 #    $tests->{test_name}{desc => ..., ...}
1218
1219 sub grind_print {
1220     my ($results, $averages, $perls, $tests, $order) = @_;
1221
1222     my @perl_names = map $_->[0], @$perls;
1223     my @perl_labels = map $_->[1], @$perls;
1224     my %perl_labels;
1225     $perl_labels{$_->[0]} = $_->[1] for @$perls;
1226
1227     my $field_label_width = 6;
1228     # Calculate the width to display for each column.
1229     my $min_width = $OPTS{raw} ? 8 : 6;
1230     my @widths = map { length($_) < $min_width ? $min_width : length($_) }
1231                         @perl_labels;
1232
1233     # Print standard header.
1234     grind_blurb($perls);
1235
1236     my @test_names = sorted_test_names($results, $order, $perls);
1237
1238     # If only a single field is to be displayed, use a more compact
1239     # format with only a single line of output per test.
1240
1241     my $one_field = defined $OPTS{fields} &&  keys(%{$OPTS{fields}}) == 1;
1242
1243     if ($one_field) {
1244         print "Results for field " . (keys(%{$OPTS{fields}}))[0] . ".\n";
1245
1246         # The first column will now contain test names rather than
1247         # field names; Calculate the max width.
1248
1249         $field_label_width = 0;
1250         for (@test_names) {
1251             $field_label_width = length if length > $field_label_width;
1252         }
1253
1254         # Print the perl executables header.
1255
1256         print "\n";
1257         for my $i (0,1) {
1258             print " " x $field_label_width;
1259             for (0..$#widths) {
1260                 printf " %*s", $widths[$_],
1261                     $i ? ('-' x$widths[$_]) :  $perl_labels[$_];
1262             }
1263             print "\n";
1264         }
1265     }
1266
1267     # Dump the results for each test.
1268
1269     for my $test_name (@test_names) {
1270         my $doing_ave = ($test_name eq 'AVERAGE');
1271         my $res1 = $doing_ave ? $averages : $results->{$test_name};
1272
1273         unless ($one_field) {
1274             print "\n$test_name";
1275             print "\n$tests->{$test_name}{desc}" unless $doing_ave;
1276             print "\n\n";
1277
1278             # Print the perl executables header.
1279             for my $i (0,1) {
1280                 print " " x $field_label_width;
1281                 for (0..$#widths) {
1282                     printf " %*s", $widths[$_],
1283                         $i ? ('-' x$widths[$_]) :  $perl_labels[$_];
1284                 }
1285                 print "\n";
1286             }
1287         }
1288
1289         for my $field (qw(Ir Dr Dw COND IND
1290                           N
1291                           COND_m IND_m
1292                           N
1293                           Ir_m1 Dr_m1 Dw_m1
1294                           N
1295                           Ir_mm Dr_mm Dw_mm
1296                       ))
1297         {
1298             next if $OPTS{fields} and ! exists $OPTS{fields}{$field};
1299
1300             if ($field eq 'N') {
1301                 print "\n";
1302                 next;
1303             }
1304
1305             if ($one_field) {
1306                 printf "%-*s", $field_label_width, $test_name;
1307             }
1308             else {
1309                 printf "%*s", $field_label_width, $field;
1310             }
1311
1312             for my $i (0..$#widths) {
1313                 my $res2 = $res1->{$perl_labels[$i]};
1314                 my $p = $res2->{$field};
1315                 if (!defined $p) {
1316                     printf " %*s", $widths[$i], '-';
1317                 }
1318                 elsif ($OPTS{raw}) {
1319                     printf " %*.1f", $widths[$i], $p;
1320                 }
1321                 else {
1322                     printf " %*.2f", $widths[$i], $p * 100;
1323                 }
1324             }
1325             print "\n";
1326         }
1327     }
1328 }
1329
1330
1331
1332 # grind_print_compact(): like grind_print(), but display a single perl
1333 # in a compact form. Has an additional arg, $which_perl, which specifies
1334 # which perl to display.
1335 #
1336 # Arguments are of the form:
1337 #    $results->{benchmark_name}{perl_name}{field_name} = N
1338 #    $averages->{perl_name}{field_name} = M
1339 #    $perls = [ [ perl-exe, perl-label ], ... ]
1340 #    $tests->{test_name}{desc => ..., ...}
1341
1342 sub grind_print_compact {
1343     my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;
1344
1345
1346     # the width to display for each column.
1347     my $width = $OPTS{raw} ? 7 : 6;
1348
1349     # Print standard header.
1350     grind_blurb($perls);
1351
1352     print "\nResults for $perls->[$which_perl][1]\n\n";
1353
1354     my @test_names = sorted_test_names($results, $order, $perls);
1355
1356     # Dump the results for each test.
1357
1358      my @fields = qw( Ir Dr Dw
1359                       COND IND
1360                       COND_m IND_m
1361                       Ir_m1 Dr_m1 Dw_m1
1362                       Ir_mm Dr_mm Dw_mm
1363                     );
1364     if ($OPTS{fields}) {
1365         @fields = grep exists $OPTS{fields}{$_}, @fields;
1366     }
1367
1368     printf " %*s", $width, $_      for @fields;
1369     print "\n";
1370     printf " %*s", $width, '------' for @fields;
1371     print "\n";
1372
1373     for my $test_name (@test_names) {
1374         my $doing_ave = ($test_name eq 'AVERAGE');
1375         my $res = $doing_ave ? $averages : $results->{$test_name};
1376         $res = $res->{$perls->[$which_perl][1]};
1377
1378         for my $field (@fields) {
1379             my $p = $res->{$field};
1380             if (!defined $p) {
1381                 printf " %*s", $width, '-';
1382             }
1383             elsif ($OPTS{raw}) {
1384                 printf " %*.1f", $width, $p;
1385             }
1386             else {
1387                 printf " %*.2f", $width, $p * 100;
1388             }
1389
1390         }
1391
1392         print "  $test_name\n";
1393     }
1394 }
1395
1396
1397 # do_selftest(): check that we can parse known cachegrind()
1398 # output formats. If the output of cachegrind changes, add a *new*
1399 # test here; keep the old tests to make sure we continue to parse
1400 # old cachegrinds
1401
1402 sub do_selftest {
1403
1404     my @tests = (
1405         'standard',
1406         <<'EOF',
1407 ==32350== Cachegrind, a cache and branch-prediction profiler
1408 ==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
1409 ==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
1410 ==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
1411 ==32350==
1412 --32350-- warning: L3 cache found, using its data for the LL simulation.
1413 ==32350==
1414 ==32350== I   refs:      1,124,055
1415 ==32350== I1  misses:        5,573
1416 ==32350== LLi misses:        3,338
1417 ==32350== I1  miss rate:      0.49%
1418 ==32350== LLi miss rate:      0.29%
1419 ==32350==
1420 ==32350== D   refs:        404,275  (259,191 rd   + 145,084 wr)
1421 ==32350== D1  misses:        9,608  (  6,098 rd   +   3,510 wr)
1422 ==32350== LLd misses:        5,794  (  2,781 rd   +   3,013 wr)
1423 ==32350== D1  miss rate:       2.3% (    2.3%     +     2.4%  )
1424 ==32350== LLd miss rate:       1.4% (    1.0%     +     2.0%  )
1425 ==32350==
1426 ==32350== LL refs:          15,181  ( 11,671 rd   +   3,510 wr)
1427 ==32350== LL misses:         9,132  (  6,119 rd   +   3,013 wr)
1428 ==32350== LL miss rate:        0.5% (    0.4%     +     2.0%  )
1429 ==32350==
1430 ==32350== Branches:        202,372  (197,050 cond +   5,322 ind)
1431 ==32350== Mispredicts:      19,153  ( 17,742 cond +   1,411 ind)
1432 ==32350== Mispred rate:        9.4% (    9.0%     +    26.5%   )
1433 EOF
1434         {
1435             COND    =>  197050,
1436             COND_m  =>   17742,
1437             Dr      =>  259191,
1438             Dr_m1   =>    6098,
1439             Dr_mm   =>    2781,
1440             Dw      =>  145084,
1441             Dw_m1   =>    3510,
1442             Dw_mm   =>    3013,
1443             IND     =>    5322,
1444             IND_m   =>    1411,
1445             Ir      => 1124055,
1446             Ir_m1   =>    5573,
1447             Ir_mm   =>    3338,
1448         },
1449     );
1450
1451     for ('./t', '.') {
1452         my $t = "$_/test.pl";
1453         next unless  -f $t;
1454         require $t;
1455     }
1456     plan(@tests / 3 * keys %VALID_FIELDS);
1457
1458     while (@tests) {
1459         my $desc     = shift @tests;
1460         my $output   = shift @tests;
1461         my $expected = shift @tests;
1462         my $p = parse_cachegrind($output);
1463         for (sort keys %VALID_FIELDS) {
1464             is($p->{$_}, $expected->{$_}, "$desc, $_");
1465         }
1466     }
1467 }