3 # A tool for analysing the performance of the code snippets found in
4 # t/perf/benchmarks or similar
9 bench.pl - Compare the performance of perl code snippets across multiple
14 # Basic: run the tests in t/perf/benchmarks against two or
17 bench.pl [options] -- perlA[=labelA] perlB[=labelB] ...
19 # run the tests against the same perl twice, with varying options
21 bench.pl [options] -- perlA=bigint -Mbigint perlA=int
23 # Run bench on blead, saving results to file; then modify the blead
24 # binary, and benchmark again, comparing against the saved results
26 bench.pl [options] --write=blead.time -- ./perl=blead
27 # ... hack hack hack, updating ./perl ...
28 bench.pl --read=blead.time -- ./perl=hacked
30 # You can also combine --read with --write and new benchmark runs
32 bench.pl --read=blead.time --write=last.time -- ./perl=hacked
36 By default, F<bench.pl> will run code snippets found in
37 F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
38 how many instruction reads, data writes, branches, cache misses, etc.
39 one execution of the snippet uses. Usually it will run them against two or
40 more perl executables and show how much each test has gotten better or
43 It is modelled on the F<perlbench> tool, but since it measures instruction
44 reads etc., rather than timings, it is much more precise and reproducible.
45 It is also considerably faster, and is capable of running tests in
46 parallel (with C<-j>). Rather than displaying a single relative
47 percentage per test/perl combination, it displays values for 13 different
48 measurements, such as instruction reads, conditional branch misses etc.
50 There are options to write the raw data to a file, and to read it back.
51 This means that you can view the same run data in different views with
52 different selection and sort options. You can also use this mechanism
53 to save the results of timing one perl, and then read it back while timing
54 a modification, so that you don't have to rerun the same tests on the same
55 perl over and over, or have two perl executables built at the same time.
57 The optional C<=label> after each perl executable is used in the display
58 output. If you are doing a two-step benchmark then you should provide
59 a label for at least the "base" perl.
63 =head2 General options
71 What action to perform. The default is I<grind>, which runs the benchmarks
72 using I<cachegrind> as the back end. The only other action at the moment is
73 I<selftest>, which runs some basic sanity checks and produces TAP output.
79 Enable verbose debugging output.
85 Display basic usage information.
91 Display progress information.
95 =head2 Test selection options
103 Specify a subset of tests to run (or in the case of C<--read>, to read).
104 It may be either a comma-separated list of test names, or a regular
105 expression. For example
107 --tests=expr::assign::scalar_lex,expr::assign::2list_lex
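or, as a regex (an illustrative pattern selecting a whole group of tests):

    --tests=/^expr::/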
123 Read in saved data from a previous C<--write> run from the specified file.
124 If C<--tests> is present too, then only tests matching those conditions
125 are read from the file.
127 C<--read> may be specified multiple times, in which case the results
128 across all files are aggregated. The list of test names from each file
129 (after filtering by C<--tests>) must be identical across all files.
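For example, this aggregates two earlier runs and displays the combined
results (the file names are illustrative):

    bench.pl --read=runA.time --read=runB.time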
131 This list of tests is used instead of that obtained from the normal
132 benchmark file (or C<--benchfile>) for any benchmarks that are run.
134 Requires C<JSON::PP> to be available.
138 =head2 Benchmarking options
140 Benchmarks will be run for all perls specified on the command line.
141 These options can be used to modify the benchmarking behavior:
149 The path of the file which contains the benchmarks (F<t/perf/benchmarks>
156 Optional command-line arguments to pass to all cachegrind invocations.
158 This option is appended to those which bench.pl uses for its own
159 purposes; so it can be used to override them (see --debug output
160 below), and can also be 'abused' to add redirects into the valgrind
163 For example, this writes PERL_MEM_LOG activity to foobar.$$, because
164 3>foobar.$$ redirects fd 3, then perl under PERL_MEM_LOG writes to fd 3.
166 $ perl Porting/bench.pl --jobs=2 --verbose --debug \
167 --tests=call::sub::amp_empty \
169 --grindargs='--cachegrind-out-file=junk.$$ 3>foobar.$$' \
171 perl5.24.0 perl5.24.0:+memlog:PERL_MEM_LOG=3mst
173 for the +memlog tests, this executes as (shown via --debug, then prettified):
175 Command: PERL_HASH_SEED=0 PERL_MEM_LOG=3mst
176 valgrind --tool=cachegrind --branch-sim=yes
177 --cachegrind-out-file=/dev/null --cachegrind-out-file=junk.$$
178 3>foobar.$$ perl5.24.0 - 10 2>&1
180 The result is that a set of junk.$$ files containing raw cachegrind
181 output are written, and foobar.$$ contains the expected memlog output.
185 There's no obvious utility for those junk.$$ and foobar.$$ files, but
186 you can have them anyway.
188 The 3 in PERL_MEM_LOG=3mst is needed because the output would
189 otherwise go to STDERR, and cause parse_cachegrind() to reject the
192 The --grindargs redirect is needed to capture the memlog output;
193 without it, the memlog output is written to fd 3, around
194 parse_cachegrind and effectively into /dev/null.
196 PERL_MEM_LOG is expensive when used.
199 &foo function call with no args or body
201 perl5.24.0 perl5.24.0+memlog
202 ---------- -----------------
225 Run I<N> jobs in parallel (default 1). This determines how many cachegrind
226 processes will be running at a time, and should generally be set to the number
233 Optional command-line arguments to pass to each perl-under-test
234 (perlA, perlB in the synopsis). For example, C<--perlargs=-Ilib>.
238 =head2 Output options
240 Any results accumulated via --read or running benchmarks can be output
241 in any or all of these three ways:
250 Save the raw data to the specified file. It can be read back later with
251 C<--read>. If combined with C<--read> then the output file will be
252 the merge of the file read and any additional perls added on the command
255 Requires C<JSON::PP> to be available.
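The file is plain JSON, so it can also be inspected directly. A minimal
sketch (the top-level C<tests> key follows the format this script writes;
the file name is illustrative) that lists the test names stored in a
saved file:

    $ perl -MJSON::PP -0777 -e \
        'my $h = decode_json(<>); print "$_\n" for sort keys %{$h->{tests}};' \
        blead.time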
259 --bisect=I<field,minval,maxval>
261 Run a single test against one perl and exit with a zero status if the
262 named field is in the specified range; exit 1 otherwise. It will complain
263 if more than one test or perl has been specified. It is intended to be
264 called as part of a bisect run, to determine when something changed.
267 bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
270 might be called from bisect to find when the number of instruction reads
271 for test I<foo> falls outside the range 100..105.
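A sketch of driving this from C<git bisect> (the build command is an
assumption about your tree; C<git bisect run> treats exit status 0 as
"good" and 1 as "bad"):

    $ git bisect run sh -c 'make -j8 && Porting/bench.pl \
        --tests=foo --bisect=Ir,100,105 -- ./perl'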
277 Display the results to stdout in human-readable form. This is enabled by
278 default, except with --write and --bisect. The following sub-options alter
287 Only display the overall average, rather than the results for each
294 Display the results for a single perl executable in a compact form.
295 Which perl to display is specified in the same manner as C<--norm>.
301 Display only the specified fields; for example,
303 --fields=Ir,Ir_m,Ir_mm
305 If only one field is selected, the output is in a more compact form.
311 Specify which perl column in the output to treat as the 100% norm.
312 It may be a column number (0..N-1) or a perl executable name or label.
313 It defaults to the leftmost column.
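For example, either of these picks the first column's perl as the norm
(assuming it was given the label C<blead>):

    --norm=0
    --norm=blead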
319 Display raw data counts rather than percentages in the outputs. This
320 allows you to see the exact number of instruction reads, branch misses etc.
321 for each test/perl combination. It also causes the C<AVERAGE> display
322 per field to be calculated based on the average of each test's count
323 rather than average of each percentage. This means that tests with very
324 high counts will dominate.
330 Order the tests in the output based on the value of I<field> in the
331 column I<perl>. The I<perl> value is as per C<--norm>. For example
333 bench.pl --sort=Dw:perl-5.20.0 \
334 perl-5.16.0 perl-5.18.0 perl-5.20.0
347 use Getopt::Long qw(:config no_auto_abbrev require_order);
351 use POSIX ":sys_wait_h";
353 # The version of the file format used to save data. We refuse to process
354 # the file if the integer component differs.
356 my $FORMAT_VERSION = 1.0;
358 # The fields we know about
360 my %VALID_FIELDS = map { $_ => 1 }
361 qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
365 Usage: $0 [options] -- perl[=label] ...
369 --action=foo What action to perform [default: grind]:
370 grind run the code under cachegrind
371 selftest perform a selftest; produce TAP output
372 --debug Enable verbose debugging output.
373 --help Display this help.
374 --verbose Display progress information.
379 --tests=FOO Select only the specified tests for reading, benchmarking
380 and display. FOO may be either a list of tests or
381 a pattern: 'foo,bar,baz' or '/regex/';
382 [default: all tests].
386 -r|--read=file Read in previously saved data from the specified file.
387 May be repeated, and be used together with new
388 benchmarking to create combined results.
391 Benchmarks will be run for any perl specified on the command line.
392 These options can be used to modify the benchmarking behavior:
394 --benchfile=foo File containing the benchmarks.
395 [default: t/perf/benchmarks].
396 --grindargs=foo Optional command-line args to pass to cachegrind.
397 -j|--jobs=N Run N jobs in parallel [default 1].
398 --perlargs=foo Optional command-line args to pass to each perl to run.
401 Any results accumulated via --read or running benchmarks can be output
402 in any or all of these three ways:
404 -w|--write=file Save the raw data to the specified file (may be read
405 back later with --read).
407 --bisect=f,min,max Exit with a zero status if the named field f is in
408 the specified min..max range; exit 1 otherwise.
409 Produces no other output. Only legal if a single
410 benchmark test has been specified.
412 --show Display the results to stdout in human-readable form.
413 This is enabled by default, except with --write and
414 --bisect. The following sub-options alter how
417 --average Only display average, not individual test results.
418 --compact=perl Display the results of a single perl in compact form.
420 Which perl is specified as per --norm.
420 --fields=a,b,c Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
421 --norm=perl Which perl column to treat as 100%; may be a column
422 number (0..N-1) or a perl executable name or label;
424 --raw Display raw data counts rather than percentages.
425 --sort=field:perl Sort the tests based on the value of 'field' in the
426 column 'perl'. The perl value is as per --norm.
429 The command line ends with one or more specified perl executables,
430 which will be searched for in the current \$PATH. Each binary name may
431 have an optional =LABEL appended, which will be used rather than the
432 executable name in output. E.g.
434 perl-5.20.1=PRE-BUGFIX perl-5.20.1-new=POST-BUGFIX
460 # process command-line args and call top-level action
464 'action=s' => \$OPTS{action},
465 'average' => \$OPTS{average},
466 'benchfile=s' => \$OPTS{benchfile},
467 'bisect=s' => \$OPTS{bisect},
468 'compact=s' => \$OPTS{compact},
469 'debug' => \$OPTS{debug},
470 'grindargs=s' => \$OPTS{grindargs},
471 'help|h' => \$OPTS{help},
472 'fields=s' => \$OPTS{fields},
473 'jobs|j=i' => \$OPTS{jobs},
474 'norm=s' => \$OPTS{norm},
475 'perlargs=s' => \$OPTS{perlargs},
476 'raw' => \$OPTS{raw},
477 'read|r=s@' => \$OPTS{read},
478 'show' => \$OPTS{show},
479 'sort=s' => \$OPTS{sort},
480 'tests=s' => \$OPTS{tests},
481 'verbose' => \$OPTS{verbose},
482 'write|w=s' => \$OPTS{write},
483 ) or die "Use the -h option for usage information.\n";
485 usage if $OPTS{help};
488 if (defined $OPTS{read} or defined $OPTS{write}) {
489 # fail early if it's not present
493 if (defined $OPTS{fields}) {
494 my @f = split /,/, $OPTS{fields};
496 die "Error: --fields: unknown field '$_'\n"
497 unless $VALID_FIELDS{$_};
499 my %f = map { $_ => 1 } @f;
503 my %valid_actions = qw(grind 1 selftest 1);
504 unless ($valid_actions{$OPTS{action}}) {
505 die "Error: unrecognised action '$OPTS{action}'\n"
506 . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
509 if (defined $OPTS{sort}) {
510 my @s = split /:/, $OPTS{sort};
512 die "Error: --sort argument should be of the form field:perl: "
515 my ($field, $perl) = @s;
516 die "Error: --sort: unknown field '$field'\n"
517 unless $VALID_FIELDS{$field};
518 # the 'perl' value will be validated later, after we have processed
520 $OPTS{'sort-field'} = $field;
521 $OPTS{'sort-perl'} = $perl;
524 # show is the default output action
525 $OPTS{show} = 1 unless $OPTS{write} || $OPTS{bisect};
527 if ($OPTS{action} eq 'grind') {
530 elsif ($OPTS{action} eq 'selftest') {
532 die "Error: no perl executables may be specified with selftest\n"
540 # Given a hash ref keyed by test names, filter it by deleting unwanted
541 # tests, based on $OPTS{tests}.
546 my $opt = $OPTS{tests};
547 return unless defined $opt;
552 $opt =~ s{^/(.+)/$}{$1}
553 or die "Error: --tests regex must be of the form /.../\n";
555 delete $tests->{$_} unless /$opt/;
560 for (split /,/, $opt) {
561 die "Error: no such test found: '$_'\n"
562 . ($OPTS{verbose} ? " have: @{[ sort keys %$tests ]}\n" : "")
563 unless exists $tests->{$_};
567 delete $tests->{$_} unless exists $t{$_};
570 die "Error: no tests to run\n" unless %$tests;
574 # Read in the test file, and filter out any tests excluded by $OPTS{tests}
575 # return a hash ref { testname => { test }, ... }
576 # and an array ref of the original test names order,
578 sub read_tests_file {
587 die "Error: can't load '$file': code didn't return a true value\n"
589 die "Error: can't parse '$file':\n$@\n" if $@;
590 die "Error: can't read '$file': $!\n";
594 for (my $i=0; $i < @$ta; $i += 2) {
595 push @orig_order, $ta->[$i];
600 return $t, \@orig_order;
604 # Process the perl name/label/column argument of options like --norm and
605 # --sort. Return the index of the matching perl.
608 my ($perl, $perls, $who) = @_;
610 if ($perl =~ /^[0-9]+$/) {
611 die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
612 unless $perl < @$perls;
616 my @perl = grep $perls->[$_][0] eq $perl
617 || $perls->[$_][1] eq $perl,
619 die "Error: $who: unrecognised perl '$perl'\n"
621 die "Error: $who: ambiguous perl '$perl'\n"
628 # Validate the list of perl=label (+ cmdline options) on the command line.
629 # Return a list of [ exe, label, cmdline-options ] tuples, i.e.
630 # 'perl-under-test's (PUTs)
633 my $read_perls = shift;
634 my @res_puts; # returned, each item is [ perlexe, label, @putargs ]
635 my %seen = map { $_->[1] => 1 } @$read_perls;
636 my @putargs; # collect not-perls into args per PUT
638 for my $p (reverse @_) {
639 push @putargs, $p and next if $p =~ /^-/; # not-perl, don't send to qx//
641 my ($perl, $label, $env) = split /[=:,]/, $p, 3;
643 $label = $perl.$label if $label =~ /^\+/;
644 die "$label cannot be used on 2 different perls under test\n" if $seen{$label}++;
648 %env = split /[=,]/, $env;
650 my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
652 push @res_puts, [ $perl, $label, \%env, reverse @putargs ];
654 warn "Added Perl-Under-Test: [ @{[@{$res_puts[-1]}]} ]\n"
657 warn "perl-under-test args: @putargs + a not-perl: $p $r\n"
659 push @putargs, $p; # not-perl
662 return reverse @res_puts;
667 # Return a string containing perl test code wrapped in a loop
668 # that runs $ARGV[0] times
671 my ($test, $desc, $setup, $code) = @_;
678 for my \$__loop__ (1..\$ARGV[0]) {
685 # Parse the output from cachegrind. Return a hash ref.
686 # See do_selftest() for examples of the output format.
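# For example (a line from the selftest data further down):
#     ==32350== I   refs:      1,124,055
# is stripped of its "==pid==" prefix and yields Ir => 1124055
# (the commas are removed at the end of parsing).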
688 sub parse_cachegrind {
689 my ($output, $id, $perl) = @_;
693 my @lines = split /\n/, $output;
695 unless (s/(==\d+==)|(--\d+--) //) {
696 die "Error: while executing $id:\n"
697 . "unexpected code or cachegrind output:\n$_\n";
699 if (/I refs:\s+([\d,]+)/) {
702 elsif (/I1 misses:\s+([\d,]+)/) {
705 elsif (/LLi misses:\s+([\d,]+)/) {
708 elsif (/D refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
709 @res{qw(Dr Dw)} = ($1,$2);
711 elsif (/D1 misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
712 @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
714 elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
715 @res{qw(Dr_mm Dw_mm)} = ($1,$2);
717 elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
718 @res{qw(COND IND)} = ($1,$2);
720 elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
721 @res{qw(COND_m IND_m)} = ($1,$2);
725 for my $field (keys %VALID_FIELDS) {
726 die "Error: can't parse '$field' field from cachegrind output:\n$output"
727 unless exists $res{$field};
728 $res{$field} =~ s/,//g;
735 # Handle the 'grind' action
738 my ($perl_args) = @_; # the residue of @ARGV after option processing
740 my ($loop_counts, $perls, $results, $tests, $order, @run_perls);
741 my ($bisect_field, $bisect_min, $bisect_max);
742 my ($done_read, $processed, $averages);
744 if (defined $OPTS{bisect}) {
745 ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
746 die "Error: --bisect option must be of form 'field,integer,integer'\n"
749 and $bisect_min =~ /^[0-9]+$/
750 and $bisect_max =~ /^[0-9]+$/;
752 die "Error: unrecognised field '$bisect_field' in --bisect option\n"
753 unless $VALID_FIELDS{$bisect_field};
755 die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
756 if $bisect_min > $bisect_max;
759 # Read in previous benchmark results
761 foreach my $file (@{$OPTS{read}}) {
762 open my $in, '<:encoding(UTF-8)', $file
763 or die "Error: can't open '$file' for reading: $!\n";
764 my $data = do { local $/; <$in> };
767 my $hash = JSON::PP::decode_json($data);
768 if (int($FORMAT_VERSION) < int($hash->{version})) {
769 die "Error: unsupported version $hash->{version} in file"
770 . " '$file' (too new)\n";
772 my ($read_loop_counts, $read_perls, $read_results, $read_tests, $read_order) =
773 @$hash{qw(loop_counts perls results tests order)};
775 # check file contents for consistency
776 my $k_o = join ';', sort @$read_order;
777 my $k_r = join ';', sort keys %$read_results;
778 my $k_t = join ';', sort keys %$read_tests;
779 die "File '$file' contains no results\n" unless length $k_r;
780 die "File '$file' contains differing test and results names\n"
782 die "File '$file' contains differing test and sort order names\n"
785 # delete tests not matching --tests= criteria, if any
786 filter_tests($read_results);
787 filter_tests($read_tests);
790 ($loop_counts, $perls, $results, $tests, $order) =
791 ($read_loop_counts, $read_perls, $read_results, $read_tests, $read_order);
795 # merge results across multiple files
797 if ( join(';', sort keys %$tests)
798 ne join(';', sort keys %$read_tests))
800 my $err = "Can't merge multiple read files: "
801 . "they contain differing test sets.\n";
802 if ($OPTS{verbose}) {
803 $err .= "Previous tests:\n";
804 $err .= " $_\n" for sort keys %$tests;
805 $err .= "tests from '$file':\n";
806 $err .= " $_\n" for sort keys %$read_tests;
809 $err .= "Re-run with --verbose to see the differences.\n";
814 if ("@$read_loop_counts" ne "@$loop_counts") {
815 die "Can't merge multiple read files: differing loop counts:\n"
816 . " (previous=(@$loop_counts), "
817 . "'$file'=(@$read_loop_counts))\n";
820 push @$perls, @{$hash->{perls}};
821 foreach my $test (keys %{$hash->{results}}) {
822 foreach my $perl (keys %{$hash->{results}{$test}}) {
823 $results->{$test}{$perl}= $hash->{results}{$test}{$perl};
828 die "Error: --benchfile cannot be used when --read is present\n"
829 if $done_read && defined $OPTS{benchfile};
831 # Gather list of perls to benchmark:
834 unless ($done_read) {
835 # How many times to execute the loop for the two trials. The lower
836 # value is intended to do the loop enough times that branch
837 # prediction has taken hold; the higher loop allows us to see the
838 # branch misses after that
839 $loop_counts = [10, 20];
842 read_tests_file($OPTS{benchfile} // 't/perf/benchmarks');
845 @run_perls = process_puts($perls, @$perl_args);
846 push @$perls, @run_perls;
849 # strip @$order to just the actual tests present
850 $order = [ grep exists $tests->{$_}, @$order ];
852 # Now we know what perls and tests we have, do extra option processing
853 # and checking (done before grinding, so time isn't wasted if we die).
855 if (!$perls or !@$perls) {
856 die "Error: nothing to do: no perls to run, no data to read.\n";
858 if (@$perls < 2 and $OPTS{show} and !$OPTS{raw}) {
859 die "Error: need at least 2 perls for comparison.\n"
863 die "Error: exactly one perl executable must be specified for bisect\n"
865 die "Error: only a single test may be specified with --bisect\n"
866 unless keys %$tests == 1;
869 $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
871 if (defined $OPTS{'sort-perl'}) {
873 select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
876 if (defined $OPTS{'compact'}) {
878 select_a_perl($OPTS{'compact'}, $perls, "--compact");
882 # Run the benchmarks; accumulate with any previously read results.
885 $results = grind_run($tests, $order, \@run_perls, $loop_counts, $results);
889 # Handle the 3 forms of output
891 if (defined $OPTS{write}) {
892 my $json = JSON::PP::encode_json({
893 version => $FORMAT_VERSION,
894 loop_counts => $loop_counts,
901 open my $out, '>:encoding(UTF-8)', $OPTS{write}
902 or die "Error: can't open '$OPTS{write}' for writing: $!\n";
903 print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
904 close $out or die "Error: closing file '$OPTS{write}': $!\n";
907 if ($OPTS{show} or $OPTS{bisect}) {
908 # numerically process the raw data
909 ($processed, $averages) =
910 grind_process($results, $perls, $loop_counts);
914 if (defined $OPTS{compact}) {
915 grind_print_compact($processed, $averages, $OPTS{compact},
916 $perls, $tests, $order);
919 grind_print($processed, $averages, $perls, $tests, $order);
924 # these panics shouldn't happen if the bisect checks above are sound
925 my @r = values %$results;
926 die "Panic: expected exactly one test result in bisect\n"
928 @r = values %{$r[0]};
929 die "Panic: expected exactly one perl result in bisect\n"
931 my $c = $r[0]{$bisect_field};
932 die "Panic: no result in bisect for field '$bisect_field'\n"
935 exit 0 if $bisect_min <= $c and $c <= $bisect_max;
941 # Run cachegrind for every test/perl combo.
942 # It may run several processes in parallel when -j is specified.
943 # Return a hash ref suitable for input to grind_process()
946 my ($tests, $order, $perls, $counts, $results) = @_;
948 # Build a list of all the jobs to run
952 for my $test (grep $tests->{$_}, @$order) {
954 # Create two test progs: one with an empty loop and one with code.
955 # Note that the empty loop is actually '{1;}' rather than '{}';
956 # this causes the loop to have a single nextstate rather than a
957 # stub op, so more closely matches the active loop; e.g.:
958 # {1;} => nextstate; unstack
959 # {$x=1;} => nextstate; const; gvsv; sassign; unstack
961 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
962 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
965 for my $p (@$perls) {
966 my ($perl, $label, $env, @putargs) = @$p;
968 # Run both the empty loop and the active loop
969 # $counts->[0] and $counts->[1] times.
975 $envstr .= "$_=$env->{$_} " for sort keys %$env;
977 my $cmd = "PERL_HASH_SEED=0 $envstr"
978 . "valgrind --tool=cachegrind --branch-sim=yes "
979 . "--cachegrind-out-file=/dev/null "
980 . "$OPTS{grindargs} "
981 . "$perl $OPTS{perlargs} @putargs - $counts->[$j] 2>&1";
982 # for debugging and error messages
983 my $id = "$test/$label "
984 . ($i ? "active" : "empty") . "/"
985 . ($j ? "long" : "short") . " loop";
1002 # Execute each cachegrind and store the results in %results.
1004 local $SIG{PIPE} = 'IGNORE';
1006 my $max_jobs = $OPTS{jobs};
1007 my $running = 0; # count of executing jobs
1008 my %pids; # map pids to jobs
1009 my %fds; # map fds to jobs
1010 my $select = IO::Select->new();
1012 while (@jobs or $running) {
1015 printf "Main loop: pending=%d running=%d\n",
1016 scalar(@jobs), $running;
1021 while (@jobs && $running < $max_jobs) {
1022 my $job = shift @jobs;
1023 my ($id, $cmd) = @$job{qw(id cmd)};
1025 my ($in, $out, $pid);
1026 warn "Starting $id\n" if $OPTS{verbose};
1027 eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
1028 or die "Error: while starting cachegrind subprocess"
1032 $fds{"$out"} = $job;
1033 $job->{out_fd} = $out;
1034 $job->{output} = '';
1041 print "Started pid $pid for $id\n";
1045 # In principle we should write to $in in the main select loop,
1046 # since it may block. In reality,
1047 # a) the code we write to the perl process's stdin is likely
1048 # to be less than the OS's pipe buffer size;
1049 # b) by the time the perl process has read in all its stdin,
1050 # the only output it should have generated is a few lines
1051 # of cachegrind output preamble.
1052 # If these assumptions change, then perform the following print
1053 # in the select loop instead.
1055 print $in $job->{prog};
1059 # Get output of running jobs
1062 printf "Select: waiting on (%s)\n",
1063 join ', ', sort { $a <=> $b } map $fds{$_}{pid},
1067 my @ready = $select->can_read;
1070 printf "Select: pids (%s) ready\n",
1071 join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
1075 die "Panic: select returned no file handles\n";
1078 for my $fd (@ready) {
1079 my $j = $fds{"$fd"};
1080 my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
1081 unless (defined $r) {
1082 die "Panic: Read from process running $j->{id} gave:\n$!";
1089 print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
1092 $select->remove($j->{out_fd});
1094 or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
1096 delete $fds{"$j->{out_fd}"};
1097 my $output = $j->{output};
1105 print "\n$j->{id}/\nCommand: $j->{cmd}\n"
1110 $results->{$j->{test}}{$j->{plabel}}[$j->{active}][$j->{loopix}]
1111 = parse_cachegrind($output, $j->{id}, $j->{perl});
1114 # Reap finished jobs
1117 my $kid = waitpid(-1, WNOHANG);
1121 unless (exists $pids{$kid}) {
1122 die "Panic: reaped unexpected child $kid";
1124 my $j = $pids{$kid};
1126 die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
1127 . "with the following output\n:$j->{output}\n";
1139 # grind_process(): process the data that has been extracted from
1140 # cachegrind's output.
1142 # $res is of the form ->{benchmark_name}{perl_name}[active][count]{field_name},
1143 # where active is 0 or 1 indicating an empty or active loop,
1144 # count is 0 or 1 indicating a short or long loop. E.g.
1146 # $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][10]{Dw_mm}
1148 # The $res data structure is modified in-place by this sub.
1150 # $perls is [ [ perl-exe, perl-label], .... ].
1152 # $counts is [ N, M ] indicating the counts for the short and long loops.
1155 # return \%output, \%averages, where
1157 # $output{benchmark_name}{perl_name}{field_name} = N
1158 # $averages{perl_name}{field_name} = M
1160 # where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
1161 # M is the average raw count over all tests ($OPTS{raw}), or
1162 # 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
1165 my ($res, $perls, $counts) = @_;
1167 # Process the four results for each test/perl combo:
1169 # $res->{benchmark_name}{perl_name}[active][count]{field_name} = n
1171 # $res->{benchmark_name}{perl_name}{field_name} = averaged_n
1173 # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
1174 # empty loops, eliminating startup time
1175 # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
1176 # active loops, eliminating startup time
1177 # (the two startup times may be different because different code
1178 # is being compiled); the difference of the two results above
1179 # divided by the count difference is the time to execute the
1180 # active code once, eliminating both startup and loop overhead.
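# An illustrative example (invented numbers, counts = [10, 20]):
# suppose the empty-loop runs cost 1,500 and 2,500 Ir (startup 500
# plus 100 per loop), and the active runs cost 1,800 and 3,400 Ir
# (startup 600 plus 160 per loop). Then
#   ((3400 - 1800) - (2500 - 1500)) / (20 - 10) = (1600 - 1000) / 10 = 60
# i.e. 60 instructions per execution of the active code, with both
# startup (500/600) and the per-loop overhead (100) cancelled out.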
1182 for my $tests (values %$res) {
1183 for my $r (values %$tests) {
1185 for (keys %{$r->[0][0]}) {
1186 my $n = ( ($r->[1][1]{$_} - $r->[1][0]{$_})
1187 - ($r->[0][1]{$_} - $r->[0][0]{$_})
1188 ) / ($counts->[1] - $counts->[0]);
1199 my $perl_norm = $perls->[$OPTS{norm}][1]; # the label of the reference perl
1201 for my $test_name (keys %$res) {
1202 my $res1 = $res->{$test_name};
1203 my $res2_norm = $res1->{$perl_norm};
1204 for my $perl (keys %$res1) {
1205 my $res2 = $res1->{$perl};
1206 for my $field (keys %$res2) {
1207 my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});
1210 # Avoid annoying '-0.0' displays. Ideally this number
1211 # should never be negative, but fluctuations in
1212 # startup etc can theoretically make this happen
1213 $q = 0 if ($q <= 0 && $q > -0.1);
1214 $totals{$perl}{$field} += $q;
1215 $counts{$perl}{$field}++;
1216 $data{$test_name}{$perl}{$field} = $q;
1220 # $p and $q are notionally integer counts, but
1221 # due to variations in startup etc, it's possible for a
1222 # count which is supposedly zero to be calculated as a
1223 # small positive or negative value.
1224 # In this case, set it to zero. Further below we
1225 # special-case zeros to avoid division by zero errors etc.
1227 $p = 0.0 if $p < 0.01;
1228 $q = 0.0 if $q < 0.01;
1230 if ($p == 0.0 && $q == 0.0) {
1231 # Both perls gave a count of zero, so no change:
1233 $totals{$perl}{$field} += 1;
1234 $counts{$perl}{$field}++;
1235 $data{$test_name}{$perl}{$field} = 1;
1237 elsif ($p == 0.0 || $q == 0.0) {
1238 # If either count is zero, there were too few events
1239 # to give a meaningful ratio (and we will end up with
1240 # division by zero if we try). Mark the result undef,
1241 # indicating that it shouldn't be displayed; and skip
1242 # adding to the average
1243 $data{$test_name}{$perl}{$field} = undef;
1246 # For averages, we record q/p rather than p/q.
1247 # Consider a test where perl_norm took 1000 cycles
1248 # and perlN took 800 cycles. For the individual
1249 # results we display p/q, or 1.25; i.e. a quarter
1250 # quicker. For the averages, we instead sum all
1251 # the 0.8's, which gives the total cycles required to
1252 # execute all tests, with all tests given equal
1253 # weight. Later we reciprocate the final result,
1254 # i.e. 1/(sum(qi/pi)/n)
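# An illustrative example (invented numbers): two tests where perlN
# takes 0.8 and 0.5 of the norm's cycles give sum(qi/pi) = 1.3, so
# the average later displayed is 1/(1.3/2) = 1.538..., i.e. 153.8%.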
1256 $totals{$perl}{$field} += $q/$p;
1257 $counts{$perl}{$field}++;
1258 $data{$test_name}{$perl}{$field} = $p/$q;
1264 # Calculate averages based on %totals and %counts accumulated earlier.
1267 for my $perl (keys %totals) {
1268 my $t = $totals{$perl};
1269 for my $field (keys %$t) {
1270 $averages{$perl}{$field} = $OPTS{raw}
1271 ? $t->{$field} / $counts{$perl}{$field}
1272 # reciprocal - see comments above
1273 : $counts{$perl}{$field} / $t->{$field};
1277 return \%data, \%averages;
1282 # print a standard blurb at the start of the grind display
1292 COND conditional branches
1293 IND indirect branches
1294 _m branch predict miss
1295 _m1 level 1 cache miss
1296 _mm last cache (e.g. L3) miss
1297 - indeterminate percentage (e.g. 1/0)
1302 print "The numbers represent raw counts per loop iteration.\n";
1306 The numbers represent relative counts per loop iteration, compared to
1307 $perls->[$OPTS{norm}][1] at 100.0%.
1308 Higher is better: for example, using half as many instructions gives 200%,
1309 while using twice as many gives 50%.
1315 # return a sorted list of the test names, plus 'AVERAGE'
1317 sub sorted_test_names {
1318 my ($results, $order, $perls) = @_;
1321 unless ($OPTS{average}) {
1322 if (defined $OPTS{'sort-field'}) {
1323 my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
1324 my $perl = $perls->[$perlix][1];
1327 $results->{$a}{$perl}{$field}
1328 <=> $results->{$b}{$perl}{$field}
1333 @names = grep $results->{$_}, @$order;
1337 # No point in displaying average for only one test.
1338 push @names, 'AVERAGE' unless @names == 1;
1343 # grind_print(): display the tabulated results of all the cachegrinds.
1345 # Arguments are of the form:
1346 # $results->{benchmark_name}{perl_name}{field_name} = N
1347 # $averages->{perl_name}{field_name} = M
1348 # $perls = [ [ perl-exe, perl-label ], ... ]
1349 # $tests->{test_name}{desc => ..., ...}
1352 my ($results, $averages, $perls, $tests, $order) = @_;
1354 my @perl_names = map $_->[0], @$perls;
1355 my @perl_labels = map $_->[1], @$perls;
1357 $perl_labels{$_->[0]} = $_->[1] for @$perls;
1359 my $field_label_width = 6;
1360 # Calculate the width to display for each column.
1361 my $min_width = $OPTS{raw} ? 8 : 6;
1362 my @widths = map { length($_) < $min_width ? $min_width : length($_) }
1365 # Print standard header.
1366 grind_blurb($perls);
1368 my @test_names = sorted_test_names($results, $order, $perls);
1370 # If only a single field is to be displayed, use a more compact
1371 # format with only a single line of output per test.
1373 my $one_field = defined $OPTS{fields} && keys(%{$OPTS{fields}}) == 1;
1376 print "Results for field " . (keys(%{$OPTS{fields}}))[0] . ".\n";
1378 # The first column will now contain test names rather than
1379 # field names; calculate the max width.
1381 $field_label_width = 0;
1383 $field_label_width = length if length > $field_label_width;
1386 # Print the perl executables header.
1390 print " " x $field_label_width;
1392 printf " %*s", $widths[$_],
1393 $i ? ('-' x $widths[$_]) : $perl_labels[$_];
1399 # Dump the results for each test.
1401 for my $test_name (@test_names) {
1402 my $doing_ave = ($test_name eq 'AVERAGE');
1403 my $res1 = $doing_ave ? $averages : $results->{$test_name};
1405 unless ($one_field) {
1406 print "\n$test_name";
1407 print "\n$tests->{$test_name}{desc}" unless $doing_ave;
1410 # Print the perl executables header.
1412 print " " x $field_label_width;
1414 printf " %*s", $widths[$_],
1415 $i ? ('-' x $widths[$_]) : $perl_labels[$_];
1421 for my $field (qw(Ir Dr Dw COND IND
1430 next if $OPTS{fields} and ! exists $OPTS{fields}{$field};
1432 if ($field eq 'N') {
1438 printf "%-*s", $field_label_width, $test_name;
1441 printf "%*s", $field_label_width, $field;
1444 for my $i (0..$#widths) {
1445 my $res2 = $res1->{$perl_labels[$i]};
1446 my $p = $res2->{$field};
1448 printf " %*s", $widths[$i], '-';
1450 elsif ($OPTS{raw}) {
1451 printf " %*.1f", $widths[$i], $p;
1454 printf " %*.2f", $widths[$i], $p * 100;
1464 # grind_print_compact(): like grind_print(), but display a single perl
1465 # in a compact form. Has an additional arg, $which_perl, which specifies
1466 # which perl to display.
1468 # Arguments are of the form:
1469 # $results->{benchmark_name}{perl_name}{field_name} = N
1470 # $averages->{perl_name}{field_name} = M
1471 # $perls = [ [ perl-exe, perl-label ], ... ]
1472 # $tests->{test_name}{desc => ..., ...}
1474 sub grind_print_compact {
1475 my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;
1478 # the width to display for each column.
1479 my $width = $OPTS{raw} ? 7 : 6;
1481 # Print standard header.
1482 grind_blurb($perls);
1484 print "\nResults for $perls->[$which_perl][1]\n\n";
1486 my @test_names = sorted_test_names($results, $order, $perls);
1488 # Dump the results for each test.
1490 my @fields = qw( Ir Dr Dw
1496 if ($OPTS{fields}) {
1497 @fields = grep exists $OPTS{fields}{$_}, @fields;
1500 printf " %*s", $width, $_ for @fields;
1502 printf " %*s", $width, '------' for @fields;
1505 for my $test_name (@test_names) {
1506 my $doing_ave = ($test_name eq 'AVERAGE');
1507 my $res = $doing_ave ? $averages : $results->{$test_name};
1508 $res = $res->{$perls->[$which_perl][1]};
1510 for my $field (@fields) {
1511 my $p = $res->{$field};
1513 printf " %*s", $width, '-';
1515 elsif ($OPTS{raw}) {
1516 printf " %*.1f", $width, $p;
1519 printf " %*.2f", $width, $p * 100;
1524 print " $test_name\n";
1529 # do_selftest(): check that we can parse known cachegrind()
1530 # output formats. If the output of cachegrind changes, add a *new*
1531 # test here; keep the old tests to make sure we continue to parse
1539 ==32350== Cachegrind, a cache and branch-prediction profiler
1540 ==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
1541 ==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
1542 ==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
1544 --32350-- warning: L3 cache found, using its data for the LL simulation.
1546 ==32350== I refs: 1,124,055
1547 ==32350== I1 misses: 5,573
1548 ==32350== LLi misses: 3,338
1549 ==32350== I1 miss rate: 0.49%
1550 ==32350== LLi miss rate: 0.29%
1552 ==32350== D refs: 404,275 (259,191 rd + 145,084 wr)
1553 ==32350== D1 misses: 9,608 ( 6,098 rd + 3,510 wr)
1554 ==32350== LLd misses: 5,794 ( 2,781 rd + 3,013 wr)
1555 ==32350== D1 miss rate: 2.3% ( 2.3% + 2.4% )
1556 ==32350== LLd miss rate: 1.4% ( 1.0% + 2.0% )
1558 ==32350== LL refs: 15,181 ( 11,671 rd + 3,510 wr)
1559 ==32350== LL misses: 9,132 ( 6,119 rd + 3,013 wr)
1560 ==32350== LL miss rate: 0.5% ( 0.4% + 2.0% )
1562 ==32350== Branches: 202,372 (197,050 cond + 5,322 ind)
1563 ==32350== Mispredicts: 19,153 ( 17,742 cond + 1,411 ind)
1564 ==32350== Mispred rate: 9.4% ( 9.0% + 26.5% )
1584 my $t = "$_/test.pl";
1588 plan(@tests / 3 * keys %VALID_FIELDS);
1591 my $desc = shift @tests;
1592 my $output = shift @tests;
1593 my $expected = shift @tests;
1594 my $p = parse_cachegrind($output);
1595 for (sort keys %VALID_FIELDS) {
1596 is($p->{$_}, $expected->{$_}, "$desc, $_");