Porting/bench.pl

   1 #!/usr/bin/perl
   2 #
   3 # A tool for analysing the performance of the code snippets found in
   4 # t/perf/benchmarks or similar
   5
   6
   7 =head1 NAME
   8
   9 bench.pl - Compare the performance of perl code snippets across multiple
  10 perls.
  11
  12 =head1 SYNOPSIS
  13
  14     # Basic: run the tests in t/perf/benchmarks against two or
  15     # more perls
  16
  17     bench.pl [options] perlA[=labelA] perlB[=labelB] ...
  18
  19     # run the tests against the same perl twice, with varying options
  20
  21     bench.pl [options] perlA=bigint --args='-Mbigint' perlA=plain
  22
  23     # Run bench on blead, saving results to file; then modify the blead
  24     # binary, and benchmark again, comparing against the saved results
  25
  26     bench.pl [options] --write=blead.time ./perl=blead
  27     # ... hack hack hack, updating ./perl ...
  28     bench.pl --read=blead.time ./perl=hacked
  29
  30     # You can also combine --read with --write and new benchmark runs
  31
  32     bench.pl --read=blead.time --write=last.time -- ./perl=hacked
  33
  34 =head1 DESCRIPTION
  35
  36 By default, F<bench.pl> will run code snippets found in
  37 F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
  38 how many instruction reads, data writes, branches, cache misses, etc. that
  39 one execution of the snippet uses. Usually it will run them against two or
  40 more perl executables and show how much each test has gotten better or
  41 worse.
  42
  43 It is modelled on the F<perlbench> tool, but since it measures instruction
  44 reads etc., rather than timings, it is much more precise and reproducible.
  45 It is also considerably faster, and is capable of running tests in
  46 parallel (with C<-j>). Rather than  displaying a single relative
  47 percentage per test/perl combination, it displays values for 13 different
  48 measurements, such as instruction reads, conditional branch misses etc.
  49
  50 There are options to write the raw data to a file, and to read it back.
  51 This means that you can view the same run data in different views with
  52 different selection and sort options. You can also use this mechanism
  53 to save the results of timing one perl, and then read it back while timing
  54 a modification, so that you don't have rerun the same tests on the same
  55 perl over and over, or have two perl executables built at the same time.
  56
  57 The optional C<=label> after each perl executable is used in the display
  58 output. If you are doing a two step benchmark then you should provide
  59 a label for at least the "base" perl. If a label isn't specified, it
  60 defaults to the name of the perl executable. Labels must be unique across
  61 all current executables, plus any previous ones obtained via --read.
  62
  63 In its most general form, the specification of a perl executable is:
  64
  65     path/perl=+mylabel --args='-foo -bar' --args='-baz' \
  66                        --env='A=a' --env='B=b'
  67
  68 This defines how to run the executable F<path/perl>. It has a label,
  69 which due to the C<+>, is appended to the binary name to give a label of
  70 C<path/perl=+mylabel> (without the C<+>, the label would be just
  71 C<mylabel>).
  72
  73 It can be optionally followed by one or more C<--args> or C<--env>
  74 switches, which specify extra command line arguments or environment
  75 variables to use when invoking that executable. Each C<--env> switch
  76 should be of the form C<--env=VARIABLE=value>. Any C<--arg> values are
  77 concatenated to the eventual command line, along with the global
  78 C<--perlargs> value if any. The above would cause a system() call looking
  79 something like:
  80
  81     PERL_HASH_SEED=0 A=a B=b valgrind --tool=cachegrind \
  82         path/perl -foo -bar -baz ....
  83
  84 =head1 OPTIONS
  85
  86 =head2 General options
  87
  88 =over 4
  89
  90 =item *
  91
  92 --action=I<foo>
  93
  94 What action to perform. The default is  I<grind>, which runs the benchmarks
  95 using I<cachegrind> as the back end. The only other action at the moment is
  96 I<selftest>, which runs some basic sanity checks and produces TAP output.
  97
  98 =item *
  99
 100 --debug
 101
 102 Enable verbose debugging output.
 103
 104 =item *
 105
 106 ---help
 107
 108 Display basic usage information.
 109
 110 =item *
 111
 112 --verbose
 113
 114 Display progress information.
 115
 116 =back
 117
 118 =head2 Test selection options
 119
 120 =over 4
 121
 122 =item *
 123
 124 --tests=I<FOO>
 125
 126 Specify a subset of tests to run (or in the case of C<--read>, to read).
 127 It may be either a comma-separated list of test names, or a regular
 128 expression. For example
 129
 130     --tests=expr::assign::scalar_lex,expr::assign::2list_lex
 131     --tests=/^expr::/
 132
 133
 134 =back
 135
 136 =head2 Input options
 137
 138 =over 4
 139
 140
 141 =item *
 142
 143 -r I<file>
 144 --read=I<file>
 145
 146 Read in saved data from a previous C<--write> run from the specified file.
 147 If C<--tests> is present too, then only tests matching those conditions
 148 are read from the file.
 149
 150 C<--read> may be specified multiple times, in which case the results
 151 across all files are aggregated. The list of test names from each file
 152 (after filtering by C<--tests>) must be identical across all files.
 153
 154 This list of tests is used instead of that obtained from the normal
 155 benchmark file (or C<--benchfile>) for any benchmarks that are run.
 156
 157 The perl labels must be unique across all read in test results.
 158
 159 Requires C<JSON::PP> to be available.
 160
 161 =back
 162
 163 =head2 Benchmarking options
 164
 165 Benchmarks will be run for all perls specified on the command line.
 166 These options can be used to modify the benchmarking behavior:
 167
 168 =over 4
 169
 170 =item *
 171
 172 --autolabel
 173
 174 Generate a unique label for every executable which doesn't have an
 175 explicit C<=label>. Works by stripping out common prefixes and suffixes
 176 from the executable names, then for any non-unique names, appending
 177 C<-0>, C<-1>, etc. text directly surrounding the unique part which look
 178 like version numbers (i.e. which match C</[0-9\.]+/>) aren't stripped.
 179 For example,
 180
 181     perl-5.20.0-threaded  perl-5.22.0-threaded  perl-5.24.0-threaded
 182
 183 stripped to unique parts would be:
 184
 185     20  22  24
 186
 187 but is actually only stripped down to:
 188
 189     5.20.0  5.22.0  5.24.0
 190
 191
 192 =item *
 193
 194 --benchfile=I<foo>
 195
 196 The path of the file which contains the benchmarks (F<t/perf/benchmarks>
 197 by default).
 198
 199 =item *
 200
 201 --grindargs=I<foo>
 202
 203 Optional command-line arguments to pass to all cachegrind invocations.
 204
 205 =item *
 206
 207 -j I<N>
 208 --jobs=I<N>
 209
 210 Run I<N> jobs in parallel (default 1). This determines how many cachegrind
 211 process will run at a time, and should generally be set to the number
 212 of CPUs available.
 213
 214 =item *
 215
 216 --perlargs=I<foo>
 217
 218 Optional command-line arguments to pass to every perl executable.  This
 219 may optionaly be combined with C<--args> switches following individual
 220 perls. For example:
 221
 222     bench.pl --perlargs='-Ilib -It/lib' .... \
 223         perlA --args='-Mstrict' \
 224         perlB --args='-Mwarnings'
 225
 226 would cause the invocations
 227
 228     perlA -Ilib -It/lib -Mstrict
 229     perlB -Ilib -It/lib -Mwarnings
 230
 231 =back
 232
 233 =head2 Output options
 234
 235 Any results accumulated via --read or by running benchmarks can be output
 236 in any or all of these three ways:
 237
 238 =over 4
 239
 240 =item *
 241
 242 -w I<file>
 243 --write=I<file>
 244
 245 Save the raw data to the specified file. It can be read back later with
 246 C<--read>. If combined with C<--read> then the output file will be
 247 the merge of the file read and any additional perls added on the command
 248 line.
 249
 250 Requires C<JSON::PP> to be available.
 251
 252 =item *
 253
 254 --bisect=I<field,minval,maxval>
 255
 256 Exit with a zero status if the named field is in the specified range;
 257 exit with 1 otherwise. It will complain if more than one test or perl has
 258 been specified. It is intended to be called as part of a bisect run, to
 259 determine when something changed.  For example,
 260
 261     bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
 262         ./miniperl
 263
 264 might be called from bisect to find when the number of instruction reads
 265 for test I<foo> falls outside the range 100..105.
 266
 267 =item *
 268
 269 --show
 270
 271 Display the results to stdout in human-readable form.  This is enabled by
 272 default, except with --write and --bisect. The following sub-options alter
 273 how --show behaves.
 274
 275 =over 4
 276
 277 =item *
 278
 279 --average
 280
 281 Only display the overall average, rather than the results for each
 282 individual test.
 283
 284 =item *
 285
 286 --compact=I<perl>
 287
 288 Display the results for a single perl executable in a compact form.
 289 Which perl to display is specified in the same manner as C<--norm>.
 290
 291 =item *
 292
 293 --fields=I<a,b,c>
 294
 295 Display only the specified fields; for example,
 296
 297     --fields=Ir,Ir_m,Ir_mm
 298
 299 If only one field is selected, the output is in more compact form.
 300
 301 =item *
 302
 303 --norm=I<foo>
 304
 305 Specify which perl column in the output to treat as the 100% norm.
 306 It may be a column number (0..N-1) or a perl executable name or label.
 307 It defaults to the leftmost column.
 308
 309 =item *
 310
 311 --raw
 312
 313 Display raw data counts rather than percentages in the outputs. This
 314 allows you to see the exact number of intruction reads, branch misses etc.
 315 for each test/perl combination. It also causes the C<AVERAGE> display
 316 per field to be calculated based on the average of each tests's count
 317 rather than average of each percentage. This means that tests with very
 318 high counts will dominate.
 319
 320 =item *
 321
 322 --sort=I<field:perl>
 323
 324 Order the tests in the output based on the value of I<field> in the
 325 column I<perl>. The I<perl> value is as per C<--norm>. For example
 326
 327     bench.pl --sort=Dw:perl-5.20.0 \
 328         perl-5.16.0 perl-5.18.0 perl-5.20.0
 329
 330 =back
 331
 332 =back
 333
 334 =cut
 335
 336
 337
 338 use 5.010000;
 339 use warnings;
 340 use strict;
 341 use Getopt::Long qw(:config no_auto_abbrev require_order);
 342 use IPC::Open2 ();
 343 use IO::Select;
 344 use IO::File;
 345 use POSIX ":sys_wait_h";
 346
 347 # The version of the file format used to save data. We refuse to process
 348 # the file if the integer component differs.
 349
 350 my $FORMAT_VERSION = 1.0;
 351
 352 # The fields we know about
 353
 354 my %VALID_FIELDS = map { $_ => 1 }
 355     qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
 356
 357 sub usage {
 358     die <<EOF;
 359 Usage: $0 [options] -- perl[=label] ...
 360
 361 General options:
 362
 363   --action=foo       What action to perform [default: grind]:
 364                         grind      run the code under cachegrind
 365                         selftest   perform a selftest; produce TAP output
 366   --debug            Enable verbose debugging output.
 367   --help             Display this help.
 368   --verbose          Display progress information.
 369
 370
 371 Selection:
 372
 373   --tests=FOO        Select only the specified tests for reading, benchmarking
 374                        and display.  FOO may be either a list of tests or
 375                        a pattern: 'foo,bar,baz' or '/regex/';
 376                        [default: all tests].
 377
 378 Input:
 379
 380   -r|--read=file     Read in previously saved data from the specified file.
 381                         May be repeated, and be used together with new
 382                         benchmarking to create combined results.
 383
 384 Benchmarking:
 385   Benchmarks will be run for any perl specified on the command line.
 386   These options can be used to modify the benchmarking behavior:
 387
 388   --autolabel        generate labels for any executables without one
 389   --benchfile=foo    File containing the benchmarks.
 390                          [default: t/perf/benchmarks].
 391   --grindargs=foo    Optional command-line args to pass to cachegrind.
 392   -j|--jobs=N        Run N jobs in parallel [default 1].
 393   --perlargs=foo     Optional command-line args to pass to each perl to run.
 394
 395 Output:
 396   Any results accumulated via --read or running benchmarks can be output
 397   in any or all of these three ways:
 398
 399   -w|--write=file    Save the raw data to the specified file (may be read
 400                        back later with --read).
 401
 402   --bisect=f,min,max Exit with a zero status if the named field f is in
 403                        the specified min..max range; exit 1 otherwise.
 404                        Produces no other output. Only legal if a single
 405                        benchmark test has been specified.
 406
 407   --show             Display the results to stdout in human-readable form.
 408                        This is enabled by default, except with --write and
 409                        --bisect. The following sub-options alter how
 410                        --show behaves.
 411
 412     --average          Only display average, not individual test results.
 413     --compact=perl     Display the results of a single perl in compact form.
 414                        Which perl specified like --norm
 415     --fields=a,b,c     Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
 416     --norm=perl        Which perl column to treat as 100%; may be a column
 417                          number (0..N-1) or a perl executable name or label;
 418                          [default: 0].
 419     --raw              Display raw data counts rather than percentages.
 420     --sort=field:perl  Sort the tests based on the value of 'field' in the
 421                        column 'perl'. The perl value is as per --norm.
 422
 423
 424 The command line ends with one or more specified perl executables,
 425 which will be searched for in the current \$PATH. Each binary name may
 426 have an optional =LABEL appended, which will be used rather than the
 427 executable name in output. The labels must be unique across all current
 428 executables and previous runs obtained via --read. Each executable may
 429 optionally be succeeded by --args= and --env= to specify per-executable
 430 arguments and environmenbt variables:
 431
 432     perl-5.24.0=strict --args='-Mwarnings -Mstrict' --env='FOO=foo' \
 433     perl-5.24.0=plain
 434 EOF
 435 }
 436
 437 my %OPTS = (
 438     action    => 'grind',
 439     average   => 0,
 440     benchfile => undef,
 441     bisect    => undef,
 442     compact   => undef,
 443     debug     => 0,
 444     grindargs => '',
 445     fields    => undef,
 446     jobs      => 1,
 447     norm      => 0,
 448     perlargs  => '',
 449     raw       => 0,
 450     read      => undef,
 451     show      => undef,
 452     sort      => undef,
 453     tests     => undef,
 454     verbose   => 0,
 455     write     => undef,
 456 );
 457
 458
 459 # process command-line args and call top-level action
 460
 461 {
 462     GetOptions(
 463         'action=s'    => \$OPTS{action},
 464         'average'     => \$OPTS{average},
 465         'autolabel'   => \$OPTS{autolabel},
 466         'benchfile=s' => \$OPTS{benchfile},
 467         'bisect=s'    => \$OPTS{bisect},
 468         'compact=s'   => \$OPTS{compact},
 469         'debug'       => \$OPTS{debug},
 470         'grindargs=s' => \$OPTS{grindargs},
 471         'help|h'      => \$OPTS{help},
 472         'fields=s'    => \$OPTS{fields},
 473         'jobs|j=i'    => \$OPTS{jobs},
 474         'norm=s'      => \$OPTS{norm},
 475         'perlargs=s'  => \$OPTS{perlargs},
 476         'raw'         => \$OPTS{raw},
 477         'read|r=s@'   => \$OPTS{read},
 478         'show'        => \$OPTS{show},
 479         'sort=s'      => \$OPTS{sort},
 480         'tests=s'     => \$OPTS{tests},
 481         'verbose'     => \$OPTS{verbose},
 482         'write|w=s'   => \$OPTS{write},
 483     ) or die "Use the -h option for usage information.\n";
 484
 485     usage if $OPTS{help};
 486
 487
 488     if (defined $OPTS{read} or defined $OPTS{write}) {
 489         # fail early if it's not present
 490         require JSON::PP;
 491     }
 492
 493     if (defined $OPTS{fields}) {
 494         my @f = split /,/, $OPTS{fields};
 495         for (@f) {
 496             die "Error: --fields: unknown field '$_'\n"
 497                 unless $VALID_FIELDS{$_};
 498         }
 499         my %f = map { $_ => 1 } @f;
 500         $OPTS{fields} = \%f;
 501     }
 502
 503     my %valid_actions = qw(grind 1 selftest 1);
 504     unless ($valid_actions{$OPTS{action}}) {
 505         die "Error: unrecognised action '$OPTS{action}'\n"
 506           . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
 507     }
 508
 509     if (defined $OPTS{sort}) {
 510         my @s = split /:/, $OPTS{sort};
 511         if (@s != 2) {
 512             die "Error: --sort argument should be of the form field:perl: "
 513               . "'$OPTS{sort}'\n";
 514         }
 515         my ($field, $perl) = @s;
 516         die "Error: --sort: unknown field '$field'\n"
 517             unless $VALID_FIELDS{$field};
 518         # the 'perl' value will be validated later, after we have processed
 519         # the perls
 520         $OPTS{'sort-field'} = $field;
 521         $OPTS{'sort-perl'}  = $perl;
 522     }
 523
 524     # show is the default output action
 525     $OPTS{show} = 1 unless $OPTS{write} || $OPTS{bisect};
 526
 527     if ($OPTS{action} eq 'grind') {
 528         do_grind(\@ARGV);
 529     }
 530     elsif ($OPTS{action} eq 'selftest') {
 531         if (@ARGV) {
 532             die "Error: no perl executables may be specified with selftest\n"
 533         }
 534         do_selftest();
 535     }
 536 }
 537 exit 0;
 538
 539
 540 # Given a hash ref keyed by test names, filter it by deleting unwanted
 541 # tests, based on $OPTS{tests}.
 542
 543 sub filter_tests {
 544     my ($tests) = @_;
 545
 546     my $opt = $OPTS{tests};
 547     return unless defined $opt;
 548
 549     my @tests;
 550
 551     if ($opt =~ m{^/}) {
 552         $opt =~ s{^/(.+)/$}{$1}
 553             or die "Error: --tests regex must be of the form /.../\n";
 554         for (keys %$tests) {
 555             delete $tests->{$_} unless /$opt/;
 556         }
 557     }
 558     else {
 559         my %t;
 560         for (split /,/, $opt) {
 561             $t{$_} = 1;
 562             next if exists $tests->{$_};
 563
 564             my $e = "Error: no such test found: '$_'\n";
 565             if ($OPTS{verbose}) {
 566                 $e .= "Valid test names are:\n";
 567                 $e .= "  $_\n" for sort keys %$tests;
 568             }
 569             else {
 570                 $e .= "Re-run with --verbose for a list of valid tests.\n";
 571             }
 572             die $e;
 573         }
 574         for (keys %$tests) {
 575             delete $tests->{$_} unless exists $t{$_};
 576         }
 577     }
 578     die "Error: no tests to run\n" unless %$tests;
 579 }
 580
 581
 582 # Read in the test file, and filter out any tests excluded by $OPTS{tests}
 583 # return a hash ref { testname => { test }, ... }
 584 # and an array ref of the original test names order,
 585
 586 sub read_tests_file {
 587     my ($file) = @_;
 588
 589     my $ta;
 590     {
 591         local @INC = ('.');
 592         $ta = do $file;
 593     }
 594     unless ($ta) {
 595         die "Error: can't load '$file': code didn't return a true value\n"
 596                 if defined $ta;
 597         die "Error: can't parse '$file':\n$@\n" if $@;
 598         die "Error: can't read '$file': $!\n";
 599     }
 600
 601     my @orig_order;
 602     for (my $i=0; $i < @$ta; $i += 2) {
 603         push @orig_order, $ta->[$i];
 604     }
 605
 606     my $t = { @$ta };
 607     filter_tests($t);
 608     return $t, \@orig_order;
 609 }
 610
 611
 612 # Process the perl name/label/column argument of options like --norm and
 613 # --sort.  Return the index of the matching perl.
 614
 615 sub select_a_perl {
 616     my ($perl, $perls, $who) = @_;
 617     $perls||=[];
 618     if ($perl =~ /^[0-9]$/) {
 619         die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
 620                                         unless $perl < @$perls;
 621         return $perl;
 622     }
 623     else {
 624         my @perl = grep    $perls->[$_][0] eq $perl
 625                         || $perls->[$_][1] eq $perl,
 626                         0..$#$perls;
 627         die "Error: $who: unrecognised perl '$perl'\n"
 628                                         unless @perl;
 629         die "Error: $who: ambiguous perl '$perl'\n"
 630                                         if @perl > 1;
 631         return $perl[0];
 632     }
 633 }
 634
 635
 636 # Validate the list of perl executables on the command line.
 637 # The general form is
 638 #
 639 #      a_perl_exe[=label] [ --args='perl args'] [ --env='FOO=foo' ]
 640 #
 641 # Return a list of [ exe, label, {env}, 'args' ] tuples
 642
 643 sub process_executables_list {
 644     my ($read_perls, @cmd_line_args) = @_;
 645
 646     my @results; # returned, each item is [ perlexe, label, {env}, 'args' ]
 647     my %seen_from_reads = map { $_->[1] => 1 } @$read_perls;
 648     my %seen;
 649     my @labels;
 650
 651     while (@cmd_line_args) {
 652         my $item = shift @cmd_line_args;
 653
 654         if ($item =~ /^--(.*)$/) {
 655             my ($switch, $val) = split /=/, $1, 2;
 656             die "Error: unrecognised executable switch '--$switch'\n"
 657                 unless $switch =~  /^(args|env)$/;
 658
 659             die "Error: --$switch without a preceding executable name\n"
 660                 unless @results;
 661
 662             unless (defined $val) {
 663                 $val = shift @cmd_line_args;
 664                 die "Error: --$switch is missing value\n"
 665                     unless defined $val;
 666             }
 667
 668             if ($switch eq 'args') {
 669                 $results[-1][3] .= " $val";
 670             }
 671             else {
 672                 # --env
 673                 $val =~ /^(\w+)=(.*)$/
 674                     or die "Error: --env is missing =value\n";
 675                 $results[-1][2]{$1} = $2;
 676             }
 677
 678             next;
 679         }
 680
 681         # whatever is left must be the name of an executable
 682
 683         my ($perl, $label) = split /=/, $item, 2;
 684         push @labels, $label;
 685         unless ($OPTS{autolabel}) {
 686             $label //= $perl;
 687             $label = $perl.$label if $label =~ /^\+/;
 688         }
 689
 690         die "Error: duplicate label '$label': "
 691                         . "each executable must have a unique label\n"
 692             if defined $label && $seen{$label}++;
 693
 694         die "Error: duplicate label '$label': "
 695                         . "seen both in --read file and on command line\n"
 696             if defined $label && $seen_from_reads{$label};
 697
 698         my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
 699         die "Error: unable to execute '$perl': $r\n" if $r ne "ok\n";
 700
 701         push @results, [ $perl, $label,  { }, '' ];
 702     }
 703
 704     # make args '' by default
 705     for (@results) {
 706         push @$_, '' unless @$_ > 3;
 707     }
 708
 709     if ($OPTS{autolabel}) {
 710
 711         # create a list of [ 'perl-path', $i ] pairs for all
 712         # $results[$i] which don't have a label
 713         my @labels;
 714         for (0..$#results)  {
 715             push @labels, [ $results[$_][0], $_ ]
 716                         unless defined $results[$_][1];
 717         }
 718
 719         if (@labels) {
 720             # strip off common prefixes
 721             my $pre = '';
 722           STRIP_PREFIX:
 723             while (length $labels[0][0]) {
 724                 my $c = substr($labels[0][0], 0, 1);
 725                 for my $i (1..$#labels) {
 726                     last STRIP_PREFIX if substr($labels[$i][0], 0, 1) ne $c;
 727                 }
 728                 substr($labels[$_][0], 0, 1)  = '' for 0..$#labels;
 729                 $pre .= $c;
 730             }
 731             # add back any final "version-ish" prefix
 732             $pre =~ s/^.*?([0-9\.]*)$/$1/;
 733             substr($labels[$_][0], 0, 0) = $pre for 0..$#labels;
 734
 735             # strip off common suffixes
 736             my $post = '';
 737           STRIP_SUFFFIX:
 738             while (length $labels[0][0]) {
 739                 my $c = substr($labels[0][0], -1, 1);
 740                 for my $i (1..$#labels) {
 741                     last STRIP_SUFFFIX if substr($labels[$i][0], -1, 1) ne $c;
 742                 }
 743                 chop $labels[$_][0] for 0..$#labels;
 744                 $post = "$c$post";
 745             }
 746             # add back any initial "version-ish" suffix
 747             $post =~ s/^([0-9\.]*).*$/$1/;
 748             $labels[$_][0] .= $post for 0..$#labels;
 749
 750             # now de-duplicate labels
 751
 752             my (%seen, %index);
 753             $seen{$read_perls->[$_][1]}++ for 0..$#$read_perls;
 754             $seen{$labels[$_][0]}++ for 0..$#labels;
 755
 756             for my $i (0..$#labels)  {
 757                 my $label = $labels[$i][0];
 758                 next unless $seen{$label} > 1;
 759                 my $d = length($label) ? '-' : '';
 760                 my $n = $index{$label} // 0;
 761                 $n++ while exists $seen{"$label$d$n"};
 762                 $labels[$i][0] .= "$d$n";
 763                 $index{$label} = $n + 1;
 764             }
 765
 766             # finally, store them
 767             $results[$_->[1]][1]= $_->[0] for @labels;
 768         }
 769     }
 770
 771
 772     return @results;
 773 }
 774
 775
 776
 777 # Return a string containing perl test code wrapped in a loop
 778 # that runs $ARGV[0] times
 779
 780 sub make_perl_prog {
 781     my ($test, $desc, $setup, $code) = @_;
 782
 783     return <<EOF;
 784 # $desc
 785 package $test;
 786 BEGIN { srand(0) }
 787 $setup;
 788 for my \$__loop__ (1..\$ARGV[0]) {
 789     $code;
 790 }
 791 EOF
 792 }
 793
 794
 795 # Parse the output from cachegrind. Return a hash ref.
 796 # See do_selftest() for examples of the output format.
 797
 798 sub parse_cachegrind {
 799     my ($output, $id, $perl) = @_;
 800
 801     my %res;
 802
 803     my @lines = split /\n/, $output;
 804     for (@lines) {
 805         unless (s/(==\d+==)|(--\d+--) //) {
 806             die "Error: while executing $id:\n"
 807               . "unexpected code or cachegrind output:\n$_\n";
 808         }
 809         if (/I   refs:\s+([\d,]+)/) {
 810             $res{Ir} = $1;
 811         }
 812         elsif (/I1  misses:\s+([\d,]+)/) {
 813             $res{Ir_m1} = $1;
 814         }
 815         elsif (/LLi misses:\s+([\d,]+)/) {
 816             $res{Ir_mm} = $1;
 817         }
 818         elsif (/D   refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
 819             @res{qw(Dr Dw)} = ($1,$2);
 820         }
 821         elsif (/D1  misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
 822             @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
 823         }
 824         elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
 825             @res{qw(Dr_mm Dw_mm)} = ($1,$2);
 826         }
 827         elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
 828             @res{qw(COND IND)} = ($1,$2);
 829         }
 830         elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
 831             @res{qw(COND_m IND_m)} = ($1,$2);
 832         }
 833     }
 834
 835     for my $field (keys %VALID_FIELDS) {
 836         die "Error: can't parse '$field' field from cachegrind output:\n$output"
 837             unless exists $res{$field};
 838         $res{$field} =~ s/,//g;
 839     }
 840
 841     return \%res;
 842 }
 843
 844
 845 # Handle the 'grind' action
 846
 847 sub do_grind {
 848     my ($cmd_line_args) = @_; # the residue of @ARGV after option processing
 849
 850     my ($loop_counts, $perls, $results, $tests, $order, @run_perls);
 851     my ($bisect_field, $bisect_min, $bisect_max);
 852     my ($done_read, $processed, $averages, %seen_labels);
 853
 854     if (defined $OPTS{bisect}) {
 855         ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
 856         die "Error: --bisect option must be of form 'field,integer,integer'\n"
 857             unless
 858                     defined $bisect_max
 859                 and $bisect_min =~ /^[0-9]+$/
 860                 and $bisect_max =~ /^[0-9]+$/;
 861
 862         die "Error: unrecognised field '$bisect_field' in --bisect option\n"
 863             unless $VALID_FIELDS{$bisect_field};
 864
 865         die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
 866             if $bisect_min > $bisect_max;
 867     }
 868
 869     # Read in previous benchmark results
 870
 871     foreach my $file (@{$OPTS{read}}) {
 872         open my $in, '<:encoding(UTF-8)', $file
 873             or die "Error: can't open '$file' for reading: $!\n";
 874         my $data = do { local $/; <$in> };
 875         close $in;
 876
 877         my $hash = JSON::PP::decode_json($data);
 878         if (int($FORMAT_VERSION) < int($hash->{version})) {
 879             die "Error: unsupported version $hash->{version} in file"
 880               . " '$file' (too new)\n";
 881         }
 882         my ($read_loop_counts, $read_perls, $read_results, $read_tests, $read_order) =
 883             @$hash{qw(loop_counts perls results tests order)};
 884
 885         # check file contents for consistency
 886         my $k_o = join ';', sort @$read_order;
 887         my $k_r = join ';', sort keys %$read_results;
 888         my $k_t = join ';', sort keys %$read_tests;
 889         die "File '$file' contains no results\n" unless length $k_r;
 890         die "File '$file' contains differing test and results names\n"
 891             unless $k_r eq $k_t;
 892         die "File '$file' contains differing test and sort order names\n"
 893             unless $k_o eq $k_t;
 894
 895         # delete tests not matching --tests= criteria, if any
 896         filter_tests($read_results);
 897         filter_tests($read_tests);
 898
 899         for my $perl (@$read_perls) {
 900             my $label = $perl->[1];
 901             die "Error: duplicate label '$label': seen in file '$file'\n"
 902                 if exists $seen_labels{$label};
 903             $seen_labels{$label}++;
 904         }
 905
 906         if (!$done_read) {
 907             ($loop_counts, $perls, $results, $tests, $order) =
 908                 ($read_loop_counts, $read_perls, $read_results, $read_tests, $read_order);
 909             $done_read = 1;
 910         }
 911         else {
 912             # merge results across multiple files
 913
 914             if (   join(';', sort keys %$tests)
 915                 ne join(';', sort keys %$read_tests))
 916             {
 917                 my $err = "Can't merge multiple read files: "
 918                         . "they contain differing test sets.\n";
 919                 if ($OPTS{verbose}) {
 920                     $err .= "Previous tests:\n";
 921                     $err .= "  $_\n" for sort keys %$tests;
 922                     $err .= "tests from '$file':\n";
 923                     $err .= "  $_\n" for sort keys %$read_tests;
 924                 }
 925                 else {
 926                     $err .= "Re-run with --verbose to see the differences.\n";
 927                 }
 928                 die $err;
 929             }
 930
 931             if ("@$read_loop_counts" ne "@$loop_counts") {
 932                 die "Can't merge multiple read files: differing loop counts:\n"
 933                 . "  (previous=(@$loop_counts), "
 934                 . "'$file'=(@$read_loop_counts))\n";
 935             }
 936
 937             push @$perls, @{$read_perls};
 938             foreach my $test (keys %{$read_results}) {
 939                 foreach my $label (keys %{$read_results->{$test}}) {
 940                     $results->{$test}{$label}= $read_results->{$test}{$label};
 941                 }
 942             }
 943         }
 944     }
 945     die "Error: --benchfile cannot be used when --read is present\n"
 946         if $done_read && defined $OPTS{benchfile};
 947
 948     # Gather list of perls to benchmark:
 949
 950     if (@$cmd_line_args) {
 951         unless ($done_read) {
 952             # How many times to execute the loop for the two trials. The lower
 953             # value is intended to do the loop enough times that branch
 954             # prediction has taken hold; the higher loop allows us to see the
 955             # branch misses after that
 956             $loop_counts = [10, 20];
 957
 958             ($tests, $order) =
 959                 read_tests_file($OPTS{benchfile} // 't/perf/benchmarks');
 960         }
 961
 962         @run_perls = process_executables_list($perls, @$cmd_line_args);
 963         push @$perls, @run_perls;
 964     }
 965
 966     # strip @$order to just the actual tests present
 967     $order = [ grep exists $tests->{$_}, @$order ];
 968
 969     # Now we know what perls and tests we have, do extra option processing
 970     # and checking (done before grinding, so time isn't wasted if we die).
 971
 972     if (!$perls or !@$perls) {
 973         die "Error: nothing to do: no perls to run, no data to read.\n";
 974     }
 975     if (@$perls < 2 and $OPTS{show} and !$OPTS{raw}) {
 976         die "Error: need at least 2 perls for comparison.\n"
 977     }
 978
 979     if ($OPTS{bisect}) {
 980         die "Error: exactly one perl executable must be specified for bisect\n"
 981             unless @$perls == 1;
 982         die "Error: only a single test may be specified with --bisect\n"
 983             unless keys %$tests == 1;
 984     }
 985
 986     $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
 987
 988     if (defined $OPTS{'sort-perl'}) {
 989         $OPTS{'sort-perl'} =
 990                 select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
 991     }
 992
 993     if (defined $OPTS{'compact'}) {
 994         $OPTS{'compact'} =
 995                 select_a_perl($OPTS{'compact'}, $perls, "--compact");
 996     }
 997
 998
 999     # Run the benchmarks; accumulate with any previously read # results.
1000
1001     if (@run_perls) {
1002         $results = grind_run($tests, $order, \@run_perls, $loop_counts, $results);
1003     }
1004
1005
1006     # Handle the 3 forms of output
1007
1008     if (defined $OPTS{write}) {
1009         my $json = JSON::PP::encode_json({
1010                     version      => $FORMAT_VERSION,
1011                     loop_counts  => $loop_counts,
1012                     perls        => $perls,
1013                     results      => $results,
1014                     tests        => $tests,
1015                     order        => $order,
1016                 });
1017
1018         open my $out, '>:encoding(UTF-8)', $OPTS{write}
1019             or die "Error: can't open '$OPTS{write}' for writing: $!\n";
1020         print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
1021         close $out       or die "Error: closing file '$OPTS{write}': $!\n";
1022     }
1023
1024     if ($OPTS{show} or $OPTS{bisect}) {
1025         # numerically process the raw data
1026         ($processed, $averages) =
1027                     grind_process($results, $perls, $loop_counts);
1028     }
1029
1030     if ($OPTS{show}) {
1031         if (defined $OPTS{compact}) {
1032             grind_print_compact($processed, $averages, $OPTS{compact},
1033                                 $perls, $tests, $order);
1034         }
1035         else {
1036             grind_print($processed, $averages, $perls, $tests, $order);
1037         }
1038     }
1039
1040     if ($OPTS{bisect}) {
1041         # these panics shouldn't happen if the bisect checks above are sound
1042         my @r = values %$results;
1043         die "Panic: expected exactly one test result in bisect\n"
1044                                                         if @r != 1;
1045         @r = values %{$r[0]};
1046         die "Panic: expected exactly one perl result in bisect\n"
1047                                                         if @r != 1;
1048         my $c = $r[0]{$bisect_field};
1049         die "Panic: no result in bisect for field '$bisect_field'\n"
1050                                                         unless defined $c;
1051
1052         print "Bisect: $bisect_field had the value $c\n";
1053
1054         exit 0 if $bisect_min <= $c and $c <= $bisect_max;
1055         exit 1;
1056     }
1057 }
1058
1059
1060 # Run cachegrind for every test/perl combo.
1061 # It may run several processes in parallel when -j is specified.
1062 # Return a hash ref suitable for input to grind_process()
1063
1064 sub grind_run {
1065     my ($tests, $order, $perls, $counts, $results) = @_;
1066
1067     # Build a list of all the jobs to run
1068
1069     my @jobs;
1070
1071     for my $test (grep $tests->{$_}, @$order) {
1072
1073         # Create two test progs: one with an empty loop and one with code.
1074         # Note that the empty loop is actually '{1;}' rather than '{}';
1075         # this causes the loop to have a single nextstate rather than a
1076         # stub op, so more closely matches the active loop; e.g.:
1077         #   {1;}    => nextstate;                       unstack
1078         #   {$x=1;} => nextstate; const; gvsv; sassign; unstack
1079         my @prog = (
1080             make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
1081             make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
1082         );
1083
1084         for my $p (@$perls) {
1085             my ($perl, $label, $env, $args) = @$p;
1086
1087             # Run both the empty loop and the active loop
1088             # $counts->[0] and $counts->[1] times.
1089
1090             for my $i (0,1) {
1091                 for my $j (0,1) {
1092                     my $envstr = '';
1093                     if (ref $env) {
1094                         $envstr .= "$_=$env->{$_} " for sort keys %$env;
1095                     }
1096                     my $cmd = "PERL_HASH_SEED=0 $envstr"
1097                             . "valgrind --tool=cachegrind  --branch-sim=yes "
1098                             . "--cachegrind-out-file=/dev/null "
1099                             . "$OPTS{grindargs} "
1100                             . "$perl $OPTS{perlargs} $args - $counts->[$j] 2>&1";
1101                     # for debugging and error messages
1102                     my $id = "$test/$label "
1103                         . ($i ? "active" : "empty") . "/"
1104                         . ($j ? "long"   : "short") . " loop";
1105
1106                     push @jobs, {
1107                         test   => $test,
1108                         perl   => $perl,
1109                         plabel => $label,
1110                         cmd    => $cmd,
1111                         prog   => $prog[$i],
1112                         active => $i,
1113                         loopix => $j,
1114                         id     => $id,
1115                     };
1116                 }
1117             }
1118         }
1119     }
1120
1121     # Execute each cachegrind and store the results in %results.
1122
1123     local $SIG{PIPE} = 'IGNORE';
1124
1125     my $max_jobs = $OPTS{jobs};
1126     my $running  = 0; # count of executing jobs
1127     my %pids;         # map pids to jobs
1128     my %fds;          # map fds  to jobs
1129     my $select = IO::Select->new();
1130
1131     while (@jobs or $running) {
1132
1133         if ($OPTS{debug}) {
1134             printf "Main loop: pending=%d running=%d\n",
1135                 scalar(@jobs), $running;
1136         }
1137
1138         # Start new jobs
1139
1140         while (@jobs && $running < $max_jobs) {
1141             my $job = shift @jobs;
1142             my ($id, $cmd) =@$job{qw(id cmd)};
1143
1144             my ($in, $out, $pid);
1145             warn "Starting $id\n" if $OPTS{verbose};
1146             eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
1147                 or die "Error: while starting cachegrind subprocess"
1148                    ." for $id:\n$@";
1149             $running++;
1150             $pids{$pid}    = $job;
1151             $fds{"$out"}   = $job;
1152             $job->{out_fd} = $out;
1153             $job->{output} = '';
1154             $job->{pid}    = $pid;
1155
1156             $out->blocking(0);
1157             $select->add($out);
1158
1159             if ($OPTS{debug}) {
1160                 print "Started pid $pid for $id\n";
1161             }
1162
1163             # Note:
1164             # In principle we should write to $in in the main select loop,
1165             # since it may block. In reality,
1166             #  a) the code we write to the perl process's stdin is likely
1167             #     to be less than the OS's pipe buffer size;
1168             #  b) by the time the perl process has read in all its stdin,
1169             #     the only output it should have generated is a few lines
1170             #     of cachegrind output preamble.
1171             # If these assumptions change, then perform the following print
1172             # in the select loop instead.
1173
1174             print $in $job->{prog};
1175             close $in;
1176         }
1177
1178         # Get output of running jobs
1179
1180         if ($OPTS{debug}) {
1181             printf "Select: waiting on (%s)\n",
1182                 join ', ', sort { $a <=> $b } map $fds{$_}{pid},
1183                             $select->handles;
1184         }
1185
1186         my @ready = $select->can_read;
1187
1188         if ($OPTS{debug}) {
1189             printf "Select: pids (%s) ready\n",
1190                 join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
1191         }
1192
1193         unless (@ready) {
1194             die "Panic: select returned no file handles\n";
1195         }
1196
1197         for my $fd (@ready) {
1198             my $j = $fds{"$fd"};
1199             my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
1200             unless (defined $r) {
1201                 die "Panic: Read from process running $j->{id} gave:\n$!";
1202             }
1203             next if $r;
1204
1205             # EOF
1206
1207             if ($OPTS{debug}) {
1208                 print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
1209             }
1210
1211             $select->remove($j->{out_fd});
1212             close($j->{out_fd})
1213                 or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
1214             $running--;
1215             delete $fds{"$j->{out_fd}"};
1216             my $output = $j->{output};
1217
1218             if ($OPTS{debug}) {
1219                 my $p = $j->{prog};
1220                 $p =~ s/^/    : /mg;
1221                 my $o = $output;
1222                 $o =~ s/^/    : /mg;
1223
1224                 print "\n$j->{id}/\nCommand: $j->{cmd}\n"
1225                     . "Input:\n$p"
1226                     . "Output\n$o";
1227             }
1228
1229             $results->{$j->{test}}{$j->{plabel}}[$j->{active}][$j->{loopix}]
1230                     = parse_cachegrind($output, $j->{id}, $j->{perl});
1231         }
1232
1233         # Reap finished jobs
1234
1235         while (1) {
1236             my $kid = waitpid(-1, WNOHANG);
1237             my $ret = $?;
1238             last if $kid <= 0;
1239
1240             unless (exists $pids{$kid}) {
1241                 die "Panic: reaped unexpected child $kid";
1242             }
1243             my $j = $pids{$kid};
1244             if ($ret) {
1245                 die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
1246                     . "with the following output\n:$j->{output}\n";
1247             }
1248             delete $pids{$kid};
1249         }
1250     }
1251
1252     return $results;
1253 }
1254
1255
1256
1257
1258 # grind_process(): process the data that has been extracted from
1259 # cachgegrind's output.
1260 #
1261 # $res is of the form ->{benchmark_name}{perl_label}[active][count]{field_name},
1262 # where active is 0 or 1 indicating an empty or active loop,
1263 # count is 0 or 1 indicating a short or long loop. E.g.
1264 #
1265 #    $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][10]{Dw_mm}
1266 #
1267 # The $res data structure is modified in-place by this sub.
1268 #
1269 # $perls is [ [ perl-exe, perl-label], .... ].
1270 #
1271 # $counts is [ N, M ] indicating the counts for the short and long loops.
1272 #
1273 #
1274 # return \%output, \%averages, where
1275 #
1276 # $output{benchmark_name}{perl_label}{field_name} = N
1277 # $averages{perl_label}{field_name} = M
1278 #
1279 # where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
1280 # M is the average raw count over all tests ($OPTS{raw}), or
1281 # 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
1282
1283 sub grind_process {
1284     my ($res, $perls, $counts) = @_;
1285
1286     # Process the four results for each test/perf combo:
1287     # Convert
1288     #    $res->{benchmark_name}{perl_label}[active][count]{field_name} = n
1289     # to
1290     #    $res->{benchmark_name}{perl_label}{field_name} = averaged_n
1291     #
1292     # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
1293     #                     empty loops, eliminating startup time
1294     # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
1295     #                     active loops, eliminating startup time
1296     # (the two startup times may be different because different code
1297     # is being compiled); the difference of the two results above
1298     # divided by the count difference is the time to execute the
1299     # active code once, eliminating both startup and loop overhead.
1300
1301     for my $tests (values %$res) {
1302         for my $r (values %$tests) {
1303             my $r2;
1304             for (keys %{$r->[0][0]}) {
1305                 my $n = (  ($r->[1][1]{$_} - $r->[1][0]{$_})
1306                          - ($r->[0][1]{$_} - $r->[0][0]{$_})
1307                         ) / ($counts->[1] - $counts->[0]);
1308                 $r2->{$_} = $n;
1309             }
1310             $r = $r2;
1311         }
1312     }
1313
1314     my %totals;
1315     my %counts;
1316     my %data;
1317
1318     my $perl_norm = $perls->[$OPTS{norm}][1]; # the label of the reference perl
1319
1320     for my $test_name (keys %$res) {
1321         my $res1 = $res->{$test_name};
1322         my $res2_norm = $res1->{$perl_norm};
1323         for my $perl (keys %$res1) {
1324             my $res2 = $res1->{$perl};
1325             for my $field (keys %$res2) {
1326                 my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});
1327
1328                 if ($OPTS{raw}) {
1329                     # Avoid annoying '-0.0' displays. Ideally this number
1330                     # should never be negative, but fluctuations in
1331                     # startup etc can theoretically make this happen
1332                     $q = 0 if ($q <= 0 && $q > -0.1);
1333                     $totals{$perl}{$field} += $q;
1334                     $counts{$perl}{$field}++;
1335                     $data{$test_name}{$perl}{$field} = $q;
1336                     next;
1337                 }
1338
1339                 # $p and $q are notionally integer counts, but
1340                 # due to variations in startup etc, it's possible for a
1341                 # count which is supposedly zero to be calculated as a
1342                 # small positive or negative value.
1343                 # In this case, set it to zero. Further below we
1344                 # special-case zeros to avoid division by zero errors etc.
1345
1346                 $p = 0.0 if $p < 0.01;
1347                 $q = 0.0 if $q < 0.01;
1348
1349                 if ($p == 0.0 && $q == 0.0) {
1350                     # Both perls gave a count of zero, so no change:
1351                     # treat as 100%
1352                     $totals{$perl}{$field} += 1;
1353                     $counts{$perl}{$field}++;
1354                     $data{$test_name}{$perl}{$field} = 1;
1355                 }
1356                 elsif ($p == 0.0 || $q == 0.0) {
1357                     # If either count is zero, there were too few events
1358                     # to give a meaningful ratio (and we will end up with
1359                     # division by zero if we try). Mark the result undef,
1360                     # indicating that it shouldn't be displayed; and skip
1361                     # adding to the average
1362                     $data{$test_name}{$perl}{$field} = undef;
1363                 }
1364                 else {
1365                     # For averages, we record q/p rather than p/q.
1366                     # Consider a test where perl_norm took 1000 cycles
1367                     # and perlN took 800 cycles. For the individual
1368                     # results we display p/q, or 1.25; i.e. a quarter
1369                     # quicker. For the averages, we instead sum all
1370                     # the 0.8's, which gives the total cycles required to
1371                     # execute all tests, with all tests given equal
1372                     # weight. Later we reciprocate the final result,
1373                     # i.e. 1/(sum(qi/pi)/n)
1374
1375                     $totals{$perl}{$field} += $q/$p;
1376                     $counts{$perl}{$field}++;
1377                     $data{$test_name}{$perl}{$field} = $p/$q;
1378                 }
1379             }
1380         }
1381     }
1382
1383     # Calculate averages based on %totals and %counts accumulated earlier.
1384
1385     my %averages;
1386     for my $perl (keys %totals) {
1387         my $t = $totals{$perl};
1388         for my $field (keys %$t) {
1389             $averages{$perl}{$field} = $OPTS{raw}
1390                 ? $t->{$field} / $counts{$perl}{$field}
1391                   # reciprocal - see comments above
1392                 : $counts{$perl}{$field} / $t->{$field};
1393         }
1394     }
1395
1396     return \%data, \%averages;
1397 }
1398
1399
1400
1401 # print a standard blurb at the start of the grind display
1402
1403 sub grind_blurb {
1404     my ($perls) = @_;
1405
1406     print <<EOF;
1407 Key:
1408     Ir   Instruction read
1409     Dr   Data read
1410     Dw   Data write
1411     COND conditional branches
1412     IND  indirect branches
1413     _m   branch predict miss
1414     _m1  level 1 cache miss
1415     _mm  last cache (e.g. L3) miss
1416     -    indeterminate percentage (e.g. 1/0)
1417
1418 EOF
1419
1420     if ($OPTS{raw}) {
1421         print "The numbers represent raw counts per loop iteration.\n";
1422     }
1423     else {
1424         print <<EOF;
1425 The numbers represent relative counts per loop iteration, compared to
1426 $perls->[$OPTS{norm}][1] at 100.0%.
1427 Higher is better: for example, using half as many instructions gives 200%,
1428 while using twice as many gives 50%.
1429 EOF
1430     }
1431 }
1432
1433
1434 # return a sorted list of the test names, plus 'AVERAGE'
1435
1436 sub sorted_test_names {
1437     my ($results, $order, $perls) = @_;
1438
1439     my @names;
1440     unless ($OPTS{average}) {
1441         if (defined $OPTS{'sort-field'}) {
1442             my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
1443             my $perl = $perls->[$perlix][1];
1444             @names = sort
1445                 {
1446                         $results->{$a}{$perl}{$field}
1447                     <=> $results->{$b}{$perl}{$field}
1448                 }
1449                 keys %$results;
1450         }
1451         else {
1452             @names = grep $results->{$_}, @$order;
1453         }
1454     }
1455
1456     # No point in displaying average for only one test.
1457     push @names,  'AVERAGE' unless @names == 1;
1458     @names;
1459 }
1460
1461
1462 # grind_print(): display the tabulated results of all the cachegrinds.
1463 #
1464 # Arguments are of the form:
1465 #    $results->{benchmark_name}{perl_label}{field_name} = N
1466 #    $averages->{perl_label}{field_name} = M
1467 #    $perls = [ [ perl-exe, perl-label ], ... ]
1468 #    $tests->{test_name}{desc => ..., ...}
1469
1470 sub grind_print {
1471     my ($results, $averages, $perls, $tests, $order) = @_;
1472
1473     my @perl_names = map $_->[0], @$perls;
1474     my @perl_labels = map $_->[1], @$perls;
1475     my %perl_labels;
1476     $perl_labels{$_->[0]} = $_->[1] for @$perls;
1477
1478     my $field_label_width = 6;
1479     # Calculate the width to display for each column.
1480     my $min_width = $OPTS{raw} ? 8 : 6;
1481     my @widths = map { length($_) < $min_width ? $min_width : length($_) }
1482                         @perl_labels;
1483
1484     # Print standard header.
1485     grind_blurb($perls);
1486
1487     my @test_names = sorted_test_names($results, $order, $perls);
1488
1489     # If only a single field is to be displayed, use a more compact
1490     # format with only a single line of output per test.
1491
1492     my $one_field = defined $OPTS{fields} &&  keys(%{$OPTS{fields}}) == 1;
1493
1494     if ($one_field) {
1495         print "\nResults for field " . (keys(%{$OPTS{fields}}))[0] . ".\n";
1496
1497         # The first column will now contain test names rather than
1498         # field names; Calculate the max width.
1499
1500         $field_label_width = 0;
1501         for (@test_names) {
1502             $field_label_width = length if length > $field_label_width;
1503         }
1504
1505         # Print the perl executables header.
1506
1507         print "\n";
1508         for my $i (0,1) {
1509             print " " x $field_label_width;
1510             for (0..$#widths) {
1511                 printf " %*s", $widths[$_],
1512                     $i ? ('-' x$widths[$_]) :  $perl_labels[$_];
1513             }
1514             print "\n";
1515         }
1516     }
1517
1518     # Dump the results for each test.
1519
1520     for my $test_name (@test_names) {
1521         my $doing_ave = ($test_name eq 'AVERAGE');
1522         my $res1 = $doing_ave ? $averages : $results->{$test_name};
1523
1524         unless ($one_field) {
1525             print "\n$test_name";
1526             print "\n$tests->{$test_name}{desc}" unless $doing_ave;
1527             print "\n\n";
1528
1529             # Print the perl executables header.
1530             for my $i (0,1) {
1531                 print " " x $field_label_width;
1532                 for (0..$#widths) {
1533                     printf " %*s", $widths[$_],
1534                         $i ? ('-' x$widths[$_]) :  $perl_labels[$_];
1535                 }
1536                 print "\n";
1537             }
1538         }
1539
1540         for my $field (qw(Ir Dr Dw COND IND
1541                           N
1542                           COND_m IND_m
1543                           N
1544                           Ir_m1 Dr_m1 Dw_m1
1545                           N
1546                           Ir_mm Dr_mm Dw_mm
1547                       ))
1548         {
1549             next if $OPTS{fields} and ! exists $OPTS{fields}{$field};
1550
1551             if ($field eq 'N') {
1552                 print "\n";
1553                 next;
1554             }
1555
1556             if ($one_field) {
1557                 printf "%-*s", $field_label_width, $test_name;
1558             }
1559             else {
1560                 printf "%*s", $field_label_width, $field;
1561             }
1562
1563             for my $i (0..$#widths) {
1564                 my $res2 = $res1->{$perl_labels[$i]};
1565                 my $p = $res2->{$field};
1566                 if (!defined $p) {
1567                     printf " %*s", $widths[$i], '-';
1568                 }
1569                 elsif ($OPTS{raw}) {
1570                     printf " %*.1f", $widths[$i], $p;
1571                 }
1572                 else {
1573                     printf " %*.2f", $widths[$i], $p * 100;
1574                 }
1575             }
1576             print "\n";
1577         }
1578     }
1579 }
1580
1581
1582
1583 # grind_print_compact(): like grind_print(), but display a single perl
1584 # in a compact form. Has an additional arg, $which_perl, which specifies
1585 # which perl to display.
1586 #
1587 # Arguments are of the form:
1588 #    $results->{benchmark_name}{perl_label}{field_name} = N
1589 #    $averages->{perl_label}{field_name} = M
1590 #    $perls = [ [ perl-exe, perl-label ], ... ]
1591 #    $tests->{test_name}{desc => ..., ...}
1592
1593 sub grind_print_compact {
1594     my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;
1595
1596
1597     # the width to display for each column.
1598     my $width = $OPTS{raw} ? 7 : 6;
1599
1600     # Print standard header.
1601     grind_blurb($perls);
1602
1603     print "\nResults for $perls->[$which_perl][1]\n\n";
1604
1605     my @test_names = sorted_test_names($results, $order, $perls);
1606
1607     # Dump the results for each test.
1608
1609      my @fields = qw( Ir Dr Dw
1610                       COND IND
1611                       COND_m IND_m
1612                       Ir_m1 Dr_m1 Dw_m1
1613                       Ir_mm Dr_mm Dw_mm
1614                     );
1615     if ($OPTS{fields}) {
1616         @fields = grep exists $OPTS{fields}{$_}, @fields;
1617     }
1618
1619     printf " %*s", $width, $_      for @fields;
1620     print "\n";
1621     printf " %*s", $width, '------' for @fields;
1622     print "\n";
1623
1624     for my $test_name (@test_names) {
1625         my $doing_ave = ($test_name eq 'AVERAGE');
1626         my $res = $doing_ave ? $averages : $results->{$test_name};
1627         $res = $res->{$perls->[$which_perl][1]};
1628
1629         for my $field (@fields) {
1630             my $p = $res->{$field};
1631             if (!defined $p) {
1632                 printf " %*s", $width, '-';
1633             }
1634             elsif ($OPTS{raw}) {
1635                 printf " %*.1f", $width, $p;
1636             }
1637             else {
1638                 printf " %*.2f", $width, $p * 100;
1639             }
1640
1641         }
1642
1643         print "  $test_name\n";
1644     }
1645 }
1646
1647
1648 # do_selftest(): check that we can parse known cachegrind()
1649 # output formats. If the output of cachegrind changes, add a *new*
1650 # test here; keep the old tests to make sure we continue to parse
1651 # old cachegrinds
1652
1653 sub do_selftest {
1654
1655     my @tests = (
1656         'standard',
1657         <<'EOF',
1658 ==32350== Cachegrind, a cache and branch-prediction profiler
1659 ==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
1660 ==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
1661 ==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
1662 ==32350==
1663 --32350-- warning: L3 cache found, using its data for the LL simulation.
1664 ==32350==
1665 ==32350== I   refs:      1,124,055
1666 ==32350== I1  misses:        5,573
1667 ==32350== LLi misses:        3,338
1668 ==32350== I1  miss rate:      0.49%
1669 ==32350== LLi miss rate:      0.29%
1670 ==32350==
1671 ==32350== D   refs:        404,275  (259,191 rd   + 145,084 wr)
1672 ==32350== D1  misses:        9,608  (  6,098 rd   +   3,510 wr)
1673 ==32350== LLd misses:        5,794  (  2,781 rd   +   3,013 wr)
1674 ==32350== D1  miss rate:       2.3% (    2.3%     +     2.4%  )
1675 ==32350== LLd miss rate:       1.4% (    1.0%     +     2.0%  )
1676 ==32350==
1677 ==32350== LL refs:          15,181  ( 11,671 rd   +   3,510 wr)
1678 ==32350== LL misses:         9,132  (  6,119 rd   +   3,013 wr)
1679 ==32350== LL miss rate:        0.5% (    0.4%     +     2.0%  )
1680 ==32350==
1681 ==32350== Branches:        202,372  (197,050 cond +   5,322 ind)
1682 ==32350== Mispredicts:      19,153  ( 17,742 cond +   1,411 ind)
1683 ==32350== Mispred rate:        9.4% (    9.0%     +    26.5%   )
1684 EOF
1685         {
1686             COND    =>  197050,
1687             COND_m  =>   17742,
1688             Dr      =>  259191,
1689             Dr_m1   =>    6098,
1690             Dr_mm   =>    2781,
1691             Dw      =>  145084,
1692             Dw_m1   =>    3510,
1693             Dw_mm   =>    3013,
1694             IND     =>    5322,
1695             IND_m   =>    1411,
1696             Ir      => 1124055,
1697             Ir_m1   =>    5573,
1698             Ir_mm   =>    3338,
1699         },
1700     );
1701
1702     for ('./t', '.') {
1703         my $t = "$_/test.pl";
1704         next unless  -f $t;
1705         require $t;
1706     }
1707     plan(@tests / 3 * keys %VALID_FIELDS);
1708
1709     while (@tests) {
1710         my $desc     = shift @tests;
1711         my $output   = shift @tests;
1712         my $expected = shift @tests;
1713         my $p = parse_cachegrind($output);
1714         for (sort keys %VALID_FIELDS) {
1715             is($p->{$_}, $expected->{$_}, "$desc, $_");
1716         }
1717     }
1718 }