3 # A tool for analysing the performance of the code snippets found in
4 # t/perf/benchmarks or similar
bench.pl - Compare the performance of perl code snippets across multiple
perls.
14 # Basic: run the tests in t/perf/benchmarks against two or
17 bench.pl [options] perlA[=labelA] perlB[=labelB] ...
19 # run the tests against the same perl twice, with varying options
21 bench.pl [options] perlA=bigint --args='-Mbigint' perlA=plain
23 # Run bench on blead, saving results to file; then modify the blead
24 # binary, and benchmark again, comparing against the saved results
26 bench.pl [options] --write=blead.time ./perl=blead
27 # ... hack hack hack, updating ./perl ...
28 bench.pl --read=blead.time ./perl=hacked
30 # You can also combine --read with --write and new benchmark runs
32 bench.pl --read=blead.time --write=last.time -- ./perl=hacked
By default, F<bench.pl> will run code snippets found in
F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
how many instruction reads, data writes, branches, cache misses, etc. one
execution of the snippet uses. Usually it will run them against two or
more perl executables and show how much each test has gotten better or
worse.
43 It is modelled on the F<perlbench> tool, but since it measures instruction
44 reads etc., rather than timings, it is much more precise and reproducible.
45 It is also considerably faster, and is capable of running tests in
46 parallel (with C<-j>). Rather than displaying a single relative
47 percentage per test/perl combination, it displays values for 13 different
48 measurements, such as instruction reads, conditional branch misses etc.
50 There are options to write the raw data to a file, and to read it back.
51 This means that you can view the same run data in different views with
52 different selection and sort options. You can also use this mechanism
53 to save the results of timing one perl, and then read it back while timing
a modification, so that you don't have to rerun the same tests on the
same perl over and over, or have two perl executables built at the same
time.
57 The optional C<=label> after each perl executable is used in the display
output. If you are doing a two-step benchmark then you should provide
59 a label for at least the "base" perl. If a label isn't specified, it
60 defaults to the name of the perl executable. Labels must be unique across
61 all current executables, plus any previous ones obtained via --read.
63 In its most general form, the specification of a perl executable is:
65 path/perl=+mylabel --args='-foo -bar' --args='-baz' \
66 --env='A=a' --env='B=b'
This defines how to run the executable F<path/perl>. It has a label
which, due to the leading C<+>, is appended to the binary name to give a
label of C<path/perl+mylabel> (without the C<+>, the label would be just
C<mylabel>).
It can optionally be followed by one or more C<--args> or C<--env>
switches, which specify extra command-line arguments or environment
variables to use when invoking that executable. Each C<--env> switch
should be of the form C<--env=VARIABLE=value>. Any C<--args> values are
concatenated to the eventual command line, along with the global
C<--perlargs> value if any. The above would cause a system() call
looking something like:
81 PERL_HASH_SEED=0 A=a B=b valgrind --tool=cachegrind \
82 path/perl -foo -bar -baz ....
86 =head2 General options
94 What action to perform. The default is I<grind>, which runs the benchmarks
95 using I<cachegrind> as the back end. The only other action at the moment is
96 I<selftest>, which runs some basic sanity checks and produces TAP output.
102 Enable debugging output.
108 Display basic usage information.
115 Display progress information.
119 =head2 Test selection options
127 Specify a subset of tests to run (or in the case of C<--read>, to read).
128 It may be either a comma-separated list of test names, or a regular
129 expression. For example
131 --tests=expr::assign::scalar_lex,expr::assign::2list_lex
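or, as a regex (the C</.../> delimiters are required):

    --tests=/^expr::/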
147 Read in saved data from a previous C<--write> run from the specified file.
148 If C<--tests> is present too, then only tests matching those conditions
149 are read from the file.
151 C<--read> may be specified multiple times, in which case the results
152 across all files are aggregated. The list of test names from each file
153 (after filtering by C<--tests>) must be identical across all files.
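For example, two saved runs might be aggregated and displayed sorted,
assuming the labels C<blead> and C<patched> were used when writing the
files:

    bench.pl --read=blead.time --read=patched.time --sort=Ir:blead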
155 This list of tests is used instead of that obtained from the normal
156 benchmark file (or C<--benchfile>) for any benchmarks that are run.
The perl labels must be unique across all read-in test results.
160 Requires C<JSON::PP> to be available.
164 =head2 Benchmarking options
166 Benchmarks will be run for all perls specified on the command line.
167 These options can be used to modify the benchmarking behavior:
Generate a unique label for every executable which doesn't have an
explicit C<=label>. It works by stripping out common prefixes and
suffixes from the executable names, then appending C<-0>, C<-1>, etc. to
any names which are still not unique. Text directly surrounding the
unique part which looks like a version number (i.e. which matches
C</[0-9\.]+/>) isn't stripped. For example, the executables
182 perl-5.20.0-threaded perl-5.22.0-threaded perl-5.24.0-threaded
stripped to unique parts would be:

    0  2  4
but is actually only stripped down to:

    5.20.0  5.22.0  5.24.0
If the final labels are plain integers, they are prefixed with "p"
to avoid looking like column numbers to switches like C<--norm=2>.
The path of the file which contains the benchmarks (F<t/perf/benchmarks>
by default).
207 Optional command-line arguments to pass to all cachegrind invocations.
Run I<N> jobs in parallel (default 1). This determines how many cachegrind
processes will run at a time, and should generally be set to the number
of CPU cores available.
Optional command-line arguments to pass to every perl executable. This
may optionally be combined with C<--args> switches following individual
perls. For example:
226 bench.pl --perlargs='-Ilib -It/lib' .... \
227 perlA --args='-Mstrict' \
228 perlB --args='-Mwarnings'
230 would cause the invocations
232 perlA -Ilib -It/lib -Mstrict
233 perlB -Ilib -It/lib -Mwarnings
237 =head2 Output options
239 Any results accumulated via --read or by running benchmarks can be output
240 in any or all of these three ways:
Save the raw data to the specified file. It can be read back later with
C<--read>. If combined with C<--read>, then the output file will be
the merge of the files read and any additional perls added on the command
line.
254 Requires C<JSON::PP> to be available.
258 --bisect=I<field,minval,maxval>
260 Exit with a zero status if the named field is in the specified range;
261 exit with 1 otherwise. It will complain if more than one test or perl has
262 been specified. It is intended to be called as part of a bisect run, to
263 determine when something changed. For example,
bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
    ./perl
268 might be called from bisect to find when the number of instruction reads
269 for test I<foo> falls outside the range 100..105.
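For example, it might be driven from C<git bisect run> with a wrapper
along these lines (illustrative only: it assumes the script lives in
F<Porting/> and that a plain C<make> rebuilds F<./perl>):

    git bisect run sh -c 'make && \
        Porting/bench.pl --tests=foo --bisect=Ir,100,105 ./perl'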
Display the results to stdout in human-readable form. This is enabled by
default, except with C<--write> and C<--bisect>. The following sub-options
alter how the results are displayed:
Only display the overall average, rather than the results for each
individual test.
292 Display the results for a single perl executable in a compact form.
293 Which perl to display is specified in the same manner as C<--norm>.
299 Display only the specified fields; for example,
301 --fields=Ir,Ir_m,Ir_mm
303 If only one field is selected, the output is in more compact form.
309 Specify which perl column in the output to treat as the 100% norm.
314 * a column number (0..N-1),
316 * a negative column number (-1..-N) which counts from the right (so -1 is
317 the right-most column),
* a perl executable name, or

* a perl executable label.
325 It defaults to the leftmost column.
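For example, to treat the right-most column (here, the blead build) as
the 100% reference (executable names are illustrative):

    bench.pl --norm=-1 perl-5.36.0 ./perl=blead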
Display raw data counts rather than percentages in the outputs. This
allows you to see the exact number of instruction reads, branch misses,
etc. for each test/perl combination. It also causes the C<AVERAGE> display
per field to be calculated based on the average of each test's count
rather than the average of each percentage. This means that tests with
very high counts will dominate.
342 Order the tests in the output based on the value of I<field> in the
343 column I<perl>. The I<perl> value is as per C<--norm>. For example
345 bench.pl --sort=Dw:perl-5.20.0 \
346 perl-5.16.0 perl-5.18.0 perl-5.20.0
359 use Getopt::Long qw(:config no_auto_abbrev require_order);
363 use POSIX ":sys_wait_h";
365 # The version of the file format used to save data. We refuse to process
366 # the file if the integer component differs.
368 my $FORMAT_VERSION = 1.0;
370 # The fields we know about
372 my %VALID_FIELDS = map { $_ => 1 }
373 qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
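# Each field corresponds to a summary line printed by cachegrind when
# run with --branch-sim=yes (see parse_cachegrind() below); roughly:
#
#   Ir             "I refs:"        Dr, Dw        "D refs:" (rd, wr)
#   Ir_m1          "I1 misses:"     Dr_m1, Dw_m1  "D1 misses:" (rd, wr)
#   Ir_mm          "LLi misses:"    Dr_mm, Dw_mm  "LLd misses:" (rd, wr)
#   COND, IND      "Branches:" (cond, ind)
#   COND_m, IND_m  "Mispredicts:" (cond, ind)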
377 Usage: $0 [options] -- perl[=label] ...
381 --action=foo What action to perform [default: grind]:
382 grind run the code under cachegrind
383 selftest perform a selftest; produce TAP output
384 --debug Enable verbose debugging output.
385 --help Display this help.
386 -v|--verbose Display progress information.
391 --tests=FOO Select only the specified tests for reading, benchmarking
392 and display. FOO may be either a list of tests or
393 a pattern: 'foo,bar,baz' or '/regex/';
394 [default: all tests].
398 -r|--read=file Read in previously saved data from the specified file.
399 May be repeated, and be used together with new
400 benchmarking to create combined results.
403 Benchmarks will be run for any perl specified on the command line.
404 These options can be used to modify the benchmarking behavior:
406 --autolabel generate labels for any executables without one
407 --benchfile=foo File containing the benchmarks.
408 [default: t/perf/benchmarks].
409 --grindargs=foo Optional command-line args to pass to cachegrind.
410 -j|--jobs=N Run N jobs in parallel [default 1].
411 --perlargs=foo Optional command-line args to pass to each perl to run.
414 Any results accumulated via --read or running benchmarks can be output
415 in any or all of these three ways:
417 -w|--write=file Save the raw data to the specified file (may be read
418 back later with --read).
420 --bisect=f,min,max Exit with a zero status if the named field f is in
421 the specified min..max range; exit 1 otherwise.
422 Produces no other output. Only legal if a single
423 benchmark test has been specified.
425 --show Display the results to stdout in human-readable form.
426 This is enabled by default, except with --write and
--bisect. The following sub-options alter how
                    the results are displayed:
430 --average Only display average, not individual test results.
431 --compact=perl Display the results of a single perl in compact form.
                    The perl to display is specified as for --norm.
433 --fields=a,b,c Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
434 --norm=perl Which perl column to treat as 100%; may be a column
435 number (0..N-1) or a perl executable name or label;
437 --raw Display raw data counts rather than percentages.
438 --sort=field:perl Sort the tests based on the value of 'field' in the
439 column 'perl'. The perl value is as per --norm.
The command line ends with one or more perl executables, which will be
searched for in the current \$PATH. Each binary name may have an
optional =LABEL appended, which will be used rather than the executable
name in output. The labels must be unique across all current executables
and previous runs obtained via --read. Each executable may optionally be
followed by --args= and --env= switches to specify per-executable
arguments and environment variables:
perl-5.24.0=strict --args='-Mwarnings -Mstrict' --env='FOO=foo'
477 # process command-line args and call top-level action
481 'action=s' => \$OPTS{action},
482 'average' => \$OPTS{average},
483 'autolabel' => \$OPTS{autolabel},
484 'benchfile=s' => \$OPTS{benchfile},
485 'bisect=s' => \$OPTS{bisect},
486 'compact=s' => \$OPTS{compact},
487 'debug' => \$OPTS{debug},
488 'grindargs=s' => \$OPTS{grindargs},
489 'help|h' => \$OPTS{help},
490 'fields=s' => \$OPTS{fields},
491 'jobs|j=i' => \$OPTS{jobs},
492 'norm=s' => \$OPTS{norm},
493 'perlargs=s' => \$OPTS{perlargs},
494 'raw' => \$OPTS{raw},
495 'read|r=s@' => \$OPTS{read},
496 'show' => \$OPTS{show},
497 'sort=s' => \$OPTS{sort},
498 'tests=s' => \$OPTS{tests},
499 'v|verbose' => \$OPTS{verbose},
500 'write|w=s' => \$OPTS{write},
501 ) or die "Use the -h option for usage information.\n";
503 usage if $OPTS{help};
506 if (defined $OPTS{read} or defined $OPTS{write}) {
# fail early if JSON::PP isn't present
511 if (defined $OPTS{fields}) {
512 my @f = split /,/, $OPTS{fields};
514 die "Error: --fields: unknown field '$_'\n"
515 unless $VALID_FIELDS{$_};
517 my %f = map { $_ => 1 } @f;
521 my %valid_actions = qw(grind 1 selftest 1);
522 unless ($valid_actions{$OPTS{action}}) {
523 die "Error: unrecognised action '$OPTS{action}'\n"
524 . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
527 if (defined $OPTS{sort}) {
528 my @s = split /:/, $OPTS{sort};
530 die "Error: --sort argument should be of the form field:perl: "
533 my ($field, $perl) = @s;
534 die "Error: --sort: unknown field '$field'\n"
535 unless $VALID_FIELDS{$field};
# the 'perl' value will be validated later, after we have processed
# the list of perl executables
538 $OPTS{'sort-field'} = $field;
539 $OPTS{'sort-perl'} = $perl;
542 # show is the default output action
543 $OPTS{show} = 1 unless $OPTS{write} || $OPTS{bisect};
545 if ($OPTS{action} eq 'grind') {
548 elsif ($OPTS{action} eq 'selftest') {
550 die "Error: no perl executables may be specified with selftest\n"
558 # Given a hash ref keyed by test names, filter it by deleting unwanted
559 # tests, based on $OPTS{tests}.
564 my $opt = $OPTS{tests};
565 return unless defined $opt;
570 $opt =~ s{^/(.+)/$}{$1}
571 or die "Error: --tests regex must be of the form /.../\n";
573 delete $tests->{$_} unless /$opt/;
578 for (split /,/, $opt) {
580 next if exists $tests->{$_};
582 my $e = "Error: no such test found: '$_'\n";
583 if ($OPTS{verbose}) {
584 $e .= "Valid test names are:\n";
585 $e .= " $_\n" for sort keys %$tests;
588 $e .= "Re-run with --verbose for a list of valid tests.\n";
593 delete $tests->{$_} unless exists $t{$_};
596 die "Error: no tests to run\n" unless %$tests;
600 # Read in the test file, and filter out any tests excluded by $OPTS{tests}
601 # return a hash ref { testname => { test }, ... }
# and an array ref of the test names in their original order.
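#
# For illustration, a minimal benchmarks file might look like this
# (a sketch: an array ref of name => spec pairs, with the keys
# validated below):
#
#     [
#         'expr::assign::scalar_lex' => {
#             desc  => 'lexical $x = 1',
#             setup => 'my $x',
#             code  => '$x = 1',
#         },
#     ];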
604 sub read_tests_file {
613 die "Error: can't load '$file': code didn't return a true value\n"
615 die "Error: can't parse '$file':\n$@\n" if $@;
616 die "Error: can't read '$file': $!\n";
619 # validate and process each test
622 my %valid = map { $_ => 1 } qw(desc setup code pre post compile);
624 if (!@tests || @tests % 2 != 0) {
625 die "Error: '$file' does not contain evenly paired test names and hashes\n";
628 my $name = shift @tests;
629 my $hash = shift @tests;
631 unless ($name =~ /^[a-zA-Z]\w*(::\w+)*$/) {
632 die "Error: '$file': invalid test name: '$name'\n";
635 for (sort keys %$hash) {
636 die "Error: '$file': invalid key '$_' for test '$name'\n"
637 unless exists $valid{$_};
640 # make description default to the code
641 $hash->{desc} = $hash->{code} unless exists $hash->{desc};
646 for (my $i=0; $i < @$ta; $i += 2) {
647 push @orig_order, $ta->[$i];
652 return $t, \@orig_order;
656 # Process the perl name/label/column argument of options like --norm and
657 # --sort. Return the index of the matching perl.
660 my ($perl, $perls, $who) = @_;
664 if ($perl =~ /^-([0-9]+)$/) {
666 die "Error: $who value $perl outside range -1..-$n\n"
667 if $p < 1 || $p > $n;
671 if ($perl =~ /^[0-9]+$/) {
672 die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
677 my @perl = grep $perls->[$_][0] eq $perl
678 || $perls->[$_][1] eq $perl,
683 $valid .= " $_->[1]";
684 $valid .= " $_->[0]" if $_->[0] ne $_->[1];
687 die "Error: $who: unrecognised perl '$perl'\n"
688 . "Valid perl names are:\n$valid";
690 die "Error: $who: ambiguous perl '$perl'\n"
697 # Validate the list of perl executables on the command line.
698 # The general form is
700 # a_perl_exe[=label] [ --args='perl args'] [ --env='FOO=foo' ]
702 # Return a list of [ exe, label, {env}, 'args' ] tuples
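#
# e.g. for "./perl=+patched --args='-Ilib' --env='FOO=foo'", the
# returned tuple would be roughly (illustrative):
#
#     [ './perl', './perl+patched', { FOO => 'foo' }, ' -Ilib' ]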
704 sub process_executables_list {
705 my ($read_perls, @cmd_line_args) = @_;
707 my @results; # returned, each item is [ perlexe, label, {env}, 'args' ]
708 my %seen_from_reads = map { $_->[1] => 1 } @$read_perls;
712 while (@cmd_line_args) {
713 my $item = shift @cmd_line_args;
715 if ($item =~ /^--(.*)$/) {
716 my ($switch, $val) = split /=/, $1, 2;
717 die "Error: unrecognised executable switch '--$switch'\n"
718 unless $switch =~ /^(args|env)$/;
720 die "Error: --$switch without a preceding executable name\n"
723 unless (defined $val) {
724 $val = shift @cmd_line_args;
725 die "Error: --$switch is missing value\n"
729 if ($switch eq 'args') {
730 $results[-1][3] .= " $val";
$val =~ /^(\w+)=(.*)$/
    or die "Error: --env value must be of the form VARIABLE=value\n";
736 $results[-1][2]{$1} = $2;
742 # whatever is left must be the name of an executable
744 my ($perl, $label) = split /=/, $item, 2;
745 push @labels, $label;
746 unless ($OPTS{autolabel}) {
748 $label = $perl.$label if $label =~ /^\+/;
751 die "Error: duplicate label '$label': "
752 . "each executable must have a unique label\n"
753 if defined $label && $seen{$label}++;
755 die "Error: duplicate label '$label': "
756 . "seen both in --read file and on command line\n"
757 if defined $label && $seen_from_reads{$label};
759 my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
760 die "Error: unable to execute '$perl': $r\n" if $r ne "ok\n";
762 push @results, [ $perl, $label, { }, '' ];
765 # make args '' by default
767 push @$_, '' unless @$_ > 3;
770 if ($OPTS{autolabel}) {
772 # create a list of [ 'perl-path', $i ] pairs for all
773 # $results[$i] which don't have a label
776 push @labels, [ $results[$_][0], $_ ]
777 unless defined $results[$_][1];
781 # strip off common prefixes
784 while (length $labels[0][0]) {
785 my $c = substr($labels[0][0], 0, 1);
786 for my $i (1..$#labels) {
787 last STRIP_PREFIX if substr($labels[$i][0], 0, 1) ne $c;
789 substr($labels[$_][0], 0, 1) = '' for 0..$#labels;
792 # add back any final "version-ish" prefix
793 $pre =~ s/^.*?([0-9\.]*)$/$1/;
794 substr($labels[$_][0], 0, 0) = $pre for 0..$#labels;
796 # strip off common suffixes
799 while (length $labels[0][0]) {
800 my $c = substr($labels[0][0], -1, 1);
801 for my $i (1..$#labels) {
last STRIP_SUFFIX if substr($labels[$i][0], -1, 1) ne $c;
804 chop $labels[$_][0] for 0..$#labels;
807 # add back any initial "version-ish" suffix
808 $post =~ s/^([0-9\.]*).*$/$1/;
809 $labels[$_][0] .= $post for 0..$#labels;
811 # avoid degenerate empty string for single executable name
812 $labels[0][0] = '0' if @labels == 1 && !length $labels[0][0];
814 # if the auto-generated labels are plain integers, prefix
815 # them with 'p' (for perl) to distinguish them from column
816 # indices (otherwise e.g. --norm=2 is ambiguous)
818 if ($labels[0][0] =~ /^\d*$/) {
819 $labels[$_][0] = "p$labels[$_][0]" for 0..$#labels;
822 # now de-duplicate labels
825 $seen{$read_perls->[$_][1]}++ for 0..$#$read_perls;
826 $seen{$labels[$_][0]}++ for 0..$#labels;
828 for my $i (0..$#labels) {
829 my $label = $labels[$i][0];
830 next unless $seen{$label} > 1;
831 my $d = length($label) ? '-' : '';
832 my $n = $index{$label} // 0;
833 $n++ while exists $seen{"$label$d$n"};
834 $labels[$i][0] .= "$d$n";
835 $index{$label} = $n + 1;
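# e.g. two executables which both stripped down to the label 'perl'
# come out of the loop above as 'perl-0' and 'perl-1'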
838 # finally, store them
839 $results[$_->[1]][1]= $_->[0] for @labels;
849 # Return a string containing a perl program which runs the benchmark code
850 # $ARGV[0] times. If $body is true, include the main body (setup) in
851 # the loop; otherwise create an empty loop with just pre and post.
852 # Note that an empty body is handled with '1;' so that a completely empty
853 # loop has a single nextstate rather than a stub op, so more closely
854 # matches the active loop; e.g.:
855 # {1;} => nextstate; unstack
856 # {$x=1;} => nextstate; const; gvsv; sassign; unstack
857 # Note also that each statement is prefixed with a label; this avoids
858 # adjacent nextstate ops being optimised away.
# A final 1; statement is added so that the code is always in void
# context.
# If the compile flag is set for a test, the body of the loop is wrapped in
# eval 'sub { .... }' to measure compile time rather than execution time.
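#
# For illustration, for a test with setup 'my $x' and code '$x = 1',
# the generated active-loop program looks roughly like this (a sketch;
# the exact preamble is assembled below):
#
#     my $x;
#     for my $__loop__ (1..$ARGV[0]) {
#         _CODE_: $x = 1; _CXT_: 1;
#     }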
867 my ($name, $test, $body) = @_;
868 my ($desc, $setup, $code, $pre, $post, $compile) =
869 @$test{qw(desc setup code pre post compile)};
872 $pre = defined $pre ? "_PRE_: $pre; " : "";
873 $post = defined $post ? "_POST_: $post; " : "";
874 $code = $body ? $code : "1";
875 $code = "_CODE_: $code; ";
876 my $full = "$pre$code$post _CXT_: 1; ";
877 $full = "eval q{sub { $full }};" if $compile;
884 for my \$__loop__ (1..\$ARGV[0]) {
891 # Parse the output from cachegrind. Return a hash ref.
892 # See do_selftest() for examples of the output format.
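# The returned hash maps each field in %VALID_FIELDS to an integer
# count with the commas stripped, e.g. (values taken from the sample
# output in do_selftest()):
#
#     { Ir => 1124055, Ir_m1 => 5573, Ir_mm => 3338, ..., IND_m => 1411 }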
894 sub parse_cachegrind {
895 my ($output, $id, $perl) = @_;
899 my @lines = split /\n/, $output;
901 unless (s/(==\d+==)|(--\d+--) //) {
902 die "Error: while executing $id:\n"
903 . "unexpected code or cachegrind output:\n$_\n";
905 if (/I refs:\s+([\d,]+)/) {
908 elsif (/I1 misses:\s+([\d,]+)/) {
911 elsif (/LLi misses:\s+([\d,]+)/) {
914 elsif (/D refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
915 @res{qw(Dr Dw)} = ($1,$2);
917 elsif (/D1 misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
918 @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
920 elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
921 @res{qw(Dr_mm Dw_mm)} = ($1,$2);
923 elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
924 @res{qw(COND IND)} = ($1,$2);
926 elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
927 @res{qw(COND_m IND_m)} = ($1,$2);
931 for my $field (keys %VALID_FIELDS) {
932 die "Error: can't parse '$field' field from cachegrind output:\n$output"
933 unless exists $res{$field};
934 $res{$field} =~ s/,//g;
941 # Handle the 'grind' action
944 my ($cmd_line_args) = @_; # the residue of @ARGV after option processing
946 my ($loop_counts, $perls, $results, $tests, $order, @run_perls);
947 my ($bisect_field, $bisect_min, $bisect_max);
948 my ($done_read, $processed, $averages, %seen_labels);
950 if (defined $OPTS{bisect}) {
951 ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
952 die "Error: --bisect option must be of form 'field,integer,integer'\n"
955 and $bisect_min =~ /^[0-9]+$/
956 and $bisect_max =~ /^[0-9]+$/;
958 die "Error: unrecognised field '$bisect_field' in --bisect option\n"
959 unless $VALID_FIELDS{$bisect_field};
961 die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
962 if $bisect_min > $bisect_max;
965 # Read in previous benchmark results
967 foreach my $file (@{$OPTS{read}}) {
968 open my $in, '<:encoding(UTF-8)', $file
969 or die "Error: can't open '$file' for reading: $!\n";
970 my $data = do { local $/; <$in> };
973 my $hash = JSON::PP::decode_json($data);
974 if (int($FORMAT_VERSION) < int($hash->{version})) {
975 die "Error: unsupported version $hash->{version} in file"
976 . " '$file' (too new)\n";
978 my ($read_loop_counts, $read_perls, $read_results, $read_tests, $read_order) =
979 @$hash{qw(loop_counts perls results tests order)};
981 # check file contents for consistency
982 my $k_o = join ';', sort @$read_order;
983 my $k_r = join ';', sort keys %$read_results;
984 my $k_t = join ';', sort keys %$read_tests;
985 die "File '$file' contains no results\n" unless length $k_r;
986 die "File '$file' contains differing test and results names\n"
988 die "File '$file' contains differing test and sort order names\n"
991 # delete tests not matching --tests= criteria, if any
992 filter_tests($read_results);
993 filter_tests($read_tests);
995 for my $perl (@$read_perls) {
996 my $label = $perl->[1];
997 die "Error: duplicate label '$label': seen in file '$file'\n"
998 if exists $seen_labels{$label};
999 $seen_labels{$label}++;
1003 ($loop_counts, $perls, $results, $tests, $order) =
1004 ($read_loop_counts, $read_perls, $read_results, $read_tests, $read_order);
1008 # merge results across multiple files
1010 if ( join(';', sort keys %$tests)
1011 ne join(';', sort keys %$read_tests))
1013 my $err = "Can't merge multiple read files: "
1014 . "they contain differing test sets.\n";
1015 if ($OPTS{verbose}) {
1016 $err .= "Previous tests:\n";
1017 $err .= " $_\n" for sort keys %$tests;
1018 $err .= "tests from '$file':\n";
1019 $err .= " $_\n" for sort keys %$read_tests;
1022 $err .= "Re-run with --verbose to see the differences.\n";
1027 if ("@$read_loop_counts" ne "@$loop_counts") {
1028 die "Can't merge multiple read files: differing loop counts:\n"
1029 . " (previous=(@$loop_counts), "
1030 . "'$file'=(@$read_loop_counts))\n";
1033 push @$perls, @{$read_perls};
1034 foreach my $test (keys %{$read_results}) {
1035 foreach my $label (keys %{$read_results->{$test}}) {
1036 $results->{$test}{$label}= $read_results->{$test}{$label};
1041 die "Error: --benchfile cannot be used when --read is present\n"
1042 if $done_read && defined $OPTS{benchfile};
1044 # Gather list of perls to benchmark:
1046 if (@$cmd_line_args) {
1047 unless ($done_read) {
1048 # How many times to execute the loop for the two trials. The lower
1049 # value is intended to do the loop enough times that branch
1050 # prediction has taken hold; the higher loop allows us to see the
1051 # branch misses after that
1052 $loop_counts = [10, 20];
1055 read_tests_file($OPTS{benchfile} // 't/perf/benchmarks');
1058 @run_perls = process_executables_list($perls, @$cmd_line_args);
1059 push @$perls, @run_perls;
1062 # strip @$order to just the actual tests present
1063 $order = [ grep exists $tests->{$_}, @$order ];
1065 # Now we know what perls and tests we have, do extra option processing
1066 # and checking (done before grinding, so time isn't wasted if we die).
1068 if (!$perls or !@$perls) {
1069 die "Error: nothing to do: no perls to run, no data to read.\n";
1071 if (@$perls < 2 and $OPTS{show} and !$OPTS{raw}) {
1072 die "Error: need at least 2 perls for comparison.\n"
1075 if ($OPTS{bisect}) {
1076 die "Error: exactly one perl executable must be specified for bisect\n"
1077 unless @$perls == 1;
1078 die "Error: only a single test may be specified with --bisect\n"
1079 unless keys %$tests == 1;
1082 $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
1084 if (defined $OPTS{'sort-perl'}) {
1085 $OPTS{'sort-perl'} =
1086 select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
1089 if (defined $OPTS{'compact'}) {
1091 select_a_perl($OPTS{'compact'}, $perls, "--compact");
# Run the benchmarks; accumulate with any previously read results.
1098 $results = grind_run($tests, $order, \@run_perls, $loop_counts, $results);
1102 # Handle the 3 forms of output
1104 if (defined $OPTS{write}) {
1105 my $json = JSON::PP::encode_json({
1106 version => $FORMAT_VERSION,
1107 loop_counts => $loop_counts,
1109 results => $results,
1114 open my $out, '>:encoding(UTF-8)', $OPTS{write}
1115 or die "Error: can't open '$OPTS{write}' for writing: $!\n";
1116 print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
1117 close $out or die "Error: closing file '$OPTS{write}': $!\n";
1120 if ($OPTS{show} or $OPTS{bisect}) {
1121 # numerically process the raw data
1122 ($processed, $averages) =
1123 grind_process($results, $perls, $loop_counts);
1127 if (defined $OPTS{compact}) {
1128 grind_print_compact($processed, $averages, $OPTS{compact},
1129 $perls, $tests, $order);
1132 grind_print($processed, $averages, $perls, $tests, $order);
1136 if ($OPTS{bisect}) {
1137 # these panics shouldn't happen if the bisect checks above are sound
1138 my @r = values %$results;
1139 die "Panic: expected exactly one test result in bisect\n"
1141 @r = values %{$r[0]};
1142 die "Panic: expected exactly one perl result in bisect\n"
1144 my $c = $r[0]{$bisect_field};
1145 die "Panic: no result in bisect for field '$bisect_field'\n"
1148 print "Bisect: $bisect_field had the value $c\n";
exit 0 if $bisect_min <= $c and $c <= $bisect_max;
exit 1;
1156 # Run cachegrind for every test/perl combo.
1157 # It may run several processes in parallel when -j is specified.
1158 # Return a hash ref suitable for input to grind_process()
1161 my ($tests, $order, $perls, $counts, $results) = @_;
1163 # Build a list of all the jobs to run
1167 for my $test (grep $tests->{$_}, @$order) {
1169 # Create two test progs: one with an empty loop and one with code.
1171 make_perl_prog($test, $tests->{$test}, 0),
1172 make_perl_prog($test, $tests->{$test}, 1),
1175 for my $p (@$perls) {
1176 my ($perl, $label, $env, $args) = @$p;
1178 # Run both the empty loop and the active loop
1179 # $counts->[0] and $counts->[1] times.
1185 $envstr .= "$_=$env->{$_} " for sort keys %$env;
1187 my $cmd = "PERL_HASH_SEED=0 $envstr"
1188 . "valgrind --tool=cachegrind --branch-sim=yes "
1189 . "--cachegrind-out-file=/dev/null "
1190 . "$OPTS{grindargs} "
1191 . "$perl $OPTS{perlargs} $args - $counts->[$j] 2>&1";
1192 # for debugging and error messages
1193 my $id = "$test/$label "
1194 . ($i ? "active" : "empty") . "/"
1195 . ($j ? "long" : "short") . " loop";
1212 # Execute each cachegrind and store the results in %results.
1214 local $SIG{PIPE} = 'IGNORE';
1216 my $max_jobs = $OPTS{jobs};
1217 my $running = 0; # count of executing jobs
1218 my %pids; # map pids to jobs
1219 my %fds; # map fds to jobs
1220 my $select = IO::Select->new();
1222 my $njobs = scalar @jobs;
1224 my $starttime = time();
1226 while (@jobs or $running) {
1229 printf "Main loop: pending=%d running=%d\n",
1230 scalar(@jobs), $running;
1235 while (@jobs && $running < $max_jobs) {
1236 my $job = shift @jobs;
1237 my ($id, $cmd) =@$job{qw(id cmd)};
1239 my ($in, $out, $pid);
1241 if($OPTS{verbose}) {
1242 my $donefrac = $donejobs / $njobs;
1244 # Once we've done at least 20% we'll have a good estimate of
1245 # the total runtime, hence ETA
1246 if($donefrac >= 0.2) {
1248 my $duration = ($now - $starttime) / $donefrac;
1249 my $remaining = ($starttime + $duration) - $now;
1250 $eta = sprintf ", remaining %d:%02d",
1251 $remaining / 60, $remaining % 60;
1253 warn sprintf "Starting %s (%d of %d, %.2f%%%s)\n",
1254 $id, $donejobs, $njobs, 100 * $donefrac, $eta;
1256 eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
or die "Error: while starting cachegrind subprocess for $id:\n$@";
1261 $fds{"$out"} = $job;
1262 $job->{out_fd} = $out;
1263 $job->{output} = '';
1270 print "Started pid $pid for $id\n";
1274 # In principle we should write to $in in the main select loop,
1275 # since it may block. In reality,
1276 # a) the code we write to the perl process's stdin is likely
1277 # to be less than the OS's pipe buffer size;
1278 # b) by the time the perl process has read in all its stdin,
1279 # the only output it should have generated is a few lines
1280 # of cachegrind output preamble.
1281 # If these assumptions change, then perform the following print
1282 # in the select loop instead.
1284 print $in $job->{prog};
1288 # Get output of running jobs
1291 printf "Select: waiting on (%s)\n",
1292 join ', ', sort { $a <=> $b } map $fds{$_}{pid},
1296 my @ready = $select->can_read;
1299 printf "Select: pids (%s) ready\n",
1300 join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
1304 die "Panic: select returned no file handles\n";
1307 for my $fd (@ready) {
1308 my $j = $fds{"$fd"};
1309 my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
1310 unless (defined $r) {
1311 die "Panic: Read from process running $j->{id} gave:\n$!";
1318 print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
1321 $select->remove($j->{out_fd});
1323 or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
1325 delete $fds{"$j->{out_fd}"};
1326 my $output = $j->{output};
1334 print "\n$j->{id}/\nCommand: $j->{cmd}\n"
1339 $results->{$j->{test}}{$j->{plabel}}[$j->{active}][$j->{loopix}]
1340 = parse_cachegrind($output, $j->{id}, $j->{perl});
1343 # Reap finished jobs
1346 my $kid = waitpid(-1, WNOHANG);
1350 unless (exists $pids{$kid}) {
1351 die "Panic: reaped unexpected child $kid";
1353 my $j = $pids{$kid};
1355 die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
1356 . "with the following output\n:$j->{output}\n";
1368 # grind_process(): process the data that has been extracted from
# cachegrind's output.
1371 # $res is of the form ->{benchmark_name}{perl_label}[active][count]{field_name},
1372 # where active is 0 or 1 indicating an empty or active loop,
1373 # count is 0 or 1 indicating a short or long loop. E.g.
# $res->{'expr::assign::scalar_lex'}{'perl-5.21.1'}[0][1]{Dw_mm}
1377 # The $res data structure is modified in-place by this sub.
1379 # $perls is [ [ perl-exe, perl-label], .... ].
1381 # $counts is [ N, M ] indicating the counts for the short and long loops.
1384 # return \%output, \%averages, where
1386 # $output{benchmark_name}{perl_label}{field_name} = N
1387 # $averages{perl_label}{field_name} = M
1389 # where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
1390 # M is the average raw count over all tests ($OPTS{raw}), or
1391 # 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
1394 my ($res, $perls, $counts) = @_;
# Process the four results for each test/perl combo:
1398 # $res->{benchmark_name}{perl_label}[active][count]{field_name} = n
1400 # $res->{benchmark_name}{perl_label}{field_name} = averaged_n
1402 # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
1403 # empty loops, eliminating startup time
1404 # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
1405 # active loops, eliminating startup time
1406 # (the two startup times may be different because different code
1407 # is being compiled); the difference of the two results above
1408 # divided by the count difference is the time to execute the
1409 # active code once, eliminating both startup and loop overhead.
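#
# For example (illustrative numbers), with $counts = [10, 20], empty-loop
# Ir counts of 1000 and 1100, and active-loop Ir counts of 1500 and 2100:
#
#     ((2100 - 1500) - (1100 - 1000)) / (20 - 10) = 50
#
# i.e. one execution of the snippet costs 50 instruction reads.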
1411 for my $tests (values %$res) {
1412 for my $r (values %$tests) {
1414 for (keys %{$r->[0][0]}) {
1415 my $n = ( ($r->[1][1]{$_} - $r->[1][0]{$_})
1416 - ($r->[0][1]{$_} - $r->[0][0]{$_})
1417 ) / ($counts->[1] - $counts->[0]);
1428 my $perl_norm = $perls->[$OPTS{norm}][1]; # the label of the reference perl
1430 for my $test_name (keys %$res) {
1431 my $res1 = $res->{$test_name};
1432 my $res2_norm = $res1->{$perl_norm};
1433 for my $perl (keys %$res1) {
1434 my $res2 = $res1->{$perl};
1435 for my $field (keys %$res2) {
1436 my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});
1439 # Avoid annoying '-0.0' displays. Ideally this number
1440 # should never be negative, but fluctuations in
1441 # startup etc can theoretically make this happen
1442 $q = 0 if ($q <= 0 && $q > -0.1);
1443 $totals{$perl}{$field} += $q;
1444 $counts{$perl}{$field}++;
1445 $data{$test_name}{$perl}{$field} = $q;
1449 # $p and $q are notionally integer counts, but
1450 # due to variations in startup etc, it's possible for a
1451 # count which is supposedly zero to be calculated as a
1452 # small positive or negative value.
1453 # In this case, set it to zero. Further below we
1454 # special-case zeros to avoid division by zero errors etc.
1456 $p = 0.0 if $p < 0.01;
1457 $q = 0.0 if $q < 0.01;
1459 if ($p == 0.0 && $q == 0.0) {
1460 # Both perls gave a count of zero, so no change:
1462 $totals{$perl}{$field} += 1;
1463 $counts{$perl}{$field}++;
1464 $data{$test_name}{$perl}{$field} = 1;
1466 elsif ($p == 0.0 || $q == 0.0) {
1467 # If either count is zero, there were too few events
1468 # to give a meaningful ratio (and we will end up with
1469 # division by zero if we try). Mark the result undef,
1470 # indicating that it shouldn't be displayed; and skip
1471 # adding to the average
1472 $data{$test_name}{$perl}{$field} = undef;
1475 # For averages, we record q/p rather than p/q.
1476 # Consider a test where perl_norm took 1000 cycles
1477 # and perlN took 800 cycles. For the individual
1478 # results we display p/q, or 1.25; i.e. a quarter
1479 # quicker. For the averages, we instead sum all
1480 # the 0.8's, which gives the total cycles required to
1481 # execute all tests, with all tests given equal
1482 # weight. Later we reciprocate the final result,
1483 # i.e. 1/(sum(qi/pi)/n)
1485 $totals{$perl}{$field} += $q/$p;
1486 $counts{$perl}{$field}++;
1487 $data{$test_name}{$perl}{$field} = $p/$q;
1493 # Calculate averages based on %totals and %counts accumulated earlier.
1496 for my $perl (keys %totals) {
1497 my $t = $totals{$perl};
1498 for my $field (keys %$t) {
1499 $averages{$perl}{$field} = $OPTS{raw}
1500 ? $t->{$field} / $counts{$perl}{$field}
1501 # reciprocal - see comments above
1502 : $counts{$perl}{$field} / $t->{$field};
1506 return \%data, \%averages;
1511 # print a standard blurb at the start of the grind display
1521 COND conditional branches
1522 IND indirect branches
1523 _m branch predict miss
1524 _m1 level 1 cache miss
1525 _mm last cache (e.g. L3) miss
1526 - indeterminate percentage (e.g. 1/0)
1531 print "The numbers represent raw counts per loop iteration.\n";
1535 The numbers represent relative counts per loop iteration, compared to
1536 $perls->[$OPTS{norm}][1] at 100.0%.
1537 Higher is better: for example, using half as many instructions gives 200%,
1538 while using twice as many gives 50%.
1544 # return a sorted list of the test names, plus 'AVERAGE'
1546 sub sorted_test_names {
1547 my ($results, $order, $perls) = @_;
1550 unless ($OPTS{average}) {
1551 if (defined $OPTS{'sort-field'}) {
1552 my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
1553 my $perl = $perls->[$perlix][1];
1556 $results->{$a}{$perl}{$field}
1557 <=> $results->{$b}{$perl}{$field}
1562 @names = grep $results->{$_}, @$order;
1566 # No point in displaying average for only one test.
1567 push @names, 'AVERAGE' unless @names == 1;
1572 # format one cell data item
1574 sub grind_format_cell {
1575 my ($val, $width) = @_;
1577 if (!defined $val) {
1578 return sprintf "%*s", $width, '-';
1580 elsif (abs($val) >= 1_000_000) {
1581 # avoid displaying very large numbers (which might be the
1582 # result of e.g. 1 / 0.000001)
1583 return sprintf "%*s", $width, 'Inf';
1585 elsif ($OPTS{raw}) {
1586 return sprintf "%*.1f", $width, $val;
1589 return sprintf "%*.2f", $width, $val * 100;
1593 # grind_print(): display the tabulated results of all the cachegrinds.
1595 # Arguments are of the form:
1596 # $results->{benchmark_name}{perl_label}{field_name} = N
1597 # $averages->{perl_label}{field_name} = M
1598 # $perls = [ [ perl-exe, perl-label ], ... ]
1599 # $tests->{test_name}{desc => ..., ...}
1600 # $order = [ 'foo::bar1', ... ] # order to display tests
1603 my ($results, $averages, $perls, $tests, $order) = @_;
1605 my @perl_names = map $_->[0], @$perls;
1606 my @perl_labels = map $_->[1], @$perls;
1608 $perl_labels{$_->[0]} = $_->[1] for @$perls;
1610 # Print standard header.
1611 grind_blurb($perls);
1613 my @test_names = sorted_test_names($results, $order, $perls);
my @fields = qw(Ir Dr Dw COND IND
                COND_m IND_m
                Ir_m1 Dr_m1 Dw_m1
                Ir_mm Dr_mm Dw_mm
             );
1621 if ($OPTS{fields}) {
1622 @fields = grep exists $OPTS{fields}{$_}, @fields;
1625 # If only a single field is to be displayed, use a more compact
1626 # format with only a single line of output per test.
1628 my $one_field = @fields == 1;
1630 # The width of column 0: this is either field names, or for
1631 # $one_field, test names
1634 for ($one_field ? @test_names : @fields) {
1635 $width0 = length if length > $width0;
1638 # Calculate the widths of the data columns
1640 my @widths = map length, @perl_labels;
1642 for my $test (@test_names) {
1643 my $res = ($test eq 'AVERAGE') ? $averages : $results->{$test};
1644 for my $field (@fields) {
1645 for my $i (0..$#widths) {
1646 my $l = length grind_format_cell(
1647 $res->{$perl_labels[$i]}{$field}, 1);
1648 $widths[$i] = $l if $l > $widths[$i];
1653 # Print the results for each test
1655 for my $test (0..$#test_names) {
1656 my $test_name = $test_names[$test];
1657 my $doing_ave = ($test_name eq 'AVERAGE');
1658 my $res = $doing_ave ? $averages : $results->{$test_name};
1660 # print per-test header
1663 print "\nResults for field $fields[0]\n\n" if $test == 0;
1666 print "\n$test_name";
1667 print "\n$tests->{$test_name}{desc}" unless $doing_ave;
1671 # Print the perl executable names header.
1673 if (!$one_field || $test == 0) {
1675 print " " x $width0;
1677 printf " %*s", $widths[$_],
1678 $i ? ('-' x$widths[$_]) : $perl_labels[$_];
1684 my $field_suffix = '';
1686 # print a line of data
1688 for my $field (@fields) {
1690 printf "%-*s", $width0, $test_name;
1693 # If there are enough fields, print a blank line
1694 # between groups of fields that have the same suffix
1697 $s = $1 if $field =~ /(_\w+)$/;
1698 print "\n" if $s ne $field_suffix;
1701 printf "%*s", $width0, $field;
1704 for my $i (0..$#widths) {
1705 print " ", grind_format_cell($res->{$perl_labels[$i]}{$field},
1715 # grind_print_compact(): like grind_print(), but display a single perl
1716 # in a compact form. Has an additional arg, $which_perl, which specifies
1717 # which perl to display.
1719 # Arguments are of the form:
1720 # $results->{benchmark_name}{perl_label}{field_name} = N
1721 # $averages->{perl_label}{field_name} = M
1722 # $perls = [ [ perl-exe, perl-label ], ... ]
1723 # $tests->{test_name}{desc => ..., ...}
1724 # $order = [ 'foo::bar1', ... ] # order to display tests
1726 sub grind_print_compact {
1727 my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;
1729 # Print standard header.
1730 grind_blurb($perls);
1732 print "\nResults for $perls->[$which_perl][1]\n\n";
1734 my @test_names = sorted_test_names($results, $order, $perls);
1736 # Dump the results for each test.
1738 my @fields = qw( Ir Dr Dw
1744 if ($OPTS{fields}) {
1745 @fields = grep exists $OPTS{fields}{$_}, @fields;
1748 # calculate the max width of the test names
1752 $name_width = length if length > $name_width;
1755 # Calculate the widths of the data columns
1757 my @widths = map length, @fields;
1759 for my $test (@test_names) {
1760 my $res = ($test eq 'AVERAGE') ? $averages : $results->{$test};
1761 $res = $res->{$perls->[$which_perl][1]};
1762 for my $i (0..$#fields) {
1763 my $l = length grind_format_cell($res->{$fields[$i]}, 1);
1764 $widths[$i] = $l if $l > $widths[$i];
1770 printf " %*s", $widths[$_], $fields[$_] for 0..$#fields;
1772 printf " %*s", $_, ('-' x $_) for @widths;
1775 # Print the results for each test
1777 for my $test_name (@test_names) {
1778 my $doing_ave = ($test_name eq 'AVERAGE');
1779 my $res = $doing_ave ? $averages : $results->{$test_name};
1780 $res = $res->{$perls->[$which_perl][1]};
1781 my $desc = $doing_ave
1783 : sprintf "%-*s %s", $name_width, $test_name,
1784 $tests->{$test_name}{desc};
1786 for my $i (0..$#fields) {
1787 print " ", grind_format_cell($res->{$fields[$i]}, $widths[$i]);
# do_selftest(): check that we can parse known cachegrind
# output formats. If the output of cachegrind changes, add a *new*
# test here; keep the old tests to make sure we continue to parse
# the old formats too.
1804 ==32350== Cachegrind, a cache and branch-prediction profiler
1805 ==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
1806 ==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
1807 ==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
1809 --32350-- warning: L3 cache found, using its data for the LL simulation.
1811 ==32350== I refs: 1,124,055
1812 ==32350== I1 misses: 5,573
1813 ==32350== LLi misses: 3,338
1814 ==32350== I1 miss rate: 0.49%
1815 ==32350== LLi miss rate: 0.29%
1817 ==32350== D refs: 404,275 (259,191 rd + 145,084 wr)
1818 ==32350== D1 misses: 9,608 ( 6,098 rd + 3,510 wr)
1819 ==32350== LLd misses: 5,794 ( 2,781 rd + 3,013 wr)
1820 ==32350== D1 miss rate: 2.3% ( 2.3% + 2.4% )
1821 ==32350== LLd miss rate: 1.4% ( 1.0% + 2.0% )
1823 ==32350== LL refs: 15,181 ( 11,671 rd + 3,510 wr)
1824 ==32350== LL misses: 9,132 ( 6,119 rd + 3,013 wr)
1825 ==32350== LL miss rate: 0.5% ( 0.4% + 2.0% )
1827 ==32350== Branches: 202,372 (197,050 cond + 5,322 ind)
1828 ==32350== Mispredicts: 19,153 ( 17,742 cond + 1,411 ind)
1829 ==32350== Mispred rate: 9.4% ( 9.0% + 26.5% )
1849 my $t = "$_/test.pl";
1853 plan(@tests / 3 * keys %VALID_FIELDS);
1856 my $desc = shift @tests;
1857 my $output = shift @tests;
1858 my $expected = shift @tests;
1859 my $p = parse_cachegrind($output);
1860 for (sort keys %VALID_FIELDS) {
1861 is($p->{$_}, $expected->{$_}, "$desc, $_");