# A tool for analysing the performance of the code snippets found in
# t/perf/benchmarks or similar
bench.pl - Compare the performance of perl code snippets across multiple
perls.
    # Basic: run the tests in t/perf/benchmarks against two or
    # more perls

    bench.pl [options] perl1[=label1] perl2[=label2] ...

    # Run bench.pl's own built-in sanity tests

    bench.pl --action=selftest
By default, F<bench.pl> will run code snippets found in
F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
how many instruction reads, data writes, branches, cache misses, etc. one
execution of the snippet uses. It will run them against two or more perl
executables and show how much each test has gotten better or worse.

It is modelled on the F<perlbench> tool, but since it measures instruction
reads etc., rather than timings, it is much more precise and reproducible.
It is also considerably faster, and is capable of running tests in
parallel (with C<-j>). Rather than displaying a single relative
percentage per test/perl combination, it displays values for 13 different
measurements, such as instruction reads, conditional branch misses, etc.

There are options to write the raw data to a file and to read it back.
This means that you can view the same run data in different ways, with
different selection and sort options.
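For example, a run can be saved and then redisplayed later, sorted by a
different field (hypothetical file name and perl names):

    bench.pl --write=results.json perlA perlB
    bench.pl --read=results.json --sort=Ir:perlA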
The optional C<=label> after each perl executable is used in the display
rather than the executable name.
What action to perform. The default is I<grind>, which runs the benchmarks
using I<cachegrind> as the back end. The only other action at the moment is
I<selftest>, which runs some basic sanity checks and produces TAP output.
Only display the overall average, rather than the results for each
individual test.
The path of the file which contains the benchmarks (F<t/perf/benchmarks>
by default).
--bisect=I<field,minval,maxval>

Run a single test against one perl and exit with a zero status if the
named field is in the specified range; exit 1 otherwise. It will complain
if more than one test or perl has been specified. It is intended to be
called as part of a bisect run, to determine when something changed.
For example,

    bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \

might be called from bisect to find when the number of instruction reads
for test I<foo> falls outside the range 100..105.
Display the results for a single perl executable in a compact form.
Which perl to display is specified in the same manner as C<--norm>.

Enable verbose debugging output.
Display only the specified fields; for example,

    --fields=Ir,Ir_m,Ir_mm

If only one field is selected, the output is in a more compact form.
Optional command-line arguments to pass to cachegrind invocations.

Display basic usage information.
Run I<N> jobs in parallel (default 1). This determines how many cachegrind
processes will be running at a time, and should generally be set to the
number of CPUs available.
Specify which perl column in the output to treat as the 100% norm.
It may be a column number (0..N-1) or a perl executable name or label.
It defaults to the leftmost column.
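For example, if C<perl-5.20.0> happens to be the third perl listed on the
command line, either of the following makes it the baseline:

    --norm=2
    --norm=perl-5.20.0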
Optional command-line arguments to pass to each perl that is run as part of
a cachegrind session. For example, C<--perlargs=-Ilib>.
Display raw data counts rather than percentages in the outputs. This
allows you to see the exact number of instruction reads, branch misses,
etc. for each test/perl combination. It also causes the C<AVERAGE> display
per field to be calculated based on the average of each test's count
rather than the average of each percentage. This means that tests with
very high counts will dominate.
Order the tests in the output based on the value of I<field> in the
column I<perl>. The I<perl> value is as per C<--norm>. For example

    bench.pl --sort=Dw:perl-5.20.0 \
        perl-5.16.0 perl-5.18.0 perl-5.20.0
Read in saved data from a previous C<--write> run from the specified file.

Requires C<JSON::PP> to be available.
Specify a subset of tests to run (or in the case of C<--read>, to display).
It may be either a comma-separated list of test names, or a regular
expression. For example

    --tests=expr::assign::scalar_lex,expr::assign::2list_lex
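or, as a regex selecting (say) every test in the C<expr::> group:

    --tests=/^expr::/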
Display progress information.
Save the raw data to the specified file. It can be read back later with
C<--read>.

Requires C<JSON::PP> to be available.
use Getopt::Long qw(:config no_auto_abbrev);
use POSIX ":sys_wait_h";

# The version of the file format used to save data. We refuse to process
# the file if the integer component differs.

my $FORMAT_VERSION = 1.0;

# The fields we know about

my %VALID_FIELDS = map { $_ => 1 }
    qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
usage: $0 [options] perl[=label] ...
  --action=foo        What action to perform [default: grind].
  --average           Only display average, not individual test results.
  --benchfile=foo     File containing the benchmarks
                      [default: t/perf/benchmarks].
  --bisect=f,min,max  Run a single test against one perl and exit with a
                      zero status if the named field is in the specified
                      range; exit 1 otherwise.
  --compact=perl      Display the results of a single perl in compact form.
                      Which perl is specified in the same manner as --norm.
  --debug             Enable verbose debugging output.
  --fields=a,b,c      Display only the specified fields
                      (e.g. Ir,Ir_m,Ir_mm).
  --grindargs=foo     Optional command-line args to pass to cachegrind.
  --help              Display this help.
  -j|--jobs=N         Run N jobs in parallel [default: 1].
  --norm=perl         Which perl column to treat as 100%; may be a column
                      number (0..N-1) or a perl executable name or label
                      [default: 0].
  --perlargs=foo      Optional command-line args to pass to each perl run.
  --raw               Display raw data counts rather than percentages.
  --sort=field:perl   Sort the tests based on the value of 'field' in the
                      column 'perl'. The perl value is as per --norm.
  -r|--read=file      Read in previously saved data from the specified file.
  --tests=FOO         Select only the specified tests from the benchmarks
                      file; FOO may be either of the form 'foo,bar' or
                      '/regex/' [default: all tests].
  --verbose           Display progress information.
  -w|--write=file     Save the raw data to the specified file.

Valid actions are:

    grind      run the code under cachegrind
    selftest   perform a selftest; produce TAP output

The command line ends with one or more perl executables, which will be
searched for in the current \$PATH. Each binary name may have an optional
=LABEL appended, which will be used rather than the executable name in
output. E.g.

    perl-5.20.1=PRE-BUGFIX  perl-5.20.1-new=POST-BUGFIX
    benchfile => 't/perf/benchmarks',
# process command-line args and call top-level action

    'action=s'    => \$OPTS{action},
    'average'     => \$OPTS{average},
    'benchfile=s' => \$OPTS{benchfile},
    'bisect=s'    => \$OPTS{bisect},
    'compact=s'   => \$OPTS{compact},
    'debug'       => \$OPTS{debug},
    'grindargs=s' => \$OPTS{grindargs},
    'help'        => \$OPTS{help},
    'fields=s'    => \$OPTS{fields},
    'jobs|j=i'    => \$OPTS{jobs},
    'norm=s'      => \$OPTS{norm},
    'perlargs=s'  => \$OPTS{perlargs},
    'raw'         => \$OPTS{raw},
    'read|r=s'    => \$OPTS{read},
    'sort=s'      => \$OPTS{sort},
    'tests=s'     => \$OPTS{tests},
    'verbose'     => \$OPTS{verbose},
    'write|w=s'   => \$OPTS{write},

usage if $OPTS{help};
if (defined $OPTS{read} and defined $OPTS{write}) {
    die "Error: can't specify both --read and --write options\n";

if (defined $OPTS{read} or defined $OPTS{write}) {
    # fail early if it's not present
if (defined $OPTS{fields}) {
    my @f = split /,/, $OPTS{fields};
        die "Error: --fields: unknown field '$_'\n"
            unless $VALID_FIELDS{$_};
    my %f = map { $_ => 1 } @f;
my %valid_actions = qw(grind 1 selftest 1);
unless ($valid_actions{$OPTS{action}}) {
    die "Error: unrecognised action '$OPTS{action}'\n"
      . "must be one of: " . join(', ', sort keys %valid_actions) . "\n";
if (defined $OPTS{sort}) {
    my @s = split /:/, $OPTS{sort};
        die "Error: --sort argument should be of the form field:perl: "
    my ($field, $perl) = @s;
    die "Error: --sort: unknown field '$field'\n"
        unless $VALID_FIELDS{$field};
    # the 'perl' value will be validated later, after we have processed
    $OPTS{'sort-field'} = $field;
    $OPTS{'sort-perl'}  = $perl;
if ($OPTS{action} eq 'selftest') {
    die "Error: no perl executables may be specified with --action=selftest\n"
elsif (defined $OPTS{bisect}) {
    die "Error: exactly one perl executable must be specified for bisect\n"
    die "Error: Can't specify both --bisect and --read\n"
        if defined $OPTS{read};
    die "Error: Can't specify both --bisect and --write\n"
        if defined $OPTS{write};
elsif (defined $OPTS{read}) {
    die "Error: no perl executables may be specified with --read\n"
        die "Error: at least one perl executable must be specified\n";
unless (@ARGV >= 2) {
    die "Error: at least two perl executables must be specified\n";

if ($OPTS{action} eq 'grind') {
elsif ($OPTS{action} eq 'selftest') {
# Given a hash ref keyed by test names, filter it by deleting unwanted
# tests, based on $OPTS{tests}.

    my $opt = $OPTS{tests};
    return unless defined $opt;

        $opt =~ s{^/(.+)/$}{$1}
            or die "Error: --tests regex must be of the form /.../\n";
            delete $tests->{$_} unless /$opt/;

        for (split /,/, $opt) {
            die "Error: no such test found: '$_'\n" unless exists $tests->{$_};
            delete $tests->{$_} unless exists $t{$_};
# Read in the test file, and filter out any tests excluded by $OPTS{tests}.
# Return a hash ref { testname => { test }, ... }
# and an array ref of the test names in their original order.
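#
# For example (hypothetical desc/setup/code values), the returned pair
# might look like:
#
#     { 'expr::assign::scalar_lex' =>
#         { desc => 'lexical $x = 1', setup => 'my $x', code => '$x = 1' } },
#     [ 'expr::assign::scalar_lex' ]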
sub read_tests_file {
        die "Error: can't parse '$file': $@\n" if $@;
        die "Error: can't read '$file': $!\n";

    for (my $i = 0; $i < @$ta; $i += 2) {
        push @orig_order, $ta->[$i];

    return $t, \@orig_order;
# Process the perl/column argument of options like --norm and --sort.
# Return the index of the matching perl.

    my ($perl, $perls, $who) = @_;

    if ($perl =~ /^[0-9]+$/) {
        die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
            unless $perl < @$perls;

    my @perl = grep    $perls->[$_][0] eq $perl
                    || $perls->[$_][1] eq $perl,
    die "Error: $who: unrecognised perl '$perl'\n"
    die "Error: $who: ambiguous perl '$perl'\n"
# Validate the list of perl=label on the command line.
# Return a list of [ exe, label ] pairs.

        my ($perl, $label) = split /=/, $p, 2;

        my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
        die "Error: unable to execute '$perl': $r" if $r ne "ok\n";
        push @results, [ $perl, $label ];
# Return a string containing perl test code wrapped in a loop
# that runs $ARGV[0] times.
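#
# For example (a rough sketch only; the real wrapper includes more
# boilerplate), a test with setup 'my $x' and code '$x = 1' yields
# source shaped like:
#
#     my $x;
#     for my $__loop__ (1..$ARGV[0]) {
#         $x = 1;
#     }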
    my ($test, $desc, $setup, $code) = @_;

    for my \$__loop__ (1..\$ARGV[0]) {
# Parse the output from cachegrind. Return a hash ref.
# See do_selftest() for examples of the output format.
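#
# For instance, fed the sample cachegrind output in do_selftest() below,
# it returns (after the commas are stripped) a hash like:
#
#     { Ir => 1124055, Ir_m1 => 5573, Ir_mm => 3338, Dr => 259191,
#       Dw => 145084, ..., COND_m => 17742, IND_m => 1411 }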
sub parse_cachegrind {
    my ($output, $id, $perl) = @_;

    my @lines = split /\n/, $output;
        unless (s/(==\d+==|--\d+--) //) {
            die "Error: while executing $id:\n"
              . "unexpected code or cachegrind output:\n$_\n";
        if (/I refs:\s+([\d,]+)/) {
        elsif (/I1 misses:\s+([\d,]+)/) {
        elsif (/LLi misses:\s+([\d,]+)/) {
        elsif (/D refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
            @res{qw(Dr Dw)} = ($1, $2);
        elsif (/D1 misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
            @res{qw(Dr_m1 Dw_m1)} = ($1, $2);
        elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
            @res{qw(Dr_mm Dw_mm)} = ($1, $2);
        elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
            @res{qw(COND IND)} = ($1, $2);
        elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
            @res{qw(COND_m IND_m)} = ($1, $2);

    for my $field (keys %VALID_FIELDS) {
        die "Error: can't parse '$field' field from cachegrind output:\n$output"
            unless exists $res{$field};
        $res{$field} =~ s/,//g;
# Handle the 'grind' action.

    my ($perl_args) = @_; # the residue of @ARGV after option processing

    my ($loop_counts, $perls, $results, $tests, $order);
    my ($bisect_field, $bisect_min, $bisect_max);

    if (defined $OPTS{bisect}) {
        ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
        die "Error: --bisect option must be of form 'field,integer,integer'\n"
                and $bisect_min =~ /^[0-9]+$/
                and $bisect_max =~ /^[0-9]+$/;

        die "Error: unrecognised field '$bisect_field' in --bisect option\n"
            unless $VALID_FIELDS{$bisect_field};

        die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
            if $bisect_min > $bisect_max;
    if (defined $OPTS{read}) {
        open my $in, '<:encoding(UTF-8)', $OPTS{read}
            or die "Error: can't open $OPTS{read} for reading: $!\n";
        my $data = do { local $/; <$in> };

        my $hash = JSON::PP::decode_json($data);
        if (int($FORMAT_VERSION) < int($hash->{version})) {
            die "Error: unsupported version $hash->{version} in file "
              . "'$OPTS{read}' (too new)\n";
        ($loop_counts, $perls, $results, $tests, $order) =
            @$hash{qw(loop_counts perls results tests order)};

        filter_tests($results);
        filter_tests($tests);
            $order = [ sort keys %$tests ];
        # How many times to execute the loop for the two trials. The lower
        # value is intended to do the loop enough times that branch
        # prediction has taken hold; the higher count allows us to see the
        # branch misses after that.
        $loop_counts = [10, 20];

        ($tests, $order) = read_tests_file($OPTS{benchfile});
        die "Error: only a single test may be specified with --bisect\n"
            if defined $OPTS{bisect} and keys %$tests != 1;

        $perls = [ process_perls(@$perl_args) ];

        $results = grind_run($tests, $order, $perls, $loop_counts);
    # Now that we have a list of perls, use it to process the
    # 'perl' component of the --norm and --sort args.

    $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
    if (defined $OPTS{'sort-perl'}) {
            select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");

    if (defined $OPTS{'compact'}) {
            select_a_perl($OPTS{'compact'}, $perls, "--compact");

    if (defined $OPTS{write}) {
        my $json = JSON::PP::encode_json({
            version     => $FORMAT_VERSION,
            loop_counts => $loop_counts,

        open my $out, '>:encoding(UTF-8)', $OPTS{write}
            or die "Error: can't open $OPTS{write} for writing: $!\n";
        print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
        close $out       or die "Error: closing file '$OPTS{write}': $!\n";
    my ($processed, $averages) =
        grind_process($results, $perls, $loop_counts);

    if (defined $OPTS{bisect}) {
        my @r = values %$results;
        die "Panic: expected exactly one test result in bisect\n"
        @r = values %{$r[0]};
        die "Panic: expected exactly one perl result in bisect\n"
        my $c = $r[0]{$bisect_field};
        die "Panic: no result in bisect for field '$bisect_field'\n"
        exit 0 if $bisect_min <= $c and $c <= $bisect_max;
    elsif (defined $OPTS{compact}) {
        grind_print_compact($processed, $averages, $OPTS{compact},
                            $perls, $tests, $order);
        grind_print($processed, $averages, $perls, $tests, $order);
# Run cachegrind for every test/perl combo.
# It may run several processes in parallel when -j is specified.
# Return a hash ref suitable for input to grind_process().

    my ($tests, $order, $perls, $counts) = @_;

    # Build a list of all the jobs to run.

    for my $test (grep $tests->{$_}, @$order) {

        # Create two test progs: one with an empty loop and one with code.
        # Note that the empty loop is actually '{1;}' rather than '{}';
        # this causes the loop to have a single nextstate rather than a
        # stub op, so it more closely matches the active loop; e.g.:
        #     {1;}    => nextstate;                       unstack
        #     {$x=1;} => nextstate; const; gvsv; sassign; unstack

            make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
            make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
        for my $p (@$perls) {
            my ($perl, $label) = @$p;

            # Run both the empty loop and the active loop
            # $counts->[0] and $counts->[1] times.

                    my $cmd = "PERL_HASH_SEED=0 "
                            . "valgrind --tool=cachegrind --branch-sim=yes "
                            . "--cachegrind-out-file=/dev/null "
                            . "$OPTS{grindargs} "
                            . "$perl $OPTS{perlargs} - $counts->[$j] 2>&1";
                    # for debugging and error messages
                    my $id = "$test/$perl "
                           . ($i ? "active" : "empty") . "/"
                           . ($j ? "long" : "short") . " loop";
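                    # e.g. (hypothetical test and perl names):
                    #  "expr::assign::scalar_lex/perl-5.20.0 active/long loop"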
    # Execute each cachegrind and store the results in %results.

    local $SIG{PIPE} = 'IGNORE';

    my $max_jobs = $OPTS{jobs};
    my $running  = 0; # count of executing jobs
    my %pids;         # map pids to jobs
    my %fds;          # map fds  to jobs

    my $select = IO::Select->new();

    while (@jobs or $running) {

            printf "Main loop: pending=%d running=%d\n",
                scalar(@jobs), $running;

        while (@jobs && $running < $max_jobs) {
            my $job = shift @jobs;
            my ($id, $cmd) = @$job{qw(id cmd)};

            my ($in, $out, $pid);
            warn "Starting $id\n" if $OPTS{verbose};
            eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
                or die "Error: while starting cachegrind subprocess"

            $job->{out_fd} = $out;

                print "Started pid $pid for $id\n";
            # In principle we should write to $in in the main select loop,
            # since it may block. In reality,
            # a) the code we write to the perl process's stdin is likely
            #    to be less than the OS's pipe buffer size;
            # b) by the time the perl process has read in all its stdin,
            #    the only output it should have generated is a few lines
            #    of cachegrind output preamble.
            # If these assumptions change, then perform the following print
            # in the select loop instead.

            print $in $job->{prog};
        # Get output of running jobs.

            printf "Select: waiting on (%s)\n",
                join ', ', sort { $a <=> $b } map $fds{$_}{pid},

        my @ready = $select->can_read;

            printf "Select: pids (%s) ready\n",
                join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;

            die "Panic: select returned no file handles\n";
        for my $fd (@ready) {

            my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
            unless (defined $r) {
                die "Panic: Read from process running $j->{id} gave:\n$!";

                print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";

            $select->remove($j->{out_fd});
                or die "Panic: closing output fh on $j->{id} gave:\n$!\n";

            delete $fds{"$j->{out_fd}"};
            my $output = $j->{output};

                print "\n$j->{id}/\nCommand: $j->{cmd}\n"

            $results{$j->{test}}{$j->{perl}}[$j->{active}][$j->{loopix}]
                = parse_cachegrind($output, $j->{id}, $j->{perl});
        my $kid = waitpid(-1, WNOHANG);

            unless (exists $pids{$kid}) {
                die "Panic: reaped unexpected child $kid";

            die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
              . "with the following output:\n$j->{output}\n";
# grind_process(): process the data that has been extracted from
# cachegrind's output.
#
# $res is of the form ->{benchmark_name}{perl_name}[active][count]{field_name},
# where active is 0 or 1, indicating an empty or active loop, and
# count is 0 or 1, indicating a short or long loop. E.g.
#
#     $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][1]{Dw_mm}
#
# The $res data structure is modified in-place by this sub.
#
# $perls is [ [ perl-exe, perl-label ], ... ].
#
# $counts is [ N, M ], indicating the counts for the short and long loops.
#
# Return \%output, \%averages, where
#
#     $output{benchmark_name}{perl_name}{field_name} = N
#     $averages{perl_name}{field_name} = M
#
# where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
# M is the average raw count over all tests ($OPTS{raw}), or
# 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
    my ($res, $perls, $counts) = @_;

    # Process the four results for each test/perl combo:
    #
    #     $res->{benchmark_name}{perl_name}[active][count]{field_name} = n
    #
    # into a single result:
    #
    #     $res->{benchmark_name}{perl_name}{field_name} = averaged_n
    #
    # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
    #     empty loops, eliminating startup time;
    # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
    #     active loops, eliminating startup time.
    # (The two startup times may be different because different code
    # is being compiled.) The difference of the two results above,
    # divided by the count difference, is the time to execute the
    # active code once, eliminating both startup and loop overhead.
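    #
    # A worked example, with hypothetical Ir counts and $counts = [10, 20]:
    #     empty loop:  $r[0][0] = 2000, $r[0][1] = 3000
    #     active loop: $r[1][0] = 2500, $r[1][1] = 4000
    # gives ((4000-2500) - (3000-2000)) / (20-10) = 50 instruction reads
    # per iteration of the active code.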
    for my $tests (values %$res) {
        for my $r (values %$tests) {
            for (keys %{$r->[0][0]}) {
                my $n = ( ($r->[1][1]{$_} - $r->[1][0]{$_})
                        - ($r->[0][1]{$_} - $r->[0][0]{$_})
                        ) / ($counts->[1] - $counts->[0]);
    my $perl_norm = $perls->[$OPTS{norm}][0]; # the name of the reference perl

    for my $test_name (keys %$res) {
        my $res1 = $res->{$test_name};
        my $res2_norm = $res1->{$perl_norm};
        for my $perl (keys %$res1) {
            my $res2 = $res1->{$perl};
            for my $field (keys %$res2) {
                my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});

                    # Avoid annoying '-0.0' displays. Ideally this number
                    # should never be negative, but fluctuations in
                    # startup etc. can theoretically make this happen.
                    $q = 0 if ($q <= 0 && $q > -0.1);
                    $totals{$perl}{$field} += $q;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = $q;
                # $p and $q are notionally integer counts, but
                # due to variations in startup etc., it's possible for a
                # count which is supposedly zero to be calculated as a
                # small positive or negative value.
                # In this case, set it to zero. Further below we
                # special-case zeros to avoid division-by-zero errors etc.

                $p = 0.0 if $p < 0.01;
                $q = 0.0 if $q < 0.01;

                if ($p == 0.0 && $q == 0.0) {
                    # Both perls gave a count of zero, so no change:
                    $totals{$perl}{$field} += 1;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = 1;
                elsif ($p == 0.0 || $q == 0.0) {
                    # If either count is zero, there were too few events
                    # to give a meaningful ratio (and we would end up with
                    # division by zero if we tried). Mark the result undef,
                    # indicating that it shouldn't be displayed, and skip
                    # adding it to the average.
                    $data{$test_name}{$perl}{$field} = undef;
                    # For averages, we record q/p rather than p/q.
                    # Consider a test where perl_norm took 1000 cycles
                    # and perlN took 800 cycles. For the individual
                    # results we display p/q, or 1.25; i.e. a quarter
                    # quicker. For the averages, we instead sum all
                    # the 0.8's, which gives the total cycles required to
                    # execute all tests, with all tests given equal
                    # weight. Later we reciprocate the final result,
                    # i.e. 1/(sum(qi/pi)/n).
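                    #
                    # A worked example with two hypothetical tests, where
                    # perlN used 0.8 and 1.25 of the norm's cycles:
                    #     1/((0.8 + 1.25)/2) = 1/1.025 ~= 0.976,
                    # which is displayed as 97.6%, i.e. slightly slower
                    # overall.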
                    $totals{$perl}{$field} += $q/$p;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = $p/$q;
    # Calculate averages based on %totals and %counts accumulated earlier.

    for my $perl (keys %totals) {
        my $t = $totals{$perl};
        for my $field (keys %$t) {
            $averages{$perl}{$field} = $OPTS{raw}
                ? $t->{$field} / $counts{$perl}{$field}
                # reciprocal - see comments above
                : $counts{$perl}{$field} / $t->{$field};

    return \%data, \%averages;
# Print a standard blurb at the start of the grind display.

    COND   conditional branches
    IND    indirect branches
    _m     branch predict miss
    _m1    level 1 cache miss
    _mm    last cache (e.g. L3) miss
    -      indeterminate percentage (e.g. 1/0)

        print "The numbers represent raw counts per loop iteration.\n";

The numbers represent relative counts per loop iteration, compared to
$perls->[$OPTS{norm}][1] at 100.0%.
Higher is better: for example, using half as many instructions gives 200%,
while using twice as many gives 50%.
# Return a sorted list of the test names, plus 'AVERAGE'.

sub sorted_test_names {
    my ($results, $order, $perls) = @_;

    unless ($OPTS{average}) {
        if (defined $OPTS{'sort-field'}) {
            my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
            my $perl = $perls->[$perlix][0];
                    $results->{$a}{$perl}{$field}
                <=> $results->{$b}{$perl}{$field}
            @names = grep $results->{$_}, @$order;

    # No point in displaying the average for only one test.
    push @names, 'AVERAGE' unless @names == 1;
# grind_print(): display the tabulated results of all the cachegrinds.
#
# Arguments are of the form:
#     $results->{benchmark_name}{perl_name}{field_name} = N
#     $averages->{perl_name}{field_name} = M
#     $perls = [ [ perl-exe, perl-label ], ... ]
#     $tests->{test_name}{desc => ..., ...}
    my ($results, $averages, $perls, $tests, $order) = @_;

    my @perl_names = map $_->[0], @$perls;
    $perl_labels{$_->[0]} = $_->[1] for @$perls;

    my $field_label_width = 6;

    # Calculate the width to display for each column.
    my $min_width = $OPTS{raw} ? 8 : 6;
    my @widths = map { length($_) < $min_width ? $min_width : length($_) }
                     @perl_labels{@perl_names};

    # Print the standard header.
    grind_blurb($perls);

    my @test_names = sorted_test_names($results, $order, $perls);

    # If only a single field is to be displayed, use a more compact
    # format with only a single line of output per test.

    my $one_field = defined $OPTS{fields} && keys(%{$OPTS{fields}}) == 1;
        print "Results for field " . (keys(%{$OPTS{fields}}))[0] . ".\n";

        # The first column will now contain test names rather than
        # field names; calculate the max width.

        $field_label_width = 0;
            $field_label_width = length if length > $field_label_width;

    # Print the perl executables header.

        print " " x $field_label_width;
            printf " %*s", $widths[$_],
                $i ? ('-' x $widths[$_]) : $perl_labels{$perl_names[$_]};
    # Dump the results for each test.

    for my $test_name (@test_names) {
        my $doing_ave = ($test_name eq 'AVERAGE');
        my $res1 = $doing_ave ? $averages : $results->{$test_name};

        unless ($one_field) {
            print "\n$test_name";
            print "\n$tests->{$test_name}{desc}" unless $doing_ave;

        # Print the perl executables header.

            print " " x $field_label_width;
                printf " %*s", $widths[$_],
                    $i ? ('-' x $widths[$_]) : $perl_labels{$perl_names[$_]};
        for my $field (qw(Ir Dr Dw COND IND

            next if $OPTS{fields} and !exists $OPTS{fields}{$field};

            if ($field eq 'N') {

                printf "%-*s", $field_label_width, $test_name;
                printf "%*s",  $field_label_width, $field;

            for my $i (0..$#widths) {
                my $res2 = $res1->{$perl_names[$i]};
                my $p = $res2->{$field};
                    printf " %*s", $widths[$i], '-';
                elsif ($OPTS{raw}) {
                    printf " %*.1f", $widths[$i], $p;
                    printf " %*.2f", $widths[$i], $p * 100;
# grind_print_compact(): like grind_print(), but display a single perl
# in a compact form. Has an additional arg, $which_perl, which specifies
# which perl to display.
#
# Arguments are of the form:
#     $results->{benchmark_name}{perl_name}{field_name} = N
#     $averages->{perl_name}{field_name} = M
#     $perls = [ [ perl-exe, perl-label ], ... ]
#     $tests->{test_name}{desc => ..., ...}
sub grind_print_compact {
    my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;

    # The width to display for each column.
    my $width = $OPTS{raw} ? 7 : 6;

    # Print the standard header.
    grind_blurb($perls);

    print "\nResults for $perls->[$which_perl][1]\n\n";

    my @test_names = sorted_test_names($results, $order, $perls);

    # Dump the results for each test.

    my @fields = qw( Ir Dr Dw

    if ($OPTS{fields}) {
        @fields = grep exists $OPTS{fields}{$_}, @fields;

    printf " %*s", $width, $_       for @fields;
    printf " %*s", $width, '------' for @fields;
    for my $test_name (@test_names) {
        my $doing_ave = ($test_name eq 'AVERAGE');
        my $res = $doing_ave ? $averages : $results->{$test_name};
        $res = $res->{$perls->[$which_perl][0]};

        for my $field (@fields) {
            my $p = $res->{$field};
                printf " %*s", $width, '-';
            elsif ($OPTS{raw}) {
                printf " %*.1f", $width, $p;
                printf " %*.2f", $width, $p * 100;

        print " $test_name\n";
# do_selftest(): check that we can parse known cachegrind
# output formats. If the output of cachegrind changes, add a *new*
# test here; keep the old tests to make sure we continue to parse
# the old formats too.
==32350== Cachegrind, a cache and branch-prediction profiler
==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
--32350-- warning: L3 cache found, using its data for the LL simulation.
==32350== I refs: 1,124,055
==32350== I1 misses: 5,573
==32350== LLi misses: 3,338
==32350== I1 miss rate: 0.49%
==32350== LLi miss rate: 0.29%
==32350== D refs: 404,275 (259,191 rd + 145,084 wr)
==32350== D1 misses: 9,608 ( 6,098 rd + 3,510 wr)
==32350== LLd misses: 5,794 ( 2,781 rd + 3,013 wr)
==32350== D1 miss rate: 2.3% ( 2.3% + 2.4% )
==32350== LLd miss rate: 1.4% ( 1.0% + 2.0% )
==32350== LL refs: 15,181 ( 11,671 rd + 3,510 wr)
==32350== LL misses: 9,132 ( 6,119 rd + 3,013 wr)
==32350== LL miss rate: 0.5% ( 0.4% + 2.0% )
==32350== Branches: 202,372 (197,050 cond + 5,322 ind)
==32350== Mispredicts: 19,153 ( 17,742 cond + 1,411 ind)
==32350== Mispred rate: 9.4% ( 9.0% + 26.5% )
    last if require "$_/test.pl";

plan(@tests / 3 * keys %VALID_FIELDS);

    my $desc     = shift @tests;
    my $output   = shift @tests;
    my $expected = shift @tests;
    my $p = parse_cachegrind($output);
    for (sort keys %VALID_FIELDS) {
        is($p->{$_}, $expected->{$_}, "$desc, $_");