This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
prep grind-run to accept args from PUTs
[perl5.git] / Porting / bench.pl
CommitLineData
9e7973fa
DM
1#!/usr/bin/perl
2#
3# A tool for analysing the performance of the code snippets found in
4# t/perf/benchmarks or similar
5
6
7=head1 NAME
8
9bench.pl - Compare the performance of perl code snippets across multiple
10perls.
11
12=head1 SYNOPSIS
13
4a1358c2
FC
14 # Basic: run the tests in t/perf/benchmarks against two or
15 # more perls
9e7973fa 16
300b1525 17 bench.pl [options] perl1[=label1] perl2[=label2] ...
9e7973fa
DM
18
19 # Run bench.pl's own built-in sanity tests
20
21 bench.pl --action=selftest
22
23=head1 DESCRIPTION
24
25By default, F<bench.pl> will run code snippets found in
26F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
27how many instruction reads, data writes, branches, cache misses, etc. that
28one execution of the snippet uses. It will run them against two or more
29perl executables and show how much each test has gotten better or worse.
30
31It is modelled on the F<perlbench> tool, but since it measures instruction
32reads etc., rather than timings, it is much more precise and reproducible.
e34630bf 33It is also considerably faster, and is capable of running tests in
9e7973fa
DM
34parallel (with C<-j>). Rather than displaying a single relative
35percentage per test/perl combination, it displays values for 13 different
36measurements, such as instruction reads, conditional branch misses etc.
37
38There are options to write the raw data to a file, and to read it back.
39This means that you can view the same run data in different views with
40different selection and sort options.
41
42The optional C<=label> after each perl executable is used in the display
43output.
44
45=head1 OPTIONS
46
47=over 4
48
49=item *
50
51--action=I<foo>
52
53What action to perform. The default is I<grind>, which runs the benchmarks
54using I<cachegrind> as the back end. The only other action at the moment is
55I<selftest>, which runs some basic sanity checks and produces TAP output.
56
57=item *
58
59--average
60
61Only display the overall average, rather than the results for each
62individual test.
63
64=item *
65
66--benchfile=I<foo>
67
68The path of the file which contains the benchmarks (F<t/perf/benchmarks>
69by default).
70
71=item *
72
73--bisect=I<field,minval,maxval>
74
75Run a single test against one perl and exit with a zero status if the
76named field is in the specified range; exit 1 otherwise. It will complain
77if more than one test or perl has been specified. It is intended to be
78called as part of a bisect run, to determine when something changed.
79For example,
80
4a1358c2
FC
81 bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
82 ./miniperl
9e7973fa
DM
83
84might be called from bisect to find when the number of instruction reads
85for test I<foo> falls outside the range 100..105.
86
87=item *
88
df3d7b3a
DM
89--compact=<Iperl>
90
91Display the results for a single perl executable in a compact form.
92Which perl to display is specified in the same manner as C<--norm>.
93
94=item *
95
9e7973fa
DM
96--debug
97
98Enable verbose debugging output.
99
100=item *
101
102--fields=I<a,b,c>
103
104Display only the specified fields; for example,
105
106 --fields=Ir,Ir_m,Ir_mm
107
108If only one field is selected, the output is in more compact form.
109
110=item *
111
112--grindargs=I<foo>
113
114Optional command-line arguments to pass to cachegrind invocations.
115
116=item *
117
118---help
119
120Display basic usage information.
121
122=item *
123
124-j I<N>
125--jobs=I<N>
126
127Run I<N> jobs in parallel (default 1). This determines how many cachegrind
128process will running at a time, and should generally be set to the number
129of CPUs available.
130
131=item *
132
133--norm=I<foo>
134
135Specify which perl column in the output to treat as the 100% norm.
136It may be a column number (0..N-1) or a perl executable name or label.
137It defaults to the leftmost column.
138
139=item *
140
141--perlargs=I<foo>
142
143Optional command-line arguments to pass to each perl that is run as part of
144a cachegrind session. For example, C<--perlargs=-Ilib>.
145
146=item *
147
148--raw
149
150Display raw data counts rather than percentages in the outputs. This
151allows you to see the exact number of intruction reads, branch misses etc.
152for each test/perl combination. It also causes the C<AVERAGE> display
153per field to be calculated based on the average of each tests's count
154rather than average of each percentage. This means that tests with very
155high counts will dominate.
156
157=item *
158
65f0b1c9 159--sort=I<field:perl>
9e7973fa
DM
160
161Order the tests in the output based on the value of I<field> in the
162column I<perl>. The I<perl> value is as per C<--norm>. For example
163
4a1358c2
FC
164 bench.pl --sort=Dw:perl-5.20.0 \
165 perl-5.16.0 perl-5.18.0 perl-5.20.0
9e7973fa
DM
166
167=item *
168
169-r I<file>
170--read=I<file>
171
172Read in saved data from a previous C<--write> run from the specified file.
173
174Requires C<JSON::PP> to be available.
175
176=item *
177
178--tests=I<FOO>
179
180Specify a subset of tests to run (or in the case of C<--read>, to display).
181It may be either a comma-separated list of test names, or a regular
182expression. For example
183
184 --tests=expr::assign::scalar_lex,expr::assign::2list_lex
185 --tests=/^expr::/
186
187=item *
188
189--verbose
190
191Display progress information.
192
193=item *
194
195-w I<file>
196--write=I<file>
197
198Save the raw data to the specified file. It can be read back later with
199C<--read>.
200
201Requires C<JSON::PP> to be available.
202
203=back
204
205=cut
206
207
208
209use 5.010000;
210use warnings;
211use strict;
212use Getopt::Long qw(:config no_auto_abbrev);
213use IPC::Open2 ();
214use IO::Select;
c2d21e7a 215use IO::File;
9e7973fa
DM
216use POSIX ":sys_wait_h";
217
218# The version of the file format used to save data. We refuse to process
219# the file if the integer component differs.
220
221my $FORMAT_VERSION = 1.0;
222
223# The fields we know about
224
225my %VALID_FIELDS = map { $_ => 1 }
226 qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
227
228sub usage {
229 die <<EOF;
230usage: $0 [options] perl[=label] ...
231 --action=foo What action to perform [default: grind].
232 --average Only display average, not individual test results.
233 --benchfile=foo File containing the benchmarks;
234 [default: t/perf/benchmarks].
235 --bisect=f,min,max run a single test against one perl and exit with a
236 zero status if the named field is in the specified
237 range; exit 1 otherwise.
df3d7b3a
DM
238 --compact=perl Display the results of a single perl in compact form.
239 Which perl specified like --norm
9e7973fa
DM
240 --debug Enable verbose debugging output.
241 --fields=a,b,c Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
242 --grindargs=foo Optional command-line args to pass to cachegrind.
243 --help Display this help.
244 -j|--jobs=N Run N jobs in parallel [default 1].
245 --norm=perl Which perl column to treat as 100%; may be a column
246 number (0..N-1) or a perl executable name or label;
247 [default: 0].
248 --perlargs=foo Optional command-line args to pass to each perl to run.
249 --raw Display raw data counts rather than percentages.
65f0b1c9 250 --sort=field:perl Sort the tests based on the value of 'field' in the
9e7973fa
DM
251 column 'perl'. The perl value is as per --norm.
252 -r|--read=file Read in previously saved data from the specified file.
253 --tests=FOO Select only the specified tests from the benchmarks file;
254 FOO may be either of the form 'foo,bar' or '/regex/';
255 [default: all tests].
256 --verbose Display progress information.
257 -w|--write=file Save the raw data to the specified file.
258
259--action is one of:
260 grind run the code under cachegrind
261 selftest perform a selftest; produce TAP output
262
263The command line ends with one or more specified perl executables,
264which will be searched for in the current \$PATH. Each binary name may
265have an optional =LABEL appended, which will be used rather than the
266executable name in output. E.g.
267
268 perl-5.20.1=PRE-BUGFIX perl-5.20.1-new=POST-BUGFIX
269EOF
270}
271
272my %OPTS = (
273 action => 'grind',
274 average => 0,
275 benchfile => 't/perf/benchmarks',
276 bisect => undef,
df3d7b3a 277 compact => undef,
9e7973fa
DM
278 debug => 0,
279 grindargs => '',
280 fields => undef,
281 jobs => 1,
282 norm => 0,
283 perlargs => '',
284 raw => 0,
285 read => undef,
286 sort => undef,
287 tests => undef,
288 verbose => 0,
289 write => undef,
290);
291
292
293# process command-line args and call top-level action
294
295{
296 GetOptions(
297 'action=s' => \$OPTS{action},
298 'average' => \$OPTS{average},
299 'benchfile=s' => \$OPTS{benchfile},
300 'bisect=s' => \$OPTS{bisect},
df3d7b3a 301 'compact=s' => \$OPTS{compact},
9e7973fa
DM
302 'debug' => \$OPTS{debug},
303 'grindargs=s' => \$OPTS{grindargs},
304 'help' => \$OPTS{help},
305 'fields=s' => \$OPTS{fields},
306 'jobs|j=i' => \$OPTS{jobs},
307 'norm=s' => \$OPTS{norm},
308 'perlargs=s' => \$OPTS{perlargs},
309 'raw' => \$OPTS{raw},
310 'read|r=s' => \$OPTS{read},
311 'sort=s' => \$OPTS{sort},
312 'tests=s' => \$OPTS{tests},
313 'verbose' => \$OPTS{verbose},
314 'write|w=s' => \$OPTS{write},
315 ) or usage;
316
317 usage if $OPTS{help};
318
319
320 if (defined $OPTS{read} and defined $OPTS{write}) {
321 die "Error: can't specify both --read and --write options\n";
322 }
323
324 if (defined $OPTS{read} or defined $OPTS{write}) {
325 # fail early if it's not present
326 require JSON::PP;
327 }
328
329 if (defined $OPTS{fields}) {
330 my @f = split /,/, $OPTS{fields};
331 for (@f) {
332 die "Error: --fields: unknown field '$_'\n"
333 unless $VALID_FIELDS{$_};
334 }
335 my %f = map { $_ => 1 } @f;
336 $OPTS{fields} = \%f;
337 }
338
339 my %valid_actions = qw(grind 1 selftest 1);
340 unless ($valid_actions{$OPTS{action}}) {
341 die "Error: unrecognised action '$OPTS{action}'\n"
342 . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
343 }
344
345 if (defined $OPTS{sort}) {
346 my @s = split /:/, $OPTS{sort};
347 if (@s != 2) {
348 die "Error: --sort argument should be of the form field:perl: "
349 . "'$OPTS{sort}'\n";
350 }
351 my ($field, $perl) = @s;
352 die "Error: --sort: unknown field '$field\n"
353 unless $VALID_FIELDS{$field};
354 # the 'perl' value will be validated later, after we have processed
355 # the perls
356 $OPTS{'sort-field'} = $field;
357 $OPTS{'sort-perl'} = $perl;
358 }
359
360 if ($OPTS{action} eq 'selftest') {
361 if (@ARGV) {
362 die "Error: no perl executables may be specified with --read\n"
363 }
364 }
365 elsif (defined $OPTS{bisect}) {
366 die "Error: exactly one perl executable must be specified for bisect\n"
367 unless @ARGV == 1;
368 die "Error: Can't specify both --bisect and --read\n"
369 if defined $OPTS{read};
370 die "Error: Can't specify both --bisect and --write\n"
371 if defined $OPTS{write};
372 }
373 elsif (defined $OPTS{read}) {
374 if (@ARGV) {
375 die "Error: no perl executables may be specified with --read\n"
376 }
377 }
378 elsif ($OPTS{raw}) {
379 unless (@ARGV) {
380 die "Error: at least one perl executable must be specified\n";
381 }
382 }
383 else {
384 unless (@ARGV >= 2) {
385 die "Error: at least two perl executables must be specified\n";
386 }
387 }
388
389 if ($OPTS{action} eq 'grind') {
390 do_grind(\@ARGV);
391 }
392 elsif ($OPTS{action} eq 'selftest') {
393 do_selftest();
394 }
395}
396exit 0;
397
398
399# Given a hash ref keyed by test names, filter it by deleting unwanted
400# tests, based on $OPTS{tests}.
401
402sub filter_tests {
403 my ($tests) = @_;
404
405 my $opt = $OPTS{tests};
406 return unless defined $opt;
407
408 my @tests;
409
410 if ($opt =~ m{^/}) {
411 $opt =~ s{^/(.+)/$}{$1}
412 or die "Error: --tests regex must be of the form /.../\n";
413 for (keys %$tests) {
414 delete $tests->{$_} unless /$opt/;
415 }
416 }
417 else {
418 my %t;
419 for (split /,/, $opt) {
24293753
JC
420 die "Error: no such test found: '$_'\n"
421 . ($OPTS{verbose} ? " have: @{[ sort keys %$tests ]}\n" : "")
422 unless exists $tests->{$_};
9e7973fa
DM
423 $t{$_} = 1;
424 }
425 for (keys %$tests) {
426 delete $tests->{$_} unless exists $t{$_};
427 }
428 }
429}
430
431
432# Read in the test file, and filter out any tests excluded by $OPTS{tests}
957d8930
DM
433# return a hash ref { testname => { test }, ... }
434# and an array ref of the original test names order,
9e7973fa
DM
435
436sub read_tests_file {
437 my ($file) = @_;
438
439 my $ta = do $file;
440 unless ($ta) {
441 die "Error: can't parse '$file': $@\n" if $@;
442 die "Error: can't read '$file': $!\n";
443 }
444
957d8930
DM
445 my @orig_order;
446 for (my $i=0; $i < @$ta; $i += 2) {
447 push @orig_order, $ta->[$i];
448 }
449
9e7973fa
DM
450 my $t = { @$ta };
451 filter_tests($t);
957d8930 452 return $t, \@orig_order;
9e7973fa
DM
453}
454
455
456# Process the perl/column argument of options like --norm and --sort.
457# Return the index of the matching perl.
458
459sub select_a_perl {
460 my ($perl, $perls, $who) = @_;
461
462 if ($perl =~ /^[0-9]$/) {
463 die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
464 unless $perl < @$perls;
465 return $perl;
466 }
467 else {
468 my @perl = grep $perls->[$_][0] eq $perl
469 || $perls->[$_][1] eq $perl,
470 0..$#$perls;
471 die "Error: $who: unrecognised perl '$perl'\n"
472 unless @perl;
473 die "Error: $who: ambiguous perl '$perl'\n"
474 if @perl > 1;
475 return $perl[0];
476 }
477}
478
479
480# Validate the list of perl=label on the command line.
9e7973fa
DM
481# Return a list of [ exe, label ] pairs.
482
483sub process_perls {
484 my @results;
485 for my $p (@_) {
486 my ($perl, $label) = split /=/, $p, 2;
487 $label //= $perl;
488 my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
489 die "Error: unable to execute '$perl': $r" if $r ne "ok\n";
490 push @results, [ $perl, $label ];
491 }
9e7973fa
DM
492 return @results;
493}
494
495
8fbd1c2c 496
9e7973fa
DM
497# Return a string containing perl test code wrapped in a loop
498# that runs $ARGV[0] times
499
500sub make_perl_prog {
501 my ($test, $desc, $setup, $code) = @_;
502
503 return <<EOF;
504# $desc
505package $test;
506BEGIN { srand(0) }
507$setup;
508for my \$__loop__ (1..\$ARGV[0]) {
509 $code;
510}
511EOF
512}
513
514
515# Parse the output from cachegrind. Return a hash ref.
516# See do_selftest() for examples of the output format.
517
518sub parse_cachegrind {
519 my ($output, $id, $perl) = @_;
520
521 my %res;
522
523 my @lines = split /\n/, $output;
524 for (@lines) {
525 unless (s/(==\d+==)|(--\d+--) //) {
526 die "Error: while executing $id:\n"
527 . "unexpected code or cachegrind output:\n$_\n";
528 }
529 if (/I refs:\s+([\d,]+)/) {
530 $res{Ir} = $1;
531 }
532 elsif (/I1 misses:\s+([\d,]+)/) {
533 $res{Ir_m1} = $1;
534 }
535 elsif (/LLi misses:\s+([\d,]+)/) {
536 $res{Ir_mm} = $1;
537 }
538 elsif (/D refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
539 @res{qw(Dr Dw)} = ($1,$2);
540 }
541 elsif (/D1 misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
542 @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
543 }
544 elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
545 @res{qw(Dr_mm Dw_mm)} = ($1,$2);
546 }
547 elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
548 @res{qw(COND IND)} = ($1,$2);
549 }
550 elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
551 @res{qw(COND_m IND_m)} = ($1,$2);
552 }
553 }
554
555 for my $field (keys %VALID_FIELDS) {
556 die "Error: can't parse '$field' field from cachegrind output:\n$output"
557 unless exists $res{$field};
558 $res{$field} =~ s/,//g;
559 }
560
561 return \%res;
562}
563
564
565# Handle the 'grind' action
566
567sub do_grind {
568 my ($perl_args) = @_; # the residue of @ARGV after option processing
569
957d8930 570 my ($loop_counts, $perls, $results, $tests, $order);
9e7973fa
DM
571 my ($bisect_field, $bisect_min, $bisect_max);
572
573 if (defined $OPTS{bisect}) {
574 ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
575 die "Error: --bisect option must be of form 'field,integer,integer'\n"
576 unless
577 defined $bisect_max
578 and $bisect_min =~ /^[0-9]+$/
579 and $bisect_max =~ /^[0-9]+$/;
580
581 die "Error: unrecognised field '$bisect_field' in --bisect option\n"
582 unless $VALID_FIELDS{$bisect_field};
583
584 die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
585 if $bisect_min > $bisect_max;
586 }
587
588 if (defined $OPTS{read}) {
589 open my $in, '<:encoding(UTF-8)', $OPTS{read}
590 or die " Error: can't open $OPTS{read} for reading: $!\n";
591 my $data = do { local $/; <$in> };
592 close $in;
593
594 my $hash = JSON::PP::decode_json($data);
595 if (int($FORMAT_VERSION) < int($hash->{version})) {
596 die "Error: unsupported version $hash->{version} in file"
597 . "'$OPTS{read}' (too new)\n";
598 }
957d8930
DM
599 ($loop_counts, $perls, $results, $tests, $order) =
600 @$hash{qw(loop_counts perls results tests order)};
9e7973fa
DM
601
602 filter_tests($results);
603 filter_tests($tests);
957d8930
DM
604
605 if (!$order) {
606 $order = [ sort keys %$tests ];
607 }
9e7973fa
DM
608 }
609 else {
610 # How many times to execute the loop for the two trials. The lower
611 # value is intended to do the loop enough times that branch
612 # prediction has taken hold; the higher loop allows us to see the
613 # branch misses after that
614 $loop_counts = [10, 20];
615
957d8930 616 ($tests, $order) = read_tests_file($OPTS{benchfile});
9e7973fa
DM
617 die "Error: only a single test may be specified with --bisect\n"
618 if defined $OPTS{bisect} and keys %$tests != 1;
619
620 $perls = [ process_perls(@$perl_args) ];
8fbd1c2c
DM
621
622
957d8930 623 $results = grind_run($tests, $order, $perls, $loop_counts);
9e7973fa
DM
624 }
625
8fbd1c2c
DM
626 # now that we have a list of perls, use it to process the
627 # 'perl' component of the --norm and --sort args
628
629 $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
630 if (defined $OPTS{'sort-perl'}) {
631 $OPTS{'sort-perl'} =
632 select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
633 }
634
df3d7b3a
DM
635 if (defined $OPTS{'compact'}) {
636 $OPTS{'compact'} =
637 select_a_perl($OPTS{'compact'}, $perls, "--compact");
638 }
9e7973fa
DM
639 if (defined $OPTS{write}) {
640 my $json = JSON::PP::encode_json({
641 version => $FORMAT_VERSION,
642 loop_counts => $loop_counts,
643 perls => $perls,
644 results => $results,
645 tests => $tests,
957d8930 646 order => $order,
9e7973fa
DM
647 });
648
649 open my $out, '>:encoding(UTF-8)', $OPTS{write}
650 or die " Error: can't open $OPTS{write} for writing: $!\n";
651 print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
652 close $out or die "Error: closing file '$OPTS{write}': $!\n";
653 }
654 else {
655 my ($processed, $averages) =
656 grind_process($results, $perls, $loop_counts);
657
658 if (defined $OPTS{bisect}) {
659 my @r = values %$results;
660 die "Panic: expected exactly one test result in bisect\n"
661 if @r != 1;
662 @r = values %{$r[0]};
663 die "Panic: expected exactly one perl result in bisect\n"
664 if @r != 1;
665 my $c = $r[0]{$bisect_field};
666 die "Panic: no result in bisect for field '$bisect_field'\n"
667 unless defined $c;
668 exit 0 if $bisect_min <= $c and $c <= $bisect_max;
669 exit 1;
670 }
df3d7b3a
DM
671 elsif (defined $OPTS{compact}) {
672 grind_print_compact($processed, $averages, $OPTS{compact},
673 $perls, $tests, $order);
674 }
9e7973fa 675 else {
957d8930 676 grind_print($processed, $averages, $perls, $tests, $order);
9e7973fa
DM
677 }
678 }
679}
680
681
682# Run cachegrind for every test/perl combo.
683# It may run several processes in parallel when -j is specified.
684# Return a hash ref suitable for input to grind_process()
685
686sub grind_run {
957d8930 687 my ($tests, $order, $perls, $counts) = @_;
9e7973fa
DM
688
689 # Build a list of all the jobs to run
690
691 my @jobs;
692
957d8930 693 for my $test (grep $tests->{$_}, @$order) {
9e7973fa
DM
694
695 # Create two test progs: one with an empty loop and one with code.
696 # Note that the empty loop is actually '{1;}' rather than '{}';
697 # this causes the loop to have a single nextstate rather than a
698 # stub op, so more closely matches the active loop; e.g.:
699 # {1;} => nextstate; unstack
700 # {$x=1;} => nextstate; const; gvsv; sassign; unstack
701 my @prog = (
702 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
703 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
704 );
705
706 for my $p (@$perls) {
a9eceb2d 707 my ($perl, $label, @putargs) = @$p;
9e7973fa
DM
708
709 # Run both the empty loop and the active loop
710 # $counts->[0] and $counts->[1] times.
711
712 for my $i (0,1) {
713 for my $j (0,1) {
714 my $cmd = "PERL_HASH_SEED=0 "
715 . "valgrind --tool=cachegrind --branch-sim=yes "
716 . "--cachegrind-out-file=/dev/null "
717 . "$OPTS{grindargs} "
a9eceb2d 718 . "$perl $OPTS{perlargs} @putargs - $counts->[$j] 2>&1";
9e7973fa
DM
719 # for debugging and error messages
720 my $id = "$test/$perl "
721 . ($i ? "active" : "empty") . "/"
722 . ($j ? "long" : "short") . " loop";
723
724 push @jobs, {
725 test => $test,
726 perl => $perl,
727 plabel => $label,
728 cmd => $cmd,
729 prog => $prog[$i],
730 active => $i,
731 loopix => $j,
732 id => $id,
733 };
734 }
735 }
736 }
737 }
738
739 # Execute each cachegrind and store the results in %results.
740
741 local $SIG{PIPE} = 'IGNORE';
742
743 my $max_jobs = $OPTS{jobs};
744 my $running = 0; # count of executing jobs
745 my %pids; # map pids to jobs
746 my %fds; # map fds to jobs
747 my %results;
748 my $select = IO::Select->new();
749
750 while (@jobs or $running) {
751
752 if ($OPTS{debug}) {
753 printf "Main loop: pending=%d running=%d\n",
754 scalar(@jobs), $running;
755 }
756
757 # Start new jobs
758
759 while (@jobs && $running < $max_jobs) {
760 my $job = shift @jobs;
761 my ($id, $cmd) =@$job{qw(id cmd)};
762
763 my ($in, $out, $pid);
764 warn "Starting $id\n" if $OPTS{verbose};
765 eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
766 or die "Error: while starting cachegrind subprocess"
767 ." for $id:\n$@";
768 $running++;
769 $pids{$pid} = $job;
770 $fds{"$out"} = $job;
771 $job->{out_fd} = $out;
772 $job->{output} = '';
773 $job->{pid} = $pid;
774
775 $out->blocking(0);
776 $select->add($out);
777
778 if ($OPTS{debug}) {
779 print "Started pid $pid for $id\n";
780 }
781
782 # Note:
783 # In principle we should write to $in in the main select loop,
784 # since it may block. In reality,
785 # a) the code we write to the perl process's stdin is likely
786 # to be less than the OS's pipe buffer size;
787 # b) by the time the perl process has read in all its stdin,
788 # the only output it should have generated is a few lines
789 # of cachegrind output preamble.
790 # If these assumptions change, then perform the following print
791 # in the select loop instead.
792
793 print $in $job->{prog};
794 close $in;
795 }
796
797 # Get output of running jobs
798
799 if ($OPTS{debug}) {
800 printf "Select: waiting on (%s)\n",
801 join ', ', sort { $a <=> $b } map $fds{$_}{pid},
802 $select->handles;
803 }
804
805 my @ready = $select->can_read;
806
807 if ($OPTS{debug}) {
808 printf "Select: pids (%s) ready\n",
809 join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
810 }
811
812 unless (@ready) {
813 die "Panic: select returned no file handles\n";
814 }
815
816 for my $fd (@ready) {
817 my $j = $fds{"$fd"};
818 my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
819 unless (defined $r) {
820 die "Panic: Read from process running $j->{id} gave:\n$!";
821 }
822 next if $r;
823
824 # EOF
825
826 if ($OPTS{debug}) {
827 print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
828 }
829
830 $select->remove($j->{out_fd});
831 close($j->{out_fd})
832 or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
833 $running--;
834 delete $fds{"$j->{out_fd}"};
835 my $output = $j->{output};
836
837 if ($OPTS{debug}) {
838 my $p = $j->{prog};
839 $p =~ s/^/ : /mg;
840 my $o = $output;
841 $o =~ s/^/ : /mg;
842
843 print "\n$j->{id}/\nCommand: $j->{cmd}\n"
844 . "Input:\n$p"
845 . "Output\n$o";
846 }
847
848 $results{$j->{test}}{$j->{perl}}[$j->{active}][$j->{loopix}]
849 = parse_cachegrind($output, $j->{id}, $j->{perl});
850 }
851
852 # Reap finished jobs
853
854 while (1) {
855 my $kid = waitpid(-1, WNOHANG);
856 my $ret = $?;
857 last if $kid <= 0;
858
859 unless (exists $pids{$kid}) {
860 die "Panic: reaped unexpected child $kid";
861 }
862 my $j = $pids{$kid};
863 if ($ret) {
864 die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
865 . "with the following output\n:$j->{output}\n";
866 }
867 delete $pids{$kid};
868 }
869 }
870
871 return \%results;
872}
873
874
875
876
877# grind_process(): process the data that has been extracted from
878# cachgegrind's output.
879#
880# $res is of the form ->{benchmark_name}{perl_name}[active][count]{field_name},
881# where active is 0 or 1 indicating an empty or active loop,
882# count is 0 or 1 indicating a short or long loop. E.g.
883#
884# $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][10]{Dw_mm}
885#
886# The $res data structure is modified in-place by this sub.
887#
888# $perls is [ [ perl-exe, perl-label], .... ].
889#
890# $counts is [ N, M ] indicating the counts for the short and long loops.
891#
892#
893# return \%output, \%averages, where
894#
895# $output{benchmark_name}{perl_name}{field_name} = N
896# $averages{perl_name}{field_name} = M
897#
898# where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
899# M is the average raw count over all tests ($OPTS{raw}), or
900# 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
901
902sub grind_process {
903 my ($res, $perls, $counts) = @_;
904
905 # Process the four results for each test/perf combo:
906 # Convert
907 # $res->{benchmark_name}{perl_name}[active][count]{field_name} = n
908 # to
909 # $res->{benchmark_name}{perl_name}{field_name} = averaged_n
910 #
911 # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
912 # empty loops, eliminating startup time
913 # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
914 # active loops, eliminating startup time
915 # (the two startup times may be different because different code
916 # is being compiled); the difference of the two results above
917 # divided by the count difference is the time to execute the
918 # active code once, eliminating both startup and loop overhead.
919
920 for my $tests (values %$res) {
921 for my $r (values %$tests) {
922 my $r2;
923 for (keys %{$r->[0][0]}) {
924 my $n = ( ($r->[1][1]{$_} - $r->[1][0]{$_})
925 - ($r->[0][1]{$_} - $r->[0][0]{$_})
926 ) / ($counts->[1] - $counts->[0]);
927 $r2->{$_} = $n;
928 }
929 $r = $r2;
930 }
931 }
932
933 my %totals;
934 my %counts;
935 my %data;
936
937 my $perl_norm = $perls->[$OPTS{norm}][0]; # the name of the reference perl
938
939 for my $test_name (keys %$res) {
940 my $res1 = $res->{$test_name};
941 my $res2_norm = $res1->{$perl_norm};
942 for my $perl (keys %$res1) {
943 my $res2 = $res1->{$perl};
944 for my $field (keys %$res2) {
945 my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});
946
947 if ($OPTS{raw}) {
948 # Avoid annoying '-0.0' displays. Ideally this number
949 # should never be negative, but fluctuations in
950 # startup etc can theoretically make this happen
951 $q = 0 if ($q <= 0 && $q > -0.1);
952 $totals{$perl}{$field} += $q;
953 $counts{$perl}{$field}++;
954 $data{$test_name}{$perl}{$field} = $q;
955 next;
956 }
957
958 # $p and $q are notionally integer counts, but
959 # due to variations in startup etc, it's possible for a
960 # count which is supposedly zero to be calculated as a
961 # small positive or negative value.
962 # In this case, set it to zero. Further below we
963 # special-case zeros to avoid division by zero errors etc.
964
965 $p = 0.0 if $p < 0.01;
966 $q = 0.0 if $q < 0.01;
967
968 if ($p == 0.0 && $q == 0.0) {
969 # Both perls gave a count of zero, so no change:
970 # treat as 100%
971 $totals{$perl}{$field} += 1;
972 $counts{$perl}{$field}++;
973 $data{$test_name}{$perl}{$field} = 1;
974 }
975 elsif ($p == 0.0 || $q == 0.0) {
976 # If either count is zero, there were too few events
977 # to give a meaningful ratio (and we will end up with
978 # division by zero if we try). Mark the result undef,
979 # indicating that it shouldn't be displayed; and skip
980 # adding to the average
981 $data{$test_name}{$perl}{$field} = undef;
982 }
983 else {
984 # For averages, we record q/p rather than p/q.
985 # Consider a test where perl_norm took 1000 cycles
986 # and perlN took 800 cycles. For the individual
987 # results we display p/q, or 1.25; i.e. a quarter
988 # quicker. For the averages, we instead sum all
989 # the 0.8's, which gives the total cycles required to
990 # execute all tests, with all tests given equal
991 # weight. Later we reciprocate the final result,
992 # i.e. 1/(sum(qi/pi)/n)
993
994 $totals{$perl}{$field} += $q/$p;
995 $counts{$perl}{$field}++;
996 $data{$test_name}{$perl}{$field} = $p/$q;
997 }
998 }
999 }
1000 }
1001
1002 # Calculate averages based on %totals and %counts accumulated earlier.
1003
1004 my %averages;
1005 for my $perl (keys %totals) {
1006 my $t = $totals{$perl};
1007 for my $field (keys %$t) {
1008 $averages{$perl}{$field} = $OPTS{raw}
1009 ? $t->{$field} / $counts{$perl}{$field}
1010 # reciprocal - see comments above
1011 : $counts{$perl}{$field} / $t->{$field};
1012 }
1013 }
1014
1015 return \%data, \%averages;
1016}
1017
1018
9e7973fa 1019
df3d7b3a 1020# print a standard blurb at the start of the grind display
9e7973fa 1021
df3d7b3a
DM
1022sub grind_blurb {
1023 my ($perls) = @_;
9e7973fa
DM
1024
1025 print <<EOF;
1026Key:
1027 Ir Instruction read
1028 Dr Data read
1029 Dw Data write
1030 COND conditional branches
1031 IND indirect branches
1032 _m branch predict miss
1033 _m1 level 1 cache miss
1034 _mm last cache (e.g. L3) miss
1035 - indeterminate percentage (e.g. 1/0)
1036
1037EOF
1038
1039 if ($OPTS{raw}) {
1040 print "The numbers represent raw counts per loop iteration.\n";
1041 }
1042 else {
1043 print <<EOF;
1044The numbers represent relative counts per loop iteration, compared to
df3d7b3a 1045$perls->[$OPTS{norm}][1] at 100.0%.
9e7973fa
DM
1046Higher is better: for example, using half as many instructions gives 200%,
1047while using twice as many gives 50%.
1048EOF
1049 }
df3d7b3a
DM
1050}
1051
1052
1053# return a sorted list of the test names, plus 'AVERAGE'
9e7973fa 1054
df3d7b3a
DM
1055sub sorted_test_names {
1056 my ($results, $order, $perls) = @_;
9e7973fa 1057
df3d7b3a 1058 my @names;
9e7973fa
DM
1059 unless ($OPTS{average}) {
1060 if (defined $OPTS{'sort-field'}) {
1061 my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
1062 my $perl = $perls->[$perlix][0];
df3d7b3a 1063 @names = sort
9e7973fa
DM
1064 {
1065 $results->{$a}{$perl}{$field}
1066 <=> $results->{$b}{$perl}{$field}
1067 }
1068 keys %$results;
1069 }
1070 else {
df3d7b3a 1071 @names = grep $results->{$_}, @$order;
9e7973fa
DM
1072 }
1073 }
1074
1075 # No point in displaying average for only one test.
df3d7b3a
DM
1076 push @names, 'AVERAGE' unless @names == 1;
1077 @names;
1078}
1079
1080
1081# grind_print(): display the tabulated results of all the cachegrinds.
1082#
1083# Arguments are of the form:
1084# $results->{benchmark_name}{perl_name}{field_name} = N
1085# $averages->{perl_name}{field_name} = M
1086# $perls = [ [ perl-exe, perl-label ], ... ]
1087# $tests->{test_name}{desc => ..., ...}
1088
1089sub grind_print {
1090 my ($results, $averages, $perls, $tests, $order) = @_;
1091
1092 my @perl_names = map $_->[0], @$perls;
1093 my %perl_labels;
1094 $perl_labels{$_->[0]} = $_->[1] for @$perls;
1095
1096 my $field_label_width = 6;
1097 # Calculate the width to display for each column.
1098 my $min_width = $OPTS{raw} ? 8 : 6;
1099 my @widths = map { length($_) < $min_width ? $min_width : length($_) }
1100 @perl_labels{@perl_names};
1101
1102 # Print standard header.
1103 grind_blurb($perls);
1104
1105 my @test_names = sorted_test_names($results, $order, $perls);
9e7973fa
DM
1106
1107 # If only a single field is to be displayed, use a more compact
1108 # format with only a single line of output per test.
1109
1110 my $one_field = defined $OPTS{fields} && keys(%{$OPTS{fields}}) == 1;
1111
1112 if ($one_field) {
91cde97c 1113 print "Results for field " . (keys(%{$OPTS{fields}}))[0] . ".\n";
9e7973fa
DM
1114
1115 # The first column will now contain test names rather than
1116 # field names; Calculate the max width.
1117
1118 $field_label_width = 0;
1119 for (@test_names) {
1120 $field_label_width = length if length > $field_label_width;
1121 }
1122
1123 # Print the perl executables header.
1124
1125 print "\n";
1126 for my $i (0,1) {
1127 print " " x $field_label_width;
1128 for (0..$#widths) {
1129 printf " %*s", $widths[$_],
1130 $i ? ('-' x$widths[$_]) : $perl_labels{$perl_names[$_]};
1131 }
1132 print "\n";
1133 }
1134 }
1135
1136 # Dump the results for each test.
1137
1138 for my $test_name (@test_names) {
1139 my $doing_ave = ($test_name eq 'AVERAGE');
1140 my $res1 = $doing_ave ? $averages : $results->{$test_name};
1141
1142 unless ($one_field) {
1143 print "\n$test_name";
1144 print "\n$tests->{$test_name}{desc}" unless $doing_ave;
1145 print "\n\n";
1146
1147 # Print the perl executables header.
1148 for my $i (0,1) {
1149 print " " x $field_label_width;
1150 for (0..$#widths) {
1151 printf " %*s", $widths[$_],
1152 $i ? ('-' x$widths[$_]) : $perl_labels{$perl_names[$_]};
1153 }
1154 print "\n";
1155 }
1156 }
1157
1158 for my $field (qw(Ir Dr Dw COND IND
1159 N
1160 COND_m IND_m
1161 N
1162 Ir_m1 Dr_m1 Dw_m1
1163 N
1164 Ir_mm Dr_mm Dw_mm
1165 ))
1166 {
1167 next if $OPTS{fields} and ! exists $OPTS{fields}{$field};
1168
1169 if ($field eq 'N') {
1170 print "\n";
1171 next;
1172 }
1173
91cde97c
DM
1174 if ($one_field) {
1175 printf "%-*s", $field_label_width, $test_name;
1176 }
1177 else {
1178 printf "%*s", $field_label_width, $field;
1179 }
9e7973fa
DM
1180
1181 for my $i (0..$#widths) {
1182 my $res2 = $res1->{$perl_names[$i]};
1183 my $p = $res2->{$field};
1184 if (!defined $p) {
1185 printf " %*s", $widths[$i], '-';
1186 }
1187 elsif ($OPTS{raw}) {
1188 printf " %*.1f", $widths[$i], $p;
1189 }
1190 else {
1191 printf " %*.2f", $widths[$i], $p * 100;
1192 }
1193 }
1194 print "\n";
1195 }
1196 }
1197}
1198
1199
df3d7b3a
DM
1200
1201# grind_print_compact(): like grind_print(), but display a single perl
1202# in a compact form. Has an additional arg, $which_perl, which specifies
1203# which perl to display.
1204#
1205# Arguments are of the form:
1206# $results->{benchmark_name}{perl_name}{field_name} = N
1207# $averages->{perl_name}{field_name} = M
1208# $perls = [ [ perl-exe, perl-label ], ... ]
1209# $tests->{test_name}{desc => ..., ...}
1210
1211sub grind_print_compact {
1212 my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;
1213
1214
1215 # the width to display for each column.
1216 my $width = $OPTS{raw} ? 7 : 6;
1217
1218 # Print standard header.
1219 grind_blurb($perls);
1220
1221 print "\nResults for $perls->[$which_perl][1]\n\n";
1222
1223 my @test_names = sorted_test_names($results, $order, $perls);
1224
1225 # Dump the results for each test.
1226
1227 my @fields = qw( Ir Dr Dw
1228 COND IND
1229 COND_m IND_m
1230 Ir_m1 Dr_m1 Dw_m1
1231 Ir_mm Dr_mm Dw_mm
1232 );
1233 if ($OPTS{fields}) {
1234 @fields = grep exists $OPTS{fields}{$_}, @fields;
1235 }
1236
1237 printf " %*s", $width, $_ for @fields;
1238 print "\n";
1239 printf " %*s", $width, '------' for @fields;
1240 print "\n";
1241
1242 for my $test_name (@test_names) {
1243 my $doing_ave = ($test_name eq 'AVERAGE');
1244 my $res = $doing_ave ? $averages : $results->{$test_name};
1245 $res = $res->{$perls->[$which_perl][0]};
1246
1247 for my $field (@fields) {
1248 my $p = $res->{$field};
1249 if (!defined $p) {
1250 printf " %*s", $width, '-';
1251 }
1252 elsif ($OPTS{raw}) {
1253 printf " %*.1f", $width, $p;
1254 }
1255 else {
1256 printf " %*.2f", $width, $p * 100;
1257 }
1258
1259 }
1260
1261 print " $test_name\n";
1262 }
1263}
1264
1265
9e7973fa
DM
1266# do_selftest(): check that we can parse known cachegrind()
1267# output formats. If the output of cachegrind changes, add a *new*
1268# test here; keep the old tests to make sure we continue to parse
1269# old cachegrinds
1270
1271sub do_selftest {
1272
1273 my @tests = (
1274 'standard',
1275 <<'EOF',
1276==32350== Cachegrind, a cache and branch-prediction profiler
1277==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
1278==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
1279==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
1280==32350==
1281--32350-- warning: L3 cache found, using its data for the LL simulation.
1282==32350==
1283==32350== I refs: 1,124,055
1284==32350== I1 misses: 5,573
1285==32350== LLi misses: 3,338
1286==32350== I1 miss rate: 0.49%
1287==32350== LLi miss rate: 0.29%
1288==32350==
1289==32350== D refs: 404,275 (259,191 rd + 145,084 wr)
1290==32350== D1 misses: 9,608 ( 6,098 rd + 3,510 wr)
1291==32350== LLd misses: 5,794 ( 2,781 rd + 3,013 wr)
1292==32350== D1 miss rate: 2.3% ( 2.3% + 2.4% )
1293==32350== LLd miss rate: 1.4% ( 1.0% + 2.0% )
1294==32350==
1295==32350== LL refs: 15,181 ( 11,671 rd + 3,510 wr)
1296==32350== LL misses: 9,132 ( 6,119 rd + 3,013 wr)
1297==32350== LL miss rate: 0.5% ( 0.4% + 2.0% )
1298==32350==
1299==32350== Branches: 202,372 (197,050 cond + 5,322 ind)
1300==32350== Mispredicts: 19,153 ( 17,742 cond + 1,411 ind)
1301==32350== Mispred rate: 9.4% ( 9.0% + 26.5% )
1302EOF
1303 {
1304 COND => 197050,
1305 COND_m => 17742,
1306 Dr => 259191,
1307 Dr_m1 => 6098,
1308 Dr_mm => 2781,
1309 Dw => 145084,
1310 Dw_m1 => 3510,
1311 Dw_mm => 3013,
1312 IND => 5322,
1313 IND_m => 1411,
1314 Ir => 1124055,
1315 Ir_m1 => 5573,
1316 Ir_mm => 3338,
1317 },
1318 );
1319
1320 for ('t', '.') {
1321 last if require "$_/test.pl";
1322 }
1323 plan(@tests / 3 * keys %VALID_FIELDS);
1324
1325 while (@tests) {
1326 my $desc = shift @tests;
1327 my $output = shift @tests;
1328 my $expected = shift @tests;
1329 my $p = parse_cachegrind($output);
1330 for (sort keys %VALID_FIELDS) {
1331 is($p->{$_}, $expected->{$_}, "$desc, $_");
1332 }
1333 }
1334}