This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Porting/bench.pl: protect against data loss
[perl5.git] / Porting / bench.pl
CommitLineData
9e7973fa
DM
1#!/usr/bin/perl
2#
3# A tool for analysing the performance of the code snippets found in
4# t/perf/benchmarks or similar
5
6
7=head1 NAME
8
9bench.pl - Compare the performance of perl code snippets across multiple
10perls.
11
12=head1 SYNOPSIS
13
4a1358c2
FC
14 # Basic: run the tests in t/perf/benchmarks against two or
15 # more perls
9e7973fa 16
300b1525 17 bench.pl [options] perl1[=label1] perl2[=label2] ...
9e7973fa
DM
18
19 # Run bench.pl's own built-in sanity tests
20
21 bench.pl --action=selftest
22
23=head1 DESCRIPTION
24
25By default, F<bench.pl> will run code snippets found in
26F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
27how many instruction reads, data writes, branches, cache misses, etc. that
28one execution of the snippet uses. It will run them against two or more
29perl executables and show how much each test has gotten better or worse.
30
31It is modelled on the F<perlbench> tool, but since it measures instruction
32reads etc., rather than timings, it is much more precise and reproducible.
e34630bf 33It is also considerably faster, and is capable of running tests in
9e7973fa
DM
34parallel (with C<-j>). Rather than displaying a single relative
35percentage per test/perl combination, it displays values for 13 different
36measurements, such as instruction reads, conditional branch misses etc.
37
38There are options to write the raw data to a file, and to read it back.
39This means that you can view the same run data in different views with
40different selection and sort options.
41
42The optional C<=label> after each perl executable is used in the display
43output.
44
45=head1 OPTIONS
46
47=over 4
48
49=item *
50
51--action=I<foo>
52
53What action to perform. The default is I<grind>, which runs the benchmarks
54using I<cachegrind> as the back end. The only other action at the moment is
55I<selftest>, which runs some basic sanity checks and produces TAP output.
56
57=item *
58
59--average
60
61Only display the overall average, rather than the results for each
62individual test.
63
64=item *
65
66--benchfile=I<foo>
67
68The path of the file which contains the benchmarks (F<t/perf/benchmarks>
69by default).
70
71=item *
72
73--bisect=I<field,minval,maxval>
74
75Run a single test against one perl and exit with a zero status if the
76named field is in the specified range; exit 1 otherwise. It will complain
77if more than one test or perl has been specified. It is intended to be
78called as part of a bisect run, to determine when something changed.
79For example,
80
4a1358c2
FC
81 bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
82 ./miniperl
9e7973fa
DM
83
84might be called from bisect to find when the number of instruction reads
85for test I<foo> falls outside the range 100..105.
86
87=item *
88
df3d7b3a
DM
89--compact=<Iperl>
90
91Display the results for a single perl executable in a compact form.
92Which perl to display is specified in the same manner as C<--norm>.
93
94=item *
95
9e7973fa
DM
96--debug
97
98Enable verbose debugging output.
99
100=item *
101
102--fields=I<a,b,c>
103
104Display only the specified fields; for example,
105
106 --fields=Ir,Ir_m,Ir_mm
107
108If only one field is selected, the output is in more compact form.
109
110=item *
111
112--grindargs=I<foo>
113
114Optional command-line arguments to pass to cachegrind invocations.
115
116=item *
117
118---help
119
120Display basic usage information.
121
122=item *
123
124-j I<N>
125--jobs=I<N>
126
127Run I<N> jobs in parallel (default 1). This determines how many cachegrind
128process will running at a time, and should generally be set to the number
129of CPUs available.
130
131=item *
132
133--norm=I<foo>
134
135Specify which perl column in the output to treat as the 100% norm.
136It may be a column number (0..N-1) or a perl executable name or label.
137It defaults to the leftmost column.
138
139=item *
140
141--perlargs=I<foo>
142
143Optional command-line arguments to pass to each perl that is run as part of
144a cachegrind session. For example, C<--perlargs=-Ilib>.
145
146=item *
147
148--raw
149
150Display raw data counts rather than percentages in the outputs. This
151allows you to see the exact number of intruction reads, branch misses etc.
152for each test/perl combination. It also causes the C<AVERAGE> display
153per field to be calculated based on the average of each tests's count
154rather than average of each percentage. This means that tests with very
155high counts will dominate.
156
157=item *
158
65f0b1c9 159--sort=I<field:perl>
9e7973fa
DM
160
161Order the tests in the output based on the value of I<field> in the
162column I<perl>. The I<perl> value is as per C<--norm>. For example
163
4a1358c2
FC
164 bench.pl --sort=Dw:perl-5.20.0 \
165 perl-5.16.0 perl-5.18.0 perl-5.20.0
9e7973fa
DM
166
167=item *
168
169-r I<file>
170--read=I<file>
171
172Read in saved data from a previous C<--write> run from the specified file.
173
174Requires C<JSON::PP> to be available.
175
176=item *
177
178--tests=I<FOO>
179
180Specify a subset of tests to run (or in the case of C<--read>, to display).
181It may be either a comma-separated list of test names, or a regular
182expression. For example
183
184 --tests=expr::assign::scalar_lex,expr::assign::2list_lex
185 --tests=/^expr::/
186
187=item *
188
189--verbose
190
191Display progress information.
192
193=item *
194
195-w I<file>
196--write=I<file>
197
198Save the raw data to the specified file. It can be read back later with
199C<--read>.
200
201Requires C<JSON::PP> to be available.
202
203=back
204
205=cut
206
207
208
209use 5.010000;
210use warnings;
211use strict;
212use Getopt::Long qw(:config no_auto_abbrev);
213use IPC::Open2 ();
214use IO::Select;
c2d21e7a 215use IO::File;
9e7973fa
DM
216use POSIX ":sys_wait_h";
217
218# The version of the file format used to save data. We refuse to process
219# the file if the integer component differs.
220
221my $FORMAT_VERSION = 1.0;
222
223# The fields we know about
224
225my %VALID_FIELDS = map { $_ => 1 }
226 qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
227
228sub usage {
229 die <<EOF;
230usage: $0 [options] perl[=label] ...
231 --action=foo What action to perform [default: grind].
232 --average Only display average, not individual test results.
233 --benchfile=foo File containing the benchmarks;
234 [default: t/perf/benchmarks].
235 --bisect=f,min,max run a single test against one perl and exit with a
236 zero status if the named field is in the specified
237 range; exit 1 otherwise.
df3d7b3a
DM
238 --compact=perl Display the results of a single perl in compact form.
239 Which perl specified like --norm
9e7973fa
DM
240 --debug Enable verbose debugging output.
241 --fields=a,b,c Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
242 --grindargs=foo Optional command-line args to pass to cachegrind.
243 --help Display this help.
244 -j|--jobs=N Run N jobs in parallel [default 1].
245 --norm=perl Which perl column to treat as 100%; may be a column
246 number (0..N-1) or a perl executable name or label;
247 [default: 0].
248 --perlargs=foo Optional command-line args to pass to each perl to run.
249 --raw Display raw data counts rather than percentages.
65f0b1c9 250 --sort=field:perl Sort the tests based on the value of 'field' in the
9e7973fa
DM
251 column 'perl'. The perl value is as per --norm.
252 -r|--read=file Read in previously saved data from the specified file.
253 --tests=FOO Select only the specified tests from the benchmarks file;
254 FOO may be either of the form 'foo,bar' or '/regex/';
255 [default: all tests].
256 --verbose Display progress information.
257 -w|--write=file Save the raw data to the specified file.
258
259--action is one of:
260 grind run the code under cachegrind
261 selftest perform a selftest; produce TAP output
262
263The command line ends with one or more specified perl executables,
264which will be searched for in the current \$PATH. Each binary name may
265have an optional =LABEL appended, which will be used rather than the
266executable name in output. E.g.
267
268 perl-5.20.1=PRE-BUGFIX perl-5.20.1-new=POST-BUGFIX
269EOF
270}
271
272my %OPTS = (
273 action => 'grind',
274 average => 0,
275 benchfile => 't/perf/benchmarks',
276 bisect => undef,
df3d7b3a 277 compact => undef,
9e7973fa
DM
278 debug => 0,
279 grindargs => '',
280 fields => undef,
281 jobs => 1,
282 norm => 0,
283 perlargs => '',
284 raw => 0,
285 read => undef,
286 sort => undef,
287 tests => undef,
288 verbose => 0,
289 write => undef,
290);
291
292
293# process command-line args and call top-level action
294
295{
296 GetOptions(
297 'action=s' => \$OPTS{action},
298 'average' => \$OPTS{average},
299 'benchfile=s' => \$OPTS{benchfile},
300 'bisect=s' => \$OPTS{bisect},
df3d7b3a 301 'compact=s' => \$OPTS{compact},
9e7973fa
DM
302 'debug' => \$OPTS{debug},
303 'grindargs=s' => \$OPTS{grindargs},
304 'help' => \$OPTS{help},
305 'fields=s' => \$OPTS{fields},
306 'jobs|j=i' => \$OPTS{jobs},
307 'norm=s' => \$OPTS{norm},
308 'perlargs=s' => \$OPTS{perlargs},
309 'raw' => \$OPTS{raw},
310 'read|r=s' => \$OPTS{read},
311 'sort=s' => \$OPTS{sort},
312 'tests=s' => \$OPTS{tests},
313 'verbose' => \$OPTS{verbose},
314 'write|w=s' => \$OPTS{write},
315 ) or usage;
316
317 usage if $OPTS{help};
318
319
320 if (defined $OPTS{read} and defined $OPTS{write}) {
321 die "Error: can't specify both --read and --write options\n";
322 }
323
324 if (defined $OPTS{read} or defined $OPTS{write}) {
325 # fail early if it's not present
326 require JSON::PP;
327 }
328
329 if (defined $OPTS{fields}) {
330 my @f = split /,/, $OPTS{fields};
331 for (@f) {
332 die "Error: --fields: unknown field '$_'\n"
333 unless $VALID_FIELDS{$_};
334 }
335 my %f = map { $_ => 1 } @f;
336 $OPTS{fields} = \%f;
337 }
338
339 my %valid_actions = qw(grind 1 selftest 1);
340 unless ($valid_actions{$OPTS{action}}) {
341 die "Error: unrecognised action '$OPTS{action}'\n"
342 . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
343 }
344
345 if (defined $OPTS{sort}) {
346 my @s = split /:/, $OPTS{sort};
347 if (@s != 2) {
348 die "Error: --sort argument should be of the form field:perl: "
349 . "'$OPTS{sort}'\n";
350 }
351 my ($field, $perl) = @s;
352 die "Error: --sort: unknown field '$field\n"
353 unless $VALID_FIELDS{$field};
354 # the 'perl' value will be validated later, after we have processed
355 # the perls
356 $OPTS{'sort-field'} = $field;
357 $OPTS{'sort-perl'} = $perl;
358 }
359
360 if ($OPTS{action} eq 'selftest') {
361 if (@ARGV) {
362 die "Error: no perl executables may be specified with --read\n"
363 }
364 }
365 elsif (defined $OPTS{bisect}) {
366 die "Error: exactly one perl executable must be specified for bisect\n"
367 unless @ARGV == 1;
368 die "Error: Can't specify both --bisect and --read\n"
369 if defined $OPTS{read};
370 die "Error: Can't specify both --bisect and --write\n"
371 if defined $OPTS{write};
372 }
373 elsif (defined $OPTS{read}) {
374 if (@ARGV) {
375 die "Error: no perl executables may be specified with --read\n"
376 }
377 }
378 elsif ($OPTS{raw}) {
379 unless (@ARGV) {
380 die "Error: at least one perl executable must be specified\n";
381 }
382 }
383 else {
384 unless (@ARGV >= 2) {
385 die "Error: at least two perl executables must be specified\n";
386 }
387 }
388
389 if ($OPTS{action} eq 'grind') {
390 do_grind(\@ARGV);
391 }
392 elsif ($OPTS{action} eq 'selftest') {
393 do_selftest();
394 }
395}
396exit 0;
397
398
399# Given a hash ref keyed by test names, filter it by deleting unwanted
400# tests, based on $OPTS{tests}.
401
402sub filter_tests {
403 my ($tests) = @_;
404
405 my $opt = $OPTS{tests};
406 return unless defined $opt;
407
408 my @tests;
409
410 if ($opt =~ m{^/}) {
411 $opt =~ s{^/(.+)/$}{$1}
412 or die "Error: --tests regex must be of the form /.../\n";
413 for (keys %$tests) {
414 delete $tests->{$_} unless /$opt/;
415 }
416 }
417 else {
418 my %t;
419 for (split /,/, $opt) {
24293753
JC
420 die "Error: no such test found: '$_'\n"
421 . ($OPTS{verbose} ? " have: @{[ sort keys %$tests ]}\n" : "")
422 unless exists $tests->{$_};
9e7973fa
DM
423 $t{$_} = 1;
424 }
425 for (keys %$tests) {
426 delete $tests->{$_} unless exists $t{$_};
427 }
428 }
429}
430
431
432# Read in the test file, and filter out any tests excluded by $OPTS{tests}
957d8930
DM
433# return a hash ref { testname => { test }, ... }
434# and an array ref of the original test names order,
9e7973fa
DM
435
436sub read_tests_file {
437 my ($file) = @_;
438
439 my $ta = do $file;
440 unless ($ta) {
441 die "Error: can't parse '$file': $@\n" if $@;
442 die "Error: can't read '$file': $!\n";
443 }
444
957d8930
DM
445 my @orig_order;
446 for (my $i=0; $i < @$ta; $i += 2) {
447 push @orig_order, $ta->[$i];
448 }
449
9e7973fa
DM
450 my $t = { @$ta };
451 filter_tests($t);
957d8930 452 return $t, \@orig_order;
9e7973fa
DM
453}
454
455
456# Process the perl/column argument of options like --norm and --sort.
457# Return the index of the matching perl.
458
459sub select_a_perl {
460 my ($perl, $perls, $who) = @_;
461
462 if ($perl =~ /^[0-9]$/) {
463 die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
464 unless $perl < @$perls;
465 return $perl;
466 }
467 else {
468 my @perl = grep $perls->[$_][0] eq $perl
469 || $perls->[$_][1] eq $perl,
470 0..$#$perls;
471 die "Error: $who: unrecognised perl '$perl'\n"
472 unless @perl;
473 die "Error: $who: ambiguous perl '$perl'\n"
474 if @perl > 1;
475 return $perl[0];
476 }
477}
478
479
480# Validate the list of perl=label on the command line.
9e7973fa
DM
481# Return a list of [ exe, label ] pairs.
482
483sub process_perls {
484 my @results;
955a736c 485 my %seen;
9e7973fa
DM
486 for my $p (@_) {
487 my ($perl, $label) = split /=/, $p, 2;
488 $label //= $perl;
955a736c
JC
489 die "$label cannot be used on 2 different PUTs\n" if $seen{$label}++;
490
9e7973fa
DM
491 my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
492 die "Error: unable to execute '$perl': $r" if $r ne "ok\n";
493 push @results, [ $perl, $label ];
494 }
9e7973fa
DM
495 return @results;
496}
497
498
8fbd1c2c 499
9e7973fa
DM
500# Return a string containing perl test code wrapped in a loop
501# that runs $ARGV[0] times
502
503sub make_perl_prog {
504 my ($test, $desc, $setup, $code) = @_;
505
506 return <<EOF;
507# $desc
508package $test;
509BEGIN { srand(0) }
510$setup;
511for my \$__loop__ (1..\$ARGV[0]) {
512 $code;
513}
514EOF
515}
516
517
518# Parse the output from cachegrind. Return a hash ref.
519# See do_selftest() for examples of the output format.
520
521sub parse_cachegrind {
522 my ($output, $id, $perl) = @_;
523
524 my %res;
525
526 my @lines = split /\n/, $output;
527 for (@lines) {
528 unless (s/(==\d+==)|(--\d+--) //) {
529 die "Error: while executing $id:\n"
530 . "unexpected code or cachegrind output:\n$_\n";
531 }
532 if (/I refs:\s+([\d,]+)/) {
533 $res{Ir} = $1;
534 }
535 elsif (/I1 misses:\s+([\d,]+)/) {
536 $res{Ir_m1} = $1;
537 }
538 elsif (/LLi misses:\s+([\d,]+)/) {
539 $res{Ir_mm} = $1;
540 }
541 elsif (/D refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
542 @res{qw(Dr Dw)} = ($1,$2);
543 }
544 elsif (/D1 misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
545 @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
546 }
547 elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
548 @res{qw(Dr_mm Dw_mm)} = ($1,$2);
549 }
550 elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
551 @res{qw(COND IND)} = ($1,$2);
552 }
553 elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
554 @res{qw(COND_m IND_m)} = ($1,$2);
555 }
556 }
557
558 for my $field (keys %VALID_FIELDS) {
559 die "Error: can't parse '$field' field from cachegrind output:\n$output"
560 unless exists $res{$field};
561 $res{$field} =~ s/,//g;
562 }
563
564 return \%res;
565}
566
567
568# Handle the 'grind' action
569
570sub do_grind {
571 my ($perl_args) = @_; # the residue of @ARGV after option processing
572
957d8930 573 my ($loop_counts, $perls, $results, $tests, $order);
9e7973fa
DM
574 my ($bisect_field, $bisect_min, $bisect_max);
575
576 if (defined $OPTS{bisect}) {
577 ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
578 die "Error: --bisect option must be of form 'field,integer,integer'\n"
579 unless
580 defined $bisect_max
581 and $bisect_min =~ /^[0-9]+$/
582 and $bisect_max =~ /^[0-9]+$/;
583
584 die "Error: unrecognised field '$bisect_field' in --bisect option\n"
585 unless $VALID_FIELDS{$bisect_field};
586
587 die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
588 if $bisect_min > $bisect_max;
589 }
590
591 if (defined $OPTS{read}) {
592 open my $in, '<:encoding(UTF-8)', $OPTS{read}
593 or die " Error: can't open $OPTS{read} for reading: $!\n";
594 my $data = do { local $/; <$in> };
595 close $in;
596
597 my $hash = JSON::PP::decode_json($data);
598 if (int($FORMAT_VERSION) < int($hash->{version})) {
599 die "Error: unsupported version $hash->{version} in file"
600 . "'$OPTS{read}' (too new)\n";
601 }
957d8930
DM
602 ($loop_counts, $perls, $results, $tests, $order) =
603 @$hash{qw(loop_counts perls results tests order)};
9e7973fa
DM
604
605 filter_tests($results);
606 filter_tests($tests);
957d8930
DM
607
608 if (!$order) {
609 $order = [ sort keys %$tests ];
610 }
9e7973fa
DM
611 }
612 else {
613 # How many times to execute the loop for the two trials. The lower
614 # value is intended to do the loop enough times that branch
615 # prediction has taken hold; the higher loop allows us to see the
616 # branch misses after that
617 $loop_counts = [10, 20];
618
957d8930 619 ($tests, $order) = read_tests_file($OPTS{benchfile});
9e7973fa
DM
620 die "Error: only a single test may be specified with --bisect\n"
621 if defined $OPTS{bisect} and keys %$tests != 1;
622
623 $perls = [ process_perls(@$perl_args) ];
8fbd1c2c
DM
624
625
957d8930 626 $results = grind_run($tests, $order, $perls, $loop_counts);
9e7973fa
DM
627 }
628
8fbd1c2c
DM
629 # now that we have a list of perls, use it to process the
630 # 'perl' component of the --norm and --sort args
631
632 $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
633 if (defined $OPTS{'sort-perl'}) {
634 $OPTS{'sort-perl'} =
635 select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
636 }
637
df3d7b3a
DM
638 if (defined $OPTS{'compact'}) {
639 $OPTS{'compact'} =
640 select_a_perl($OPTS{'compact'}, $perls, "--compact");
641 }
9e7973fa
DM
642 if (defined $OPTS{write}) {
643 my $json = JSON::PP::encode_json({
644 version => $FORMAT_VERSION,
645 loop_counts => $loop_counts,
646 perls => $perls,
647 results => $results,
648 tests => $tests,
957d8930 649 order => $order,
9e7973fa
DM
650 });
651
652 open my $out, '>:encoding(UTF-8)', $OPTS{write}
653 or die " Error: can't open $OPTS{write} for writing: $!\n";
654 print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
655 close $out or die "Error: closing file '$OPTS{write}': $!\n";
656 }
657 else {
658 my ($processed, $averages) =
659 grind_process($results, $perls, $loop_counts);
660
661 if (defined $OPTS{bisect}) {
662 my @r = values %$results;
663 die "Panic: expected exactly one test result in bisect\n"
664 if @r != 1;
665 @r = values %{$r[0]};
666 die "Panic: expected exactly one perl result in bisect\n"
667 if @r != 1;
668 my $c = $r[0]{$bisect_field};
669 die "Panic: no result in bisect for field '$bisect_field'\n"
670 unless defined $c;
671 exit 0 if $bisect_min <= $c and $c <= $bisect_max;
672 exit 1;
673 }
df3d7b3a
DM
674 elsif (defined $OPTS{compact}) {
675 grind_print_compact($processed, $averages, $OPTS{compact},
676 $perls, $tests, $order);
677 }
9e7973fa 678 else {
957d8930 679 grind_print($processed, $averages, $perls, $tests, $order);
9e7973fa
DM
680 }
681 }
682}
683
684
685# Run cachegrind for every test/perl combo.
686# It may run several processes in parallel when -j is specified.
687# Return a hash ref suitable for input to grind_process()
688
689sub grind_run {
957d8930 690 my ($tests, $order, $perls, $counts) = @_;
9e7973fa
DM
691
692 # Build a list of all the jobs to run
693
694 my @jobs;
695
957d8930 696 for my $test (grep $tests->{$_}, @$order) {
9e7973fa
DM
697
698 # Create two test progs: one with an empty loop and one with code.
699 # Note that the empty loop is actually '{1;}' rather than '{}';
700 # this causes the loop to have a single nextstate rather than a
701 # stub op, so more closely matches the active loop; e.g.:
702 # {1;} => nextstate; unstack
703 # {$x=1;} => nextstate; const; gvsv; sassign; unstack
704 my @prog = (
705 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
706 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
707 );
708
709 for my $p (@$perls) {
a9eceb2d 710 my ($perl, $label, @putargs) = @$p;
9e7973fa
DM
711
712 # Run both the empty loop and the active loop
713 # $counts->[0] and $counts->[1] times.
714
715 for my $i (0,1) {
716 for my $j (0,1) {
717 my $cmd = "PERL_HASH_SEED=0 "
718 . "valgrind --tool=cachegrind --branch-sim=yes "
719 . "--cachegrind-out-file=/dev/null "
720 . "$OPTS{grindargs} "
a9eceb2d 721 . "$perl $OPTS{perlargs} @putargs - $counts->[$j] 2>&1";
9e7973fa
DM
722 # for debugging and error messages
723 my $id = "$test/$perl "
724 . ($i ? "active" : "empty") . "/"
725 . ($j ? "long" : "short") . " loop";
726
727 push @jobs, {
728 test => $test,
729 perl => $perl,
730 plabel => $label,
731 cmd => $cmd,
732 prog => $prog[$i],
733 active => $i,
734 loopix => $j,
735 id => $id,
736 };
737 }
738 }
739 }
740 }
741
742 # Execute each cachegrind and store the results in %results.
743
744 local $SIG{PIPE} = 'IGNORE';
745
746 my $max_jobs = $OPTS{jobs};
747 my $running = 0; # count of executing jobs
748 my %pids; # map pids to jobs
749 my %fds; # map fds to jobs
750 my %results;
751 my $select = IO::Select->new();
752
753 while (@jobs or $running) {
754
755 if ($OPTS{debug}) {
756 printf "Main loop: pending=%d running=%d\n",
757 scalar(@jobs), $running;
758 }
759
760 # Start new jobs
761
762 while (@jobs && $running < $max_jobs) {
763 my $job = shift @jobs;
764 my ($id, $cmd) =@$job{qw(id cmd)};
765
766 my ($in, $out, $pid);
767 warn "Starting $id\n" if $OPTS{verbose};
768 eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
769 or die "Error: while starting cachegrind subprocess"
770 ." for $id:\n$@";
771 $running++;
772 $pids{$pid} = $job;
773 $fds{"$out"} = $job;
774 $job->{out_fd} = $out;
775 $job->{output} = '';
776 $job->{pid} = $pid;
777
778 $out->blocking(0);
779 $select->add($out);
780
781 if ($OPTS{debug}) {
782 print "Started pid $pid for $id\n";
783 }
784
785 # Note:
786 # In principle we should write to $in in the main select loop,
787 # since it may block. In reality,
788 # a) the code we write to the perl process's stdin is likely
789 # to be less than the OS's pipe buffer size;
790 # b) by the time the perl process has read in all its stdin,
791 # the only output it should have generated is a few lines
792 # of cachegrind output preamble.
793 # If these assumptions change, then perform the following print
794 # in the select loop instead.
795
796 print $in $job->{prog};
797 close $in;
798 }
799
800 # Get output of running jobs
801
802 if ($OPTS{debug}) {
803 printf "Select: waiting on (%s)\n",
804 join ', ', sort { $a <=> $b } map $fds{$_}{pid},
805 $select->handles;
806 }
807
808 my @ready = $select->can_read;
809
810 if ($OPTS{debug}) {
811 printf "Select: pids (%s) ready\n",
812 join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
813 }
814
815 unless (@ready) {
816 die "Panic: select returned no file handles\n";
817 }
818
819 for my $fd (@ready) {
820 my $j = $fds{"$fd"};
821 my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
822 unless (defined $r) {
823 die "Panic: Read from process running $j->{id} gave:\n$!";
824 }
825 next if $r;
826
827 # EOF
828
829 if ($OPTS{debug}) {
830 print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
831 }
832
833 $select->remove($j->{out_fd});
834 close($j->{out_fd})
835 or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
836 $running--;
837 delete $fds{"$j->{out_fd}"};
838 my $output = $j->{output};
839
840 if ($OPTS{debug}) {
841 my $p = $j->{prog};
842 $p =~ s/^/ : /mg;
843 my $o = $output;
844 $o =~ s/^/ : /mg;
845
846 print "\n$j->{id}/\nCommand: $j->{cmd}\n"
847 . "Input:\n$p"
848 . "Output\n$o";
849 }
850
851 $results{$j->{test}}{$j->{perl}}[$j->{active}][$j->{loopix}]
852 = parse_cachegrind($output, $j->{id}, $j->{perl});
853 }
854
855 # Reap finished jobs
856
857 while (1) {
858 my $kid = waitpid(-1, WNOHANG);
859 my $ret = $?;
860 last if $kid <= 0;
861
862 unless (exists $pids{$kid}) {
863 die "Panic: reaped unexpected child $kid";
864 }
865 my $j = $pids{$kid};
866 if ($ret) {
867 die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
868 . "with the following output\n:$j->{output}\n";
869 }
870 delete $pids{$kid};
871 }
872 }
873
874 return \%results;
875}
876
877
878
879
880# grind_process(): process the data that has been extracted from
881# cachgegrind's output.
882#
883# $res is of the form ->{benchmark_name}{perl_name}[active][count]{field_name},
884# where active is 0 or 1 indicating an empty or active loop,
885# count is 0 or 1 indicating a short or long loop. E.g.
886#
887# $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][10]{Dw_mm}
888#
889# The $res data structure is modified in-place by this sub.
890#
891# $perls is [ [ perl-exe, perl-label], .... ].
892#
893# $counts is [ N, M ] indicating the counts for the short and long loops.
894#
895#
896# return \%output, \%averages, where
897#
898# $output{benchmark_name}{perl_name}{field_name} = N
899# $averages{perl_name}{field_name} = M
900#
901# where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
902# M is the average raw count over all tests ($OPTS{raw}), or
903# 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
904
905sub grind_process {
906 my ($res, $perls, $counts) = @_;
907
908 # Process the four results for each test/perf combo:
909 # Convert
910 # $res->{benchmark_name}{perl_name}[active][count]{field_name} = n
911 # to
912 # $res->{benchmark_name}{perl_name}{field_name} = averaged_n
913 #
914 # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
915 # empty loops, eliminating startup time
916 # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
917 # active loops, eliminating startup time
918 # (the two startup times may be different because different code
919 # is being compiled); the difference of the two results above
920 # divided by the count difference is the time to execute the
921 # active code once, eliminating both startup and loop overhead.
922
923 for my $tests (values %$res) {
924 for my $r (values %$tests) {
925 my $r2;
926 for (keys %{$r->[0][0]}) {
927 my $n = ( ($r->[1][1]{$_} - $r->[1][0]{$_})
928 - ($r->[0][1]{$_} - $r->[0][0]{$_})
929 ) / ($counts->[1] - $counts->[0]);
930 $r2->{$_} = $n;
931 }
932 $r = $r2;
933 }
934 }
935
936 my %totals;
937 my %counts;
938 my %data;
939
940 my $perl_norm = $perls->[$OPTS{norm}][0]; # the name of the reference perl
941
942 for my $test_name (keys %$res) {
943 my $res1 = $res->{$test_name};
944 my $res2_norm = $res1->{$perl_norm};
945 for my $perl (keys %$res1) {
946 my $res2 = $res1->{$perl};
947 for my $field (keys %$res2) {
948 my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});
949
950 if ($OPTS{raw}) {
951 # Avoid annoying '-0.0' displays. Ideally this number
952 # should never be negative, but fluctuations in
953 # startup etc can theoretically make this happen
954 $q = 0 if ($q <= 0 && $q > -0.1);
955 $totals{$perl}{$field} += $q;
956 $counts{$perl}{$field}++;
957 $data{$test_name}{$perl}{$field} = $q;
958 next;
959 }
960
961 # $p and $q are notionally integer counts, but
962 # due to variations in startup etc, it's possible for a
963 # count which is supposedly zero to be calculated as a
964 # small positive or negative value.
965 # In this case, set it to zero. Further below we
966 # special-case zeros to avoid division by zero errors etc.
967
968 $p = 0.0 if $p < 0.01;
969 $q = 0.0 if $q < 0.01;
970
971 if ($p == 0.0 && $q == 0.0) {
972 # Both perls gave a count of zero, so no change:
973 # treat as 100%
974 $totals{$perl}{$field} += 1;
975 $counts{$perl}{$field}++;
976 $data{$test_name}{$perl}{$field} = 1;
977 }
978 elsif ($p == 0.0 || $q == 0.0) {
979 # If either count is zero, there were too few events
980 # to give a meaningful ratio (and we will end up with
981 # division by zero if we try). Mark the result undef,
982 # indicating that it shouldn't be displayed; and skip
983 # adding to the average
984 $data{$test_name}{$perl}{$field} = undef;
985 }
986 else {
987 # For averages, we record q/p rather than p/q.
988 # Consider a test where perl_norm took 1000 cycles
989 # and perlN took 800 cycles. For the individual
990 # results we display p/q, or 1.25; i.e. a quarter
991 # quicker. For the averages, we instead sum all
992 # the 0.8's, which gives the total cycles required to
993 # execute all tests, with all tests given equal
994 # weight. Later we reciprocate the final result,
995 # i.e. 1/(sum(qi/pi)/n)
996
997 $totals{$perl}{$field} += $q/$p;
998 $counts{$perl}{$field}++;
999 $data{$test_name}{$perl}{$field} = $p/$q;
1000 }
1001 }
1002 }
1003 }
1004
1005 # Calculate averages based on %totals and %counts accumulated earlier.
1006
1007 my %averages;
1008 for my $perl (keys %totals) {
1009 my $t = $totals{$perl};
1010 for my $field (keys %$t) {
1011 $averages{$perl}{$field} = $OPTS{raw}
1012 ? $t->{$field} / $counts{$perl}{$field}
1013 # reciprocal - see comments above
1014 : $counts{$perl}{$field} / $t->{$field};
1015 }
1016 }
1017
1018 return \%data, \%averages;
1019}
1020
1021
9e7973fa 1022
df3d7b3a 1023# print a standard blurb at the start of the grind display
9e7973fa 1024
df3d7b3a
DM
1025sub grind_blurb {
1026 my ($perls) = @_;
9e7973fa
DM
1027
1028 print <<EOF;
1029Key:
1030 Ir Instruction read
1031 Dr Data read
1032 Dw Data write
1033 COND conditional branches
1034 IND indirect branches
1035 _m branch predict miss
1036 _m1 level 1 cache miss
1037 _mm last cache (e.g. L3) miss
1038 - indeterminate percentage (e.g. 1/0)
1039
1040EOF
1041
1042 if ($OPTS{raw}) {
1043 print "The numbers represent raw counts per loop iteration.\n";
1044 }
1045 else {
1046 print <<EOF;
1047The numbers represent relative counts per loop iteration, compared to
df3d7b3a 1048$perls->[$OPTS{norm}][1] at 100.0%.
9e7973fa
DM
1049Higher is better: for example, using half as many instructions gives 200%,
1050while using twice as many gives 50%.
1051EOF
1052 }
df3d7b3a
DM
1053}
1054
1055
1056# return a sorted list of the test names, plus 'AVERAGE'
9e7973fa 1057
df3d7b3a
DM
1058sub sorted_test_names {
1059 my ($results, $order, $perls) = @_;
9e7973fa 1060
df3d7b3a 1061 my @names;
9e7973fa
DM
1062 unless ($OPTS{average}) {
1063 if (defined $OPTS{'sort-field'}) {
1064 my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
1065 my $perl = $perls->[$perlix][0];
df3d7b3a 1066 @names = sort
9e7973fa
DM
1067 {
1068 $results->{$a}{$perl}{$field}
1069 <=> $results->{$b}{$perl}{$field}
1070 }
1071 keys %$results;
1072 }
1073 else {
df3d7b3a 1074 @names = grep $results->{$_}, @$order;
9e7973fa
DM
1075 }
1076 }
1077
1078 # No point in displaying average for only one test.
df3d7b3a
DM
1079 push @names, 'AVERAGE' unless @names == 1;
1080 @names;
1081}
1082
1083
1084# grind_print(): display the tabulated results of all the cachegrinds.
1085#
1086# Arguments are of the form:
1087# $results->{benchmark_name}{perl_name}{field_name} = N
1088# $averages->{perl_name}{field_name} = M
1089# $perls = [ [ perl-exe, perl-label ], ... ]
1090# $tests->{test_name}{desc => ..., ...}
1091
1092sub grind_print {
1093 my ($results, $averages, $perls, $tests, $order) = @_;
1094
1095 my @perl_names = map $_->[0], @$perls;
1096 my %perl_labels;
1097 $perl_labels{$_->[0]} = $_->[1] for @$perls;
1098
1099 my $field_label_width = 6;
1100 # Calculate the width to display for each column.
1101 my $min_width = $OPTS{raw} ? 8 : 6;
1102 my @widths = map { length($_) < $min_width ? $min_width : length($_) }
1103 @perl_labels{@perl_names};
1104
1105 # Print standard header.
1106 grind_blurb($perls);
1107
1108 my @test_names = sorted_test_names($results, $order, $perls);
9e7973fa
DM
1109
1110 # If only a single field is to be displayed, use a more compact
1111 # format with only a single line of output per test.
1112
1113 my $one_field = defined $OPTS{fields} && keys(%{$OPTS{fields}}) == 1;
1114
1115 if ($one_field) {
91cde97c 1116 print "Results for field " . (keys(%{$OPTS{fields}}))[0] . ".\n";
9e7973fa
DM
1117
1118 # The first column will now contain test names rather than
1119 # field names; Calculate the max width.
1120
1121 $field_label_width = 0;
1122 for (@test_names) {
1123 $field_label_width = length if length > $field_label_width;
1124 }
1125
1126 # Print the perl executables header.
1127
1128 print "\n";
1129 for my $i (0,1) {
1130 print " " x $field_label_width;
1131 for (0..$#widths) {
1132 printf " %*s", $widths[$_],
1133 $i ? ('-' x$widths[$_]) : $perl_labels{$perl_names[$_]};
1134 }
1135 print "\n";
1136 }
1137 }
1138
1139 # Dump the results for each test.
1140
1141 for my $test_name (@test_names) {
1142 my $doing_ave = ($test_name eq 'AVERAGE');
1143 my $res1 = $doing_ave ? $averages : $results->{$test_name};
1144
1145 unless ($one_field) {
1146 print "\n$test_name";
1147 print "\n$tests->{$test_name}{desc}" unless $doing_ave;
1148 print "\n\n";
1149
1150 # Print the perl executables header.
1151 for my $i (0,1) {
1152 print " " x $field_label_width;
1153 for (0..$#widths) {
1154 printf " %*s", $widths[$_],
1155 $i ? ('-' x$widths[$_]) : $perl_labels{$perl_names[$_]};
1156 }
1157 print "\n";
1158 }
1159 }
1160
1161 for my $field (qw(Ir Dr Dw COND IND
1162 N
1163 COND_m IND_m
1164 N
1165 Ir_m1 Dr_m1 Dw_m1
1166 N
1167 Ir_mm Dr_mm Dw_mm
1168 ))
1169 {
1170 next if $OPTS{fields} and ! exists $OPTS{fields}{$field};
1171
1172 if ($field eq 'N') {
1173 print "\n";
1174 next;
1175 }
1176
91cde97c
DM
1177 if ($one_field) {
1178 printf "%-*s", $field_label_width, $test_name;
1179 }
1180 else {
1181 printf "%*s", $field_label_width, $field;
1182 }
9e7973fa
DM
1183
1184 for my $i (0..$#widths) {
1185 my $res2 = $res1->{$perl_names[$i]};
1186 my $p = $res2->{$field};
1187 if (!defined $p) {
1188 printf " %*s", $widths[$i], '-';
1189 }
1190 elsif ($OPTS{raw}) {
1191 printf " %*.1f", $widths[$i], $p;
1192 }
1193 else {
1194 printf " %*.2f", $widths[$i], $p * 100;
1195 }
1196 }
1197 print "\n";
1198 }
1199 }
1200}
1201
1202
df3d7b3a
DM
1203
1204# grind_print_compact(): like grind_print(), but display a single perl
1205# in a compact form. Has an additional arg, $which_perl, which specifies
1206# which perl to display.
1207#
1208# Arguments are of the form:
1209# $results->{benchmark_name}{perl_name}{field_name} = N
1210# $averages->{perl_name}{field_name} = M
1211# $perls = [ [ perl-exe, perl-label ], ... ]
1212# $tests->{test_name}{desc => ..., ...}
1213
1214sub grind_print_compact {
1215 my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;
1216
1217
1218 # the width to display for each column.
1219 my $width = $OPTS{raw} ? 7 : 6;
1220
1221 # Print standard header.
1222 grind_blurb($perls);
1223
1224 print "\nResults for $perls->[$which_perl][1]\n\n";
1225
1226 my @test_names = sorted_test_names($results, $order, $perls);
1227
1228 # Dump the results for each test.
1229
1230 my @fields = qw( Ir Dr Dw
1231 COND IND
1232 COND_m IND_m
1233 Ir_m1 Dr_m1 Dw_m1
1234 Ir_mm Dr_mm Dw_mm
1235 );
1236 if ($OPTS{fields}) {
1237 @fields = grep exists $OPTS{fields}{$_}, @fields;
1238 }
1239
1240 printf " %*s", $width, $_ for @fields;
1241 print "\n";
1242 printf " %*s", $width, '------' for @fields;
1243 print "\n";
1244
1245 for my $test_name (@test_names) {
1246 my $doing_ave = ($test_name eq 'AVERAGE');
1247 my $res = $doing_ave ? $averages : $results->{$test_name};
1248 $res = $res->{$perls->[$which_perl][0]};
1249
1250 for my $field (@fields) {
1251 my $p = $res->{$field};
1252 if (!defined $p) {
1253 printf " %*s", $width, '-';
1254 }
1255 elsif ($OPTS{raw}) {
1256 printf " %*.1f", $width, $p;
1257 }
1258 else {
1259 printf " %*.2f", $width, $p * 100;
1260 }
1261
1262 }
1263
1264 print " $test_name\n";
1265 }
1266}
1267
1268
9e7973fa
DM
1269# do_selftest(): check that we can parse known cachegrind()
1270# output formats. If the output of cachegrind changes, add a *new*
1271# test here; keep the old tests to make sure we continue to parse
1272# old cachegrinds
1273
1274sub do_selftest {
1275
1276 my @tests = (
1277 'standard',
1278 <<'EOF',
1279==32350== Cachegrind, a cache and branch-prediction profiler
1280==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
1281==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
1282==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
1283==32350==
1284--32350-- warning: L3 cache found, using its data for the LL simulation.
1285==32350==
1286==32350== I refs: 1,124,055
1287==32350== I1 misses: 5,573
1288==32350== LLi misses: 3,338
1289==32350== I1 miss rate: 0.49%
1290==32350== LLi miss rate: 0.29%
1291==32350==
1292==32350== D refs: 404,275 (259,191 rd + 145,084 wr)
1293==32350== D1 misses: 9,608 ( 6,098 rd + 3,510 wr)
1294==32350== LLd misses: 5,794 ( 2,781 rd + 3,013 wr)
1295==32350== D1 miss rate: 2.3% ( 2.3% + 2.4% )
1296==32350== LLd miss rate: 1.4% ( 1.0% + 2.0% )
1297==32350==
1298==32350== LL refs: 15,181 ( 11,671 rd + 3,510 wr)
1299==32350== LL misses: 9,132 ( 6,119 rd + 3,013 wr)
1300==32350== LL miss rate: 0.5% ( 0.4% + 2.0% )
1301==32350==
1302==32350== Branches: 202,372 (197,050 cond + 5,322 ind)
1303==32350== Mispredicts: 19,153 ( 17,742 cond + 1,411 ind)
1304==32350== Mispred rate: 9.4% ( 9.0% + 26.5% )
1305EOF
1306 {
1307 COND => 197050,
1308 COND_m => 17742,
1309 Dr => 259191,
1310 Dr_m1 => 6098,
1311 Dr_mm => 2781,
1312 Dw => 145084,
1313 Dw_m1 => 3510,
1314 Dw_mm => 3013,
1315 IND => 5322,
1316 IND_m => 1411,
1317 Ir => 1124055,
1318 Ir_m1 => 5573,
1319 Ir_mm => 3338,
1320 },
1321 );
1322
1323 for ('t', '.') {
1324 last if require "$_/test.pl";
1325 }
1326 plan(@tests / 3 * keys %VALID_FIELDS);
1327
1328 while (@tests) {
1329 my $desc = shift @tests;
1330 my $output = shift @tests;
1331 my $expected = shift @tests;
1332 my $p = parse_cachegrind($output);
1333 for (sort keys %VALID_FIELDS) {
1334 is($p->{$_}, $expected->{$_}, "$desc, $_");
1335 }
1336 }
1337}