This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
add Porting/bench.pl
[perl5.git] / Porting / bench.pl
CommitLineData
9e7973fa
DM
1#!/usr/bin/perl
2#
3# A tool for analysing the performance of the code snippets found in
4# t/perf/benchmarks or similar
5
6
7=head1 NAME
8
9bench.pl - Compare the performance of perl code snippets across multiple
10perls.
11
12=head1 SYNOPSIS
13
14 # Basic: run the tests in t/perf/benchmarks against two or more perls
15
16 bench.pl [options] perl1=[label1] perl2[=label2] ...
17
18 # Run bench.pl's own built-in sanity tests
19
20 bench.pl --action=selftest
21
22=head1 DESCRIPTION
23
24By default, F<bench.pl> will run code snippets found in
25F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
26how many instruction reads, data writes, branches, cache misses, etc. that
27one execution of the snippet uses. It will run them against two or more
28perl executables and show how much each test has gotten better or worse.
29
30It is modelled on the F<perlbench> tool, but since it measures instruction
31reads etc., rather than timings, it is much more precise and reproducible.
It is also considerably faster, and is capable of running tests in
33parallel (with C<-j>). Rather than displaying a single relative
34percentage per test/perl combination, it displays values for 13 different
35measurements, such as instruction reads, conditional branch misses etc.
36
37There are options to write the raw data to a file, and to read it back.
38This means that you can view the same run data in different views with
39different selection and sort options.
40
41The optional C<=label> after each perl executable is used in the display
42output.
43
44=head1 OPTIONS
45
46=over 4
47
48=item *
49
50--action=I<foo>
51
52What action to perform. The default is I<grind>, which runs the benchmarks
53using I<cachegrind> as the back end. The only other action at the moment is
54I<selftest>, which runs some basic sanity checks and produces TAP output.
55
56=item *
57
58--average
59
60Only display the overall average, rather than the results for each
61individual test.
62
63=item *
64
65--benchfile=I<foo>
66
67The path of the file which contains the benchmarks (F<t/perf/benchmarks>
68by default).
69
70=item *
71
72--bisect=I<field,minval,maxval>
73
74Run a single test against one perl and exit with a zero status if the
75named field is in the specified range; exit 1 otherwise. It will complain
76if more than one test or perl has been specified. It is intended to be
77called as part of a bisect run, to determine when something changed.
78For example,
79
80 bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib ./miniperl
81
82might be called from bisect to find when the number of instruction reads
83for test I<foo> falls outside the range 100..105.
84
85=item *
86
87--debug
88
89Enable verbose debugging output.
90
91=item *
92
93--fields=I<a,b,c>
94
95Display only the specified fields; for example,
96
97 --fields=Ir,Ir_m,Ir_mm
98
99If only one field is selected, the output is in more compact form.
100
101=item *
102
103--grindargs=I<foo>
104
105Optional command-line arguments to pass to cachegrind invocations.
106
107=item *
108
--help
110
111Display basic usage information.
112
113=item *
114
115-j I<N>
116--jobs=I<N>
117
118Run I<N> jobs in parallel (default 1). This determines how many cachegrind
119process will running at a time, and should generally be set to the number
120of CPUs available.
121
122=item *
123
124--norm=I<foo>
125
126Specify which perl column in the output to treat as the 100% norm.
127It may be a column number (0..N-1) or a perl executable name or label.
128It defaults to the leftmost column.
129
130=item *
131
132--perlargs=I<foo>
133
134Optional command-line arguments to pass to each perl that is run as part of
135a cachegrind session. For example, C<--perlargs=-Ilib>.
136
137=item *
138
139--raw
140
141Display raw data counts rather than percentages in the outputs. This
allows you to see the exact number of instruction reads, branch misses etc.
143for each test/perl combination. It also causes the C<AVERAGE> display
per field to be calculated based on the average of each test's count
145rather than average of each percentage. This means that tests with very
146high counts will dominate.
147
148=item *
149
--sort=I<field:perl>
151
152Order the tests in the output based on the value of I<field> in the
153column I<perl>. The I<perl> value is as per C<--norm>. For example
154
155 bench.pl --sort=Dw:perl-5.20.0 perl-5.16.0 perl-5.18.0 perl-5.20.0
156
157=item *
158
159-r I<file>
160--read=I<file>
161
162Read in saved data from a previous C<--write> run from the specified file.
163
164Requires C<JSON::PP> to be available.
165
166=item *
167
168--tests=I<FOO>
169
170Specify a subset of tests to run (or in the case of C<--read>, to display).
171It may be either a comma-separated list of test names, or a regular
172expression. For example
173
174 --tests=expr::assign::scalar_lex,expr::assign::2list_lex
175 --tests=/^expr::/
176
177=item *
178
179--verbose
180
181Display progress information.
182
183=item *
184
185-w I<file>
186--write=I<file>
187
188Save the raw data to the specified file. It can be read back later with
189C<--read>.
190
191Requires C<JSON::PP> to be available.
192
193=back
194
195=cut
196
197
198
199use 5.010000;
200use warnings;
201use strict;
202use Getopt::Long qw(:config no_auto_abbrev);
203use IPC::Open2 ();
204use IO::Select;
205use POSIX ":sys_wait_h";
206
# The version of the file format used to save data with --write.
# On --read we refuse to process the file if the integer component
# of its version is newer than ours (see do_grind()).

my $FORMAT_VERSION = 1.0;

# The set of cachegrind fields we know how to parse and display.
# Used to validate --fields, --sort and --bisect arguments, and to
# sanity-check parse_cachegrind()'s results.

my %VALID_FIELDS = map { $_ => 1 }
    qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
216
# Print a usage summary to stderr and exit (via die).
# Called both for --help and for any command-line error.
# Note: the --sort argument is of the form field:perl (the code in the
# option-processing block splits on ':' and takes the field first).

sub usage {
    die <<EOF;
usage: $0 [options] perl[=label] ...
  --action=foo       What action to perform [default: grind].
  --average          Only display average, not individual test results.
  --benchfile=foo    File containing the benchmarks;
                     [default: t/perf/benchmarks].
  --bisect=f,min,max run a single test against one perl and exit with a
                     zero status if the named field is in the specified
                     range; exit 1 otherwise.
  --debug            Enable verbose debugging output.
  --fields=a,b,c     Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
  --grindargs=foo    Optional command-line args to pass to cachegrind.
  --help             Display this help.
  -j|--jobs=N        Run N jobs in parallel [default 1].
  --norm=perl        Which perl column to treat as 100%; may be a column
                     number (0..N-1) or a perl executable name or label;
                     [default: 0].
  --perlargs=foo     Optional command-line args to pass to each perl to run.
  --raw              Display raw data counts rather than percentages.
  --sort=field:perl  Sort the tests based on the value of 'field' in the
                     column 'perl'. The perl value is as per --norm.
  -r|--read=file     Read in previously saved data from the specified file.
  --tests=FOO        Select only the specified tests from the benchmarks file;
                     FOO may be either of the form 'foo,bar' or '/regex/';
                     [default: all tests].
  --verbose          Display progress information.
  -w|--write=file    Save the raw data to the specified file.

--action is one of:
    grind      run the code under cachegrind
    selftest   perform a selftest; produce TAP output

The command line ends with one or more specified perl executables,
which will be searched for in the current \$PATH. Each binary name may
have an optional =LABEL appended, which will be used rather than the
executable name in output. E.g.

    perl-5.20.1=PRE-BUGFIX  perl-5.20.1-new=POST-BUGFIX
EOF
}
258
# Global option values, with their defaults. GetOptions() in the block
# below writes directly into this hash; some entries are later
# post-processed into more convenient forms (e.g. 'fields' becomes a
# hash ref of field names, 'sort' is split into 'sort-field'/'sort-perl',
# and 'norm' becomes a column index).

my %OPTS = (
    action    => 'grind',              # 'grind' or 'selftest'
    average   => 0,                    # display only per-field averages
    benchfile => 't/perf/benchmarks',  # where the test snippets live
    bisect    => undef,                # 'field,min,max' for bisect runs
    debug     => 0,                    # verbose internal tracing
    grindargs => '',                   # extra args for cachegrind
    fields    => undef,                # subset of fields to display
    jobs      => 1,                    # max parallel cachegrind processes
    norm      => 0,                    # which perl column is the 100% norm
    perlargs  => '',                   # extra args for each perl run
    raw       => 0,                    # show raw counts, not percentages
    read      => undef,                # file to read saved results from
    sort      => undef,                # 'field:perl' sort specification
    tests     => undef,                # subset of tests to run/display
    verbose   => 0,                    # progress messages
    write     => undef,                # file to save raw results to
);
277
278
# Process command-line args, validate option combinations, then call the
# top-level action handler (do_grind or do_selftest).

{
    GetOptions(
        'action=s'    => \$OPTS{action},
        'average'     => \$OPTS{average},
        'benchfile=s' => \$OPTS{benchfile},
        'bisect=s'    => \$OPTS{bisect},
        'debug'       => \$OPTS{debug},
        'grindargs=s' => \$OPTS{grindargs},
        'help'        => \$OPTS{help},
        'fields=s'    => \$OPTS{fields},
        'jobs|j=i'    => \$OPTS{jobs},
        'norm=s'      => \$OPTS{norm},
        'perlargs=s'  => \$OPTS{perlargs},
        'raw'         => \$OPTS{raw},
        'read|r=s'    => \$OPTS{read},
        'sort=s'      => \$OPTS{sort},
        'tests=s'     => \$OPTS{tests},
        'verbose'     => \$OPTS{verbose},
        'write|w=s'   => \$OPTS{write},
    ) or usage;

    usage if $OPTS{help};

    if (defined $OPTS{read} and defined $OPTS{write}) {
        die "Error: can't specify both --read and --write options\n";
    }

    if (defined $OPTS{read} or defined $OPTS{write}) {
        # fail early if it's not present
        require JSON::PP;
    }

    # Convert the --fields CSV into a lookup hash, validating each name.
    if (defined $OPTS{fields}) {
        my @f = split /,/, $OPTS{fields};
        for (@f) {
            die "Error: --fields: unknown field '$_'\n"
                unless $VALID_FIELDS{$_};
        }
        my %f = map { $_ => 1 } @f;
        $OPTS{fields} = \%f;
    }

    my %valid_actions = qw(grind 1 selftest 1);
    unless ($valid_actions{$OPTS{action}}) {
        die "Error: unrecognised action '$OPTS{action}'\n"
          . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
    }

    # Split --sort=field:perl; the perl half can only be validated once
    # the perl executables have been processed (see process_perls()).
    if (defined $OPTS{sort}) {
        my @s = split /:/, $OPTS{sort};
        if (@s != 2) {
            die "Error: --sort argument should be of the form field:perl: "
              . "'$OPTS{sort}'\n";
        }
        my ($field, $perl) = @s;
        die "Error: --sort: unknown field '$field'\n"
            unless $VALID_FIELDS{$field};
        $OPTS{'sort-field'} = $field;
        $OPTS{'sort-perl'}  = $perl;
    }

    # Check that the number of remaining (perl executable) arguments is
    # consistent with the requested action and options.
    if ($OPTS{action} eq 'selftest') {
        if (@ARGV) {
            die "Error: no perl executables may be specified with "
              . "--action=selftest\n";
        }
    }
    elsif (defined $OPTS{bisect}) {
        die "Error: exactly one perl executable must be specified for bisect\n"
                                            unless @ARGV == 1;
        die "Error: Can't specify both --bisect and --read\n"
                                            if defined $OPTS{read};
        die "Error: Can't specify both --bisect and --write\n"
                                            if defined $OPTS{write};
    }
    elsif (defined $OPTS{read}) {
        if (@ARGV) {
            die "Error: no perl executables may be specified with --read\n";
        }
    }
    elsif ($OPTS{raw}) {
        # Raw counts don't need a comparison column, so one perl suffices.
        unless (@ARGV) {
            die "Error: at least one perl executable must be specified\n";
        }
    }
    else {
        unless (@ARGV >= 2) {
            die "Error: at least two perl executables must be specified\n";
        }
    }

    if ($OPTS{action} eq 'grind') {
        do_grind(\@ARGV);
    }
    elsif ($OPTS{action} eq 'selftest') {
        do_selftest();
    }
}
exit 0;
382
383
# Given a hash ref keyed by test names, filter it by deleting unwanted
# tests in place, based on $OPTS{tests}. The option is either a /regex/
# (keep tests whose names match) or a comma-separated list of exact test
# names (each of which must exist). No-op if --tests wasn't given.

sub filter_tests {
    my ($tests) = @_;

    my $opt = $OPTS{tests};
    return unless defined $opt;

    if ($opt =~ m{^/}) {
        # Regex form: strip the surrounding slashes, then keep matches.
        $opt =~ s{^/(.+)/$}{$1}
            or die "Error: --tests regex must be of the form /.../\n";
        for (keys %$tests) {
            delete $tests->{$_} unless /$opt/;
        }
    }
    else {
        # List form: every named test must be present in the hash.
        my %t;
        for (split /,/, $opt) {
            die "Error: no such test found: '$_'\n" unless exists $tests->{$_};
            $t{$_} = 1;
        }
        for (keys %$tests) {
            delete $tests->{$_} unless exists $t{$_};
        }
    }
}
413
414
# Load the benchmark definitions from $file (an array-ref returning perl
# source file), convert them to a hash keyed by test name, and drop any
# tests excluded by $OPTS{tests}. Returns the hash ref.

sub read_tests_file {
    my ($file) = @_;

    my $entries = do $file;
    if (!$entries) {
        # 'do' sets $@ on a parse failure and $! on a read failure.
        die "Error: can't parse '$file': $@\n" if $@;
        die "Error: can't read '$file': $!\n";
    }

    my %by_name = @$entries;
    my $tests   = \%by_name;
    filter_tests($tests);
    return $tests;
}
430
431
# Process the perl/column argument of options like --norm and --sort.
# $perl may be a column index, an executable name, or a label; $perls is
# [ [exe, label], ... ]; $who names the option for error messages.
# Return the index of the matching perl.

sub select_a_perl {
    my ($perl, $perls, $who) = @_;

    # Was /^[0-9]$/, which only accepted a single digit, so column
    # indices >= 10 fell through to (and failed) the name lookup below.
    if ($perl =~ /^[0-9]+$/) {
        die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
                                        unless $perl < @$perls;
        return $perl;
    }
    else {
        # Match against either the executable name or the label.
        my @perl = grep    $perls->[$_][0] eq $perl
                        || $perls->[$_][1] eq $perl,
                        0..$#$perls;
        die "Error: $who: unrecognised perl '$perl'\n"
                                        unless @perl;
        die "Error: $who: ambiguous perl '$perl'\n"
                                        if @perl > 1;
        return $perl[0];
    }
}
454
455
# Validate the list of perl=label arguments from the command line:
# check each executable actually runs, and resolve $OPTS{norm} and
# $OPTS{'sort-perl'} into column indices (via select_a_perl).
# Return a list of [ exe, label ] pairs, in command-line order.

sub process_perls {
    my @results;
    for my $p (@_) {
        my ($perl, $label) = split /=/, $p, 2;
        $label //= $perl;    # no =label given: label with the exe name
        # Smoke-test the executable; capture stderr too so a failure
        # message can be included in the die below.
        my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
        die "Error: unable to execute '$perl': $r" if $r ne "ok\n";
        push @results, [ $perl, $label ];
    }

    # --norm and --sort column specifiers can only be validated now that
    # the full list of perls is known.
    $OPTS{norm} = select_a_perl($OPTS{norm}, \@results, "--norm");
    if (defined $OPTS{'sort-perl'}) {
        $OPTS{'sort-perl'} =
                select_a_perl($OPTS{'sort-perl'}, \@results, "--sort");
    }

    return @results;
}
478
479
# Return a string containing perl source for one benchmark: the setup
# code followed by the test code wrapped in a loop that runs $ARGV[0]
# times. The snippet is compiled into its own package, and srand is
# pinned so hash/randomisation effects are reproducible.

sub make_perl_prog {
    my ($test, $desc, $setup, $code) = @_;

    # Assemble the program line by line (note the literal $__loop__ and
    # $ARGV[0] — they are part of the generated program, not this one).
    return join '',
        "# $desc\n",
        "package $test;\n",
        "BEGIN { srand(0) }\n",
        "$setup;\n",
        "for my \$__loop__ (1..\$ARGV[0]) {\n",
        "    $code;\n",
        "}\n";
}
496
497
# Parse the output from a cachegrind run. Return a hash ref keyed by the
# names in %VALID_FIELDS, with comma-free integer counts as values.
# $id identifies the run for error messages; $perl is the executable that
# was run (currently unused within this sub beyond the signature).
# See do_selftest() for examples of the expected output format.

sub parse_cachegrind {
    my ($output, $id, $perl) = @_;

    my %res;

    my @lines = split /\n/, $output;
    for (@lines) {
        # Every genuine cachegrind line starts with '==PID==' or
        # '--PID-- '; anything else (e.g. perl compile errors leaking
        # into the combined stderr stream) is fatal.
        unless (s/(==\d+==)|(--\d+--) //) {
            die "Error: while executing $id:\n"
              . "unexpected code or cachegrind output:\n$_\n";
        }
        if (/I refs:\s+([\d,]+)/) {
            $res{Ir} = $1;
        }
        elsif (/I1 misses:\s+([\d,]+)/) {
            $res{Ir_m1} = $1;
        }
        elsif (/LLi misses:\s+([\d,]+)/) {
            $res{Ir_mm} = $1;
        }
        elsif (/D refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
            @res{qw(Dr Dw)} = ($1,$2);
        }
        elsif (/D1 misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
            @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
        }
        elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
            @res{qw(Dr_mm Dw_mm)} = ($1,$2);
        }
        elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
            @res{qw(COND IND)} = ($1,$2);
        }
        elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
            @res{qw(COND_m IND_m)} = ($1,$2);
        }
    }

    # Every known field must have been seen; strip the thousands
    # separators so the values can be used in arithmetic later.
    for my $field (keys %VALID_FIELDS) {
        die "Error: can't parse '$field' field from cachegrind output:\n$output"
            unless exists $res{$field};
        $res{$field} =~ s/,//g;
    }

    return \%res;
}
546
547
# Handle the 'grind' action: either run the benchmarks under cachegrind
# (possibly saving raw results with --write), or load previously saved
# results with --read; then either perform a --bisect range check or
# display the processed results.

sub do_grind {
    my ($perl_args) = @_; # the residue of @ARGV after option processing

    my ($loop_counts, $perls, $results, $tests);
    my ($bisect_field, $bisect_min, $bisect_max);

    # Validate the --bisect triplet up front, before any expensive work.
    if (defined $OPTS{bisect}) {
        ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
        die "Error: --bisect option must be of form 'field,integer,integer'\n"
            unless
                    defined $bisect_max
                and $bisect_min =~ /^[0-9]+$/
                and $bisect_max =~ /^[0-9]+$/;

        die "Error: unrecognised field '$bisect_field' in --bisect option\n"
            unless $VALID_FIELDS{$bisect_field};

        die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
            if $bisect_min > $bisect_max;
    }

    if (defined $OPTS{read}) {
        # Load a previous run's data rather than running cachegrind.
        open my $in, '<:encoding(UTF-8)', $OPTS{read}
            or die " Error: can't open $OPTS{read} for reading: $!\n";
        my $data = do { local $/; <$in> };
        close $in;

        my $hash = JSON::PP::decode_json($data);
        # Refuse files whose integer version component is newer than ours.
        if (int($FORMAT_VERSION) < int($hash->{version})) {
            die "Error: unsupported version $hash->{version} in file"
              . "'$OPTS{read}' (too new)\n";
        }
        ($loop_counts, $perls, $results, $tests) =
            @$hash{qw(loop_counts perls results tests)};

        # --tests filtering applies to loaded data too.
        filter_tests($results);
        filter_tests($tests);
    }
    else {
        # How many times to execute the loop for the two trials. The lower
        # value is intended to do the loop enough times that branch
        # prediction has taken hold; the higher loop allows us to see the
        # branch misses after that
        $loop_counts = [10, 20];

        $tests = read_tests_file($OPTS{benchfile});
        die "Error: only a single test may be specified with --bisect\n"
            if defined $OPTS{bisect} and keys %$tests != 1;

        $perls = [ process_perls(@$perl_args) ];
        $results = grind_run($tests, $perls, $loop_counts);
    }

    if (defined $OPTS{write}) {
        # Save raw results for a later --read; no display in this mode.
        my $json = JSON::PP::encode_json({
                        version     => $FORMAT_VERSION,
                        loop_counts => $loop_counts,
                        perls       => $perls,
                        results     => $results,
                        tests       => $tests,
                    });

        open my $out, '>:encoding(UTF-8)', $OPTS{write}
            or die " Error: can't open $OPTS{write} for writing: $!\n";
        print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
        close $out       or die "Error: closing file '$OPTS{write}': $!\n";
    }
    else {
        my ($processed, $averages) =
                    grind_process($results, $perls, $loop_counts);

        if (defined $OPTS{bisect}) {
            # Exactly one test and one perl were enforced earlier; dig the
            # single field value out and turn it into an exit status.
            my @r = values %$results;
            die "Panic: expected exactly one test result in bisect\n"
                if @r != 1;
            @r = values %{$r[0]};
            die "Panic: expected exactly one perl result in bisect\n"
                if @r != 1;
            my $c = $r[0]{$bisect_field};
            die "Panic: no result in bisect for field '$bisect_field'\n"
                unless defined $c;
            exit 0 if $bisect_min <= $c and $c <= $bisect_max;
            exit 1;
        }
        else {
            grind_print($processed, $averages, $perls, $tests);
        }
    }
}
639
640
# Run cachegrind for every test/perl combo.
# It may run several processes in parallel when -j is specified.
# Return a hash ref suitable for input to grind_process():
#   ->{test}{perl}[active][loopix] = parsed cachegrind counts
# where active is 0 (empty loop) or 1 (test code), and loopix is 0
# (short loop count) or 1 (long loop count).

sub grind_run {
    my ($tests, $perls, $counts) = @_;

    # Build a list of all the jobs to run

    my @jobs;

    for my $test (sort keys %$tests) {

        # Create two test progs: one with an empty loop and one with code.
        # Note that the empty loop is actually '{1;}' rather than '{}';
        # this causes the loop to have a single nextstate rather than a
        # stub op, so more closely matches the active loop; e.g.:
        #     {1;}    => nextstate;                       unstack
        #     {$x=1;} => nextstate; const; gvsv; sassign; unstack
        my @prog = (
            make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
            make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
        );

        for my $p (@$perls) {
            my ($perl, $label) = @$p;

            # Run both the empty loop and the active loop
            # $counts->[0] and $counts->[1] times.

            for my $i (0,1) {
                for my $j (0,1) {
                    # The generated perl program is fed on stdin ('-');
                    # stderr (where cachegrind reports) is merged so a
                    # single read captures everything.
                    my $cmd = "PERL_HASH_SEED=0 "
                            . "valgrind --tool=cachegrind --branch-sim=yes "
                            . "--cachegrind-out-file=/dev/null "
                            . "$OPTS{grindargs} "
                            . "$perl $OPTS{perlargs} - $counts->[$j] 2>&1";
                    # for debugging and error messages
                    my $id = "$test/$perl "
                        . ($i ? "active" : "empty") . "/"
                        . ($j ? "long" : "short") . " loop";

                    push @jobs, {
                        test   => $test,
                        perl   => $perl,
                        plabel => $label,
                        cmd    => $cmd,
                        prog   => $prog[$i],
                        active => $i,
                        loopix => $j,
                        id     => $id,
                    };
                }
            }
        }
    }

    # Execute each cachegrind and store the results in %results.

    # A child exiting while we write to its stdin must not kill us;
    # the failure is picked up at waitpid time instead.
    local $SIG{PIPE} = 'IGNORE';

    my $max_jobs = $OPTS{jobs};
    my $running  = 0;  # count of executing jobs
    my %pids;          # map pids to jobs
    my %fds;           # map fds to jobs
    my %results;
    my $select = IO::Select->new();

    while (@jobs or $running) {

        if ($OPTS{debug}) {
            printf "Main loop: pending=%d running=%d\n",
                scalar(@jobs), $running;
        }

        # Start new jobs

        while (@jobs && $running < $max_jobs) {
            my $job = shift @jobs;
            my ($id, $cmd) =@$job{qw(id cmd)};

            my ($in, $out, $pid);
            warn "Starting $id\n" if $OPTS{verbose};
            eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
                or die "Error: while starting cachegrind subprocess"
                   ." for $id:\n$@";
            $running++;
            $pids{$pid}    = $job;
            $fds{"$out"}   = $job;
            $job->{out_fd} = $out;
            $job->{output} = '';
            $job->{pid}    = $pid;

            # Non-blocking reads so one slow child can't stall the rest.
            $out->blocking(0);
            $select->add($out);

            if ($OPTS{debug}) {
                print "Started pid $pid for $id\n";
            }

            # Note:
            # In principle we should write to $in in the main select loop,
            # since it may block. In reality,
            # a) the code we write to the perl process's stdin is likely
            #    to be less than the OS's pipe buffer size;
            # b) by the time the perl process has read in all its stdin,
            #    the only output it should have generated is a few lines
            #    of cachegrind output preamble.
            # If these assumptions change, then perform the following print
            # in the select loop instead.

            print $in $job->{prog};
            close $in;
        }

        # Get output of running jobs

        if ($OPTS{debug}) {
            printf "Select: waiting on (%s)\n",
                join ', ', sort { $a <=> $b } map $fds{$_}{pid},
                            $select->handles;
        }

        my @ready = $select->can_read;

        if ($OPTS{debug}) {
            printf "Select: pids (%s) ready\n",
                join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
        }

        unless (@ready) {
            die "Panic: select returned no file handles\n";
        }

        for my $fd (@ready) {
            my $j = $fds{"$fd"};
            # Append to any output already accumulated for this job.
            my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
            unless (defined $r) {
                die "Panic: Read from process running $j->{id} gave:\n$!";
            }
            next if $r;

            # EOF

            if ($OPTS{debug}) {
                print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
            }

            $select->remove($j->{out_fd});
            close($j->{out_fd})
                or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
            $running--;
            delete $fds{"$j->{out_fd}"};
            my $output = $j->{output};

            if ($OPTS{debug}) {
                my $p = $j->{prog};
                $p =~ s/^/ : /mg;
                my $o = $output;
                $o =~ s/^/ : /mg;

                print "\n$j->{id}/\nCommand: $j->{cmd}\n"
                    . "Input:\n$p"
                    . "Output\n$o";
            }

            $results{$j->{test}}{$j->{perl}}[$j->{active}][$j->{loopix}]
                    = parse_cachegrind($output, $j->{id}, $j->{perl});
        }

        # Reap finished jobs

        while (1) {
            my $kid = waitpid(-1, WNOHANG);
            my $ret = $?;
            last if $kid <= 0;

            unless (exists $pids{$kid}) {
                die "Panic: reaped unexpected child $kid";
            }
            my $j = $pids{$kid};
            if ($ret) {
                die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
                    . "with the following output\n:$j->{output}\n";
            }
            delete $pids{$kid};
        }
    }

    return \%results;
}
832
833
834
835
# grind_process(): process the data that has been extracted from
# cachegrind's output.
#
# $res is of the form ->{benchmark_name}{perl_name}[active][count]{field_name},
# where active is 0 or 1 indicating an empty or active loop,
# count is 0 or 1 indicating a short or long loop. E.g.
#
#    $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][10]{Dw_mm}
#
# The $res data structure is modified in-place by this sub.
#
# $perls is [ [ perl-exe, perl-label], .... ].
#
# $counts is [ N, M ] indicating the counts for the short and long loops.
#
#
# return \%output, \%averages, where
#
# $output{benchmark_name}{perl_name}{field_name} = N
# $averages{perl_name}{field_name} = M
#
# where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
# M is the average raw count over all tests ($OPTS{raw}), or
# 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.

sub grind_process {
    my ($res, $perls, $counts) = @_;

    # Process the four results for each test/perf combo:
    # Convert
    #     $res->{benchmark_name}{perl_name}[active][count]{field_name} = n
    # to
    #     $res->{benchmark_name}{perl_name}{field_name} = averaged_n
    #
    # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
    #     empty loops, eliminating startup time
    # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
    #     active loops, eliminating startup time
    # (the two startup times may be different because different code
    # is being compiled); the difference of the two results above
    # divided by the count difference is the time to execute the
    # active code once, eliminating both startup and loop overhead.

    for my $tests (values %$res) {
        for my $r (values %$tests) {
            my $r2;
            for (keys %{$r->[0][0]}) {
                my $n = ( ($r->[1][1]{$_} - $r->[1][0]{$_})
                        - ($r->[0][1]{$_} - $r->[0][0]{$_})
                        ) / ($counts->[1] - $counts->[0]);
                $r2->{$_} = $n;
            }
            # Note: $r aliases the hash value, so this replaces the
            # [active][count] array with the flat per-field hash in-place.
            $r = $r2;
        }
    }

    my %totals;   # per perl/field: sum used to compute averages
    my %counts;   # per perl/field: how many tests contributed to the sum
    my %data;     # the per-test output values

    my $perl_norm = $perls->[$OPTS{norm}][0]; # the name of the reference perl

    for my $test_name (keys %$res) {
        my $res1 = $res->{$test_name};
        my $res2_norm = $res1->{$perl_norm};
        for my $perl (keys %$res1) {
            my $res2 = $res1->{$perl};
            for my $field (keys %$res2) {
                # $p: the norm perl's count; $q: this perl's count.
                my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});

                if ($OPTS{raw}) {
                    # Avoid annoying '-0.0' displays. Ideally this number
                    # should never be negative, but fluctuations in
                    # startup etc can theoretically make this happen
                    $q = 0 if ($q <= 0 && $q > -0.1);
                    $totals{$perl}{$field} += $q;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = $q;
                    next;
                }

                # $p and $q are notionally integer counts, but
                # due to variations in startup etc, it's possible for a
                # count which is supposedly zero to be calculated as a
                # small positive or negative value.
                # In this case, set it to zero. Further below we
                # special-case zeros to avoid division by zero errors etc.

                $p = 0.0 if $p < 0.01;
                $q = 0.0 if $q < 0.01;

                if ($p == 0.0 && $q == 0.0) {
                    # Both perls gave a count of zero, so no change:
                    # treat as 100%
                    $totals{$perl}{$field} += 1;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = 1;
                }
                elsif ($p == 0.0 || $q == 0.0) {
                    # If either count is zero, there were too few events
                    # to give a meaningful ratio (and we will end up with
                    # division by zero if we try). Mark the result undef,
                    # indicating that it shouldn't be displayed; and skip
                    # adding to the average
                    $data{$test_name}{$perl}{$field} = undef;
                }
                else {
                    # For averages, we record q/p rather than p/q.
                    # Consider a test where perl_norm took 1000 cycles
                    # and perlN took 800 cycles. For the individual
                    # results we display p/q, or 1.25; i.e. a quarter
                    # quicker. For the averages, we instead sum all
                    # the 0.8's, which gives the total cycles required to
                    # execute all tests, with all tests given equal
                    # weight. Later we reciprocate the final result,
                    # i.e. 1/(sum(qi/pi)/n)

                    $totals{$perl}{$field} += $q/$p;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = $p/$q;
                }
            }
        }
    }

    # Calculate averages based on %totals and %counts accumulated earlier.

    my %averages;
    for my $perl (keys %totals) {
        my $t = $totals{$perl};
        for my $field (keys %$t) {
            $averages{$perl}{$field} = $OPTS{raw}
                ? $t->{$field} / $counts{$perl}{$field}
                # reciprocal - see comments above
                : $counts{$perl}{$field} / $t->{$field};
        }
    }

    return \%data, \%averages;
}
976
977
# grind_print(): display the tabulated results of all the cachegrinds.
#
# Arguments are of the form:
#    $results->{benchmark_name}{perl_name}{field_name} = N
#    $averages->{perl_name}{field_name} = M
#    $perls = [ [ perl-exe, perl-label ], ... ]
#    $tests->{test_name}{desc => ..., ...}

sub grind_print {
    my ($results, $averages, $perls, $tests) = @_;

    my @perl_names = map $_->[0], @$perls;
    my %perl_labels;
    $perl_labels{$_->[0]} = $_->[1] for @$perls;

    my $field_label_width = 6;
    # Calculate the width to display for each column: at least $min_width,
    # wider if the perl's label is longer than that.
    my $min_width = $OPTS{raw} ? 8 : 6;
    my @widths = map { length($_) < $min_width ? $min_width : length($_) }
                        @perl_labels{@perl_names};

    # Print header.

    print <<EOF;
Key:
    Ir   Instruction read
    Dr   Data read
    Dw   Data write
    COND conditional branches
    IND  indirect branches
    _m   branch predict miss
    _m1  level 1 cache miss
    _mm  last cache (e.g. L3) miss
    -    indeterminate percentage (e.g. 1/0)

EOF

    if ($OPTS{raw}) {
        print "The numbers represent raw counts per loop iteration.\n";
    }
    else {
        print <<EOF;
The numbers represent relative counts per loop iteration, compared to
$perl_labels{$perl_names[0]} at 100.0%.
Higher is better: for example, using half as many instructions gives 200%,
while using twice as many gives 50%.
EOF
    }

    # Populate @test_names with the tests in sorted order.

    my @test_names;
    unless ($OPTS{average}) {
        if (defined $OPTS{'sort-field'}) {
            # Sort numerically by the chosen field in the chosen column.
            my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
            my $perl = $perls->[$perlix][0];
            @test_names = sort
                {
                        $results->{$a}{$perl}{$field}
                    <=> $results->{$b}{$perl}{$field}
                }
                keys %$results;
        }
        else {
            @test_names = sort(keys %$results);
        }
    }

    # No point in displaying average for only one test.
    push @test_names, 'AVERAGE' unless @test_names == 1;

    # If only a single field is to be displayed, use a more compact
    # format with only a single line of output per test.

    my $one_field = defined $OPTS{fields} &&  keys(%{$OPTS{fields}}) == 1;

    if ($one_field) {

        # The first column will now contain test names rather than
        # field names; Calculate the max width.

        $field_label_width = 0;
        for (@test_names) {
            $field_label_width = length if length > $field_label_width;
        }

        # Print the perl executables header.

        print "\n";
        for my $i (0,1) {
            print " " x $field_label_width;
            for (0..$#widths) {
                # First pass: the labels; second pass: a dashed underline.
                printf " %*s", $widths[$_],
                    $i ? ('-' x$widths[$_]) : $perl_labels{$perl_names[$_]};
            }
            print "\n";
        }
    }

    # Dump the results for each test.

    for my $test_name (@test_names) {
        my $doing_ave = ($test_name eq 'AVERAGE');
        my $res1 = $doing_ave ? $averages : $results->{$test_name};

        unless ($one_field) {
            print "\n$test_name";
            print "\n$tests->{$test_name}{desc}" unless $doing_ave;
            print "\n\n";

            # Print the perl executables header.
            for my $i (0,1) {
                print " " x $field_label_width;
                for (0..$#widths) {
                    printf " %*s", $widths[$_],
                        $i ? ('-' x$widths[$_]) : $perl_labels{$perl_names[$_]};
                }
                print "\n";
            }
        }

        # 'N' entries in the field list are blank-line separators between
        # groups of related fields.
        for my $field (qw(Ir Dr Dw COND IND
                          N
                          COND_m IND_m
                          N
                          Ir_m1 Dr_m1 Dw_m1
                          N
                          Ir_mm Dr_mm Dw_mm
                      ))
        {
            next if $OPTS{fields} and ! exists $OPTS{fields}{$field};

            if ($field eq 'N') {
                print "\n";
                next;
            }

            printf "%*s", $field_label_width,
                $one_field ? $test_name : $field;

            for my $i (0..$#widths) {
                my $res2 = $res1->{$perl_names[$i]};
                my $p = $res2->{$field};
                if (!defined $p) {
                    # undef marks an indeterminate ratio (see grind_process).
                    printf " %*s", $widths[$i], '-';
                }
                elsif ($OPTS{raw}) {
                    printf " %*.1f", $widths[$i], $p;
                }
                else {
                    printf " %*.2f", $widths[$i], $p * 100;
                }
            }
            print "\n";
        }
    }
}
1135
1136
# do_selftest(): check that we can parse known cachegrind()
# output formats. If the output of cachegrind changes, add a *new*
# test here; keep the old tests to make sure we continue to parse
# old cachegrinds. @tests holds (description, output, expected) triplets.

sub do_selftest {

    my @tests = (
        'standard',
        <<'EOF',
==32350== Cachegrind, a cache and branch-prediction profiler
==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
==32350==
--32350-- warning: L3 cache found, using its data for the LL simulation.
==32350==
==32350== I refs: 1,124,055
==32350== I1 misses: 5,573
==32350== LLi misses: 3,338
==32350== I1 miss rate: 0.49%
==32350== LLi miss rate: 0.29%
==32350==
==32350== D refs: 404,275 (259,191 rd + 145,084 wr)
==32350== D1 misses: 9,608 ( 6,098 rd + 3,510 wr)
==32350== LLd misses: 5,794 ( 2,781 rd + 3,013 wr)
==32350== D1 miss rate: 2.3% ( 2.3% + 2.4% )
==32350== LLd miss rate: 1.4% ( 1.0% + 2.0% )
==32350==
==32350== LL refs: 15,181 ( 11,671 rd + 3,510 wr)
==32350== LL misses: 9,132 ( 6,119 rd + 3,013 wr)
==32350== LL miss rate: 0.5% ( 0.4% + 2.0% )
==32350==
==32350== Branches: 202,372 (197,050 cond + 5,322 ind)
==32350== Mispredicts: 19,153 ( 17,742 cond + 1,411 ind)
==32350== Mispred rate: 9.4% ( 9.0% + 26.5% )
EOF
        {
            COND    =>  197050,
            COND_m  =>   17742,
            Dr      =>  259191,
            Dr_m1   =>    6098,
            Dr_mm   =>    2781,
            Dw      =>  145084,
            Dw_m1   =>    3510,
            Dw_mm   =>    3013,
            IND     =>    5322,
            IND_m   =>    1411,
            Ir      => 1124055,
            Ir_m1   =>    5573,
            Ir_mm   =>    3338,
        },
    );

    # Locate test.pl whether we are run from the top-level directory or
    # from within t/. Was a bare require, which died on the first missing
    # candidate and so never tried the '.' fallback.
    for ('t', '.') {
        last if eval { require "$_/test.pl" };
    }
    die "Error: can't locate test.pl\n" unless defined &plan;

    plan(@tests / 3 * keys %VALID_FIELDS);

    while (@tests) {
        my $desc     = shift @tests;
        my $output   = shift @tests;
        my $expected = shift @tests;
        # Pass the test description as the run id so parse errors are
        # attributed to the right selftest case.
        my $p = parse_cachegrind($output, $desc);
        for (sort keys %VALID_FIELDS) {
            is($p->{$_}, $expected->{$_}, "$desc, $_");
        }
    }
}