1#!/usr/bin/perl
2#
3# A tool for analysing the performance of the code snippets found in
4# t/perf/benchmarks or similar
5
6
7=head1 NAME
8
9bench.pl - Compare the performance of perl code snippets across multiple
10perls.
11
12=head1 SYNOPSIS
13
14 # Basic: run the tests in t/perf/benchmarks against two or
15 # more perls
16
17 bench.pl [options] perl1[=label1] perl2[=label2] ...
18
19 # Run bench.pl's own built-in sanity tests
20
21 bench.pl --action=selftest
22
23=head1 DESCRIPTION
24
25By default, F<bench.pl> will run code snippets found in
26F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
27how many instruction reads, data writes, branches, cache misses, etc.
28one execution of the snippet uses. It will run them against two or more
29perl executables and show how much each test has improved or regressed.
30
31It is modelled on the F<perlbench> tool, but since it measures instruction
32reads etc., rather than timings, it is much more precise and reproducible.
33It is also considerably faster, and is capable of running tests in
34parallel (with C<-j>). Rather than displaying a single relative
35percentage per test/perl combination, it displays values for 13 different
36measurements, such as instruction reads, conditional branch misses etc.
37
38There are options to write the raw data to a file, and to read it back.
39This means that you can view the same run data with different
40selection and sort options.
41
42The optional C<=label> after each perl executable is used in the display
43output.
44
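For example, the following invocation (with hypothetical executable paths)
would label the two builds C<before> and C<after> in all output tables:

    bench.pl ./perl.orig=before ./perl.patched=after
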
45=head1 OPTIONS
46
47=over 4
48
49=item *
50
51--action=I<foo>
52
53What action to perform. The default is I<grind>, which runs the benchmarks
54using I<cachegrind> as the back end. The only other action at the moment is
55I<selftest>, which runs some basic sanity checks and produces TAP output.
56
57=item *
58
59--average
60
61Only display the overall average, rather than the results for each
62individual test.
63
64=item *
65
66--benchfile=I<foo>
67
68The path of the file which contains the benchmarks (F<t/perf/benchmarks>
69by default).
70
71=item *
72
73--bisect=I<field,minval,maxval>
74
75Run a single test against one perl and exit with a zero status if the
76named field is in the specified range; exit 1 otherwise. It will complain
77if more than one test or perl has been specified. It is intended to be
78called as part of a bisect run, to determine when something changed.
79For example,
80
81 bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
82 ./miniperl
83
84might be called from bisect to find when the number of instruction reads
85for test I<foo> falls outside the range 100..105.
86
87=item *
88
89--debug
90
91Enable verbose debugging output.
92
93=item *
94
95--fields=I<a,b,c>
96
97Display only the specified fields; for example,
98
99 --fields=Ir,Ir_m,Ir_mm
100
101If only one field is selected, the output is in more compact form.
102
103=item *
104
105--grindargs=I<foo>
106
107Optional command-line arguments to pass to cachegrind invocations.
108
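For example, to change the simulated level-1 instruction cache (this
assumes a valgrind/cachegrind new enough to accept the option):

    --grindargs=--I1=32768,8,64
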
109=item *
110
111--help
112
113Display basic usage information.
114
115=item *
116
117-j I<N>
118--jobs=I<N>
119
120Run I<N> jobs in parallel (default 1). This determines how many cachegrind
121processes will be running at a time, and should generally be set to the number
122of CPUs available.
123
124=item *
125
126--norm=I<foo>
127
128Specify which perl column in the output to treat as the 100% norm.
129It may be a column number (0..N-1) or a perl executable name or label.
130It defaults to the leftmost column.
131
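For example, with three (hypothetical) perls on the command line, either
of the following makes the third one the 100% baseline:

    bench.pl --norm=2 perlA perlB perlC
    bench.pl --norm=perlC perlA perlB perlC
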
132=item *
133
134--perlargs=I<foo>
135
136Optional command-line arguments to pass to each perl that is run as part of
137a cachegrind session. For example, C<--perlargs=-Ilib>.
138
139=item *
140
141--raw
142
143Display raw data counts rather than percentages in the outputs. This
144allows you to see the exact number of instruction reads, branch misses, etc.
145for each test/perl combination. It also causes the C<AVERAGE> display
146per field to be calculated based on the average of each test's count
147rather than the average of each percentage. This means that tests with very
148high counts will dominate.
149
150=item *
151
152--sort=I<field:perl>
153
154Order the tests in the output based on the value of I<field> in the
155column I<perl>. The I<perl> value is as per C<--norm>. For example
156
157 bench.pl --sort=Dw:perl-5.20.0 \
158 perl-5.16.0 perl-5.18.0 perl-5.20.0
159
160=item *
161
162-r I<file>
163--read=I<file>
164
165Read in saved data from a previous C<--write> run from the specified file.
166
167Requires C<JSON::PP> to be available.
168
169=item *
170
171--tests=I<FOO>
172
173Specify a subset of tests to run (or in the case of C<--read>, to display).
174It may be either a comma-separated list of test names, or a regular
175expression. For example
176
177 --tests=expr::assign::scalar_lex,expr::assign::2list_lex
178 --tests=/^expr::/
179
180=item *
181
182--verbose
183
184Display progress information.
185
186=item *
187
188-w I<file>
189--write=I<file>
190
191Save the raw data to the specified file. It can be read back later with
192C<--read>.
193
194Requires C<JSON::PP> to be available.
195
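For example, a typical save-then-analyse workflow (the file name here is
arbitrary) might be:

    bench.pl --write=results.json perlA perlB
    bench.pl --read=results.json --sort=Ir:perlA --fields=Ir,COND_m
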
196=back
197
198=cut
199
200
201
202use 5.010000;
203use warnings;
204use strict;
205use Getopt::Long qw(:config no_auto_abbrev);
206use IPC::Open2 ();
207use IO::Select;
208use IO::File;
209use POSIX ":sys_wait_h";
210
211# The version of the file format used to save data. We refuse to process
212# a file if its integer version component is newer than ours.
213
214my $FORMAT_VERSION = 1.0;
215
216# The fields we know about
217
218my %VALID_FIELDS = map { $_ => 1 }
219 qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
220
221sub usage {
222 die <<EOF;
223usage: $0 [options] perl[=label] ...
224 --action=foo What action to perform [default: grind].
225 --average Only display average, not individual test results.
226 --benchfile=foo File containing the benchmarks;
227 [default: t/perf/benchmarks].
228 --bisect=f,min,max run a single test against one perl and exit with a
229 zero status if the named field is in the specified
230 range; exit 1 otherwise.
231 --debug Enable verbose debugging output.
232 --fields=a,b,c Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
233 --grindargs=foo Optional command-line args to pass to cachegrind.
234 --help Display this help.
235 -j|--jobs=N Run N jobs in parallel [default 1].
236 --norm=perl Which perl column to treat as 100%; may be a column
237 number (0..N-1) or a perl executable name or label;
238 [default: 0].
239 --perlargs=foo Optional command-line args to pass to each perl to run.
240 --raw Display raw data counts rather than percentages.
241 --sort=field:perl Sort the tests based on the value of 'field' in the
242 column 'perl'. The perl value is as per --norm.
243 -r|--read=file Read in previously saved data from the specified file.
244 --tests=FOO Select only the specified tests from the benchmarks file;
245 FOO may be either of the form 'foo,bar' or '/regex/';
246 [default: all tests].
247 --verbose Display progress information.
248 -w|--write=file Save the raw data to the specified file.
249
250--action is one of:
251 grind run the code under cachegrind
252 selftest perform a selftest; produce TAP output
253
254The command line ends with one or more specified perl executables,
255which will be searched for in the current \$PATH. Each binary name may
256have an optional =LABEL appended, which will be used rather than the
257executable name in output. E.g.
258
259 perl-5.20.1=PRE-BUGFIX perl-5.20.1-new=POST-BUGFIX
260EOF
261}
262
263my %OPTS = (
264 action => 'grind',
265 average => 0,
266 benchfile => 't/perf/benchmarks',
267 bisect => undef,
268 debug => 0,
269 grindargs => '',
270 fields => undef,
271 jobs => 1,
272 norm => 0,
273 perlargs => '',
274 raw => 0,
275 read => undef,
276 sort => undef,
277 tests => undef,
278 verbose => 0,
279 write => undef,
280);
281
282
283# process command-line args and call top-level action
284
285{
286 GetOptions(
287 'action=s' => \$OPTS{action},
288 'average' => \$OPTS{average},
289 'benchfile=s' => \$OPTS{benchfile},
290 'bisect=s' => \$OPTS{bisect},
291 'debug' => \$OPTS{debug},
292 'grindargs=s' => \$OPTS{grindargs},
293 'help' => \$OPTS{help},
294 'fields=s' => \$OPTS{fields},
295 'jobs|j=i' => \$OPTS{jobs},
296 'norm=s' => \$OPTS{norm},
297 'perlargs=s' => \$OPTS{perlargs},
298 'raw' => \$OPTS{raw},
299 'read|r=s' => \$OPTS{read},
300 'sort=s' => \$OPTS{sort},
301 'tests=s' => \$OPTS{tests},
302 'verbose' => \$OPTS{verbose},
303 'write|w=s' => \$OPTS{write},
304 ) or usage;
305
306 usage if $OPTS{help};
307
308
309 if (defined $OPTS{read} and defined $OPTS{write}) {
310 die "Error: can't specify both --read and --write options\n";
311 }
312
313 if (defined $OPTS{read} or defined $OPTS{write}) {
314 # fail early if it's not present
315 require JSON::PP;
316 }
317
318 if (defined $OPTS{fields}) {
319 my @f = split /,/, $OPTS{fields};
320 for (@f) {
321 die "Error: --fields: unknown field '$_'\n"
322 unless $VALID_FIELDS{$_};
323 }
324 my %f = map { $_ => 1 } @f;
325 $OPTS{fields} = \%f;
326 }
327
328 my %valid_actions = qw(grind 1 selftest 1);
329 unless ($valid_actions{$OPTS{action}}) {
330 die "Error: unrecognised action '$OPTS{action}'\n"
331 . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
332 }
333
334 if (defined $OPTS{sort}) {
335 my @s = split /:/, $OPTS{sort};
336 if (@s != 2) {
337 die "Error: --sort argument should be of the form field:perl: "
338 . "'$OPTS{sort}'\n";
339 }
340 my ($field, $perl) = @s;
341 die "Error: --sort: unknown field '$field'\n"
342 unless $VALID_FIELDS{$field};
343 # the 'perl' value will be validated later, after we have processed
344 # the perls
345 $OPTS{'sort-field'} = $field;
346 $OPTS{'sort-perl'} = $perl;
347 }
348
349 if ($OPTS{action} eq 'selftest') {
350 if (@ARGV) {
351 die "Error: no perl executables may be specified with --action=selftest\n"
352 }
353 }
354 elsif (defined $OPTS{bisect}) {
355 die "Error: exactly one perl executable must be specified for bisect\n"
356 unless @ARGV == 1;
357 die "Error: Can't specify both --bisect and --read\n"
358 if defined $OPTS{read};
359 die "Error: Can't specify both --bisect and --write\n"
360 if defined $OPTS{write};
361 }
362 elsif (defined $OPTS{read}) {
363 if (@ARGV) {
364 die "Error: no perl executables may be specified with --read\n"
365 }
366 }
367 elsif ($OPTS{raw}) {
368 unless (@ARGV) {
369 die "Error: at least one perl executable must be specified\n";
370 }
371 }
372 else {
373 unless (@ARGV >= 2) {
374 die "Error: at least two perl executables must be specified\n";
375 }
376 }
377
378 if ($OPTS{action} eq 'grind') {
379 do_grind(\@ARGV);
380 }
381 elsif ($OPTS{action} eq 'selftest') {
382 do_selftest();
383 }
384}
385exit 0;
386
387
388# Given a hash ref keyed by test names, filter it by deleting unwanted
389# tests, based on $OPTS{tests}.
390
391sub filter_tests {
392 my ($tests) = @_;
393
394 my $opt = $OPTS{tests};
395 return unless defined $opt;
396
397 my @tests;
398
399 if ($opt =~ m{^/}) {
400 $opt =~ s{^/(.+)/$}{$1}
401 or die "Error: --tests regex must be of the form /.../\n";
402 for (keys %$tests) {
403 delete $tests->{$_} unless /$opt/;
404 }
405 }
406 else {
407 my %t;
408 for (split /,/, $opt) {
409 die "Error: no such test found: '$_'\n" unless exists $tests->{$_};
410 $t{$_} = 1;
411 }
412 for (keys %$tests) {
413 delete $tests->{$_} unless exists $t{$_};
414 }
415 }
416}
417
418
419# Read in the test file, and filter out any tests excluded by $OPTS{tests}
420
421sub read_tests_file {
422 my ($file) = @_;
423
424 my $ta = do $file;
425 unless ($ta) {
426 die "Error: can't parse '$file': $@\n" if $@;
427 die "Error: can't read '$file': $!\n";
428 }
429
430 my $t = { @$ta };
431 filter_tests($t);
432 return $t;
433}
434
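# For reference, the benchmarks file itself is perl source which returns
# an array ref of name => spec pairs; a (hypothetical) entry looks
# something like:
#
#     [
#         'expr::assign::scalar_lex' => {
#             desc  => 'lexical $x = 1',
#             setup => 'my $x',
#             code  => '$x = 1',
#         },
#     ];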
435
436# Process the perl/column argument of options like --norm and --sort.
437# Return the index of the matching perl.
438
439sub select_a_perl {
440 my ($perl, $perls, $who) = @_;
441
442 if ($perl =~ /^[0-9]$/) {
443 die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
444 unless $perl < @$perls;
445 return $perl;
446 }
447 else {
448 my @perl = grep $perls->[$_][0] eq $perl
449 || $perls->[$_][1] eq $perl,
450 0..$#$perls;
451 die "Error: $who: unrecognised perl '$perl'\n"
452 unless @perl;
453 die "Error: $who: ambiguous perl '$perl'\n"
454 if @perl > 1;
455 return $perl[0];
456 }
457}
458
459
460# Validate the list of perl=label on the command line.
461# Return a list of [ exe, label ] pairs.
462
463sub process_perls {
464 my @results;
465 for my $p (@_) {
466 my ($perl, $label) = split /=/, $p, 2;
467 $label //= $perl;
468 my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
469 die "Error: unable to execute '$perl': $r" if $r ne "ok\n";
470 push @results, [ $perl, $label ];
471 }
472 return @results;
473}
474
475
476
477# Return a string containing perl test code wrapped in a loop
478# that runs $ARGV[0] times
479
480sub make_perl_prog {
481 my ($test, $desc, $setup, $code) = @_;
482
483 return <<EOF;
484# $desc
485package $test;
486BEGIN { srand(0) }
487$setup;
488for my \$__loop__ (1..\$ARGV[0]) {
489 $code;
490}
491EOF
492}
493
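# For illustration only (not used by the code): given a hypothetical test
# 'expr::foo' with desc 'lexical assignment', setup 'my $x' and code
# '$x = 1', make_perl_prog() would return roughly:
#
#     # lexical assignment
#     package expr::foo;
#     BEGIN { srand(0) }
#     my $x;
#     for my $__loop__ (1..$ARGV[0]) {
#         $x = 1;
#     }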
494
495# Parse the output from cachegrind. Return a hash ref.
496# See do_selftest() for examples of the output format.
497
498sub parse_cachegrind {
499 my ($output, $id, $perl) = @_;
500
501 my %res;
502
503 my @lines = split /\n/, $output;
504 for (@lines) {
505 unless (s/(==\d+==)|(--\d+--) //) {
506 die "Error: while executing $id:\n"
507 . "unexpected code or cachegrind output:\n$_\n";
508 }
509 if (/I refs:\s+([\d,]+)/) {
510 $res{Ir} = $1;
511 }
512 elsif (/I1 misses:\s+([\d,]+)/) {
513 $res{Ir_m1} = $1;
514 }
515 elsif (/LLi misses:\s+([\d,]+)/) {
516 $res{Ir_mm} = $1;
517 }
518 elsif (/D refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
519 @res{qw(Dr Dw)} = ($1,$2);
520 }
521 elsif (/D1 misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
522 @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
523 }
524 elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
525 @res{qw(Dr_mm Dw_mm)} = ($1,$2);
526 }
527 elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
528 @res{qw(COND IND)} = ($1,$2);
529 }
530 elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
531 @res{qw(COND_m IND_m)} = ($1,$2);
532 }
533 }
534
535 for my $field (keys %VALID_FIELDS) {
536 die "Error: can't parse '$field' field from cachegrind output:\n$output"
537 unless exists $res{$field};
538 $res{$field} =~ s/,//g;
539 }
540
541 return \%res;
542}
543
544
545# Handle the 'grind' action
546
547sub do_grind {
548 my ($perl_args) = @_; # the residue of @ARGV after option processing
549
550 my ($loop_counts, $perls, $results, $tests);
551 my ($bisect_field, $bisect_min, $bisect_max);
552
553 if (defined $OPTS{bisect}) {
554 ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
555 die "Error: --bisect option must be of form 'field,integer,integer'\n"
556 unless
557 defined $bisect_max
558 and $bisect_min =~ /^[0-9]+$/
559 and $bisect_max =~ /^[0-9]+$/;
560
561 die "Error: unrecognised field '$bisect_field' in --bisect option\n"
562 unless $VALID_FIELDS{$bisect_field};
563
564 die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
565 if $bisect_min > $bisect_max;
566 }
567
568 if (defined $OPTS{read}) {
569 open my $in, '<:encoding(UTF-8)', $OPTS{read}
570 or die "Error: can't open $OPTS{read} for reading: $!\n";
571 my $data = do { local $/; <$in> };
572 close $in;
573
574 my $hash = JSON::PP::decode_json($data);
575 if (int($FORMAT_VERSION) < int($hash->{version})) {
576 die "Error: unsupported version $hash->{version} in file"
577 . " '$OPTS{read}' (too new)\n";
578 }
579 ($loop_counts, $perls, $results, $tests) =
580 @$hash{qw(loop_counts perls results tests)};
581
582 filter_tests($results);
583 filter_tests($tests);
584 }
585 else {
586 # How many times to execute the loop for the two trials. The lower
587 # value is intended to do the loop enough times that branch
588 # prediction has taken hold; the higher loop allows us to see the
589 # branch misses after that
590 $loop_counts = [10, 20];
591
592 $tests = read_tests_file($OPTS{benchfile});
593 die "Error: only a single test may be specified with --bisect\n"
594 if defined $OPTS{bisect} and keys %$tests != 1;
595
596 $perls = [ process_perls(@$perl_args) ];
597
598
599 $results = grind_run($tests, $perls, $loop_counts);
600 }
601
602 # now that we have a list of perls, use it to process the
603 # 'perl' component of the --norm and --sort args
604
605 $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
606 if (defined $OPTS{'sort-perl'}) {
607 $OPTS{'sort-perl'} =
608 select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
609 }
610
611 if (defined $OPTS{write}) {
612 my $json = JSON::PP::encode_json({
613 version => $FORMAT_VERSION,
614 loop_counts => $loop_counts,
615 perls => $perls,
616 results => $results,
617 tests => $tests,
618 });
619
620 open my $out, '>:encoding(UTF-8)', $OPTS{write}
621 or die "Error: can't open $OPTS{write} for writing: $!\n";
622 print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
623 close $out or die "Error: closing file '$OPTS{write}': $!\n";
624 }
625 else {
626 my ($processed, $averages) =
627 grind_process($results, $perls, $loop_counts);
628
629 if (defined $OPTS{bisect}) {
630 my @r = values %$results;
631 die "Panic: expected exactly one test result in bisect\n"
632 if @r != 1;
633 @r = values %{$r[0]};
634 die "Panic: expected exactly one perl result in bisect\n"
635 if @r != 1;
636 my $c = $r[0]{$bisect_field};
637 die "Panic: no result in bisect for field '$bisect_field'\n"
638 unless defined $c;
639 exit 0 if $bisect_min <= $c and $c <= $bisect_max;
640 exit 1;
641 }
642 else {
643 grind_print($processed, $averages, $perls, $tests);
644 }
645 }
646}
647
648
649# Run cachegrind for every test/perl combo.
650# It may run several processes in parallel when -j is specified.
651# Return a hash ref suitable for input to grind_process()
652
653sub grind_run {
654 my ($tests, $perls, $counts) = @_;
655
656 # Build a list of all the jobs to run
657
658 my @jobs;
659
660 for my $test (sort keys %$tests) {
661
662 # Create two test progs: one with an empty loop and one with code.
663 # Note that the empty loop is actually '{1;}' rather than '{}';
664 # this causes the loop to have a single nextstate rather than a
665 # stub op, so more closely matches the active loop; e.g.:
666 # {1;} => nextstate; unstack
667 # {$x=1;} => nextstate; const; gvsv; sassign; unstack
668 my @prog = (
669 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
670 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
671 );
672
673 for my $p (@$perls) {
674 my ($perl, $label) = @$p;
675
676 # Run both the empty loop and the active loop
677 # $counts->[0] and $counts->[1] times.
678
679 for my $i (0,1) {
680 for my $j (0,1) {
681 my $cmd = "PERL_HASH_SEED=0 "
682 . "valgrind --tool=cachegrind --branch-sim=yes "
683 . "--cachegrind-out-file=/dev/null "
684 . "$OPTS{grindargs} "
685 . "$perl $OPTS{perlargs} - $counts->[$j] 2>&1";
686 # for debugging and error messages
687 my $id = "$test/$perl "
688 . ($i ? "active" : "empty") . "/"
689 . ($j ? "long" : "short") . " loop";
690
691 push @jobs, {
692 test => $test,
693 perl => $perl,
694 plabel => $label,
695 cmd => $cmd,
696 prog => $prog[$i],
697 active => $i,
698 loopix => $j,
699 id => $id,
700 };
701 }
702 }
703 }
704 }
705
706 # Execute each cachegrind and store the results in %results.
707
708 local $SIG{PIPE} = 'IGNORE';
709
710 my $max_jobs = $OPTS{jobs};
711 my $running = 0; # count of executing jobs
712 my %pids; # map pids to jobs
713 my %fds; # map fds to jobs
714 my %results;
715 my $select = IO::Select->new();
716
717 while (@jobs or $running) {
718
719 if ($OPTS{debug}) {
720 printf "Main loop: pending=%d running=%d\n",
721 scalar(@jobs), $running;
722 }
723
724 # Start new jobs
725
726 while (@jobs && $running < $max_jobs) {
727 my $job = shift @jobs;
728 my ($id, $cmd) =@$job{qw(id cmd)};
729
730 my ($in, $out, $pid);
731 warn "Starting $id\n" if $OPTS{verbose};
732 eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
733 or die "Error: while starting cachegrind subprocess"
734 ." for $id:\n$@";
735 $running++;
736 $pids{$pid} = $job;
737 $fds{"$out"} = $job;
738 $job->{out_fd} = $out;
739 $job->{output} = '';
740 $job->{pid} = $pid;
741
742 $out->blocking(0);
743 $select->add($out);
744
745 if ($OPTS{debug}) {
746 print "Started pid $pid for $id\n";
747 }
748
749 # Note:
750 # In principle we should write to $in in the main select loop,
751 # since it may block. In reality,
752 # a) the code we write to the perl process's stdin is likely
753 # to be less than the OS's pipe buffer size;
754 # b) by the time the perl process has read in all its stdin,
755 # the only output it should have generated is a few lines
756 # of cachegrind output preamble.
757 # If these assumptions change, then perform the following print
758 # in the select loop instead.
759
760 print $in $job->{prog};
761 close $in;
762 }
763
764 # Get output of running jobs
765
766 if ($OPTS{debug}) {
767 printf "Select: waiting on (%s)\n",
768 join ', ', sort { $a <=> $b } map $fds{$_}{pid},
769 $select->handles;
770 }
771
772 my @ready = $select->can_read;
773
774 if ($OPTS{debug}) {
775 printf "Select: pids (%s) ready\n",
776 join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
777 }
778
779 unless (@ready) {
780 die "Panic: select returned no file handles\n";
781 }
782
783 for my $fd (@ready) {
784 my $j = $fds{"$fd"};
785 my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
786 unless (defined $r) {
787 die "Panic: Read from process running $j->{id} gave:\n$!";
788 }
789 next if $r;
790
791 # EOF
792
793 if ($OPTS{debug}) {
794 print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
795 }
796
797 $select->remove($j->{out_fd});
798 close($j->{out_fd})
799 or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
800 $running--;
801 delete $fds{"$j->{out_fd}"};
802 my $output = $j->{output};
803
804 if ($OPTS{debug}) {
805 my $p = $j->{prog};
806 $p =~ s/^/ : /mg;
807 my $o = $output;
808 $o =~ s/^/ : /mg;
809
810 print "\n$j->{id}/\nCommand: $j->{cmd}\n"
811 . "Input:\n$p"
812 . "Output\n$o";
813 }
814
815 $results{$j->{test}}{$j->{perl}}[$j->{active}][$j->{loopix}]
816 = parse_cachegrind($output, $j->{id}, $j->{perl});
817 }
818
819 # Reap finished jobs
820
821 while (1) {
822 my $kid = waitpid(-1, WNOHANG);
823 my $ret = $?;
824 last if $kid <= 0;
825
826 unless (exists $pids{$kid}) {
827 die "Panic: reaped unexpected child $kid";
828 }
829 my $j = $pids{$kid};
830 if ($ret) {
831 die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
832 . "with the following output:\n$j->{output}\n";
833 }
834 delete $pids{$kid};
835 }
836 }
837
838 return \%results;
839}
840
841
842
843
844# grind_process(): process the data that has been extracted from
845# cachegrind's output.
846#
847# $res is of the form ->{benchmark_name}{perl_name}[active][count]{field_name},
848# where active is 0 or 1 indicating an empty or active loop,
849# count is 0 or 1 indicating a short or long loop. E.g.
850#
851# $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][1]{Dw_mm}
852#
853# The $res data structure is modified in-place by this sub.
854#
855# $perls is [ [ perl-exe, perl-label], .... ].
856#
857# $counts is [ N, M ] indicating the counts for the short and long loops.
858#
859#
860# return \%output, \%averages, where
861#
862# $output{benchmark_name}{perl_name}{field_name} = N
863# $averages{perl_name}{field_name} = M
864#
865# where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
866# M is the average raw count over all tests ($OPTS{raw}), or
867# 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
868
869sub grind_process {
870 my ($res, $perls, $counts) = @_;
871
872 # Process the four results for each test/perf combo:
873 # Convert
874 # $res->{benchmark_name}{perl_name}[active][count]{field_name} = n
875 # to
876 # $res->{benchmark_name}{perl_name}{field_name} = averaged_n
877 #
878 # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
879 # empty loops, eliminating startup time
880 # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
881 # active loops, eliminating startup time
882 # (the two startup times may be different because different code
883 # is being compiled); the difference of the two results above
884 # divided by the count difference is the time to execute the
885 # active code once, eliminating both startup and loop overhead.
886
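    # A worked example with made-up numbers: with $counts = [10, 20],
    # suppose the empty-loop runs give Ir counts of 100_000 and 100_050,
    # and the active-loop runs give 100_300 and 100_550. Then
    #     ((100_550 - 100_300) - (100_050 - 100_000)) / (20 - 10) = 20
    # i.e. the active code costs 20 Ir per iteration once startup and
    # loop overhead have been cancelled out.
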
887 for my $tests (values %$res) {
888 for my $r (values %$tests) {
889 my $r2;
890 for (keys %{$r->[0][0]}) {
891 my $n = ( ($r->[1][1]{$_} - $r->[1][0]{$_})
892 - ($r->[0][1]{$_} - $r->[0][0]{$_})
893 ) / ($counts->[1] - $counts->[0]);
894 $r2->{$_} = $n;
895 }
896 $r = $r2;
897 }
898 }
899
900 my %totals;
901 my %counts;
902 my %data;
903
904 my $perl_norm = $perls->[$OPTS{norm}][0]; # the name of the reference perl
905
906 for my $test_name (keys %$res) {
907 my $res1 = $res->{$test_name};
908 my $res2_norm = $res1->{$perl_norm};
909 for my $perl (keys %$res1) {
910 my $res2 = $res1->{$perl};
911 for my $field (keys %$res2) {
912 my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});
913
914 if ($OPTS{raw}) {
915 # Avoid annoying '-0.0' displays. Ideally this number
916 # should never be negative, but fluctuations in
917 # startup etc can theoretically make this happen
918 $q = 0 if ($q <= 0 && $q > -0.1);
919 $totals{$perl}{$field} += $q;
920 $counts{$perl}{$field}++;
921 $data{$test_name}{$perl}{$field} = $q;
922 next;
923 }
924
925 # $p and $q are notionally integer counts, but
926 # due to variations in startup etc, it's possible for a
927 # count which is supposedly zero to be calculated as a
928 # small positive or negative value.
929 # In this case, set it to zero. Further below we
930 # special-case zeros to avoid division by zero errors etc.
931
932 $p = 0.0 if $p < 0.01;
933 $q = 0.0 if $q < 0.01;
934
935 if ($p == 0.0 && $q == 0.0) {
936 # Both perls gave a count of zero, so no change:
937 # treat as 100%
938 $totals{$perl}{$field} += 1;
939 $counts{$perl}{$field}++;
940 $data{$test_name}{$perl}{$field} = 1;
941 }
942 elsif ($p == 0.0 || $q == 0.0) {
943 # If either count is zero, there were too few events
944 # to give a meaningful ratio (and we will end up with
945 # division by zero if we try). Mark the result undef,
946 # indicating that it shouldn't be displayed; and skip
947 # adding to the average
948 $data{$test_name}{$perl}{$field} = undef;
949 }
950 else {
951 # For averages, we record q/p rather than p/q.
952 # Consider a test where perl_norm took 1000 cycles
953 # and perlN took 800 cycles. For the individual
954 # results we display p/q, or 1.25; i.e. a quarter
955 # quicker. For the averages, we instead sum all
956 # the 0.8's, which gives the total cycles required to
957 # execute all tests, with all tests given equal
958 # weight. Later we reciprocate the final result,
959 # i.e. 1/(sum(qi/pi)/n)
960
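                    # A made-up example: if two tests give q/p ratios of
                    # 0.8 and 0.5, the reported average is
                    #     1 / ((0.8 + 0.5) / 2), which is about 1.54,
                    # i.e. roughly 154% of the norm perl overall.
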
961 $totals{$perl}{$field} += $q/$p;
962 $counts{$perl}{$field}++;
963 $data{$test_name}{$perl}{$field} = $p/$q;
964 }
965 }
966 }
967 }
968
969 # Calculate averages based on %totals and %counts accumulated earlier.
970
971 my %averages;
972 for my $perl (keys %totals) {
973 my $t = $totals{$perl};
974 for my $field (keys %$t) {
975 $averages{$perl}{$field} = $OPTS{raw}
976 ? $t->{$field} / $counts{$perl}{$field}
977 # reciprocal - see comments above
978 : $counts{$perl}{$field} / $t->{$field};
979 }
980 }
981
982 return \%data, \%averages;
983}
984
985
986# grind_print(): display the tabulated results of all the cachegrinds.
987#
988# Arguments are of the form:
989# $results->{benchmark_name}{perl_name}{field_name} = N
990# $averages->{perl_name}{field_name} = M
991# $perls = [ [ perl-exe, perl-label ], ... ]
992# $tests->{test_name}{desc => ..., ...}
993
994sub grind_print {
995 my ($results, $averages, $perls, $tests) = @_;
996
997 my @perl_names = map $_->[0], @$perls;
998 my %perl_labels;
999 $perl_labels{$_->[0]} = $_->[1] for @$perls;
1000
1001 my $field_label_width = 6;
1002 # Calculate the width to display for each column.
1003 my $min_width = $OPTS{raw} ? 8 : 6;
1004 my @widths = map { length($_) < $min_width ? $min_width : length($_) }
1005 @perl_labels{@perl_names};
1006
1007 # Print header.
1008
1009 print <<EOF;
1010Key:
1011 Ir Instruction read
1012 Dr Data read
1013 Dw Data write
1014 COND conditional branches
1015 IND indirect branches
1016 _m branch predict miss
1017 _m1 level 1 cache miss
1018 _mm last cache (e.g. L3) miss
1019 - indeterminate percentage (e.g. 1/0)
1020
1021EOF
1022
1023 if ($OPTS{raw}) {
1024 print "The numbers represent raw counts per loop iteration.\n";
1025 }
1026 else {
1027 print <<EOF;
1028The numbers represent relative counts per loop iteration, compared to
1029$perl_labels{$perl_names[0]} at 100.0%.
1030Higher is better: for example, using half as many instructions gives 200%,
1031while using twice as many gives 50%.
1032EOF
1033 }
1034
1035 # Populate @test_names with the tests in sorted order.
1036
1037 my @test_names;
1038 unless ($OPTS{average}) {
1039 if (defined $OPTS{'sort-field'}) {
1040 my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
1041 my $perl = $perls->[$perlix][0];
1042 @test_names = sort
1043 {
1044 $results->{$a}{$perl}{$field}
1045 <=> $results->{$b}{$perl}{$field}
1046 }
1047 keys %$results;
1048 }
1049 else {
1050 @test_names = sort(keys %$results);
1051 }
1052 }
1053
1054 # No point in displaying average for only one test.
1055 push @test_names, 'AVERAGE' unless @test_names == 1;
1056
1057 # If only a single field is to be displayed, use a more compact
1058 # format with only a single line of output per test.
1059
1060 my $one_field = defined $OPTS{fields} && keys(%{$OPTS{fields}}) == 1;
1061
1062 if ($one_field) {
1063 print "Results for field " . (keys(%{$OPTS{fields}}))[0] . ".\n";
1064
1065 # The first column will now contain test names rather than
1066 # field names; Calculate the max width.
1067
1068 $field_label_width = 0;
1069 for (@test_names) {
1070 $field_label_width = length if length > $field_label_width;
1071 }
1072
1073 # Print the perl executables header.
1074
1075 print "\n";
1076 for my $i (0,1) {
1077 print " " x $field_label_width;
1078 for (0..$#widths) {
1079 printf " %*s", $widths[$_],
1080 $i ? ('-' x$widths[$_]) : $perl_labels{$perl_names[$_]};
1081 }
1082 print "\n";
1083 }
1084 }
1085
1086 # Dump the results for each test.
1087
1088 for my $test_name (@test_names) {
1089 my $doing_ave = ($test_name eq 'AVERAGE');
1090 my $res1 = $doing_ave ? $averages : $results->{$test_name};
1091
1092 unless ($one_field) {
1093 print "\n$test_name";
1094 print "\n$tests->{$test_name}{desc}" unless $doing_ave;
1095 print "\n\n";
1096
1097 # Print the perl executables header.
1098 for my $i (0,1) {
1099 print " " x $field_label_width;
1100 for (0..$#widths) {
1101 printf " %*s", $widths[$_],
1102 $i ? ('-' x$widths[$_]) : $perl_labels{$perl_names[$_]};
1103 }
1104 print "\n";
1105 }
1106 }
1107
1108 for my $field (qw(Ir Dr Dw COND IND
1109 N
1110 COND_m IND_m
1111 N
1112 Ir_m1 Dr_m1 Dw_m1
1113 N
1114 Ir_mm Dr_mm Dw_mm
1115 ))
1116 {
1117 next if $OPTS{fields} and ! exists $OPTS{fields}{$field};
1118
1119 if ($field eq 'N') {
1120 print "\n";
1121 next;
1122 }
1123
1124 if ($one_field) {
1125 printf "%-*s", $field_label_width, $test_name;
1126 }
1127 else {
1128 printf "%*s", $field_label_width, $field;
1129 }
1130
1131 for my $i (0..$#widths) {
1132 my $res2 = $res1->{$perl_names[$i]};
1133 my $p = $res2->{$field};
1134 if (!defined $p) {
1135 printf " %*s", $widths[$i], '-';
1136 }
1137 elsif ($OPTS{raw}) {
1138 printf " %*.1f", $widths[$i], $p;
1139 }
1140 else {
1141 printf " %*.2f", $widths[$i], $p * 100;
1142 }
1143 }
1144 print "\n";
1145 }
1146 }
1147}
1148
1149
1150# do_selftest(): check that we can parse known cachegrind()
1151# output formats. If the output of cachegrind changes, add a *new*
1152# test here; keep the old tests to make sure we continue to parse
1153# old cachegrinds
1154
1155sub do_selftest {
1156
1157 my @tests = (
1158 'standard',
1159 <<'EOF',
1160==32350== Cachegrind, a cache and branch-prediction profiler
1161==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
1162==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
1163==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
1164==32350==
1165--32350-- warning: L3 cache found, using its data for the LL simulation.
1166==32350==
1167==32350== I refs: 1,124,055
1168==32350== I1 misses: 5,573
1169==32350== LLi misses: 3,338
1170==32350== I1 miss rate: 0.49%
1171==32350== LLi miss rate: 0.29%
1172==32350==
1173==32350== D refs: 404,275 (259,191 rd + 145,084 wr)
1174==32350== D1 misses: 9,608 ( 6,098 rd + 3,510 wr)
1175==32350== LLd misses: 5,794 ( 2,781 rd + 3,013 wr)
1176==32350== D1 miss rate: 2.3% ( 2.3% + 2.4% )
1177==32350== LLd miss rate: 1.4% ( 1.0% + 2.0% )
1178==32350==
1179==32350== LL refs: 15,181 ( 11,671 rd + 3,510 wr)
1180==32350== LL misses: 9,132 ( 6,119 rd + 3,013 wr)
1181==32350== LL miss rate: 0.5% ( 0.4% + 2.0% )
1182==32350==
1183==32350== Branches: 202,372 (197,050 cond + 5,322 ind)
1184==32350== Mispredicts: 19,153 ( 17,742 cond + 1,411 ind)
1185==32350== Mispred rate: 9.4% ( 9.0% + 26.5% )
1186EOF
1187 {
1188 COND => 197050,
1189 COND_m => 17742,
1190 Dr => 259191,
1191 Dr_m1 => 6098,
1192 Dr_mm => 2781,
1193 Dw => 145084,
1194 Dw_m1 => 3510,
1195 Dw_mm => 3013,
1196 IND => 5322,
1197 IND_m => 1411,
1198 Ir => 1124055,
1199 Ir_m1 => 5573,
1200 Ir_mm => 3338,
1201 },
1202 );
1203
1204 for ('t', '.') {
1205 last if require "$_/test.pl";
1206 }
1207 plan(@tests / 3 * keys %VALID_FIELDS);
1208
1209 while (@tests) {
1210 my $desc = shift @tests;
1211 my $output = shift @tests;
1212 my $expected = shift @tests;
1213 my $p = parse_cachegrind($output);
1214 for (sort keys %VALID_FIELDS) {
1215 is($p->{$_}, $expected->{$_}, "$desc, $_");
1216 }
1217 }
1218}