This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Porting/bench.pl: preserve test order
[perl5.git] / Porting / bench.pl
CommitLineData
9e7973fa
DM
1#!/usr/bin/perl
2#
3# A tool for analysing the performance of the code snippets found in
4# t/perf/benchmarks or similar
5
6
7=head1 NAME
8
9bench.pl - Compare the performance of perl code snippets across multiple
10perls.
11
12=head1 SYNOPSIS
13
4a1358c2
FC
14 # Basic: run the tests in t/perf/benchmarks against two or
15 # more perls
9e7973fa 16
300b1525 17 bench.pl [options] perl1[=label1] perl2[=label2] ...
9e7973fa
DM
18
19 # Run bench.pl's own built-in sanity tests
20
21 bench.pl --action=selftest
22
23=head1 DESCRIPTION
24
25By default, F<bench.pl> will run code snippets found in
26F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
27how many instruction reads, data writes, branches, cache misses, etc. that
28one execution of the snippet uses. It will run them against two or more
29perl executables and show how much each test has gotten better or worse.
30
31It is modelled on the F<perlbench> tool, but since it measures instruction
32reads etc., rather than timings, it is much more precise and reproducible.
33It is also considerably faster, and is capable or running tests in
34parallel (with C<-j>). Rather than displaying a single relative
35percentage per test/perl combination, it displays values for 13 different
36measurements, such as instruction reads, conditional branch misses etc.
37
38There are options to write the raw data to a file, and to read it back.
39This means that you can view the same run data in different views with
40different selection and sort options.
41
42The optional C<=label> after each perl executable is used in the display
43output.
44
45=head1 OPTIONS
46
47=over 4
48
49=item *
50
51--action=I<foo>
52
53What action to perform. The default is I<grind>, which runs the benchmarks
54using I<cachegrind> as the back end. The only other action at the moment is
55I<selftest>, which runs some basic sanity checks and produces TAP output.
56
57=item *
58
59--average
60
61Only display the overall average, rather than the results for each
62individual test.
63
64=item *
65
66--benchfile=I<foo>
67
68The path of the file which contains the benchmarks (F<t/perf/benchmarks>
69by default).
70
71=item *
72
73--bisect=I<field,minval,maxval>
74
75Run a single test against one perl and exit with a zero status if the
76named field is in the specified range; exit 1 otherwise. It will complain
77if more than one test or perl has been specified. It is intended to be
78called as part of a bisect run, to determine when something changed.
79For example,
80
4a1358c2
FC
81 bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
82 ./miniperl
9e7973fa
DM
83
84might be called from bisect to find when the number of instruction reads
85for test I<foo> falls outside the range 100..105.
86
87=item *
88
89--debug
90
91Enable verbose debugging output.
92
93=item *
94
95--fields=I<a,b,c>
96
97Display only the specified fields; for example,
98
99 --fields=Ir,Ir_m,Ir_mm
100
101If only one field is selected, the output is in more compact form.
102
103=item *
104
105--grindargs=I<foo>
106
107Optional command-line arguments to pass to cachegrind invocations.
108
109=item *
110
111---help
112
113Display basic usage information.
114
115=item *
116
117-j I<N>
118--jobs=I<N>
119
120Run I<N> jobs in parallel (default 1). This determines how many cachegrind
121process will running at a time, and should generally be set to the number
122of CPUs available.
123
124=item *
125
126--norm=I<foo>
127
128Specify which perl column in the output to treat as the 100% norm.
129It may be a column number (0..N-1) or a perl executable name or label.
130It defaults to the leftmost column.
131
132=item *
133
134--perlargs=I<foo>
135
136Optional command-line arguments to pass to each perl that is run as part of
137a cachegrind session. For example, C<--perlargs=-Ilib>.
138
139=item *
140
141--raw
142
143Display raw data counts rather than percentages in the outputs. This
144allows you to see the exact number of intruction reads, branch misses etc.
145for each test/perl combination. It also causes the C<AVERAGE> display
146per field to be calculated based on the average of each tests's count
147rather than average of each percentage. This means that tests with very
148high counts will dominate.
149
150=item *
151
65f0b1c9 152--sort=I<field:perl>
9e7973fa
DM
153
154Order the tests in the output based on the value of I<field> in the
155column I<perl>. The I<perl> value is as per C<--norm>. For example
156
4a1358c2
FC
157 bench.pl --sort=Dw:perl-5.20.0 \
158 perl-5.16.0 perl-5.18.0 perl-5.20.0
9e7973fa
DM
159
160=item *
161
162-r I<file>
163--read=I<file>
164
165Read in saved data from a previous C<--write> run from the specified file.
166
167Requires C<JSON::PP> to be available.
168
169=item *
170
171--tests=I<FOO>
172
173Specify a subset of tests to run (or in the case of C<--read>, to display).
174It may be either a comma-separated list of test names, or a regular
175expression. For example
176
177 --tests=expr::assign::scalar_lex,expr::assign::2list_lex
178 --tests=/^expr::/
179
180=item *
181
182--verbose
183
184Display progress information.
185
186=item *
187
188-w I<file>
189--write=I<file>
190
191Save the raw data to the specified file. It can be read back later with
192C<--read>.
193
194Requires C<JSON::PP> to be available.
195
196=back
197
198=cut
199
200
201
202use 5.010000;
203use warnings;
204use strict;
205use Getopt::Long qw(:config no_auto_abbrev);
206use IPC::Open2 ();
207use IO::Select;
c2d21e7a 208use IO::File;
9e7973fa
DM
209use POSIX ":sys_wait_h";
210
211# The version of the file format used to save data. We refuse to process
212# the file if the integer component differs.
213
214my $FORMAT_VERSION = 1.0;
215
216# The fields we know about
217
218my %VALID_FIELDS = map { $_ => 1 }
219 qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
220
221sub usage {
222 die <<EOF;
223usage: $0 [options] perl[=label] ...
224 --action=foo What action to perform [default: grind].
225 --average Only display average, not individual test results.
226 --benchfile=foo File containing the benchmarks;
227 [default: t/perf/benchmarks].
228 --bisect=f,min,max run a single test against one perl and exit with a
229 zero status if the named field is in the specified
230 range; exit 1 otherwise.
231 --debug Enable verbose debugging output.
232 --fields=a,b,c Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
233 --grindargs=foo Optional command-line args to pass to cachegrind.
234 --help Display this help.
235 -j|--jobs=N Run N jobs in parallel [default 1].
236 --norm=perl Which perl column to treat as 100%; may be a column
237 number (0..N-1) or a perl executable name or label;
238 [default: 0].
239 --perlargs=foo Optional command-line args to pass to each perl to run.
240 --raw Display raw data counts rather than percentages.
65f0b1c9 241 --sort=field:perl Sort the tests based on the value of 'field' in the
9e7973fa
DM
242 column 'perl'. The perl value is as per --norm.
243 -r|--read=file Read in previously saved data from the specified file.
244 --tests=FOO Select only the specified tests from the benchmarks file;
245 FOO may be either of the form 'foo,bar' or '/regex/';
246 [default: all tests].
247 --verbose Display progress information.
248 -w|--write=file Save the raw data to the specified file.
249
250--action is one of:
251 grind run the code under cachegrind
252 selftest perform a selftest; produce TAP output
253
254The command line ends with one or more specified perl executables,
255which will be searched for in the current \$PATH. Each binary name may
256have an optional =LABEL appended, which will be used rather than the
257executable name in output. E.g.
258
259 perl-5.20.1=PRE-BUGFIX perl-5.20.1-new=POST-BUGFIX
260EOF
261}
262
263my %OPTS = (
264 action => 'grind',
265 average => 0,
266 benchfile => 't/perf/benchmarks',
267 bisect => undef,
268 debug => 0,
269 grindargs => '',
270 fields => undef,
271 jobs => 1,
272 norm => 0,
273 perlargs => '',
274 raw => 0,
275 read => undef,
276 sort => undef,
277 tests => undef,
278 verbose => 0,
279 write => undef,
280);
281
282
283# process command-line args and call top-level action
284
285{
286 GetOptions(
287 'action=s' => \$OPTS{action},
288 'average' => \$OPTS{average},
289 'benchfile=s' => \$OPTS{benchfile},
290 'bisect=s' => \$OPTS{bisect},
291 'debug' => \$OPTS{debug},
292 'grindargs=s' => \$OPTS{grindargs},
293 'help' => \$OPTS{help},
294 'fields=s' => \$OPTS{fields},
295 'jobs|j=i' => \$OPTS{jobs},
296 'norm=s' => \$OPTS{norm},
297 'perlargs=s' => \$OPTS{perlargs},
298 'raw' => \$OPTS{raw},
299 'read|r=s' => \$OPTS{read},
300 'sort=s' => \$OPTS{sort},
301 'tests=s' => \$OPTS{tests},
302 'verbose' => \$OPTS{verbose},
303 'write|w=s' => \$OPTS{write},
304 ) or usage;
305
306 usage if $OPTS{help};
307
308
309 if (defined $OPTS{read} and defined $OPTS{write}) {
310 die "Error: can't specify both --read and --write options\n";
311 }
312
313 if (defined $OPTS{read} or defined $OPTS{write}) {
314 # fail early if it's not present
315 require JSON::PP;
316 }
317
318 if (defined $OPTS{fields}) {
319 my @f = split /,/, $OPTS{fields};
320 for (@f) {
321 die "Error: --fields: unknown field '$_'\n"
322 unless $VALID_FIELDS{$_};
323 }
324 my %f = map { $_ => 1 } @f;
325 $OPTS{fields} = \%f;
326 }
327
328 my %valid_actions = qw(grind 1 selftest 1);
329 unless ($valid_actions{$OPTS{action}}) {
330 die "Error: unrecognised action '$OPTS{action}'\n"
331 . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
332 }
333
334 if (defined $OPTS{sort}) {
335 my @s = split /:/, $OPTS{sort};
336 if (@s != 2) {
337 die "Error: --sort argument should be of the form field:perl: "
338 . "'$OPTS{sort}'\n";
339 }
340 my ($field, $perl) = @s;
341 die "Error: --sort: unknown field '$field\n"
342 unless $VALID_FIELDS{$field};
343 # the 'perl' value will be validated later, after we have processed
344 # the perls
345 $OPTS{'sort-field'} = $field;
346 $OPTS{'sort-perl'} = $perl;
347 }
348
349 if ($OPTS{action} eq 'selftest') {
350 if (@ARGV) {
351 die "Error: no perl executables may be specified with --read\n"
352 }
353 }
354 elsif (defined $OPTS{bisect}) {
355 die "Error: exactly one perl executable must be specified for bisect\n"
356 unless @ARGV == 1;
357 die "Error: Can't specify both --bisect and --read\n"
358 if defined $OPTS{read};
359 die "Error: Can't specify both --bisect and --write\n"
360 if defined $OPTS{write};
361 }
362 elsif (defined $OPTS{read}) {
363 if (@ARGV) {
364 die "Error: no perl executables may be specified with --read\n"
365 }
366 }
367 elsif ($OPTS{raw}) {
368 unless (@ARGV) {
369 die "Error: at least one perl executable must be specified\n";
370 }
371 }
372 else {
373 unless (@ARGV >= 2) {
374 die "Error: at least two perl executables must be specified\n";
375 }
376 }
377
378 if ($OPTS{action} eq 'grind') {
379 do_grind(\@ARGV);
380 }
381 elsif ($OPTS{action} eq 'selftest') {
382 do_selftest();
383 }
384}
385exit 0;
386
387
388# Given a hash ref keyed by test names, filter it by deleting unwanted
389# tests, based on $OPTS{tests}.
390
391sub filter_tests {
392 my ($tests) = @_;
393
394 my $opt = $OPTS{tests};
395 return unless defined $opt;
396
397 my @tests;
398
399 if ($opt =~ m{^/}) {
400 $opt =~ s{^/(.+)/$}{$1}
401 or die "Error: --tests regex must be of the form /.../\n";
402 for (keys %$tests) {
403 delete $tests->{$_} unless /$opt/;
404 }
405 }
406 else {
407 my %t;
408 for (split /,/, $opt) {
409 die "Error: no such test found: '$_'\n" unless exists $tests->{$_};
410 $t{$_} = 1;
411 }
412 for (keys %$tests) {
413 delete $tests->{$_} unless exists $t{$_};
414 }
415 }
416}
417
418
419# Read in the test file, and filter out any tests excluded by $OPTS{tests}
957d8930
DM
420# return a hash ref { testname => { test }, ... }
421# and an array ref of the original test names order,
9e7973fa
DM
422
423sub read_tests_file {
424 my ($file) = @_;
425
426 my $ta = do $file;
427 unless ($ta) {
428 die "Error: can't parse '$file': $@\n" if $@;
429 die "Error: can't read '$file': $!\n";
430 }
431
957d8930
DM
432 my @orig_order;
433 for (my $i=0; $i < @$ta; $i += 2) {
434 push @orig_order, $ta->[$i];
435 }
436
9e7973fa
DM
437 my $t = { @$ta };
438 filter_tests($t);
957d8930 439 return $t, \@orig_order;
9e7973fa
DM
440}
441
442
443# Process the perl/column argument of options like --norm and --sort.
444# Return the index of the matching perl.
445
446sub select_a_perl {
447 my ($perl, $perls, $who) = @_;
448
449 if ($perl =~ /^[0-9]$/) {
450 die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
451 unless $perl < @$perls;
452 return $perl;
453 }
454 else {
455 my @perl = grep $perls->[$_][0] eq $perl
456 || $perls->[$_][1] eq $perl,
457 0..$#$perls;
458 die "Error: $who: unrecognised perl '$perl'\n"
459 unless @perl;
460 die "Error: $who: ambiguous perl '$perl'\n"
461 if @perl > 1;
462 return $perl[0];
463 }
464}
465
466
467# Validate the list of perl=label on the command line.
9e7973fa
DM
468# Return a list of [ exe, label ] pairs.
469
470sub process_perls {
471 my @results;
472 for my $p (@_) {
473 my ($perl, $label) = split /=/, $p, 2;
474 $label //= $perl;
475 my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
476 die "Error: unable to execute '$perl': $r" if $r ne "ok\n";
477 push @results, [ $perl, $label ];
478 }
9e7973fa
DM
479 return @results;
480}
481
482
8fbd1c2c 483
9e7973fa
DM
484# Return a string containing perl test code wrapped in a loop
485# that runs $ARGV[0] times
486
487sub make_perl_prog {
488 my ($test, $desc, $setup, $code) = @_;
489
490 return <<EOF;
491# $desc
492package $test;
493BEGIN { srand(0) }
494$setup;
495for my \$__loop__ (1..\$ARGV[0]) {
496 $code;
497}
498EOF
499}
500
501
502# Parse the output from cachegrind. Return a hash ref.
503# See do_selftest() for examples of the output format.
504
505sub parse_cachegrind {
506 my ($output, $id, $perl) = @_;
507
508 my %res;
509
510 my @lines = split /\n/, $output;
511 for (@lines) {
512 unless (s/(==\d+==)|(--\d+--) //) {
513 die "Error: while executing $id:\n"
514 . "unexpected code or cachegrind output:\n$_\n";
515 }
516 if (/I refs:\s+([\d,]+)/) {
517 $res{Ir} = $1;
518 }
519 elsif (/I1 misses:\s+([\d,]+)/) {
520 $res{Ir_m1} = $1;
521 }
522 elsif (/LLi misses:\s+([\d,]+)/) {
523 $res{Ir_mm} = $1;
524 }
525 elsif (/D refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
526 @res{qw(Dr Dw)} = ($1,$2);
527 }
528 elsif (/D1 misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
529 @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
530 }
531 elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
532 @res{qw(Dr_mm Dw_mm)} = ($1,$2);
533 }
534 elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
535 @res{qw(COND IND)} = ($1,$2);
536 }
537 elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
538 @res{qw(COND_m IND_m)} = ($1,$2);
539 }
540 }
541
542 for my $field (keys %VALID_FIELDS) {
543 die "Error: can't parse '$field' field from cachegrind output:\n$output"
544 unless exists $res{$field};
545 $res{$field} =~ s/,//g;
546 }
547
548 return \%res;
549}
550
551
552# Handle the 'grind' action
553
554sub do_grind {
555 my ($perl_args) = @_; # the residue of @ARGV after option processing
556
957d8930 557 my ($loop_counts, $perls, $results, $tests, $order);
9e7973fa
DM
558 my ($bisect_field, $bisect_min, $bisect_max);
559
560 if (defined $OPTS{bisect}) {
561 ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
562 die "Error: --bisect option must be of form 'field,integer,integer'\n"
563 unless
564 defined $bisect_max
565 and $bisect_min =~ /^[0-9]+$/
566 and $bisect_max =~ /^[0-9]+$/;
567
568 die "Error: unrecognised field '$bisect_field' in --bisect option\n"
569 unless $VALID_FIELDS{$bisect_field};
570
571 die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
572 if $bisect_min > $bisect_max;
573 }
574
575 if (defined $OPTS{read}) {
576 open my $in, '<:encoding(UTF-8)', $OPTS{read}
577 or die " Error: can't open $OPTS{read} for reading: $!\n";
578 my $data = do { local $/; <$in> };
579 close $in;
580
581 my $hash = JSON::PP::decode_json($data);
582 if (int($FORMAT_VERSION) < int($hash->{version})) {
583 die "Error: unsupported version $hash->{version} in file"
584 . "'$OPTS{read}' (too new)\n";
585 }
957d8930
DM
586 ($loop_counts, $perls, $results, $tests, $order) =
587 @$hash{qw(loop_counts perls results tests order)};
9e7973fa
DM
588
589 filter_tests($results);
590 filter_tests($tests);
957d8930
DM
591
592 if (!$order) {
593 $order = [ sort keys %$tests ];
594 }
9e7973fa
DM
595 }
596 else {
597 # How many times to execute the loop for the two trials. The lower
598 # value is intended to do the loop enough times that branch
599 # prediction has taken hold; the higher loop allows us to see the
600 # branch misses after that
601 $loop_counts = [10, 20];
602
957d8930 603 ($tests, $order) = read_tests_file($OPTS{benchfile});
9e7973fa
DM
604 die "Error: only a single test may be specified with --bisect\n"
605 if defined $OPTS{bisect} and keys %$tests != 1;
606
607 $perls = [ process_perls(@$perl_args) ];
8fbd1c2c
DM
608
609
957d8930 610 $results = grind_run($tests, $order, $perls, $loop_counts);
9e7973fa
DM
611 }
612
8fbd1c2c
DM
613 # now that we have a list of perls, use it to process the
614 # 'perl' component of the --norm and --sort args
615
616 $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
617 if (defined $OPTS{'sort-perl'}) {
618 $OPTS{'sort-perl'} =
619 select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
620 }
621
9e7973fa
DM
622 if (defined $OPTS{write}) {
623 my $json = JSON::PP::encode_json({
624 version => $FORMAT_VERSION,
625 loop_counts => $loop_counts,
626 perls => $perls,
627 results => $results,
628 tests => $tests,
957d8930 629 order => $order,
9e7973fa
DM
630 });
631
632 open my $out, '>:encoding(UTF-8)', $OPTS{write}
633 or die " Error: can't open $OPTS{write} for writing: $!\n";
634 print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
635 close $out or die "Error: closing file '$OPTS{write}': $!\n";
636 }
637 else {
638 my ($processed, $averages) =
639 grind_process($results, $perls, $loop_counts);
640
641 if (defined $OPTS{bisect}) {
642 my @r = values %$results;
643 die "Panic: expected exactly one test result in bisect\n"
644 if @r != 1;
645 @r = values %{$r[0]};
646 die "Panic: expected exactly one perl result in bisect\n"
647 if @r != 1;
648 my $c = $r[0]{$bisect_field};
649 die "Panic: no result in bisect for field '$bisect_field'\n"
650 unless defined $c;
651 exit 0 if $bisect_min <= $c and $c <= $bisect_max;
652 exit 1;
653 }
654 else {
957d8930 655 grind_print($processed, $averages, $perls, $tests, $order);
9e7973fa
DM
656 }
657 }
658}
659
660
661# Run cachegrind for every test/perl combo.
662# It may run several processes in parallel when -j is specified.
663# Return a hash ref suitable for input to grind_process()
664
665sub grind_run {
957d8930 666 my ($tests, $order, $perls, $counts) = @_;
9e7973fa
DM
667
668 # Build a list of all the jobs to run
669
670 my @jobs;
671
957d8930 672 for my $test (grep $tests->{$_}, @$order) {
9e7973fa
DM
673
674 # Create two test progs: one with an empty loop and one with code.
675 # Note that the empty loop is actually '{1;}' rather than '{}';
676 # this causes the loop to have a single nextstate rather than a
677 # stub op, so more closely matches the active loop; e.g.:
678 # {1;} => nextstate; unstack
679 # {$x=1;} => nextstate; const; gvsv; sassign; unstack
680 my @prog = (
681 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
682 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
683 );
684
685 for my $p (@$perls) {
686 my ($perl, $label) = @$p;
687
688 # Run both the empty loop and the active loop
689 # $counts->[0] and $counts->[1] times.
690
691 for my $i (0,1) {
692 for my $j (0,1) {
693 my $cmd = "PERL_HASH_SEED=0 "
694 . "valgrind --tool=cachegrind --branch-sim=yes "
695 . "--cachegrind-out-file=/dev/null "
696 . "$OPTS{grindargs} "
697 . "$perl $OPTS{perlargs} - $counts->[$j] 2>&1";
698 # for debugging and error messages
699 my $id = "$test/$perl "
700 . ($i ? "active" : "empty") . "/"
701 . ($j ? "long" : "short") . " loop";
702
703 push @jobs, {
704 test => $test,
705 perl => $perl,
706 plabel => $label,
707 cmd => $cmd,
708 prog => $prog[$i],
709 active => $i,
710 loopix => $j,
711 id => $id,
712 };
713 }
714 }
715 }
716 }
717
718 # Execute each cachegrind and store the results in %results.
719
720 local $SIG{PIPE} = 'IGNORE';
721
722 my $max_jobs = $OPTS{jobs};
723 my $running = 0; # count of executing jobs
724 my %pids; # map pids to jobs
725 my %fds; # map fds to jobs
726 my %results;
727 my $select = IO::Select->new();
728
729 while (@jobs or $running) {
730
731 if ($OPTS{debug}) {
732 printf "Main loop: pending=%d running=%d\n",
733 scalar(@jobs), $running;
734 }
735
736 # Start new jobs
737
738 while (@jobs && $running < $max_jobs) {
739 my $job = shift @jobs;
740 my ($id, $cmd) =@$job{qw(id cmd)};
741
742 my ($in, $out, $pid);
743 warn "Starting $id\n" if $OPTS{verbose};
744 eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
745 or die "Error: while starting cachegrind subprocess"
746 ." for $id:\n$@";
747 $running++;
748 $pids{$pid} = $job;
749 $fds{"$out"} = $job;
750 $job->{out_fd} = $out;
751 $job->{output} = '';
752 $job->{pid} = $pid;
753
754 $out->blocking(0);
755 $select->add($out);
756
757 if ($OPTS{debug}) {
758 print "Started pid $pid for $id\n";
759 }
760
761 # Note:
762 # In principle we should write to $in in the main select loop,
763 # since it may block. In reality,
764 # a) the code we write to the perl process's stdin is likely
765 # to be less than the OS's pipe buffer size;
766 # b) by the time the perl process has read in all its stdin,
767 # the only output it should have generated is a few lines
768 # of cachegrind output preamble.
769 # If these assumptions change, then perform the following print
770 # in the select loop instead.
771
772 print $in $job->{prog};
773 close $in;
774 }
775
776 # Get output of running jobs
777
778 if ($OPTS{debug}) {
779 printf "Select: waiting on (%s)\n",
780 join ', ', sort { $a <=> $b } map $fds{$_}{pid},
781 $select->handles;
782 }
783
784 my @ready = $select->can_read;
785
786 if ($OPTS{debug}) {
787 printf "Select: pids (%s) ready\n",
788 join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
789 }
790
791 unless (@ready) {
792 die "Panic: select returned no file handles\n";
793 }
794
795 for my $fd (@ready) {
796 my $j = $fds{"$fd"};
797 my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
798 unless (defined $r) {
799 die "Panic: Read from process running $j->{id} gave:\n$!";
800 }
801 next if $r;
802
803 # EOF
804
805 if ($OPTS{debug}) {
806 print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
807 }
808
809 $select->remove($j->{out_fd});
810 close($j->{out_fd})
811 or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
812 $running--;
813 delete $fds{"$j->{out_fd}"};
814 my $output = $j->{output};
815
816 if ($OPTS{debug}) {
817 my $p = $j->{prog};
818 $p =~ s/^/ : /mg;
819 my $o = $output;
820 $o =~ s/^/ : /mg;
821
822 print "\n$j->{id}/\nCommand: $j->{cmd}\n"
823 . "Input:\n$p"
824 . "Output\n$o";
825 }
826
827 $results{$j->{test}}{$j->{perl}}[$j->{active}][$j->{loopix}]
828 = parse_cachegrind($output, $j->{id}, $j->{perl});
829 }
830
831 # Reap finished jobs
832
833 while (1) {
834 my $kid = waitpid(-1, WNOHANG);
835 my $ret = $?;
836 last if $kid <= 0;
837
838 unless (exists $pids{$kid}) {
839 die "Panic: reaped unexpected child $kid";
840 }
841 my $j = $pids{$kid};
842 if ($ret) {
843 die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
844 . "with the following output\n:$j->{output}\n";
845 }
846 delete $pids{$kid};
847 }
848 }
849
850 return \%results;
851}
852
853
854
855
856# grind_process(): process the data that has been extracted from
857# cachgegrind's output.
858#
859# $res is of the form ->{benchmark_name}{perl_name}[active][count]{field_name},
860# where active is 0 or 1 indicating an empty or active loop,
861# count is 0 or 1 indicating a short or long loop. E.g.
862#
863# $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][10]{Dw_mm}
864#
865# The $res data structure is modified in-place by this sub.
866#
867# $perls is [ [ perl-exe, perl-label], .... ].
868#
869# $counts is [ N, M ] indicating the counts for the short and long loops.
870#
871#
872# return \%output, \%averages, where
873#
874# $output{benchmark_name}{perl_name}{field_name} = N
875# $averages{perl_name}{field_name} = M
876#
877# where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
878# M is the average raw count over all tests ($OPTS{raw}), or
879# 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
880
881sub grind_process {
882 my ($res, $perls, $counts) = @_;
883
884 # Process the four results for each test/perf combo:
885 # Convert
886 # $res->{benchmark_name}{perl_name}[active][count]{field_name} = n
887 # to
888 # $res->{benchmark_name}{perl_name}{field_name} = averaged_n
889 #
890 # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
891 # empty loops, eliminating startup time
892 # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
893 # active loops, eliminating startup time
894 # (the two startup times may be different because different code
895 # is being compiled); the difference of the two results above
896 # divided by the count difference is the time to execute the
897 # active code once, eliminating both startup and loop overhead.
898
899 for my $tests (values %$res) {
900 for my $r (values %$tests) {
901 my $r2;
902 for (keys %{$r->[0][0]}) {
903 my $n = ( ($r->[1][1]{$_} - $r->[1][0]{$_})
904 - ($r->[0][1]{$_} - $r->[0][0]{$_})
905 ) / ($counts->[1] - $counts->[0]);
906 $r2->{$_} = $n;
907 }
908 $r = $r2;
909 }
910 }
911
912 my %totals;
913 my %counts;
914 my %data;
915
916 my $perl_norm = $perls->[$OPTS{norm}][0]; # the name of the reference perl
917
918 for my $test_name (keys %$res) {
919 my $res1 = $res->{$test_name};
920 my $res2_norm = $res1->{$perl_norm};
921 for my $perl (keys %$res1) {
922 my $res2 = $res1->{$perl};
923 for my $field (keys %$res2) {
924 my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});
925
926 if ($OPTS{raw}) {
927 # Avoid annoying '-0.0' displays. Ideally this number
928 # should never be negative, but fluctuations in
929 # startup etc can theoretically make this happen
930 $q = 0 if ($q <= 0 && $q > -0.1);
931 $totals{$perl}{$field} += $q;
932 $counts{$perl}{$field}++;
933 $data{$test_name}{$perl}{$field} = $q;
934 next;
935 }
936
937 # $p and $q are notionally integer counts, but
938 # due to variations in startup etc, it's possible for a
939 # count which is supposedly zero to be calculated as a
940 # small positive or negative value.
941 # In this case, set it to zero. Further below we
942 # special-case zeros to avoid division by zero errors etc.
943
944 $p = 0.0 if $p < 0.01;
945 $q = 0.0 if $q < 0.01;
946
947 if ($p == 0.0 && $q == 0.0) {
948 # Both perls gave a count of zero, so no change:
949 # treat as 100%
950 $totals{$perl}{$field} += 1;
951 $counts{$perl}{$field}++;
952 $data{$test_name}{$perl}{$field} = 1;
953 }
954 elsif ($p == 0.0 || $q == 0.0) {
955 # If either count is zero, there were too few events
956 # to give a meaningful ratio (and we will end up with
957 # division by zero if we try). Mark the result undef,
958 # indicating that it shouldn't be displayed; and skip
959 # adding to the average
960 $data{$test_name}{$perl}{$field} = undef;
961 }
962 else {
963 # For averages, we record q/p rather than p/q.
964 # Consider a test where perl_norm took 1000 cycles
965 # and perlN took 800 cycles. For the individual
966 # results we display p/q, or 1.25; i.e. a quarter
967 # quicker. For the averages, we instead sum all
968 # the 0.8's, which gives the total cycles required to
969 # execute all tests, with all tests given equal
970 # weight. Later we reciprocate the final result,
971 # i.e. 1/(sum(qi/pi)/n)
972
973 $totals{$perl}{$field} += $q/$p;
974 $counts{$perl}{$field}++;
975 $data{$test_name}{$perl}{$field} = $p/$q;
976 }
977 }
978 }
979 }
980
981 # Calculate averages based on %totals and %counts accumulated earlier.
982
983 my %averages;
984 for my $perl (keys %totals) {
985 my $t = $totals{$perl};
986 for my $field (keys %$t) {
987 $averages{$perl}{$field} = $OPTS{raw}
988 ? $t->{$field} / $counts{$perl}{$field}
989 # reciprocal - see comments above
990 : $counts{$perl}{$field} / $t->{$field};
991 }
992 }
993
994 return \%data, \%averages;
995}
996
997
998# grind_print(): display the tabulated results of all the cachegrinds.
999#
1000# Arguments are of the form:
1001# $results->{benchmark_name}{perl_name}{field_name} = N
1002# $averages->{perl_name}{field_name} = M
1003# $perls = [ [ perl-exe, perl-label ], ... ]
1004# $tests->{test_name}{desc => ..., ...}
1005
1006sub grind_print {
957d8930 1007 my ($results, $averages, $perls, $tests, $order) = @_;
9e7973fa
DM
1008
1009 my @perl_names = map $_->[0], @$perls;
1010 my %perl_labels;
1011 $perl_labels{$_->[0]} = $_->[1] for @$perls;
1012
1013 my $field_label_width = 6;
1014 # Calculate the width to display for each column.
1015 my $min_width = $OPTS{raw} ? 8 : 6;
1016 my @widths = map { length($_) < $min_width ? $min_width : length($_) }
1017 @perl_labels{@perl_names};
1018
1019 # Print header.
1020
1021 print <<EOF;
1022Key:
1023 Ir Instruction read
1024 Dr Data read
1025 Dw Data write
1026 COND conditional branches
1027 IND indirect branches
1028 _m branch predict miss
1029 _m1 level 1 cache miss
1030 _mm last cache (e.g. L3) miss
1031 - indeterminate percentage (e.g. 1/0)
1032
1033EOF
1034
1035 if ($OPTS{raw}) {
1036 print "The numbers represent raw counts per loop iteration.\n";
1037 }
1038 else {
1039 print <<EOF;
1040The numbers represent relative counts per loop iteration, compared to
1041$perl_labels{$perl_names[0]} at 100.0%.
1042Higher is better: for example, using half as many instructions gives 200%,
1043while using twice as many gives 50%.
1044EOF
1045 }
1046
1047 # Populate @test_names with the tests in sorted order.
1048
1049 my @test_names;
1050 unless ($OPTS{average}) {
1051 if (defined $OPTS{'sort-field'}) {
1052 my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
1053 my $perl = $perls->[$perlix][0];
1054 @test_names = sort
1055 {
1056 $results->{$a}{$perl}{$field}
1057 <=> $results->{$b}{$perl}{$field}
1058 }
1059 keys %$results;
1060 }
1061 else {
957d8930 1062 @test_names = grep $results->{$_}, @$order;
9e7973fa
DM
1063 }
1064 }
1065
1066 # No point in displaying average for only one test.
1067 push @test_names, 'AVERAGE' unless @test_names == 1;
1068
1069 # If only a single field is to be displayed, use a more compact
1070 # format with only a single line of output per test.
1071
1072 my $one_field = defined $OPTS{fields} && keys(%{$OPTS{fields}}) == 1;
1073
1074 if ($one_field) {
91cde97c 1075 print "Results for field " . (keys(%{$OPTS{fields}}))[0] . ".\n";
9e7973fa
DM
1076
1077 # The first column will now contain test names rather than
1078 # field names; Calculate the max width.
1079
1080 $field_label_width = 0;
1081 for (@test_names) {
1082 $field_label_width = length if length > $field_label_width;
1083 }
1084
1085 # Print the perl executables header.
1086
1087 print "\n";
1088 for my $i (0,1) {
1089 print " " x $field_label_width;
1090 for (0..$#widths) {
1091 printf " %*s", $widths[$_],
1092 $i ? ('-' x$widths[$_]) : $perl_labels{$perl_names[$_]};
1093 }
1094 print "\n";
1095 }
1096 }
1097
1098 # Dump the results for each test.
1099
1100 for my $test_name (@test_names) {
1101 my $doing_ave = ($test_name eq 'AVERAGE');
1102 my $res1 = $doing_ave ? $averages : $results->{$test_name};
1103
1104 unless ($one_field) {
1105 print "\n$test_name";
1106 print "\n$tests->{$test_name}{desc}" unless $doing_ave;
1107 print "\n\n";
1108
1109 # Print the perl executables header.
1110 for my $i (0,1) {
1111 print " " x $field_label_width;
1112 for (0..$#widths) {
1113 printf " %*s", $widths[$_],
1114 $i ? ('-' x$widths[$_]) : $perl_labels{$perl_names[$_]};
1115 }
1116 print "\n";
1117 }
1118 }
1119
1120 for my $field (qw(Ir Dr Dw COND IND
1121 N
1122 COND_m IND_m
1123 N
1124 Ir_m1 Dr_m1 Dw_m1
1125 N
1126 Ir_mm Dr_mm Dw_mm
1127 ))
1128 {
1129 next if $OPTS{fields} and ! exists $OPTS{fields}{$field};
1130
1131 if ($field eq 'N') {
1132 print "\n";
1133 next;
1134 }
1135
91cde97c
DM
1136 if ($one_field) {
1137 printf "%-*s", $field_label_width, $test_name;
1138 }
1139 else {
1140 printf "%*s", $field_label_width, $field;
1141 }
9e7973fa
DM
1142
1143 for my $i (0..$#widths) {
1144 my $res2 = $res1->{$perl_names[$i]};
1145 my $p = $res2->{$field};
1146 if (!defined $p) {
1147 printf " %*s", $widths[$i], '-';
1148 }
1149 elsif ($OPTS{raw}) {
1150 printf " %*.1f", $widths[$i], $p;
1151 }
1152 else {
1153 printf " %*.2f", $widths[$i], $p * 100;
1154 }
1155 }
1156 print "\n";
1157 }
1158 }
1159}
1160
1161
1162# do_selftest(): check that we can parse known cachegrind()
1163# output formats. If the output of cachegrind changes, add a *new*
1164# test here; keep the old tests to make sure we continue to parse
1165# old cachegrinds
1166
1167sub do_selftest {
1168
1169 my @tests = (
1170 'standard',
1171 <<'EOF',
1172==32350== Cachegrind, a cache and branch-prediction profiler
1173==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
1174==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
1175==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
1176==32350==
1177--32350-- warning: L3 cache found, using its data for the LL simulation.
1178==32350==
1179==32350== I refs: 1,124,055
1180==32350== I1 misses: 5,573
1181==32350== LLi misses: 3,338
1182==32350== I1 miss rate: 0.49%
1183==32350== LLi miss rate: 0.29%
1184==32350==
1185==32350== D refs: 404,275 (259,191 rd + 145,084 wr)
1186==32350== D1 misses: 9,608 ( 6,098 rd + 3,510 wr)
1187==32350== LLd misses: 5,794 ( 2,781 rd + 3,013 wr)
1188==32350== D1 miss rate: 2.3% ( 2.3% + 2.4% )
1189==32350== LLd miss rate: 1.4% ( 1.0% + 2.0% )
1190==32350==
1191==32350== LL refs: 15,181 ( 11,671 rd + 3,510 wr)
1192==32350== LL misses: 9,132 ( 6,119 rd + 3,013 wr)
1193==32350== LL miss rate: 0.5% ( 0.4% + 2.0% )
1194==32350==
1195==32350== Branches: 202,372 (197,050 cond + 5,322 ind)
1196==32350== Mispredicts: 19,153 ( 17,742 cond + 1,411 ind)
1197==32350== Mispred rate: 9.4% ( 9.0% + 26.5% )
1198EOF
1199 {
1200 COND => 197050,
1201 COND_m => 17742,
1202 Dr => 259191,
1203 Dr_m1 => 6098,
1204 Dr_mm => 2781,
1205 Dw => 145084,
1206 Dw_m1 => 3510,
1207 Dw_mm => 3013,
1208 IND => 5322,
1209 IND_m => 1411,
1210 Ir => 1124055,
1211 Ir_m1 => 5573,
1212 Ir_mm => 3338,
1213 },
1214 );
1215
1216 for ('t', '.') {
1217 last if require "$_/test.pl";
1218 }
1219 plan(@tests / 3 * keys %VALID_FIELDS);
1220
1221 while (@tests) {
1222 my $desc = shift @tests;
1223 my $output = shift @tests;
1224 my $expected = shift @tests;
1225 my $p = parse_cachegrind($output);
1226 for (sort keys %VALID_FIELDS) {
1227 is($p->{$_}, $expected->{$_}, "$desc, $_");
1228 }
1229 }
1230}