This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl #127494] TODO test for $AUTOLOAD being set for DESTROY
[perl5.git] / Porting / bench.pl
CommitLineData
9e7973fa
DM
1#!/usr/bin/perl
2#
3# A tool for analysing the performance of the code snippets found in
4# t/perf/benchmarks or similar
5
6
7=head1 NAME
8
9bench.pl - Compare the performance of perl code snippets across multiple
10perls.
11
12=head1 SYNOPSIS
13
4a1358c2
FC
14 # Basic: run the tests in t/perf/benchmarks against two or
15 # more perls
9e7973fa 16
300b1525 17 bench.pl [options] perl1[=label1] perl2[=label2] ...
9e7973fa
DM
18
19 # Run bench.pl's own built-in sanity tests
20
21 bench.pl --action=selftest
22
23=head1 DESCRIPTION
24
25By default, F<bench.pl> will run code snippets found in
26F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
27how many instruction reads, data writes, branches, cache misses, etc. that
28one execution of the snippet uses. It will run them against two or more
29perl executables and show how much each test has gotten better or worse.
30
31It is modelled on the F<perlbench> tool, but since it measures instruction
32reads etc., rather than timings, it is much more precise and reproducible.
33It is also considerably faster, and is capable or running tests in
34parallel (with C<-j>). Rather than displaying a single relative
35percentage per test/perl combination, it displays values for 13 different
36measurements, such as instruction reads, conditional branch misses etc.
37
38There are options to write the raw data to a file, and to read it back.
39This means that you can view the same run data in different views with
40different selection and sort options.
41
42The optional C<=label> after each perl executable is used in the display
43output.
44
45=head1 OPTIONS
46
47=over 4
48
49=item *
50
51--action=I<foo>
52
53What action to perform. The default is I<grind>, which runs the benchmarks
54using I<cachegrind> as the back end. The only other action at the moment is
55I<selftest>, which runs some basic sanity checks and produces TAP output.
56
57=item *
58
59--average
60
61Only display the overall average, rather than the results for each
62individual test.
63
64=item *
65
66--benchfile=I<foo>
67
68The path of the file which contains the benchmarks (F<t/perf/benchmarks>
69by default).
70
71=item *
72
73--bisect=I<field,minval,maxval>
74
75Run a single test against one perl and exit with a zero status if the
76named field is in the specified range; exit 1 otherwise. It will complain
77if more than one test or perl has been specified. It is intended to be
78called as part of a bisect run, to determine when something changed.
79For example,
80
4a1358c2
FC
81 bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
82 ./miniperl
9e7973fa
DM
83
84might be called from bisect to find when the number of instruction reads
85for test I<foo> falls outside the range 100..105.
86
87=item *
88
df3d7b3a
DM
89--compact=<Iperl>
90
91Display the results for a single perl executable in a compact form.
92Which perl to display is specified in the same manner as C<--norm>.
93
94=item *
95
9e7973fa
DM
96--debug
97
98Enable verbose debugging output.
99
100=item *
101
102--fields=I<a,b,c>
103
104Display only the specified fields; for example,
105
106 --fields=Ir,Ir_m,Ir_mm
107
108If only one field is selected, the output is in more compact form.
109
110=item *
111
112--grindargs=I<foo>
113
114Optional command-line arguments to pass to cachegrind invocations.
115
116=item *
117
118---help
119
120Display basic usage information.
121
122=item *
123
124-j I<N>
125--jobs=I<N>
126
127Run I<N> jobs in parallel (default 1). This determines how many cachegrind
128process will running at a time, and should generally be set to the number
129of CPUs available.
130
131=item *
132
133--norm=I<foo>
134
135Specify which perl column in the output to treat as the 100% norm.
136It may be a column number (0..N-1) or a perl executable name or label.
137It defaults to the leftmost column.
138
139=item *
140
141--perlargs=I<foo>
142
143Optional command-line arguments to pass to each perl that is run as part of
144a cachegrind session. For example, C<--perlargs=-Ilib>.
145
146=item *
147
148--raw
149
150Display raw data counts rather than percentages in the outputs. This
151allows you to see the exact number of intruction reads, branch misses etc.
152for each test/perl combination. It also causes the C<AVERAGE> display
153per field to be calculated based on the average of each tests's count
154rather than average of each percentage. This means that tests with very
155high counts will dominate.
156
157=item *
158
65f0b1c9 159--sort=I<field:perl>
9e7973fa
DM
160
161Order the tests in the output based on the value of I<field> in the
162column I<perl>. The I<perl> value is as per C<--norm>. For example
163
4a1358c2
FC
164 bench.pl --sort=Dw:perl-5.20.0 \
165 perl-5.16.0 perl-5.18.0 perl-5.20.0
9e7973fa
DM
166
167=item *
168
169-r I<file>
170--read=I<file>
171
172Read in saved data from a previous C<--write> run from the specified file.
173
174Requires C<JSON::PP> to be available.
175
176=item *
177
178--tests=I<FOO>
179
180Specify a subset of tests to run (or in the case of C<--read>, to display).
181It may be either a comma-separated list of test names, or a regular
182expression. For example
183
184 --tests=expr::assign::scalar_lex,expr::assign::2list_lex
185 --tests=/^expr::/
186
187=item *
188
189--verbose
190
191Display progress information.
192
193=item *
194
195-w I<file>
196--write=I<file>
197
198Save the raw data to the specified file. It can be read back later with
199C<--read>.
200
201Requires C<JSON::PP> to be available.
202
203=back
204
205=cut
206
207
208
209use 5.010000;
210use warnings;
211use strict;
212use Getopt::Long qw(:config no_auto_abbrev);
213use IPC::Open2 ();
214use IO::Select;
c2d21e7a 215use IO::File;
9e7973fa
DM
216use POSIX ":sys_wait_h";
217
218# The version of the file format used to save data. We refuse to process
219# the file if the integer component differs.
220
221my $FORMAT_VERSION = 1.0;
222
223# The fields we know about
224
225my %VALID_FIELDS = map { $_ => 1 }
226 qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
227
228sub usage {
229 die <<EOF;
230usage: $0 [options] perl[=label] ...
231 --action=foo What action to perform [default: grind].
232 --average Only display average, not individual test results.
233 --benchfile=foo File containing the benchmarks;
234 [default: t/perf/benchmarks].
235 --bisect=f,min,max run a single test against one perl and exit with a
236 zero status if the named field is in the specified
237 range; exit 1 otherwise.
df3d7b3a
DM
238 --compact=perl Display the results of a single perl in compact form.
239 Which perl specified like --norm
9e7973fa
DM
240 --debug Enable verbose debugging output.
241 --fields=a,b,c Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
242 --grindargs=foo Optional command-line args to pass to cachegrind.
243 --help Display this help.
244 -j|--jobs=N Run N jobs in parallel [default 1].
245 --norm=perl Which perl column to treat as 100%; may be a column
246 number (0..N-1) or a perl executable name or label;
247 [default: 0].
248 --perlargs=foo Optional command-line args to pass to each perl to run.
249 --raw Display raw data counts rather than percentages.
65f0b1c9 250 --sort=field:perl Sort the tests based on the value of 'field' in the
9e7973fa
DM
251 column 'perl'. The perl value is as per --norm.
252 -r|--read=file Read in previously saved data from the specified file.
253 --tests=FOO Select only the specified tests from the benchmarks file;
254 FOO may be either of the form 'foo,bar' or '/regex/';
255 [default: all tests].
256 --verbose Display progress information.
257 -w|--write=file Save the raw data to the specified file.
258
259--action is one of:
260 grind run the code under cachegrind
261 selftest perform a selftest; produce TAP output
262
263The command line ends with one or more specified perl executables,
264which will be searched for in the current \$PATH. Each binary name may
265have an optional =LABEL appended, which will be used rather than the
266executable name in output. E.g.
267
268 perl-5.20.1=PRE-BUGFIX perl-5.20.1-new=POST-BUGFIX
269EOF
270}
271
272my %OPTS = (
273 action => 'grind',
274 average => 0,
275 benchfile => 't/perf/benchmarks',
276 bisect => undef,
df3d7b3a 277 compact => undef,
9e7973fa
DM
278 debug => 0,
279 grindargs => '',
280 fields => undef,
281 jobs => 1,
282 norm => 0,
283 perlargs => '',
284 raw => 0,
285 read => undef,
286 sort => undef,
287 tests => undef,
288 verbose => 0,
289 write => undef,
290);
291
292
293# process command-line args and call top-level action
294
295{
296 GetOptions(
297 'action=s' => \$OPTS{action},
298 'average' => \$OPTS{average},
299 'benchfile=s' => \$OPTS{benchfile},
300 'bisect=s' => \$OPTS{bisect},
df3d7b3a 301 'compact=s' => \$OPTS{compact},
9e7973fa
DM
302 'debug' => \$OPTS{debug},
303 'grindargs=s' => \$OPTS{grindargs},
304 'help' => \$OPTS{help},
305 'fields=s' => \$OPTS{fields},
306 'jobs|j=i' => \$OPTS{jobs},
307 'norm=s' => \$OPTS{norm},
308 'perlargs=s' => \$OPTS{perlargs},
309 'raw' => \$OPTS{raw},
310 'read|r=s' => \$OPTS{read},
311 'sort=s' => \$OPTS{sort},
312 'tests=s' => \$OPTS{tests},
313 'verbose' => \$OPTS{verbose},
314 'write|w=s' => \$OPTS{write},
315 ) or usage;
316
317 usage if $OPTS{help};
318
319
320 if (defined $OPTS{read} and defined $OPTS{write}) {
321 die "Error: can't specify both --read and --write options\n";
322 }
323
324 if (defined $OPTS{read} or defined $OPTS{write}) {
325 # fail early if it's not present
326 require JSON::PP;
327 }
328
329 if (defined $OPTS{fields}) {
330 my @f = split /,/, $OPTS{fields};
331 for (@f) {
332 die "Error: --fields: unknown field '$_'\n"
333 unless $VALID_FIELDS{$_};
334 }
335 my %f = map { $_ => 1 } @f;
336 $OPTS{fields} = \%f;
337 }
338
339 my %valid_actions = qw(grind 1 selftest 1);
340 unless ($valid_actions{$OPTS{action}}) {
341 die "Error: unrecognised action '$OPTS{action}'\n"
342 . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
343 }
344
345 if (defined $OPTS{sort}) {
346 my @s = split /:/, $OPTS{sort};
347 if (@s != 2) {
348 die "Error: --sort argument should be of the form field:perl: "
349 . "'$OPTS{sort}'\n";
350 }
351 my ($field, $perl) = @s;
352 die "Error: --sort: unknown field '$field\n"
353 unless $VALID_FIELDS{$field};
354 # the 'perl' value will be validated later, after we have processed
355 # the perls
356 $OPTS{'sort-field'} = $field;
357 $OPTS{'sort-perl'} = $perl;
358 }
359
360 if ($OPTS{action} eq 'selftest') {
361 if (@ARGV) {
362 die "Error: no perl executables may be specified with --read\n"
363 }
364 }
365 elsif (defined $OPTS{bisect}) {
366 die "Error: exactly one perl executable must be specified for bisect\n"
367 unless @ARGV == 1;
368 die "Error: Can't specify both --bisect and --read\n"
369 if defined $OPTS{read};
370 die "Error: Can't specify both --bisect and --write\n"
371 if defined $OPTS{write};
372 }
373 elsif (defined $OPTS{read}) {
374 if (@ARGV) {
375 die "Error: no perl executables may be specified with --read\n"
376 }
377 }
378 elsif ($OPTS{raw}) {
379 unless (@ARGV) {
380 die "Error: at least one perl executable must be specified\n";
381 }
382 }
383 else {
384 unless (@ARGV >= 2) {
385 die "Error: at least two perl executables must be specified\n";
386 }
387 }
388
389 if ($OPTS{action} eq 'grind') {
390 do_grind(\@ARGV);
391 }
392 elsif ($OPTS{action} eq 'selftest') {
393 do_selftest();
394 }
395}
396exit 0;
397
398
399# Given a hash ref keyed by test names, filter it by deleting unwanted
400# tests, based on $OPTS{tests}.
401
402sub filter_tests {
403 my ($tests) = @_;
404
405 my $opt = $OPTS{tests};
406 return unless defined $opt;
407
408 my @tests;
409
410 if ($opt =~ m{^/}) {
411 $opt =~ s{^/(.+)/$}{$1}
412 or die "Error: --tests regex must be of the form /.../\n";
413 for (keys %$tests) {
414 delete $tests->{$_} unless /$opt/;
415 }
416 }
417 else {
418 my %t;
419 for (split /,/, $opt) {
420 die "Error: no such test found: '$_'\n" unless exists $tests->{$_};
421 $t{$_} = 1;
422 }
423 for (keys %$tests) {
424 delete $tests->{$_} unless exists $t{$_};
425 }
426 }
427}
428
429
430# Read in the test file, and filter out any tests excluded by $OPTS{tests}
957d8930
DM
431# return a hash ref { testname => { test }, ... }
432# and an array ref of the original test names order,
9e7973fa
DM
433
434sub read_tests_file {
435 my ($file) = @_;
436
437 my $ta = do $file;
438 unless ($ta) {
439 die "Error: can't parse '$file': $@\n" if $@;
440 die "Error: can't read '$file': $!\n";
441 }
442
957d8930
DM
443 my @orig_order;
444 for (my $i=0; $i < @$ta; $i += 2) {
445 push @orig_order, $ta->[$i];
446 }
447
9e7973fa
DM
448 my $t = { @$ta };
449 filter_tests($t);
957d8930 450 return $t, \@orig_order;
9e7973fa
DM
451}
452
453
454# Process the perl/column argument of options like --norm and --sort.
455# Return the index of the matching perl.
456
457sub select_a_perl {
458 my ($perl, $perls, $who) = @_;
459
460 if ($perl =~ /^[0-9]$/) {
461 die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
462 unless $perl < @$perls;
463 return $perl;
464 }
465 else {
466 my @perl = grep $perls->[$_][0] eq $perl
467 || $perls->[$_][1] eq $perl,
468 0..$#$perls;
469 die "Error: $who: unrecognised perl '$perl'\n"
470 unless @perl;
471 die "Error: $who: ambiguous perl '$perl'\n"
472 if @perl > 1;
473 return $perl[0];
474 }
475}
476
477
478# Validate the list of perl=label on the command line.
9e7973fa
DM
479# Return a list of [ exe, label ] pairs.
480
481sub process_perls {
482 my @results;
483 for my $p (@_) {
484 my ($perl, $label) = split /=/, $p, 2;
485 $label //= $perl;
486 my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
487 die "Error: unable to execute '$perl': $r" if $r ne "ok\n";
488 push @results, [ $perl, $label ];
489 }
9e7973fa
DM
490 return @results;
491}
492
493
8fbd1c2c 494
9e7973fa
DM
495# Return a string containing perl test code wrapped in a loop
496# that runs $ARGV[0] times
497
498sub make_perl_prog {
499 my ($test, $desc, $setup, $code) = @_;
500
501 return <<EOF;
502# $desc
503package $test;
504BEGIN { srand(0) }
505$setup;
506for my \$__loop__ (1..\$ARGV[0]) {
507 $code;
508}
509EOF
510}
511
512
513# Parse the output from cachegrind. Return a hash ref.
514# See do_selftest() for examples of the output format.
515
516sub parse_cachegrind {
517 my ($output, $id, $perl) = @_;
518
519 my %res;
520
521 my @lines = split /\n/, $output;
522 for (@lines) {
523 unless (s/(==\d+==)|(--\d+--) //) {
524 die "Error: while executing $id:\n"
525 . "unexpected code or cachegrind output:\n$_\n";
526 }
527 if (/I refs:\s+([\d,]+)/) {
528 $res{Ir} = $1;
529 }
530 elsif (/I1 misses:\s+([\d,]+)/) {
531 $res{Ir_m1} = $1;
532 }
533 elsif (/LLi misses:\s+([\d,]+)/) {
534 $res{Ir_mm} = $1;
535 }
536 elsif (/D refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
537 @res{qw(Dr Dw)} = ($1,$2);
538 }
539 elsif (/D1 misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
540 @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
541 }
542 elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
543 @res{qw(Dr_mm Dw_mm)} = ($1,$2);
544 }
545 elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
546 @res{qw(COND IND)} = ($1,$2);
547 }
548 elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
549 @res{qw(COND_m IND_m)} = ($1,$2);
550 }
551 }
552
553 for my $field (keys %VALID_FIELDS) {
554 die "Error: can't parse '$field' field from cachegrind output:\n$output"
555 unless exists $res{$field};
556 $res{$field} =~ s/,//g;
557 }
558
559 return \%res;
560}
561
562
563# Handle the 'grind' action
564
565sub do_grind {
566 my ($perl_args) = @_; # the residue of @ARGV after option processing
567
957d8930 568 my ($loop_counts, $perls, $results, $tests, $order);
9e7973fa
DM
569 my ($bisect_field, $bisect_min, $bisect_max);
570
571 if (defined $OPTS{bisect}) {
572 ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
573 die "Error: --bisect option must be of form 'field,integer,integer'\n"
574 unless
575 defined $bisect_max
576 and $bisect_min =~ /^[0-9]+$/
577 and $bisect_max =~ /^[0-9]+$/;
578
579 die "Error: unrecognised field '$bisect_field' in --bisect option\n"
580 unless $VALID_FIELDS{$bisect_field};
581
582 die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
583 if $bisect_min > $bisect_max;
584 }
585
586 if (defined $OPTS{read}) {
587 open my $in, '<:encoding(UTF-8)', $OPTS{read}
588 or die " Error: can't open $OPTS{read} for reading: $!\n";
589 my $data = do { local $/; <$in> };
590 close $in;
591
592 my $hash = JSON::PP::decode_json($data);
593 if (int($FORMAT_VERSION) < int($hash->{version})) {
594 die "Error: unsupported version $hash->{version} in file"
595 . "'$OPTS{read}' (too new)\n";
596 }
957d8930
DM
597 ($loop_counts, $perls, $results, $tests, $order) =
598 @$hash{qw(loop_counts perls results tests order)};
9e7973fa
DM
599
600 filter_tests($results);
601 filter_tests($tests);
957d8930
DM
602
603 if (!$order) {
604 $order = [ sort keys %$tests ];
605 }
9e7973fa
DM
606 }
607 else {
608 # How many times to execute the loop for the two trials. The lower
609 # value is intended to do the loop enough times that branch
610 # prediction has taken hold; the higher loop allows us to see the
611 # branch misses after that
612 $loop_counts = [10, 20];
613
957d8930 614 ($tests, $order) = read_tests_file($OPTS{benchfile});
9e7973fa
DM
615 die "Error: only a single test may be specified with --bisect\n"
616 if defined $OPTS{bisect} and keys %$tests != 1;
617
618 $perls = [ process_perls(@$perl_args) ];
8fbd1c2c
DM
619
620
957d8930 621 $results = grind_run($tests, $order, $perls, $loop_counts);
9e7973fa
DM
622 }
623
8fbd1c2c
DM
624 # now that we have a list of perls, use it to process the
625 # 'perl' component of the --norm and --sort args
626
627 $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
628 if (defined $OPTS{'sort-perl'}) {
629 $OPTS{'sort-perl'} =
630 select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
631 }
632
df3d7b3a
DM
633 if (defined $OPTS{'compact'}) {
634 $OPTS{'compact'} =
635 select_a_perl($OPTS{'compact'}, $perls, "--compact");
636 }
9e7973fa
DM
637 if (defined $OPTS{write}) {
638 my $json = JSON::PP::encode_json({
639 version => $FORMAT_VERSION,
640 loop_counts => $loop_counts,
641 perls => $perls,
642 results => $results,
643 tests => $tests,
957d8930 644 order => $order,
9e7973fa
DM
645 });
646
647 open my $out, '>:encoding(UTF-8)', $OPTS{write}
648 or die " Error: can't open $OPTS{write} for writing: $!\n";
649 print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
650 close $out or die "Error: closing file '$OPTS{write}': $!\n";
651 }
652 else {
653 my ($processed, $averages) =
654 grind_process($results, $perls, $loop_counts);
655
656 if (defined $OPTS{bisect}) {
657 my @r = values %$results;
658 die "Panic: expected exactly one test result in bisect\n"
659 if @r != 1;
660 @r = values %{$r[0]};
661 die "Panic: expected exactly one perl result in bisect\n"
662 if @r != 1;
663 my $c = $r[0]{$bisect_field};
664 die "Panic: no result in bisect for field '$bisect_field'\n"
665 unless defined $c;
666 exit 0 if $bisect_min <= $c and $c <= $bisect_max;
667 exit 1;
668 }
df3d7b3a
DM
669 elsif (defined $OPTS{compact}) {
670 grind_print_compact($processed, $averages, $OPTS{compact},
671 $perls, $tests, $order);
672 }
9e7973fa 673 else {
957d8930 674 grind_print($processed, $averages, $perls, $tests, $order);
9e7973fa
DM
675 }
676 }
677}
678
679
680# Run cachegrind for every test/perl combo.
681# It may run several processes in parallel when -j is specified.
682# Return a hash ref suitable for input to grind_process()
683
684sub grind_run {
957d8930 685 my ($tests, $order, $perls, $counts) = @_;
9e7973fa
DM
686
687 # Build a list of all the jobs to run
688
689 my @jobs;
690
957d8930 691 for my $test (grep $tests->{$_}, @$order) {
9e7973fa
DM
692
693 # Create two test progs: one with an empty loop and one with code.
694 # Note that the empty loop is actually '{1;}' rather than '{}';
695 # this causes the loop to have a single nextstate rather than a
696 # stub op, so more closely matches the active loop; e.g.:
697 # {1;} => nextstate; unstack
698 # {$x=1;} => nextstate; const; gvsv; sassign; unstack
699 my @prog = (
700 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
701 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
702 );
703
704 for my $p (@$perls) {
705 my ($perl, $label) = @$p;
706
707 # Run both the empty loop and the active loop
708 # $counts->[0] and $counts->[1] times.
709
710 for my $i (0,1) {
711 for my $j (0,1) {
712 my $cmd = "PERL_HASH_SEED=0 "
713 . "valgrind --tool=cachegrind --branch-sim=yes "
714 . "--cachegrind-out-file=/dev/null "
715 . "$OPTS{grindargs} "
716 . "$perl $OPTS{perlargs} - $counts->[$j] 2>&1";
717 # for debugging and error messages
718 my $id = "$test/$perl "
719 . ($i ? "active" : "empty") . "/"
720 . ($j ? "long" : "short") . " loop";
721
722 push @jobs, {
723 test => $test,
724 perl => $perl,
725 plabel => $label,
726 cmd => $cmd,
727 prog => $prog[$i],
728 active => $i,
729 loopix => $j,
730 id => $id,
731 };
732 }
733 }
734 }
735 }
736
737 # Execute each cachegrind and store the results in %results.
738
739 local $SIG{PIPE} = 'IGNORE';
740
741 my $max_jobs = $OPTS{jobs};
742 my $running = 0; # count of executing jobs
743 my %pids; # map pids to jobs
744 my %fds; # map fds to jobs
745 my %results;
746 my $select = IO::Select->new();
747
748 while (@jobs or $running) {
749
750 if ($OPTS{debug}) {
751 printf "Main loop: pending=%d running=%d\n",
752 scalar(@jobs), $running;
753 }
754
755 # Start new jobs
756
757 while (@jobs && $running < $max_jobs) {
758 my $job = shift @jobs;
759 my ($id, $cmd) =@$job{qw(id cmd)};
760
761 my ($in, $out, $pid);
762 warn "Starting $id\n" if $OPTS{verbose};
763 eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
764 or die "Error: while starting cachegrind subprocess"
765 ." for $id:\n$@";
766 $running++;
767 $pids{$pid} = $job;
768 $fds{"$out"} = $job;
769 $job->{out_fd} = $out;
770 $job->{output} = '';
771 $job->{pid} = $pid;
772
773 $out->blocking(0);
774 $select->add($out);
775
776 if ($OPTS{debug}) {
777 print "Started pid $pid for $id\n";
778 }
779
780 # Note:
781 # In principle we should write to $in in the main select loop,
782 # since it may block. In reality,
783 # a) the code we write to the perl process's stdin is likely
784 # to be less than the OS's pipe buffer size;
785 # b) by the time the perl process has read in all its stdin,
786 # the only output it should have generated is a few lines
787 # of cachegrind output preamble.
788 # If these assumptions change, then perform the following print
789 # in the select loop instead.
790
791 print $in $job->{prog};
792 close $in;
793 }
794
795 # Get output of running jobs
796
797 if ($OPTS{debug}) {
798 printf "Select: waiting on (%s)\n",
799 join ', ', sort { $a <=> $b } map $fds{$_}{pid},
800 $select->handles;
801 }
802
803 my @ready = $select->can_read;
804
805 if ($OPTS{debug}) {
806 printf "Select: pids (%s) ready\n",
807 join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
808 }
809
810 unless (@ready) {
811 die "Panic: select returned no file handles\n";
812 }
813
814 for my $fd (@ready) {
815 my $j = $fds{"$fd"};
816 my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
817 unless (defined $r) {
818 die "Panic: Read from process running $j->{id} gave:\n$!";
819 }
820 next if $r;
821
822 # EOF
823
824 if ($OPTS{debug}) {
825 print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
826 }
827
828 $select->remove($j->{out_fd});
829 close($j->{out_fd})
830 or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
831 $running--;
832 delete $fds{"$j->{out_fd}"};
833 my $output = $j->{output};
834
835 if ($OPTS{debug}) {
836 my $p = $j->{prog};
837 $p =~ s/^/ : /mg;
838 my $o = $output;
839 $o =~ s/^/ : /mg;
840
841 print "\n$j->{id}/\nCommand: $j->{cmd}\n"
842 . "Input:\n$p"
843 . "Output\n$o";
844 }
845
846 $results{$j->{test}}{$j->{perl}}[$j->{active}][$j->{loopix}]
847 = parse_cachegrind($output, $j->{id}, $j->{perl});
848 }
849
850 # Reap finished jobs
851
852 while (1) {
853 my $kid = waitpid(-1, WNOHANG);
854 my $ret = $?;
855 last if $kid <= 0;
856
857 unless (exists $pids{$kid}) {
858 die "Panic: reaped unexpected child $kid";
859 }
860 my $j = $pids{$kid};
861 if ($ret) {
862 die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
863 . "with the following output\n:$j->{output}\n";
864 }
865 delete $pids{$kid};
866 }
867 }
868
869 return \%results;
870}
871
872
873
874
875# grind_process(): process the data that has been extracted from
876# cachgegrind's output.
877#
878# $res is of the form ->{benchmark_name}{perl_name}[active][count]{field_name},
879# where active is 0 or 1 indicating an empty or active loop,
880# count is 0 or 1 indicating a short or long loop. E.g.
881#
882# $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][10]{Dw_mm}
883#
884# The $res data structure is modified in-place by this sub.
885#
886# $perls is [ [ perl-exe, perl-label], .... ].
887#
888# $counts is [ N, M ] indicating the counts for the short and long loops.
889#
890#
891# return \%output, \%averages, where
892#
893# $output{benchmark_name}{perl_name}{field_name} = N
894# $averages{perl_name}{field_name} = M
895#
896# where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
897# M is the average raw count over all tests ($OPTS{raw}), or
898# 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
899
900sub grind_process {
901 my ($res, $perls, $counts) = @_;
902
903 # Process the four results for each test/perf combo:
904 # Convert
905 # $res->{benchmark_name}{perl_name}[active][count]{field_name} = n
906 # to
907 # $res->{benchmark_name}{perl_name}{field_name} = averaged_n
908 #
909 # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
910 # empty loops, eliminating startup time
911 # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
912 # active loops, eliminating startup time
913 # (the two startup times may be different because different code
914 # is being compiled); the difference of the two results above
915 # divided by the count difference is the time to execute the
916 # active code once, eliminating both startup and loop overhead.
917
918 for my $tests (values %$res) {
919 for my $r (values %$tests) {
920 my $r2;
921 for (keys %{$r->[0][0]}) {
922 my $n = ( ($r->[1][1]{$_} - $r->[1][0]{$_})
923 - ($r->[0][1]{$_} - $r->[0][0]{$_})
924 ) / ($counts->[1] - $counts->[0]);
925 $r2->{$_} = $n;
926 }
927 $r = $r2;
928 }
929 }
930
931 my %totals;
932 my %counts;
933 my %data;
934
935 my $perl_norm = $perls->[$OPTS{norm}][0]; # the name of the reference perl
936
937 for my $test_name (keys %$res) {
938 my $res1 = $res->{$test_name};
939 my $res2_norm = $res1->{$perl_norm};
940 for my $perl (keys %$res1) {
941 my $res2 = $res1->{$perl};
942 for my $field (keys %$res2) {
943 my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});
944
945 if ($OPTS{raw}) {
946 # Avoid annoying '-0.0' displays. Ideally this number
947 # should never be negative, but fluctuations in
948 # startup etc can theoretically make this happen
949 $q = 0 if ($q <= 0 && $q > -0.1);
950 $totals{$perl}{$field} += $q;
951 $counts{$perl}{$field}++;
952 $data{$test_name}{$perl}{$field} = $q;
953 next;
954 }
955
956 # $p and $q are notionally integer counts, but
957 # due to variations in startup etc, it's possible for a
958 # count which is supposedly zero to be calculated as a
959 # small positive or negative value.
960 # In this case, set it to zero. Further below we
961 # special-case zeros to avoid division by zero errors etc.
962
963 $p = 0.0 if $p < 0.01;
964 $q = 0.0 if $q < 0.01;
965
966 if ($p == 0.0 && $q == 0.0) {
967 # Both perls gave a count of zero, so no change:
968 # treat as 100%
969 $totals{$perl}{$field} += 1;
970 $counts{$perl}{$field}++;
971 $data{$test_name}{$perl}{$field} = 1;
972 }
973 elsif ($p == 0.0 || $q == 0.0) {
974 # If either count is zero, there were too few events
975 # to give a meaningful ratio (and we will end up with
976 # division by zero if we try). Mark the result undef,
977 # indicating that it shouldn't be displayed; and skip
978 # adding to the average
979 $data{$test_name}{$perl}{$field} = undef;
980 }
981 else {
982 # For averages, we record q/p rather than p/q.
983 # Consider a test where perl_norm took 1000 cycles
984 # and perlN took 800 cycles. For the individual
985 # results we display p/q, or 1.25; i.e. a quarter
986 # quicker. For the averages, we instead sum all
987 # the 0.8's, which gives the total cycles required to
988 # execute all tests, with all tests given equal
989 # weight. Later we reciprocate the final result,
990 # i.e. 1/(sum(qi/pi)/n)
991
992 $totals{$perl}{$field} += $q/$p;
993 $counts{$perl}{$field}++;
994 $data{$test_name}{$perl}{$field} = $p/$q;
995 }
996 }
997 }
998 }
999
1000 # Calculate averages based on %totals and %counts accumulated earlier.
1001
1002 my %averages;
1003 for my $perl (keys %totals) {
1004 my $t = $totals{$perl};
1005 for my $field (keys %$t) {
1006 $averages{$perl}{$field} = $OPTS{raw}
1007 ? $t->{$field} / $counts{$perl}{$field}
1008 # reciprocal - see comments above
1009 : $counts{$perl}{$field} / $t->{$field};
1010 }
1011 }
1012
1013 return \%data, \%averages;
1014}
1015
1016
9e7973fa 1017
df3d7b3a 1018# print a standard blurb at the start of the grind display
9e7973fa 1019
df3d7b3a
DM
1020sub grind_blurb {
1021 my ($perls) = @_;
9e7973fa
DM
1022
1023 print <<EOF;
1024Key:
1025 Ir Instruction read
1026 Dr Data read
1027 Dw Data write
1028 COND conditional branches
1029 IND indirect branches
1030 _m branch predict miss
1031 _m1 level 1 cache miss
1032 _mm last cache (e.g. L3) miss
1033 - indeterminate percentage (e.g. 1/0)
1034
1035EOF
1036
1037 if ($OPTS{raw}) {
1038 print "The numbers represent raw counts per loop iteration.\n";
1039 }
1040 else {
1041 print <<EOF;
1042The numbers represent relative counts per loop iteration, compared to
df3d7b3a 1043$perls->[$OPTS{norm}][1] at 100.0%.
9e7973fa
DM
1044Higher is better: for example, using half as many instructions gives 200%,
1045while using twice as many gives 50%.
1046EOF
1047 }
df3d7b3a
DM
1048}
1049
1050
1051# return a sorted list of the test names, plus 'AVERAGE'
9e7973fa 1052
df3d7b3a
DM
1053sub sorted_test_names {
1054 my ($results, $order, $perls) = @_;
9e7973fa 1055
df3d7b3a 1056 my @names;
9e7973fa
DM
1057 unless ($OPTS{average}) {
1058 if (defined $OPTS{'sort-field'}) {
1059 my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
1060 my $perl = $perls->[$perlix][0];
df3d7b3a 1061 @names = sort
9e7973fa
DM
1062 {
1063 $results->{$a}{$perl}{$field}
1064 <=> $results->{$b}{$perl}{$field}
1065 }
1066 keys %$results;
1067 }
1068 else {
df3d7b3a 1069 @names = grep $results->{$_}, @$order;
9e7973fa
DM
1070 }
1071 }
1072
1073 # No point in displaying average for only one test.
df3d7b3a
DM
1074 push @names, 'AVERAGE' unless @names == 1;
1075 @names;
1076}
1077
1078
1079# grind_print(): display the tabulated results of all the cachegrinds.
1080#
1081# Arguments are of the form:
1082# $results->{benchmark_name}{perl_name}{field_name} = N
1083# $averages->{perl_name}{field_name} = M
1084# $perls = [ [ perl-exe, perl-label ], ... ]
1085# $tests->{test_name}{desc => ..., ...}
1086
1087sub grind_print {
1088 my ($results, $averages, $perls, $tests, $order) = @_;
1089
1090 my @perl_names = map $_->[0], @$perls;
1091 my %perl_labels;
1092 $perl_labels{$_->[0]} = $_->[1] for @$perls;
1093
1094 my $field_label_width = 6;
1095 # Calculate the width to display for each column.
1096 my $min_width = $OPTS{raw} ? 8 : 6;
1097 my @widths = map { length($_) < $min_width ? $min_width : length($_) }
1098 @perl_labels{@perl_names};
1099
1100 # Print standard header.
1101 grind_blurb($perls);
1102
1103 my @test_names = sorted_test_names($results, $order, $perls);
9e7973fa
DM
1104
1105 # If only a single field is to be displayed, use a more compact
1106 # format with only a single line of output per test.
1107
1108 my $one_field = defined $OPTS{fields} && keys(%{$OPTS{fields}}) == 1;
1109
1110 if ($one_field) {
91cde97c 1111 print "Results for field " . (keys(%{$OPTS{fields}}))[0] . ".\n";
9e7973fa
DM
1112
1113 # The first column will now contain test names rather than
1114 # field names; Calculate the max width.
1115
1116 $field_label_width = 0;
1117 for (@test_names) {
1118 $field_label_width = length if length > $field_label_width;
1119 }
1120
1121 # Print the perl executables header.
1122
1123 print "\n";
1124 for my $i (0,1) {
1125 print " " x $field_label_width;
1126 for (0..$#widths) {
1127 printf " %*s", $widths[$_],
1128 $i ? ('-' x$widths[$_]) : $perl_labels{$perl_names[$_]};
1129 }
1130 print "\n";
1131 }
1132 }
1133
1134 # Dump the results for each test.
1135
1136 for my $test_name (@test_names) {
1137 my $doing_ave = ($test_name eq 'AVERAGE');
1138 my $res1 = $doing_ave ? $averages : $results->{$test_name};
1139
1140 unless ($one_field) {
1141 print "\n$test_name";
1142 print "\n$tests->{$test_name}{desc}" unless $doing_ave;
1143 print "\n\n";
1144
1145 # Print the perl executables header.
1146 for my $i (0,1) {
1147 print " " x $field_label_width;
1148 for (0..$#widths) {
1149 printf " %*s", $widths[$_],
1150 $i ? ('-' x$widths[$_]) : $perl_labels{$perl_names[$_]};
1151 }
1152 print "\n";
1153 }
1154 }
1155
1156 for my $field (qw(Ir Dr Dw COND IND
1157 N
1158 COND_m IND_m
1159 N
1160 Ir_m1 Dr_m1 Dw_m1
1161 N
1162 Ir_mm Dr_mm Dw_mm
1163 ))
1164 {
1165 next if $OPTS{fields} and ! exists $OPTS{fields}{$field};
1166
1167 if ($field eq 'N') {
1168 print "\n";
1169 next;
1170 }
1171
91cde97c
DM
1172 if ($one_field) {
1173 printf "%-*s", $field_label_width, $test_name;
1174 }
1175 else {
1176 printf "%*s", $field_label_width, $field;
1177 }
9e7973fa
DM
1178
1179 for my $i (0..$#widths) {
1180 my $res2 = $res1->{$perl_names[$i]};
1181 my $p = $res2->{$field};
1182 if (!defined $p) {
1183 printf " %*s", $widths[$i], '-';
1184 }
1185 elsif ($OPTS{raw}) {
1186 printf " %*.1f", $widths[$i], $p;
1187 }
1188 else {
1189 printf " %*.2f", $widths[$i], $p * 100;
1190 }
1191 }
1192 print "\n";
1193 }
1194 }
1195}
1196
1197
df3d7b3a
DM
1198
1199# grind_print_compact(): like grind_print(), but display a single perl
1200# in a compact form. Has an additional arg, $which_perl, which specifies
1201# which perl to display.
1202#
1203# Arguments are of the form:
1204# $results->{benchmark_name}{perl_name}{field_name} = N
1205# $averages->{perl_name}{field_name} = M
1206# $perls = [ [ perl-exe, perl-label ], ... ]
1207# $tests->{test_name}{desc => ..., ...}
1208
1209sub grind_print_compact {
1210 my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;
1211
1212
1213 # the width to display for each column.
1214 my $width = $OPTS{raw} ? 7 : 6;
1215
1216 # Print standard header.
1217 grind_blurb($perls);
1218
1219 print "\nResults for $perls->[$which_perl][1]\n\n";
1220
1221 my @test_names = sorted_test_names($results, $order, $perls);
1222
1223 # Dump the results for each test.
1224
1225 my @fields = qw( Ir Dr Dw
1226 COND IND
1227 COND_m IND_m
1228 Ir_m1 Dr_m1 Dw_m1
1229 Ir_mm Dr_mm Dw_mm
1230 );
1231 if ($OPTS{fields}) {
1232 @fields = grep exists $OPTS{fields}{$_}, @fields;
1233 }
1234
1235 printf " %*s", $width, $_ for @fields;
1236 print "\n";
1237 printf " %*s", $width, '------' for @fields;
1238 print "\n";
1239
1240 for my $test_name (@test_names) {
1241 my $doing_ave = ($test_name eq 'AVERAGE');
1242 my $res = $doing_ave ? $averages : $results->{$test_name};
1243 $res = $res->{$perls->[$which_perl][0]};
1244
1245 for my $field (@fields) {
1246 my $p = $res->{$field};
1247 if (!defined $p) {
1248 printf " %*s", $width, '-';
1249 }
1250 elsif ($OPTS{raw}) {
1251 printf " %*.1f", $width, $p;
1252 }
1253 else {
1254 printf " %*.2f", $width, $p * 100;
1255 }
1256
1257 }
1258
1259 print " $test_name\n";
1260 }
1261}
1262
1263
9e7973fa
DM
1264# do_selftest(): check that we can parse known cachegrind()
1265# output formats. If the output of cachegrind changes, add a *new*
1266# test here; keep the old tests to make sure we continue to parse
1267# old cachegrinds
1268
1269sub do_selftest {
1270
1271 my @tests = (
1272 'standard',
1273 <<'EOF',
1274==32350== Cachegrind, a cache and branch-prediction profiler
1275==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
1276==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
1277==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
1278==32350==
1279--32350-- warning: L3 cache found, using its data for the LL simulation.
1280==32350==
1281==32350== I refs: 1,124,055
1282==32350== I1 misses: 5,573
1283==32350== LLi misses: 3,338
1284==32350== I1 miss rate: 0.49%
1285==32350== LLi miss rate: 0.29%
1286==32350==
1287==32350== D refs: 404,275 (259,191 rd + 145,084 wr)
1288==32350== D1 misses: 9,608 ( 6,098 rd + 3,510 wr)
1289==32350== LLd misses: 5,794 ( 2,781 rd + 3,013 wr)
1290==32350== D1 miss rate: 2.3% ( 2.3% + 2.4% )
1291==32350== LLd miss rate: 1.4% ( 1.0% + 2.0% )
1292==32350==
1293==32350== LL refs: 15,181 ( 11,671 rd + 3,510 wr)
1294==32350== LL misses: 9,132 ( 6,119 rd + 3,013 wr)
1295==32350== LL miss rate: 0.5% ( 0.4% + 2.0% )
1296==32350==
1297==32350== Branches: 202,372 (197,050 cond + 5,322 ind)
1298==32350== Mispredicts: 19,153 ( 17,742 cond + 1,411 ind)
1299==32350== Mispred rate: 9.4% ( 9.0% + 26.5% )
1300EOF
1301 {
1302 COND => 197050,
1303 COND_m => 17742,
1304 Dr => 259191,
1305 Dr_m1 => 6098,
1306 Dr_mm => 2781,
1307 Dw => 145084,
1308 Dw_m1 => 3510,
1309 Dw_mm => 3013,
1310 IND => 5322,
1311 IND_m => 1411,
1312 Ir => 1124055,
1313 Ir_m1 => 5573,
1314 Ir_mm => 3338,
1315 },
1316 );
1317
1318 for ('t', '.') {
1319 last if require "$_/test.pl";
1320 }
1321 plan(@tests / 3 * keys %VALID_FIELDS);
1322
1323 while (@tests) {
1324 my $desc = shift @tests;
1325 my $output = shift @tests;
1326 my $expected = shift @tests;
1327 my $p = parse_cachegrind($output);
1328 for (sort keys %VALID_FIELDS) {
1329 is($p->{$_}, $expected->{$_}, "$desc, $_");
1330 }
1331 }
1332}