#!/usr/bin/perl
#
# A tool for analysing the performance of the code snippets found in
# t/perf/benchmarks or similar


=head1 NAME

bench.pl - Compare the performance of perl code snippets across multiple
perls.

=head1 SYNOPSIS

  # Basic: run the tests in t/perf/benchmarks against two or
  # more perls

  bench.pl [options] -- perlA[=labelA] perlB[=labelB] ...

  # run the tests against the same perlA twice, with and without
  # extra options

  bench.pl [options] -- perlA=fast perlA=slow -Mstrict -Dpsltoc

  # Run bench.pl's own built-in sanity tests

  bench.pl --action=selftest

=head1 DESCRIPTION

By default, F<bench.pl> will run code snippets found in
F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
how many instruction reads, data writes, branches, cache misses, etc.
one execution of the snippet uses. It will run them against two or more
perl executables and show how much better or worse each test performs.

It is modelled on the F<perlbench> tool, but since it measures instruction
reads etc., rather than timings, it is much more precise and reproducible.
It is also considerably faster, and is capable of running tests in
parallel (with C<-j>). Rather than displaying a single relative
percentage per test/perl combination, it displays values for 13 different
measurements, such as instruction reads, conditional branch misses etc.

There are options to write the raw data to a file, and to read it back.
This means that you can view the same run data in different views with
different selection and sort options.

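For example, the results of a run can be saved and then re-examined later
under different display options (the file name here is just illustrative):

  bench.pl --write=results.json -- perlA perlB
  bench.pl --read=results.json --sort=Ir:perlB --fields=Ir
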
The optional C<=label> after each perl executable is used in the display
output.

=head1 OPTIONS

=over 4

=item *

--action=I<foo>

What action to perform. The default is I<grind>, which runs the benchmarks
using I<cachegrind> as the back end. The only other action at the moment is
I<selftest>, which runs some basic sanity checks and produces TAP output.

=item *

--average

Only display the overall average, rather than the results for each
individual test.

=item *

--benchfile=I<foo>

The path of the file which contains the benchmarks (F<t/perf/benchmarks>
by default).

=item *

--bisect=I<field,minval,maxval>

Run a single test against one perl and exit with a zero status if the
named field is in the specified range; exit 1 otherwise. It will complain
if more than one test or perl has been specified. It is intended to be
called as part of a bisect run, to determine when something changed.
For example,

  bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
      ./miniperl

might be called from bisect to find when the number of instruction reads
for test I<foo> falls outside the range 100..105.

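A sketch of how this might be wired into C<git bisect run> (the build step
and paths are illustrative; F<bench.pl> exits 0 when the field is within
range, which C<git bisect run> treats as a "good" commit):

  git bisect run sh -c 'make miniperl && \
      Porting/bench.pl --tests=foo --bisect=Ir,100,105 \
          --perlargs=-Ilib ./miniperl'
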
=item *

--compact=I<perl>

Display the results for a single perl executable in a compact form.
Which perl to display is specified in the same manner as C<--norm>.

=item *

--debug

Enable verbose debugging output.

=item *

--fields=I<a,b,c>

Display only the specified fields; for example,

  --fields=Ir,Ir_m,Ir_mm

If only one field is selected, the output is in a more compact form.

=item *

--grindargs=I<foo>

Optional command-line arguments to pass to cachegrind invocations.

=item *

--help

Display basic usage information.

=item *

-j I<N>
--jobs=I<N>

Run I<N> jobs in parallel (default 1). This determines how many cachegrind
processes will be running at a time, and should generally be set to the
number of CPUs available.

=item *

--norm=I<foo>

Specify which perl column in the output to treat as the 100% norm.
It may be a column number (0..N-1) or a perl executable name or label.
It defaults to the leftmost column.

=item *

--perlargs=I<foo>

Optional command-line arguments to pass to each perl-under-test
(perlA, perlB in the synopsis). For example, C<--perlargs=-Ilib>.

=item *

--raw

Display raw data counts rather than percentages in the outputs. This
allows you to see the exact number of instruction reads, branch misses etc.
for each test/perl combination. It also causes the C<AVERAGE> display
per field to be calculated based on the average of each test's count
rather than the average of each percentage. This means that tests with very
high counts will dominate.

=item *

--sort=I<field:perl>

Order the tests in the output based on the value of I<field> in the
column I<perl>. The I<perl> value is as per C<--norm>. For example

  bench.pl --sort=Dw:perl-5.20.0 \
      perl-5.16.0 perl-5.18.0 perl-5.20.0

=item *

-r I<file>
--read=I<file>

Read in saved data from a previous C<--write> run from the specified file.

Requires C<JSON::PP> to be available.

=item *

--tests=I<FOO>

Specify a subset of tests to run (or in the case of C<--read>, to display).
It may be either a comma-separated list of test names, or a regular
expression. For example

  --tests=expr::assign::scalar_lex,expr::assign::2list_lex
  --tests=/^expr::/

=item *

--verbose

Display progress information.

=item *

-w I<file>
--write=I<file>

Save the raw data to the specified file. It can be read back later with
C<--read>.

Requires C<JSON::PP> to be available.

=back

=cut



use 5.010000;
use warnings;
use strict;
use Getopt::Long qw(:config no_auto_abbrev require_order);
use IPC::Open2 ();
use IO::Select;
use IO::File;
use POSIX ":sys_wait_h";

# The version of the file format used to save data. We refuse to process
# the file if the integer component differs.

my $FORMAT_VERSION = 1.0;

# The fields we know about

my %VALID_FIELDS = map { $_ => 1 }
    qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);

sub usage {
    die <<EOF;
usage: $0 [options] -- perl[=label] ...
  --action=foo       What action to perform [default: grind].
  --average          Only display average, not individual test results.
  --benchfile=foo    File containing the benchmarks;
                       [default: t/perf/benchmarks].
  --bisect=f,min,max Run a single test against one perl and exit with a
                       zero status if the named field is in the specified
                       range; exit 1 otherwise.
  --compact=perl     Display the results of a single perl in compact form.
                       Which perl is specified as per --norm.
  --debug            Enable verbose debugging output.
  --fields=a,b,c     Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
  --grindargs=foo    Optional command-line args to pass to cachegrind.
  --help             Display this help.
  -j|--jobs=N        Run N jobs in parallel [default 1].
  --norm=perl        Which perl column to treat as 100%; may be a column
                       number (0..N-1) or a perl executable name or label;
                       [default: 0].
  --perlargs=foo     Optional command-line args to pass to each perl being run.
  --raw              Display raw data counts rather than percentages.
  --sort=field:perl  Sort the tests based on the value of 'field' in the
                       column 'perl'. The perl value is as per --norm.
  -r|--read=file     Read in previously saved data from the specified file.
  --tests=FOO        Select only the specified tests from the benchmarks file;
                       FOO may be either of the form 'foo,bar' or '/regex/';
                       [default: all tests].
  --verbose          Display progress information.
  -w|--write=file    Save the raw data to the specified file.

--action is one of:
    grind      run the code under cachegrind
    selftest   perform a selftest; produce TAP output

The command line ends with one or more specified perl executables,
which will be searched for in the current \$PATH. Each binary name may
have an optional =LABEL appended, which will be used rather than the
executable name in output. E.g.

    perl-5.20.1=PRE-BUGFIX  perl-5.20.1-new=POST-BUGFIX
EOF
}

my %OPTS = (
    action    => 'grind',
    average   => 0,
    benchfile => 't/perf/benchmarks',
    bisect    => undef,
    compact   => undef,
    debug     => 0,
    grindargs => '',
    fields    => undef,
    jobs      => 1,
    norm      => 0,
    perlargs  => '',
    raw       => 0,
    read      => undef,
    sort      => undef,
    tests     => undef,
    verbose   => 0,
    write     => undef,
);


# process command-line args and call top-level action

{
    GetOptions(
        'action=s'    => \$OPTS{action},
        'average'     => \$OPTS{average},
        'benchfile=s' => \$OPTS{benchfile},
        'bisect=s'    => \$OPTS{bisect},
        'compact=s'   => \$OPTS{compact},
        'debug'       => \$OPTS{debug},
        'grindargs=s' => \$OPTS{grindargs},
        'help'        => \$OPTS{help},
        'fields=s'    => \$OPTS{fields},
        'jobs|j=i'    => \$OPTS{jobs},
        'norm=s'      => \$OPTS{norm},
        'perlargs=s'  => \$OPTS{perlargs},
        'raw'         => \$OPTS{raw},
        'read|r=s'    => \$OPTS{read},
        'sort=s'      => \$OPTS{sort},
        'tests=s'     => \$OPTS{tests},
        'verbose'     => \$OPTS{verbose},
        'write|w=s'   => \$OPTS{write},
    ) or usage;

    usage if $OPTS{help};


    if (defined $OPTS{read} and defined $OPTS{write}) {
        die "Error: can't specify both --read and --write options\n";
    }

    if (defined $OPTS{read} or defined $OPTS{write}) {
        # fail early if it's not present
        require JSON::PP;
    }

    if (defined $OPTS{fields}) {
        my @f = split /,/, $OPTS{fields};
        for (@f) {
            die "Error: --fields: unknown field '$_'\n"
                unless $VALID_FIELDS{$_};
        }
        my %f = map { $_ => 1 } @f;
        $OPTS{fields} = \%f;
    }

    my %valid_actions = qw(grind 1 selftest 1);
    unless ($valid_actions{$OPTS{action}}) {
        die "Error: unrecognised action '$OPTS{action}'\n"
            . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
    }

    if (defined $OPTS{sort}) {
        my @s = split /:/, $OPTS{sort};
        if (@s != 2) {
            die "Error: --sort argument should be of the form field:perl: "
                . "'$OPTS{sort}'\n";
        }
        my ($field, $perl) = @s;
        die "Error: --sort: unknown field '$field'\n"
            unless $VALID_FIELDS{$field};
        # the 'perl' value will be validated later, after we have processed
        # the perls
        $OPTS{'sort-field'} = $field;
        $OPTS{'sort-perl'}  = $perl;
    }

    if ($OPTS{action} eq 'selftest') {
        if (@ARGV) {
            die "Error: no perl executables may be specified with --action=selftest\n"
        }
    }
    elsif (defined $OPTS{bisect}) {
        die "Error: exactly one perl executable must be specified for bisect\n"
            unless @ARGV == 1;
        die "Error: Can't specify both --bisect and --read\n"
            if defined $OPTS{read};
        die "Error: Can't specify both --bisect and --write\n"
            if defined $OPTS{write};
    }
    elsif (defined $OPTS{read}) {
        if (@ARGV) {
            die "Error: no perl executables may be specified with --read\n"
        }
    }
    elsif ($OPTS{raw}) {
        unless (@ARGV) {
            die "Error: at least one perl executable must be specified\n";
        }
    }
    else {
        unless (@ARGV >= 2) {
            die "Error: at least two perl executables must be specified\n";
        }
    }

    if ($OPTS{action} eq 'grind') {
        do_grind(\@ARGV);
    }
    elsif ($OPTS{action} eq 'selftest') {
        do_selftest();
    }
}
exit 0;


# Given a hash ref keyed by test names, filter it by deleting unwanted
# tests, based on $OPTS{tests}.

sub filter_tests {
    my ($tests) = @_;

    my $opt = $OPTS{tests};
    return unless defined $opt;

    my @tests;

    if ($opt =~ m{^/}) {
        $opt =~ s{^/(.+)/$}{$1}
            or die "Error: --tests regex must be of the form /.../\n";
        for (keys %$tests) {
            delete $tests->{$_} unless /$opt/;
        }
    }
    else {
        my %t;
        for (split /,/, $opt) {
            die "Error: no such test found: '$_'\n"
                . ($OPTS{verbose} ? " have: @{[ sort keys %$tests ]}\n" : "")
                unless exists $tests->{$_};
            $t{$_} = 1;
        }
        for (keys %$tests) {
            delete $tests->{$_} unless exists $t{$_};
        }
    }
}


# Read in the test file, and filter out any tests excluded by $OPTS{tests}.
# Return a hash ref { testname => { test }, ... } and an array ref of
# the original test name order.

sub read_tests_file {
    my ($file) = @_;

    my $ta = do $file;
    unless ($ta) {
        die "Error: can't parse '$file': $@\n" if $@;
        die "Error: can't read '$file': $!\n";
    }

    my @orig_order;
    for (my $i=0; $i < @$ta; $i += 2) {
        push @orig_order, $ta->[$i];
    }

    my $t = { @$ta };
    filter_tests($t);
    return $t, \@orig_order;
}


# Process the perl/column argument of options like --norm and --sort.
# Return the index of the matching perl.
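# For example (values illustrative), given two PUTs perlA=old and perlB=new,
# each of '--norm=1', '--norm=perlB' and '--norm=new' selects the second
# perl, i.e. index 1.
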
sub select_a_perl {
    my ($perl, $perls, $who) = @_;

    if ($perl =~ /^[0-9]$/) {
        die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
            unless $perl < @$perls;
        return $perl;
    }
    else {
        my @perl = grep    $perls->[$_][0] eq $perl
                        || $perls->[$_][1] eq $perl,
                        0..$#$perls;
        die "Error: $who: unrecognised perl '$perl'\n"
            unless @perl;
        die "Error: $who: ambiguous perl '$perl'\n"
            if @perl > 1;
        return $perl[0];
    }
}


# Validate the list of perl=label (+ cmdline options) on the command line.
# Return a list of [ exe, label, env-hashref, cmdline-options ] tuples,
# i.e. PUTs (perls under test).
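#
# For example (values illustrative, assuming perlA and perlB are runnable
# perls), a command line of
#
#     perlA=fast perlA=slow -Mstrict perlB=withenv:FOO=1,BAR=2
#
# would yield the PUTs
#
#     [ 'perlA', 'fast',    {} ]
#     [ 'perlA', 'slow',    {}, '-Mstrict' ]
#     [ 'perlB', 'withenv', { FOO => 1, BAR => 2 } ]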

sub process_puts {
    my @res_puts; # returned, each item is [ perlexe, label, env-hashref, @putargs ]
    my %seen;
    my @putargs; # collect not-perls into args per PUT

    for my $p (reverse @_) {
        push @putargs, $p and next if $p =~ /^-/; # not-perl, don't send to qx//

        my ($perl, $label, $env) = split /[=:,]/, $p, 3;
        $label //= $perl;
        $label = $perl.$label if $label =~ /^\+/;
        die "$label cannot be used on 2 different PUTs\n" if $seen{$label}++;

        my %env;
        if ($env) {
            %env = split /[=,]/, $env;
        }
        my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
        if ($r eq "ok\n") {
            push @res_puts, [ $perl, $label, \%env, reverse @putargs ];
            @putargs = ();
            warn "Added Perl-Under-Test: [ @{[@{$res_puts[-1]}]} ]\n"
                if $OPTS{verbose};
        } else {
            warn "PUT-args: @putargs + a not-perl: $p $r\n"
                if $OPTS{verbose};
            push @putargs, $p; # not-perl
        }
    }
    return reverse @res_puts;
}



# Return a string containing perl test code wrapped in a loop
# that runs $ARGV[0] times
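#
# For example (illustrative), make_perl_prog('foo::bar', 'assign to a lexical',
# 'my $x', '$x = 1') returns roughly:
#
#     # assign to a lexical
#     package foo::bar;
#     BEGIN { srand(0) }
#     my $x;
#     for my $__loop__ (1..$ARGV[0]) {
#         $x = 1;
#     }
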
sub make_perl_prog {
    my ($test, $desc, $setup, $code) = @_;

    return <<EOF;
# $desc
package $test;
BEGIN { srand(0) }
$setup;
for my \$__loop__ (1..\$ARGV[0]) {
    $code;
}
EOF
}


# Parse the output from cachegrind. Return a hash ref.
# See do_selftest() for examples of the output format.

sub parse_cachegrind {
    my ($output, $id, $perl) = @_;

    my %res;

    my @lines = split /\n/, $output;
    for (@lines) {
        unless (s/(==\d+==)|(--\d+--) //) {
            die "Error: while executing $id:\n"
              . "unexpected code or cachegrind output:\n$_\n";
        }
        if (/I refs:\s+([\d,]+)/) {
            $res{Ir} = $1;
        }
        elsif (/I1 misses:\s+([\d,]+)/) {
            $res{Ir_m1} = $1;
        }
        elsif (/LLi misses:\s+([\d,]+)/) {
            $res{Ir_mm} = $1;
        }
        elsif (/D refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
            @res{qw(Dr Dw)} = ($1,$2);
        }
        elsif (/D1 misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
            @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
        }
        elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
            @res{qw(Dr_mm Dw_mm)} = ($1,$2);
        }
        elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
            @res{qw(COND IND)} = ($1,$2);
        }
        elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
            @res{qw(COND_m IND_m)} = ($1,$2);
        }
    }

    for my $field (keys %VALID_FIELDS) {
        die "Error: can't parse '$field' field from cachegrind output:\n$output"
            unless exists $res{$field};
        $res{$field} =~ s/,//g;
    }

    return \%res;
}


# Handle the 'grind' action

sub do_grind {
    my ($perl_args) = @_; # the residue of @ARGV after option processing

    my ($loop_counts, $perls, $results, $tests, $order);
    my ($bisect_field, $bisect_min, $bisect_max);

    if (defined $OPTS{bisect}) {
        ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
        die "Error: --bisect option must be of form 'field,integer,integer'\n"
            unless
                    defined $bisect_max
                and $bisect_min =~ /^[0-9]+$/
                and $bisect_max =~ /^[0-9]+$/;

        die "Error: unrecognised field '$bisect_field' in --bisect option\n"
            unless $VALID_FIELDS{$bisect_field};

        die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
            if $bisect_min > $bisect_max;
    }

    if (defined $OPTS{read}) {
        open my $in, '<:encoding(UTF-8)', $OPTS{read}
            or die "Error: can't open $OPTS{read} for reading: $!\n";
        my $data = do { local $/; <$in> };
        close $in;

        my $hash = JSON::PP::decode_json($data);
        if (int($FORMAT_VERSION) < int($hash->{version})) {
            die "Error: unsupported version $hash->{version} in file "
              . "'$OPTS{read}' (too new)\n";
        }
        ($loop_counts, $perls, $results, $tests, $order) =
            @$hash{qw(loop_counts perls results tests order)};

        filter_tests($results);
        filter_tests($tests);

        if (!$order) {
            $order = [ sort keys %$tests ];
        }
    }
    else {
        # How many times to execute the loop for the two trials. The lower
        # value is intended to do the loop enough times that branch
        # prediction has taken hold; the higher loop allows us to see the
        # branch misses after that
        $loop_counts = [10, 20];

        ($tests, $order) = read_tests_file($OPTS{benchfile});
        die "Error: only a single test may be specified with --bisect\n"
            if defined $OPTS{bisect} and keys %$tests != 1;

        $perls = [ process_puts(@$perl_args) ];


        $results = grind_run($tests, $order, $perls, $loop_counts);
    }

    # now that we have a list of perls, use it to process the
    # 'perl' component of the --norm and --sort args

    $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
    if (defined $OPTS{'sort-perl'}) {
        $OPTS{'sort-perl'} =
                select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
    }

    if (defined $OPTS{'compact'}) {
        $OPTS{'compact'} =
                select_a_perl($OPTS{'compact'}, $perls, "--compact");
    }
    if (defined $OPTS{write}) {
        my $json = JSON::PP::encode_json({
            version     => $FORMAT_VERSION,
            loop_counts => $loop_counts,
            perls       => $perls,
            results     => $results,
            tests       => $tests,
            order       => $order,
        });

        open my $out, '>:encoding(UTF-8)', $OPTS{write}
            or die "Error: can't open $OPTS{write} for writing: $!\n";
        print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
        close $out or die "Error: closing file '$OPTS{write}': $!\n";
    }
    else {
        my ($processed, $averages) =
            grind_process($results, $perls, $loop_counts);

        if (defined $OPTS{bisect}) {
            my @r = values %$results;
            die "Panic: expected exactly one test result in bisect\n"
                if @r != 1;
            @r = values %{$r[0]};
            die "Panic: expected exactly one perl result in bisect\n"
                if @r != 1;
            my $c = $r[0]{$bisect_field};
            die "Panic: no result in bisect for field '$bisect_field'\n"
                unless defined $c;
            exit 0 if $bisect_min <= $c and $c <= $bisect_max;
            exit 1;
        }
        elsif (defined $OPTS{compact}) {
            grind_print_compact($processed, $averages, $OPTS{compact},
                                $perls, $tests, $order);
        }
        else {
            grind_print($processed, $averages, $perls, $tests, $order);
        }
    }
}


# Run cachegrind for every test/perl combo.
# It may run several processes in parallel when -j is specified.
# Return a hash ref suitable for input to grind_process()
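# (specifically, as stored below:
#     $results{test_name}{perl_label}[$active][$loop_index]
#         = { field_name => count, ... }).
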
sub grind_run {
    my ($tests, $order, $perls, $counts) = @_;

    # Build a list of all the jobs to run

    my @jobs;

    for my $test (grep $tests->{$_}, @$order) {

        # Create two test progs: one with an empty loop and one with code.
        # Note that the empty loop is actually '{1;}' rather than '{}';
        # this causes the loop to have a single nextstate rather than a
        # stub op, so more closely matches the active loop; e.g.:
        #     {1;}    => nextstate;                            unstack
        #     {$x=1;} => nextstate; const; gvsv; sassign;      unstack
        my @prog = (
            make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
            make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
        );

        for my $p (@$perls) {
            my ($perl, $label, $env, @putargs) = @$p;

            # Run both the empty loop and the active loop
            # $counts->[0] and $counts->[1] times.

            for my $i (0,1) {
                for my $j (0,1) {
                    my $envstr = '';
                    if (ref $env) {
                        $envstr .= "$_=$env->{$_} " for sort keys %$env;
                    }
                    my $cmd = "PERL_HASH_SEED=0 $envstr"
                            . "valgrind --tool=cachegrind --branch-sim=yes "
                            . "--cachegrind-out-file=/dev/null "
                            . "$OPTS{grindargs} "
                            . "$perl $OPTS{perlargs} @putargs - $counts->[$j] 2>&1";
                    # for debugging and error messages
                    my $id = "$test/$label "
                        . ($i ? "active" : "empty") . "/"
                        . ($j ? "long" : "short") . " loop";

                    push @jobs, {
                        test   => $test,
                        perl   => $perl,
                        plabel => $label,
                        cmd    => $cmd,
                        prog   => $prog[$i],
                        active => $i,
                        loopix => $j,
                        id     => $id,
                    };
                }
            }
        }
    }

    # Execute each cachegrind and store the results in %results.

    local $SIG{PIPE} = 'IGNORE';

    my $max_jobs = $OPTS{jobs};
    my $running  = 0; # count of executing jobs
    my %pids;         # map pids to jobs
    my %fds;          # map fds to jobs
    my %results;
    my $select = IO::Select->new();

    while (@jobs or $running) {

        if ($OPTS{debug}) {
            printf "Main loop: pending=%d running=%d\n",
                scalar(@jobs), $running;
        }

        # Start new jobs

        while (@jobs && $running < $max_jobs) {
            my $job = shift @jobs;
            my ($id, $cmd) = @$job{qw(id cmd)};

            my ($in, $out, $pid);
            warn "Starting $id\n" if $OPTS{verbose};
            eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
                or die "Error: while starting cachegrind subprocess"
                      ." for $id:\n$@";
            $running++;
            $pids{$pid}    = $job;
            $fds{"$out"}   = $job;
            $job->{out_fd} = $out;
            $job->{output} = '';
            $job->{pid}    = $pid;

            $out->blocking(0);
            $select->add($out);

            if ($OPTS{debug}) {
                print "Started pid $pid for $id\n";
            }

            # Note:
            # In principle we should write to $in in the main select loop,
            # since it may block. In reality,
            # a) the code we write to the perl process's stdin is likely
            #    to be less than the OS's pipe buffer size;
            # b) by the time the perl process has read in all its stdin,
            #    the only output it should have generated is a few lines
            #    of cachegrind output preamble.
            # If these assumptions change, then perform the following print
            # in the select loop instead.

            print $in $job->{prog};
            close $in;
        }

        # Get output of running jobs

        if ($OPTS{debug}) {
            printf "Select: waiting on (%s)\n",
                join ', ', sort { $a <=> $b } map $fds{$_}{pid},
                            $select->handles;
        }

        my @ready = $select->can_read;

        if ($OPTS{debug}) {
            printf "Select: pids (%s) ready\n",
                join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
        }

        unless (@ready) {
            die "Panic: select returned no file handles\n";
        }

        for my $fd (@ready) {
            my $j = $fds{"$fd"};
            my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
            unless (defined $r) {
                die "Panic: Read from process running $j->{id} gave:\n$!";
            }
            next if $r;

            # EOF

            if ($OPTS{debug}) {
                print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
            }

            $select->remove($j->{out_fd});
            close($j->{out_fd})
                or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
            $running--;
            delete $fds{"$j->{out_fd}"};
            my $output = $j->{output};

            if ($OPTS{debug}) {
                my $p = $j->{prog};
                $p =~ s/^/    : /mg;
                my $o = $output;
                $o =~ s/^/    : /mg;

                print "\n$j->{id}/\nCommand: $j->{cmd}\n"
                    . "Input:\n$p"
                    . "Output\n$o";
            }

            $results{$j->{test}}{$j->{plabel}}[$j->{active}][$j->{loopix}]
                    = parse_cachegrind($output, $j->{id}, $j->{perl});
        }

        # Reap finished jobs

        while (1) {
            my $kid = waitpid(-1, WNOHANG);
            my $ret = $?;
            last if $kid <= 0;

            unless (exists $pids{$kid}) {
                die "Panic: reaped unexpected child $kid";
            }
            my $j = $pids{$kid};
            if ($ret) {
                die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
                    . "with the following output:\n$j->{output}\n";
            }
            delete $pids{$kid};
        }
    }

    return \%results;
}




# grind_process(): process the data that has been extracted from
# cachegrind's output.
#
# $res is of the form ->{benchmark_name}{perl_name}[active][count]{field_name},
# where active is 0 or 1 indicating an empty or active loop,
# count is 0 or 1 indicating a short or long loop. E.g.
#
#    $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][1]{Dw_mm}
#
# The $res data structure is modified in-place by this sub.
#
# $perls is [ [ perl-exe, perl-label ], .... ].
#
# $counts is [ N, M ] indicating the counts for the short and long loops.
#
#
# return \%output, \%averages, where
#
#    $output{benchmark_name}{perl_name}{field_name} = N
#    $averages{perl_name}{field_name} = M
#
# where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
# M is the average raw count over all tests ($OPTS{raw}), or
# 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
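#
# For example (numbers illustrative): in non-raw mode, if perlN used 0.8 and
# 0.5 of the cycles of the norm perl on two tests, the per-test displays are
# 1/0.8 = 1.25 and 1/0.5 = 2.0, while the average is 1/((0.8 + 0.5)/2),
# i.e. about 1.54, displayed as roughly 154% of the norm.
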
sub grind_process {
    my ($res, $perls, $counts) = @_;

    # Process the four results for each test/perl combo:
    # Convert
    #     $res->{benchmark_name}{perl_name}[active][count]{field_name} = n
    # to
    #     $res->{benchmark_name}{perl_name}{field_name} = averaged_n
    #
    # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
    #     empty loops, eliminating startup time
    # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
    #     active loops, eliminating startup time
    # (the two startup times may be different because different code
    # is being compiled); the difference of the two results above
    # divided by the count difference is the time to execute the
    # active code once, eliminating both startup and loop overhead.
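    #
    # A worked example with made-up Ir counts and $counts = [10, 20]:
    #     empty loop:  short = 1000, long = 1100   (100/10 = 10 Ir per iteration)
    #     active loop: short = 1600, long = 2200   (600/10 = 60 Ir per iteration)
    # so the cost of one execution of the active code is
    #     ((2200 - 1600) - (1100 - 1000)) / (20 - 10) = (600 - 100) / 10 = 50 Ir
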
    for my $tests (values %$res) {
        for my $r (values %$tests) {
            my $r2;
            for (keys %{$r->[0][0]}) {
                my $n = (  ($r->[1][1]{$_} - $r->[1][0]{$_})
                         - ($r->[0][1]{$_} - $r->[0][0]{$_})
                        ) / ($counts->[1] - $counts->[0]);
                $r2->{$_} = $n;
            }
            $r = $r2;
        }
    }

    my %totals;
    my %counts;
    my %data;

    my $perl_norm = $perls->[$OPTS{norm}][1]; # the label of the reference perl

    for my $test_name (keys %$res) {
        my $res1 = $res->{$test_name};
        my $res2_norm = $res1->{$perl_norm};
        for my $perl (keys %$res1) {
            my $res2 = $res1->{$perl};
            for my $field (keys %$res2) {
                my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});

                if ($OPTS{raw}) {
                    # Avoid annoying '-0.0' displays. Ideally this number
                    # should never be negative, but fluctuations in
                    # startup etc can theoretically make this happen
                    $q = 0 if ($q <= 0 && $q > -0.1);
                    $totals{$perl}{$field} += $q;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = $q;
                    next;
                }

                # $p and $q are notionally integer counts, but
                # due to variations in startup etc, it's possible for a
                # count which is supposedly zero to be calculated as a
                # small positive or negative value.
                # In this case, set it to zero. Further below we
                # special-case zeros to avoid division by zero errors etc.

                $p = 0.0 if $p < 0.01;
                $q = 0.0 if $q < 0.01;

                if ($p == 0.0 && $q == 0.0) {
                    # Both perls gave a count of zero, so no change:
                    # treat as 100%
                    $totals{$perl}{$field} += 1;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = 1;
                }
                elsif ($p == 0.0 || $q == 0.0) {
                    # If either count is zero, there were too few events
                    # to give a meaningful ratio (and we will end up with
                    # division by zero if we try). Mark the result undef,
                    # indicating that it shouldn't be displayed; and skip
                    # adding to the average
                    $data{$test_name}{$perl}{$field} = undef;
                }
                else {
                    # For averages, we record q/p rather than p/q.
                    # Consider a test where perl_norm took 1000 cycles
                    # and perlN took 800 cycles. For the individual
                    # results we display p/q, or 1.25; i.e. a quarter
                    # quicker. For the averages, we instead sum all
                    # the 0.8's, which gives the total cycles required to
                    # execute all tests, with all tests given equal
                    # weight. Later we reciprocate the final result,
                    # i.e. 1/(sum(qi/pi)/n)

                    $totals{$perl}{$field} += $q/$p;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = $p/$q;
                }
            }
        }
    }

    # Calculate averages based on %totals and %counts accumulated earlier.

    my %averages;
    for my $perl (keys %totals) {
        my $t = $totals{$perl};
        for my $field (keys %$t) {
            $averages{$perl}{$field} = $OPTS{raw}
                ? $t->{$field} / $counts{$perl}{$field}
                # reciprocal - see comments above
                : $counts{$perl}{$field} / $t->{$field};
        }
    }

    return \%data, \%averages;
}



# print a standard blurb at the start of the grind display

sub grind_blurb {
    my ($perls) = @_;

    print <<EOF;
Key:
    Ir   Instruction read
    Dr   Data read
    Dw   Data write
    COND conditional branches
    IND  indirect branches
    _m   branch predict miss
    _m1  level 1 cache miss
    _mm  last cache (e.g. L3) miss
    -    indeterminate percentage (e.g. 1/0)

EOF

    if ($OPTS{raw}) {
        print "The numbers represent raw counts per loop iteration.\n";
    }
    else {
        print <<EOF;
The numbers represent relative counts per loop iteration, compared to
$perls->[$OPTS{norm}][1] at 100.0%.
Higher is better: for example, using half as many instructions gives 200%,
while using twice as many gives 50%.
EOF
    }
}


# return a sorted list of the test names, plus 'AVERAGE'

sub sorted_test_names {
    my ($results, $order, $perls) = @_;

    my @names;
    unless ($OPTS{average}) {
        if (defined $OPTS{'sort-field'}) {
            my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
            my $perl = $perls->[$perlix][0];
            @names = sort
                {
                        $results->{$a}{$perl}{$field}
                    <=> $results->{$b}{$perl}{$field}
                }
                keys %$results;
        }
        else {
            @names = grep $results->{$_}, @$order;
        }
    }

    # No point in displaying average for only one test.
    push @names, 'AVERAGE' unless @names == 1;
    @names;
}


# grind_print(): display the tabulated results of all the cachegrinds.
#
# Arguments are of the form:
#    $results->{benchmark_name}{perl_name}{field_name} = N
#    $averages->{perl_name}{field_name} = M
#    $perls = [ [ perl-exe, perl-label ], ... ]
#    $tests->{test_name}{desc => ..., ...}

sub grind_print {
    my ($results, $averages, $perls, $tests, $order) = @_;

    my @perl_names  = map $_->[0], @$perls;
    my @perl_labels = map $_->[1], @$perls;
    my %perl_labels;
    $perl_labels{$_->[0]} = $_->[1] for @$perls;

    my $field_label_width = 6;
    # Calculate the width to display for each column.
    my $min_width = $OPTS{raw} ? 8 : 6;
    my @widths = map { length($_) < $min_width ? $min_width : length($_) }
                     @perl_labels;

    # Print standard header.
    grind_blurb($perls);

    my @test_names = sorted_test_names($results, $order, $perls);

    # If only a single field is to be displayed, use a more compact
    # format with only a single line of output per test.

    my $one_field = defined $OPTS{fields} && keys(%{$OPTS{fields}}) == 1;

    if ($one_field) {
        print "Results for field " . (keys(%{$OPTS{fields}}))[0] . ".\n";

        # The first column will now contain test names rather than
        # field names; calculate the max width.

        $field_label_width = 0;
        for (@test_names) {
            $field_label_width = length if length > $field_label_width;
        }

        # Print the perl executables header.

        print "\n";
        for my $i (0,1) {
            print " " x $field_label_width;
            for (0..$#widths) {
                printf " %*s", $widths[$_],
                    $i ? ('-' x $widths[$_]) : $perl_labels[$_];
            }
            print "\n";
        }
    }

    # Dump the results for each test.

    for my $test_name (@test_names) {
        my $doing_ave = ($test_name eq 'AVERAGE');
        my $res1 = $doing_ave ? $averages : $results->{$test_name};

        unless ($one_field) {
            print "\n$test_name";
            print "\n$tests->{$test_name}{desc}" unless $doing_ave;
            print "\n\n";

            # Print the perl executables header.
            for my $i (0,1) {
                print " " x $field_label_width;
                for (0..$#widths) {
                    printf " %*s", $widths[$_],
                        $i ? ('-' x $widths[$_]) : $perl_labels[$_];
                }
                print "\n";
            }
        }

        for my $field (qw(Ir Dr Dw COND IND
                          N
                          COND_m IND_m
                          N
                          Ir_m1 Dr_m1 Dw_m1
                          N
                          Ir_mm Dr_mm Dw_mm
                      ))
        {
            next if $OPTS{fields} and ! exists $OPTS{fields}{$field};

            if ($field eq 'N') {
                print "\n";
                next;
            }

            if ($one_field) {
                printf "%-*s", $field_label_width, $test_name;
            }
            else {
                printf "%*s", $field_label_width, $field;
            }

            for my $i (0..$#widths) {
                my $res2 = $res1->{$perl_labels[$i]};
                my $p = $res2->{$field};
                if (!defined $p) {
                    printf " %*s", $widths[$i], '-';
                }
                elsif ($OPTS{raw}) {
                    printf " %*.1f", $widths[$i], $p;
                }
                else {
                    printf " %*.2f", $widths[$i], $p * 100;
                }
            }
            print "\n";
        }
    }
}



# grind_print_compact(): like grind_print(), but display a single perl
# in a compact form. Has an additional arg, $which_perl, which specifies
# which perl to display.
#
# Arguments are of the form:
#    $results->{benchmark_name}{perl_name}{field_name} = N
#    $averages->{perl_name}{field_name} = M
#    $perls = [ [ perl-exe, perl-label ], ... ]
#    $tests->{test_name}{desc => ..., ...}

sub grind_print_compact {
    my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;


    # the width to display for each column.
    my $width = $OPTS{raw} ? 7 : 6;

    # Print standard header.
    grind_blurb($perls);

    print "\nResults for $perls->[$which_perl][1]\n\n";

    my @test_names = sorted_test_names($results, $order, $perls);

    # Dump the results for each test.

    my @fields = qw( Ir Dr Dw
                     COND IND
                     COND_m IND_m
                     Ir_m1 Dr_m1 Dw_m1
                     Ir_mm Dr_mm Dw_mm
                   );
    if ($OPTS{fields}) {
        @fields = grep exists $OPTS{fields}{$_}, @fields;
    }

    printf " %*s", $width, $_ for @fields;
    print "\n";
    printf " %*s", $width, '------' for @fields;
    print "\n";

    for my $test_name (@test_names) {
        my $doing_ave = ($test_name eq 'AVERAGE');
        my $res = $doing_ave ? $averages : $results->{$test_name};
        $res = $res->{$perls->[$which_perl][0]};

        for my $field (@fields) {
            my $p = $res->{$field};
            if (!defined $p) {
                printf " %*s", $width, '-';
            }
            elsif ($OPTS{raw}) {
                printf " %*.1f", $width, $p;
            }
            else {
                printf " %*.2f", $width, $p * 100;
            }

        }

        print " $test_name\n";
    }
}


# do_selftest(): check that we can parse known cachegrind
# output formats. If the output of cachegrind changes, add a *new*
# test here; keep the old tests to make sure we continue to parse
# old cachegrinds.

sub do_selftest {

    my @tests = (
        'standard',
        <<'EOF',
==32350== Cachegrind, a cache and branch-prediction profiler
==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
==32350==
--32350-- warning: L3 cache found, using its data for the LL simulation.
==32350==
==32350== I refs:        1,124,055
==32350== I1 misses:         5,573
==32350== LLi misses:        3,338
==32350== I1 miss rate:       0.49%
==32350== LLi miss rate:      0.29%
==32350==
==32350== D refs:          404,275  (259,191 rd + 145,084 wr)
==32350== D1 misses:         9,608  (  6,098 rd +   3,510 wr)
==32350== LLd misses:        5,794  (  2,781 rd +   3,013 wr)
==32350== D1 miss rate:        2.3% (    2.3%   +     2.4%  )
==32350== LLd miss rate:       1.4% (    1.0%   +     2.0%  )
==32350==
==32350== LL refs:          15,181  ( 11,671 rd +   3,510 wr)
==32350== LL misses:         9,132  (  6,119 rd +   3,013 wr)
==32350== LL miss rate:        0.5% (    0.4%   +     2.0%  )
==32350==
==32350== Branches:        202,372  (197,050 cond +   5,322 ind)
==32350== Mispredicts:      19,153  ( 17,742 cond +   1,411 ind)
==32350== Mispred rate:        9.4% (    9.0%     +    26.5%   )
EOF
        {
            COND   => 197050,
            COND_m => 17742,
            Dr     => 259191,
            Dr_m1  => 6098,
            Dr_mm  => 2781,
            Dw     => 145084,
            Dw_m1  => 3510,
            Dw_mm  => 3013,
            IND    => 5322,
            IND_m  => 1411,
            Ir     => 1124055,
            Ir_m1  => 5573,
            Ir_mm  => 3338,
        },
    );

    for ('t', '.') {
        last if require "$_/test.pl";
    }
    plan(@tests / 3 * keys %VALID_FIELDS);

    while (@tests) {
        my $desc     = shift @tests;
        my $output   = shift @tests;
        my $expected = shift @tests;
        my $p = parse_cachegrind($output);
        for (sort keys %VALID_FIELDS) {
            is($p->{$_}, $expected->{$_}, "$desc, $_");
        }
    }
}