This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perly.c: silence a -Wempty-body warning
[perl5.git] / Porting / bench.pl
CommitLineData
9e7973fa
DM
1#!/usr/bin/perl
2#
3# A tool for analysing the performance of the code snippets found in
4# t/perf/benchmarks or similar
5
6
7=head1 NAME
8
9bench.pl - Compare the performance of perl code snippets across multiple
10perls.
11
12=head1 SYNOPSIS
13
4a1358c2
FC
14 # Basic: run the tests in t/perf/benchmarks against two or
15 # more perls
9e7973fa 16
32dfbb33
JC
17 bench.pl [options] -- perlA[=labelA] perlB[=labelB] ...
18
19 # run the tests against same perlA 2x, with and without extra
20 # options
21
22 bench.pl [options] -- perlA=fast PerlA=slow -Mstrict -Dpsltoc
9e7973fa
DM
23
24 # Run bench.pl's own built-in sanity tests
25
26 bench.pl --action=selftest
27
4044748b
YO
28 # Run bench on blead, which is then modified and timed again
29
30 bench.pl [options] --write=blead.time -- ./perl=blead
31 # hack hack hack
c3bb902a 32 bench.pl --read=blead.time -- ./perl=hacked
4044748b
YO
33
34 # You can also combine --read with --write
c3bb902a 35 bench.pl --read=blead.time --write=last.time -- ./perl=hacked
4044748b 36
9e7973fa
DM
37=head1 DESCRIPTION
38
39By default, F<bench.pl> will run code snippets found in
40F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
41how many instruction reads, data writes, branches, cache misses, etc. that
42one execution of the snippet uses. It will run them against two or more
43perl executables and show how much each test has gotten better or worse.
44
45It is modelled on the F<perlbench> tool, but since it measures instruction
46reads etc., rather than timings, it is much more precise and reproducible.
e34630bf 47It is also considerably faster, and is capable of running tests in
9e7973fa
DM
48parallel (with C<-j>). Rather than displaying a single relative
49percentage per test/perl combination, it displays values for 13 different
50measurements, such as instruction reads, conditional branch misses etc.
51
52There are options to write the raw data to a file, and to read it back.
53This means that you can view the same run data in different views with
4044748b
YO
54different selection and sort options. You can also use this mechanism
55to save the results of timing one perl, and then read it back while timing
56a modification, so that you dont have rerun the same tests on the same
57perl over and over, or have two perls built at the same time.
9e7973fa
DM
58
59The optional C<=label> after each perl executable is used in the display
4044748b
YO
60output. If you are doing a two step benchmark then you should provide
61a label for at least the "base" perl.
9e7973fa
DM
62
63=head1 OPTIONS
64
65=over 4
66
67=item *
68
69--action=I<foo>
70
71What action to perform. The default is I<grind>, which runs the benchmarks
72using I<cachegrind> as the back end. The only other action at the moment is
73I<selftest>, which runs some basic sanity checks and produces TAP output.
74
75=item *
76
77--average
78
79Only display the overall average, rather than the results for each
80individual test.
81
82=item *
83
84--benchfile=I<foo>
85
86The path of the file which contains the benchmarks (F<t/perf/benchmarks>
87by default).
88
89=item *
90
91--bisect=I<field,minval,maxval>
92
93Run a single test against one perl and exit with a zero status if the
94named field is in the specified range; exit 1 otherwise. It will complain
95if more than one test or perl has been specified. It is intended to be
96called as part of a bisect run, to determine when something changed.
97For example,
98
4a1358c2
FC
99 bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
100 ./miniperl
9e7973fa
DM
101
102might be called from bisect to find when the number of instruction reads
103for test I<foo> falls outside the range 100..105.
104
105=item *
106
df3d7b3a
DM
107--compact=<Iperl>
108
109Display the results for a single perl executable in a compact form.
110Which perl to display is specified in the same manner as C<--norm>.
111
112=item *
113
9e7973fa
DM
114--debug
115
116Enable verbose debugging output.
117
118=item *
119
120--fields=I<a,b,c>
121
122Display only the specified fields; for example,
123
124 --fields=Ir,Ir_m,Ir_mm
125
126If only one field is selected, the output is in more compact form.
127
128=item *
129
130--grindargs=I<foo>
131
8a094fee
JC
132Optional command-line arguments to pass to all cachegrind invocations.
133
134This option is appended to those which bench.pl uses for its own
135purposes; so it can be used to override them (see --debug output
136below), and can also be 'abused' to add redirects into the valgrind
137command invocation.
138
139For example, this writes PERL_MEM_LOG activity to foobar.$$, because
1403>foobar.$$ redirects fd 3, then perl under PERL_MEM_LOG writes to fd 3.
141
142 $ perl Porting/bench.pl --jobs=2 --verbose --debug \
143 --tests=call::sub::amp_empty \
144 \
145 --grindargs='--cachegrind-out-file=junk.$$ 3>foobar.$$' \
146 -- \
147 perl5.24.0 perl5.24.0:+memlog:PERL_MEM_LOG=3mst
148
149for the +memlog tests, this executes as: (shown via --debug, then prettyfied)
150
151 Command: PERL_HASH_SEED=0 PERL_MEM_LOG=3mst
152 valgrind --tool=cachegrind --branch-sim=yes
153 --cachegrind-out-file=/dev/null --cachegrind-out-file=junk.$$
154 3>foobar.$$ perl5.24.0 - 10 2>&1
155
156The result is that a set of junk.$$ files containing raw cachegrind
157output are written, and foobar.$$ contains the expected memlog output.
158
159Notes:
160
161Theres no obvious utility for those junk.$$ and foobar.$$ files, but
162you can have them anyway.
163
164The 3 in PERL_MEM_LOG=3mst is needed because the output would
165otherwize go to STDERR, and cause parse_cachegrind() to reject the
166test and die.
167
168The --grindargs redirect is needed to capture the memlog output;
169without it, the memlog output is written to fd3, around
170parse_cachegrind and effectively into /dev/null
171
172PERL_MEM_LOG is expensive when used.
173
174call::sub::amp_empty
175&foo function call with no args or body
176
177 perl5.24.0 perl5.24.0+memlog
178 ---------- -----------------
179 Ir 394.0 543477.5
180 Dr 161.0 146814.1
181 Dw 72.0 122304.6
182 COND 58.0 66796.4
183 IND 5.0 5537.7
184
185COND_m 0.0 6743.1
186 IND_m 5.0 1490.2
187
188 Ir_m1 0.0 683.7
189 Dr_m1 0.0 65.9
190 Dw_m1 0.0 8.5
191
192 Ir_mm 0.0 11.6
193 Dr_mm 0.0 10.6
194 Dw_mm 0.0 4.7
195
9e7973fa
DM
196
197=item *
198
199---help
200
201Display basic usage information.
202
203=item *
204
205-j I<N>
206--jobs=I<N>
207
208Run I<N> jobs in parallel (default 1). This determines how many cachegrind
209process will running at a time, and should generally be set to the number
210of CPUs available.
211
212=item *
213
214--norm=I<foo>
215
216Specify which perl column in the output to treat as the 100% norm.
217It may be a column number (0..N-1) or a perl executable name or label.
218It defaults to the leftmost column.
219
220=item *
221
222--perlargs=I<foo>
223
32dfbb33
JC
224Optional command-line arguments to pass to each perl-under-test
225(perlA, perlB in synopsis) For example, C<--perlargs=-Ilib>.
9e7973fa
DM
226
227=item *
228
229--raw
230
231Display raw data counts rather than percentages in the outputs. This
232allows you to see the exact number of intruction reads, branch misses etc.
233for each test/perl combination. It also causes the C<AVERAGE> display
234per field to be calculated based on the average of each tests's count
235rather than average of each percentage. This means that tests with very
236high counts will dominate.
237
238=item *
239
65f0b1c9 240--sort=I<field:perl>
9e7973fa
DM
241
242Order the tests in the output based on the value of I<field> in the
243column I<perl>. The I<perl> value is as per C<--norm>. For example
244
4a1358c2
FC
245 bench.pl --sort=Dw:perl-5.20.0 \
246 perl-5.16.0 perl-5.18.0 perl-5.20.0
9e7973fa
DM
247
248=item *
249
250-r I<file>
251--read=I<file>
252
253Read in saved data from a previous C<--write> run from the specified file.
254
255Requires C<JSON::PP> to be available.
256
257=item *
258
259--tests=I<FOO>
260
261Specify a subset of tests to run (or in the case of C<--read>, to display).
262It may be either a comma-separated list of test names, or a regular
263expression. For example
264
265 --tests=expr::assign::scalar_lex,expr::assign::2list_lex
266 --tests=/^expr::/
267
268=item *
269
270--verbose
271
272Display progress information.
273
274=item *
275
276-w I<file>
277--write=I<file>
278
279Save the raw data to the specified file. It can be read back later with
4044748b
YO
280C<--read>. If combined with C<--read> then the output file will be
281the merge of the file read and any additional perls added on the command
282line.
9e7973fa
DM
283
284Requires C<JSON::PP> to be available.
285
286=back
287
288=cut
289
290
291
292use 5.010000;
293use warnings;
294use strict;
d54523c4 295use Getopt::Long qw(:config no_auto_abbrev require_order);
9e7973fa
DM
296use IPC::Open2 ();
297use IO::Select;
c2d21e7a 298use IO::File;
9e7973fa
DM
299use POSIX ":sys_wait_h";
300
301# The version of the file format used to save data. We refuse to process
302# the file if the integer component differs.
303
304my $FORMAT_VERSION = 1.0;
305
306# The fields we know about
307
308my %VALID_FIELDS = map { $_ => 1 }
309 qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
310
311sub usage {
312 die <<EOF;
d54523c4 313usage: $0 [options] -- perl[=label] ...
9e7973fa
DM
314 --action=foo What action to perform [default: grind].
315 --average Only display average, not individual test results.
316 --benchfile=foo File containing the benchmarks;
317 [default: t/perf/benchmarks].
318 --bisect=f,min,max run a single test against one perl and exit with a
319 zero status if the named field is in the specified
320 range; exit 1 otherwise.
df3d7b3a
DM
321 --compact=perl Display the results of a single perl in compact form.
322 Which perl specified like --norm
9e7973fa
DM
323 --debug Enable verbose debugging output.
324 --fields=a,b,c Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
325 --grindargs=foo Optional command-line args to pass to cachegrind.
326 --help Display this help.
327 -j|--jobs=N Run N jobs in parallel [default 1].
328 --norm=perl Which perl column to treat as 100%; may be a column
329 number (0..N-1) or a perl executable name or label;
330 [default: 0].
331 --perlargs=foo Optional command-line args to pass to each perl to run.
332 --raw Display raw data counts rather than percentages.
65f0b1c9 333 --sort=field:perl Sort the tests based on the value of 'field' in the
9e7973fa
DM
334 column 'perl'. The perl value is as per --norm.
335 -r|--read=file Read in previously saved data from the specified file.
336 --tests=FOO Select only the specified tests from the benchmarks file;
337 FOO may be either of the form 'foo,bar' or '/regex/';
338 [default: all tests].
339 --verbose Display progress information.
340 -w|--write=file Save the raw data to the specified file.
341
342--action is one of:
343 grind run the code under cachegrind
344 selftest perform a selftest; produce TAP output
345
346The command line ends with one or more specified perl executables,
347which will be searched for in the current \$PATH. Each binary name may
348have an optional =LABEL appended, which will be used rather than the
349executable name in output. E.g.
350
351 perl-5.20.1=PRE-BUGFIX perl-5.20.1-new=POST-BUGFIX
352EOF
353}
354
355my %OPTS = (
356 action => 'grind',
357 average => 0,
358 benchfile => 't/perf/benchmarks',
359 bisect => undef,
df3d7b3a 360 compact => undef,
9e7973fa
DM
361 debug => 0,
362 grindargs => '',
363 fields => undef,
364 jobs => 1,
365 norm => 0,
366 perlargs => '',
367 raw => 0,
368 read => undef,
369 sort => undef,
370 tests => undef,
371 verbose => 0,
372 write => undef,
373);
374
375
376# process command-line args and call top-level action
377
378{
379 GetOptions(
380 'action=s' => \$OPTS{action},
381 'average' => \$OPTS{average},
382 'benchfile=s' => \$OPTS{benchfile},
383 'bisect=s' => \$OPTS{bisect},
df3d7b3a 384 'compact=s' => \$OPTS{compact},
9e7973fa
DM
385 'debug' => \$OPTS{debug},
386 'grindargs=s' => \$OPTS{grindargs},
387 'help' => \$OPTS{help},
388 'fields=s' => \$OPTS{fields},
389 'jobs|j=i' => \$OPTS{jobs},
390 'norm=s' => \$OPTS{norm},
391 'perlargs=s' => \$OPTS{perlargs},
392 'raw' => \$OPTS{raw},
393 'read|r=s' => \$OPTS{read},
394 'sort=s' => \$OPTS{sort},
395 'tests=s' => \$OPTS{tests},
396 'verbose' => \$OPTS{verbose},
397 'write|w=s' => \$OPTS{write},
398 ) or usage;
399
400 usage if $OPTS{help};
401
402
9e7973fa
DM
403 if (defined $OPTS{read} or defined $OPTS{write}) {
404 # fail early if it's not present
405 require JSON::PP;
406 }
407
408 if (defined $OPTS{fields}) {
409 my @f = split /,/, $OPTS{fields};
410 for (@f) {
411 die "Error: --fields: unknown field '$_'\n"
412 unless $VALID_FIELDS{$_};
413 }
414 my %f = map { $_ => 1 } @f;
415 $OPTS{fields} = \%f;
416 }
417
418 my %valid_actions = qw(grind 1 selftest 1);
419 unless ($valid_actions{$OPTS{action}}) {
420 die "Error: unrecognised action '$OPTS{action}'\n"
421 . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
422 }
423
424 if (defined $OPTS{sort}) {
425 my @s = split /:/, $OPTS{sort};
426 if (@s != 2) {
427 die "Error: --sort argument should be of the form field:perl: "
428 . "'$OPTS{sort}'\n";
429 }
430 my ($field, $perl) = @s;
431 die "Error: --sort: unknown field '$field\n"
432 unless $VALID_FIELDS{$field};
433 # the 'perl' value will be validated later, after we have processed
434 # the perls
435 $OPTS{'sort-field'} = $field;
436 $OPTS{'sort-perl'} = $perl;
437 }
438
439 if ($OPTS{action} eq 'selftest') {
440 if (@ARGV) {
441 die "Error: no perl executables may be specified with --read\n"
442 }
443 }
444 elsif (defined $OPTS{bisect}) {
445 die "Error: exactly one perl executable must be specified for bisect\n"
446 unless @ARGV == 1;
447 die "Error: Can't specify both --bisect and --read\n"
448 if defined $OPTS{read};
449 die "Error: Can't specify both --bisect and --write\n"
450 if defined $OPTS{write};
451 }
9e7973fa
DM
452
453 if ($OPTS{action} eq 'grind') {
454 do_grind(\@ARGV);
455 }
456 elsif ($OPTS{action} eq 'selftest') {
457 do_selftest();
458 }
459}
460exit 0;
461
462
463# Given a hash ref keyed by test names, filter it by deleting unwanted
464# tests, based on $OPTS{tests}.
465
466sub filter_tests {
467 my ($tests) = @_;
468
469 my $opt = $OPTS{tests};
470 return unless defined $opt;
471
472 my @tests;
473
474 if ($opt =~ m{^/}) {
475 $opt =~ s{^/(.+)/$}{$1}
476 or die "Error: --tests regex must be of the form /.../\n";
477 for (keys %$tests) {
478 delete $tests->{$_} unless /$opt/;
479 }
480 }
481 else {
482 my %t;
483 for (split /,/, $opt) {
24293753
JC
484 die "Error: no such test found: '$_'\n"
485 . ($OPTS{verbose} ? " have: @{[ sort keys %$tests ]}\n" : "")
486 unless exists $tests->{$_};
9e7973fa
DM
487 $t{$_} = 1;
488 }
489 for (keys %$tests) {
490 delete $tests->{$_} unless exists $t{$_};
491 }
492 }
4044748b 493 die "Error: no tests to run\n" unless %$tests;
9e7973fa
DM
494}
495
496
497# Read in the test file, and filter out any tests excluded by $OPTS{tests}
957d8930
DM
498# return a hash ref { testname => { test }, ... }
499# and an array ref of the original test names order,
9e7973fa
DM
500
501sub read_tests_file {
502 my ($file) = @_;
503
504 my $ta = do $file;
505 unless ($ta) {
506 die "Error: can't parse '$file': $@\n" if $@;
507 die "Error: can't read '$file': $!\n";
508 }
509
957d8930
DM
510 my @orig_order;
511 for (my $i=0; $i < @$ta; $i += 2) {
512 push @orig_order, $ta->[$i];
513 }
514
9e7973fa
DM
515 my $t = { @$ta };
516 filter_tests($t);
957d8930 517 return $t, \@orig_order;
9e7973fa
DM
518}
519
520
521# Process the perl/column argument of options like --norm and --sort.
522# Return the index of the matching perl.
523
524sub select_a_perl {
525 my ($perl, $perls, $who) = @_;
526
527 if ($perl =~ /^[0-9]$/) {
528 die "Error: $who value $perl outside range 0.." . $#$perls . "\n"
529 unless $perl < @$perls;
530 return $perl;
531 }
532 else {
533 my @perl = grep $perls->[$_][0] eq $perl
534 || $perls->[$_][1] eq $perl,
535 0..$#$perls;
536 die "Error: $who: unrecognised perl '$perl'\n"
537 unless @perl;
538 die "Error: $who: ambiguous perl '$perl'\n"
539 if @perl > 1;
540 return $perl[0];
541 }
542}
543
544
44b18357 545# Validate the list of perl=label (+ cmdline options) on the command line.
92683b8b
DM
546# Return a list of [ exe, label, cmdline-options ] tuples, i.e.
547# 'perl-under-test's (PUTs)
9e7973fa 548
d54523c4 549sub process_puts {
4044748b 550 my $read_perls= shift;
44b18357 551 my @res_puts; # returned, each item is [ perlexe, label, @putargs ]
4044748b 552 my %seen= map { $_->[1] => 1 } @$read_perls;
d54523c4
JC
553 my @putargs; # collect not-perls into args per PUT
554
555 for my $p (reverse @_) {
556 push @putargs, $p and next if $p =~ /^-/; # not-perl, dont send to qx//
557
60858fe8 558 my ($perl, $label, $env) = split /[=:,]/, $p, 3;
9e7973fa 559 $label //= $perl;
857716c9 560 $label = $perl.$label if $label =~ /^\+/;
92683b8b 561 die "$label cannot be used on 2 different perls under test\n" if $seen{$label}++;
955a736c 562
60858fe8
JC
563 my %env;
564 if ($env) {
565 %env = split /[=,]/, $env;
566 }
9e7973fa 567 my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
d54523c4 568 if ($r eq "ok\n") {
60858fe8 569 push @res_puts, [ $perl, $label, \%env, reverse @putargs ];
d54523c4 570 @putargs = ();
44b18357
JC
571 warn "Added Perl-Under-Test: [ @{[@{$res_puts[-1]}]} ]\n"
572 if $OPTS{verbose};
d54523c4 573 } else {
92683b8b 574 warn "perl-under-test args: @putargs + a not-perl: $p $r\n"
44b18357 575 if $OPTS{verbose};
d54523c4
JC
576 push @putargs, $p; # not-perl
577 }
9e7973fa 578 }
44b18357 579 return reverse @res_puts;
9e7973fa
DM
580}
581
582
8fbd1c2c 583
9e7973fa
DM
584# Return a string containing perl test code wrapped in a loop
585# that runs $ARGV[0] times
586
587sub make_perl_prog {
588 my ($test, $desc, $setup, $code) = @_;
589
590 return <<EOF;
591# $desc
592package $test;
593BEGIN { srand(0) }
594$setup;
595for my \$__loop__ (1..\$ARGV[0]) {
596 $code;
597}
598EOF
599}
600
601
602# Parse the output from cachegrind. Return a hash ref.
603# See do_selftest() for examples of the output format.
604
605sub parse_cachegrind {
606 my ($output, $id, $perl) = @_;
607
608 my %res;
609
610 my @lines = split /\n/, $output;
611 for (@lines) {
612 unless (s/(==\d+==)|(--\d+--) //) {
613 die "Error: while executing $id:\n"
614 . "unexpected code or cachegrind output:\n$_\n";
615 }
616 if (/I refs:\s+([\d,]+)/) {
617 $res{Ir} = $1;
618 }
619 elsif (/I1 misses:\s+([\d,]+)/) {
620 $res{Ir_m1} = $1;
621 }
622 elsif (/LLi misses:\s+([\d,]+)/) {
623 $res{Ir_mm} = $1;
624 }
625 elsif (/D refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
626 @res{qw(Dr Dw)} = ($1,$2);
627 }
628 elsif (/D1 misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
629 @res{qw(Dr_m1 Dw_m1)} = ($1,$2);
630 }
631 elsif (/LLd misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
632 @res{qw(Dr_mm Dw_mm)} = ($1,$2);
633 }
634 elsif (/Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
635 @res{qw(COND IND)} = ($1,$2);
636 }
637 elsif (/Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
638 @res{qw(COND_m IND_m)} = ($1,$2);
639 }
640 }
641
642 for my $field (keys %VALID_FIELDS) {
643 die "Error: can't parse '$field' field from cachegrind output:\n$output"
644 unless exists $res{$field};
645 $res{$field} =~ s/,//g;
646 }
647
648 return \%res;
649}
650
651
652# Handle the 'grind' action
653
654sub do_grind {
655 my ($perl_args) = @_; # the residue of @ARGV after option processing
656
957d8930 657 my ($loop_counts, $perls, $results, $tests, $order);
9e7973fa
DM
658 my ($bisect_field, $bisect_min, $bisect_max);
659
660 if (defined $OPTS{bisect}) {
661 ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
662 die "Error: --bisect option must be of form 'field,integer,integer'\n"
663 unless
664 defined $bisect_max
665 and $bisect_min =~ /^[0-9]+$/
666 and $bisect_max =~ /^[0-9]+$/;
667
668 die "Error: unrecognised field '$bisect_field' in --bisect option\n"
669 unless $VALID_FIELDS{$bisect_field};
670
671 die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
672 if $bisect_min > $bisect_max;
673 }
674
4044748b 675 if ($OPTS{read}) {
9e7973fa 676 open my $in, '<:encoding(UTF-8)', $OPTS{read}
4044748b 677 or die " Error: can't open '$OPTS{read}' for reading: $!\n";
9e7973fa
DM
678 my $data = do { local $/; <$in> };
679 close $in;
680
681 my $hash = JSON::PP::decode_json($data);
682 if (int($FORMAT_VERSION) < int($hash->{version})) {
683 die "Error: unsupported version $hash->{version} in file"
684 . "'$OPTS{read}' (too new)\n";
685 }
957d8930
DM
686 ($loop_counts, $perls, $results, $tests, $order) =
687 @$hash{qw(loop_counts perls results tests order)};
9e7973fa
DM
688
689 filter_tests($results);
690 filter_tests($tests);
957d8930
DM
691
692 if (!$order) {
693 $order = [ sort keys %$tests ];
694 }
9e7973fa 695 }
9e7973fa 696
4044748b
YO
697 if (@$perl_args) {
698 unless ($loop_counts) {
699 # How many times to execute the loop for the two trials. The lower
700 # value is intended to do the loop enough times that branch
701 # prediction has taken hold; the higher loop allows us to see the
702 # branch misses after that
703 $loop_counts = [10, 20];
8fbd1c2c 704
4044748b
YO
705 ($tests, $order) = read_tests_file($OPTS{benchfile});
706 die "Error: only a single test may be specified with --bisect\n"
707 if defined $OPTS{bisect} and keys %$tests != 1;
708 }
8fbd1c2c 709
4044748b
YO
710 my @run_perls= process_puts($perls, @$perl_args);
711 push @$perls, @run_perls;
712 $results = grind_run($tests, $order, \@run_perls, $loop_counts, $results);
9e7973fa
DM
713 }
714
8fbd1c2c
DM
715 # now that we have a list of perls, use it to process the
716 # 'perl' component of the --norm and --sort args
717
718 $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");
719 if (defined $OPTS{'sort-perl'}) {
720 $OPTS{'sort-perl'} =
721 select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
722 }
723
df3d7b3a
DM
724 if (defined $OPTS{'compact'}) {
725 $OPTS{'compact'} =
726 select_a_perl($OPTS{'compact'}, $perls, "--compact");
727 }
9e7973fa
DM
728 if (defined $OPTS{write}) {
729 my $json = JSON::PP::encode_json({
730 version => $FORMAT_VERSION,
731 loop_counts => $loop_counts,
732 perls => $perls,
733 results => $results,
734 tests => $tests,
957d8930 735 order => $order,
9e7973fa
DM
736 });
737
738 open my $out, '>:encoding(UTF-8)', $OPTS{write}
739 or die " Error: can't open $OPTS{write} for writing: $!\n";
740 print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
741 close $out or die "Error: closing file '$OPTS{write}': $!\n";
742 }
4044748b 743 if (@$perls>1) {
9e7973fa
DM
744 my ($processed, $averages) =
745 grind_process($results, $perls, $loop_counts);
746
747 if (defined $OPTS{bisect}) {
748 my @r = values %$results;
749 die "Panic: expected exactly one test result in bisect\n"
750 if @r != 1;
751 @r = values %{$r[0]};
752 die "Panic: expected exactly one perl result in bisect\n"
753 if @r != 1;
754 my $c = $r[0]{$bisect_field};
755 die "Panic: no result in bisect for field '$bisect_field'\n"
756 unless defined $c;
757 exit 0 if $bisect_min <= $c and $c <= $bisect_max;
758 exit 1;
759 }
df3d7b3a
DM
760 elsif (defined $OPTS{compact}) {
761 grind_print_compact($processed, $averages, $OPTS{compact},
762 $perls, $tests, $order);
763 }
9e7973fa 764 else {
957d8930 765 grind_print($processed, $averages, $perls, $tests, $order);
9e7973fa
DM
766 }
767 }
768}
769
770
771# Run cachegrind for every test/perl combo.
772# It may run several processes in parallel when -j is specified.
773# Return a hash ref suitable for input to grind_process()
774
775sub grind_run {
4044748b 776 my ($tests, $order, $perls, $counts, $results) = @_;
9e7973fa
DM
777
778 # Build a list of all the jobs to run
779
780 my @jobs;
781
957d8930 782 for my $test (grep $tests->{$_}, @$order) {
9e7973fa
DM
783
784 # Create two test progs: one with an empty loop and one with code.
785 # Note that the empty loop is actually '{1;}' rather than '{}';
786 # this causes the loop to have a single nextstate rather than a
787 # stub op, so more closely matches the active loop; e.g.:
788 # {1;} => nextstate; unstack
789 # {$x=1;} => nextstate; const; gvsv; sassign; unstack
790 my @prog = (
791 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup)}, '1'),
792 make_perl_prog($test, @{$tests->{$test}}{qw(desc setup code)}),
793 );
794
795 for my $p (@$perls) {
60858fe8 796 my ($perl, $label, $env, @putargs) = @$p;
9e7973fa
DM
797
798 # Run both the empty loop and the active loop
799 # $counts->[0] and $counts->[1] times.
800
801 for my $i (0,1) {
802 for my $j (0,1) {
60858fe8
JC
803 my $envstr = '';
804 if (ref $env) {
805 $envstr .= "$_=$env->{$_} " for sort keys %$env;
806 }
807 my $cmd = "PERL_HASH_SEED=0 $envstr"
9e7973fa
DM
808 . "valgrind --tool=cachegrind --branch-sim=yes "
809 . "--cachegrind-out-file=/dev/null "
810 . "$OPTS{grindargs} "
a9eceb2d 811 . "$perl $OPTS{perlargs} @putargs - $counts->[$j] 2>&1";
9e7973fa 812 # for debugging and error messages
c385646f 813 my $id = "$test/$label "
9e7973fa
DM
814 . ($i ? "active" : "empty") . "/"
815 . ($j ? "long" : "short") . " loop";
816
817 push @jobs, {
818 test => $test,
819 perl => $perl,
820 plabel => $label,
821 cmd => $cmd,
822 prog => $prog[$i],
823 active => $i,
824 loopix => $j,
825 id => $id,
826 };
827 }
828 }
829 }
830 }
831
832 # Execute each cachegrind and store the results in %results.
833
834 local $SIG{PIPE} = 'IGNORE';
835
836 my $max_jobs = $OPTS{jobs};
837 my $running = 0; # count of executing jobs
838 my %pids; # map pids to jobs
839 my %fds; # map fds to jobs
9e7973fa
DM
840 my $select = IO::Select->new();
841
842 while (@jobs or $running) {
843
844 if ($OPTS{debug}) {
845 printf "Main loop: pending=%d running=%d\n",
846 scalar(@jobs), $running;
847 }
848
849 # Start new jobs
850
851 while (@jobs && $running < $max_jobs) {
852 my $job = shift @jobs;
853 my ($id, $cmd) =@$job{qw(id cmd)};
854
855 my ($in, $out, $pid);
856 warn "Starting $id\n" if $OPTS{verbose};
857 eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
858 or die "Error: while starting cachegrind subprocess"
859 ." for $id:\n$@";
860 $running++;
861 $pids{$pid} = $job;
862 $fds{"$out"} = $job;
863 $job->{out_fd} = $out;
864 $job->{output} = '';
865 $job->{pid} = $pid;
866
867 $out->blocking(0);
868 $select->add($out);
869
870 if ($OPTS{debug}) {
871 print "Started pid $pid for $id\n";
872 }
873
874 # Note:
875 # In principle we should write to $in in the main select loop,
876 # since it may block. In reality,
877 # a) the code we write to the perl process's stdin is likely
878 # to be less than the OS's pipe buffer size;
879 # b) by the time the perl process has read in all its stdin,
880 # the only output it should have generated is a few lines
881 # of cachegrind output preamble.
882 # If these assumptions change, then perform the following print
883 # in the select loop instead.
884
885 print $in $job->{prog};
886 close $in;
887 }
888
889 # Get output of running jobs
890
891 if ($OPTS{debug}) {
892 printf "Select: waiting on (%s)\n",
893 join ', ', sort { $a <=> $b } map $fds{$_}{pid},
894 $select->handles;
895 }
896
897 my @ready = $select->can_read;
898
899 if ($OPTS{debug}) {
900 printf "Select: pids (%s) ready\n",
901 join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
902 }
903
904 unless (@ready) {
905 die "Panic: select returned no file handles\n";
906 }
907
908 for my $fd (@ready) {
909 my $j = $fds{"$fd"};
910 my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
911 unless (defined $r) {
912 die "Panic: Read from process running $j->{id} gave:\n$!";
913 }
914 next if $r;
915
916 # EOF
917
918 if ($OPTS{debug}) {
919 print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
920 }
921
922 $select->remove($j->{out_fd});
923 close($j->{out_fd})
924 or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
925 $running--;
926 delete $fds{"$j->{out_fd}"};
927 my $output = $j->{output};
928
929 if ($OPTS{debug}) {
930 my $p = $j->{prog};
931 $p =~ s/^/ : /mg;
932 my $o = $output;
933 $o =~ s/^/ : /mg;
934
935 print "\n$j->{id}/\nCommand: $j->{cmd}\n"
936 . "Input:\n$p"
937 . "Output\n$o";
938 }
939
4044748b 940 $results->{$j->{test}}{$j->{plabel}}[$j->{active}][$j->{loopix}]
9e7973fa
DM
941 = parse_cachegrind($output, $j->{id}, $j->{perl});
942 }
943
944 # Reap finished jobs
945
946 while (1) {
947 my $kid = waitpid(-1, WNOHANG);
948 my $ret = $?;
949 last if $kid <= 0;
950
951 unless (exists $pids{$kid}) {
952 die "Panic: reaped unexpected child $kid";
953 }
954 my $j = $pids{$kid};
955 if ($ret) {
956 die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
957 . "with the following output\n:$j->{output}\n";
958 }
959 delete $pids{$kid};
960 }
961 }
962
4044748b 963 return $results;
9e7973fa
DM
964}
965
966
967
968
969# grind_process(): process the data that has been extracted from
970# cachgegrind's output.
971#
972# $res is of the form ->{benchmark_name}{perl_name}[active][count]{field_name},
973# where active is 0 or 1 indicating an empty or active loop,
974# count is 0 or 1 indicating a short or long loop. E.g.
975#
976# $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][10]{Dw_mm}
977#
978# The $res data structure is modified in-place by this sub.
979#
980# $perls is [ [ perl-exe, perl-label], .... ].
981#
982# $counts is [ N, M ] indicating the counts for the short and long loops.
983#
984#
985# return \%output, \%averages, where
986#
987# $output{benchmark_name}{perl_name}{field_name} = N
988# $averages{perl_name}{field_name} = M
989#
990# where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
991# M is the average raw count over all tests ($OPTS{raw}), or
992# 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.
993
994sub grind_process {
995 my ($res, $perls, $counts) = @_;
996
997 # Process the four results for each test/perf combo:
998 # Convert
999 # $res->{benchmark_name}{perl_name}[active][count]{field_name} = n
1000 # to
1001 # $res->{benchmark_name}{perl_name}{field_name} = averaged_n
1002 #
1003 # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
1004 # empty loops, eliminating startup time
1005 # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
1006 # active loops, eliminating startup time
1007 # (the two startup times may be different because different code
1008 # is being compiled); the difference of the two results above
1009 # divided by the count difference is the time to execute the
1010 # active code once, eliminating both startup and loop overhead.
1011
1012 for my $tests (values %$res) {
1013 for my $r (values %$tests) {
1014 my $r2;
1015 for (keys %{$r->[0][0]}) {
1016 my $n = ( ($r->[1][1]{$_} - $r->[1][0]{$_})
1017 - ($r->[0][1]{$_} - $r->[0][0]{$_})
1018 ) / ($counts->[1] - $counts->[0]);
1019 $r2->{$_} = $n;
1020 }
1021 $r = $r2;
1022 }
1023 }
1024
1025 my %totals;
1026 my %counts;
1027 my %data;
1028
1a961f9f 1029 my $perl_norm = $perls->[$OPTS{norm}][1]; # the label of the reference perl
9e7973fa
DM
1030
1031 for my $test_name (keys %$res) {
1032 my $res1 = $res->{$test_name};
1033 my $res2_norm = $res1->{$perl_norm};
1034 for my $perl (keys %$res1) {
1035 my $res2 = $res1->{$perl};
1036 for my $field (keys %$res2) {
1037 my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});
1038
1039 if ($OPTS{raw}) {
1040 # Avoid annoying '-0.0' displays. Ideally this number
1041 # should never be negative, but fluctuations in
1042 # startup etc can theoretically make this happen
1043 $q = 0 if ($q <= 0 && $q > -0.1);
1044 $totals{$perl}{$field} += $q;
1045 $counts{$perl}{$field}++;
1046 $data{$test_name}{$perl}{$field} = $q;
1047 next;
1048 }
1049
1050 # $p and $q are notionally integer counts, but
1051 # due to variations in startup etc, it's possible for a
1052 # count which is supposedly zero to be calculated as a
1053 # small positive or negative value.
1054 # In this case, set it to zero. Further below we
1055 # special-case zeros to avoid division by zero errors etc.
1056
1057 $p = 0.0 if $p < 0.01;
1058 $q = 0.0 if $q < 0.01;
1059
1060 if ($p == 0.0 && $q == 0.0) {
1061 # Both perls gave a count of zero, so no change:
1062 # treat as 100%
1063 $totals{$perl}{$field} += 1;
1064 $counts{$perl}{$field}++;
1065 $data{$test_name}{$perl}{$field} = 1;
1066 }
1067 elsif ($p == 0.0 || $q == 0.0) {
1068 # If either count is zero, there were too few events
1069 # to give a meaningful ratio (and we will end up with
1070 # division by zero if we try). Mark the result undef,
1071 # indicating that it shouldn't be displayed; and skip
1072 # adding to the average
1073 $data{$test_name}{$perl}{$field} = undef;
1074 }
1075 else {
1076 # For averages, we record q/p rather than p/q.
1077 # Consider a test where perl_norm took 1000 cycles
1078 # and perlN took 800 cycles. For the individual
1079 # results we display p/q, or 1.25; i.e. a quarter
1080 # quicker. For the averages, we instead sum all
1081 # the 0.8's, which gives the total cycles required to
1082 # execute all tests, with all tests given equal
1083 # weight. Later we reciprocate the final result,
1084 # i.e. 1/(sum(qi/pi)/n)
1085
1086 $totals{$perl}{$field} += $q/$p;
1087 $counts{$perl}{$field}++;
1088 $data{$test_name}{$perl}{$field} = $p/$q;
1089 }
1090 }
1091 }
1092 }
1093
1094 # Calculate averages based on %totals and %counts accumulated earlier.
1095
1096 my %averages;
1097 for my $perl (keys %totals) {
1098 my $t = $totals{$perl};
1099 for my $field (keys %$t) {
1100 $averages{$perl}{$field} = $OPTS{raw}
1101 ? $t->{$field} / $counts{$perl}{$field}
1102 # reciprocal - see comments above
1103 : $counts{$perl}{$field} / $t->{$field};
1104 }
1105 }
1106
1107 return \%data, \%averages;
1108}
1109
1110
9e7973fa 1111
df3d7b3a 1112# print a standard blurb at the start of the grind display
9e7973fa 1113
df3d7b3a
DM
1114sub grind_blurb {
1115 my ($perls) = @_;
9e7973fa
DM
1116
1117 print <<EOF;
1118Key:
1119 Ir Instruction read
1120 Dr Data read
1121 Dw Data write
1122 COND conditional branches
1123 IND indirect branches
1124 _m branch predict miss
1125 _m1 level 1 cache miss
1126 _mm last cache (e.g. L3) miss
1127 - indeterminate percentage (e.g. 1/0)
1128
1129EOF
1130
1131 if ($OPTS{raw}) {
1132 print "The numbers represent raw counts per loop iteration.\n";
1133 }
1134 else {
1135 print <<EOF;
1136The numbers represent relative counts per loop iteration, compared to
df3d7b3a 1137$perls->[$OPTS{norm}][1] at 100.0%.
9e7973fa
DM
1138Higher is better: for example, using half as many instructions gives 200%,
1139while using twice as many gives 50%.
1140EOF
1141 }
df3d7b3a
DM
1142}
1143
1144
1145# return a sorted list of the test names, plus 'AVERAGE'
9e7973fa 1146
df3d7b3a
DM
1147sub sorted_test_names {
1148 my ($results, $order, $perls) = @_;
9e7973fa 1149
df3d7b3a 1150 my @names;
9e7973fa
DM
1151 unless ($OPTS{average}) {
1152 if (defined $OPTS{'sort-field'}) {
1153 my ($field, $perlix) = @OPTS{'sort-field', 'sort-perl'};
beb8db25 1154 my $perl = $perls->[$perlix][1];
df3d7b3a 1155 @names = sort
9e7973fa
DM
1156 {
1157 $results->{$a}{$perl}{$field}
1158 <=> $results->{$b}{$perl}{$field}
1159 }
1160 keys %$results;
1161 }
1162 else {
df3d7b3a 1163 @names = grep $results->{$_}, @$order;
9e7973fa
DM
1164 }
1165 }
1166
1167 # No point in displaying average for only one test.
df3d7b3a
DM
1168 push @names, 'AVERAGE' unless @names == 1;
1169 @names;
1170}
1171
1172
1173# grind_print(): display the tabulated results of all the cachegrinds.
1174#
1175# Arguments are of the form:
1176# $results->{benchmark_name}{perl_name}{field_name} = N
1177# $averages->{perl_name}{field_name} = M
1178# $perls = [ [ perl-exe, perl-label ], ... ]
1179# $tests->{test_name}{desc => ..., ...}
1180
1181sub grind_print {
1182 my ($results, $averages, $perls, $tests, $order) = @_;
1183
1184 my @perl_names = map $_->[0], @$perls;
1a961f9f 1185 my @perl_labels = map $_->[1], @$perls;
df3d7b3a
DM
1186 my %perl_labels;
1187 $perl_labels{$_->[0]} = $_->[1] for @$perls;
1188
1189 my $field_label_width = 6;
1190 # Calculate the width to display for each column.
1191 my $min_width = $OPTS{raw} ? 8 : 6;
1192 my @widths = map { length($_) < $min_width ? $min_width : length($_) }
1a961f9f 1193 @perl_labels;
df3d7b3a
DM
1194
1195 # Print standard header.
1196 grind_blurb($perls);
1197
1198 my @test_names = sorted_test_names($results, $order, $perls);
9e7973fa
DM
1199
1200 # If only a single field is to be displayed, use a more compact
1201 # format with only a single line of output per test.
1202
1203 my $one_field = defined $OPTS{fields} && keys(%{$OPTS{fields}}) == 1;
1204
1205 if ($one_field) {
91cde97c 1206 print "Results for field " . (keys(%{$OPTS{fields}}))[0] . ".\n";
9e7973fa
DM
1207
1208 # The first column will now contain test names rather than
1209 # field names; Calculate the max width.
1210
1211 $field_label_width = 0;
1212 for (@test_names) {
1213 $field_label_width = length if length > $field_label_width;
1214 }
1215
1216 # Print the perl executables header.
1217
1218 print "\n";
1219 for my $i (0,1) {
1220 print " " x $field_label_width;
1221 for (0..$#widths) {
1222 printf " %*s", $widths[$_],
1a961f9f 1223 $i ? ('-' x$widths[$_]) : $perl_labels[$_];
9e7973fa
DM
1224 }
1225 print "\n";
1226 }
1227 }
1228
1229 # Dump the results for each test.
1230
1231 for my $test_name (@test_names) {
1232 my $doing_ave = ($test_name eq 'AVERAGE');
1233 my $res1 = $doing_ave ? $averages : $results->{$test_name};
1234
1235 unless ($one_field) {
1236 print "\n$test_name";
1237 print "\n$tests->{$test_name}{desc}" unless $doing_ave;
1238 print "\n\n";
1239
1240 # Print the perl executables header.
1241 for my $i (0,1) {
1242 print " " x $field_label_width;
1243 for (0..$#widths) {
1244 printf " %*s", $widths[$_],
1a961f9f 1245 $i ? ('-' x$widths[$_]) : $perl_labels[$_];
9e7973fa
DM
1246 }
1247 print "\n";
1248 }
1249 }
1250
1251 for my $field (qw(Ir Dr Dw COND IND
1252 N
1253 COND_m IND_m
1254 N
1255 Ir_m1 Dr_m1 Dw_m1
1256 N
1257 Ir_mm Dr_mm Dw_mm
1258 ))
1259 {
1260 next if $OPTS{fields} and ! exists $OPTS{fields}{$field};
1261
1262 if ($field eq 'N') {
1263 print "\n";
1264 next;
1265 }
1266
91cde97c
DM
1267 if ($one_field) {
1268 printf "%-*s", $field_label_width, $test_name;
1269 }
1270 else {
1271 printf "%*s", $field_label_width, $field;
1272 }
9e7973fa
DM
1273
1274 for my $i (0..$#widths) {
1a961f9f 1275 my $res2 = $res1->{$perl_labels[$i]};
9e7973fa
DM
1276 my $p = $res2->{$field};
1277 if (!defined $p) {
1278 printf " %*s", $widths[$i], '-';
1279 }
1280 elsif ($OPTS{raw}) {
1281 printf " %*.1f", $widths[$i], $p;
1282 }
1283 else {
1284 printf " %*.2f", $widths[$i], $p * 100;
1285 }
1286 }
1287 print "\n";
1288 }
1289 }
1290}
1291
1292
df3d7b3a
DM
1293
1294# grind_print_compact(): like grind_print(), but display a single perl
1295# in a compact form. Has an additional arg, $which_perl, which specifies
1296# which perl to display.
1297#
1298# Arguments are of the form:
1299# $results->{benchmark_name}{perl_name}{field_name} = N
1300# $averages->{perl_name}{field_name} = M
1301# $perls = [ [ perl-exe, perl-label ], ... ]
1302# $tests->{test_name}{desc => ..., ...}
1303
1304sub grind_print_compact {
1305 my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;
1306
1307
1308 # the width to display for each column.
1309 my $width = $OPTS{raw} ? 7 : 6;
1310
1311 # Print standard header.
1312 grind_blurb($perls);
1313
1314 print "\nResults for $perls->[$which_perl][1]\n\n";
1315
1316 my @test_names = sorted_test_names($results, $order, $perls);
1317
1318 # Dump the results for each test.
1319
1320 my @fields = qw( Ir Dr Dw
1321 COND IND
1322 COND_m IND_m
1323 Ir_m1 Dr_m1 Dw_m1
1324 Ir_mm Dr_mm Dw_mm
1325 );
1326 if ($OPTS{fields}) {
1327 @fields = grep exists $OPTS{fields}{$_}, @fields;
1328 }
1329
1330 printf " %*s", $width, $_ for @fields;
1331 print "\n";
1332 printf " %*s", $width, '------' for @fields;
1333 print "\n";
1334
1335 for my $test_name (@test_names) {
1336 my $doing_ave = ($test_name eq 'AVERAGE');
1337 my $res = $doing_ave ? $averages : $results->{$test_name};
beb8db25 1338 $res = $res->{$perls->[$which_perl][1]};
df3d7b3a
DM
1339
1340 for my $field (@fields) {
1341 my $p = $res->{$field};
1342 if (!defined $p) {
1343 printf " %*s", $width, '-';
1344 }
1345 elsif ($OPTS{raw}) {
1346 printf " %*.1f", $width, $p;
1347 }
1348 else {
1349 printf " %*.2f", $width, $p * 100;
1350 }
1351
1352 }
1353
1354 print " $test_name\n";
1355 }
1356}
1357
1358
9e7973fa
DM
1359# do_selftest(): check that we can parse known cachegrind()
1360# output formats. If the output of cachegrind changes, add a *new*
1361# test here; keep the old tests to make sure we continue to parse
1362# old cachegrinds
1363
1364sub do_selftest {
1365
1366 my @tests = (
1367 'standard',
1368 <<'EOF',
1369==32350== Cachegrind, a cache and branch-prediction profiler
1370==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
1371==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
1372==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
1373==32350==
1374--32350-- warning: L3 cache found, using its data for the LL simulation.
1375==32350==
1376==32350== I refs: 1,124,055
1377==32350== I1 misses: 5,573
1378==32350== LLi misses: 3,338
1379==32350== I1 miss rate: 0.49%
1380==32350== LLi miss rate: 0.29%
1381==32350==
1382==32350== D refs: 404,275 (259,191 rd + 145,084 wr)
1383==32350== D1 misses: 9,608 ( 6,098 rd + 3,510 wr)
1384==32350== LLd misses: 5,794 ( 2,781 rd + 3,013 wr)
1385==32350== D1 miss rate: 2.3% ( 2.3% + 2.4% )
1386==32350== LLd miss rate: 1.4% ( 1.0% + 2.0% )
1387==32350==
1388==32350== LL refs: 15,181 ( 11,671 rd + 3,510 wr)
1389==32350== LL misses: 9,132 ( 6,119 rd + 3,013 wr)
1390==32350== LL miss rate: 0.5% ( 0.4% + 2.0% )
1391==32350==
1392==32350== Branches: 202,372 (197,050 cond + 5,322 ind)
1393==32350== Mispredicts: 19,153 ( 17,742 cond + 1,411 ind)
1394==32350== Mispred rate: 9.4% ( 9.0% + 26.5% )
1395EOF
1396 {
1397 COND => 197050,
1398 COND_m => 17742,
1399 Dr => 259191,
1400 Dr_m1 => 6098,
1401 Dr_mm => 2781,
1402 Dw => 145084,
1403 Dw_m1 => 3510,
1404 Dw_mm => 3013,
1405 IND => 5322,
1406 IND_m => 1411,
1407 Ir => 1124055,
1408 Ir_m1 => 5573,
1409 Ir_mm => 3338,
1410 },
1411 );
1412
1413 for ('t', '.') {
1414 last if require "$_/test.pl";
1415 }
1416 plan(@tests / 3 * keys %VALID_FIELDS);
1417
1418 while (@tests) {
1419 my $desc = shift @tests;
1420 my $output = shift @tests;
1421 my $expected = shift @tests;
1422 my $p = parse_cachegrind($output);
1423 for (sort keys %VALID_FIELDS) {
1424 is($p->{$_}, $expected->{$_}, "$desc, $_");
1425 }
1426 }
1427}