Porting/valgrindpp.pl

   1 #!/usr/bin/perl
   2 use IO::File ();
   3 use File::Find qw(find);
   4 use Text::Wrap qw(wrap);
   5 use Getopt::Long qw(GetOptions);
   6 use Pod::Usage qw(pod2usage);
   7 use Cwd qw(cwd);
   8 use File::Spec;
   9 use strict;
  10
  11 my %opt = (
  12   frames  => 3,
  13   lines   => 0,
  14   tests   => 0,
  15   top     => 0,
  16   verbose => 0,
  17 );
  18
  19 GetOptions(\%opt, qw(
  20             dir=s
  21             frames=i
  22             hide=s@
  23             lines!
  24             output-file=s
  25             tests!
  26             top=i
  27             verbose+
  28           )) or pod2usage(2);
  29
  30 # Setup the directory to process
  31 if (exists $opt{dir}) {
  32   $opt{dir} = File::Spec->canonpath($opt{dir});
  33 }
  34 else {
  35   # Check if we're in 't'
  36   $opt{dir} = cwd =~ /\/t$/ ? '..' : '.';
  37
  38   # Check if we're in the right directory
  39   -d "$opt{dir}/$_" or die "$0: must be run from the perl source directory"
  40                          . " when --dir is not given\n"
  41       for qw(t lib ext);
  42 }
  43
  44 # Assemble regex for functions whose leaks should be hidden
  45 # (no, a hash won't be significantly faster)
  46 my $hidden = do { local $"='|'; $opt{hide} ? qr/^(?:@{$opt{hide}})$/o : '' };
  47
  48 # Setup our output file handle
  49 # (do it early, as it may fail)
  50 my $fh = \*STDOUT;
  51 if (exists $opt{'output-file'}) {
  52   $fh = new IO::File ">$opt{'output-file'}"
  53         or die "$0: cannot open $opt{'output-file'} ($!)\n";
  54 }
  55
  56 # These hashes will receive the error and leak summary data:
  57 #
  58 # %error = (
  59 #   error_name => {
  60 #                   stack_frame => {
  61 #                                    test_script => occurences
  62 #                                  }
  63 #                 }
  64 # );
  65 #
  66 # %leak = (
  67 #   leak_type => {
  68 #                  stack_frames => {
  69 #                                    test_script => occurences
  70 #                                  }
  71 #                } # stack frames are separated by '<'s
  72 # );
  73 my(%error, %leak);
  74
  75 # Collect summary data
  76 find({wanted => \&filter, no_chdir => 1}, $opt{dir});
  77
  78 # Format the output nicely
  79 $Text::Wrap::columns = 80;
  80 $Text::Wrap::unexpand = 0;
  81
  82 # Write summary
  83 summary($fh, \%error, \%leak);
  84
  85 exit 0;
  86
  87 sub summary {
  88   my($fh, $error, $leak) = @_;
  89   my(%ne, %nl, %top);
  90
  91   # Prepare the data
  92
  93   for my $e (keys %$error) {
  94     for my $f (keys %{$error->{$e}}) {
  95       my($func, $file, $line) = split /:/, $f;
  96       my $nf = $opt{lines} ? "$func ($file:$line)" : "$func ($file)";
  97       $ne{$e}{$nf}{count}++;
  98       while (my($k,$v) = each %{$error->{$e}{$f}}) {
  99         $ne{$e}{$nf}{tests}{$k} += $v;
 100         $top{$k}{error}++;
 101       }
 102     }
 103   }
 104
 105   for my $l (keys %$leak) {
 106     for my $s (keys %{$leak->{$l}}) {
 107       my $ns = join '<', map {
 108                  my($func, $file, $line) = split /:/;
 109                  /:/ ? $opt{lines}
 110                        ? "$func ($file:$line)" : "$func ($file)"
 111                      : $_
 112                } split /</, $s;
 113       $nl{$l}{$ns}{count}++;
 114       while (my($k,$v) = each %{$leak->{$l}{$s}}) {
 115         $nl{$l}{$ns}{tests}{$k} += $v;
 116         $top{$k}{leak}++;
 117       }
 118     }
 119   }
 120
 121   # Print the Top N
 122
 123   if ($opt{top}) {
 124     for my $what (qw(error leak)) {
 125       my @t = sort { $top{$b}{$what} <=> $top{$a}{$what} or $a cmp $b }
 126               grep $top{$_}{$what}, keys %top;
 127       @t > $opt{top} and splice @t, $opt{top};
 128       my $n = @t;
 129       my $s = $n > 1 ? 's' : '';
 130       my $prev = 0;
 131       print $fh "Top $n test scripts for ${what}s:\n\n";
 132       for my $i (1 .. $n) {
 133         $n = $top{$t[$i-1]}{$what};
 134         $s = $n > 1 ? 's' : '';
 135         printf $fh "    %3s %-40s %3d $what$s\n",
 136                    $n != $prev ? "$i." : '', $t[$i-1], $n;
 137         $prev = $n;
 138       }
 139       print $fh "\n";
 140     }
 141   }
 142
 143   # Print the real summary
 144
 145   print $fh "MEMORY ACCESS ERRORS\n\n";
 146
 147   for my $e (sort keys %ne) {
 148     print $fh qq("$e"\n);
 149     for my $frame (sort keys %{$ne{$e}}) {
 150       my $data = $ne{$e}{$frame};
 151       my $count = $data->{count} > 1 ? " [$data->{count} paths]" : '';
 152       print $fh ' 'x4, "$frame$count\n",
 153                 format_tests($data->{tests}), "\n";
 154     }
 155     print $fh "\n";
 156   }
 157
 158   print $fh "\nMEMORY LEAKS\n\n";
 159
 160   for my $l (sort keys %nl) {
 161     print $fh qq("$l"\n);
 162     for my $frames (sort keys %{$nl{$l}}) {
 163       my $data = $nl{$l}{$frames};
 164       my @stack = split /</, $frames;
 165       $data->{count} > 1 and $stack[-1] .= " [$data->{count} paths]";
 166       print $fh join('', map { ' 'x4 . "$_:$stack[$_]\n" } 0 .. $#stack ),
 167                 format_tests($data->{tests}), "\n\n";
 168     }
 169   }
 170 }
 171
 172 sub format_tests {
 173   my $tests = shift;
 174   my $indent = ' 'x8;
 175
 176   if ($opt{tests}) {
 177     return wrap($indent, $indent, join ', ', sort keys %$tests);
 178   }
 179   else {
 180     my $count = keys %$tests;
 181     my $s = $count > 1 ? 's' : '';
 182     return $indent . "triggered by $count test$s";
 183   }
 184 }
 185
 186 sub filter {
 187   debug(2, "$File::Find::name\n");
 188
 189   # Only process '*.t.valgrind' files
 190   /(.*)\.t\.valgrind$/ or return;
 191
 192   # Strip all unnecessary stuff from the test name
 193   my $test = $1;
 194   $test =~ s/^(?:(?:\Q$opt{dir}\E|[.t])\/)+//;
 195
 196   debug(1, "processing $test ($_)\n");
 197
 198   # Get all the valgrind output lines
 199   my @l = do {
 200     my $fh = new IO::File $_ or die "$0: cannot open $_ ($!)\n";
 201     # Process outputs can interrupt each other, so sort by pid first
 202     my %pid; local $_;
 203     while (<$fh>) {
 204       chomp;
 205       s/^==(\d+)==\s?// and push @{$pid{$1}}, $_;
 206     }
 207     map @$_, values %pid;
 208   };
 209
 210   # Setup some useful regexes
 211   my $hexaddr  = '0x[[:xdigit:]]+';
 212   my $topframe = qr/^\s+at $hexaddr:\s+/;
 213   my $address  = qr/^\s+Address $hexaddr is \d+ bytes (?:before|inside|after) a block of size \d+/;
 214   my $leak     = qr/^\s*\d+ bytes in \d+ blocks are (still reachable|(?:definite|possib)ly lost)/;
 215
 216   for my $i (0 .. $#l) {
 217     $l[$i]   =~ $topframe or next; # Match on any topmost frame...
 218     $l[$i-1] =~ $address and next; # ...but not if it's only address details
 219     my $line = $l[$i-1]; # The error / leak description line
 220     my $j    = $i;
 221
 222     if ($line =~ $leak) {
 223       debug(2, "LEAK: $line\n");
 224
 225       my $type   = $1;     # Type of leak (still reachable, ...)
 226       my $inperl = 0;      # Are we inside the perl source? (And how deep?)
 227       my @stack;           # Call stack
 228
 229       while ($l[$j++] =~ /^\s+(?:at|by) $hexaddr:\s+(\w+)\s+\((?:([^:]+):(\d+)|[^)]+)\)/o) {
 230         my($func, $file, $lineno) = ($1, $2, $3);
 231
 232         # If the stack frame is inside perl => increment $inperl
 233         # If we've already been inside perl, but are no longer => leave
 234         defined $file && ++$inperl or $inperl && last;
 235
 236         # A function that should be hidden? => clear stack and leave
 237         $hidden && $func =~ $hidden and @stack = (), last;
 238
 239         # Add stack frame if it's within our threshold
 240         if ($inperl <= $opt{frames}) {
 241           push @stack, $inperl ? "$func:$file:$lineno" : $func;
 242         }
 243       }
 244
 245       # If there's something on the stack and we've seen perl code,
 246       # add this memory leak to the summary data
 247       @stack and $inperl and $leak{$type}{join '<', @stack}{$test}++;
 248     } else {
 249       debug(1, "ERROR: $line\n");
 250
 251       # Simply find the topmost frame in the call stack within
 252       # the perl source code
 253       while ($l[$j++] =~ /^\s+(?:at|by) $hexaddr:\s+(?:(\w+)\s+\(([^:]+):(\d+)\))?/o) {
 254         if (defined $1) {
 255           $error{$line}{"$1:$2:$3"}{$test}++;
 256           last;
 257         }
 258       }
 259     }
 260   }
 261 }
 262
 263 sub debug {
 264   my $level = shift;
 265   $opt{verbose} >= $level and print STDERR @_;
 266 }
 267
 268 __END__
 269
 270 =head1 NAME
 271
 272 valgrindpp.pl - A post processor for make test.valgrind
 273
 274 =head1 SYNOPSIS
 275
 276 valgrindpp.pl [B<--dir>=I<dir>] [B<--frames>=I<number>]
 277 [B<--hide>=I<identifier>] [B<--lines>]
 278 [B<--output-file>=I<file>] [B<--tests>]
 279 [B<--top>=I<number>] [B<--verbose>]
 280
 281 =head1 DESCRIPTION
 282
 283 B<valgrindpp.pl> is a post processor for I<.valgrind> files
 284 created during I<make test.valgrind>. It collects all these
 285 files, extracts most of the information and produces a
 286 significantly shorter summary of all detected memory access
 287 errors and memory leaks.
 288
 289 =head1 OPTIONS
 290
 291 =over 4
 292
 293 =item B<--dir>=I<dir>
 294
 295 Recursively process I<.valgrind> files in I<dir>. If this
 296 options is not given, B<valgrindpp.pl> must be run from
 297 either the perl source or the I<t> directory and will process
 298 all I<.valgrind> files within the distribution.
 299
 300 =item B<--frames>=I<number>
 301
 302 Number of stack frames within the perl source code to
 303 consider when distinguishing between memory leak sources.
 304 Increasing this value will give you a longer backtrace,
 305 while decreasing the number will show you fewer sources
 306 for memory leaks. The default is 3 frames.
 307
 308 =item B<--hide>=I<identifier>
 309
 310 Hide all memory leaks that have I<identifier> in their backtrace.
 311 Useful if you want to hide leaks from functions that are known to
 312 have lots of memory leaks. I<identifier> can also be a regular
 313 expression, in which case all leaks with symbols matching the
 314 expression are hidden. Can be given multiple times.
 315
 316 =item B<--lines>
 317
 318 Show line numbers for stack frames. This is useful for further
 319 increasing the error/leak resolution, but makes it harder to
 320 compare different reports using I<diff>.
 321
 322 =item B<--output-file>=I<file>
 323
 324 Redirect the output into I<file>. If this option is not
 325 given, the output goes to I<stdout>.
 326
 327 =item B<--tests>
 328
 329 List all tests that trigger memory access errors or memory
 330 leaks explicitly instead of only printing a count.
 331
 332 =item B<--top>=I<number>
 333
 334 List the top I<number> test scripts for memory access errors
 335 and memory leaks. Set to C<0> for no top-I<n> statistics.
 336
 337 =item B<--verbose>
 338
 339 Increase verbosity level. Can be given multiple times.
 340
 341 =back
 342
 343 =head1 COPYRIGHT
 344
 345 Copyright 2003 by Marcus Holland-Moritz <mhx@cpan.org>.
 346
 347 This program is free software; you may redistribute it
 348 and/or modify it under the same terms as Perl itself.
 349
 350 =cut