| 1 | #!/usr/bin/perl |
| 2 | use IO::File (); |
| 3 | use File::Find qw(find); |
| 4 | use Text::Wrap qw(wrap); |
| 5 | use Getopt::Long qw(GetOptions); |
| 6 | use Pod::Usage qw(pod2usage); |
| 7 | use Cwd qw(cwd); |
| 8 | use File::Spec; |
| 9 | use strict; |
| 10 | |
| 11 | my %opt = ( |
| 12 | frames => 3, |
| 13 | lines => 0, |
| 14 | tests => 0, |
| 15 | top => 0, |
| 16 | verbose => 0, |
| 17 | ); |
| 18 | |
| 19 | GetOptions(\%opt, qw( |
| 20 | dir=s |
| 21 | frames=i |
| 22 | hide=s@ |
| 23 | lines! |
| 24 | output-file=s |
| 25 | tests! |
| 26 | top=i |
| 27 | verbose+ |
| 28 | )) or pod2usage(2); |
| 29 | |
| 30 | # Setup the directory to process |
| 31 | if (exists $opt{dir}) { |
| 32 | $opt{dir} = File::Spec->canonpath($opt{dir}); |
| 33 | } |
| 34 | else { |
| 35 | # Check if we're in 't' |
| 36 | $opt{dir} = cwd =~ /\/t$/ ? '..' : '.'; |
| 37 | |
| 38 | # Check if we're in the right directory |
| 39 | -d "$opt{dir}/$_" or die "$0: must be run from the perl source directory" |
| 40 | . " when --dir is not given\n" |
| 41 | for qw(t lib ext); |
| 42 | } |
| 43 | |
| 44 | # Assemble regex for functions whose leaks should be hidden |
| 45 | # (no, a hash won't be significantly faster) |
| 46 | my $hidden = do { local $"='|'; $opt{hide} ? qr/^(?:@{$opt{hide}})$/o : '' }; |
| 47 | |
| 48 | # Setup our output file handle |
| 49 | # (do it early, as it may fail) |
| 50 | my $fh = \*STDOUT; |
| 51 | if (exists $opt{'output-file'}) { |
| 52 | $fh = new IO::File ">$opt{'output-file'}" |
| 53 | or die "$0: cannot open $opt{'output-file'} ($!)\n"; |
| 54 | } |
| 55 | |
| 56 | # These hashes will receive the error and leak summary data: |
| 57 | # |
| 58 | # %error = ( |
| 59 | # error_name => { |
| 60 | # stack_frame => { |
| 61 | # test_script => occurrences |
| 62 | # } |
| 63 | # } |
| 64 | # ); |
| 65 | # |
| 66 | # %leak = ( |
| 67 | # leak_type => { |
| 68 | # stack_frames => { |
| 69 | # test_script => occurrences |
| 70 | # } |
| 71 | # } # stack frames are separated by '<'s |
| 72 | # ); |
| 73 | my(%error, %leak); |
| 74 | |
| 75 | # Collect summary data |
| 76 | find({wanted => \&filter, no_chdir => 1}, $opt{dir}); |
| 77 | |
| 78 | # Format the output nicely |
| 79 | $Text::Wrap::columns = 80; |
| 80 | $Text::Wrap::unexpand = 0; |
| 81 | |
| 82 | # Write summary |
| 83 | summary($fh, \%error, \%leak); |
| 84 | |
| 85 | exit 0; |
| 86 | |
| 87 | sub summary { |
| 88 | my($fh, $error, $leak) = @_; |
| 89 | my(%ne, %nl, %top); |
| 90 | |
| 91 | # Prepare the data |
| 92 | |
| 93 | for my $e (keys %$error) { |
| 94 | for my $f (keys %{$error->{$e}}) { |
| 95 | my($func, $file, $line) = split /:/, $f; |
| 96 | my $nf = $opt{lines} ? "$func ($file:$line)" : "$func ($file)"; |
| 97 | $ne{$e}{$nf}{count}++; |
| 98 | while (my($k,$v) = each %{$error->{$e}{$f}}) { |
| 99 | $ne{$e}{$nf}{tests}{$k} += $v; |
| 100 | $top{$k}{error}++; |
| 101 | } |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | for my $l (keys %$leak) { |
| 106 | for my $s (keys %{$leak->{$l}}) { |
| 107 | my $ns = join '<', map { |
| 108 | my($func, $file, $line) = split /:/; |
| 109 | /:/ ? $opt{lines} |
| 110 | ? "$func ($file:$line)" : "$func ($file)" |
| 111 | : $_ |
| 112 | } split /</, $s; |
| 113 | $nl{$l}{$ns}{count}++; |
| 114 | while (my($k,$v) = each %{$leak->{$l}{$s}}) { |
| 115 | $nl{$l}{$ns}{tests}{$k} += $v; |
| 116 | $top{$k}{leak}++; |
| 117 | } |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | # Print the Top N |
| 122 | |
| 123 | if ($opt{top}) { |
| 124 | for my $what (qw(error leak)) { |
| 125 | my @t = sort { $top{$b}{$what} <=> $top{$a}{$what} or $a cmp $b } |
| 126 | grep $top{$_}{$what}, keys %top; |
| 127 | @t > $opt{top} and splice @t, $opt{top}; |
| 128 | my $n = @t; |
| 129 | my $s = $n > 1 ? 's' : ''; |
| 130 | my $prev = 0; |
| 131 | print $fh "Top $n test scripts for ${what}s:\n\n"; |
| 132 | for my $i (1 .. $n) { |
| 133 | $n = $top{$t[$i-1]}{$what}; |
| 134 | $s = $n > 1 ? 's' : ''; |
| 135 | printf $fh " %3s %-40s %3d $what$s\n", |
| 136 | $n != $prev ? "$i." : '', $t[$i-1], $n; |
| 137 | $prev = $n; |
| 138 | } |
| 139 | print $fh "\n"; |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | # Print the real summary |
| 144 | |
| 145 | print $fh "MEMORY ACCESS ERRORS\n\n"; |
| 146 | |
| 147 | for my $e (sort keys %ne) { |
| 148 | print $fh qq("$e"\n); |
| 149 | for my $frame (sort keys %{$ne{$e}}) { |
| 150 | my $data = $ne{$e}{$frame}; |
| 151 | my $count = $data->{count} > 1 ? " [$data->{count} paths]" : ''; |
| 152 | print $fh ' 'x4, "$frame$count\n", |
| 153 | format_tests($data->{tests}), "\n"; |
| 154 | } |
| 155 | print $fh "\n"; |
| 156 | } |
| 157 | |
| 158 | print $fh "\nMEMORY LEAKS\n\n"; |
| 159 | |
| 160 | for my $l (sort keys %nl) { |
| 161 | print $fh qq("$l"\n); |
| 162 | for my $frames (sort keys %{$nl{$l}}) { |
| 163 | my $data = $nl{$l}{$frames}; |
| 164 | my @stack = split /</, $frames; |
| 165 | $data->{count} > 1 and $stack[-1] .= " [$data->{count} paths]"; |
| 166 | print $fh join('', map { ' 'x4 . "$_:$stack[$_]\n" } 0 .. $#stack ), |
| 167 | format_tests($data->{tests}), "\n\n"; |
| 168 | } |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | sub format_tests { |
| 173 | my $tests = shift; |
| 174 | my $indent = ' 'x8; |
| 175 | |
| 176 | if ($opt{tests}) { |
| 177 | return wrap($indent, $indent, join ', ', sort keys %$tests); |
| 178 | } |
| 179 | else { |
| 180 | my $count = keys %$tests; |
| 181 | my $s = $count > 1 ? 's' : ''; |
| 182 | return $indent . "triggered by $count test$s"; |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | sub filter { |
| 187 | debug(2, "$File::Find::name\n"); |
| 188 | |
| 189 | # Only process '*.t.valgrind' files |
| 190 | /(.*)\.t\.valgrind$/ or return; |
| 191 | |
| 192 | # Strip all unnecessary stuff from the test name |
| 193 | my $test = $1; |
| 194 | $test =~ s/^(?:(?:\Q$opt{dir}\E|[.t])\/)+//; |
| 195 | |
| 196 | debug(1, "processing $test ($_)\n"); |
| 197 | |
| 198 | # Get all the valgrind output lines |
| 199 | my @l = do { |
| 200 | my $fh = new IO::File $_ or die "$0: cannot open $_ ($!)\n"; |
| 201 | # Process outputs can interrupt each other, so sort by pid first |
| 202 | my %pid; local $_; |
| 203 | while (<$fh>) { |
| 204 | chomp; |
| 205 | s/^==(\d+)==\s?// and push @{$pid{$1}}, $_; |
| 206 | } |
| 207 | map @$_, values %pid; |
| 208 | }; |
| 209 | |
| 210 | # Setup some useful regexes |
| 211 | my $hexaddr = '0x[[:xdigit:]]+'; |
| 212 | my $topframe = qr/^\s+at $hexaddr:\s+/; |
| 213 | my $address = qr/^\s+Address $hexaddr is \d+ bytes (?:before|inside|after) a block of size \d+/; |
| 214 | my $leak = qr/^\s*\d+ bytes in \d+ blocks are (still reachable|(?:definite|possib)ly lost)/; |
| 215 | |
| 216 | for my $i (0 .. $#l) { |
| 217 | $l[$i] =~ $topframe or next; # Match on any topmost frame... |
| 218 | $l[$i-1] =~ $address and next; # ...but not if it's only address details |
| 219 | my $line = $l[$i-1]; # The error / leak description line |
| 220 | my $j = $i; |
| 221 | |
| 222 | if ($line =~ $leak) { |
| 223 | debug(2, "LEAK: $line\n"); |
| 224 | |
| 225 | my $type = $1; # Type of leak (still reachable, ...) |
| 226 | my $inperl = 0; # Are we inside the perl source? (And how deep?) |
| 227 | my @stack; # Call stack |
| 228 | |
| 229 | while ($l[$j++] =~ /^\s+(?:at|by) $hexaddr:\s+(\w+)\s+\((?:([^:]+):(\d+)|[^)]+)\)/o) { |
| 230 | my($func, $file, $lineno) = ($1, $2, $3); |
| 231 | |
| 232 | # If the stack frame is inside perl => increment $inperl |
| 233 | # If we've already been inside perl, but are no longer => leave |
| 234 | defined $file && ++$inperl or $inperl && last; |
| 235 | |
| 236 | # A function that should be hidden? => clear stack and leave |
| 237 | $hidden && $func =~ $hidden and @stack = (), last; |
| 238 | |
| 239 | # Add stack frame if it's within our threshold |
| 240 | if ($inperl <= $opt{frames}) { |
| 241 | push @stack, $inperl ? "$func:$file:$lineno" : $func; |
| 242 | } |
| 243 | } |
| 244 | |
| 245 | # If there's something on the stack and we've seen perl code, |
| 246 | # add this memory leak to the summary data |
| 247 | @stack and $inperl and $leak{$type}{join '<', @stack}{$test}++; |
| 248 | } else { |
| 249 | debug(1, "ERROR: $line\n"); |
| 250 | |
| 251 | # Simply find the topmost frame in the call stack within |
| 252 | # the perl source code |
| 253 | while ($l[$j++] =~ /^\s+(?:at|by) $hexaddr:\s+(?:(\w+)\s+\(([^:]+):(\d+)\))?/o) { |
| 254 | if (defined $1) { |
| 255 | $error{$line}{"$1:$2:$3"}{$test}++; |
| 256 | last; |
| 257 | } |
| 258 | } |
| 259 | } |
| 260 | } |
| 261 | } |
| 262 | |
| 263 | sub debug { |
| 264 | my $level = shift; |
| 265 | $opt{verbose} >= $level and print STDERR @_; |
| 266 | } |
| 267 | |
| 268 | __END__ |
| 269 | |
| 270 | =head1 NAME |
| 271 | |
| 272 | valgrindpp.pl - A post processor for make test.valgrind |
| 273 | |
| 274 | =head1 SYNOPSIS |
| 275 | |
| 276 | valgrindpp.pl [B<--dir>=I<dir>] [B<--frames>=I<number>] |
| 277 | [B<--hide>=I<identifier>] [B<--lines>] |
| 278 | [B<--output-file>=I<file>] [B<--tests>] |
| 279 | [B<--top>=I<number>] [B<--verbose>] |
| 280 | |
| 281 | =head1 DESCRIPTION |
| 282 | |
| 283 | B<valgrindpp.pl> is a post processor for I<.valgrind> files |
| 284 | created during I<make test.valgrind>. It collects all these |
| 285 | files, extracts most of the information and produces a |
| 286 | significantly shorter summary of all detected memory access |
| 287 | errors and memory leaks. |
| 288 | |
| 289 | =head1 OPTIONS |
| 290 | |
| 291 | =over 4 |
| 292 | |
| 293 | =item B<--dir>=I<dir> |
| 294 | |
| 295 | Recursively process I<.valgrind> files in I<dir>. If this |
| 296 | options is not given, B<valgrindpp.pl> must be run from |
| 297 | either the perl source or the I<t> directory and will process |
| 298 | all I<.valgrind> files within the distribution. |
| 299 | |
| 300 | =item B<--frames>=I<number> |
| 301 | |
| 302 | Number of stack frames within the perl source code to |
| 303 | consider when distinguishing between memory leak sources. |
| 304 | Increasing this value will give you a longer backtrace, |
| 305 | while decreasing the number will show you fewer sources |
| 306 | for memory leaks. The default is 3 frames. |
| 307 | |
| 308 | =item B<--hide>=I<identifier> |
| 309 | |
| 310 | Hide all memory leaks that have I<identifier> in their backtrace. |
| 311 | Useful if you want to hide leaks from functions that are known to |
| 312 | have lots of memory leaks. I<identifier> can also be a regular |
| 313 | expression, in which case all leaks with symbols matching the |
| 314 | expression are hidden. Can be given multiple times. |
| 315 | |
| 316 | =item B<--lines> |
| 317 | |
| 318 | Show line numbers for stack frames. This is useful for further |
| 319 | increasing the error/leak resolution, but makes it harder to |
| 320 | compare different reports using I<diff>. |
| 321 | |
| 322 | =item B<--output-file>=I<file> |
| 323 | |
| 324 | Redirect the output into I<file>. If this option is not |
| 325 | given, the output goes to I<stdout>. |
| 326 | |
| 327 | =item B<--tests> |
| 328 | |
| 329 | List all tests that trigger memory access errors or memory |
| 330 | leaks explicitly instead of only printing a count. |
| 331 | |
| 332 | =item B<--top>=I<number> |
| 333 | |
| 334 | List the top I<number> test scripts for memory access errors |
| 335 | and memory leaks. Set to C<0> for no top-I<n> statistics. |
| 336 | |
| 337 | =item B<--verbose> |
| 338 | |
| 339 | Increase verbosity level. Can be given multiple times. |
| 340 | |
| 341 | =back |
| 342 | |
| 343 | =head1 COPYRIGHT |
| 344 | |
| 345 | Copyright 2003 by Marcus Holland-Moritz <mhx@cpan.org>. |
| 346 | |
| 347 | This program is free software; you may redistribute it |
| 348 | and/or modify it under the same terms as Perl itself. |
| 349 | |
| 350 | =cut |