Commit | Line | Data |
---|---|---|
a798dbf2 MB |
1 | package B::Xref; |
2 | ||
f8d9d21f | 3 | our $VERSION = '1.01'; |
28b605d8 | 4 | |
a798dbf2 MB |
5 | =head1 NAME |
6 | ||
7 | B::Xref - Generates cross reference reports for Perl programs | |
8 | ||
9 | =head1 SYNOPSIS | |
10 | ||
11 | perl -MO=Xref[,OPTIONS] foo.pl | |
12 | ||
13 | =head1 DESCRIPTION | |
14 | ||
15 | The B::Xref module is used to generate a cross reference listing of all | |
16 | definitions and uses of variables, subroutines and formats in a Perl program. | |
17 | It is implemented as a backend for the Perl compiler. | |
18 | ||
19 | The report generated is in the following format: | |
20 | ||
21 | File filename1 | |
22 | Subroutine subname1 | |
23 | Package package1 | |
f8d9d21f RGS |
24 | object1 line numbers |
25 | object2 line numbers | |
a798dbf2 MB |
26 | ... |
27 | Package package2 | |
28 | ... | |
29 | ||
30 | Each B<File> section reports on a single file. Each B<Subroutine> section | |
31 | reports on a single subroutine apart from the special cases | |
32 | "(definitions)" and "(main)". These report, respectively, on subroutine | |
33 | definitions found by the initial symbol table walk and on the main part of | |
34 | the program or module external to all subroutines. | |
35 | ||
36 | The report is then grouped by the B<Package> of each variable, | |
37 | subroutine or format with the special case "(lexicals)" meaning | |
38 | lexical variables. Each B<object> name (implicitly qualified by its | |
39 | containing B<Package>) includes its type character(s) at the beginning | |
40 | where possible. Lexical variables are easier to track and even | |
41 | included dereferencing information where possible. | |
42 | ||
43 | The C<line numbers> are a comma separated list of line numbers (some | |
44 | preceded by code letters) where that object is used in some way. | |
45 | Simple uses aren't preceded by a code letter. Introductions (such as | |
46 | where a lexical is first defined with C<my>) are indicated with the | |
47 | letter "i". Subroutine and method calls are indicated by the character | |
48 | "&". Subroutine definitions are indicated by "s" and format | |
49 | definitions by "f". | |
50 | ||
51 | =head1 OPTIONS | |
52 | ||
53 | Option words are separated by commas (not whitespace) and follow the | |
54 | usual conventions of compiler backend options. | |
55 | ||
56 | =over 8 | |
57 | ||
58 | =item C<-oFILENAME> | |
59 | ||
60 | Directs output to C<FILENAME> instead of standard output. | |
61 | ||
62 | =item C<-r> | |
63 | ||
64 | Raw output. Instead of producing a human-readable report, outputs a line | |
65 | in machine-readable form for each definition/use of a variable/sub/format. | |
66 | ||
f8d9d21f RGS |
67 | =item C<-d> |
68 | ||
69 | Don't output the "(definitions)" sections. | |
70 | ||
a798dbf2 MB |
71 | =item C<-D[tO]> |
72 | ||
73 | (Internal) debug options, probably only useful if C<-r> included. | |
74 | The C<t> option prints the object on the top of the stack as it's | |
75 | being tracked. The C<O> option prints each operator as it's being | |
76 | processed in the execution order of the program. | |
77 | ||
78 | =back | |
79 | ||
80 | =head1 BUGS | |
81 | ||
82 | Non-lexical variables are quite difficult to track through a program. | |
83 | Sometimes the type of a non-lexical variable's use is impossible to | |
84 | determine. Introductions of non-lexical non-scalars don't seem to be | |
85 | reported properly. | |
86 | ||
87 | =head1 AUTHOR | |
88 | ||
89 | Malcolm Beattie, mbeattie@sable.ox.ac.uk. | |
90 | ||
91 | =cut | |
92 | ||
93 | use strict; | |
18228111 | 94 | use Config; |
4c1f658f | 95 | use B qw(peekop class comppadlist main_start svref_2object walksymtable |
f8d9d21f | 96 | OPpLVAL_INTRO SVf_POK OPpOUR_INTRO cstring |
4c1f658f | 97 | ); |
a798dbf2 MB |
98 | |
99 | sub UNKNOWN { ["?", "?", "?"] } | |
100 | ||
101 | my @pad; # lexicals in current pad | |
102 | # as ["(lexical)", type, name] | |
103 | my %done; # keyed by $$op: set when each $op is done | |
104 | my $top = UNKNOWN; # shadows top element of stack as | |
105 | # [pack, type, name] (pack can be "(lexical)") | |
106 | my $file; # shadows current filename | |
107 | my $line; # shadows current line number | |
108 | my $subname; # shadows current sub name | |
109 | my %table; # Multi-level hash to record all uses etc. | |
110 | my @todo = (); # List of CVs that need processing | |
111 | ||
112 | my %code = (intro => "i", used => "", | |
113 | subdef => "s", subused => "&", | |
114 | formdef => "f", meth => "->"); | |
115 | ||
116 | ||
117 | # Options | |
118 | my ($debug_op, $debug_top, $nodefs, $raw); | |
119 | ||
120 | sub process { | |
121 | my ($var, $event) = @_; | |
122 | my ($pack, $type, $name) = @$var; | |
123 | if ($type eq "*") { | |
124 | if ($event eq "used") { | |
125 | return; | |
126 | } elsif ($event eq "subused") { | |
127 | $type = "&"; | |
128 | } | |
129 | } | |
130 | $type =~ s/(.)\*$/$1/g; | |
131 | if ($raw) { | |
132 | printf "%-16s %-12s %5d %-12s %4s %-16s %s\n", | |
133 | $file, $subname, $line, $pack, $type, $name, $event; | |
134 | } else { | |
135 | # Wheee | |
136 | push(@{$table{$file}->{$subname}->{$pack}->{$type.$name}->{$event}}, | |
137 | $line); | |
138 | } | |
139 | } | |
140 | ||
141 | sub load_pad { | |
142 | my $padlist = shift; | |
18228111 | 143 | my ($namelistav, $vallistav, @namelist, $ix); |
a798dbf2 MB |
144 | @pad = (); |
145 | return if class($padlist) eq "SPECIAL"; | |
18228111 | 146 | ($namelistav,$vallistav) = $padlist->ARRAY; |
a798dbf2 MB |
147 | @namelist = $namelistav->ARRAY; |
148 | for ($ix = 1; $ix < @namelist; $ix++) { | |
149 | my $namesv = $namelist[$ix]; | |
150 | next if class($namesv) eq "SPECIAL"; | |
51e5a3db | 151 | my ($type, $name) = $namesv->PV =~ /^(.)([^\0]*)(\0.*)?$/; |
f8d9d21f | 152 | $pad[$ix] = ["(lexical)", $type || '?', $name || '?']; |
a798dbf2 | 153 | } |
18228111 GS |
154 | if ($Config{useithreads}) { |
155 | my (@vallist); | |
156 | @vallist = $vallistav->ARRAY; | |
157 | for ($ix = 1; $ix < @vallist; $ix++) { | |
158 | my $valsv = $vallist[$ix]; | |
159 | next unless class($valsv) eq "GV"; | |
160 | # these pad GVs don't have corresponding names, so same @pad | |
161 | # array can be used without collisions | |
162 | $pad[$ix] = [$valsv->STASH->NAME, "*", $valsv->NAME]; | |
163 | } | |
164 | } | |
a798dbf2 MB |
165 | } |
166 | ||
167 | sub xref { | |
168 | my $start = shift; | |
169 | my $op; | |
170 | for ($op = $start; $$op; $op = $op->next) { | |
171 | last if $done{$$op}++; | |
172 | warn sprintf("top = [%s, %s, %s]\n", @$top) if $debug_top; | |
173 | warn peekop($op), "\n" if $debug_op; | |
3f872cb9 GS |
174 | my $opname = $op->name; |
175 | if ($opname =~ /^(or|and|mapwhile|grepwhile|range|cond_expr)$/) { | |
a798dbf2 | 176 | xref($op->other); |
3f872cb9 | 177 | } elsif ($opname eq "match" || $opname eq "subst") { |
a798dbf2 | 178 | xref($op->pmreplstart); |
3f872cb9 | 179 | } elsif ($opname eq "substcont") { |
a798dbf2 MB |
180 | xref($op->other->pmreplstart); |
181 | $op = $op->other; | |
182 | redo; | |
3f872cb9 | 183 | } elsif ($opname eq "enterloop") { |
a798dbf2 MB |
184 | xref($op->redoop); |
185 | xref($op->nextop); | |
186 | xref($op->lastop); | |
3f872cb9 | 187 | } elsif ($opname eq "subst") { |
a798dbf2 MB |
188 | xref($op->pmreplstart); |
189 | } else { | |
190 | no strict 'refs'; | |
3f872cb9 | 191 | my $ppname = "pp_$opname"; |
a798dbf2 MB |
192 | &$ppname($op) if defined(&$ppname); |
193 | } | |
194 | } | |
195 | } | |
196 | ||
197 | sub xref_cv { | |
198 | my $cv = shift; | |
199 | my $pack = $cv->GV->STASH->NAME; | |
200 | $subname = ($pack eq "main" ? "" : "$pack\::") . $cv->GV->NAME; | |
201 | load_pad($cv->PADLIST); | |
202 | xref($cv->START); | |
203 | $subname = "(main)"; | |
204 | } | |
205 | ||
206 | sub xref_object { | |
207 | my $cvref = shift; | |
208 | xref_cv(svref_2object($cvref)); | |
209 | } | |
210 | ||
211 | sub xref_main { | |
212 | $subname = "(main)"; | |
213 | load_pad(comppadlist); | |
214 | xref(main_start); | |
215 | while (@todo) { | |
216 | xref_cv(shift @todo); | |
217 | } | |
218 | } | |
219 | ||
220 | sub pp_nextstate { | |
221 | my $op = shift; | |
57843af0 | 222 | $file = $op->file; |
a798dbf2 MB |
223 | $line = $op->line; |
224 | $top = UNKNOWN; | |
225 | } | |
226 | ||
227 | sub pp_padsv { | |
228 | my $op = shift; | |
229 | $top = $pad[$op->targ]; | |
230 | process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used"); | |
231 | } | |
232 | ||
233 | sub pp_padav { pp_padsv(@_) } | |
234 | sub pp_padhv { pp_padsv(@_) } | |
235 | ||
236 | sub deref { | |
8e011b7d | 237 | my ($op, $var, $as) = @_; |
a798dbf2 | 238 | $var->[1] = $as . $var->[1]; |
8e011b7d | 239 | process($var, $op->private & OPpOUR_INTRO ? "intro" : "used"); |
a798dbf2 MB |
240 | } |
241 | ||
8e011b7d WL |
242 | sub pp_rv2cv { deref(shift, $top, "&"); } |
243 | sub pp_rv2hv { deref(shift, $top, "%"); } | |
244 | sub pp_rv2sv { deref(shift, $top, "\$"); } | |
245 | sub pp_rv2av { deref(shift, $top, "\@"); } | |
246 | sub pp_rv2gv { deref(shift, $top, "*"); } | |
a798dbf2 MB |
247 | |
248 | sub pp_gvsv { | |
249 | my $op = shift; | |
18228111 GS |
250 | my $gv; |
251 | if ($Config{useithreads}) { | |
252 | $top = $pad[$op->padix]; | |
253 | $top = UNKNOWN unless $top; | |
254 | $top->[1] = '$'; | |
255 | } | |
256 | else { | |
257 | $gv = $op->gv; | |
586b0d7d | 258 | $top = [$gv->STASH->NAME, '$', $gv->SAFENAME]; |
18228111 | 259 | } |
8e011b7d WL |
260 | process($top, $op->private & OPpLVAL_INTRO || |
261 | $op->private & OPpOUR_INTRO ? "intro" : "used"); | |
a798dbf2 MB |
262 | } |
263 | ||
264 | sub pp_gv { | |
265 | my $op = shift; | |
18228111 GS |
266 | my $gv; |
267 | if ($Config{useithreads}) { | |
268 | $top = $pad[$op->padix]; | |
269 | $top = UNKNOWN unless $top; | |
270 | $top->[1] = '*'; | |
271 | } | |
272 | else { | |
273 | $gv = $op->gv; | |
586b0d7d | 274 | $top = [$gv->STASH->NAME, "*", $gv->SAFENAME]; |
18228111 | 275 | } |
a798dbf2 MB |
276 | process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used"); |
277 | } | |
278 | ||
279 | sub pp_const { | |
280 | my $op = shift; | |
281 | my $sv = $op->sv; | |
18228111 GS |
282 | # constant could be in the pad (under useithreads) |
283 | if ($$sv) { | |
284 | $top = ["?", "", | |
f8d9d21f RGS |
285 | (class($sv) ne "SPECIAL" && $sv->FLAGS & SVf_POK) |
286 | ? cstring($sv->PV) : "?"]; | |
18228111 GS |
287 | } |
288 | else { | |
289 | $top = $pad[$op->targ]; | |
fdf1c2a9 | 290 | $top = UNKNOWN unless $top; |
18228111 | 291 | } |
a798dbf2 MB |
292 | } |
293 | ||
294 | sub pp_method { | |
295 | my $op = shift; | |
296 | $top = ["(method)", "->".$top->[1], $top->[2]]; | |
297 | } | |
298 | ||
299 | sub pp_entersub { | |
300 | my $op = shift; | |
301 | if ($top->[1] eq "m") { | |
302 | process($top, "meth"); | |
303 | } else { | |
304 | process($top, "subused"); | |
305 | } | |
306 | $top = UNKNOWN; | |
307 | } | |
308 | ||
309 | # | |
310 | # Stuff for cross referencing definitions of variables and subs | |
311 | # | |
312 | ||
313 | sub B::GV::xref { | |
314 | my $gv = shift; | |
315 | my $cv = $gv->CV; | |
316 | if ($$cv) { | |
317 | #return if $done{$$cv}++; | |
b195d487 | 318 | $file = $gv->FILE; |
a798dbf2 MB |
319 | $line = $gv->LINE; |
320 | process([$gv->STASH->NAME, "&", $gv->NAME], "subdef"); | |
321 | push(@todo, $cv); | |
322 | } | |
323 | my $form = $gv->FORM; | |
324 | if ($$form) { | |
325 | return if $done{$$form}++; | |
b195d487 | 326 | $file = $gv->FILE; |
a798dbf2 MB |
327 | $line = $gv->LINE; |
328 | process([$gv->STASH->NAME, "", $gv->NAME], "formdef"); | |
329 | } | |
330 | } | |
331 | ||
332 | sub xref_definitions { | |
333 | my ($pack, %exclude); | |
334 | return if $nodefs; | |
335 | $subname = "(definitions)"; | |
595f3c5f | 336 | foreach $pack (qw(B O AutoLoader DynaLoader XSLoader Config DB VMS |
586b0d7d RGS |
337 | strict vars FileHandle Exporter Carp PerlIO::Layer |
338 | attributes utf8 warnings)) { | |
a798dbf2 MB |
339 | $exclude{$pack."::"} = 1; |
340 | } | |
341 | no strict qw(vars refs); | |
342 | walksymtable(\%{"main::"}, "xref", sub { !defined($exclude{$_[0]}) }); | |
343 | } | |
344 | ||
345 | sub output { | |
346 | return if $raw; | |
347 | my ($file, $subname, $pack, $name, $ev, $perfile, $persubname, | |
348 | $perpack, $pername, $perev); | |
349 | foreach $file (sort(keys(%table))) { | |
350 | $perfile = $table{$file}; | |
351 | print "File $file\n"; | |
352 | foreach $subname (sort(keys(%$perfile))) { | |
353 | $persubname = $perfile->{$subname}; | |
354 | print " Subroutine $subname\n"; | |
355 | foreach $pack (sort(keys(%$persubname))) { | |
356 | $perpack = $persubname->{$pack}; | |
357 | print " Package $pack\n"; | |
358 | foreach $name (sort(keys(%$perpack))) { | |
359 | $pername = $perpack->{$name}; | |
360 | my @lines; | |
361 | foreach $ev (qw(intro formdef subdef meth subused used)) { | |
362 | $perev = $pername->{$ev}; | |
363 | if (defined($perev) && @$perev) { | |
364 | my $code = $code{$ev}; | |
365 | push(@lines, map("$code$_", @$perev)); | |
366 | } | |
367 | } | |
368 | printf " %-16s %s\n", $name, join(", ", @lines); | |
369 | } | |
370 | } | |
371 | } | |
372 | } | |
373 | } | |
374 | ||
375 | sub compile { | |
376 | my @options = @_; | |
377 | my ($option, $opt, $arg); | |
378 | OPTION: | |
379 | while ($option = shift @options) { | |
380 | if ($option =~ /^-(.)(.*)/) { | |
381 | $opt = $1; | |
382 | $arg = $2; | |
383 | } else { | |
384 | unshift @options, $option; | |
385 | last OPTION; | |
386 | } | |
387 | if ($opt eq "-" && $arg eq "-") { | |
388 | shift @options; | |
389 | last OPTION; | |
390 | } elsif ($opt eq "o") { | |
391 | $arg ||= shift @options; | |
392 | open(STDOUT, ">$arg") or return "$arg: $!\n"; | |
393 | } elsif ($opt eq "d") { | |
394 | $nodefs = 1; | |
395 | } elsif ($opt eq "r") { | |
396 | $raw = 1; | |
397 | } elsif ($opt eq "D") { | |
398 | $arg ||= shift @options; | |
399 | foreach $arg (split(//, $arg)) { | |
400 | if ($arg eq "o") { | |
401 | B->debug(1); | |
402 | } elsif ($arg eq "O") { | |
403 | $debug_op = 1; | |
404 | } elsif ($arg eq "t") { | |
405 | $debug_top = 1; | |
406 | } | |
407 | } | |
408 | } | |
409 | } | |
410 | if (@options) { | |
411 | return sub { | |
412 | my $objname; | |
413 | xref_definitions(); | |
414 | foreach $objname (@options) { | |
415 | $objname = "main::$objname" unless $objname =~ /::/; | |
416 | eval "xref_object(\\&$objname)"; | |
417 | die "xref_object(\\&$objname) failed: $@" if $@; | |
418 | } | |
419 | output(); | |
420 | } | |
421 | } else { | |
422 | return sub { | |
423 | xref_definitions(); | |
424 | xref_main(); | |
425 | output(); | |
426 | } | |
427 | } | |
428 | } | |
429 | ||
430 | 1; |