| 1 | package B::Concise; |
| 2 | # Copyright (C) 2000-2003 Stephen McCamant. All rights reserved. |
| 3 | # This program is free software; you can redistribute and/or modify it |
| 4 | # under the same terms as Perl itself. |
| 5 | |
| 6 | # Note: we need to keep track of how many use declarations/BEGIN |
| 7 | # blocks this module uses, so we can avoid printing them when user |
| 8 | # asks for the BEGIN blocks in her program. Update the comments and |
| 9 | # the count in concise_specials if you add or delete one. The |
| 10 | # -MO=Concise counts as use #1. |
| 11 | |
| 12 | use strict; # use #2 |
| 13 | use warnings; # uses #3 and #4, since warnings uses Carp |
| 14 | |
| 15 | use Exporter (); # use #5 |
| 16 | |
| 17 | our $VERSION = "0.81"; |
| 18 | our @ISA = qw(Exporter); |
| 19 | our @EXPORT_OK = qw( set_style set_style_standard add_callback |
| 20 | concise_subref concise_cv concise_main |
| 21 | add_style walk_output compile reset_sequence ); |
| 22 | our %EXPORT_TAGS = |
| 23 | ( io => [qw( walk_output compile reset_sequence )], |
| 24 | style => [qw( add_style set_style_standard )], |
| 25 | cb => [qw( add_callback )], |
| 26 | mech => [qw( concise_subref concise_cv concise_main )], ); |
| 27 | |
| 28 | # use #6 |
| 29 | use B qw(class ppname main_start main_root main_cv cstring svref_2object |
| 30 | SVf_IOK SVf_NOK SVf_POK SVf_IVisUV SVf_FAKE OPf_KIDS OPf_SPECIAL |
| 31 | CVf_ANON PAD_FAKELEX_ANON PAD_FAKELEX_MULTI SVf_ROK); |
| 32 | |
| 33 | my %style = |
| 34 | ("terse" => |
| 35 | ["(?(#label =>\n)?)(*( )*)#class (#addr) #name (?([#targ])?) " |
| 36 | . "#svclass~(?((#svaddr))?)~#svval~(?(label \"#coplabel\")?)\n", |
| 37 | "(*( )*)goto #class (#addr)\n", |
| 38 | "#class pp_#name"], |
| 39 | "concise" => |
| 40 | ["#hyphseq2 (*( (x( ;)x))*)<#classsym> #exname#arg(?([#targarglife])?)" |
| 41 | . "~#flags(?(/#private)?)(?(:#hints)?)(x(;~->#next)x)\n" |
| 42 | , " (*( )*) goto #seq\n", |
| 43 | "(?(<#seq>)?)#exname#arg(?([#targarglife])?)"], |
| 44 | "linenoise" => |
| 45 | ["(x(;(*( )*))x)#noise#arg(?([#targarg])?)(x( ;\n)x)", |
| 46 | "gt_#seq ", |
| 47 | "(?(#seq)?)#noise#arg(?([#targarg])?)"], |
| 48 | "debug" => |
| 49 | ["#class (#addr)\n\top_next\t\t#nextaddr\n\top_sibling\t#sibaddr\n\t" |
| 50 | . "op_ppaddr\tPL_ppaddr[OP_#NAME]\n\top_type\t\t#typenum\n" . |
| 51 | ($] > 5.009 ? '' : "\top_seq\t\t#seqnum\n") |
| 52 | . "\top_flags\t#flagval\n\top_private\t#privval\t#hintsval\n" |
| 53 | . "(?(\top_first\t#firstaddr\n)?)(?(\top_last\t\t#lastaddr\n)?)" |
| 54 | . "(?(\top_sv\t\t#svaddr\n)?)", |
| 55 | " GOTO #addr\n", |
| 56 | "#addr"], |
| 57 | "env" => [$ENV{B_CONCISE_FORMAT}, $ENV{B_CONCISE_GOTO_FORMAT}, |
| 58 | $ENV{B_CONCISE_TREE_FORMAT}], |
| 59 | ); |
| 60 | |
| 61 | # Renderings, ie how Concise prints, is controlled by these vars |
| 62 | # primary: |
| 63 | our $stylename; # selects current style from %style |
| 64 | my $order = "basic"; # how optree is walked & printed: basic, exec, tree |
| 65 | |
| 66 | # rendering mechanics: |
| 67 | # these 'formats' are the line-rendering templates |
| 68 | # they're updated from %style when $stylename changes |
| 69 | my ($format, $gotofmt, $treefmt); |
| 70 | |
| 71 | # lesser players: |
| 72 | my $base = 36; # how <sequence#> is displayed |
| 73 | my $big_endian = 1; # more <sequence#> display |
| 74 | my $tree_style = 0; # tree-order details |
| 75 | my $banner = 1; # print banner before optree is traversed |
| 76 | my $do_main = 0; # force printing of main routine |
| 77 | my $show_src; # show source code |
| 78 | |
| 79 | # another factor: can affect all styles! |
| 80 | our @callbacks; # allow external management |
| 81 | |
| 82 | set_style_standard("concise"); |
| 83 | |
| 84 | my $curcv; |
| 85 | my $cop_seq_base; |
| 86 | |
| 87 | sub set_style { |
| 88 | ($format, $gotofmt, $treefmt) = @_; |
| 89 | #warn "set_style: deprecated, use set_style_standard instead\n"; # someday |
| 90 | die "expecting 3 style-format args\n" unless @_ == 3; |
| 91 | } |
| 92 | |
| 93 | sub add_style { |
| 94 | my ($newstyle,@args) = @_; |
| 95 | die "style '$newstyle' already exists, choose a new name\n" |
| 96 | if exists $style{$newstyle}; |
| 97 | die "expecting 3 style-format args\n" unless @args == 3; |
| 98 | $style{$newstyle} = [@args]; |
| 99 | $stylename = $newstyle; # update rendering state |
| 100 | } |
| 101 | |
| 102 | sub set_style_standard { |
| 103 | ($stylename) = @_; # update rendering state |
| 104 | die "err: style '$stylename' unknown\n" unless exists $style{$stylename}; |
| 105 | set_style(@{$style{$stylename}}); |
| 106 | } |
| 107 | |
| 108 | sub add_callback { |
| 109 | push @callbacks, @_; |
| 110 | } |
| 111 | |
| 112 | # output handle, used with all Concise-output printing |
| 113 | our $walkHandle; # public for your convenience |
| 114 | BEGIN { $walkHandle = \*STDOUT } |
| 115 | |
| 116 | sub walk_output { # updates $walkHandle |
| 117 | my $handle = shift; |
| 118 | return $walkHandle unless $handle; # allow use as accessor |
| 119 | |
| 120 | if (ref $handle eq 'SCALAR') { |
| 121 | require Config; |
| 122 | die "no perlio in this build, can't call walk_output (\\\$scalar)\n" |
| 123 | unless $Config::Config{useperlio}; |
| 124 | # in 5.8+, open(FILEHANDLE,MODE,REFERENCE) writes to string |
| 125 | open my $tmp, '>', $handle; # but cant re-set existing STDOUT |
| 126 | $walkHandle = $tmp; # so use my $tmp as intermediate var |
| 127 | return $walkHandle; |
| 128 | } |
| 129 | my $iotype = ref $handle; |
| 130 | die "expecting argument/object that can print\n" |
| 131 | unless $iotype eq 'GLOB' or $iotype and $handle->can('print'); |
| 132 | $walkHandle = $handle; |
| 133 | } |
| 134 | |
| 135 | sub concise_subref { |
| 136 | my($order, $coderef, $name) = @_; |
| 137 | my $codeobj = svref_2object($coderef); |
| 138 | |
| 139 | return concise_stashref(@_) |
| 140 | unless ref $codeobj eq 'B::CV'; |
| 141 | concise_cv_obj($order, $codeobj, $name); |
| 142 | } |
| 143 | |
| 144 | sub concise_stashref { |
| 145 | my($order, $h) = @_; |
| 146 | local *s; |
| 147 | foreach my $k (sort keys %$h) { |
| 148 | next unless defined $h->{$k}; |
| 149 | *s = $h->{$k}; |
| 150 | my $coderef = *s{CODE} or next; |
| 151 | reset_sequence(); |
| 152 | print "FUNC: ", *s, "\n"; |
| 153 | my $codeobj = svref_2object($coderef); |
| 154 | next unless ref $codeobj eq 'B::CV'; |
| 155 | eval { concise_cv_obj($order, $codeobj, $k) }; |
| 156 | warn "err $@ on $codeobj" if $@; |
| 157 | } |
| 158 | } |
| 159 | |
| 160 | # This should have been called concise_subref, but it was exported |
| 161 | # under this name in versions before 0.56 |
| 162 | *concise_cv = \&concise_subref; |
| 163 | |
| 164 | sub concise_cv_obj { |
| 165 | my ($order, $cv, $name) = @_; |
| 166 | # name is either a string, or a CODE ref (copy of $cv arg??) |
| 167 | |
| 168 | $curcv = $cv; |
| 169 | |
| 170 | if (ref($cv->XSUBANY) =~ /B::(\w+)/) { |
| 171 | print $walkHandle "$name is a constant sub, optimized to a $1\n"; |
| 172 | return; |
| 173 | } |
| 174 | if ($cv->XSUB) { |
| 175 | print $walkHandle "$name is XS code\n"; |
| 176 | return; |
| 177 | } |
| 178 | if (class($cv->START) eq "NULL") { |
| 179 | no strict 'refs'; |
| 180 | if (ref $name eq 'CODE') { |
| 181 | print $walkHandle "coderef $name has no START\n"; |
| 182 | } |
| 183 | elsif (exists &$name) { |
| 184 | print $walkHandle "$name exists in stash, but has no START\n"; |
| 185 | } |
| 186 | else { |
| 187 | print $walkHandle "$name not in symbol table\n"; |
| 188 | } |
| 189 | return; |
| 190 | } |
| 191 | sequence($cv->START); |
| 192 | if ($order eq "exec") { |
| 193 | walk_exec($cv->START); |
| 194 | } |
| 195 | elsif ($order eq "basic") { |
| 196 | # walk_topdown($cv->ROOT, sub { $_[0]->concise($_[1]) }, 0); |
| 197 | my $root = $cv->ROOT; |
| 198 | unless (ref $root eq 'B::NULL') { |
| 199 | walk_topdown($root, sub { $_[0]->concise($_[1]) }, 0); |
| 200 | } else { |
| 201 | print $walkHandle "B::NULL encountered doing ROOT on $cv. avoiding disaster\n"; |
| 202 | } |
| 203 | } else { |
| 204 | print $walkHandle tree($cv->ROOT, 0); |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | sub concise_main { |
| 209 | my($order) = @_; |
| 210 | sequence(main_start); |
| 211 | $curcv = main_cv; |
| 212 | if ($order eq "exec") { |
| 213 | return if class(main_start) eq "NULL"; |
| 214 | walk_exec(main_start); |
| 215 | } elsif ($order eq "tree") { |
| 216 | return if class(main_root) eq "NULL"; |
| 217 | print $walkHandle tree(main_root, 0); |
| 218 | } elsif ($order eq "basic") { |
| 219 | return if class(main_root) eq "NULL"; |
| 220 | walk_topdown(main_root, |
| 221 | sub { $_[0]->concise($_[1]) }, 0); |
| 222 | } |
| 223 | } |
| 224 | |
| 225 | sub concise_specials { |
| 226 | my($name, $order, @cv_s) = @_; |
| 227 | my $i = 1; |
| 228 | if ($name eq "BEGIN") { |
| 229 | splice(@cv_s, 0, 8); # skip 7 BEGIN blocks in this file. NOW 8 ?? |
| 230 | } elsif ($name eq "CHECK") { |
| 231 | pop @cv_s; # skip the CHECK block that calls us |
| 232 | } |
| 233 | for my $cv (@cv_s) { |
| 234 | print $walkHandle "$name $i:\n"; |
| 235 | $i++; |
| 236 | concise_cv_obj($order, $cv, $name); |
| 237 | } |
| 238 | } |
| 239 | |
| 240 | my $start_sym = "\e(0"; # "\cN" sometimes also works |
| 241 | my $end_sym = "\e(B"; # "\cO" respectively |
| 242 | |
| 243 | my @tree_decorations = |
| 244 | ([" ", "--", "+-", "|-", "| ", "`-", "-", 1], |
| 245 | [" ", "-", "+", "+", "|", "`", "", 0], |
| 246 | [" ", map("$start_sym$_$end_sym", "qq", "wq", "tq", "x ", "mq", "q"), 1], |
| 247 | [" ", map("$start_sym$_$end_sym", "q", "w", "t", "x", "m"), "", 0], |
| 248 | ); |
| 249 | |
| 250 | my @render_packs; # collect -stash=<packages> |
| 251 | |
| 252 | sub compileOpts { |
| 253 | # set rendering state from options and args |
| 254 | my (@options,@args); |
| 255 | if (@_) { |
| 256 | @options = grep(/^-/, @_); |
| 257 | @args = grep(!/^-/, @_); |
| 258 | } |
| 259 | for my $o (@options) { |
| 260 | # mode/order |
| 261 | if ($o eq "-basic") { |
| 262 | $order = "basic"; |
| 263 | } elsif ($o eq "-exec") { |
| 264 | $order = "exec"; |
| 265 | } elsif ($o eq "-tree") { |
| 266 | $order = "tree"; |
| 267 | } |
| 268 | # tree-specific |
| 269 | elsif ($o eq "-compact") { |
| 270 | $tree_style |= 1; |
| 271 | } elsif ($o eq "-loose") { |
| 272 | $tree_style &= ~1; |
| 273 | } elsif ($o eq "-vt") { |
| 274 | $tree_style |= 2; |
| 275 | } elsif ($o eq "-ascii") { |
| 276 | $tree_style &= ~2; |
| 277 | } |
| 278 | # sequence numbering |
| 279 | elsif ($o =~ /^-base(\d+)$/) { |
| 280 | $base = $1; |
| 281 | } elsif ($o eq "-bigendian") { |
| 282 | $big_endian = 1; |
| 283 | } elsif ($o eq "-littleendian") { |
| 284 | $big_endian = 0; |
| 285 | } |
| 286 | # miscellaneous, presentation |
| 287 | elsif ($o eq "-nobanner") { |
| 288 | $banner = 0; |
| 289 | } elsif ($o eq "-banner") { |
| 290 | $banner = 1; |
| 291 | } |
| 292 | elsif ($o eq "-main") { |
| 293 | $do_main = 1; |
| 294 | } elsif ($o eq "-nomain") { |
| 295 | $do_main = 0; |
| 296 | } elsif ($o eq "-src") { |
| 297 | $show_src = 1; |
| 298 | } |
| 299 | elsif ($o =~ /^-stash=(.*)/) { |
| 300 | my $pkg = $1; |
| 301 | no strict 'refs'; |
| 302 | if (! %{$pkg.'::'}) { |
| 303 | eval "require $pkg"; |
| 304 | } else { |
| 305 | require Config; |
| 306 | if (!$Config::Config{usedl} |
| 307 | && keys %{$pkg.'::'} == 1 |
| 308 | && $pkg->can('bootstrap')) { |
| 309 | # It is something that we're staticly linked to, but hasn't |
| 310 | # yet been used. |
| 311 | eval "require $pkg"; |
| 312 | } |
| 313 | } |
| 314 | push @render_packs, $pkg; |
| 315 | } |
| 316 | # line-style options |
| 317 | elsif (exists $style{substr($o, 1)}) { |
| 318 | $stylename = substr($o, 1); |
| 319 | set_style_standard($stylename); |
| 320 | } else { |
| 321 | warn "Option $o unrecognized"; |
| 322 | } |
| 323 | } |
| 324 | return (@args); |
| 325 | } |
| 326 | |
| 327 | sub compile { |
| 328 | my (@args) = compileOpts(@_); |
| 329 | return sub { |
| 330 | my @newargs = compileOpts(@_); # accept new rendering options |
| 331 | warn "disregarding non-options: @newargs\n" if @newargs; |
| 332 | |
| 333 | for my $objname (@args) { |
| 334 | next unless $objname; # skip null args to avoid noisy responses |
| 335 | |
| 336 | if ($objname eq "BEGIN") { |
| 337 | concise_specials("BEGIN", $order, |
| 338 | B::begin_av->isa("B::AV") ? |
| 339 | B::begin_av->ARRAY : ()); |
| 340 | } elsif ($objname eq "INIT") { |
| 341 | concise_specials("INIT", $order, |
| 342 | B::init_av->isa("B::AV") ? |
| 343 | B::init_av->ARRAY : ()); |
| 344 | } elsif ($objname eq "CHECK") { |
| 345 | concise_specials("CHECK", $order, |
| 346 | B::check_av->isa("B::AV") ? |
| 347 | B::check_av->ARRAY : ()); |
| 348 | } elsif ($objname eq "UNITCHECK") { |
| 349 | concise_specials("UNITCHECK", $order, |
| 350 | B::unitcheck_av->isa("B::AV") ? |
| 351 | B::unitcheck_av->ARRAY : ()); |
| 352 | } elsif ($objname eq "END") { |
| 353 | concise_specials("END", $order, |
| 354 | B::end_av->isa("B::AV") ? |
| 355 | B::end_av->ARRAY : ()); |
| 356 | } |
| 357 | else { |
| 358 | # convert function names to subrefs |
| 359 | my $objref; |
| 360 | if (ref $objname) { |
| 361 | print $walkHandle "B::Concise::compile($objname)\n" |
| 362 | if $banner; |
| 363 | $objref = $objname; |
| 364 | } else { |
| 365 | $objname = "main::" . $objname unless $objname =~ /::/; |
| 366 | print $walkHandle "$objname:\n"; |
| 367 | no strict 'refs'; |
| 368 | unless (exists &$objname) { |
| 369 | print $walkHandle "err: unknown function ($objname)\n"; |
| 370 | return; |
| 371 | } |
| 372 | $objref = \&$objname; |
| 373 | } |
| 374 | concise_subref($order, $objref, $objname); |
| 375 | } |
| 376 | } |
| 377 | for my $pkg (@render_packs) { |
| 378 | no strict 'refs'; |
| 379 | concise_stashref($order, \%{$pkg.'::'}); |
| 380 | } |
| 381 | |
| 382 | if (!@args or $do_main or @render_packs) { |
| 383 | print $walkHandle "main program:\n" if $do_main; |
| 384 | concise_main($order); |
| 385 | } |
| 386 | return @args; # something |
| 387 | } |
| 388 | } |
| 389 | |
| 390 | my %labels; |
| 391 | my $lastnext; # remembers op-chain, used to insert gotos |
| 392 | |
| 393 | my %opclass = ('OP' => "0", 'UNOP' => "1", 'BINOP' => "2", 'LOGOP' => "|", |
| 394 | 'LISTOP' => "@", 'PMOP' => "/", 'SVOP' => "\$", 'GVOP' => "*", |
| 395 | 'PVOP' => '"', 'LOOP' => "{", 'COP' => ";", 'PADOP' => "#"); |
| 396 | |
| 397 | no warnings 'qw'; # "Possible attempt to put comments..."; use #7 |
| 398 | my @linenoise = |
| 399 | qw'# () sc ( @? 1 $* gv *{ m$ m@ m% m? p/ *$ $ $# & a& pt \\ s\\ rf bl |
| 400 | ` *? <> ?? ?/ r/ c/ // qr s/ /c y/ = @= C sC Cp sp df un BM po +1 +I |
| 401 | -1 -I 1+ I+ 1- I- ** * i* / i/ %$ i% x + i+ - i- . " << >> < i< |
| 402 | > i> <= i, >= i. == i= != i! <? i? s< s> s, s. s= s! s? b& b^ b| -0 -i |
| 403 | ! ~ a2 si cs rd sr e^ lg sq in %x %o ab le ss ve ix ri sf FL od ch cy |
| 404 | uf lf uc lc qm @ [f [ @[ eh vl ky dl ex % ${ @{ uk pk st jn ) )[ a@ |
| 405 | a% sl +] -] [- [+ so rv GS GW MS MW .. f. .f && || ^^ ?: &= |= -> s{ s} |
| 406 | v} ca wa di rs ;; ; ;d }{ { } {} f{ it {l l} rt }l }n }r dm }g }e ^o |
| 407 | ^c ^| ^# um bm t~ u~ ~d DB db ^s se ^g ^r {w }w pf pr ^O ^K ^R ^W ^d ^v |
| 408 | ^e ^t ^k t. fc ic fl .s .p .b .c .l .a .h g1 s1 g2 s2 ?. l? -R -W -X -r |
| 409 | -w -x -e -o -O -z -s -M -A -C -S -c -b -f -d -p -l -u -g -k -t -T -B cd |
| 410 | co cr u. cm ut r. l@ s@ r@ mD uD oD rD tD sD wD cD f$ w$ p$ sh e$ k$ g3 |
| 411 | g4 s4 g5 s5 T@ C@ L@ G@ A@ S@ Hg Hc Hr Hw Mg Mc Ms Mr Sg Sc So rq do {e |
| 412 | e} {t t} g6 G6 6e g7 G7 7e g8 G8 8e g9 G9 9e 6s 7s 8s 9s 6E 7E 8E 9E Pn |
| 413 | Pu GP SP EP Gn Gg GG SG EG g0 c$ lk t$ ;s n> // /= CO'; |
| 414 | |
| 415 | my $chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; |
| 416 | |
| 417 | sub op_flags { # common flags (see BASOP.op_flags in op.h) |
| 418 | my($x) = @_; |
| 419 | my(@v); |
| 420 | push @v, "v" if ($x & 3) == 1; |
| 421 | push @v, "s" if ($x & 3) == 2; |
| 422 | push @v, "l" if ($x & 3) == 3; |
| 423 | push @v, "K" if $x & 4; |
| 424 | push @v, "P" if $x & 8; |
| 425 | push @v, "R" if $x & 16; |
| 426 | push @v, "M" if $x & 32; |
| 427 | push @v, "S" if $x & 64; |
| 428 | push @v, "*" if $x & 128; |
| 429 | return join("", @v); |
| 430 | } |
| 431 | |
| 432 | sub base_n { |
| 433 | my $x = shift; |
| 434 | return "-" . base_n(-$x) if $x < 0; |
| 435 | my $str = ""; |
| 436 | do { $str .= substr($chars, $x % $base, 1) } while $x = int($x / $base); |
| 437 | $str = reverse $str if $big_endian; |
| 438 | return $str; |
| 439 | } |
| 440 | |
| 441 | my %sequence_num; |
| 442 | my $seq_max = 1; |
| 443 | |
| 444 | sub reset_sequence { |
| 445 | # reset the sequence |
| 446 | %sequence_num = (); |
| 447 | $seq_max = 1; |
| 448 | $lastnext = 0; |
| 449 | } |
| 450 | |
| 451 | sub seq { |
| 452 | my($op) = @_; |
| 453 | return "-" if not exists $sequence_num{$$op}; |
| 454 | return base_n($sequence_num{$$op}); |
| 455 | } |
| 456 | |
| 457 | sub walk_topdown { |
| 458 | my($op, $sub, $level) = @_; |
| 459 | $sub->($op, $level); |
| 460 | if ($op->flags & OPf_KIDS) { |
| 461 | for (my $kid = $op->first; $$kid; $kid = $kid->sibling) { |
| 462 | walk_topdown($kid, $sub, $level + 1); |
| 463 | } |
| 464 | } |
| 465 | if (class($op) eq "PMOP") { |
| 466 | my $maybe_root = $op->pmreplroot; |
| 467 | if (ref($maybe_root) and $maybe_root->isa("B::OP")) { |
| 468 | # It really is the root of the replacement, not something |
| 469 | # else stored here for lack of space elsewhere |
| 470 | walk_topdown($maybe_root, $sub, $level + 1); |
| 471 | } |
| 472 | } |
| 473 | } |
| 474 | |
| 475 | sub walklines { |
| 476 | my($ar, $level) = @_; |
| 477 | for my $l (@$ar) { |
| 478 | if (ref($l) eq "ARRAY") { |
| 479 | walklines($l, $level + 1); |
| 480 | } else { |
| 481 | $l->concise($level); |
| 482 | } |
| 483 | } |
| 484 | } |
| 485 | |
| 486 | sub walk_exec { |
| 487 | my($top, $level) = @_; |
| 488 | my %opsseen; |
| 489 | my @lines; |
| 490 | my @todo = ([$top, \@lines]); |
| 491 | while (@todo and my($op, $targ) = @{shift @todo}) { |
| 492 | for (; $$op; $op = $op->next) { |
| 493 | last if $opsseen{$$op}++; |
| 494 | push @$targ, $op; |
| 495 | my $name = $op->name; |
| 496 | if (class($op) eq "LOGOP") { |
| 497 | my $ar = []; |
| 498 | push @$targ, $ar; |
| 499 | push @todo, [$op->other, $ar]; |
| 500 | } elsif ($name eq "subst" and $ {$op->pmreplstart}) { |
| 501 | my $ar = []; |
| 502 | push @$targ, $ar; |
| 503 | push @todo, [$op->pmreplstart, $ar]; |
| 504 | } elsif ($name =~ /^enter(loop|iter)$/) { |
| 505 | if ($] > 5.009) { |
| 506 | $labels{${$op->nextop}} = "NEXT"; |
| 507 | $labels{${$op->lastop}} = "LAST"; |
| 508 | $labels{${$op->redoop}} = "REDO"; |
| 509 | } else { |
| 510 | $labels{$op->nextop->seq} = "NEXT"; |
| 511 | $labels{$op->lastop->seq} = "LAST"; |
| 512 | $labels{$op->redoop->seq} = "REDO"; |
| 513 | } |
| 514 | } |
| 515 | } |
| 516 | } |
| 517 | walklines(\@lines, 0); |
| 518 | } |
| 519 | |
| 520 | # The structure of this routine is purposely modeled after op.c's peep() |
| 521 | sub sequence { |
| 522 | my($op) = @_; |
| 523 | my $oldop = 0; |
| 524 | return if class($op) eq "NULL" or exists $sequence_num{$$op}; |
| 525 | for (; $$op; $op = $op->next) { |
| 526 | last if exists $sequence_num{$$op}; |
| 527 | my $name = $op->name; |
| 528 | if ($name =~ /^(null|scalar|lineseq|scope)$/) { |
| 529 | next if $oldop and $ {$op->next}; |
| 530 | } else { |
| 531 | $sequence_num{$$op} = $seq_max++; |
| 532 | if (class($op) eq "LOGOP") { |
| 533 | my $other = $op->other; |
| 534 | $other = $other->next while $other->name eq "null"; |
| 535 | sequence($other); |
| 536 | } elsif (class($op) eq "LOOP") { |
| 537 | my $redoop = $op->redoop; |
| 538 | $redoop = $redoop->next while $redoop->name eq "null"; |
| 539 | sequence($redoop); |
| 540 | my $nextop = $op->nextop; |
| 541 | $nextop = $nextop->next while $nextop->name eq "null"; |
| 542 | sequence($nextop); |
| 543 | my $lastop = $op->lastop; |
| 544 | $lastop = $lastop->next while $lastop->name eq "null"; |
| 545 | sequence($lastop); |
| 546 | } elsif ($name eq "subst" and $ {$op->pmreplstart}) { |
| 547 | my $replstart = $op->pmreplstart; |
| 548 | $replstart = $replstart->next while $replstart->name eq "null"; |
| 549 | sequence($replstart); |
| 550 | } |
| 551 | } |
| 552 | $oldop = $op; |
| 553 | } |
| 554 | } |
| 555 | |
| 556 | sub fmt_line { # generate text-line for op. |
| 557 | my($hr, $op, $text, $level) = @_; |
| 558 | |
| 559 | $_->($hr, $op, \$text, \$level, $stylename) for @callbacks; |
| 560 | |
| 561 | return '' if $hr->{SKIP}; # suppress line if a callback said so |
| 562 | return '' if $hr->{goto} and $hr->{goto} eq '-'; # no goto nowhere |
| 563 | |
| 564 | # spec: (?(text1#varText2)?) |
| 565 | $text =~ s/\(\?\(([^\#]*?)\#(\w+)([^\#]*?)\)\?\)/ |
| 566 | $hr->{$2} ? $1.$hr->{$2}.$3 : ""/eg; |
| 567 | |
| 568 | # spec: (x(exec_text;basic_text)x) |
| 569 | $text =~ s/\(x\((.*?);(.*?)\)x\)/$order eq "exec" ? $1 : $2/egs; |
| 570 | |
| 571 | # spec: (*(text)*) |
| 572 | $text =~ s/\(\*\(([^;]*?)\)\*\)/$1 x $level/egs; |
| 573 | |
| 574 | # spec: (*(text1;text2)*) |
| 575 | $text =~ s/\(\*\((.*?);(.*?)\)\*\)/$1 x ($level - 1) . $2 x ($level>0)/egs; |
| 576 | |
| 577 | # convert #Var to tag=>val form: Var\t#var |
| 578 | $text =~ s/\#([A-Z][a-z]+)(\d+)?/\t\u$1\t\L#$1$2/gs; |
| 579 | |
| 580 | # spec: #varN |
| 581 | $text =~ s/\#([a-zA-Z]+)(\d+)/sprintf("%-$2s", $hr->{$1})/eg; |
| 582 | |
| 583 | $text =~ s/\#([a-zA-Z]+)/$hr->{$1}/eg; # populate #var's |
| 584 | $text =~ s/[ \t]*~+[ \t]*/ /g; # squeeze tildes |
| 585 | |
| 586 | $text = "# $hr->{src}\n$text" if $show_src and $hr->{src}; |
| 587 | |
| 588 | chomp $text; |
| 589 | return "$text\n" if $text ne "" and $order ne "tree"; |
| 590 | return $text; # suppress empty lines |
| 591 | } |
| 592 | |
| 593 | our %priv; # used to display each opcode's BASEOP.op_private values |
| 594 | |
| 595 | $priv{$_}{128} = "LVINTRO" |
| 596 | for ("pos", "substr", "vec", "threadsv", "gvsv", "rv2sv", "rv2hv", "rv2gv", |
| 597 | "rv2av", "rv2arylen", "aelem", "helem", "aslice", "hslice", "padsv", |
| 598 | "padav", "padhv", "enteriter"); |
| 599 | $priv{$_}{64} = "REFC" for ("leave", "leavesub", "leavesublv", "leavewrite"); |
| 600 | $priv{"aassign"}{64} = "COMMON"; |
| 601 | $priv{"aassign"}{32} = $] < 5.009 ? "PHASH" : "STATE"; |
| 602 | $priv{"sassign"}{32} = "STATE"; |
| 603 | $priv{"sassign"}{64} = "BKWARD"; |
| 604 | $priv{$_}{64} = "RTIME" for ("match", "subst", "substcont", "qr"); |
| 605 | @{$priv{"trans"}}{1,2,4,8,16,64} = ("<UTF", ">UTF", "IDENT", "SQUASH", "DEL", |
| 606 | "COMPL", "GROWS"); |
| 607 | $priv{transr} = $priv{trans}; |
| 608 | $priv{"repeat"}{64} = "DOLIST"; |
| 609 | $priv{"leaveloop"}{64} = "CONT"; |
| 610 | $priv{$_}{4} = "DREFed" for (qw(rv2sv rv2av rv2hv)); |
| 611 | @{$priv{$_}}{32,64,96} = ("DREFAV", "DREFHV", "DREFSV") |
| 612 | for (qw(rv2gv rv2sv padsv aelem helem)); |
| 613 | $priv{$_}{16} = "STATE" for ("padav", "padhv", "padsv"); |
| 614 | @{$priv{"entersub"}}{16,32,64} = ("DBG","TARG","NOMOD"); |
| 615 | @{$priv{$_}}{4,8,128} = ("INARGS","AMPER","NO()") for ("entersub", "rv2cv"); |
| 616 | $priv{"gv"}{32} = "EARLYCV"; |
| 617 | $priv{"aelem"}{16} = $priv{"helem"}{16} = "LVDEFER"; |
| 618 | $priv{$_}{16} = "OURINTR" for ("gvsv", "rv2sv", "rv2av", "rv2hv", "r2gv", |
| 619 | "enteriter"); |
| 620 | $priv{$_}{16} = "TARGMY" |
| 621 | for (map(($_,"s$_"),"chop", "chomp"), |
| 622 | map(($_,"i_$_"), "postinc", "postdec", "multiply", "divide", "modulo", |
| 623 | "add", "subtract", "negate"), "pow", "concat", "stringify", |
| 624 | "left_shift", "right_shift", "bit_and", "bit_xor", "bit_or", |
| 625 | "complement", "atan2", "sin", "cos", "rand", "exp", "log", "sqrt", |
| 626 | "int", "hex", "oct", "abs", "length", "index", "rindex", "sprintf", |
| 627 | "ord", "chr", "crypt", "quotemeta", "join", "push", "unshift", "flock", |
| 628 | "chdir", "chown", "chroot", "unlink", "chmod", "utime", "rename", |
| 629 | "link", "symlink", "mkdir", "rmdir", "wait", "waitpid", "system", |
| 630 | "exec", "kill", "getppid", "getpgrp", "setpgrp", "getpriority", |
| 631 | "setpriority", "time", "sleep"); |
| 632 | $priv{$_}{4} = "REVERSED" for ("enteriter", "iter"); |
| 633 | @{$priv{"const"}}{4,8,16,32,64,128} = ("SHORT","STRICT","ENTERED",'$[',"BARE","WARN"); |
| 634 | $priv{"flip"}{64} = $priv{"flop"}{64} = "LINENUM"; |
| 635 | $priv{"list"}{64} = "GUESSED"; |
| 636 | $priv{"delete"}{64} = "SLICE"; |
| 637 | $priv{"exists"}{64} = "SUB"; |
| 638 | @{$priv{"sort"}}{1,2,4,8,16,32,64} = ("NUM", "INT", "REV", "INPLACE","DESC","QSORT","STABLE"); |
| 639 | $priv{"reverse"}{8} = "INPLACE"; |
| 640 | $priv{"threadsv"}{64} = "SVREFd"; |
| 641 | @{$priv{$_}}{16,32,64,128} = ("INBIN","INCR","OUTBIN","OUTCR") |
| 642 | for ("open", "backtick"); |
| 643 | $priv{"exit"}{128} = "VMS"; |
| 644 | $priv{$_}{2} = "FTACCESS" |
| 645 | for ("ftrread", "ftrwrite", "ftrexec", "fteread", "ftewrite", "fteexec"); |
| 646 | $priv{"entereval"}{2} = "HAS_HH"; |
| 647 | if ($] >= 5.009) { |
| 648 | # Stacked filetests are post 5.8.x |
| 649 | $priv{$_}{4} = "FTSTACKED" |
| 650 | for ("ftrread", "ftrwrite", "ftrexec", "fteread", "ftewrite", "fteexec", |
| 651 | "ftis", "fteowned", "ftrowned", "ftzero", "ftsize", "ftmtime", |
| 652 | "ftatime", "ftctime", "ftsock", "ftchr", "ftblk", "ftfile", "ftdir", |
| 653 | "ftpipe", "ftlink", "ftsuid", "ftsgid", "ftsvtx", "fttty", "fttext", |
| 654 | "ftbinary"); |
| 655 | # Lexical $_ is post 5.8.x |
| 656 | $priv{$_}{2} = "GREPLEX" |
| 657 | for ("mapwhile", "mapstart", "grepwhile", "grepstart"); |
| 658 | } |
| 659 | |
| 660 | our %hints; # used to display each COP's op_hints values |
| 661 | |
| 662 | # strict refs, subs, vars |
| 663 | @hints{2,512,1024} = ('$', '&', '*'); |
| 664 | # integers, locale, bytes, arybase |
| 665 | @hints{1,4,8,16,32} = ('i', 'l', 'b', '['); |
| 666 | # block scope, localise %^H, $^OPEN (in), $^OPEN (out) |
| 667 | @hints{256,131072,262144,524288} = ('{','%','<','>'); |
| 668 | # overload new integer, float, binary, string, re |
| 669 | @hints{4096,8192,16384,32768,65536} = ('I', 'F', 'B', 'S', 'R'); |
| 670 | # taint and eval |
| 671 | @hints{1048576,2097152} = ('T', 'E'); |
| 672 | # filetest access, UTF-8 |
| 673 | @hints{4194304,8388608} = ('X', 'U'); |
| 674 | |
| 675 | sub _flags { |
| 676 | my($hash, $x) = @_; |
| 677 | my @s; |
| 678 | for my $flag (sort {$b <=> $a} keys %$hash) { |
| 679 | if ($hash->{$flag} and $x & $flag and $x >= $flag) { |
| 680 | $x -= $flag; |
| 681 | push @s, $hash->{$flag}; |
| 682 | } |
| 683 | } |
| 684 | push @s, $x if $x; |
| 685 | return join(",", @s); |
| 686 | } |
| 687 | |
| 688 | sub private_flags { |
| 689 | my($name, $x) = @_; |
| 690 | _flags($priv{$name}, $x); |
| 691 | } |
| 692 | |
| 693 | sub hints_flags { |
| 694 | my($x) = @_; |
| 695 | _flags(\%hints, $x); |
| 696 | } |
| 697 | |
| 698 | sub concise_sv { |
| 699 | my($sv, $hr, $preferpv) = @_; |
| 700 | $hr->{svclass} = class($sv); |
| 701 | $hr->{svclass} = "UV" |
| 702 | if $hr->{svclass} eq "IV" and $sv->FLAGS & SVf_IVisUV; |
| 703 | Carp::cluck("bad concise_sv: $sv") unless $sv and $$sv; |
| 704 | $hr->{svaddr} = sprintf("%#x", $$sv); |
| 705 | if ($hr->{svclass} eq "GV" && $sv->isGV_with_GP()) { |
| 706 | my $gv = $sv; |
| 707 | my $stash = $gv->STASH->NAME; if ($stash eq "main") { |
| 708 | $stash = ""; |
| 709 | } else { |
| 710 | $stash = $stash . "::"; |
| 711 | } |
| 712 | $hr->{svval} = "*$stash" . $gv->SAFENAME; |
| 713 | return "*$stash" . $gv->SAFENAME; |
| 714 | } else { |
| 715 | if ($] >= 5.011) { |
| 716 | while (class($sv) eq "IV" && $sv->FLAGS & SVf_ROK) { |
| 717 | $hr->{svval} .= "\\"; |
| 718 | $sv = $sv->RV; |
| 719 | } |
| 720 | } else { |
| 721 | while (class($sv) eq "RV") { |
| 722 | $hr->{svval} .= "\\"; |
| 723 | $sv = $sv->RV; |
| 724 | } |
| 725 | } |
| 726 | if (class($sv) eq "SPECIAL") { |
| 727 | $hr->{svval} .= ["Null", "sv_undef", "sv_yes", "sv_no"]->[$$sv]; |
| 728 | } elsif ($preferpv && $sv->FLAGS & SVf_POK) { |
| 729 | $hr->{svval} .= cstring($sv->PV); |
| 730 | } elsif ($sv->FLAGS & SVf_NOK) { |
| 731 | $hr->{svval} .= $sv->NV; |
| 732 | } elsif ($sv->FLAGS & SVf_IOK) { |
| 733 | $hr->{svval} .= $sv->int_value; |
| 734 | } elsif ($sv->FLAGS & SVf_POK) { |
| 735 | $hr->{svval} .= cstring($sv->PV); |
| 736 | } elsif (class($sv) eq "HV") { |
| 737 | $hr->{svval} .= 'HASH'; |
| 738 | } |
| 739 | |
| 740 | $hr->{svval} = 'undef' unless defined $hr->{svval}; |
| 741 | my $out = $hr->{svclass}; |
| 742 | return $out .= " $hr->{svval}" ; |
| 743 | } |
| 744 | } |
| 745 | |
| 746 | my %srclines; |
| 747 | |
| 748 | sub fill_srclines { |
| 749 | my $fullnm = shift; |
| 750 | if ($fullnm eq '-e') { |
| 751 | $srclines{$fullnm} = [ $fullnm, "-src not supported for -e" ]; |
| 752 | return; |
| 753 | } |
| 754 | open (my $fh, '<', $fullnm) |
| 755 | or warn "# $fullnm: $!, (chdirs not supported by this feature yet)\n" |
| 756 | and return; |
| 757 | my @l = <$fh>; |
| 758 | chomp @l; |
| 759 | unshift @l, $fullnm; # like @{_<$fullnm} in debug, array starts at 1 |
| 760 | $srclines{$fullnm} = \@l; |
| 761 | } |
| 762 | |
| 763 | sub concise_op { |
| 764 | my ($op, $level, $format) = @_; |
| 765 | my %h; |
| 766 | $h{exname} = $h{name} = $op->name; |
| 767 | $h{NAME} = uc $h{name}; |
| 768 | $h{class} = class($op); |
| 769 | $h{extarg} = $h{targ} = $op->targ; |
| 770 | $h{extarg} = "" unless $h{extarg}; |
| 771 | if ($h{name} eq "null" and $h{targ}) { |
| 772 | # targ holds the old type |
| 773 | $h{exname} = "ex-" . substr(ppname($h{targ}), 3); |
| 774 | $h{extarg} = ""; |
| 775 | } elsif ($op->name =~ /^leave(sub(lv)?|write)?$/) { |
| 776 | # targ potentially holds a reference count |
| 777 | if ($op->private & 64) { |
| 778 | my $refs = "ref" . ($h{targ} != 1 ? "s" : ""); |
| 779 | $h{targarglife} = $h{targarg} = "$h{targ} $refs"; |
| 780 | } |
| 781 | } elsif ($h{targ}) { |
| 782 | my $padname = (($curcv->PADLIST->ARRAY)[0]->ARRAY)[$h{targ}]; |
| 783 | if (defined $padname and class($padname) ne "SPECIAL") { |
| 784 | $h{targarg} = $padname->PVX; |
| 785 | if ($padname->FLAGS & SVf_FAKE) { |
| 786 | if ($] < 5.009) { |
| 787 | $h{targarglife} = "$h{targarg}:FAKE"; |
| 788 | } else { |
| 789 | # These changes relate to the jumbo closure fix. |
| 790 | # See changes 19939 and 20005 |
| 791 | my $fake = ''; |
| 792 | $fake .= 'a' |
| 793 | if $padname->PARENT_FAKELEX_FLAGS & PAD_FAKELEX_ANON; |
| 794 | $fake .= 'm' |
| 795 | if $padname->PARENT_FAKELEX_FLAGS & PAD_FAKELEX_MULTI; |
| 796 | $fake .= ':' . $padname->PARENT_PAD_INDEX |
| 797 | if $curcv->CvFLAGS & CVf_ANON; |
| 798 | $h{targarglife} = "$h{targarg}:FAKE:$fake"; |
| 799 | } |
| 800 | } |
| 801 | else { |
| 802 | my $intro = $padname->COP_SEQ_RANGE_LOW - $cop_seq_base; |
| 803 | my $finish = int($padname->COP_SEQ_RANGE_HIGH) - $cop_seq_base; |
| 804 | $finish = "end" if $finish == 999999999 - $cop_seq_base; |
| 805 | $h{targarglife} = "$h{targarg}:$intro,$finish"; |
| 806 | } |
| 807 | } else { |
| 808 | $h{targarglife} = $h{targarg} = "t" . $h{targ}; |
| 809 | } |
| 810 | } |
| 811 | $h{arg} = ""; |
| 812 | $h{svclass} = $h{svaddr} = $h{svval} = ""; |
| 813 | if ($h{class} eq "PMOP") { |
| 814 | my $precomp = $op->precomp; |
| 815 | if (defined $precomp) { |
| 816 | $precomp = cstring($precomp); # Escape literal control sequences |
| 817 | $precomp = "/$precomp/"; |
| 818 | } else { |
| 819 | $precomp = ""; |
| 820 | } |
| 821 | my $pmreplroot = $op->pmreplroot; |
| 822 | my $pmreplstart; |
| 823 | if (ref($pmreplroot) eq "B::GV") { |
| 824 | # with C<@stash_array = split(/pat/, str);>, |
| 825 | # *stash_array is stored in /pat/'s pmreplroot. |
| 826 | $h{arg} = "($precomp => \@" . $pmreplroot->NAME . ")"; |
| 827 | } elsif (!ref($pmreplroot) and $pmreplroot) { |
| 828 | # same as the last case, except the value is actually a |
| 829 | # pad offset for where the GV is kept (this happens under |
| 830 | # ithreads) |
| 831 | my $gv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$pmreplroot]; |
| 832 | $h{arg} = "($precomp => \@" . $gv->NAME . ")"; |
| 833 | } elsif ($ {$op->pmreplstart}) { |
| 834 | undef $lastnext; |
| 835 | $pmreplstart = "replstart->" . seq($op->pmreplstart); |
| 836 | $h{arg} = "(" . join(" ", $precomp, $pmreplstart) . ")"; |
| 837 | } else { |
| 838 | $h{arg} = "($precomp)"; |
| 839 | } |
| 840 | } elsif ($h{class} eq "PVOP" and $h{name} !~ '^transr?\z') { |
| 841 | $h{arg} = '("' . $op->pv . '")'; |
| 842 | $h{svval} = '"' . $op->pv . '"'; |
| 843 | } elsif ($h{class} eq "COP") { |
| 844 | my $label = $op->label; |
| 845 | $h{coplabel} = $label; |
| 846 | $label = $label ? "$label: " : ""; |
| 847 | my $loc = $op->file; |
| 848 | my $pathnm = $loc; |
| 849 | $loc =~ s[.*/][]; |
| 850 | my $ln = $op->line; |
| 851 | $loc .= ":$ln"; |
| 852 | my($stash, $cseq) = ($op->stash->NAME, $op->cop_seq - $cop_seq_base); |
| 853 | my $arybase = $op->arybase; |
| 854 | $arybase = $arybase ? ' $[=' . $arybase : ""; |
| 855 | $h{arg} = "($label$stash $cseq $loc$arybase)"; |
| 856 | if ($show_src) { |
| 857 | fill_srclines($pathnm) unless exists $srclines{$pathnm}; |
| 858 | # Would love to retain Jim's use of // but this code needs to be |
| 859 | # portable to 5.8.x |
| 860 | my $line = $srclines{$pathnm}[$ln]; |
| 861 | $line = "-src unavailable under -e" unless defined $line; |
| 862 | $h{src} = "$ln: $line"; |
| 863 | } |
| 864 | } elsif ($h{class} eq "LOOP") { |
| 865 | $h{arg} = "(next->" . seq($op->nextop) . " last->" . seq($op->lastop) |
| 866 | . " redo->" . seq($op->redoop) . ")"; |
| 867 | } elsif ($h{class} eq "LOGOP") { |
| 868 | undef $lastnext; |
| 869 | $h{arg} = "(other->" . seq($op->other) . ")"; |
| 870 | } |
| 871 | elsif ($h{class} eq "SVOP" or $h{class} eq "PADOP") { |
| 872 | unless ($h{name} eq 'aelemfast' and $op->flags & OPf_SPECIAL) { |
| 873 | my $idx = ($h{class} eq "SVOP") ? $op->targ : $op->padix; |
| 874 | my $preferpv = $h{name} eq "method_named"; |
| 875 | if ($h{class} eq "PADOP" or !${$op->sv}) { |
| 876 | my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$idx]; |
| 877 | $h{arg} = "[" . concise_sv($sv, \%h, $preferpv) . "]"; |
| 878 | $h{targarglife} = $h{targarg} = ""; |
| 879 | } else { |
| 880 | $h{arg} = "(" . concise_sv($op->sv, \%h, $preferpv) . ")"; |
| 881 | } |
| 882 | } |
| 883 | } |
| 884 | $h{seq} = $h{hyphseq} = seq($op); |
| 885 | $h{seq} = "" if $h{seq} eq "-"; |
| 886 | if ($] > 5.009) { |
| 887 | $h{opt} = $op->opt; |
| 888 | $h{label} = $labels{$$op}; |
| 889 | } else { |
| 890 | $h{seqnum} = $op->seq; |
| 891 | $h{label} = $labels{$op->seq}; |
| 892 | } |
| 893 | $h{next} = $op->next; |
| 894 | $h{next} = (class($h{next}) eq "NULL") ? "(end)" : seq($h{next}); |
| 895 | $h{nextaddr} = sprintf("%#x", $ {$op->next}); |
| 896 | $h{sibaddr} = sprintf("%#x", $ {$op->sibling}); |
| 897 | $h{firstaddr} = sprintf("%#x", $ {$op->first}) if $op->can("first"); |
| 898 | $h{lastaddr} = sprintf("%#x", $ {$op->last}) if $op->can("last"); |
| 899 | |
| 900 | $h{classsym} = $opclass{$h{class}}; |
| 901 | $h{flagval} = $op->flags; |
| 902 | $h{flags} = op_flags($op->flags); |
| 903 | $h{privval} = $op->private; |
| 904 | $h{private} = private_flags($h{name}, $op->private); |
| 905 | if ($op->can("hints")) { |
| 906 | $h{hintsval} = $op->hints; |
| 907 | $h{hints} = hints_flags($h{hintsval}); |
| 908 | } else { |
| 909 | $h{hintsval} = $h{hints} = ''; |
| 910 | } |
| 911 | $h{addr} = sprintf("%#x", $$op); |
| 912 | $h{typenum} = $op->type; |
| 913 | $h{noise} = $linenoise[$op->type]; |
| 914 | |
| 915 | return fmt_line(\%h, $op, $format, $level); |
| 916 | } |
| 917 | |
| 918 | sub B::OP::concise { |
| 919 | my($op, $level) = @_; |
| 920 | if ($order eq "exec" and $lastnext and $$lastnext != $$op) { |
| 921 | # insert a 'goto' line |
| 922 | my $synth = {"seq" => seq($lastnext), "class" => class($lastnext), |
| 923 | "addr" => sprintf("%#x", $$lastnext), |
| 924 | "goto" => seq($lastnext), # simplify goto '-' removal |
| 925 | }; |
| 926 | print $walkHandle fmt_line($synth, $op, $gotofmt, $level+1); |
| 927 | } |
| 928 | $lastnext = $op->next; |
| 929 | print $walkHandle concise_op($op, $level, $format); |
| 930 | } |
| 931 | |
| 932 | # B::OP::terse (see Terse.pm) now just calls this |
| 933 | sub b_terse { |
| 934 | my($op, $level) = @_; |
| 935 | |
| 936 | # This isn't necessarily right, but there's no easy way to get |
| 937 | # from an OP to the right CV. This is a limitation of the |
| 938 | # ->terse() interface style, and there isn't much to do about |
| 939 | # it. In particular, we can die in concise_op if the main pad |
| 940 | # isn't long enough, or has the wrong kind of entries, compared to |
| 941 | # the pad a sub was compiled with. The fix for that would be to |
| 942 | # make a backwards compatible "terse" format that never even |
| 943 | # looked at the pad, just like the old B::Terse. I don't think |
| 944 | # that's worth the effort, though. |
| 945 | $curcv = main_cv unless $curcv; |
| 946 | |
| 947 | if ($order eq "exec" and $lastnext and $$lastnext != $$op) { |
| 948 | # insert a 'goto' |
| 949 | my $h = {"seq" => seq($lastnext), "class" => class($lastnext), |
| 950 | "addr" => sprintf("%#x", $$lastnext)}; |
| 951 | print # $walkHandle |
| 952 | fmt_line($h, $op, $style{"terse"}[1], $level+1); |
| 953 | } |
| 954 | $lastnext = $op->next; |
| 955 | print # $walkHandle |
| 956 | concise_op($op, $level, $style{"terse"}[0]); |
| 957 | } |
| 958 | |
| 959 | sub tree { |
| 960 | my $op = shift; |
| 961 | my $level = shift; |
| 962 | my $style = $tree_decorations[$tree_style]; |
| 963 | my($space, $single, $kids, $kid, $nokid, $last, $lead, $size) = @$style; |
| 964 | my $name = concise_op($op, $level, $treefmt); |
| 965 | if (not $op->flags & OPf_KIDS) { |
| 966 | return $name . "\n"; |
| 967 | } |
| 968 | my @lines; |
| 969 | for (my $kid = $op->first; $$kid; $kid = $kid->sibling) { |
| 970 | push @lines, tree($kid, $level+1); |
| 971 | } |
| 972 | my $i; |
| 973 | for ($i = $#lines; substr($lines[$i], 0, 1) eq " "; $i--) { |
| 974 | $lines[$i] = $space . $lines[$i]; |
| 975 | } |
| 976 | if ($i > 0) { |
| 977 | $lines[$i] = $last . $lines[$i]; |
| 978 | while ($i-- > 1) { |
| 979 | if (substr($lines[$i], 0, 1) eq " ") { |
| 980 | $lines[$i] = $nokid . $lines[$i]; |
| 981 | } else { |
| 982 | $lines[$i] = $kid . $lines[$i]; |
| 983 | } |
| 984 | } |
| 985 | $lines[$i] = $kids . $lines[$i]; |
| 986 | } else { |
| 987 | $lines[0] = $single . $lines[0]; |
| 988 | } |
| 989 | return("$name$lead" . shift @lines, |
| 990 | map(" " x (length($name)+$size) . $_, @lines)); |
| 991 | } |
| 992 | |
| 993 | # *** Warning: fragile kludge ahead *** |
| 994 | # Because the B::* modules run in the same interpreter as the code |
| 995 | # they're compiling, their presence tends to distort the view we have of |
| 996 | # the code we're looking at. In particular, perl gives sequence numbers |
| 997 | # to COPs. If the program we're looking at were run on its own, this |
| 998 | # would start at 1. Because all of B::Concise and all the modules it |
| 999 | # uses are compiled first, though, by the time we get to the user's |
| 1000 | # program the sequence number is already pretty high, which could be |
| 1001 | # distracting if you're trying to tell OPs apart. Therefore we'd like to |
| 1002 | # subtract an offset from all the sequence numbers we display, to |
| 1003 | # restore the simpler view of the world. The trick is to know what that |
| 1004 | # offset will be, when we're still compiling B::Concise! If we |
| 1005 | # hardcoded a value, it would have to change every time B::Concise or |
| 1006 | # other modules we use do. To help a little, what we do here is compile |
| 1007 | # a little code at the end of the module, and compute the base sequence |
| 1008 | # number for the user's program as being a small offset later, so all we |
| 1009 | # have to worry about are changes in the offset. |
| 1010 | |
| 1011 | # [For 5.8.x and earlier perl is generating sequence numbers for all ops, |
| 1012 | # and using them to reference labels] |
| 1013 | |
| 1014 | |
| 1015 | # When you say "perl -MO=Concise -e '$a'", the output should look like: |
| 1016 | |
| 1017 | # 4 <@> leave[t1] vKP/REFC ->(end) |
| 1018 | # 1 <0> enter ->2 |
| 1019 | #^ smallest OP sequence number should be 1 |
| 1020 | # 2 <;> nextstate(main 1 -e:1) v ->3 |
| 1021 | # ^ smallest COP sequence number should be 1 |
| 1022 | # - <1> ex-rv2sv vK/1 ->4 |
| 1023 | # 3 <$> gvsv(*a) s ->4 |
| 1024 | |
| 1025 | # If the second of the marked numbers there isn't 1, it means you need |
| 1026 | # to update the corresponding magic number in the next line. |
| 1027 | # Remember, this needs to stay the last things in the module. |
| 1028 | |
| 1029 | # Why is this different for MacOS? Does it matter? |
| 1030 | my $cop_seq_mnum = $^O eq 'MacOS' ? 12 : 11; |
| 1031 | $cop_seq_base = svref_2object(eval 'sub{0;}')->START->cop_seq + $cop_seq_mnum; |
| 1032 | |
| 1033 | 1; |
| 1034 | |
| 1035 | __END__ |
| 1036 | |
| 1037 | =head1 NAME |
| 1038 | |
| 1039 | B::Concise - Walk Perl syntax tree, printing concise info about ops |
| 1040 | |
| 1041 | =head1 SYNOPSIS |
| 1042 | |
| 1043 | perl -MO=Concise[,OPTIONS] foo.pl |
| 1044 | |
| 1045 | use B::Concise qw(set_style add_callback); |
| 1046 | |
| 1047 | =head1 DESCRIPTION |
| 1048 | |
| 1049 | This compiler backend prints the internal OPs of a Perl program's syntax |
| 1050 | tree in one of several space-efficient text formats suitable for debugging |
| 1051 | the inner workings of perl or other compiler backends. It can print OPs in |
| 1052 | the order they appear in the OP tree, in the order they will execute, or |
| 1053 | in a text approximation to their tree structure, and the format of the |
| 1054 | information displayed is customizable. Its function is similar to that of |
| 1055 | perl's B<-Dx> debugging flag or the B<B::Terse> module, but it is more |
| 1056 | sophisticated and flexible. |
| 1057 | |
| 1058 | =head1 EXAMPLE |
| 1059 | |
| 1060 | Here's two outputs (or 'renderings'), using the -exec and -basic |
| 1061 | (i.e. default) formatting conventions on the same code snippet. |
| 1062 | |
| 1063 | % perl -MO=Concise,-exec -e '$a = $b + 42' |
| 1064 | 1 <0> enter |
| 1065 | 2 <;> nextstate(main 1 -e:1) v |
| 1066 | 3 <#> gvsv[*b] s |
| 1067 | 4 <$> const[IV 42] s |
| 1068 | * 5 <2> add[t3] sK/2 |
| 1069 | 6 <#> gvsv[*a] s |
| 1070 | 7 <2> sassign vKS/2 |
| 1071 | 8 <@> leave[1 ref] vKP/REFC |
| 1072 | |
| 1073 | In this -exec rendering, each opcode is executed in the order shown. |
| 1074 | The add opcode, marked with '*', is discussed in more detail. |
| 1075 | |
| 1076 | The 1st column is the op's sequence number, starting at 1, and is |
| 1077 | displayed in base 36 by default. Here they're purely linear; the |
| 1078 | sequences are very helpful when looking at code with loops and |
| 1079 | branches. |
| 1080 | |
| 1081 | The symbol between angle brackets indicates the op's type, for |
| 1082 | example; <2> is a BINOP, <@> a LISTOP, and <#> is a PADOP, which is |
| 1083 | used in threaded perls. (see L</"OP class abbreviations">). |
| 1084 | |
| 1085 | The opname, as in B<'add[t1]'>, may be followed by op-specific |
| 1086 | information in parentheses or brackets (ex B<'[t1]'>). |
| 1087 | |
| 1088 | The op-flags (ex B<'sK/2'>) are described in (L</"OP flags |
| 1089 | abbreviations">). |
| 1090 | |
| 1091 | % perl -MO=Concise -e '$a = $b + 42' |
| 1092 | 8 <@> leave[1 ref] vKP/REFC ->(end) |
| 1093 | 1 <0> enter ->2 |
| 1094 | 2 <;> nextstate(main 1 -e:1) v ->3 |
| 1095 | 7 <2> sassign vKS/2 ->8 |
| 1096 | * 5 <2> add[t1] sK/2 ->6 |
| 1097 | - <1> ex-rv2sv sK/1 ->4 |
| 1098 | 3 <$> gvsv(*b) s ->4 |
| 1099 | 4 <$> const(IV 42) s ->5 |
| 1100 | - <1> ex-rv2sv sKRM*/1 ->7 |
| 1101 | 6 <$> gvsv(*a) s ->7 |
| 1102 | |
| 1103 | The default rendering is top-down, so they're not in execution order. |
| 1104 | This form reflects the way the stack is used to parse and evaluate |
| 1105 | expressions; the add operates on the two terms below it in the tree. |
| 1106 | |
| 1107 | Nullops appear as C<ex-opname>, where I<opname> is an op that has been |
| 1108 | optimized away by perl. They're displayed with a sequence-number of |
| 1109 | '-', because they are not executed (they don't appear in previous |
| 1110 | example), they're printed here because they reflect the parse. |
| 1111 | |
| 1112 | The arrow points to the sequence number of the next op; they're not |
| 1113 | displayed in -exec mode, for obvious reasons. |
| 1114 | |
| 1115 | Note that because this rendering was done on a non-threaded perl, the |
| 1116 | PADOPs in the previous examples are now SVOPs, and some (but not all) |
| 1117 | of the square brackets have been replaced by round ones. This is a |
| 1118 | subtle feature to provide some visual distinction between renderings |
| 1119 | on threaded and un-threaded perls. |
| 1120 | |
| 1121 | |
| 1122 | =head1 OPTIONS |
| 1123 | |
| 1124 | Arguments that don't start with a hyphen are taken to be the names of |
| 1125 | subroutines to render; if no such functions are specified, the main |
| 1126 | body of the program (outside any subroutines, and not including use'd |
| 1127 | or require'd files) is rendered. Passing C<BEGIN>, C<UNITCHECK>, |
| 1128 | C<CHECK>, C<INIT>, or C<END> will cause all of the corresponding |
| 1129 | special blocks to be printed. Arguments must follow options. |
| 1130 | |
| 1131 | Options affect how things are rendered (ie printed). They're presented |
| 1132 | here by their visual effect, 1st being strongest. They're grouped |
| 1133 | according to how they interrelate; within each group the options are |
| 1134 | mutually exclusive (unless otherwise stated). |
| 1135 | |
| 1136 | =head2 Options for Opcode Ordering |
| 1137 | |
| 1138 | These options control the 'vertical display' of opcodes. The display |
| 1139 | 'order' is also called 'mode' elsewhere in this document. |
| 1140 | |
| 1141 | =over 4 |
| 1142 | |
| 1143 | =item B<-basic> |
| 1144 | |
| 1145 | Print OPs in the order they appear in the OP tree (a preorder |
| 1146 | traversal, starting at the root). The indentation of each OP shows its |
| 1147 | level in the tree, and the '->' at the end of the line indicates the |
| 1148 | next opcode in execution order. This mode is the default, so the flag |
| 1149 | is included simply for completeness. |
| 1150 | |
| 1151 | =item B<-exec> |
| 1152 | |
| 1153 | Print OPs in the order they would normally execute (for the majority |
| 1154 | of constructs this is a postorder traversal of the tree, ending at the |
| 1155 | root). In most cases the OP that usually follows a given OP will |
| 1156 | appear directly below it; alternate paths are shown by indentation. In |
| 1157 | cases like loops when control jumps out of a linear path, a 'goto' |
| 1158 | line is generated. |
| 1159 | |
| 1160 | =item B<-tree> |
| 1161 | |
| 1162 | Print OPs in a text approximation of a tree, with the root of the tree |
| 1163 | at the left and 'left-to-right' order of children transformed into |
| 1164 | 'top-to-bottom'. Because this mode grows both to the right and down, |
| 1165 | it isn't suitable for large programs (unless you have a very wide |
| 1166 | terminal). |
| 1167 | |
| 1168 | =back |
| 1169 | |
| 1170 | =head2 Options for Line-Style |
| 1171 | |
| 1172 | These options select the line-style (or just style) used to render |
| 1173 | each opcode, and dictates what info is actually printed into each line. |
| 1174 | |
| 1175 | =over 4 |
| 1176 | |
| 1177 | =item B<-concise> |
| 1178 | |
| 1179 | Use the author's favorite set of formatting conventions. This is the |
| 1180 | default, of course. |
| 1181 | |
| 1182 | =item B<-terse> |
| 1183 | |
| 1184 | Use formatting conventions that emulate the output of B<B::Terse>. The |
| 1185 | basic mode is almost indistinguishable from the real B<B::Terse>, and the |
| 1186 | exec mode looks very similar, but is in a more logical order and lacks |
| 1187 | curly brackets. B<B::Terse> doesn't have a tree mode, so the tree mode |
| 1188 | is only vaguely reminiscent of B<B::Terse>. |
| 1189 | |
| 1190 | =item B<-linenoise> |
| 1191 | |
| 1192 | Use formatting conventions in which the name of each OP, rather than being |
| 1193 | written out in full, is represented by a one- or two-character abbreviation. |
| 1194 | This is mainly a joke. |
| 1195 | |
| 1196 | =item B<-debug> |
| 1197 | |
| 1198 | Use formatting conventions reminiscent of B<B::Debug>; these aren't |
| 1199 | very concise at all. |
| 1200 | |
| 1201 | =item B<-env> |
| 1202 | |
| 1203 | Use formatting conventions read from the environment variables |
| 1204 | C<B_CONCISE_FORMAT>, C<B_CONCISE_GOTO_FORMAT>, and C<B_CONCISE_TREE_FORMAT>. |
| 1205 | |
| 1206 | =back |
| 1207 | |
| 1208 | =head2 Options for tree-specific formatting |
| 1209 | |
| 1210 | =over 4 |
| 1211 | |
| 1212 | =item B<-compact> |
| 1213 | |
| 1214 | Use a tree format in which the minimum amount of space is used for the |
| 1215 | lines connecting nodes (one character in most cases). This squeezes out |
| 1216 | a few precious columns of screen real estate. |
| 1217 | |
| 1218 | =item B<-loose> |
| 1219 | |
| 1220 | Use a tree format that uses longer edges to separate OP nodes. This format |
| 1221 | tends to look better than the compact one, especially in ASCII, and is |
| 1222 | the default. |
| 1223 | |
| 1224 | =item B<-vt> |
| 1225 | |
| 1226 | Use tree connecting characters drawn from the VT100 line-drawing set. |
| 1227 | This looks better if your terminal supports it. |
| 1228 | |
| 1229 | =item B<-ascii> |
| 1230 | |
| 1231 | Draw the tree with standard ASCII characters like C<+> and C<|>. These don't |
| 1232 | look as clean as the VT100 characters, but they'll work with almost any |
| 1233 | terminal (or the horizontal scrolling mode of less(1)) and are suitable |
| 1234 | for text documentation or email. This is the default. |
| 1235 | |
| 1236 | =back |
| 1237 | |
| 1238 | These are pairwise exclusive, i.e. compact or loose, vt or ascii. |
| 1239 | |
| 1240 | =head2 Options controlling sequence numbering |
| 1241 | |
| 1242 | =over 4 |
| 1243 | |
| 1244 | =item B<-base>I<n> |
| 1245 | |
| 1246 | Print OP sequence numbers in base I<n>. If I<n> is greater than 10, the |
| 1247 | digit for 11 will be 'a', and so on. If I<n> is greater than 36, the digit |
| 1248 | for 37 will be 'A', and so on until 62. Values greater than 62 are not |
| 1249 | currently supported. The default is 36. |
| 1250 | |
| 1251 | =item B<-bigendian> |
| 1252 | |
| 1253 | Print sequence numbers with the most significant digit first. This is the |
| 1254 | usual convention for Arabic numerals, and the default. |
| 1255 | |
| 1256 | =item B<-littleendian> |
| 1257 | |
| 1258 | Print seqence numbers with the least significant digit first. This is |
| 1259 | obviously mutually exclusive with bigendian. |
| 1260 | |
| 1261 | =back |
| 1262 | |
| 1263 | =head2 Other options |
| 1264 | |
| 1265 | =over 4 |
| 1266 | |
| 1267 | =item B<-src> |
| 1268 | |
| 1269 | With this option, the rendering of each statement (starting with the |
| 1270 | nextstate OP) will be preceded by the 1st line of source code that |
| 1271 | generates it. For example: |
| 1272 | |
| 1273 | 1 <0> enter |
| 1274 | # 1: my $i; |
| 1275 | 2 <;> nextstate(main 1 junk.pl:1) v:{ |
| 1276 | 3 <0> padsv[$i:1,10] vM/LVINTRO |
| 1277 | # 3: for $i (0..9) { |
| 1278 | 4 <;> nextstate(main 3 junk.pl:3) v:{ |
| 1279 | 5 <0> pushmark s |
| 1280 | 6 <$> const[IV 0] s |
| 1281 | 7 <$> const[IV 9] s |
| 1282 | 8 <{> enteriter(next->j last->m redo->9)[$i:1,10] lKS |
| 1283 | k <0> iter s |
| 1284 | l <|> and(other->9) vK/1 |
| 1285 | # 4: print "line "; |
| 1286 | 9 <;> nextstate(main 2 junk.pl:4) v |
| 1287 | a <0> pushmark s |
| 1288 | b <$> const[PV "line "] s |
| 1289 | c <@> print vK |
| 1290 | # 5: print "$i\n"; |
| 1291 | ... |
| 1292 | |
| 1293 | =item B<-stash="somepackage"> |
| 1294 | |
| 1295 | With this, "somepackage" will be required, then the stash is |
| 1296 | inspected, and each function is rendered. |
| 1297 | |
| 1298 | =back |
| 1299 | |
| 1300 | The following options are pairwise exclusive. |
| 1301 | |
| 1302 | =over 4 |
| 1303 | |
| 1304 | =item B<-main> |
| 1305 | |
| 1306 | Include the main program in the output, even if subroutines were also |
| 1307 | specified. This rendering is normally suppressed when a subroutine |
| 1308 | name or reference is given. |
| 1309 | |
| 1310 | =item B<-nomain> |
| 1311 | |
| 1312 | This restores the default behavior after you've changed it with '-main' |
| 1313 | (it's not normally needed). If no subroutine name/ref is given, main is |
| 1314 | rendered, regardless of this flag. |
| 1315 | |
| 1316 | =item B<-nobanner> |
| 1317 | |
| 1318 | Renderings usually include a banner line identifying the function name |
| 1319 | or stringified subref. This suppresses the printing of the banner. |
| 1320 | |
| 1321 | TBC: Remove the stringified coderef; while it provides a 'cookie' for |
| 1322 | each function rendered, the cookies used should be 1,2,3.. not a |
| 1323 | random hex-address. It also complicates string comparison of two |
| 1324 | different trees. |
| 1325 | |
| 1326 | =item B<-banner> |
| 1327 | |
| 1328 | restores default banner behavior. |
| 1329 | |
| 1330 | =item B<-banneris> => subref |
| 1331 | |
| 1332 | TBC: a hookpoint (and an option to set it) for a user-supplied |
| 1333 | function to produce a banner appropriate for users needs. It's not |
| 1334 | ideal, because the rendering-state variables, which are a natural |
| 1335 | candidate for use in concise.t, are unavailable to the user. |
| 1336 | |
| 1337 | =back |
| 1338 | |
| 1339 | =head2 Option Stickiness |
| 1340 | |
| 1341 | If you invoke Concise more than once in a program, you should know that |
| 1342 | the options are 'sticky'. This means that the options you provide in |
| 1343 | the first call will be remembered for the 2nd call, unless you |
| 1344 | re-specify or change them. |
| 1345 | |
| 1346 | =head1 ABBREVIATIONS |
| 1347 | |
| 1348 | The concise style uses symbols to convey maximum info with minimal |
| 1349 | clutter (like hex addresses). With just a little practice, you can |
| 1350 | start to see the flowers, not just the branches, in the trees. |
| 1351 | |
| 1352 | =head2 OP class abbreviations |
| 1353 | |
| 1354 | These symbols appear before the op-name, and indicate the |
| 1355 | B:: namespace that represents the ops in your Perl code. |
| 1356 | |
| 1357 | 0 OP (aka BASEOP) An OP with no children |
| 1358 | 1 UNOP An OP with one child |
| 1359 | 2 BINOP An OP with two children |
| 1360 | | LOGOP A control branch OP |
| 1361 | @ LISTOP An OP that could have lots of children |
| 1362 | / PMOP An OP with a regular expression |
| 1363 | $ SVOP An OP with an SV |
| 1364 | " PVOP An OP with a string |
| 1365 | { LOOP An OP that holds pointers for a loop |
| 1366 | ; COP An OP that marks the start of a statement |
| 1367 | # PADOP An OP with a GV on the pad |
| 1368 | |
| 1369 | =head2 OP flags abbreviations |
| 1370 | |
| 1371 | OP flags are either public or private. The public flags alter the |
| 1372 | behavior of each opcode in consistent ways, and are represented by 0 |
| 1373 | or more single characters. |
| 1374 | |
| 1375 | v OPf_WANT_VOID Want nothing (void context) |
| 1376 | s OPf_WANT_SCALAR Want single value (scalar context) |
| 1377 | l OPf_WANT_LIST Want list of any length (list context) |
| 1378 | Want is unknown |
| 1379 | K OPf_KIDS There is a firstborn child. |
| 1380 | P OPf_PARENS This operator was parenthesized. |
| 1381 | (Or block needs explicit scope entry.) |
| 1382 | R OPf_REF Certified reference. |
| 1383 | (Return container, not containee). |
| 1384 | M OPf_MOD Will modify (lvalue). |
| 1385 | S OPf_STACKED Some arg is arriving on the stack. |
| 1386 | * OPf_SPECIAL Do something weird for this op (see op.h) |
| 1387 | |
| 1388 | Private flags, if any are set for an opcode, are displayed after a '/' |
| 1389 | |
| 1390 | 8 <@> leave[1 ref] vKP/REFC ->(end) |
| 1391 | 7 <2> sassign vKS/2 ->8 |
| 1392 | |
| 1393 | They're opcode specific, and occur less often than the public ones, so |
| 1394 | they're represented by short mnemonics instead of single-chars; see |
| 1395 | F<op.h> for gory details, or try this quick 2-liner: |
| 1396 | |
| 1397 | $> perl -MB::Concise -de 1 |
| 1398 | DB<1> |x \%B::Concise::priv |
| 1399 | |
| 1400 | =head1 FORMATTING SPECIFICATIONS |
| 1401 | |
| 1402 | For each line-style ('concise', 'terse', 'linenoise', etc.) there are |
| 1403 | 3 format-specs which control how OPs are rendered. |
| 1404 | |
| 1405 | The first is the 'default' format, which is used in both basic and exec |
| 1406 | modes to print all opcodes. The 2nd, goto-format, is used in exec |
| 1407 | mode when branches are encountered. They're not real opcodes, and are |
| 1408 | inserted to look like a closing curly brace. The tree-format is tree |
| 1409 | specific. |
| 1410 | |
| 1411 | When a line is rendered, the correct format-spec is copied and scanned |
| 1412 | for the following items; data is substituted in, and other |
| 1413 | manipulations like basic indenting are done, for each opcode rendered. |
| 1414 | |
| 1415 | There are 3 kinds of items that may be populated; special patterns, |
| 1416 | #vars, and literal text, which is copied verbatim. (Yes, it's a set |
| 1417 | of s///g steps.) |
| 1418 | |
| 1419 | =head2 Special Patterns |
| 1420 | |
| 1421 | These items are the primitives used to perform indenting, and to |
| 1422 | select text from amongst alternatives. |
| 1423 | |
| 1424 | =over 4 |
| 1425 | |
| 1426 | =item B<(x(>I<exec_text>B<;>I<basic_text>B<)x)> |
| 1427 | |
| 1428 | Generates I<exec_text> in exec mode, or I<basic_text> in basic mode. |
| 1429 | |
| 1430 | =item B<(*(>I<text>B<)*)> |
| 1431 | |
| 1432 | Generates one copy of I<text> for each indentation level. |
| 1433 | |
| 1434 | =item B<(*(>I<text1>B<;>I<text2>B<)*)> |
| 1435 | |
| 1436 | Generates one fewer copies of I<text1> than the indentation level, followed |
| 1437 | by one copy of I<text2> if the indentation level is more than 0. |
| 1438 | |
| 1439 | =item B<(?(>I<text1>B<#>I<var>I<Text2>B<)?)> |
| 1440 | |
| 1441 | If the value of I<var> is true (not empty or zero), generates the |
| 1442 | value of I<var> surrounded by I<text1> and I<Text2>, otherwise |
| 1443 | nothing. |
| 1444 | |
| 1445 | =item B<~> |
| 1446 | |
| 1447 | Any number of tildes and surrounding whitespace will be collapsed to |
| 1448 | a single space. |
| 1449 | |
| 1450 | =back |
| 1451 | |
| 1452 | =head2 # Variables |
| 1453 | |
| 1454 | These #vars represent opcode properties that you may want as part of |
| 1455 | your rendering. The '#' is intended as a private sigil; a #var's |
| 1456 | value is interpolated into the style-line, much like "read $this". |
| 1457 | |
| 1458 | These vars take 3 forms: |
| 1459 | |
| 1460 | =over 4 |
| 1461 | |
| 1462 | =item B<#>I<var> |
| 1463 | |
| 1464 | A property named 'var' is assumed to exist for the opcodes, and is |
| 1465 | interpolated into the rendering. |
| 1466 | |
| 1467 | =item B<#>I<var>I<N> |
| 1468 | |
| 1469 | Generates the value of I<var>, left justified to fill I<N> spaces. |
| 1470 | Note that this means while you can have properties 'foo' and 'foo2', |
| 1471 | you cannot render 'foo2', but you could with 'foo2a'. You would be |
| 1472 | wise not to rely on this behavior going forward ;-) |
| 1473 | |
| 1474 | =item B<#>I<Var> |
| 1475 | |
| 1476 | This ucfirst form of #var generates a tag-value form of itself for |
| 1477 | display; it converts '#Var' into a 'Var => #var' style, which is then |
| 1478 | handled as described above. (Imp-note: #Vars cannot be used for |
| 1479 | conditional-fills, because the => #var transform is done after the check |
| 1480 | for #Var's value). |
| 1481 | |
| 1482 | =back |
| 1483 | |
| 1484 | The following variables are 'defined' by B::Concise; when they are |
| 1485 | used in a style, their respective values are plugged into the |
| 1486 | rendering of each opcode. |
| 1487 | |
| 1488 | Only some of these are used by the standard styles, the others are |
| 1489 | provided for you to delve into optree mechanics, should you wish to |
| 1490 | add a new style (see L</add_style> below) that uses them. You can |
| 1491 | also add new ones using L</add_callback>. |
| 1492 | |
| 1493 | =over 4 |
| 1494 | |
| 1495 | =item B<#addr> |
| 1496 | |
| 1497 | The address of the OP, in hexadecimal. |
| 1498 | |
| 1499 | =item B<#arg> |
| 1500 | |
| 1501 | The OP-specific information of the OP (such as the SV for an SVOP, the |
| 1502 | non-local exit pointers for a LOOP, etc.) enclosed in parentheses. |
| 1503 | |
| 1504 | =item B<#class> |
| 1505 | |
| 1506 | The B-determined class of the OP, in all caps. |
| 1507 | |
| 1508 | =item B<#classsym> |
| 1509 | |
| 1510 | A single symbol abbreviating the class of the OP. |
| 1511 | |
| 1512 | =item B<#coplabel> |
| 1513 | |
| 1514 | The label of the statement or block the OP is the start of, if any. |
| 1515 | |
| 1516 | =item B<#exname> |
| 1517 | |
| 1518 | The name of the OP, or 'ex-foo' if the OP is a null that used to be a foo. |
| 1519 | |
| 1520 | =item B<#extarg> |
| 1521 | |
| 1522 | The target of the OP, or nothing for a nulled OP. |
| 1523 | |
| 1524 | =item B<#firstaddr> |
| 1525 | |
| 1526 | The address of the OP's first child, in hexadecimal. |
| 1527 | |
| 1528 | =item B<#flags> |
| 1529 | |
| 1530 | The OP's flags, abbreviated as a series of symbols. |
| 1531 | |
| 1532 | =item B<#flagval> |
| 1533 | |
| 1534 | The numeric value of the OP's flags. |
| 1535 | |
| 1536 | =item B<#hints> |
| 1537 | |
| 1538 | The COP's hint flags, rendered with abbreviated names if possible. An empty |
| 1539 | string if this is not a COP. Here are the symbols used: |
| 1540 | |
| 1541 | $ strict refs |
| 1542 | & strict subs |
| 1543 | * strict vars |
| 1544 | i integers |
| 1545 | l locale |
| 1546 | b bytes |
| 1547 | [ arybase |
| 1548 | { block scope |
| 1549 | % localise %^H |
| 1550 | < open in |
| 1551 | > open out |
| 1552 | I overload int |
| 1553 | F overload float |
| 1554 | B overload binary |
| 1555 | S overload string |
| 1556 | R overload re |
| 1557 | T taint |
| 1558 | E eval |
| 1559 | X filetest access |
| 1560 | U utf-8 |
| 1561 | |
| 1562 | =item B<#hintsval> |
| 1563 | |
| 1564 | The numeric value of the COP's hint flags, or an empty string if this is not |
| 1565 | a COP. |
| 1566 | |
| 1567 | =item B<#hyphseq> |
| 1568 | |
| 1569 | The sequence number of the OP, or a hyphen if it doesn't have one. |
| 1570 | |
| 1571 | =item B<#label> |
| 1572 | |
| 1573 | 'NEXT', 'LAST', or 'REDO' if the OP is a target of one of those in exec |
| 1574 | mode, or empty otherwise. |
| 1575 | |
| 1576 | =item B<#lastaddr> |
| 1577 | |
| 1578 | The address of the OP's last child, in hexadecimal. |
| 1579 | |
| 1580 | =item B<#name> |
| 1581 | |
| 1582 | The OP's name. |
| 1583 | |
| 1584 | =item B<#NAME> |
| 1585 | |
| 1586 | The OP's name, in all caps. |
| 1587 | |
| 1588 | =item B<#next> |
| 1589 | |
| 1590 | The sequence number of the OP's next OP. |
| 1591 | |
| 1592 | =item B<#nextaddr> |
| 1593 | |
| 1594 | The address of the OP's next OP, in hexadecimal. |
| 1595 | |
| 1596 | =item B<#noise> |
| 1597 | |
| 1598 | A one- or two-character abbreviation for the OP's name. |
| 1599 | |
| 1600 | =item B<#private> |
| 1601 | |
| 1602 | The OP's private flags, rendered with abbreviated names if possible. |
| 1603 | |
| 1604 | =item B<#privval> |
| 1605 | |
| 1606 | The numeric value of the OP's private flags. |
| 1607 | |
| 1608 | =item B<#seq> |
| 1609 | |
| 1610 | The sequence number of the OP. Note that this is a sequence number |
| 1611 | generated by B::Concise. |
| 1612 | |
| 1613 | =item B<#seqnum> |
| 1614 | |
| 1615 | 5.8.x and earlier only. 5.9 and later do not provide this. |
| 1616 | |
| 1617 | The real sequence number of the OP, as a regular number and not adjusted |
| 1618 | to be relative to the start of the real program. (This will generally be |
| 1619 | a fairly large number because all of B<B::Concise> is compiled before |
| 1620 | your program is). |
| 1621 | |
| 1622 | =item B<#opt> |
| 1623 | |
| 1624 | Whether or not the op has been optimised by the peephole optimiser. |
| 1625 | |
| 1626 | Only available in 5.9 and later. |
| 1627 | |
| 1628 | =item B<#sibaddr> |
| 1629 | |
| 1630 | The address of the OP's next youngest sibling, in hexadecimal. |
| 1631 | |
| 1632 | =item B<#svaddr> |
| 1633 | |
| 1634 | The address of the OP's SV, if it has an SV, in hexadecimal. |
| 1635 | |
| 1636 | =item B<#svclass> |
| 1637 | |
| 1638 | The class of the OP's SV, if it has one, in all caps (e.g., 'IV'). |
| 1639 | |
| 1640 | =item B<#svval> |
| 1641 | |
| 1642 | The value of the OP's SV, if it has one, in a short human-readable format. |
| 1643 | |
| 1644 | =item B<#targ> |
| 1645 | |
| 1646 | The numeric value of the OP's targ. |
| 1647 | |
| 1648 | =item B<#targarg> |
| 1649 | |
| 1650 | The name of the variable the OP's targ refers to, if any, otherwise the |
| 1651 | letter t followed by the OP's targ in decimal. |
| 1652 | |
| 1653 | =item B<#targarglife> |
| 1654 | |
| 1655 | Same as B<#targarg>, but followed by the COP sequence numbers that delimit |
| 1656 | the variable's lifetime (or 'end' for a variable in an open scope) for a |
| 1657 | variable. |
| 1658 | |
| 1659 | =item B<#typenum> |
| 1660 | |
| 1661 | The numeric value of the OP's type, in decimal. |
| 1662 | |
| 1663 | =back |
| 1664 | |
| 1665 | =head1 One-Liner Command tips |
| 1666 | |
| 1667 | =over 4 |
| 1668 | |
| 1669 | =item perl -MO=Concise,bar foo.pl |
| 1670 | |
| 1671 | Renders only bar() from foo.pl. To see main, drop the ',bar'. To see |
| 1672 | both, add ',-main' |
| 1673 | |
| 1674 | =item perl -MDigest::MD5=md5 -MO=Concise,md5 -e1 |
| 1675 | |
| 1676 | Identifies md5 as an XS function. The export is needed so that BC can |
| 1677 | find it in main. |
| 1678 | |
| 1679 | =item perl -MPOSIX -MO=Concise,_POSIX_ARG_MAX -e1 |
| 1680 | |
| 1681 | Identifies _POSIX_ARG_MAX as a constant sub, optimized to an IV. |
| 1682 | Although POSIX isn't entirely consistent across platforms, this is |
| 1683 | likely to be present in virtually all of them. |
| 1684 | |
| 1685 | =item perl -MPOSIX -MO=Concise,a -e 'print _POSIX_SAVED_IDS' |
| 1686 | |
| 1687 | This renders a print statement, which includes a call to the function. |
| 1688 | It's identical to rendering a file with a use call and that single |
| 1689 | statement, except for the filename which appears in the nextstate ops. |
| 1690 | |
| 1691 | =item perl -MPOSIX -MO=Concise,a -e 'sub a{_POSIX_SAVED_IDS}' |
| 1692 | |
| 1693 | This is B<very> similar to previous, only the first two ops differ. This |
| 1694 | subroutine rendering is more representative, insofar as a single main |
| 1695 | program will have many subs. |
| 1696 | |
| 1697 | =item perl -MB::Concise -e 'B::Concise::compile("-exec","-src", \%B::Concise::)->()' |
| 1698 | |
| 1699 | This renders all functions in the B::Concise package with the source |
| 1700 | lines. It eschews the O framework so that the stashref can be passed |
| 1701 | directly to B::Concise::compile(). See -stash option for a more |
| 1702 | convenient way to render a package. |
| 1703 | |
| 1704 | =back |
| 1705 | |
| 1706 | =head1 Using B::Concise outside of the O framework |
| 1707 | |
| 1708 | The common (and original) usage of B::Concise was for command-line |
| 1709 | renderings of simple code, as given in EXAMPLE. But you can also use |
| 1710 | B<B::Concise> from your code, and call compile() directly, and |
| 1711 | repeatedly. By doing so, you can avoid the compile-time only |
| 1712 | operation of O.pm, and even use the debugger to step through |
| 1713 | B::Concise::compile() itself. |
| 1714 | |
| 1715 | Once you're doing this, you may alter Concise output by adding new |
| 1716 | rendering styles, and by optionally adding callback routines which |
| 1717 | populate new variables, if such were referenced from those (just |
| 1718 | added) styles. |
| 1719 | |
| 1720 | =head2 Example: Altering Concise Renderings |
| 1721 | |
| 1722 | use B::Concise qw(set_style add_callback); |
| 1723 | add_style($yourStyleName => $defaultfmt, $gotofmt, $treefmt); |
| 1724 | add_callback |
| 1725 | ( sub { |
| 1726 | my ($h, $op, $format, $level, $stylename) = @_; |
| 1727 | $h->{variable} = some_func($op); |
| 1728 | }); |
| 1729 | $walker = B::Concise::compile(@options,@subnames,@subrefs); |
| 1730 | $walker->(); |
| 1731 | |
| 1732 | =head2 set_style() |
| 1733 | |
| 1734 | B<set_style> accepts 3 arguments, and updates the three format-specs |
| 1735 | comprising a line-style (basic-exec, goto, tree). It has one minor |
| 1736 | drawback though; it doesn't register the style under a new name. This |
| 1737 | can become an issue if you render more than once and switch styles. |
| 1738 | Thus you may prefer to use add_style() and/or set_style_standard() |
| 1739 | instead. |
| 1740 | |
| 1741 | =head2 set_style_standard($name) |
| 1742 | |
| 1743 | This restores one of the standard line-styles: C<terse>, C<concise>, |
| 1744 | C<linenoise>, C<debug>, C<env>, into effect. It also accepts style |
| 1745 | names previously defined with add_style(). |
| 1746 | |
| 1747 | =head2 add_style () |
| 1748 | |
| 1749 | This subroutine accepts a new style name and three style arguments as |
| 1750 | above, and creates, registers, and selects the newly named style. It is |
| 1751 | an error to re-add a style; call set_style_standard() to switch between |
| 1752 | several styles. |
| 1753 | |
| 1754 | =head2 add_callback () |
| 1755 | |
| 1756 | If your newly minted styles refer to any new #variables, you'll need |
| 1757 | to define a callback subroutine that will populate (or modify) those |
| 1758 | variables. They are then available for use in the style you've |
| 1759 | chosen. |
| 1760 | |
| 1761 | The callbacks are called for each opcode visited by Concise, in the |
| 1762 | same order as they are added. Each subroutine is passed five |
| 1763 | parameters. |
| 1764 | |
| 1765 | 1. A hashref, containing the variable names and values which are |
| 1766 | populated into the report-line for the op |
| 1767 | 2. the op, as a B<B::OP> object |
| 1768 | 3. a reference to the format string |
| 1769 | 4. the formatting (indent) level |
| 1770 | 5. the selected stylename |
| 1771 | |
| 1772 | To define your own variables, simply add them to the hash, or change |
| 1773 | existing values if you need to. The level and format are passed in as |
| 1774 | references to scalars, but it is unlikely that they will need to be |
| 1775 | changed or even used. |
| 1776 | |
| 1777 | =head2 Running B::Concise::compile() |
| 1778 | |
| 1779 | B<compile> accepts options as described above in L</OPTIONS>, and |
| 1780 | arguments, which are either coderefs, or subroutine names. |
| 1781 | |
| 1782 | It constructs and returns a $treewalker coderef, which when invoked, |
| 1783 | traverses, or walks, and renders the optrees of the given arguments to |
| 1784 | STDOUT. You can reuse this, and can change the rendering style used |
| 1785 | each time; thereafter the coderef renders in the new style. |
| 1786 | |
| 1787 | B<walk_output> lets you change the print destination from STDOUT to |
| 1788 | another open filehandle, or into a string passed as a ref (unless |
| 1789 | you've built perl with -Uuseperlio). |
| 1790 | |
| 1791 | my $walker = B::Concise::compile('-terse','aFuncName', \&aSubRef); # 1 |
| 1792 | walk_output(\my $buf); |
| 1793 | $walker->(); # 1 renders -terse |
| 1794 | set_style_standard('concise'); # 2 |
| 1795 | $walker->(); # 2 renders -concise |
| 1796 | $walker->(@new); # 3 renders whatever |
| 1797 | print "3 different renderings: terse, concise, and @new: $buf\n"; |
| 1798 | |
| 1799 | When $walker is called, it traverses the subroutines supplied when it |
| 1800 | was created, and renders them using the current style. You can change |
| 1801 | the style afterwards in several different ways: |
| 1802 | |
| 1803 | 1. call C<compile>, altering style or mode/order |
| 1804 | 2. call C<set_style_standard> |
| 1805 | 3. call $walker, passing @new options |
| 1806 | |
| 1807 | Passing new options to the $walker is the easiest way to change |
| 1808 | amongst any pre-defined styles (the ones you add are automatically |
| 1809 | recognized as options), and is the only way to alter rendering order |
| 1810 | without calling compile again. Note however that rendering state is |
| 1811 | still shared amongst multiple $walker objects, so they must still be |
| 1812 | used in a coordinated manner. |
| 1813 | |
| 1814 | =head2 B::Concise::reset_sequence() |
| 1815 | |
| 1816 | This function (not exported) lets you reset the sequence numbers (note |
| 1817 | that they're numbered arbitrarily, their goal being to be human |
| 1818 | readable). Its purpose is mostly to support testing, i.e. to compare |
| 1819 | the concise output from two identical anonymous subroutines (but |
| 1820 | different instances). Without the reset, B::Concise, seeing that |
| 1821 | they're separate optrees, generates different sequence numbers in |
| 1822 | the output. |
| 1823 | |
| 1824 | =head2 Errors |
| 1825 | |
| 1826 | Errors in rendering (non-existent function-name, non-existent coderef) |
| 1827 | are written to the STDOUT, or wherever you've set it via |
| 1828 | walk_output(). |
| 1829 | |
| 1830 | Errors using the various *style* calls, and bad args to walk_output(), |
| 1831 | result in die(). Use an eval if you wish to catch these errors and |
| 1832 | continue processing. |
| 1833 | |
| 1834 | =head1 AUTHOR |
| 1835 | |
| 1836 | Stephen McCamant, E<lt>smcc@CSUA.Berkeley.EDUE<gt>. |
| 1837 | |
| 1838 | =cut |