2 # Copyright (C) 2000-2003 Stephen McCamant. All rights reserved.
3 # This program is free software; you can redistribute and/or modify it
4 # under the same terms as Perl itself.
6 # Note: we need to keep track of how many use declarations/BEGIN
7 # blocks this module uses, so we can avoid printing them when user
8 # asks for the BEGIN blocks in her program. Update the comments and
9 # the count in concise_specials if you add or delete one. The
10 # -MO=Concise counts as use #1.
13 use warnings; # uses #3 and #4, since warnings uses Carp
15 use Exporter (); # use #5
17 our $VERSION = "0.61";
18 our @ISA = qw(Exporter);
19 our @EXPORT_OK = qw(set_style set_style_standard add_callback
20 concise_subref concise_cv concise_main
21 add_style walk_output);
24 use B qw(class ppname main_start main_root main_cv cstring svref_2object
25 SVf_IOK SVf_NOK SVf_POK SVf_IVisUV SVf_FAKE OPf_KIDS OPf_SPECIAL
30 ["(?(#label =>\n)?)(*( )*)#class (#addr) #name (?([#targ])?) "
31 . "#svclass~(?((#svaddr))?)~#svval~(?(label \"#coplabel\")?)\n",
32 "(*( )*)goto #class (#addr)\n",
35 ["#hyphseq2 (*( (x( ;)x))*)<#classsym> "
36 . "#exname#arg(?([#targarglife])?)~#flags(?(/#private)?)(x(;~->#next)x)\n",
37 " (*( )*) goto #seq\n",
38 "(?(<#seq>)?)#exname#arg(?([#targarglife])?)"],
40 ["(x(;(*( )*))x)#noise#arg(?([#targarg])?)(x( ;\n)x)",
42 "(?(#seq)?)#noise#arg(?([#targarg])?)"],
44 ["#class (#addr)\n\top_next\t\t#nextaddr\n\top_sibling\t#sibaddr\n\t"
45 . "op_ppaddr\tPL_ppaddr[OP_#NAME]\n\top_type\t\t#typenum\n"
46 . "\top_flags\t#flagval\n\top_private\t#privval\n"
47 . "(?(\top_first\t#firstaddr\n)?)(?(\top_last\t\t#lastaddr\n)?)"
48 . "(?(\top_sv\t\t#svaddr\n)?)",
51 "env" => [$ENV{B_CONCISE_FORMAT}, $ENV{B_CONCISE_GOTO_FORMAT},
52 $ENV{B_CONCISE_TREE_FORMAT}],
55 # Renderings, ie how Concise prints, is controlled by these vars
57 our $stylename; # selects current style from %style
58 my $order = "basic"; # how optree is walked & printed: basic, exec, tree
60 # rendering mechanics:
61 # these 'formats' are the line-rendering templates
62 # they're updated from %style when $stylename changes
63 my ($format, $gotofmt, $treefmt);
66 my $base = 36; # how <sequence#> is displayed
67 my $big_endian = 1; # more <sequence#> display
68 my $tree_style = 0; # tree-order details
69 my $banner = 1; # print banner before optree is traversed
72 our @callbacks; # allow external management
74 set_style_standard("concise");
80 ($format, $gotofmt, $treefmt) = @_;
81 #warn "set_style: deprecated, use set_style_standard instead\n"; # someday
82 die "expecting 3 style-format args\n" unless @_ == 3;
86 my ($newstyle,@args) = @_;
87 die "style '$newstyle' already exists, choose a new name\n"
88 if exists $style{$newstyle};
89 die "expecting 3 style-format args\n" unless @args == 3;
90 $style{$newstyle} = [@args];
91 $stylename = $newstyle; # update rendering state
94 sub set_style_standard {
95 ($stylename) = @_; # update rendering state
96 die "err: style '$stylename' unknown\n" unless exists $style{$stylename};
97 set_style(@{$style{$stylename}});
104 # output handle, used with all Concise-output printing
105 our $walkHandle = \*STDOUT; # public for your convenience
107 sub walk_output { # updates $walkHandle
109 if (ref $handle eq 'SCALAR') {
111 die "no perlio in this build, can't call walk_output (\\\$scalar)\n"
112 unless $Config::Config{useperlio};
113 # in 5.8+, open(FILEHANDLE,MODE,REFERENCE) writes to string
114 open my $tmp, '>', $handle; # but cant re-set existing STDOUT
115 $walkHandle = $tmp; # so use my $tmp as intermediate var
118 $walkHandle = $handle;
119 my $iotype = ref $walkHandle;
120 die "expecting argument/object that can print\n"
121 unless $iotype eq 'GLOB' or $iotype and $walkHandle->can('print');
125 my($order, $coderef) = @_;
126 my $codeobj = svref_2object($coderef);
127 die "err: not a coderef: $coderef\n" unless ref $codeobj eq 'B::CV';#CODE';
128 concise_cv_obj($order, $codeobj);
131 # This should have been called concise_subref, but it was exported
132 # under this name in versions before 0.56
133 sub concise_cv { concise_subref(@_); }
136 my ($order, $cv) = @_;
138 die "err: coderef has no START\n" if class($cv->START) eq "NULL";
139 sequence($cv->START);
140 if ($order eq "exec") {
141 walk_exec($cv->START);
142 } elsif ($order eq "basic") {
143 walk_topdown($cv->ROOT, sub { $_[0]->concise($_[1]) }, 0);
145 print $walkHandle tree($cv->ROOT, 0);
151 sequence(main_start);
153 if ($order eq "exec") {
154 return if class(main_start) eq "NULL";
155 walk_exec(main_start);
156 } elsif ($order eq "tree") {
157 return if class(main_root) eq "NULL";
158 print $walkHandle tree(main_root, 0);
159 } elsif ($order eq "basic") {
160 return if class(main_root) eq "NULL";
161 walk_topdown(main_root,
162 sub { $_[0]->concise($_[1]) }, 0);
166 sub concise_specials {
167 my($name, $order, @cv_s) = @_;
169 if ($name eq "BEGIN") {
170 splice(@cv_s, 0, 7); # skip 7 BEGIN blocks in this file
171 } elsif ($name eq "CHECK") {
172 pop @cv_s; # skip the CHECK block that calls us
175 print $walkHandle "$name $i:\n";
177 concise_cv_obj($order, $cv);
181 my $start_sym = "\e(0"; # "\cN" sometimes also works
182 my $end_sym = "\e(B"; # "\cO" respectively
184 my @tree_decorations =
185 ([" ", "--", "+-", "|-", "| ", "`-", "-", 1],
186 [" ", "-", "+", "+", "|", "`", "", 0],
187 [" ", map("$start_sym$_$end_sym", "qq", "wq", "tq", "x ", "mq", "q"), 1],
188 [" ", map("$start_sym$_$end_sym", "q", "w", "t", "x", "m"), "", 0],
192 my @options = grep(/^-/, @_);
193 my @args = grep(!/^-/, @_);
195 for my $o (@options) {
196 if ($o eq "-basic") {
198 } elsif ($o eq "-exec") {
200 } elsif ($o eq "-tree") {
202 } elsif ($o eq "-compact") {
204 } elsif ($o eq "-loose") {
206 } elsif ($o eq "-vt") {
208 } elsif ($o eq "-ascii") {
210 } elsif ($o eq "-main") {
212 } elsif ($o =~ /^-base(\d+)$/) {
214 } elsif ($o eq "-bigendian") {
216 } elsif ($o eq "-littleendian") {
218 } elsif ($o eq "-banner") {
221 elsif (exists $style{substr($o, 1)}) {
222 $stylename = substr($o, 1);
223 set_style_standard($stylename);
225 warn "Option $o unrecognized";
230 for my $objname (@args) {
231 if ($objname eq "BEGIN") {
232 concise_specials("BEGIN", $order,
233 B::begin_av->isa("B::AV") ?
234 B::begin_av->ARRAY : ());
235 } elsif ($objname eq "INIT") {
236 concise_specials("INIT", $order,
237 B::init_av->isa("B::AV") ?
238 B::init_av->ARRAY : ());
239 } elsif ($objname eq "CHECK") {
240 concise_specials("CHECK", $order,
241 B::check_av->isa("B::AV") ?
242 B::check_av->ARRAY : ());
243 } elsif ($objname eq "END") {
244 concise_specials("END", $order,
245 B::end_av->isa("B::AV") ?
246 B::end_av->ARRAY : ());
248 # convert function names to subrefs
251 print $walkHandle "B::Concise::compile($objname)\n"
255 $objname = "main::" . $objname unless $objname =~ /::/;
256 print $walkHandle "$objname:\n";
258 die "err: unknown function ($objname)\n"
259 unless *{$objname}{CODE};
260 $objref = \&$objname;
262 concise_subref($order, $objref);
266 if (!@args or $do_main) {
267 print $walkHandle "main program:\n" if $do_main;
268 concise_main($order);
274 my $lastnext; # remembers op-chain, used to insert gotos
276 my %opclass = ('OP' => "0", 'UNOP' => "1", 'BINOP' => "2", 'LOGOP' => "|",
277 'LISTOP' => "@", 'PMOP' => "/", 'SVOP' => "\$", 'GVOP' => "*",
278 'PVOP' => '"', 'LOOP' => "{", 'COP' => ";", 'PADOP' => "#");
280 no warnings 'qw'; # "Possible attempt to put comments..."; use #7
282 qw'# () sc ( @? 1 $* gv *{ m$ m@ m% m? p/ *$ $ $# & a& pt \\ s\\ rf bl
283 ` *? <> ?? ?/ r/ c/ // qr s/ /c y/ = @= C sC Cp sp df un BM po +1 +I
284 -1 -I 1+ I+ 1- I- ** * i* / i/ %$ i% x + i+ - i- . " << >> < i<
285 > i> <= i, >= i. == i= != i! <? i? s< s> s, s. s= s! s? b& b^ b| -0 -i
286 ! ~ a2 si cs rd sr e^ lg sq in %x %o ab le ss ve ix ri sf FL od ch cy
287 uf lf uc lc qm @ [f [ @[ eh vl ky dl ex % ${ @{ uk pk st jn ) )[ a@
288 a% sl +] -] [- [+ so rv GS GW MS MW .. f. .f && || ^^ ?: &= |= -> s{ s}
289 v} ca wa di rs ;; ; ;d }{ { } {} f{ it {l l} rt }l }n }r dm }g }e ^o
290 ^c ^| ^# um bm t~ u~ ~d DB db ^s se ^g ^r {w }w pf pr ^O ^K ^R ^W ^d ^v
291 ^e ^t ^k t. fc ic fl .s .p .b .c .l .a .h g1 s1 g2 s2 ?. l? -R -W -X -r
292 -w -x -e -o -O -z -s -M -A -C -S -c -b -f -d -p -l -u -g -k -t -T -B cd
293 co cr u. cm ut r. l@ s@ r@ mD uD oD rD tD sD wD cD f$ w$ p$ sh e$ k$ g3
294 g4 s4 g5 s5 T@ C@ L@ G@ A@ S@ Hg Hc Hr Hw Mg Mc Ms Mr Sg Sc So rq do {e
295 e} {t t} g6 G6 6e g7 G7 7e g8 G8 8e g9 G9 9e 6s 7s 8s 9s 6E 7E 8E 9E Pn
296 Pu GP SP EP Gn Gg GG SG EG g0 c$ lk t$ ;s n> // /= CO';
298 my $chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
303 push @v, "v" if ($x & 3) == 1;
304 push @v, "s" if ($x & 3) == 2;
305 push @v, "l" if ($x & 3) == 3;
306 push @v, "K" if $x & 4;
307 push @v, "P" if $x & 8;
308 push @v, "R" if $x & 16;
309 push @v, "M" if $x & 32;
310 push @v, "S" if $x & 64;
311 push @v, "*" if $x & 128;
317 return "-" . base_n(-$x) if $x < 0;
319 do { $str .= substr($chars, $x % $base, 1) } while $x = int($x / $base);
320 $str = reverse $str if $big_endian;
335 return "-" if not exists $sequence_num{$$op};
336 return base_n($sequence_num{$$op});
340 my($op, $sub, $level) = @_;
342 if ($op->flags & OPf_KIDS) {
343 for (my $kid = $op->first; $$kid; $kid = $kid->sibling) {
344 walk_topdown($kid, $sub, $level + 1);
347 if (class($op) eq "PMOP") {
348 my $maybe_root = $op->pmreplroot;
349 if (ref($maybe_root) and $maybe_root->isa("B::OP")) {
350 # It really is the root of the replacement, not something
351 # else stored here for lack of space elsewhere
352 walk_topdown($maybe_root, $sub, $level + 1);
358 my($ar, $level) = @_;
360 if (ref($l) eq "ARRAY") {
361 walklines($l, $level + 1);
369 my($top, $level) = @_;
372 my @todo = ([$top, \@lines]);
373 while (@todo and my($op, $targ) = @{shift @todo}) {
374 for (; $$op; $op = $op->next) {
375 last if $opsseen{$$op}++;
377 my $name = $op->name;
378 if (class($op) eq "LOGOP") {
381 push @todo, [$op->other, $ar];
382 } elsif ($name eq "subst" and $ {$op->pmreplstart}) {
385 push @todo, [$op->pmreplstart, $ar];
386 } elsif ($name =~ /^enter(loop|iter)$/) {
387 $labels{${$op->nextop}} = "NEXT";
388 $labels{${$op->lastop}} = "LAST";
389 $labels{${$op->redoop}} = "REDO";
393 walklines(\@lines, 0);
396 # The structure of this routine is purposely modeled after op.c's peep()
400 return if class($op) eq "NULL" or exists $sequence_num{$$op};
401 for (; $$op; $op = $op->next) {
402 last if exists $sequence_num{$$op};
403 my $name = $op->name;
404 if ($name =~ /^(null|scalar|lineseq|scope)$/) {
405 next if $oldop and $ {$op->next};
407 $sequence_num{$$op} = $seq_max++;
408 if (class($op) eq "LOGOP") {
409 my $other = $op->other;
410 $other = $other->next while $other->name eq "null";
412 } elsif (class($op) eq "LOOP") {
413 my $redoop = $op->redoop;
414 $redoop = $redoop->next while $redoop->name eq "null";
416 my $nextop = $op->nextop;
417 $nextop = $nextop->next while $nextop->name eq "null";
419 my $lastop = $op->lastop;
420 $lastop = $lastop->next while $lastop->name eq "null";
422 } elsif ($name eq "subst" and $ {$op->pmreplstart}) {
423 my $replstart = $op->pmreplstart;
424 $replstart = $replstart->next while $replstart->name eq "null";
425 sequence($replstart);
432 sub fmt_line { # generate text-line for op.
433 my($hr, $text, $level) = @_;
434 return '' if $hr->{SKIP}; # suppress line if a callback said so
436 $text =~ s/\(\?\(([^\#]*?)\#(\w+)([^\#]*?)\)\?\)/
437 $hr->{$2} ? $1.$hr->{$2}.$3 : ""/eg;
439 $text =~ s/\(x\((.*?);(.*?)\)x\)/$order eq "exec" ? $1 : $2/egs;
440 $text =~ s/\(\*\(([^;]*?)\)\*\)/$1 x $level/egs;
441 $text =~ s/\(\*\((.*?);(.*?)\)\*\)/$1 x ($level - 1) . $2 x ($level>0)/egs;
442 $text =~ s/\#([a-zA-Z]+)(\d+)/sprintf("%-$2s", $hr->{$1})/eg;
444 $text =~ s/\#([a-zA-Z]+)/$hr->{$1}/eg; # populate data into template
445 $text =~ s/[ \t]*~+[ \t]*/ /g;
447 return "$text\n" if $text ne "";
448 return $text; # suppress empty lines
452 $priv{$_}{128} = "LVINTRO"
453 for ("pos", "substr", "vec", "threadsv", "gvsv", "rv2sv", "rv2hv", "rv2gv",
454 "rv2av", "rv2arylen", "aelem", "helem", "aslice", "hslice", "padsv",
455 "padav", "padhv", "enteriter");
456 $priv{$_}{64} = "REFC" for ("leave", "leavesub", "leavesublv", "leavewrite");
457 $priv{"aassign"}{64} = "COMMON";
458 $priv{"aassign"}{32} = "PHASH" if $] < 5.009;
459 $priv{"sassign"}{64} = "BKWARD";
460 $priv{$_}{64} = "RTIME" for ("match", "subst", "substcont");
461 @{$priv{"trans"}}{1,2,4,8,16,64} = ("<UTF", ">UTF", "IDENT", "SQUASH", "DEL",
463 $priv{"repeat"}{64} = "DOLIST";
464 $priv{"leaveloop"}{64} = "CONT";
465 @{$priv{$_}}{32,64,96} = ("DREFAV", "DREFHV", "DREFSV")
466 for (qw(rv2gv rv2sv padsv aelem helem));
467 $priv{"entersub"}{16} = "DBG";
468 $priv{"entersub"}{32} = "TARG";
469 @{$priv{$_}}{4,8,128} = ("INARGS","AMPER","NO()") for ("entersub", "rv2cv");
470 $priv{"gv"}{32} = "EARLYCV";
471 $priv{"aelem"}{16} = $priv{"helem"}{16} = "LVDEFER";
472 $priv{$_}{16} = "OURINTR" for ("gvsv", "rv2sv", "rv2av", "rv2hv", "r2gv",
474 $priv{$_}{16} = "TARGMY"
475 for (map(($_,"s$_"),"chop", "chomp"),
476 map(($_,"i_$_"), "postinc", "postdec", "multiply", "divide", "modulo",
477 "add", "subtract", "negate"), "pow", "concat", "stringify",
478 "left_shift", "right_shift", "bit_and", "bit_xor", "bit_or",
479 "complement", "atan2", "sin", "cos", "rand", "exp", "log", "sqrt",
480 "int", "hex", "oct", "abs", "length", "index", "rindex", "sprintf",
481 "ord", "chr", "crypt", "quotemeta", "join", "push", "unshift", "flock",
482 "chdir", "chown", "chroot", "unlink", "chmod", "utime", "rename",
483 "link", "symlink", "mkdir", "rmdir", "wait", "waitpid", "system",
484 "exec", "kill", "getppid", "getpgrp", "setpgrp", "getpriority",
485 "setpriority", "time", "sleep");
486 @{$priv{"const"}}{8,16,32,64,128} = ("STRICT","ENTERED", '$[', "BARE", "WARN");
487 $priv{"flip"}{64} = $priv{"flop"}{64} = "LINENUM";
488 $priv{"list"}{64} = "GUESSED";
489 $priv{"delete"}{64} = "SLICE";
490 $priv{"exists"}{64} = "SUB";
491 $priv{$_}{64} = "LOCALE"
492 for ("sort", "prtf", "sprintf", "slt", "sle", "seq", "sne", "sgt", "sge",
493 "scmp", "lc", "uc", "lcfirst", "ucfirst");
494 @{$priv{"sort"}}{1,2,4,8} = ("NUM", "INT", "REV", "INPLACE");
495 $priv{"threadsv"}{64} = "SVREFd";
496 @{$priv{$_}}{16,32,64,128} = ("INBIN","INCR","OUTBIN","OUTCR")
497 for ("open", "backtick");
498 $priv{"exit"}{128} = "VMS";
499 $priv{$_}{2} = "FTACCESS"
500 for ("ftrread", "ftrwrite", "ftrexec", "fteread", "ftewrite", "fteexec");
502 # Stacked filetests are post 5.8.x
503 $priv{$_}{4} = "FTSTACKED"
504 for ("ftrread", "ftrwrite", "ftrexec", "fteread", "ftewrite", "fteexec",
505 "ftis", "fteowned", "ftrowned", "ftzero", "ftsize", "ftmtime",
506 "ftatime", "ftctime", "ftsock", "ftchr", "ftblk", "ftfile", "ftdir",
507 "ftpipe", "ftlink", "ftsuid", "ftsgid", "ftsvtx", "fttty", "fttext",
509 # Lexical $_ is post 5.8.x
510 $priv{$_}{2} = "GREPLEX"
511 for ("mapwhile", "mapstart", "grepwhile", "grepstart");
517 for my $flag (128, 96, 64, 32, 16, 8, 4, 2, 1) {
518 if ($priv{$name}{$flag} and $x & $flag and $x >= $flag) {
520 push @s, $priv{$name}{$flag};
524 return join(",", @s);
529 $hr->{svclass} = class($sv);
530 $hr->{svclass} = "UV"
531 if $hr->{svclass} eq "IV" and $sv->FLAGS & SVf_IVisUV;
532 $hr->{svaddr} = sprintf("%#x", $$sv);
533 if ($hr->{svclass} eq "GV") {
535 my $stash = $gv->STASH->NAME;
536 if ($stash eq "main") {
539 $stash = $stash . "::";
541 $hr->{svval} = "*$stash" . $gv->SAFENAME;
542 return "*$stash" . $gv->SAFENAME;
544 while (class($sv) eq "RV") {
545 $hr->{svval} .= "\\";
548 if (class($sv) eq "SPECIAL") {
549 $hr->{svval} .= ["Null", "sv_undef", "sv_yes", "sv_no"]->[$$sv];
550 } elsif ($sv->FLAGS & SVf_NOK) {
551 $hr->{svval} .= $sv->NV;
552 } elsif ($sv->FLAGS & SVf_IOK) {
553 $hr->{svval} .= $sv->int_value;
554 } elsif ($sv->FLAGS & SVf_POK) {
555 $hr->{svval} .= cstring($sv->PV);
556 } elsif (class($sv) eq "HV") {
557 $hr->{svval} .= 'HASH';
559 return $hr->{svclass} . " " . $hr->{svval};
564 my ($op, $level, $format) = @_;
566 $h{exname} = $h{name} = $op->name;
567 $h{NAME} = uc $h{name};
568 $h{class} = class($op);
569 $h{extarg} = $h{targ} = $op->targ;
570 $h{extarg} = "" unless $h{extarg};
571 if ($h{name} eq "null" and $h{targ}) {
572 # targ holds the old type
573 $h{exname} = "ex-" . substr(ppname($h{targ}), 3);
575 } elsif ($op->name =~ /^leave(sub(lv)?|write)?$/) {
576 # targ potentially holds a reference count
577 if ($op->private & 64) {
578 my $refs = "ref" . ($h{targ} != 1 ? "s" : "");
579 $h{targarglife} = $h{targarg} = "$h{targ} $refs";
582 my $padname = (($curcv->PADLIST->ARRAY)[0]->ARRAY)[$h{targ}];
583 if (defined $padname and class($padname) ne "SPECIAL") {
584 $h{targarg} = $padname->PVX;
585 if ($padname->FLAGS & SVf_FAKE) {
587 $h{targarglife} = "$h{targarg}:FAKE";
589 # These changes relate to the jumbo closure fix.
590 # See changes 19939 and 20005
592 $fake .= 'a' if $padname->IVX & 1; # PAD_FAKELEX_ANON
593 $fake .= 'm' if $padname->IVX & 2; # PAD_FAKELEX_MULTI
594 $fake .= ':' . $padname->NVX if $curcv->CvFLAGS & CVf_ANON;
595 $h{targarglife} = "$h{targarg}:FAKE:$fake";
599 my $intro = $padname->NVX - $cop_seq_base;
600 my $finish = int($padname->IVX) - $cop_seq_base;
601 $finish = "end" if $finish == 999999999 - $cop_seq_base;
602 $h{targarglife} = "$h{targarg}:$intro,$finish";
605 $h{targarglife} = $h{targarg} = "t" . $h{targ};
609 $h{svclass} = $h{svaddr} = $h{svval} = "";
610 if ($h{class} eq "PMOP") {
611 my $precomp = $op->precomp;
612 if (defined $precomp) {
613 $precomp = cstring($precomp); # Escape literal control sequences
614 $precomp = "/$precomp/";
618 my $pmreplroot = $op->pmreplroot;
620 if (ref($pmreplroot) eq "B::GV") {
621 # with C<@stash_array = split(/pat/, str);>,
622 # *stash_array is stored in /pat/'s pmreplroot.
623 $h{arg} = "($precomp => \@" . $pmreplroot->NAME . ")";
624 } elsif (!ref($pmreplroot) and $pmreplroot) {
625 # same as the last case, except the value is actually a
626 # pad offset for where the GV is kept (this happens under
628 my $gv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$pmreplroot];
629 $h{arg} = "($precomp => \@" . $gv->NAME . ")";
630 } elsif ($ {$op->pmreplstart}) {
632 $pmreplstart = "replstart->" . seq($op->pmreplstart);
633 $h{arg} = "(" . join(" ", $precomp, $pmreplstart) . ")";
635 $h{arg} = "($precomp)";
637 } elsif ($h{class} eq "PVOP" and $h{name} ne "trans") {
638 $h{arg} = '("' . $op->pv . '")';
639 $h{svval} = '"' . $op->pv . '"';
640 } elsif ($h{class} eq "COP") {
641 my $label = $op->label;
642 $h{coplabel} = $label;
643 $label = $label ? "$label: " : "";
646 $loc .= ":" . $op->line;
647 my($stash, $cseq) = ($op->stash->NAME, $op->cop_seq - $cop_seq_base);
648 my $arybase = $op->arybase;
649 $arybase = $arybase ? ' $[=' . $arybase : "";
650 $h{arg} = "($label$stash $cseq $loc$arybase)";
651 } elsif ($h{class} eq "LOOP") {
652 $h{arg} = "(next->" . seq($op->nextop) . " last->" . seq($op->lastop)
653 . " redo->" . seq($op->redoop) . ")";
654 } elsif ($h{class} eq "LOGOP") {
656 $h{arg} = "(other->" . seq($op->other) . ")";
657 } elsif ($h{class} eq "SVOP") {
658 unless ($h{name} eq 'aelemfast' and $op->flags & OPf_SPECIAL) {
660 my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->targ];
661 $h{arg} = "[" . concise_sv($sv, \%h) . "]";
662 $h{targarglife} = $h{targarg} = "";
664 $h{arg} = "(" . concise_sv($op->sv, \%h) . ")";
667 } elsif ($h{class} eq "PADOP") {
668 my $sv = (($curcv->PADLIST->ARRAY)[1]->ARRAY)[$op->padix];
669 $h{arg} = "[" . concise_sv($sv, \%h) . "]";
671 $h{seq} = $h{hyphseq} = seq($op);
672 $h{seq} = "" if $h{seq} eq "-";
674 $h{static} = $op->static;
675 $h{next} = $op->next;
676 $h{next} = (class($h{next}) eq "NULL") ? "(end)" : seq($h{next});
677 $h{nextaddr} = sprintf("%#x", $ {$op->next});
678 $h{sibaddr} = sprintf("%#x", $ {$op->sibling});
679 $h{firstaddr} = sprintf("%#x", $ {$op->first}) if $op->can("first");
680 $h{lastaddr} = sprintf("%#x", $ {$op->last}) if $op->can("last");
682 $h{classsym} = $opclass{$h{class}};
683 $h{flagval} = $op->flags;
684 $h{flags} = op_flags($op->flags);
685 $h{privval} = $op->private;
686 $h{private} = private_flags($h{name}, $op->private);
687 $h{addr} = sprintf("%#x", $$op);
688 $h{label} = $labels{$$op};
689 $h{typenum} = $op->type;
690 $h{noise} = $linenoise[$op->type];
692 $_->(\%h, $op, \$format, \$level, $stylename) for @callbacks;
693 return fmt_line(\%h, $format, $level);
697 my($op, $level) = @_;
698 if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
699 # insert a 'goto' line
700 my $h = {"seq" => seq($lastnext), "class" => class($lastnext),
701 "addr" => sprintf("%#x", $$lastnext)};
702 print $walkHandle fmt_line($h, $gotofmt, $level+1);
704 $lastnext = $op->next;
705 print $walkHandle concise_op($op, $level, $format);
708 # B::OP::terse (see Terse.pm) now just calls this
710 my($op, $level) = @_;
712 # This isn't necessarily right, but there's no easy way to get
713 # from an OP to the right CV. This is a limitation of the
714 # ->terse() interface style, and there isn't much to do about
715 # it. In particular, we can die in concise_op if the main pad
716 # isn't long enough, or has the wrong kind of entries, compared to
717 # the pad a sub was compiled with. The fix for that would be to
718 # make a backwards compatible "terse" format that never even
719 # looked at the pad, just like the old B::Terse. I don't think
720 # that's worth the effort, though.
721 $curcv = main_cv unless $curcv;
723 if ($order eq "exec" and $lastnext and $$lastnext != $$op) {
725 my $h = {"seq" => seq($lastnext), "class" => class($lastnext),
726 "addr" => sprintf("%#x", $$lastnext)};
727 print fmt_line($h, $style{"terse"}[1], $level+1);
729 $lastnext = $op->next;
730 print concise_op($op, $level, $style{"terse"}[0]);
736 my $style = $tree_decorations[$tree_style];
737 my($space, $single, $kids, $kid, $nokid, $last, $lead, $size) = @$style;
738 my $name = concise_op($op, $level, $treefmt);
739 if (not $op->flags & OPf_KIDS) {
743 for (my $kid = $op->first; $$kid; $kid = $kid->sibling) {
744 push @lines, tree($kid, $level+1);
747 for ($i = $#lines; substr($lines[$i], 0, 1) eq " "; $i--) {
748 $lines[$i] = $space . $lines[$i];
751 $lines[$i] = $last . $lines[$i];
753 if (substr($lines[$i], 0, 1) eq " ") {
754 $lines[$i] = $nokid . $lines[$i];
756 $lines[$i] = $kid . $lines[$i];
759 $lines[$i] = $kids . $lines[$i];
761 $lines[0] = $single . $lines[0];
763 return("$name$lead" . shift @lines,
764 map(" " x (length($name)+$size) . $_, @lines));
767 # *** Warning: fragile kludge ahead ***
768 # Because the B::* modules run in the same interpreter as the code
769 # they're compiling, their presence tends to distort the view we have of
770 # the code we're looking at. In particular, perl gives sequence numbers
771 # to COPs. If the program we're looking at were run on its own, this
772 # would start at 1. Because all of B::Concise and all the modules it
773 # uses are compiled first, though, by the time we get to the user's
774 # program the sequence number is already pretty high, which could be
775 # distracting if you're trying to tell OPs apart. Therefore we'd like to
776 # subtract an offset from all the sequence numbers we display, to
777 # restore the simpler view of the world. The trick is to know what that
778 # offset will be, when we're still compiling B::Concise! If we
779 # hardcoded a value, it would have to change every time B::Concise or
780 # other modules we use do. To help a little, what we do here is compile
781 # a little code at the end of the module, and compute the base sequence
782 # number for the user's program as being a small offset later, so all we
783 # have to worry about are changes in the offset.
785 # When you say "perl -MO=Concise -e '$a'", the output should look like:
787 # 4 <@> leave[t1] vKP/REFC ->(end)
789 #^ smallest OP sequence number should be 1
790 # 2 <;> nextstate(main 1 -e:1) v ->3
791 # ^ smallest COP sequence number should be 1
792 # - <1> ex-rv2sv vK/1 ->4
793 # 3 <$> gvsv(*a) s ->4
795 # If the second of the marked numbers there isn't 1, it means you need
796 # to update the corresponding magic number in the next line.
797 # Remember, this needs to stay the last things in the module.
799 # Why is this different for MacOS? Does it matter?
800 my $cop_seq_mnum = $^O eq 'MacOS' ? 12 : 11;
801 $cop_seq_base = svref_2object(eval 'sub{0;}')->START->cop_seq + $cop_seq_mnum;
809 B::Concise - Walk Perl syntax tree, printing concise info about ops
813 perl -MO=Concise[,OPTIONS] foo.pl
815 use B::Concise qw(set_style add_callback);
819 This compiler backend prints the internal OPs of a Perl program's syntax
820 tree in one of several space-efficient text formats suitable for debugging
821 the inner workings of perl or other compiler backends. It can print OPs in
822 the order they appear in the OP tree, in the order they will execute, or
823 in a text approximation to their tree structure, and the format of the
824 information displyed is customizable. Its function is similar to that of
825 perl's B<-Dx> debugging flag or the B<B::Terse> module, but it is more
826 sophisticated and flexible.
830 Here's is a short example of output (aka 'rendering'), using the
831 default formatting conventions :
833 % perl -MO=Concise -e '$a = $b + 42'
834 8 <@> leave[1 ref] vKP/REFC ->(end)
836 2 <;> nextstate(main 1 -e:1) v ->3
837 7 <2> sassign vKS/2 ->8
838 5 <2> add[t1] sK/2 ->6
839 - <1> ex-rv2sv sK/1 ->4
841 4 <$> const(IV 42) s ->5
842 - <1> ex-rv2sv sKRM*/1 ->7
845 Each line corresponds to an opcode. Null ops appear as C<ex-opname>,
846 where I<opname> is the op that has been optimized away by perl.
848 The number on the first row indicates the op's sequence number. It's
849 given in base 36 by default.
851 The symbol between angle brackets indicates the op's type : for example,
852 <2> is a BINOP, <@> a LISTOP, etc. (see L</"OP class abbreviations">).
854 The opname may be followed by op-specific information in parentheses
855 (e.g. C<gvsv(*b)>), and by targ information in brackets (e.g.
858 Next come the op flags. The common flags are listed below
859 (L</"OP flags abbreviations">). The private flags follow, separated
860 by a slash. For example, C<vKP/REFC> means that the leave op has
861 public flags OPf_WANT_VOID, OPf_KIDS, and OPf_PARENS, and the private
864 Finally an arrow points to the sequence number of the next op.
868 Arguments that don't start with a hyphen are taken to be the names of
869 subroutines to print the OPs of; if no such functions are specified,
870 the main body of the program (outside any subroutines, and not
871 including use'd or require'd files) is printed. Passing C<BEGIN>,
872 C<CHECK>, C<INIT>, or C<END> will cause all of the corresponding
873 special blocks to be printed.
875 Options affect how things are rendered (ie printed). They're presented
876 here by their visual effect, 1st being strongest. They're grouped
877 according to how they interrelate; within each group the options are
878 mutually exclusive (unless otherwise stated).
880 =head2 Options for Opcode Ordering
882 These options control the 'vertical display' of opcodes. The display
883 'order' is also called 'mode' elsewhere in this document.
889 Print OPs in the order they appear in the OP tree (a preorder
890 traversal, starting at the root). The indentation of each OP shows its
891 level in the tree. This mode is the default, so the flag is included
892 simply for completeness.
896 Print OPs in the order they would normally execute (for the majority
897 of constructs this is a postorder traversal of the tree, ending at the
898 root). In most cases the OP that usually follows a given OP will
899 appear directly below it; alternate paths are shown by indentation. In
900 cases like loops when control jumps out of a linear path, a 'goto'
905 Print OPs in a text approximation of a tree, with the root of the tree
906 at the left and 'left-to-right' order of children transformed into
907 'top-to-bottom'. Because this mode grows both to the right and down,
908 it isn't suitable for large programs (unless you have a very wide
913 =head2 Options for Line-Style
915 These options select the line-style (or just style) used to render
916 each opcode, and dictates what info is actually printed into each line.
922 Use the author's favorite set of formatting conventions. This is the
927 Use formatting conventions that emulate the output of B<B::Terse>. The
928 basic mode is almost indistinguishable from the real B<B::Terse>, and the
929 exec mode looks very similar, but is in a more logical order and lacks
930 curly brackets. B<B::Terse> doesn't have a tree mode, so the tree mode
931 is only vaguely reminiscent of B<B::Terse>.
935 Use formatting conventions in which the name of each OP, rather than being
936 written out in full, is represented by a one- or two-character abbreviation.
937 This is mainly a joke.
941 Use formatting conventions reminiscent of B<B::Debug>; these aren't
946 Use formatting conventions read from the environment variables
947 C<B_CONCISE_FORMAT>, C<B_CONCISE_GOTO_FORMAT>, and C<B_CONCISE_TREE_FORMAT>.
951 =head2 Options for tree-specific formatting
957 Use a tree format in which the minimum amount of space is used for the
958 lines connecting nodes (one character in most cases). This squeezes out
959 a few precious columns of screen real estate.
963 Use a tree format that uses longer edges to separate OP nodes. This format
964 tends to look better than the compact one, especially in ASCII, and is
969 Use tree connecting characters drawn from the VT100 line-drawing set.
970 This looks better if your terminal supports it.
974 Draw the tree with standard ASCII characters like C<+> and C<|>. These don't
975 look as clean as the VT100 characters, but they'll work with almost any
976 terminal (or the horizontal scrolling mode of less(1)) and are suitable
977 for text documentation or email. This is the default.
981 These are pairwise exclusive, i.e. compact or loose, vt or ascii.
983 =head2 Options controlling sequence numbering
989 Print OP sequence numbers in base I<n>. If I<n> is greater than 10, the
990 digit for 11 will be 'a', and so on. If I<n> is greater than 36, the digit
991 for 37 will be 'A', and so on until 62. Values greater than 62 are not
992 currently supported. The default is 36.
996 Print sequence numbers with the most significant digit first. This is the
997 usual convention for Arabic numerals, and the default.
999 =item B<-littleendian>
1001 Print seqence numbers with the least significant digit first. This is
1002 obviously mutually exclusive with bigendian.
1006 =head2 Other options
1012 Include the main program in the output, even if subroutines were also
1013 specified. This is the only option that is not sticky (see below)
1017 B::Concise::compile normally prints a banner line identifying the
1018 function name, or in case of a subref, a generic message including
1019 (unfortunately) the stringified coderef. This option suppresses the
1020 printing of the banner.
1024 =head2 Option Stickiness
1026 If you invoke Concise more than once in a program, you should know that
1027 the options are 'sticky'. This means that the options you provide in
1028 the first call will be remembered for the 2nd call, unless you
1029 re-specify or change them.
1031 =head1 FORMATTING SPECIFICATIONS
1033 For each line-style ('concise', 'terse', 'linenoise', etc.) there are
1034 3 format-specs which control how OPs are rendered.
1036 The first is the 'default' format, which is used in both basic and exec
1037 modes to print all opcodes. The 2nd, goto-format, is used in exec
1038 mode when branches are encountered. They're not real opcodes, and are
1039 inserted to look like a closing curly brace. The tree-format is tree
1042 When a line is rendered, the correct format string is scanned for the
1043 following items, and data is substituted in, or other manipulations,
1044 like basic indenting. Any text that doesn't match a special pattern
1045 (the items below) is copied verbatim. (Yes, it's a set of s///g steps.)
1049 =item B<(x(>I<exec_text>B<;>I<basic_text>B<)x)>
1051 Generates I<exec_text> in exec mode, or I<basic_text> in basic mode.
1053 =item B<(*(>I<text>B<)*)>
1055 Generates one copy of I<text> for each indentation level.
1057 =item B<(*(>I<text1>B<;>I<text2>B<)*)>
1059 Generates one fewer copies of I<text1> than the indentation level, followed
1060 by one copy of I<text2> if the indentation level is more than 0.
1062 =item B<(?(>I<text1>B<#>I<var>I<Text2>B<)?)>
1064 If the value of I<var> is true (not empty or zero), generates the
1065 value of I<var> surrounded by I<text1> and I<Text2>, otherwise
1070 Generates the value of the variable I<var>.
1072 =item B<#>I<var>I<N>
1074 Generates the value of I<var>, left jutified to fill I<N> spaces.
1078 Any number of tildes and surrounding whitespace will be collapsed to
1083 The following variables are recognized:
1089 The address of the OP, in hexidecimal.
1093 The OP-specific information of the OP (such as the SV for an SVOP, the
1094 non-local exit pointers for a LOOP, etc.) enclosed in paretheses.
1098 The B-determined class of the OP, in all caps.
1102 A single symbol abbreviating the class of the OP.
1106 The label of the statement or block the OP is the start of, if any.
1110 The name of the OP, or 'ex-foo' if the OP is a null that used to be a foo.
1114 The target of the OP, or nothing for a nulled OP.
1118 The address of the OP's first child, in hexidecimal.
1122 The OP's flags, abbreviated as a series of symbols.
1126 The numeric value of the OP's flags.
1130 The sequence number of the OP, or a hyphen if it doesn't have one.
1134 'NEXT', 'LAST', or 'REDO' if the OP is a target of one of those in exec
1135 mode, or empty otherwise.
1139 The address of the OP's last child, in hexidecimal.
1147 The OP's name, in all caps.
1151 The sequence number of the OP's next OP.
1155 The address of the OP's next OP, in hexidecimal.
1159 A one- or two-character abbreviation for the OP's name.
1163 The OP's private flags, rendered with abbreviated names if possible.
1167 The numeric value of the OP's private flags.
1171 The sequence number of the OP. Note that this is a sequence number
1172 generated by B::Concise.
1176 Whether or not the op has been optimised by the peephole optimiser.
1180 Whether or not the op is statically defined. This flag is used by the
1181 B::C compiler backend and indicates that the op should not be freed.
1185 The address of the OP's next youngest sibling, in hexidecimal.
1189 The address of the OP's SV, if it has an SV, in hexidecimal.
1193 The class of the OP's SV, if it has one, in all caps (e.g., 'IV').
1197 The value of the OP's SV, if it has one, in a short human-readable format.
1201 The numeric value of the OP's targ.
1205 The name of the variable the OP's targ refers to, if any, otherwise the
1206 letter t followed by the OP's targ in decimal.
1208 =item B<#targarglife>
1210 Same as B<#targarg>, but followed by the COP sequence numbers that delimit
1211 the variable's lifetime (or 'end' for a variable in an open scope) for a
1216 The numeric value of the OP's type, in decimal.
1220 =head1 ABBREVIATIONS
1222 =head2 OP flags abbreviations
1224 v OPf_WANT_VOID Want nothing (void context)
1225 s OPf_WANT_SCALAR Want single value (scalar context)
1226 l OPf_WANT_LIST Want list of any length (list context)
1227 K OPf_KIDS There is a firstborn child.
1228 P OPf_PARENS This operator was parenthesized.
1229 (Or block needs explicit scope entry.)
1230 R OPf_REF Certified reference.
1231 (Return container, not containee).
1232 M OPf_MOD Will modify (lvalue).
1233 S OPf_STACKED Some arg is arriving on the stack.
1234 * OPf_SPECIAL Do something weird for this op (see op.h)
1236 =head2 OP class abbreviations
1238 0 OP (aka BASEOP) An OP with no children
1239 1 UNOP An OP with one child
1240 2 BINOP An OP with two children
1241 | LOGOP A control branch OP
1242 @ LISTOP An OP that could have lots of children
1243 / PMOP An OP with a regular expression
1244 $ SVOP An OP with an SV
1245 " PVOP An OP with a string
1246 { LOOP An OP that holds pointers for a loop
1247 ; COP An OP that marks the start of a statement
1248 # PADOP An OP with a GV on the pad
1250 =head1 Using B::Concise outside of the O framework
1252 You can use B<B::Concise>, and call compile() directly, and
1253 repeatedly. By doing so, you can avoid the compile-time only
1254 operation of 'perl -MO=Concise ..'. For example, you can use the
1255 debugger to step through B::Concise::compile() itself.
1257 When doing so, you can alter Concise output by providing new output
1258 styles, and optionally by adding callback routines which populate new
1259 variables that may be rendered as part of those styles. For all
1260 following sections, please review L</FORMATTING SPECIFICATIONS>.
1262 =head2 Example: Altering Concise Renderings
1264 use B::Concise qw(set_style add_callback);
1265 set_style($your_format, $your_gotofmt, $your_treefmt);
1268 my ($h, $op, $format, $level, $stylename) = @_;
1269 $h->{variable} = some_func($op);
1272 B::Concise::compile(@options)->();
1276 B<set_style> accepts 3 arguments, and updates the three format-specs
1277 comprising a line-style (basic-exec, goto, tree). It has one minor
1278 drawback though; it doesn't register the style under a new name. This
1279 can become an issue if you render more than once and switch styles.
1280 Thus you may prefer to use add_style() and/or set_style_standard()
1283 =head2 set_style_standard($name)
1285 This restores one of the standard line-styles: C<terse>, C<concise>,
1286 C<linenoise>, C<debug>, C<env>, into effect. It also accepts style
1287 names previously defined with add_style().
1291 This subroutine accepts a new style name and three style arguments as
1292 above, and creates, registers, and selects the newly named style. It is
1293 an error to re-add a style; call set_style_standard() to switch between
1296 =head2 add_callback()
1298 If your newly minted styles refer to any #variables, you'll need to
1299 define a callback subroutine that will populate (or modify) those
1300 variables. They are then available for use in the style you've chosen.
1302 The callbacks are called for each opcode visited by Concise, in the
1303 same order as they are added. Each subroutine is passed five
1306 1. A hashref, containing the variable names and values which are
1307 populated into the report-line for the op
1308 2. the op, as a B<B::OP> object
1309 3. a reference to the format string
1310 4. the formatting (indent) level
1311 5. the selected stylename
1313 To define your own variables, simply add them to the hash, or change
1314 existing values if you need to. The level and format are passed in as
1315 references to scalars, but it is unlikely that they will need to be
1316 changed or even used.
1318 =head2 Running B::Concise::compile()
1320 B<compile> accepts options as described above in L</OPTIONS>, and
1321 arguments, which are either coderefs, or subroutine names.
1323 compile() constructs and returns a coderef, which when invoked, scans
1324 the optree, and prints the results to STDOUT. Once you have the
1325 coderef, you may change the output style; thereafter the coderef renders
1328 B<walk_output> lets you change the print destination from STDOUT to
1329 another open filehandle, or (unless you've built with -Uuseperlio)
1330 into a string passed as a ref.
1332 walk_output(\my $buf);
1333 my $walker = B::Concise::compile('-concise','funcName', \&aSubRef);
1334 print "Concise Banner for Functions: $buf\n";
1336 print "Concise Rendering(s)?: $buf\n";
1338 For each subroutine visited by Concise, the $buf will contain a
1339 banner naming the function or coderef about to be traversed.
1340 Once $walker is invoked, it prints the actual renderings for each.
1342 To switch back to one of the standard styles like C<concise> or
1343 C<terse>, call C<set_style_standard>, or pass the style name into
1344 B::Concise::compile() (as done above).
1346 =head2 B::Concise::reset_sequence()
1348 This function (not exported) lets you reset the sequence numbers (note
1349 that they're numbered arbitrarily, their goal being to be human
1350 readable). Its purpose is mostly to support testing, i.e. to compare
1351 the concise output from two identical anonymous subroutines (but
1352 different instances). Without the reset, B::Concise, seeing that
1353 they're separate optrees, generates different sequence numbers in
1358 All detected errors, (invalid arguments, internal errors, etc.) are
1359 resolved with a die($message). Use an eval if you wish to catch these
1360 errors and continue processing.
1362 In particular, B<compile> will die if you've asked for a non-existent
1363 function-name, a non-existent coderef, or a non-CODE reference.
1367 Stephen McCamant, E<lt>smcc@CSUA.Berkeley.EDUE<gt>.