3 # Copyright (c) 1996, 1997, 1998 Malcolm Beattie
5 # You may distribute under the terms of either the GNU General Public
6 # License or the Artistic License, as specified in the README file.
12 @B::ISA = qw(Exporter);
14 # walkoptree_slow comes from B.pm (you are there),
15 # walkoptree comes from B.xs
21 # Our BOOT code needs $VERSION set, and will append to @EXPORT_OK.
22 # Want our constants loaded before the compiler meets OPf_KIDS below, as
23 # the combination of having the constant stay a Proxy Constant Subroutine
24 # and its value being inlined saves a little over .5K
30 push @B::EXPORT_OK, (qw(minus_c ppname save_BEGINs
31 class peekop cast_I32 cstring cchar hash threadsv_names
32 main_root main_start main_cv svref_2object opnumber
33 sub_generation amagic_generation perlstring
34 walkoptree_slow walkoptree walkoptree_exec walksymtable
35 parents comppadlist sv_undef compile_stats timing_info
36 begin_av init_av check_av end_av regex_padav dowarn
37 defstash curstash warnhook diehook inc_gv @optype
38 @specialsv_name unitcheck_av safename));
40 @B::SV::ISA = 'B::OBJECT';
41 @B::NULL::ISA = 'B::SV';
42 @B::PV::ISA = 'B::SV';
43 @B::IV::ISA = 'B::SV';
44 @B::NV::ISA = 'B::SV';
45 # RV is eliminated with 5.11.0, but effectively is a specialisation of IV now.
46 @B::RV::ISA = $] >= 5.011 ? 'B::IV' : 'B::SV';
47 @B::PVIV::ISA = qw(B::PV B::IV);
48 @B::PVNV::ISA = qw(B::PVIV B::NV);
49 @B::PVMG::ISA = 'B::PVNV';
50 @B::REGEXP::ISA = 'B::PVMG' if $] >= 5.011;
51 @B::INVLIST::ISA = 'B::PV' if $] >= 5.019;
52 @B::PVLV::ISA = 'B::GV';
53 @B::BM::ISA = 'B::GV';
54 @B::AV::ISA = 'B::PVMG';
55 @B::GV::ISA = 'B::PVMG';
56 @B::HV::ISA = 'B::PVMG';
57 @B::CV::ISA = 'B::PVMG';
58 @B::IO::ISA = 'B::PVMG';
59 @B::FM::ISA = 'B::CV';
61 @B::OP::ISA = 'B::OBJECT';
62 @B::UNOP::ISA = 'B::OP';
63 @B::BINOP::ISA = 'B::UNOP';
64 @B::LOGOP::ISA = 'B::UNOP';
65 @B::LISTOP::ISA = 'B::BINOP';
66 @B::SVOP::ISA = 'B::OP';
67 @B::PADOP::ISA = 'B::OP';
68 @B::PVOP::ISA = 'B::OP';
69 @B::LOOP::ISA = 'B::LISTOP';
70 @B::PMOP::ISA = 'B::LISTOP';
71 @B::COP::ISA = 'B::OP';
73 @B::SPECIAL::ISA = 'B::OBJECT';
75 @B::optype = qw(OP UNOP BINOP LOGOP LISTOP PMOP SVOP PADOP PVOP LOOP COP);
76 # bytecode.pl contained the following comment:
77 # Nullsv *must* come first in the following so that the condition
78 # ($$sv == 0) can continue to be used to test (sv == Nullsv).
79 @B::specialsv_name = qw(Nullsv &PL_sv_undef &PL_sv_yes &PL_sv_no
80 (SV*)pWARN_ALL (SV*)pWARN_NONE (SV*)pWARN_STD);
83 # Stop "-w" from complaining about the lack of a real B::OBJECT class
88 safename(shift()->NAME);
94 # The regex below corresponds to the isCONTROLVAR macro
98 or $name =~ s/^([\cA-\cZ\c\\c[\c]\c_\c^])/
99 "^" . chr( utf8::unicode_to_native( 64 ^ ord($1) ))/e;
101 # When we say unicode_to_native we really mean ascii_to_native,
102 # which matters iff this is a non-ASCII platform (EBCDIC). '\c?' would
103 # not have to be special cased, except for non-ASCII.
108 sub B::IV::int_value {
110 return (($self->FLAGS() & SVf_IVisUV()) ? $self->UVX : $self->IV);
113 sub B::NULL::as_string() {""}
114 *B::IV::as_string = \*B::IV::int_value;
115 *B::PV::as_string = \*B::PV::PV;
117 # The input typemap checking makes no distinction between different SV types,
118 # so the XS body will generate the same C code, despite the different XS
119 # "types". So there is no change in behaviour from doing "newXS" like this,
120 # compared with the old approach of having a (near) duplicate XS body.
121 # We should fix the typemap checking.
122 *B::IV::RV = \*B::PV::RV if $] > 5.012;
129 my ($class, $value) = @_;
131 walkoptree_debug($value);
141 sub parents { \@parents }
146 return sprintf("%s (0x%x) %s", class($op), $$op, $op->name);
149 sub walkoptree_slow {
150 my($op, $method, $level) = @_;
151 $op_count++; # just for statistics
153 warn(sprintf("walkoptree: %d. %s\n", $level, peekop($op))) if $debug;
154 $op->$method($level) if $op->can($method);
155 if ($$op && ($op->flags & OPf_KIDS)) {
157 unshift(@parents, $op);
158 for ($kid = $op->first; $$kid; $kid = $kid->sibling) {
159 walkoptree_slow($kid, $method, $level + 1);
163 if (class($op) eq 'PMOP'
164 && ref($op->pmreplroot)
165 && ${$op->pmreplroot}
166 && $op->pmreplroot->isa( 'B::OP' ))
168 unshift(@parents, $op);
169 walkoptree_slow($op->pmreplroot, $method, $level + 1);
175 return "Total number of OPs processed: $op_count\n";
179 my ($sec, $min, $hr) = localtime;
180 my ($user, $sys) = times;
181 sprintf("%02d:%02d:%02d user=$user sys=$sys",
182 $hr, $min, $sec, $user, $sys);
192 my ($obj, $value) = @_;
193 # warn(sprintf("savesym: sym_%x => %s\n", $$obj, $value)); # debug
194 $symtable{sprintf("sym_%x", $$obj)} = $value;
199 return $symtable{sprintf("sym_%x", $$obj)};
202 sub walkoptree_exec {
203 my ($op, $method, $level) = @_;
206 my $prefix = " " x $level;
207 for (; $$op; $op = $op->next) {
210 print $prefix, "goto $sym\n";
213 savesym($op, sprintf("%s (0x%lx)", class($op), $$op));
214 $op->$method($level);
217 /^(d?or(assign)?|and(assign)?|mapwhile|grepwhile|entertry|range|cond_expr)$/)
219 print $prefix, uc($1), " => {\n";
220 walkoptree_exec($op->other, $method, $level + 1);
221 print $prefix, "}\n";
222 } elsif ($ppname eq "match" || $ppname eq "subst") {
223 my $pmreplstart = $op->pmreplstart;
225 print $prefix, "PMREPLSTART => {\n";
226 walkoptree_exec($pmreplstart, $method, $level + 1);
227 print $prefix, "}\n";
229 } elsif ($ppname eq "substcont") {
230 print $prefix, "SUBSTCONT => {\n";
231 walkoptree_exec($op->other->pmreplstart, $method, $level + 1);
232 print $prefix, "}\n";
234 } elsif ($ppname eq "enterloop") {
235 print $prefix, "REDO => {\n";
236 walkoptree_exec($op->redoop, $method, $level + 1);
237 print $prefix, "}\n", $prefix, "NEXT => {\n";
238 walkoptree_exec($op->nextop, $method, $level + 1);
239 print $prefix, "}\n", $prefix, "LAST => {\n";
240 walkoptree_exec($op->lastop, $method, $level + 1);
241 print $prefix, "}\n";
242 } elsif ($ppname eq "subst") {
243 my $replstart = $op->pmreplstart;
245 print $prefix, "SUBST => {\n";
246 walkoptree_exec($replstart, $method, $level + 1);
247 print $prefix, "}\n";
254 my ($symref, $method, $recurse, $prefix) = @_;
259 $prefix = '' unless defined $prefix;
260 foreach my $sym ( sort keys %$symref ) {
261 $ref= $symref->{$sym};
262 $fullname = "*main::".$prefix.$sym;
264 $sym = $prefix . $sym;
265 if (svref_2object(\*$sym)->NAME ne "main::" && $sym ne "<none>::" && &$recurse($sym)) {
266 walksymtable(\%$fullname, $method, $recurse, $sym);
269 svref_2object(\*$fullname)->$method();
280 my ($class, $section, $symtable, $default) = @_;
281 $output_fh ||= FileHandle->new_tmpfile;
282 my $obj = bless [-1, $section, $symtable, $default], $class;
283 $sections{$section} = $obj;
288 my ($class, $section) = @_;
289 return $sections{$section};
294 while (defined($_ = shift)) {
295 print $output_fh "$section->[1]\t$_\n";
302 return $section->[0];
307 return $section->[1];
312 return $section->[2];
317 return $section->[3];
321 my ($section, $fh, $format) = @_;
322 my $name = $section->name;
323 my $sym = $section->symtable || {};
324 my $default = $section->default;
326 seek($output_fh, 0, 0);
327 while (<$output_fh>) {
332 exists($sym->{$1}) ? $sym->{$1} : $default;
334 printf $fh $format, $_;
346 B - The Perl Compiler Backend
354 The C<B> module supplies classes which allow a Perl program to delve
355 into its own innards. It is the module used to implement the
356 "backends" of the Perl compiler. Usage of the compiler does not
357 require knowledge of this module: see the F<O> module for the
358 user-visible part. The C<B> module is of use to those who want to
359 write new compiler backends. This documentation assumes that the
360 reader knows a fair amount about perl's internals including such
361 things as SVs, OPs and the internal symbol table and syntax tree
366 The C<B> module contains a set of utility functions for querying the
367 current state of the Perl interpreter; typically these functions
368 return objects from the B::SV and B::OP classes, or their derived
369 classes. These classes in turn define methods for querying the
370 resulting objects about their own internal state.
372 =head1 Utility Functions
374 The C<B> module exports a variety of functions: some are simple
375 utility functions, others provide a Perl program with a way to
376 get an initial "handle" on an internal object.
378 =head2 Functions Returning C<B::SV>, C<B::AV>, C<B::HV>, and C<B::CV> objects
380 For descriptions of the class hierarchy of these objects and the
381 methods that can be called on them, see below, L<"OVERVIEW OF
382 CLASSES"> and L<"SV-RELATED CLASSES">.
388 Returns the SV object corresponding to the C variable C<sv_undef>.
392 Returns the SV object corresponding to the C variable C<sv_yes>.
396 Returns the SV object corresponding to the C variable C<sv_no>.
398 =item svref_2object(SVREF)
400 Takes a reference to any Perl value, and turns the referred-to value
401 into an object in the appropriate B::OP-derived or B::SV-derived
402 class. Apart from functions such as C<main_root>, this is the primary
403 way to get an initial "handle" on an internal perl data structure
404 which can then be followed with the other access methods.
406 The returned object will only be valid as long as the underlying OPs
407 and SVs continue to exist. Do not attempt to use the object after the
408 underlying structures are freed.
410 =item amagic_generation
412 Returns the SV object corresponding to the C variable C<amagic_generation>.
413 As of Perl 5.18, this is just an alias to C<PL_na>, so its value is
418 Returns the AV object (i.e. in class B::AV) representing INIT blocks.
422 Returns the AV object (i.e. in class B::AV) representing CHECK blocks.
426 Returns the AV object (i.e. in class B::AV) representing UNITCHECK blocks.
430 Returns the AV object (i.e. in class B::AV) representing BEGIN blocks.
434 Returns the AV object (i.e. in class B::AV) representing END blocks.
438 Returns the PADLIST object (i.e. in class B::PADLIST) of the global
439 comppadlist. In Perl 5.16 and earlier it returns an AV object (class
444 Only when perl was compiled with ithreads.
448 Return the (faked) CV corresponding to the main part of the Perl
453 =head2 Functions for Examining the Symbol Table
457 =item walksymtable(SYMREF, METHOD, RECURSE, PREFIX)
459 Walk the symbol table starting at SYMREF and call METHOD on each
460 symbol (a B::GV object) visited. When the walk reaches package
461 symbols (such as "Foo::") it invokes RECURSE, passing in the symbol
462 name, and only recurses into the package if that sub returns true.
464 PREFIX is the name of the SYMREF you're walking.
468 # Walk CGI's symbol table calling print_subs on each symbol.
469 # Recurse only into CGI::Util::
470 walksymtable(\%CGI::, 'print_subs',
471 sub { $_[0] eq 'CGI::Util::' }, 'CGI::');
473 print_subs() is a B::GV method you have declared. Also see L<"B::GV
478 =head2 Functions Returning C<B::OP> objects or for walking op trees
480 For descriptions of the class hierarchy of these objects and the
481 methods that can be called on them, see below, L<"OVERVIEW OF
482 CLASSES"> and L<"OP-RELATED CLASSES">.
488 Returns the root op (i.e. an object in the appropriate B::OP-derived
489 class) of the main part of the Perl program.
493 Returns the starting op of the main part of the Perl program.
495 =item walkoptree(OP, METHOD)
497 Does a tree-walk of the syntax tree based at OP and calls METHOD on
498 each op it visits. Each node is visited before its children. If
499 C<walkoptree_debug> (see below) has been called to turn debugging on then
500 the method C<walkoptree_debug> is called on each op before METHOD is
503 =item walkoptree_debug(DEBUG)
505 Returns the current debugging flag for C<walkoptree>. If the optional
506 DEBUG argument is non-zero, it sets the debugging flag to that. See
507 the description of C<walkoptree> above for what the debugging flag
512 =head2 Miscellaneous Utility Functions
518 Return the PP function name (e.g. "pp_add") of op number OPNUM.
522 Returns a string in the form "0x..." representing the value of the
523 internal hash function used by perl on string STR.
527 Casts I to the internal I32 type used by that perl.
531 Does the equivalent of the C<-c> command-line option. Obviously, this
532 is only useful in a BEGIN block or else the flag is set too late.
536 Returns a double-quote-surrounded escaped version of STR which can
537 be used as a string in C source code.
539 =item perlstring(STR)
541 Returns a double-quote-surrounded escaped version of STR which can
542 be used as a string in Perl source code.
546 This function returns the string with the first character modified if it
547 is a control character. It converts it to ^X format first, so that "\cG"
548 becomes "^G". This is used internally by L<B::GV::SAFENAME|/SAFENAME>, but
549 you can call it directly.
553 Returns the class of an object without the part of the classname
554 preceding the first C<"::">. This is used to turn C<"B::UNOP"> into
555 C<"UNOP"> for example.
559 In a perl compiled for threads, this returns a list of the special
560 per-thread threadsv variables.
564 =head2 Exported utility variables
570 my $op_type = $optype[$op_type_num];
572 A simple mapping of the op type number to its type (like 'COP' or 'BINOP').
574 =item @specialsv_name
576 my $sv_name = $specialsv_name[$sv_index];
578 Certain SV types are considered 'special'. They're represented by
579 B::SPECIAL and are referred to by a number from the specialsv_list.
580 This array maps that number back to the name of the SV (like 'Nullsv'
586 =head1 OVERVIEW OF CLASSES
588 The C structures used by Perl's internals to hold SV and OP
589 information (PVIV, AV, HV, ..., OP, SVOP, UNOP, ...) are modelled on a
590 class hierarchy and the C<B> module gives access to them via a true
591 object hierarchy. Structure fields which point to other objects
592 (whether types of SV or types of OP) are represented by the C<B>
593 module as Perl objects of the appropriate class.
595 The bulk of the C<B> module is the methods for accessing fields of
598 Note that all access is read-only. You cannot modify the internals by
599 using this module. Also, note that the B::OP and B::SV objects created
600 by this module are only valid for as long as the underlying objects
601 exist; their creation doesn't increase the reference counts of the
602 underlying objects. Trying to access the fields of a freed object will
603 give incomprehensible results, or worse.
605 =head2 SV-RELATED CLASSES
607 B::IV, B::NV, B::RV, B::PV, B::PVIV, B::PVNV, B::PVMG, B::BM (5.9.5 and
608 earlier), B::PVLV, B::AV, B::HV, B::CV, B::GV, B::FM, B::IO. These classes
609 correspond in the obvious way to the underlying C structures of similar names.
610 The inheritance hierarchy mimics the underlying C "inheritance". For the
611 5.10.x branch, (I<ie> 5.10.0, 5.10.1 I<etc>) this is:
615 +------------+------------+------------+
617 B::PV B::IV B::NV B::RV
629 +-----+-----+-----+-----+
631 B::AV B::GV B::HV B::CV B::IO
636 For 5.9.0 and earlier, PVLV is a direct subclass of PVMG, and BM is still
637 present as a distinct type, so the base of this diagram is
644 +------+-----+-----+-----+-----+-----+
646 B::PVLV B::BM B::AV B::GV B::HV B::CV B::IO
651 For 5.11.0 and later, B::RV is abolished, and IVs can be used to store
652 references, and a new type B::REGEXP is introduced, giving this structure:
656 +------------+------------+
670 +-------+-------+---+---+-------+-------+
672 B::AV B::GV B::HV B::CV B::IO B::REGEXP
678 Access methods correspond to the underlying C macros for field access,
679 usually with the leading "class indication" prefix removed (Sv, Av,
680 Hv, ...). The leading prefix is only left in cases where its removal
681 would cause a clash in method name. For example, C<GvREFCNT> stays
682 as-is since its abbreviation would clash with the "superclass" method
683 C<REFCNT> (corresponding to the C function C<SvREFCNT>).
695 Returns a reference to the regular scalar corresponding to this
696 B::SV object. In other words, this method is the inverse operation
697 to the svref_2object() subroutine. This scalar and other data it points
698 at should be considered read-only: modifying them is neither safe nor
699 guaranteed to have a sensible effect.
709 Returns the value of the IV, I<interpreted as
710 a signed integer>. This will be misleading
711 if C<FLAGS & SVf_IVisUV>. Perhaps you want the
712 C<int_value> method instead?
720 This method returns the value of the IV as an integer.
721 It differs from C<IV> in that it returns the correct
722 value regardless of whether it's stored signed or
755 This method is the one you usually want. It constructs a
756 string using the length and offset information in the struct:
757 for ordinary scalars it will return the string that you'd see
758 from Perl, even if it contains null characters.
762 Same as B::RV::RV, except that it will die() if the PV isn't
767 This method is less often useful. It assumes that the string
768 stored in the struct is null-terminated, and disregards the
771 It is the appropriate method to use if you need to get the name
772 of a lexical variable from a padname array. Lexical variable names
773 are always stored with a null terminator, and the length field
774 (CUR) is overloaded for other purposes and can't be relied on here.
778 This method returns the internal length field, which consists of the number
779 of internal bytes, not necessarily the number of logical characters.
783 This method returns the number of bytes allocated (via malloc) for storing
784 the string. This is 0 if the scalar does not "own" the string.
788 =head2 B::PVMG Methods
798 =head2 B::MAGIC Methods
806 Only valid on r-magic, returns the string that generated the regexp.
816 Will die() if called on r-magic.
822 Only valid on r-magic, returns the integer value of the REGEX stored
827 =head2 B::PVLV Methods
861 This method returns TRUE if the GP field of the GV is NULL.
867 This method returns the name of the glob, but if the first
868 character of the name is a control character, then it converts
869 it to ^X first, so that *^G would return "^G" rather than "\cG".
871 It's useful if you want to print out the name of a variable.
872 If you restrict yourself to globs which exist at compile-time
873 then the result ought to be unambiguous, because code like
874 C<${"^G"} = 1> is compiled as two ops - a constant string and
875 a dereference (rv2gv) - so that the glob is created at runtime.
877 If you're working with globs at runtime, and need to disambiguate
878 *^G from *{"^G"}, then you should use the raw NAME method.
912 B::IO objects derive from IO objects and you will get more information from
913 the IO object itself.
917 $gvio = B::svref_2object(\*main::stdin)->IO;
918 $IO = $gvio->object_2svref();
947 A character symbolizing the type of IO Handle.
960 \0 closed internal handle
966 Takes one argument ( 'stdin' | 'stdout' | 'stderr' ) and returns true
967 if the IoIFP of the object is equal to the handle whose name was
968 passed as argument; i.e., $io->IsSTD('stderr') is true if
969 IoIFP($io) == PerlIO_stderr().
985 Like C<ARRAY>, but takes an index as an argument to get only one element,
986 rather than a list of all of them.
990 This method is deprecated if running under Perl 5.8, and is no longer present
991 if running under Perl 5.9
995 This method returns the AV specific
996 flags. In Perl 5.9 these are now stored
997 in with the main SV flags, so this method is no longer present.
1001 =head2 B::CV Methods
1019 Returns a B::PADLIST object under Perl 5.18 or higher, or a B::AV in
1030 For constant subroutines, returns the constant SV returned by the subroutine.
1038 Returns the name of a lexical sub, otherwise C<undef>.
1042 =head2 B::HV Methods
1060 This method is not present if running under Perl 5.9, as the PMROOT
1061 information is no longer stored directly in the hash.
1065 =head2 OP-RELATED CLASSES
1067 C<B::OP>, C<B::UNOP>, C<B::BINOP>, C<B::LOGOP>, C<B::LISTOP>, C<B::PMOP>,
1068 C<B::SVOP>, C<B::PADOP>, C<B::PVOP>, C<B::LOOP>, C<B::COP>.
1070 These classes correspond in the obvious way to the underlying C
1071 structures of similar names. The inheritance hierarchy mimics the
1072 underlying C "inheritance":
1076 +---------------+--------+--------+-------+
1078 B::UNOP B::SVOP B::PADOP B::COP B::PVOP
1089 Access methods correspond to the underlying C structure field names,
1090 with the leading "class indication" prefix (C<"op_">) removed.
1092 =head2 B::OP Methods
1094 These methods get the values of similarly named fields within the OP
1095 data structure. See top of C<op.h> for more info.
1105 Returns the OP's parent. If it has no parent, or if your perl wasn't built
1106 with C<-DPERL_OP_PARENT>, returns NULL.
1110 This returns the op name as a string (e.g. "add", "rv2av").
1114 This returns the function name as a string (e.g. "PL_ppaddr[OP_ADD]",
1115 "PL_ppaddr[OP_RV2AV]").
1119 This returns the op description from the global C PL_op_desc array
1120 (e.g. "addition" "array deref").
1136 =head2 B::UNOP METHOD
1144 =head2 B::BINOP METHOD
1152 =head2 B::LOGOP METHOD
1160 =head2 B::LISTOP METHOD
1168 =head2 B::PMOP Methods
1178 Only up to Perl 5.9.4
1190 Only when perl was compiled with ithreads.
1198 =head2 B::SVOP METHOD
1208 =head2 B::PADOP METHOD
1216 =head2 B::PVOP METHOD
1224 =head2 B::LOOP Methods
1236 =head2 B::COP Methods
1246 =item stashoff (threaded only)
1266 =head2 OTHER CLASSES
1268 Perl 5.18 introduces a new class, B::PADLIST, returned by B::CV's
1271 =head2 B::PADLIST Methods
1279 A list of pads. The first one contains the names. These are currently
1280 B::AV objects, but that is likely to change in future versions.
1284 Like C<ARRAY>, but takes an index as an argument to get only one element,
1285 rather than a list of all of them.
1293 Although the optree is read-only, there is an overlay facility that allows
1294 you to override what values the various B::*OP methods return for a
1295 particular op. C<$B::overlay> should be set to reference a two-deep hash:
1296 indexed by OP address, then method name. Whenever a an op method is
1297 called, the value in the hash is returned if it exists. This facility is
1298 used by B::Deparse to "undo" some optimisations. For example:
1301 local $B::overlay = {};
1303 if ($op->name eq "foo") {
1304 $B::overlay->{$$op} = {
1306 next => $op->next->next,
1310 $op->name # returns "bar"
1311 $op->next # returns the next op but one
1316 Malcolm Beattie, C<mbeattie@sable.ox.ac.uk>