perlguts: clarify SV types which are scalars

[perl5.git] / pod / perlguts.pod
diff --git a/pod/perlguts.pod b/pod/perlguts.pod

index 8091fe5..5f33bbb 100644 (file)
--- a/pod/perlguts.pod
+++ b/pod/perlguts.pod
@@ -56,7 +56,7 @@ The seven routines are:
      SV*  newSVpvf(const char*, ...);
      SV*  newSVsv(SV*);
  
-C<STRLEN> is an integer type (Size_t, usually defined as size_t in
+C<STRLEN> is an integer type (C<Size_t>, usually defined as C<size_t> in
  F<config.h>) guaranteed to be large enough to represent the size of
  any string that perl can handle.
  
@@ -79,7 +79,7 @@ To change the value of an I<already-existing> SV, there are eight routines:
      void  sv_setpvn(SV*, const char*, STRLEN)
      void  sv_setpvf(SV*, const char*, ...);
      void  sv_vsetpvfn(SV*, const char*, STRLEN, va_list *,
-                                                    SV **, I32, bool *);
+                                        SV **, Size_t, bool *);
      void  sv_setsv(SV*, SV*);
  
  Notice that you can choose to specify the length of the string to be
@@ -103,7 +103,7 @@ important.  Note that this function requires you to specify the length of
  the format.
  
  The C<sv_set*()> functions are not generic enough to operate on values
-that have "magic".  See L<Magic Virtual Tables> later in this document.
+that have "magic".  See L</Magic Virtual Tables> later in this document.
  
  All SVs that contain strings should be terminated with a C<NUL> character.
  If it is not C<NUL>-terminated there is a risk of
@@ -186,7 +186,7 @@ sv_insert() or sv_insert_flags().
  If you don't need the existing content of the SV, you can avoid some
  copying with:
  
-    sv_setpvn(sv, "", 0);
+    SvPVCLEAR(sv);
      s = SvGROW(sv, needlen + 1);
      /* something that modifies up to needlen bytes at s, but modifies
         newlen bytes
@@ -252,7 +252,7 @@ SV with the string stored in the second SV.  It also forces the second SV
  to be interpreted as a string.
  
  The C<sv_cat*()> functions are not generic enough to operate on values that
-have "magic".  See L<Magic Virtual Tables> later in this document.
+have "magic".  See L</Magic Virtual Tables> later in this document.
  
  If you know the name of a scalar variable, you can get a pointer to its SV
  by using the following:
@@ -282,7 +282,7 @@ But won't work when called as:
  So to repeat always use SvOK() to check whether an sv is defined.
  
  Also you have to be careful when using C<&PL_sv_undef> as a value in
-AVs or HVs (see L<AVs, HVs and undefined values>).
+AVs or HVs (see L</AVs, HVs and undefined values>).
  
  There are also the two values C<PL_sv_yes> and C<PL_sv_no>, which contain
  boolean TRUE and FALSE values, respectively.  Like C<PL_sv_undef>, their
@@ -304,7 +304,7 @@ bus error, or just weird results.  Change the zero to C<&PL_sv_undef> in the
  first line and all will be well.
  
  To free an SV that you've created, call C<SvREFCNT_dec(SV*)>.  Normally this
-call is not necessary (see L<Reference Counts and Mortality>).
+call is not necessary (see L</Reference Counts and Mortality>).
  
  =head2 Offsets
  
@@ -341,14 +341,15 @@ copy-on-write is skipped.  First have a look at an empty string:
  Notice here the LEN is 10.  (It may differ on your platform.)  Extend the
  length of the string to one less than 10, and do a substitution:
  
-  % ./perl -Ilib -MDevel::Peek -le '$a=""; $a.="123456789"; $a=~s/.//; Dump($a)'
-  SV = PV(0x7ffa04008a70) at 0x7ffa04030390
-    REFCNT = 1
-    FLAGS = (POK,OOK,pPOK)
-    OFFSET = 1
-    PV = 0x7ffa03c05b61 ( "\1" . ) "23456789"\0
-    CUR = 8
-    LEN = 9
+ % ./perl -Ilib -MDevel::Peek -le '$a=""; $a.="123456789"; $a=~s/.//; \
+                                                            Dump($a)'
+ SV = PV(0x7ffa04008a70) at 0x7ffa04030390
+   REFCNT = 1
+   FLAGS = (POK,OOK,pPOK)
+   OFFSET = 1
+   PV = 0x7ffa03c05b61 ( "\1" . ) "23456789"\0
+   CUR = 8
+   LEN = 9
  
  Here the number of bytes chopped off (1) is shown next as the OFFSET.  The
  portion of the string between the "real" and the "fake" beginnings is
@@ -460,7 +461,7 @@ by using the following:
  
  This returns NULL if the variable does not exist.
  
-See L<Understanding the Magic of Tied Hashes and Arrays> for more
+See L</Understanding the Magic of Tied Hashes and Arrays> for more
  information on how to use the array access functions on tied arrays.
  
  =head2 Working with HVs
@@ -544,7 +545,7 @@ The exact implementation of this macro varies by architecture and version
  of perl, and the return value may change per invocation, so the value
  is only valid for the duration of a single perl process.
  
-See L<Understanding the Magic of Tied Hashes and Arrays> for more
+See L</Understanding the Magic of Tied Hashes and Arrays> for more
  information on how to use the hash access functions on tied hashes.
  
  =head2 Hash API Extensions
@@ -680,12 +681,14 @@ macro and then check the return value.
  
  The most useful types that will be returned are:
  
-    < SVt_PVAV  Scalar
      SVt_PVAV    Array
      SVt_PVHV    Hash
      SVt_PVCV    Code
      SVt_PVGV    Glob (possibly a file handle)
  
+Any numerical value returned which is less than C<SVt_PVAV> will be a scalar
+of some form.
+
  See L<perlapi/svtype> for more details.
  
  =head2 Blessed References and Class Objects
@@ -701,7 +704,7 @@ A reference can be blessed into a package with the following function:
  
  The C<sv> argument must be a reference value.  The C<stash> argument
  specifies which class the reference will belong to.  See
-L<Stashes and Globs> for information on converting class names into stashes.
+L</Stashes and Globs> for information on converting class names into stashes.
  
  /* Still under construction */
  
@@ -797,68 +800,116 @@ Perl uses a reference count-driven garbage collection mechanism.  SVs,
  AVs, or HVs (xV for short in the following) start their life with a
  reference count of 1.  If the reference count of an xV ever drops to 0,
  then it will be destroyed and its memory made available for reuse.
-
-This normally doesn't happen at the Perl level unless a variable is
-undef'ed or the last variable holding a reference to it is changed or
-overwritten.  At the internal level, however, reference counts can be
-manipulated with the following macros:
+At the most basic internal level, reference counts can be manipulated
+with the following macros:
  
      int SvREFCNT(SV* sv);
      SV* SvREFCNT_inc(SV* sv);
      void SvREFCNT_dec(SV* sv);
  
-However, there is one other function which manipulates the reference
-count of its argument.  The C<newRV_inc> function, you will recall,
-creates a reference to the specified argument.  As a side effect,
-it increments the argument's reference count.  If this is not what
-you want, use C<newRV_noinc> instead.
-
-For example, imagine you want to return a reference from an XSUB function.
-Inside the XSUB routine, you create an SV which initially has a reference
-count of one.  Then you call C<newRV_inc>, passing it the just-created SV.
-This returns the reference as a new SV, but the reference count of the
-SV you passed to C<newRV_inc> has been incremented to two.  Now you
-return the reference from the XSUB routine and forget about the SV.
-But Perl hasn't!  Whenever the returned reference is destroyed, the
-reference count of the original SV is decreased to one and nothing happens.
-The SV will hang around without any way to access it until Perl itself
-terminates.  This is a memory leak.
-
-The correct procedure, then, is to use C<newRV_noinc> instead of
-C<newRV_inc>.  Then, if and when the last reference is destroyed,
-the reference count of the SV will go to zero and it will be destroyed,
-stopping any memory leak.
+(There are also suffixed versions of the increment and decrement macros,
+for situations where the full generality of these basic macros can be
+exchanged for some performance.)
+
+However, the way a programmer should think about references is not so
+much in terms of the bare reference count, but in terms of I<ownership>
+of references.  A reference to an xV can be owned by any of a variety
+of entities: another xV, the Perl interpreter, an XS data structure,
+a piece of running code, or a dynamic scope.  An xV generally does not
+know what entities own the references to it; it only knows how many
+references there are, which is the reference count.
+
+To correctly maintain reference counts, it is essential to keep track
+of what references the XS code is manipulating.  The programmer should
+always know where a reference has come from and who owns it, and be
+aware of any creation or destruction of references, and any transfers
+of ownership.  Because ownership isn't represented explicitly in the xV
+data structures, only the reference count need be actually maintained
+by the code, and that means that this understanding of ownership is not
+actually evident in the code.  For example, transferring ownership of a
+reference from one owner to another doesn't change the reference count
+at all, so may be achieved with no actual code.  (The transferring code
+doesn't touch the referenced object, but does need to ensure that the
+former owner knows that it no longer owns the reference, and that the
+new owner knows that it now does.)
+
+An xV that is visible at the Perl level should not become unreferenced
+and thus be destroyed.  Normally, an object will only become unreferenced
+when it is no longer visible, often by the same means that makes it
+invisible.  For example, a Perl reference value (RV) owns a reference to
+its referent, so if the RV is overwritten that reference gets destroyed,
+and the no-longer-reachable referent may be destroyed as a result.
+
+Many functions have some kind of reference manipulation as
+part of their purpose.  Sometimes this is documented in terms
+of ownership of references, and sometimes it is (less helpfully)
+documented in terms of changes to reference counts.  For example, the
+L<newRV_inc()|perlapi/newRV_inc> function is documented to create a new RV
+(with reference count 1) and increment the reference count of the referent
+that was supplied by the caller.  This is best understood as creating
+a new reference to the referent, which is owned by the created RV,
+and returning to the caller ownership of the sole reference to the RV.
+The L<newRV_noinc()|perlapi/newRV_noinc> function instead does not
+increment the reference count of the referent, but the RV nevertheless
+ends up owning a reference to the referent.  It is therefore implied
+that the caller of C<newRV_noinc()> is relinquishing a reference to the
+referent, making this conceptually a more complicated operation even
+though it does less to the data structures.
+
+For example, imagine you want to return a reference from an XSUB
+function.  Inside the XSUB routine, you create an SV which initially
+has just a single reference, owned by the XSUB routine.  This reference
+needs to be disposed of before the routine is complete, otherwise it
+will leak, preventing the SV from ever being destroyed.  So to create
+an RV referencing the SV, it is most convenient to pass the SV to
+C<newRV_noinc()>, which consumes that reference.  Now the XSUB routine
+no longer owns a reference to the SV, but does own a reference to the RV,
+which in turn owns a reference to the SV.  The ownership of the reference
+to the RV is then transferred by the process of returning the RV from
+the XSUB.
  
  There are some convenience functions available that can help with the
  destruction of xVs.  These functions introduce the concept of "mortality".
-An xV that is mortal has had its reference count marked to be decremented,
-but not actually decremented, until "a short time later".  Generally the
-term "short time later" means a single Perl statement, such as a call to
-an XSUB function.  The actual determinant for when mortal xVs have their
-reference count decremented depends on two macros, SAVETMPS and FREETMPS.
-See L<perlcall> and L<perlxs> for more details on these macros.
-
-"Mortalization" then is at its simplest a deferred C<SvREFCNT_dec>.
-However, if you mortalize a variable twice, the reference count will
-later be decremented twice.
-
-"Mortal" SVs are mainly used for SVs that are placed on perl's stack.
-For example an SV which is created just to pass a number to a called sub
-is made mortal to have it cleaned up automatically when it's popped off
-the stack.  Similarly, results returned by XSUBs (which are pushed on the
-stack) are often made mortal.
-
-To create a mortal variable, use the functions:
+Much documentation speaks of an xV itself being mortal, but this is
+misleading.  It is really I<a reference to> an xV that is mortal, and it
+is possible for there to be more than one mortal reference to a single xV.
+For a reference to be mortal means that it is owned by the temps stack,
+one of perl's many internal stacks, which will destroy that reference
+"a short time later".  Usually the "short time later" is the end of
+the current Perl statement.  However, it gets more complicated around
+dynamic scopes: there can be multiple sets of mortal references hanging
+around at the same time, with different death dates.  Internally, the
+actual determinant for when mortal xV references are destroyed depends
+on two macros, C<SAVETMPS> and C<FREETMPS>.  See L<perlcall> and L<perlxs>
+for more details on these macros.
+
+Mortal references are mainly used for xVs that are placed on perl's
+main stack.  The stack is problematic for reference tracking, because it
+contains a lot of xV references, but doesn't own those references: they
+are not counted.  Currently, there are many bugs resulting from xVs being
+destroyed while referenced by the stack, because the stack's uncounted
+references aren't enough to keep the xVs alive.  So when putting an
+(uncounted) reference on the stack, it is vitally important to ensure that
+there will be a counted reference to the same xV that will last at least
+as long as the uncounted reference.  But it's also important that that
+counted reference be cleaned up at an appropriate time, and not unduly
+prolong the xV's life.  For there to be a mortal reference is often the
+best way to satisfy this requirement, especially if the xV was created
+specifically to be put on the stack and would otherwise be unreferenced.
+
+To create a mortal reference, use the functions:
  
      SV*  sv_newmortal()
-    SV*  sv_2mortal(SV*)
      SV*  sv_mortalcopy(SV*)
+    SV*  sv_2mortal(SV*)
  
-The first call creates a mortal SV (with no value), the second converts an existing
-SV to a mortal SV (and thus defers a call to C<SvREFCNT_dec>), and the
-third creates a mortal copy of an existing SV.
-Because C<sv_newmortal> gives the new SV no value, it must normally be given one
-via C<sv_setpv>, C<sv_setiv>, etc. :
+C<sv_newmortal()> creates an SV (with the undefined value) whose sole
+reference is mortal.  C<sv_mortalcopy()> creates an xV whose value is a
+copy of a supplied xV and whose sole reference is mortal.  C<sv_2mortal()>
+mortalises an existing xV reference: it transfers ownership of a reference
+from the caller to the temps stack.  Because C<sv_newmortal> gives the new
+SV no value, it must normally be given one via C<sv_setpv>, C<sv_setiv>,
+etc. :
  
      SV *tmp = sv_newmortal();
      sv_setiv(tmp, an_integer);
@@ -867,17 +918,6 @@ As that is multiple C statements it is quite common so see this idiom instead:
  
      SV *tmp = sv_2mortal(newSViv(an_integer));
  
-
-You should be careful about creating mortal variables.  Strange things
-can happen if you make the same value mortal within multiple contexts,
-or if you make a variable mortal multiple
-times.  Thinking of "Mortalization"
-as deferred C<SvREFCNT_dec> should help to minimize such problems.
-For example if you are passing an SV which you I<know> has a high enough REFCNT
-to survive its use on the stack you need not do any mortalization.
-If you are not sure then doing an C<SvREFCNT_inc> and C<sv_2mortal>, or
-making a C<sv_mortalcopy> is safer.
-
  The mortal routines are not just for SVs; AVs and HVs can be
  made mortal by passing their address (type-casted to C<SV*>) to the
  C<sv_2mortal> or C<sv_mortalcopy> routines.
@@ -1075,7 +1115,7 @@ to contain an C<SV*> and is stored as-is with its REFCNT incremented.
  
  The sv_magic function uses C<how> to determine which, if any, predefined
  "Magic Virtual Table" should be assigned to the C<mg_virtual> field.
-See the L<Magic Virtual Tables> section below.  The C<how> argument is also
+See the L</Magic Virtual Tables> section below.  The C<how> argument is also
  stored in the C<mg_type> field.  The value of
  C<how> should be chosen from the set of macros
  C<PERL_MAGIC_foo> found in F<perl.h>.  Note that before
@@ -1086,8 +1126,9 @@ referring to 'U' magic rather than C<PERL_MAGIC_uvar> for example.
  The C<obj> argument is stored in the C<mg_obj> field of the C<MAGIC>
  structure.  If it is not the same as the C<sv> argument, the reference
  count of the C<obj> object is incremented.  If it is the same, or if
-the C<how> argument is C<PERL_MAGIC_arylen>, or if it is a NULL pointer,
-then C<obj> is merely stored, without the reference count being incremented.
+the C<how> argument is C<PERL_MAGIC_arylen>, C<PERL_MAGIC_regdatum>,
+C<PERL_MAGIC_regdata>, or if it is a NULL pointer, then C<obj> is merely
+stored, without the reference count being incremented.
  
  See also C<sv_magicext> in L<perlapi> for a more flexible way to add magic
  to an SV.
@@ -1122,16 +1163,16 @@ applied to that variable.
  The C<MGVTBL> has five (or sometimes eight) pointers to the following
  routine types:
  
-    int  (*svt_get)(SV* sv, MAGIC* mg);
-    int  (*svt_set)(SV* sv, MAGIC* mg);
-    U32  (*svt_len)(SV* sv, MAGIC* mg);
-    int  (*svt_clear)(SV* sv, MAGIC* mg);
-    int  (*svt_free)(SV* sv, MAGIC* mg);
+    int  (*svt_get)  (pTHX_ SV* sv, MAGIC* mg);
+    int  (*svt_set)  (pTHX_ SV* sv, MAGIC* mg);
+    U32  (*svt_len)  (pTHX_ SV* sv, MAGIC* mg);
+    int  (*svt_clear)(pTHX_ SV* sv, MAGIC* mg);
+    int  (*svt_free) (pTHX_ SV* sv, MAGIC* mg);
  
-    int  (*svt_copy)(SV *sv, MAGIC* mg, SV *nsv,
+    int  (*svt_copy) (pTHX_ SV *sv, MAGIC* mg, SV *nsv,
                                            const char *name, I32 namlen);
-    int  (*svt_dup)(MAGIC *mg, CLONE_PARAMS *param);
-    int  (*svt_local)(SV *nsv, MAGIC *mg);
+    int  (*svt_dup)  (pTHX_ MAGIC *mg, CLONE_PARAMS *param);
+    int  (*svt_local)(pTHX_ SV *nsv, MAGIC *mg);
  
  
  This MGVTBL structure is set at compile-time in F<perl.h> and there are
@@ -1232,6 +1273,8 @@ will be lost.
   v  PERL_MAGIC_vec            vtbl_vec       vec() lvalue
   w  PERL_MAGIC_utf8           vtbl_utf8      Cached UTF-8 information
   x  PERL_MAGIC_substr         vtbl_substr    substr() lvalue
+ Y  PERL_MAGIC_nonelem        vtbl_nonelem   Array element that does not
+                                             exist
   y  PERL_MAGIC_defelem        vtbl_defelem   Shadow "foreach" iterator
                                               variable / smart parameter
                                               vivification
@@ -1370,7 +1413,7 @@ creates a second hash which it blesses into the class which will implement
  the tie methods.  Lastly it ties the two hashes together, and returns a
  reference to the new tied hash.  Note that the code below does NOT call the
  TIEHASH method in the MyTie class -
-see L<Calling Perl Routines from within C Programs> for details on how
+see L</Calling Perl Routines from within C Programs> for details on how
  to do this.
  
      SV*
@@ -1732,7 +1775,7 @@ reuse specially assigned SVs (I<target>s) which are (as a corollary)
  not constantly freed/created.
  
  Each of the targets is created only once (but see
-L<Scratchpads and recursion> below), and when an opcode needs to put
+L</Scratchpads and recursion> below), and when an opcode needs to put
  an integer, a double, or a string on stack, it just sets the
  corresponding parts of its I<target> and puts the I<target> on stack.
  
@@ -2673,6 +2716,20 @@ whatever the compiler has.
  If you are printing addresses of pointers, use UVxf combined
  with PTR2UV(), do not use %lx or %p.
  
+=head2 Formatted Printing of C<Size_t> and C<SSize_t>
+
+The most general way to do this is to cast them to a UV or IV, and
+print as in the
+L<previous section|/Formatted Printing of IVs, UVs, and NVs>.
+
+But if you're using C<PerlIO_printf()>, it's less typing and visual
+clutter to use the C<"%z"> length modifier (for I<siZe>):
+
+        PerlIO_printf(PerlIO_stderr(), "STRLEN is %zu\n", len);
+
+This modifier is not portable, so its use should be restricted to
+C<PerlIO_printf()>.
+
  =head2 Pointer-To-Integer and Integer-To-Pointer
  
  Because pointer size does not necessarily equal integer size,
@@ -2741,7 +2798,7 @@ source, like this:
   =for apidoc sv_setiv
  
   Copies an integer into the given SV.  Does not handle 'set' magic.  See
- C<sv_setiv_mg>.
+ L<perlapi/sv_setiv_mg>.
  
   =cut
   */
@@ -2854,10 +2911,13 @@ so you can test if you need to do something special with this
  character like this (the C<UTF8_IS_INVARIANT()> is a macro that tests
  whether the byte is encoded as a single byte even in UTF-8):
  
-    U8 *utf;
-    U8 *utf_end; /* 1 beyond buffer pointed to by utf */
-    UV uv;     /* Note: a UV, not a U8, not a char */
-    STRLEN len; /* length of character in bytes */
+    U8 *utf;     /* Initialize this to point to the beginning of the
+                    sequence to convert */
+    U8 *utf_end; /* Initialize this to 1 beyond the end of the sequence
+                    pointed to by 'utf' */
+    UV uv;       /* Returned code point; note: a UV, not a U8, not a
+                    char */
+    STRLEN len;  /* Returned length of character in bytes */
  
      if (!UTF8_IS_INVARIANT(*utf))
          /* Must treat this as UTF-8 */
@@ -3389,7 +3449,7 @@ is likely to be imminently called which will do a C<FREETMPS>, so there's
  no need to do that either.
  
  The next step is to pop savestack entries: C<CX_LEAVE_SCOPE(cx)> is just
-defined as C<<LEAVE_SCOPE(cx->blk_oldsaveix)>>. Note that during the
+defined as C<< LEAVE_SCOPE(cx->blk_oldsaveix) >>. Note that during the
  popping, it's possible for perl to call destructors, call C<STORE> to undo
  localisations of tied vars, and so on. Any of these can die or call
  C<exit()>. In this case, C<dounwind()> will be called, and the current