Re: [ID 20000816.012] *foo = *_ is broken

[perl5.git] / pod / perlxs.pod
diff --git a/pod/perlxs.pod b/pod/perlxs.pod

index a4db596..e6f1862 100644 (file)
--- a/pod/perlxs.pod
+++ b/pod/perlxs.pod
@@ -276,6 +276,63 @@ some heuristic code which tries to disambiguate between "truly-void"
  and "old-practice-declared-as-void" functions. Hence your code is at
  mercy of this heuristics unless you use C<SV *> as return value.)
  
+=head2 Returning SVs, AVs and HVs through RETVAL
+
+When you're using RETVAL to return an C<SV *>, there's some magic
+going on behind the scenes that should be mentioned. When you're
+manipulating the argument stack using the ST(x) macro, for example,
+you usually have to pay special attention to reference counts. (For
+more about reference counts, see L<perlguts>.) To make your life
+easier, the typemap file automatically makes C<RETVAL> mortal when
+you're returning an C<SV *>. Thus, the following two XSUBs are more
+or less equivalent:
+
+  void
+  alpha()
+      PPCODE:
+          ST(0) = newSVpv("Hello World",0);
+          sv_2mortal(ST(0));
+          XSRETURN(1);
+  
+  SV *
+  beta()
+      CODE:
+          RETVAL = newSVpv("Hello World",0);
+      OUTPUT:
+          RETVAL
+
+This is quite useful as it usually improves readability. While
+this works fine for an C<SV *>, it's unfortunately not as easy
+to have C<AV *> or C<HV *> as a return value. You I<should> be
+able to write:
+
+  AV *
+  array()
+      CODE:
+          RETVAL = newAV();
+          /* do something with RETVAL */
+      OUTPUT:
+          RETVAL
+
+But due to an unfixable bug (fixing it would break lots of existing
+CPAN modules) in the typemap file, the reference count of the C<AV *>
+is not properly decremented. Thus, the above XSUB would leak memory
+whenever it is being called. The same problem exists for C<HV *>.
+
+When you're returning an C<AV *> or a C<HV *>, you have to make sure
+their reference count is decremented by making the AV or HV mortal:
+
+  AV *
+  array()
+      CODE:
+          RETVAL = newAV();
+          sv_2mortal((SV*)RETVAL);
+          /* do something with RETVAL */
+      OUTPUT:
+          RETVAL
+
+And also remember that you don't have to do this for an C<SV *>.
+
  =head2 The MODULE Keyword
  
  The MODULE keyword is used to start the XS code and to specify the package
@@ -310,6 +367,10 @@ keyword and must follow immediately after it when used.
  
       [ XS code in package RPC ]
  
+The same package name can be used more than once, allowing for
+non-contiguous code. This is useful if you have a stronger ordering
+principle than package names.
+
  Although this keyword is optional and in some cases provides redundant
  information it should always be used.  This keyword will ensure that the
  XSUBs appear in the desired package.
@@ -398,7 +459,7 @@ indicator.  For example,
  
    NO_OUTPUT int
    delete_file(char *name)
-    POST_CALL:
+    POSTCALL:
        if (RETVAL != 0)
           croak("Error %d while deleting file '%s'", RETVAL, name);
  
@@ -527,7 +588,7 @@ Here's a truly obscure example:
  
       bool_t
       rpcb_gettime(host,timep)
-          time_t &timep ; /* \$v{timep}=@{[$v{timep}=$arg]} */
+          time_t &timep; /* \$v{timep}=@{[$v{timep}=$arg]} */
            char *host + SvOK($v{timep}) ? SvPV($arg,PL_na) : NULL;
          OUTPUT:
            timep
@@ -809,14 +870,14 @@ mixed with ANSI-style declarations, as in
  (here the optional C<IN> keyword is omitted).
  
  The C<IN_OUT> parameters are identical with parameters introduced with
-L<The & Unary Operator> and put into the C<OUTPUT:> section (see L<The
-OUTPUT: Keyword>).  The C<IN_OUTLIST> parameters are very similar, the
-only difference being that the value C function writes through the
+L<The & Unary Operator> and put into the C<OUTPUT:> section (see
+L<The OUTPUT: Keyword>).  The C<IN_OUTLIST> parameters are very similar,
+the only difference being that the value C function writes through the
  pointer would not modify the Perl parameter, but is put in the output
  list.
  
  The C<OUTLIST>/C<OUT> parameter differ from C<IN_OUTLIST>/C<IN_OUT>
-parameters only by the the initial value of the Perl parameter not
+parameters only by the initial value of the Perl parameter not
  being read (and not being given to the C function - which gets some
  garbage instead).  For example, the same C function as above can be
  interfaced with as
@@ -839,6 +900,31 @@ However, the generated Perl function is called in very C-ish style:
    my ($day, $month);
    day_month($day, time, $month);
  
+=head2 The C<length(NAME)> Keyword
+
+If one of the input arguments to the C function is the length of a string
+argument C<NAME>, one can substitute the name of the length-argument by
+C<length(NAME)> in the XSUB declaration.  This argument must be omitted when
+the generated Perl function is called.  E.g.,
+
+  void
+  dump_chars(char *s, short l)
+  {
+    short n = 0;
+    while (n < l) {
+        printf("s[%d] = \"\\%#03o\"\n", n, (int)s[n]);
+        n++;
+    }
+  }
+
+  MODULE = x           PACKAGE = x
+
+  void dump_chars(char *s, short length(s))
+
+should be called as C<dump_chars($string)>.
+
+This directive is supported with ANSI-type function declarations only.
+
  =head2 Variable-length Parameter Lists
  
  XSUBs can have variable-length parameter lists by specifying an ellipsis
@@ -1035,8 +1121,8 @@ then not push return values on the stack.
  Some people may be inclined to include an explicit C<return> in the above
  XSUB, rather than letting control fall through to the end.  In those
  situations C<XSRETURN_EMPTY> should be used, instead.  This will ensure that
-the XSUB stack is properly adjusted.  Consult L<perlguts/"API LISTING"> for
-other C<XSRETURN> macros.
+the XSUB stack is properly adjusted.  Consult L<perlapi> for other
+C<XSRETURN> macros.
  
  Since C<XSRETURN_*> macros can be used with CODE blocks as well, one can
  rewrite this example as:
@@ -1053,14 +1139,14 @@ rewrite this example as:
         OUTPUT:
           RETVAL
  
-In fact, one can put this check into a POST_CALL: section as well.  Together
+In fact, one can put this check into a POSTCALL: section as well.  Together
  with PREINIT: simplifications, this leads to:
  
       int
       rpcb_gettime(host)
            char *host
            time_t  timep;
-       POST_CALL:
+       POSTCALL:
           if (RETVAL == 0)
                 XSRETURN_UNDEF;
  
@@ -1081,14 +1167,16 @@ any CODE:, PPCODE:, or OUTPUT: blocks which are present in the XSUB.  The
  code specified for the cleanup block will be added as the last statements
  in the XSUB.
  
-=head2 The POST_CALL: Keyword
+=head2 The POSTCALL: Keyword
  
  This keyword can be used when an XSUB requires special procedures
-executed after the C subroutine call is performed.  When the POST_CALL:
+executed after the C subroutine call is performed.  When the POSTCALL:
  keyword is used it must precede OUTPUT: and CLEANUP: blocks which are
  present in the XSUB.
  
-The POST_CALL: block does not make a lot of sense when the C subroutine
+See examples in L<"The NO_OUTPUT Keyword"> and L<"Returning Undef And Empty Lists">.
+
+The POSTCALL: block does not make a lot of sense when the C subroutine
  call is supplied by user by providing either CODE: or PPCODE: section.
  
  =head2 The BOOT: Keyword
@@ -1163,6 +1251,14 @@ prototypes.
            timep
            RETVAL
  
+If prototypes are enabled, you can disable them locally for a given
+XSUB as in the following example:
+
+    void
+    rpcb_gettime_noproto()
+        PROTOTYPE: DISABLE
+    ...
+
  =head2 The ALIAS: Keyword
  
  The ALIAS: keyword allows an XSUB to have two or more unique Perl names
@@ -1187,6 +1283,57 @@ C<BAR::getit()> for this function.
          OUTPUT:
            timep
  
+=head2 The OVERLOAD: Keyword
+
+Instead of writing an overloaded interface using pure Perl, you
+can also use the OVERLOAD keyword to define additional Perl names
+for your functions (like the ALIAS: keyword above).  However, the
+overloaded functions must be defined with three parameters (except
+for the nomethod() function which needs four parameters).  If any
+function has the OVERLOAD: keyword, several additional lines
+will be defined in the C file generated by xsubpp in order to
+register with the overload magic.
+
+Since blessed objects are actually stored as RV's, it is useful
+to use the typemap features to preprocess parameters and extract
+the actual SV stored within the blessed RV. See the sample for
+T_PTROBJ_SPECIAL below.
+
+To use the OVERLOAD: keyword, create an XS function which takes
+three input parameters (or use the C-style '...' definition) like
+this:
+
+    SV *
+    cmp (lobj, robj, swap)
+    My_Module_obj    lobj
+    My_Module_obj    robj
+    IV               swap
+    OVERLOAD: cmp <=>
+    { /* function defined here */}
+
+In this case, the function will overload both of the three-way
+comparison operators.  For all overload operations using non-alpha
+characters, you must type the parameter without quoting, separating
+multiple overloads with whitespace.  Note that "" (the stringify
+overload) should be entered as \"\" (i.e. escaped).
+
+=head2 The FALLBACK: Keyword
+
+In addition to the OVERLOAD keyword, if you need to control how
+Perl autogenerates missing overloaded operators, you can set the
+FALLBACK keyword in the module header section, like this:
+
+    MODULE = RPC  PACKAGE = RPC
+
+    FALLBACK: TRUE
+    ...
+
+where FALLBACK can take any of the three values TRUE, FALSE, or
+UNDEF.  If you do not set any FALLBACK value when using OVERLOAD,
+it defaults to UNDEF.  FALLBACK is not used except when one or 
+more functions using OVERLOAD have been defined.  Please see
+L<overload/Fallback> for more details.
+
  =head2 The INTERFACE: Keyword
  
  This keyword declares the current XSUB as a keeper of the given
@@ -1342,7 +1489,7 @@ of C<&>, but provide a pointer to this value when the C function is called.
  
  This is useful to avoid a CODE: block for a C function which takes a parameter
  by reference.  Typically, the parameter should be not a pointer type (an
-C<int> or C<long> but not a C<int*> or C<long*>).
+C<int> or C<long> but not an C<int*> or C<long*>).
  
  The following XSUB will generate incorrect C code.  The B<xsubpp> compiler will
  turn this into code which calls C<rpcb_gettime()> with parameters C<(char
@@ -1371,7 +1518,7 @@ C<&> through, so the function call looks like C<rpcb_gettime(host, &timep)>.
  =head2 Inserting POD, Comments and C Preprocessor Directives
  
  C preprocessor directives are allowed within BOOT:, PREINIT: INIT:, CODE:,
-PPCODE:, POST_CALL:, and CLEANUP: blocks, as well as outside the functions.
+PPCODE:, POSTCALL:, and CLEANUP: blocks, as well as outside the functions.
  Comments are allowed anywhere after the MODULE keyword.  The compiler will
  pass the preprocessor directives through untouched and will remove the
  commented lines. POD documentation is allowed at any point, both in the
@@ -1532,7 +1679,7 @@ of failure.  They may be
  candidates to return undef or an empty list in case of failure.  If the
  failure may be detected without a call to the C function, you may want to use
  an INIT: section to report the failure.  For failures detectable after the C
-function returns one may want to use a POST_CALL: section to process the
+function returns one may want to use a POSTCALL: section to process the
  failure.  In more complicated cases use CODE: or PPCODE: sections.
  
  If many functions use the same failure indication based on the return value,
@@ -1686,7 +1833,7 @@ double-colons (::), and declare C<Net_Config> to be of that type:
          T_PTROBJ_SPECIAL
                  if (sv_derived_from($arg, \"${(my $ntt=$ntype)=~s/_/::/g;\$ntt}\")) {
                          IV tmp = SvIV((SV*)SvRV($arg));
-                $var = ($type) tmp;
+                        $var = INT2PTR($type, tmp);
                  }
                  else
                          croak(\"$var is not of type ${(my $ntt=$ntype)=~s/_/::/g;\$ntt}\")
@@ -1700,6 +1847,184 @@ The INPUT and OUTPUT sections substitute underscores for double-colons
  on the fly, giving the desired effect.  This example demonstrates some
  of the power and versatility of the typemap facility.
  
+The INT2PTR macro (defined in perl.h) casts an integer to a pointer
+of a given type, taking care of the possibly different sizes of integers
+and pointers.  There are also PTR2IV, PTR2UV, PTR2NV macros,
+to map the other way, which may be useful in OUTPUT sections.
+
+=head2 Safely Storing Static Data in XS
+
+Starting with Perl 5.8, a macro framework has been defined to allow
+static data to be safely stored in XS modules that will be accessed from
+a multi-threaded Perl.
+
+Although primarily designed for use with multi-threaded Perl, the macros
+have been designed so that they will work with non-threaded Perl as well.
+
+It is therefore strongly recommended that these macros be used by all
+XS modules that make use of static data.
+
+The easiest way to get a template set of macros to use is by specifying
+the C<-g> (C<--global>) option with h2xs (see L<h2xs>).
+
+Below is an example module that makes use of the macros.
+
+    #include "EXTERN.h"
+    #include "perl.h"
+    #include "XSUB.h"
+
+    /* Global Data */
+
+    #define MY_CXT_KEY "BlindMice::_guts" XS_VERSION
+
+    typedef struct {
+        int count;
+        char name[3][100];
+    } my_cxt_t;
+
+    START_MY_CXT
+
+    MODULE = BlindMice           PACKAGE = BlindMice
+
+    BOOT:
+    {
+        MY_CXT_INIT;
+        MY_CXT.count = 0;
+        strcpy(MY_CXT.name[0], "None");
+        strcpy(MY_CXT.name[1], "None");
+        strcpy(MY_CXT.name[2], "None");
+    }                              
+
+    int
+    newMouse(char * name)
+        char * name;
+        PREINIT:
+          dMY_CXT;
+        CODE:
+          if (MY_CXT.count >= 3) {
+              warn("Already have 3 blind mice");
+              RETVAL = 0;
+          }
+          else {
+              RETVAL = ++ MY_CXT.count;
+              strcpy(MY_CXT.name[MY_CXT.count - 1], name);
+          }
+
+    char *
+    get_mouse_name(index)
+      int index
+      CODE:
+        dMY_CXT;
+        RETVAL = MY_CXT.lives ++;
+        if (index > MY_CXT.count)
+          croak("There are only 3 blind mice.");
+        else
+          RETVAL = newSVpv(MY_CXT.name[index - 1]);
+
+    void
+    CLONE(...)
+       CODE:
+       MY_CXT_CLONE;
+
+B<REFERENCE>
+
+=over 5
+
+=item MY_CXT_KEY
+
+This macro is used to define a unique key to refer to the static data
+for an XS module. The suggested naming scheme, as used by h2xs, is to
+use a string that consists of the module name, the string "::_guts"
+and the module version number.
+
+    #define MY_CXT_KEY "MyModule::_guts" XS_VERSION
+
+=item typedef my_cxt_t
+
+This struct typedef I<must> always be called C<my_cxt_t> -- the other
+C<CXT*> macros assume the existence of the C<my_cxt_t> typedef name.
+
+Declare a typedef named C<my_cxt_t> that is a structure that contains
+all the data that needs to be interpreter-local.
+
+    typedef struct {
+        int some_value;
+    } my_cxt_t;
+
+=item START_MY_CXT
+
+Always place the START_MY_CXT macro directly after the declaration
+of C<my_cxt_t>.
+
+=item MY_CXT_INIT
+
+The MY_CXT_INIT macro initialises storage for the C<my_cxt_t> struct.
+
+It I<must> be called exactly once -- typically in a BOOT: section. If you
+are maintaining multiple interpreters, it should be called once in each
+interpreter instance, except for interpreters cloned from existing ones.
+(But see C<MY_CXT_CLONE> below.)
+
+=item dMY_CXT
+
+Use the dMY_CXT macro (a declaration) in all the functions that access
+MY_CXT.
+
+=item MY_CXT
+
+Use the MY_CXT macro to access members of the C<my_cxt_t> struct. For
+example, if C<my_cxt_t> is 
+
+    typedef struct {
+        int index;
+    } my_cxt_t;
+
+then use this to access the C<index> member
+
+    dMY_CXT;
+    MY_CXT.index = 2;
+
+=item aMY_CXT/pMY_CXT
+
+C<dMY_CXT> may be quite expensive to calculate, and to avoid the overhead
+of invoking it in each function it is possible to pass the declaration
+onto other functions using the C<aMY_CXT>/C<pMY_CXT> macros, e.g.:
+
+    void sub1() {
+       dMY_CXT;
+       MY_CXT.index = 1;
+       sub2(aMY_CXT);
+    }
+
+    void sub2(pMY_CXT) {
+       MY_CXT.index = 2;
+    }
+
+Analogously to C<pTHX>, there are equivalent forms for when the macro is the
+first or last in multiple arguments, where an underscore represents a
+comma, i.e.  C<_aMY_CXT>, C<aMY_CXT_>, C<_pMY_CXT> and C<pMY_CXT_>.
+
+=item MY_CXT_CLONE
+
+By default, when a new interpreter is created as a copy of an existing one
+(e.g. via C<< threads->new() >>), both interpreters share the same physical
+my_cxt_t structure. Calling C<MY_CXT_CLONE> (typically via the package's
+C<CLONE()> function) causes a byte-for-byte copy of the structure to be
+taken, and any future dMY_CXT will cause the copy to be accessed instead.
+
+=item MY_CXT_INIT_INTERP(my_perl)
+
+=item dMY_CXT_INTERP(my_perl)
+
+These are versions of the macros which take an explicit interpreter as an
+argument.
+
+=back
+
+Note that these macros will only work together within the I<same> source
+file; that is, a dMY_CXT in one source file will access a different structure
+than a dMY_CXT in another source file.
+
  =head1 EXAMPLES
  
  File C<RPC.xs>: Interface to some ONC+ RPC bind library functions.