[perl #47365] Docs for \$ prototypes

[perl5.git] / pod / perlsub.pod
diff --git a/pod/perlsub.pod b/pod/perlsub.pod

index e830130..9121519 100644 (file)
--- a/pod/perlsub.pod
+++ b/pod/perlsub.pod
@@ -1,10 +1,12 @@
  =head1 NAME
+X<subroutine> X<function>
  
  perlsub - Perl subroutines
  
  =head1 SYNOPSIS
  
  To declare subroutines:
+X<subroutine, declaration> X<sub>
  
      sub NAME;                    # A "forward" declaration.
      sub NAME(PROTO);             #  ditto, but with prototypes
@@ -17,6 +19,7 @@ To declare subroutines:
      sub NAME(PROTO) : ATTRS BLOCK #  with prototypes and attributes
  
  To define an anonymous subroutine at runtime:
+X<subroutine, anonymous>
  
      $subref = sub BLOCK;                # no proto
      $subref = sub (PROTO) BLOCK;        # with proto
@@ -24,10 +27,12 @@ To define an anonymous subroutine at runtime:
      $subref = sub (PROTO) : ATTRS BLOCK; # with proto and attributes
  
  To import subroutines:
+X<import>
  
      use MODULE qw(NAME1 NAME2 NAME3);
  
  To call subroutines:
+X<subroutine, call> X<call>
  
      NAME(LIST);           # & is optional with parentheses.
      NAME LIST;    # Parentheses optional if predeclared/imported.
@@ -52,6 +57,7 @@ pass-by-reference instead to avoid this.  Both call and return lists may
  contain as many or as few scalar elements as you'd like.  (Often a
  function without an explicit return statement is called a subroutine, but
  there's really no difference from Perl's perspective.)
+X<subroutine, parameter> X<parameter>
  
  Any arguments passed in show up in the array C<@_>.  Therefore, if
  you called a function with two arguments, those would be stored in
@@ -65,16 +71,22 @@ or a reference to it is taken.  (Some earlier versions of Perl
  created the element whether or not the element was assigned to.)
  Assigning to the whole array C<@_> removes that aliasing, and does
  not update any arguments.
-
-The return value of a subroutine is the value of the last expression
-evaluated.  More explicitly, a C<return> statement may be used to exit the
-subroutine, optionally specifying the returned value, which will be
-evaluated in the appropriate context (list, scalar, or void) depending
-on the context of the subroutine call.  If you specify no return value,
-the subroutine returns an empty list in list context, the undefined
-value in scalar context, or nothing in void context.  If you return
-one or more aggregates (arrays and hashes), these will be flattened
-together into one large indistinguishable list.
+X<subroutine, argument> X<argument> X<@_>
+
+A C<return> statement may be used to exit a subroutine, optionally
+specifying the returned value, which will be evaluated in the
+appropriate context (list, scalar, or void) depending on the context of
+the subroutine call.  If you specify no return value, the subroutine
+returns an empty list in list context, the undefined value in scalar
+context, or nothing in void context.  If you return one or more
+aggregates (arrays and hashes), these will be flattened together into
+one large indistinguishable list.
+
+If no C<return> is found and if the last statement is an expression, its
+value is returned. If the last statement is a loop control structure
+like a C<foreach> or a C<while>, the returned value is unspecified. The
+empty sub returns the empty list.
+X<subroutine, return value> X<return value> X<return>
  
  Perl does not have named formal parameters.  In practice all you
  do is assign to a C<my()> list of these.  Variables that aren't
@@ -83,6 +95,7 @@ on creating private variables, see L<"Private Variables via my()">
  and L<"Temporary Values via local()">.  To create protected
  environments for a set of functions in a separate package (and
  probably a separate file), see L<perlmod/"Packages">.
+X<formal parameter> X<parameter, formal>
  
  Example:
  
@@ -129,6 +142,7 @@ Because the assignment copies the values, this also has the effect
  of turning call-by-reference into call-by-value.  Otherwise a
  function is free to do in-place modifications of C<@_> and change
  its caller's values.
+X<call-by-reference> X<call-by-value>
  
      upcase_in($v1, $v2);  # this changes $v1 and $v2
      sub upcase_in {
@@ -138,6 +152,7 @@ its caller's values.
  You aren't allowed to modify constants in this way, of course.  If an
  argument were actually literal and you tried to change it, you'd take a
  (presumably fatal) exception.   For example, this won't work:
+X<call-by-reference> X<call-by-value>
  
      upcase_in("frederick");
  
@@ -181,12 +196,14 @@ want to do an indirect subroutine call with a subroutine name or
  reference using the C<&$subref()> or C<&{$subref}()> constructs,
  although the C<< $subref->() >> notation solves that problem.
  See L<perlref> for more about all that.
+X<&>
  
  Subroutines may be called recursively.  If a subroutine is called
  using the C<&> form, the argument list is optional, and if omitted,
  no C<@_> array is set up for the subroutine: the C<@_> array at the
  time of the call is visible to the subroutine instead.  This is an
  efficiency mechanism that new users may wish to avoid.
+X<recursion>
  
      &foo(1,2,3);       # pass three arguments
      foo(1,2,3);                # the same
@@ -201,6 +218,7 @@ Not only does the C<&> form make the argument list optional, it also
  disables any prototype checking on arguments you do provide.  This
  is partly for historical reasons, and partly for having a convenient way
  to cheat if you know what you're doing.  See L<Prototypes> below.
+X<&>
  
  Subroutines whose names are in all upper case are reserved to the Perl
  core, as are modules whose names are in all lower case.  A subroutine in
@@ -209,12 +227,14 @@ indirectly by the run-time system itself, usually due to a triggered event.
  Subroutines that do special, pre-defined things include C<AUTOLOAD>, C<CLONE>,
  C<DESTROY> plus all functions mentioned in L<perltie> and L<PerlIO::via>.
  
-The C<BEGIN>, C<CHECK>, C<INIT> and C<END> subroutines are not so much
-subroutines as named special code blocks, of which you can have more
-than one in a package, and which you can B<not> call explicitely.  See
-L<perlmod/"BEGIN, CHECK, INIT and END">
+The C<BEGIN>, C<UNITCHECK>, C<CHECK>, C<INIT> and C<END> subroutines
+are not so much subroutines as named special code blocks, of which you
+can have more than one in a package, and which you can B<not> call
+explicitly.  See L<perlmod/"BEGIN, UNITCHECK, CHECK, INIT and END">.
  
  =head2 Private Variables via my()
+X<my> X<variable, lexical> X<lexical> X<lexical variable> X<scope, lexical>
+X<lexical scope> X<attributes, my>
  
  Synopsis:
  
@@ -242,6 +262,7 @@ variables declared with C<my> are totally hidden from the outside
  world, including any called subroutines.  This is true if it's the
  same subroutine called from itself or elsewhere--every call gets
  its own copy.
+X<local>
  
  This doesn't mean that a C<my> variable declared in a statically
  enclosing lexical scope would be invisible.  Only dynamic scopes
@@ -255,6 +276,7 @@ occurred at the same scope, presumably file scope.
  An C<eval()>, however, can see lexical variables of the scope it is
  being evaluated in, so long as the names aren't hidden by declarations within
  the C<eval()> itself.  See L<perlref>.
+X<eval, scope of>
  
  The parameter list to my() may be assigned to if desired, which allows you
  to initialize your variables.  (If no initializer is given for a
@@ -337,6 +359,7 @@ in the manner of C<local>.  However, if the index variable is
  prefixed with the keyword C<my>, or if there is already a lexical
  by that name in scope, then a new lexical is created instead.  Thus
  in the loop
+X<foreach> X<for>
  
      for my $i (1, 2, 3) {
          some_function();
@@ -344,6 +367,7 @@ in the loop
  
  the scope of $i extends to the end of the loop, but not beyond it,
  rendering the value of $i inaccessible within C<some_function()>.
+X<foreach> X<for>
  
  Some users may wish to encourage the use of lexically scoped variables.
  As an aid to catching implicit uses of package variables,
@@ -370,7 +394,6 @@ never fully qualified with the package name.  In particular, you're not
  allowed to try to make a package variable (or other global) lexical:
  
      my $pack::var;     # ERROR!  Illegal syntax
-    my $_;             # also illegal (currently)
  
  In fact, dynamic variables (also known as package or global variables)
  are still accessible using the fully qualified C<::> notation even while a
@@ -408,6 +431,34 @@ L<perlref/"Function Templates"> for something of a work-around to
  this.
  
  =head2 Persistent Private Variables
+X<state> X<state variable> X<static> X<variable, persistent> X<variable, static> X<closure>
+
+There are two ways to build persistent private variables in Perl 5.10.
+First, you can simply use the C<state> feature. Or, you can use closures,
+if you want to stay compatible with releases older than 5.10.
+
+=head3 Persistent variables via state()
+
+Beginning with perl 5.9.4, you can declare variables with the C<state>
+keyword in place of C<my>. For that to work, though, you must have
+enabled that feature beforehand, either by using the C<feature> pragma, or
+by using C<-E> on one-liners.  (See L<feature>.)
+
+For example, the following code maintains a private counter, incremented
+each time the gimme_another() function is called:
+
+    use feature 'state';
+    sub gimme_another { state $x; return ++$x }
+
+Also, since C<$x> is lexical, it can't be reached or modified by any Perl
+code outside.
+
+When combined with variable declaration, simple scalar assignment to C<state>
+variables (as in C<state $x = 42>) is executed only the first time.  When such
+statements are evaluated subsequent times, the assignment is ignored.  The
+behavior of this sort of assignment to non-scalar variables is undefined.
+
+=head3 Persistent variables with closures
  
  Just because a lexical variable is lexically (also called statically)
  scoped to its enclosing block, C<eval>, or C<do> FILE, this doesn't mean that
@@ -454,8 +505,9 @@ starts to run:
         }
      }
  
-See L<perlmod/"BEGIN, CHECK, INIT and END"> about the
-special triggered code blocks, C<BEGIN>, C<CHECK>, C<INIT> and C<END>.
+See L<perlmod/"BEGIN, UNITCHECK, CHECK, INIT and END"> about the
+special triggered code blocks, C<BEGIN>, C<UNITCHECK>, C<CHECK>,
+C<INIT> and C<END>.
  
  If declared at the outermost scope (the file scope), then lexicals
  work somewhat like C's file statics.  They are available to all
@@ -464,6 +516,8 @@ from outside that file.  This strategy is sometimes used in modules
  to create private variables that the whole module can see.
  
  =head2 Temporary Values via local()
+X<local> X<scope, dynamic> X<dynamic scope> X<variable, local>
+X<variable, temporary>
  
  B<WARNING>: In general, you should be using C<my> instead of C<local>, because
  it's faster and safer.  Exceptions to this include the global punctuation
@@ -481,6 +535,7 @@ Synopsis:
      local @oof = @bar;         # make @oof dynamic, and init it
  
      local $hash{key} = "val";  # sets a local value for this hash entry
+    delete local $hash{key};    # delete this entry for the current block
      local ($cond ? $v1 : $v2); # several types of lvalues support
                                 # localization
  
@@ -519,6 +574,7 @@ through a loop.  Consequently, it's more efficient to localize your
  variables outside the loop.
  
  =head3 Grammatical note on local()
+X<local, context>
  
  A C<local> is simply a modifier on an lvalue expression.  When you assign to
  a C<local>ized variable, the C<local> doesn't change whether its list is viewed
@@ -534,6 +590,7 @@ both supply a list context to the right-hand side, while
  supplies a scalar context.
  
  =head3 Localization of special variables
+X<local, special variable>
  
  If you localize a special variable, you'll be giving a new value to it,
  but its magic won't go away.  That means that all side-effects related
@@ -569,8 +626,10 @@ code that relies on any particular behaviour of localising tied arrays
  or hashes (localising individual elements is still okay).
  See L<perl58delta/"Localising Tied Arrays and Hashes Is Broken"> for more
  details.
+X<local, tie>
  
  =head3 Localization of globs
+X<local, glob> X<glob>
  
  The construct
  
@@ -592,6 +651,7 @@ As of perl 5.9.1, you can also use the lexical form of C<$_> (declaring it
  with C<my $_>), which avoids this problem completely.
  
  =head3 Localization of elements of composite types
+X<local, composite type element> X<local, array element> X<local, hash element>
  
  It's also worth taking a moment to explain what happens when you
  C<local>ize a member of a composite type (i.e. an array or hash element).
@@ -633,7 +693,57 @@ Perl will print
  The behavior of local() on non-existent members of composite
  types is subject to change in future.
  
+=head3 Localized deletion of elements of composite types
+X<delete> X<local, composite type element> X<local, array element> X<local, hash element>
+
+You can use the C<delete local $array[$idx]> and C<delete local $hash{key}>
+constructs to delete a composite type entry for the current block and restore
+it when it ends. They return the array/hash value before the localization,
+which means that they are respectively equivalent to
+
+    do {
+        my $val = $array[$idx];
+        local  $array[$idx];
+        delete $array[$idx];
+        $val
+    }
+
+and
+
+    do {
+        my $val = $hash{key};
+        local  $hash{key};
+        delete $hash{key};
+        $val
+    }
+
+except that for those the C<local> is scoped to the C<do> block. Slices are
+also accepted.
+
+    my %hash = (
+     a => [ 7, 8, 9 ],
+     b => 1,
+    );
+
+    {
+     my $a = delete local $hash{a};
+     # $a is [ 7, 8, 9 ]
+     # %hash is (b => 1)
+
+     {
+      my @nums = delete local @$a[0, 2];
+      # @nums is (7, 9)
+      # $a is [ undef, 8 ]
+
+      $a->[0] = 999; # will be erased when the scope ends
+     }
+     # $a is back to [ 7, 8, 9 ]
+
+    }
+    # %hash is back to its original state
+
  =head2 Lvalue subroutines
+X<lvalue> X<subroutine, lvalue>
  
  B<WARNING>: Lvalue subroutines are still experimental and the
  implementation may change in future versions of Perl.
@@ -703,6 +813,7 @@ subroutine never gets that chance.  Consider;
  =back
  
  =head2 Passing Symbol Table Entries (typeglobs)
+X<typeglob> X<*>
  
  B<WARNING>: The mechanism described in this section was originally
  the only way to simulate pass-by-reference in older versions of
@@ -745,6 +856,7 @@ the individual arrays.  For more on typeglobs, see
  L<perldata/"Typeglobs and Filehandles">.
  
  =head2 When to Still Use local()
+X<local> X<variable, local>
  
  Despite the existence of C<my>, there are three places where the
  C<local> operator still shines.  In fact, in these three places, you
@@ -822,6 +934,7 @@ this operation could on occasion misbehave.
  =back
  
  =head2 Pass by Reference
+X<pass by reference> X<pass-by-reference> X<reference>
  
  If you want to pass more than one array or hash into a function--or
  return them from it--and have them maintain their integrity, then
@@ -935,6 +1048,7 @@ Notice to pass back just the bare *FH, not its reference.
      }
  
  =head2 Prototypes
+X<prototype> X<subroutine, prototype>
  
  Perl supports a very limited kind of compile-time argument checking
  using function prototyping.  If you declare
@@ -970,18 +1084,21 @@ corresponding built-in.
      sub myreverse (@)       myreverse $a, $b, $c
      sub myjoin ($@)         myjoin ":", $a, $b, $c
      sub mypop (\@)          mypop @array
-    sub mysplice (\@$$@)     mysplice @array, @array, 0, @pushme
+    sub mysplice (\@$$@)     mysplice @array, 0, 2, @pushme
      sub mykeys (\%)         mykeys %{$hashref}
      sub myopen (*;$)        myopen HANDLE, $name
      sub mypipe (**)         mypipe READHANDLE, WRITEHANDLE
      sub mygrep (&@)         mygrep { /foo/ } $a, $b, $c
-    sub myrand ($)          myrand 42
+    sub myrand (;$)         myrand 42
      sub mytime ()           mytime
  
  Any backslashed prototype character represents an actual argument
-that absolutely must start with that character.  The value passed
-as part of C<@_> will be a reference to the actual argument given
-in the subroutine call, obtained by applying C<\> to that argument.
+that must start with that character (optionally preceded by C<my>,
+C<our> or C<local>), with the exception of C<$>, which will accept a
+hash or array element even without a dollar sign, such as
+C<< my_function()->[0] >>. The value passed as part of C<@_> will be a
+reference to the actual argument given in the subroutine call,
+obtained by applying C<\> to that argument.
  
  You can also backslash several argument types simultaneously by using
  the C<\[]> notation:
@@ -1019,9 +1136,13 @@ follows:
         ...
      }
  
-A semicolon separates mandatory arguments from optional arguments.
+A semicolon (C<;>) separates mandatory arguments from optional arguments.
  It is redundant before C<@> or C<%>, which gobble up everything else.
  
+As the last character of a prototype, or just before a semicolon, you can
+use C<_> in place of C<$>: if this argument is not provided, C<$_> will be
+used instead.
+
  Note how the last three examples in the table above are treated
  specially by the parser.  C<mygrep()> is parsed as a true list
  operator, C<myrand()> is parsed as a true unary operator with unary
@@ -1035,6 +1156,7 @@ without a prototype.
  
  The interesting thing about C<&> is that you can generate new syntax with it,
  provided it's in the initial position:
+X<&>
  
      sub try (&@) {
         my($try,$catch) = @_;
@@ -1059,6 +1181,7 @@ scoped, those anonymous subroutines can act like closures... (Gee,
  is this sounding a little Lispish?  (Never mind.))))
  
  And here's a reimplementation of the Perl C<grep> operator:
+X<grep>
  
      sub mygrep (&@) {
         my $code = shift;
@@ -1112,6 +1235,7 @@ This is all very powerful, of course, and should be used only in moderation
  to make the world a better place.
  
  =head2 Constant Functions
+X<constant>
  
  Functions with a prototype of C<()> are potential candidates for
  inlining.  If the result after optimization and constant folding
@@ -1133,7 +1257,17 @@ The following functions would all be inlined:
      sub FLAG_MASK ()   { FLAG_FOO | FLAG_BAR }
  
      sub OPT_BAZ ()     { not (0x1B58 & FLAG_MASK) }
-    sub BAZ_VAL () {
+
+    sub N () { int(OPT_BAZ) / 3 }
+
+    sub FOO_SET () { 1 if FLAG_MASK & FLAG_FOO }
+
+Be aware that these will not be inlined; as they contain inner scopes,
+the constant folding doesn't reduce them to a single constant:
+
+    sub foo_set () { if (FLAG_MASK & FLAG_FOO) { 1 } }
+
+    sub baz_val () {
         if (OPT_BAZ) {
             return 23;
         }
@@ -1142,13 +1276,6 @@ The following functions would all be inlined:
         }
      }
  
-    sub N () { int(BAZ_VAL) / 3 }
-    BEGIN {
-       my $prod = 1;
-       for (1..N) { $prod *= $_ }
-       sub N_FACTORIAL () { $prod }
-    }
-
  If you redefine a subroutine that was eligible for inlining, you'll get
  a mandatory warning.  (You can use this warning to tell whether or not a
  particular subroutine is considered constant.)  The warning is
@@ -1164,6 +1291,7 @@ inlining mechanism in some other way, such as
      }
  
  =head2 Overriding Built-in Functions
+X<built-in> X<override> X<CORE> X<CORE::GLOBAL>
  
  Many built-in functions may be overridden, though this should be tried
  only occasionally and for good reason.  Typically this might be
@@ -1226,10 +1354,9 @@ that understands regular expressions.
      sub glob {
         my $pat = shift;
         my @got;
-       local *D;
-       if (opendir D, '.') { 
-           @got = grep /$pat/, readdir D; 
-           closedir D;   
+       if (opendir my $d, '.') { 
+           @got = grep /$pat/, readdir $d; 
+           closedir $d;   
         }
         return @got;
      }
@@ -1281,11 +1408,13 @@ And, as you'll have noticed from the previous example, if you override
  C<glob>, the C<< <*> >> glob operator is overridden as well.
  
  In a similar fashion, overriding the C<readline> function also overrides
-the equivalent I/O operator C<< <FILEHANDLE> >>.
+the equivalent I/O operator C<< <FILEHANDLE> >>. Likewise, overriding
+C<readpipe> overrides the operators C<``> and C<qx//>.
  
  Finally, some built-ins (e.g. C<exists> or C<grep>) can't be overridden.
  
  =head2 Autoloading
+X<autoloading> X<AUTOLOAD>
  
  If you call a subroutine that is undefined, you would ordinarily
  get an immediate, fatal error complaining that the subroutine doesn't
@@ -1299,7 +1428,11 @@ of the original subroutine magically appears in the global $AUTOLOAD
  variable of the same package as the C<AUTOLOAD> routine.  The name
  is not passed as an ordinary argument because, er, well, just
  because, that's why.  (As an exception, a method call to a nonexistent
-C<import> or C<unimport> method is just skipped instead.)
+C<import> or C<unimport> method is just skipped instead.  Also, if
+the AUTOLOAD subroutine is an XSUB, C<$AUTOLOAD> is not populated;
+instead, you should call L<< C<SvPVX>E<sol>C<SvCUR>|perlapi >> on the
+C<CV> for C<AUTOLOAD> to retrieve the method name.)
+
  
  Many C<AUTOLOAD> routines load in a definition for the requested
  subroutine using eval(), then execute that subroutine using a special
@@ -1337,6 +1470,7 @@ SelfLoader modules in L<SelfLoader>, and the document on adding C
  functions to Perl code in L<perlxs>.
  
  =head2 Subroutine Attributes
+X<attribute> X<subroutine, attribute> X<attrs>
  
  A subroutine declaration or definition may have a list of attributes
  associated with it.  If such an attribute list is present, it is
@@ -1354,17 +1488,17 @@ nest properly.
  
  Examples of valid syntax (even though the attributes are unknown):
  
-    sub fnord (&\%) : switch(10,foo(7,3))  :  expensive ;
-    sub plugh () : Ugly('\(") :Bad ;
+    sub fnord (&\%) : switch(10,foo(7,3))  :  expensive;
+    sub plugh () : Ugly('\(") :Bad;
      sub xyzzy : _5x5 { ... }
  
  Examples of invalid syntax:
  
-    sub fnord : switch(10,foo() ; # ()-string not balanced
-    sub snoid : Ugly('(') ;      # ()-string not balanced
-    sub xyzzy : 5x5 ;            # "5x5" not a valid identifier
-    sub plugh : Y2::north ;      # "Y2::north" not a simple identifier
-    sub snurt : foo + bar ;      # "+" not a colon or space
+    sub fnord : switch(10,foo(); # ()-string not balanced
+    sub snoid : Ugly('(');       # ()-string not balanced
+    sub xyzzy : 5x5;             # "5x5" not a valid identifier
+    sub plugh : Y2::north;       # "Y2::north" not a simple identifier
+    sub snurt : foo + bar;       # "+" not a colon or space
  
  The attribute list is passed as a list of constant strings to the code
  which associates them with the subroutine.  In particular, the second example