perlfunc.pod: ioctl.ph

[perl5.git] / pod / perlfunc.pod
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod

index 833e891..57b355d 100644 (file)
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -742,6 +742,10 @@ chr(0x263a) is a Unicode smiley face.  Note that characters from 128
  to 255 (inclusive) are by default not encoded in UTF-8 Unicode for
  backward compatibility reasons (but see L<encoding>).
  
+Negative values give the Unicode replacement character (chr(0xfffd)),
+except under the L<bytes> pragma, where the low eight bits of the value
+(truncated to an integer) are used.
+
  If NUMBER is omitted, uses C<$_>.
  
  For the reverse, use L</ord>.
@@ -782,7 +786,8 @@ program exits with non-zero status.  (If the only problem was that the
  program exited non-zero, C<$!> will be set to C<0>.)  Closing a pipe
  also waits for the process executing on the pipe to complete, in case you
  want to look at the output of the pipe afterwards, and
-implicitly puts the exit status value of that command into C<$?>.
+implicitly puts the exit status value of that command into C<$?> and
+C<${^CHILD_ERROR_NATIVE}>.
  
  Prematurely closing the read end of a pipe (i.e. before the process
  writing to it at the other end has closed it) will result in a
@@ -1206,8 +1211,7 @@ A deprecated form of subroutine call.  See L<perlsub>.
  =item do EXPR
  
  Uses the value of EXPR as a filename and executes the contents of the
-file as a Perl script.  Its primary use is to include subroutines
-from a Perl subroutine library.
+file as a Perl script.
  
      do 'stat.pl';
  
@@ -1216,7 +1220,7 @@ is just like
      eval `cat stat.pl`;
  
  except that it's more efficient and concise, keeps track of the current
-filename for error messages, searches the @INC libraries, and updates
+filename for error messages, searches the @INC directories, and updates
  C<%INC> if the file is found.  See L<perlvar/Predefined Names> for these
  variables.  It also differs in that code evaluated with C<do FILENAME>
  cannot see lexicals in the enclosing scope; C<eval STRING> does.  It's the
@@ -2055,7 +2059,7 @@ addresses returned by the corresponding system library call.  In the
  Internet domain, each address is four bytes long and you can unpack it
  by saying something like:
  
-    ($a,$b,$c,$d) = unpack('C4',$addr[0]);
+    ($a,$b,$c,$d) = unpack('W4',$addr[0]);
  
  The Socket library makes this slightly easier:
  
@@ -2115,13 +2119,13 @@ integer which you can decode using unpack with the C<i> (or C<I>) format.
  
  An example testing if Nagle's algorithm is turned on on a socket:
  
-    use Socket;
+    use Socket qw(:all);
  
      defined(my $tcp = getprotobyname("tcp"))
         or die "Could not determine the protocol number for tcp";
-    # my $tcp = Socket::IPPROTO_TCP; # Alternative
-    my $packed = getsockopt($socket, $tcp, Socket::TCP_NODELAY)
-       or die "Could not query TCP_NODELAY SOCKEt option: $!";
+    # my $tcp = IPPROTO_TCP; # Alternative
+    my $packed = getsockopt($socket, $tcp, TCP_NODELAY)
+       or die "Could not query TCP_NODELAY socket option: $!";
      my $nodelay = unpack("I", $packed);
      print "Nagle's algorithm is turned ", $nodelay ? "off\n" : "on\n";
  
@@ -2186,6 +2190,8 @@ This scalar value is B<not> locale dependent (see L<perllocale>), but is
  instead a Perl builtin.  To get somewhat similar but locale dependent date
  strings, see the example in L</localtime>.
  
+See L<perlport/gmtime> for portability concerns.
+
  =item goto LABEL
  
  =item goto EXPR
@@ -2266,7 +2272,7 @@ See also L</map> for a list composed of the results of the BLOCK or EXPR.
  =item hex
  
  Interprets EXPR as a hex string and returns the corresponding value.
-(To convert strings that might start with either 0, 0x, or 0b, see
+(To convert strings that might start with either C<0>, C<0x>, or C<0b>, see
  L</oct>.)  If EXPR is omitted, uses C<$_>.
  
      print hex '0xAf'; # prints '175'
@@ -2274,7 +2280,8 @@ L</oct>.)  If EXPR is omitted, uses C<$_>.
  
  Hex strings may only represent integers.  Strings that would cause
  integer overflow trigger a warning.  Leading whitespace is not stripped,
-unlike oct().
+unlike oct(). To present something as hex, look into L</printf>,
+L</sprintf>, or L</unpack>.
  
  =item import
  
@@ -2312,9 +2319,9 @@ functions will serve you better than will int().
  
  Implements the ioctl(2) function.  You'll probably first have to say
  
-    require "ioctl.ph";        # probably in /usr/local/lib/perl/ioctl.ph
+    require "sys/ioctl.ph";    # probably in $Config{archlib}/sys/ioctl.ph
  
-to get the correct function definitions.  If F<ioctl.ph> doesn't
+to get the correct function definitions.  If F<sys/ioctl.ph> doesn't
  exist or doesn't have the correct definitions you'll have to roll your
  own, based on your C header files such as F<< <sys/ioctl.h> >>.
  (There is a Perl script called B<h2ph> that comes with the Perl kit that
@@ -2582,6 +2589,8 @@ try for example:
  Note that the C<%a> and C<%b>, the short forms of the day of the week
  and the month of the year, may not necessarily be three characters wide.
  
+See L<perlport/localtime> for portability concerns.
+
  =item lock THING
  
  This function places an advisory lock on a shared variable, or referenced
@@ -2687,10 +2696,13 @@ and you get list of anonymous hashes each with only 1 entry.
  
  =item mkdir FILENAME
  
+=item mkdir
+
  Creates the directory specified by FILENAME, with permissions
  specified by MASK (as modified by C<umask>).  If it succeeds it
  returns true, otherwise it returns false and sets C<$!> (errno).
-If omitted, MASK defaults to 0777.
+If omitted, MASK defaults to 0777. If omitted, FILENAME defaults
+to C<$_>.
  
  In general, it is better to create directories with permissive MASK,
  and let the user modify that with their C<umask>, than it is to supply
@@ -3008,7 +3020,7 @@ Examples:
         }
      }
  
-See L<perliol/> for detailed info on PerlIO.
+See L<perliol> for detailed info on PerlIO.
  
  You may also, in the Bourne shell tradition, specify an EXPR beginning
  with C<< '>&' >>, in which case the rest of the string is interpreted
@@ -3123,7 +3135,8 @@ be set for the newly opened file descriptor as determined by the value
  of $^F.  See L<perlvar/$^F>.
  
  Closing any piped filehandle causes the parent process to wait for the
-child to finish, and returns the status value in C<$?>.
+child to finish, and returns the status value in C<$?> and
+C<${^CHILD_ERROR_NATIVE}>.
  
  The filename passed to 2-argument (or 1-argument) form of open() will
  have leading and trailing whitespace deleted, and the normal
@@ -3297,7 +3310,8 @@ Takes a LIST of values and converts it into a string using the rules
  given by the TEMPLATE.  The resulting string is the concatenation of
  the converted values.  Typically, each converted value looks
  like its machine-level representation.  For example, on 32-bit machines
-a converted integer may be represented by a sequence of 4 bytes.
+an integer may be represented by a sequence of 4 bytes which will be 
+converted to a sequence of 4 characters.
  
  The TEMPLATE is a sequence of characters that give the order and type
  of values, as follows:
@@ -3312,7 +3326,9 @@ of values, as follows:
      H  A hex string (high nybble first).
  
      c  A signed char (8-bit) value.
-    C  An unsigned char value.  Only does bytes.  See U for Unicode.
+    C  An unsigned C char (octet) even under Unicode. Should normally not
+        be used. See U and W instead.
+    W   An unsigned char value (can be greater than 255).
  
      s  A signed short (16-bit) value.
      S  An unsigned short value.
@@ -3355,15 +3371,16 @@ of values, as follows:
      U  A Unicode character number.  Encodes to UTF-8 internally
         (or UTF-EBCDIC in EBCDIC platforms).
  
-    w  A BER compressed integer.  Its bytes represent an unsigned
-       integer in base 128, most significant digit first, with as
-        few digits as possible.  Bit eight (the high bit) is set
-        on each byte except the last.
+    w  A BER compressed integer (not an ASN.1 BER, see perlpacktut for
+       details).  Its bytes represent an unsigned integer in base 128,
+       most significant digit first, with as few digits as possible.  Bit
+       eight (the high bit) is set on each byte except the last.
  
      x  A null byte.
      X  Back up a byte.
-    @  Null fill to absolute position, counted from the start of
-        the innermost ()-group.
+    @  Null fill or truncate to absolute position, counted from the
+        start of the innermost ()-group.
+    .   Null fill or truncate to absolute position specified by value.
      (  Start of a ()-group.
  
  Some letters in the TEMPLATE may optionally be followed by one or
@@ -3377,6 +3394,10 @@ which the modifier is valid):
  
          nNvV       Treat integers as signed instead of unsigned.
  
+        @.         Specify position as byte offset in the internal
+                   representation of the packed string. Efficient but
+                   dangerous.
+
      >   sSiIlLqQ   Force big-endian byte-order on the type.
          jJfFdDpP   (The "big end" touches the construct.)
  
@@ -3395,12 +3416,13 @@ The following rules apply:
  
  Each letter may optionally be followed by a number giving a repeat
  count.  With all types except C<a>, C<A>, C<Z>, C<b>, C<B>, C<h>,
-C<H>, C<@>, C<x>, C<X> and C<P> the pack function will gobble up that
-many values from the LIST.  A C<*> for the repeat count means to use
-however many items are left, except for C<@>, C<x>, C<X>, where it is
-equivalent to C<0>, and C<u>, where it is equivalent to 1 (or 45, what
-is the same).  A numeric repeat count may optionally be enclosed in
-brackets, as in C<pack 'C[80]', @arr>.
+C<H>, C<@>, C<.>, C<x>, C<X> and C<P> the pack function will gobble up
+that many values from the LIST.  A C<*> for the repeat count means to
+use however many items are left, except for C<@>, C<x>, C<X>, where it
+is equivalent to C<0>, for C<.> where it means relative to string start
+and C<u>, where it is equivalent to 1 (or 45, which is the same).
+A numeric repeat count may optionally be enclosed in brackets, as in
+C<pack 'C[80]', @arr>.
  
  One can replace the numeric repeat count by a template enclosed in brackets;
  then the packed length of this template in bytes is used as a count.
@@ -3414,72 +3436,84 @@ When used with C<Z>, C<*> results in the addition of a trailing null
  byte (so the packed result will be one longer than the byte C<length>
  of the item).
  
+When used with C<@>, the repeat count represents an offset from the start
+of the innermost () group.
+
+When used with C<.>, the repeat count is used to determine the starting
+position from where the value offset is calculated. If the repeat count
+is 0, it's relative to the current position. If the repeat count is C<*>,
+the offset is relative to the start of the packed string. And if it's an
+integer C<n> the offset is relative to the start of the n-th innermost
+() group (or the start of the string if C<n> is bigger than the group
+level).
+
  The repeat count for C<u> is interpreted as the maximal number of bytes
-to encode per line of output, with 0 and 1 replaced by 45.
+to encode per line of output, with 0, 1 and 2 replaced by 45. The repeat 
+count should not be more than 65.
  
  =item *
  
  The C<a>, C<A>, and C<Z> types gobble just one value, but pack it as a
  string of length count, padding with nulls or spaces as necessary.  When
-unpacking, C<A> strips trailing spaces and nulls, C<Z> strips everything
-after the first null, and C<a> returns data verbatim.  When packing,
-C<a>, and C<Z> are equivalent.
+unpacking, C<A> strips trailing whitespace and nulls, C<Z> strips everything
+after the first null, and C<a> returns data verbatim.
  
  If the value-to-pack is too long, it is truncated.  If too long and an
  explicit count is provided, C<Z> packs only C<$count-1> bytes, followed
-by a null byte.  Thus C<Z> always packs a trailing null byte under
-all circumstances.
+by a null byte.  Thus C<Z> always packs a trailing null (except when the
+count is 0).
  
  =item *
  
  Likewise, the C<b> and C<B> fields pack a string that many bits long.
-Each byte of the input field of pack() generates 1 bit of the result.
+Each character of the input field of pack() generates 1 bit of the result.
  Each result bit is based on the least-significant bit of the corresponding
-input byte, i.e., on C<ord($byte)%2>.  In particular, bytes C<"0"> and
-C<"1"> generate bits 0 and 1, as do bytes C<"\0"> and C<"\1">.
+input character, i.e., on C<ord($char)%2>.  In particular, characters C<"0">
+and C<"1"> generate bits 0 and 1, as do characters C<"\0"> and C<"\1">.
  
  Starting from the beginning of the input string of pack(), each 8-tuple
-of bytes is converted to 1 byte of output.  With format C<b>
-the first byte of the 8-tuple determines the least-significant bit of a
-byte, and with format C<B> it determines the most-significant bit of
-a byte.
+of characters is converted to 1 character of output.  With format C<b>
+the first character of the 8-tuple determines the least-significant bit of a
+character, and with format C<B> it determines the most-significant bit of
+a character.
  
  If the length of the input string is not exactly divisible by 8, the
-remainder is packed as if the input string were padded by null bytes
+remainder is packed as if the input string were padded by null characters
  at the end.  Similarly, during unpack()ing the "extra" bits are ignored.
  
-If the input string of pack() is longer than needed, extra bytes are ignored.
-A C<*> for the repeat count of pack() means to use all the bytes of
-the input field.  On unpack()ing the bits are converted to a string
-of C<"0">s and C<"1">s.
+If the input string of pack() is longer than needed, extra characters are 
+ignored. A C<*> for the repeat count of pack() means to use all the 
+characters of the input field.  On unpack()ing the bits are converted to a 
+string of C<"0">s and C<"1">s.
  
  =item *
  
  The C<h> and C<H> fields pack a string that many nybbles (4-bit groups,
  representable as hexadecimal digits, 0-9a-f) long.
  
-Each byte of the input field of pack() generates 4 bits of the result.
-For non-alphabetical bytes the result is based on the 4 least-significant
-bits of the input byte, i.e., on C<ord($byte)%16>.  In particular,
-bytes C<"0"> and C<"1"> generate nybbles 0 and 1, as do bytes
-C<"\0"> and C<"\1">.  For bytes C<"a".."f"> and C<"A".."F"> the result
+Each character of the input field of pack() generates 4 bits of the result.
+For non-alphabetical characters the result is based on the 4 least-significant
+bits of the input character, i.e., on C<ord($char)%16>.  In particular,
+characters C<"0"> and C<"1"> generate nybbles 0 and 1, as do characters
+C<"\0"> and C<"\1">.  For characters C<"a".."f"> and C<"A".."F"> the result
  is compatible with the usual hexadecimal digits, so that C<"a"> and
-C<"A"> both generate the nybble C<0xa==10>.  The result for bytes
+C<"A"> both generate the nybble C<0xa==10>.  The result for characters
  C<"g".."z"> and C<"G".."Z"> is not well-defined.
  
  Starting from the beginning of the input string of pack(), each pair
-of bytes is converted to 1 byte of output.  With format C<h> the
-first byte of the pair determines the least-significant nybble of the
-output byte, and with format C<H> it determines the most-significant
+of characters is converted to 1 character of output.  With format C<h> the
+first character of the pair determines the least-significant nybble of the
+output character, and with format C<H> it determines the most-significant
  nybble.
  
  If the length of the input string is not even, it behaves as if padded
-by a null byte at the end.  Similarly, during unpack()ing the "extra"
+by a null character at the end.  Similarly, during unpack()ing the "extra"
  nybbles are ignored.
  
-If the input string of pack() is longer than needed, extra bytes are ignored.
-A C<*> for the repeat count of pack() means to use all the bytes of
-the input field.  On unpack()ing the bits are converted to a string
+If the input string of pack() is longer than needed, extra characters are
+ignored.
+A C<*> for the repeat count of pack() means to use all the characters of
+the input field.  On unpack()ing the nybbles are converted to a string
  of hexadecimal digits.
  
  =item *
@@ -3498,24 +3532,32 @@ so will result in a fatal error.
  
  =item *
  
-The C</> template character allows packing and unpacking of strings where
-the packed structure contains a byte count followed by the string itself.
-You write I<length-item>C</>I<string-item>.
+The C</> template character allows packing and unpacking of a sequence of
+items where the packed structure contains a packed item count followed by 
+the packed items themselves.
+You write I<length-item>C</>I<sequence-item>.
  
  The I<length-item> can be any C<pack> template letter, and describes
  how the length value is packed.  The ones likely to be of most use are
  integer-packing ones like C<n> (for Java strings), C<w> (for ASN.1 or
  SNMP) and C<N> (for Sun XDR).
  
-For C<pack>, the I<string-item> must, at present, be C<"A*">, C<"a*"> or
-C<"Z*">. For C<unpack> the length of the string is obtained from the
-I<length-item>, but if you put in the '*' it will be ignored. For all other
-codes, C<unpack> applies the length value to the next item, which must not
-have a repeat count.
+For C<pack>, the I<sequence-item> may have a repeat count, in which case
+the minimum of that and the number of available items is used as argument
+for the I<length-item>. If it has no repeat count or uses a '*', the number
+of available items is used. For C<unpack> the repeat count is always obtained
+by decoding the packed item count, and the I<sequence-item> must not have a
+repeat count.
+
+If the I<sequence-item> refers to a string type (C<"A">, C<"a"> or C<"Z">),
+the I<length-item> is a string length, not a number of strings. If there is
+an explicit repeat count for pack, the packed string will be adjusted to that
+given length.
  
-    unpack 'C/a', "\04Gurusamy";        gives 'Guru'
-    unpack 'a3/A* A*', '007 Bond  J ';  gives (' Bond','J')
-    pack 'n/a* w/a*','hello,','world';  gives "\000\006hello,\005world"
+    unpack 'W/a', "\04Gurusamy";        gives ('Guru')
+    unpack 'a3/A* A*', '007 Bond  J ';  gives (' Bond', 'J')
+    pack 'n/a* w/a','hello,','world';   gives "\000\006hello,\005world"
+    pack 'a/W2', ord('a') .. ord('z');  gives '2ab'
  
  The I<length-item> is not returned explicitly from C<unpack>.
  
@@ -3582,7 +3624,7 @@ Some systems may have even weirder byte orders such as
  You can see your system's preference with
  
         print join(" ", map { sprintf "%#02x", $_ }
-                            unpack("C*",pack("L",0x12345678))), "\n";
+                            unpack("W*",pack("L",0x12345678))), "\n";
  
  The byteorder on the platform where Perl was built is also available
  via L<Config>:
@@ -3650,21 +3692,21 @@ will not in general equal $foo).
  
  =item *
  
-If the pattern begins with a C<U>, the resulting string will be
-treated as UTF-8-encoded Unicode. You can force UTF-8 encoding on in a
-string with an initial C<U0>, and the bytes that follow will be
-interpreted as Unicode characters. If you don't want this to happen,
-you can begin your pattern with C<C0> (or anything else) to force Perl
-not to UTF-8 encode your string, and then follow this with a C<U*>
-somewhere in your pattern.
+Pack and unpack can operate in two modes, character mode (C<C0> mode) where
+the packed string is processed per character and UTF-8 mode (C<U0> mode)
+where the packed string is processed in its UTF-8-encoded Unicode form on
+a byte by byte basis. Character mode is the default unless the format string 
+starts with a C<U>. You can switch mode at any moment with an explicit 
+C<C0> or C<U0> in the format. A mode is in effect until the next mode switch 
+or until the end of the ()-group in which it was entered.
  
  =item *
  
  You must yourself do any alignment or padding by inserting for example
  enough C<'x'>es while packing.  There is no way pack() and unpack()
-could know where the bytes are going to or coming from.  Therefore
+could know where the characters are going to or coming from.  Therefore
  C<pack> (and C<unpack>) handle their output and input as flat
-sequences of bytes.
+sequences of characters.
  
  =item *
  
@@ -3677,14 +3719,13 @@ C<@> starts again at 0. Therefore, the result of
  
  is the string "\0a\0\0bc".
  
-
  =item *
  
  C<x> and C<X> accept C<!> modifier.  In this case they act as
  alignment commands: they jump forward/back to the closest position
-aligned at a multiple of C<count> bytes.  For example, to pack() or
+aligned at a multiple of C<count> characters. For example, to pack() or
  unpack() C's C<struct {char c; double d; char cc[2]}> one may need to
-use the template C<C x![d] d C[2]>; this assumes that doubles must be
+use the template C<W x![d] d W[2]>; this assumes that doubles must be
  aligned on the double's size.
  
  For alignment commands C<count> of 0 is equivalent to C<count> of 1;
@@ -3714,20 +3755,27 @@ to pack() than actually given, extra arguments are ignored.
  
  Examples:
  
-    $foo = pack("CCCC",65,66,67,68);
+    $foo = pack("WWWW",65,66,67,68);
      # foo eq "ABCD"
-    $foo = pack("C4",65,66,67,68);
+    $foo = pack("W4",65,66,67,68);
      # same thing
+    $foo = pack("W4",0x24b6,0x24b7,0x24b8,0x24b9);
+    # same thing with Unicode circled letters.
      $foo = pack("U4",0x24b6,0x24b7,0x24b8,0x24b9);
-    # same thing with Unicode circled letters
+    # same thing with Unicode circled letters. You don't get the UTF-8
+    # bytes because the U at the start of the format caused a switch to
+    # U0-mode, so the UTF-8 bytes get joined into characters
+    $foo = pack("C0U4",0x24b6,0x24b7,0x24b8,0x24b9);
+    # foo eq "\xe2\x92\xb6\xe2\x92\xb7\xe2\x92\xb8\xe2\x92\xb9"
+    # This is the UTF-8 encoding of the string in the previous example
  
      $foo = pack("ccxxcc",65,66,67,68);
      # foo eq "AB\0\0CD"
  
-    # note: the above examples featuring "C" and "c" are true
+    # note: the above examples featuring "W" and "c" are true
      # only on ASCII and ASCII-derived systems such as ISO Latin 1
      # and UTF-8.  In EBCDIC the first example would be
-    # $foo = pack("CCCC",193,194,195,196);
+    # $foo = pack("WWWW",193,194,195,196);
  
      $foo = pack("s2",1,2);
      # "\1\0\2\0" on little-endian
@@ -3761,6 +3809,8 @@ Examples:
      $bar = pack('s@4l', 12, 34);
      # short 12, zero fill to position 4, long 34
      # $foo eq $bar
+    $baz = pack('s.l', 12, 4, 34);
+    # short 12, zero fill to position 4, long 34
  
      $foo = pack('nN', 42, 4711);
      # pack big-endian 16- and 32-bit unsigned integers
@@ -4263,7 +4313,7 @@ a bareword argument, there is a little extra functionality going on
  behind the scenes.  Before C<require> looks for a "F<.pm>" extension,
  it will first look for a filename with a "F<.pmc>" extension.  A file
  with this extension is assumed to be Perl bytecode generated by
-L<B::Bytecode|B::Bytecode>.  If this file is found, and it's modification
+L<B::Bytecode|B::Bytecode>.  If this file is found, and its modification
  time is newer than a coinciding "F<.pm>" non-compiled file, it will be
  loaded in place of that non-compiled file ending in a "F<.pm>" extension.
  
@@ -4566,6 +4616,14 @@ Note that whether C<select> gets restarted after signals (say, SIGALRM)
  is implementation-dependent.  See also L<perlport> for notes on the
  portability of C<select>.
  
+On error, C<select> returns C<undef> and sets C<$!>.
+
+Note: on some Unixes, the select(2) system call may report a socket file
+descriptor as "ready for reading", when actually no data is available,
+thus a subsequent read blocks. This can be avoided by always using the
+O_NONBLOCK flag on the socket. See select(2) and fcntl(2) for further
+details.
+
  B<WARNING>: One should not attempt to mix buffered I/O (like C<read>
  or <FH>) with C<select>, except as permitted by POSIX, and even
  then only on POSIX systems.  You have to use C<sysread> instead.
@@ -5029,14 +5087,19 @@ characters at each point it matches that way.  For example:
  
  produces the output 'h:i:t:h:e:r:e'.
  
-Using the empty pattern C<//> specifically matches the null string, and is
-not be confused with the use of C<//> to mean "the last successful pattern
-match".
+As a special case for C<split>, using the empty pattern C<//> specifically
+matches only the null string, and is not to be confused with the regular use
+of C<//> to mean "the last successful pattern match".  So, for C<split>,
+the following:
  
-Empty leading (or trailing) fields are produced when there are positive width
-matches at the beginning (or end) of the string; a zero-width match at the
-beginning (or end) of the string does not produce an empty field.  For
-example:
+    print join(':', split(//, 'hi there'));
+
+produces the output 'h:i: :t:h:e:r:e'.
+
+Empty leading (or trailing) fields are produced when there are positive
+width matches at the beginning (or end) of the string; a zero-width match
+at the beginning (or end) of the string does not produce an empty field.
+For example:
  
     print join(':', split(/(?=\w)/, 'hi there!'));
  
@@ -5922,8 +5985,8 @@ C<$?> like this:
         printf "child exited with value %d\n", $? >> 8;
      }
  
-or more portably by using the W*() calls of the POSIX extension;
-see L<perlport> for more information.
+Alternatively you might inspect the value of C<${^CHILD_ERROR_NATIVE}>
+with the W*() calls of the POSIX extension.
  
  When the arguments get executed via the system shell, results
  and return codes will be subject to its quirks and capabilities.
@@ -6243,7 +6306,7 @@ If EXPR is omitted, unpacks the C<$_> string.
  
  The string is broken into chunks described by the TEMPLATE.  Each chunk
  is converted separately to a value.  Typically, either the string is a result
-of C<pack>, or the bytes of the string represent a C structure of some
+of C<pack>, or the characters of the string represent a C structure of some
  kind.
  
  The TEMPLATE has the same format as in the C<pack> function.
@@ -6256,7 +6319,7 @@ Here's a subroutine that does substring:
  
  and then there's
  
-    sub ordinal { unpack("c",$_[0]); } # same as ord()
+    sub ordinal { unpack("W",$_[0]); } # same as ord()
  
  In addition to fields allowed in pack(), you may prefix a field with
  a %<number> to indicate that
@@ -6270,7 +6333,7 @@ computes the same number as the System V sum program:
  
      $checksum = do {
         local $/;  # slurp!
-       unpack("%32C*",<>) % 65535;
+       unpack("%32W*",<>) % 65535;
      };
  
  The following efficiently counts the number of set bits in a bit vector:
@@ -6708,7 +6771,8 @@ example should print the following table:
  
  Behaves like the wait(2) system call on your system: it waits for a child
  process to terminate and returns the pid of the deceased process, or
-C<-1> if there are no child processes.  The status is returned in C<$?>.
+C<-1> if there are no child processes.  The status is returned in C<$?>
+and C<${^CHILD_ERROR_NATIVE}>.
  Note that a return value of C<-1> could mean that child processes are
  being automatically reaped, as described in L<perlipc>.
  
@@ -6717,7 +6781,7 @@ being automatically reaped, as described in L<perlipc>.
  Waits for a particular child process to terminate and returns the pid of
  the deceased process, or C<-1> if there is no such child process.  On some
  systems, a value of 0 indicates that there are processes still running.
-The status is returned in C<$?>.  If you say
+The status is returned in C<$?> and C<${^CHILD_ERROR_NATIVE}>.  If you say
  
      use POSIX ":sys_wait_h";
      #...