perldelta for 2f465e08e / #123652

[perl5.git] / pod / perldata.pod
diff --git a/pod/perldata.pod b/pod/perldata.pod

index 8611d3d..5316fe2 100644 (file)
--- a/pod/perldata.pod
+++ b/pod/perldata.pod
@@ -24,8 +24,9 @@ containing letters, underscores, and digits.  In some cases, it may
  be a chain of identifiers, separated by C<::> (or by the slightly
  archaic C<'>); all but the last are interpreted as names of packages,
  to locate the namespace in which to look up the final identifier
-(see L<perlmod/Packages> for details).  It's possible to substitute
-for a simple identifier, an expression that produces a reference
+(see L<perlmod/Packages> for details).  For a more in-depth discussion
+on identifiers, see L<Identifier parsing>.  It's possible to
+substitute for a simple identifier, an expression that produces a reference
  to the value at runtime.   This is described in more detail below
  and in L<perlref>.
  X<identifier>
@@ -104,6 +105,133 @@ C<$$>.  (Most of these one character names have a predefined
  significance to Perl.  For instance, C<$$> is the current process
  id.)
  
+=head2 Identifier parsing
+X<identifiers>
+
+Up until Perl 5.18, the actual rules for what constituted a valid
+identifier were a bit fuzzy.  However, in general, anything defined here should
+work on previous versions of Perl, while the opposite -- edge cases
+that work in previous versions, but aren't defined here -- probably
+won't work on newer versions.
+As an important side note, the following applies only
+to bareword identifiers as found in Perl source code, not identifiers
+introduced through symbolic references, which have far fewer
+restrictions.
+If working under the effect of the C<use utf8;> pragma, the following
+rules apply:
+
+    / (?[ ( \p{Word} & \p{XID_Start} ) + [_] ])
+      (?[ ( \p{Word} & \p{XID_Continue} ) ]) *    /x
+
+That is, a "start" character followed by any number of "continue"
+characters.  Perl requires every character in an identifier to also
+match C<\w> (this prevents some problematic cases); and Perl
+additionally accepts identifier names beginning with an underscore.
+
+If not under C<use utf8>, the source is treated as ASCII + 128 extra
+controls, and identifiers should match
+
+    / (?aa) (?!\d) \w+ /x
+
+That is, any word character in the ASCII range, as long as the first
+character is not a digit.
+
+There are two package separators in Perl: A double colon (C<::>) and a single
+quote (C<'>).  Normal identifiers can start or end with a double colon, and
+can contain several parts delimited by double colons.
+Single quotes have similar rules, but with the exception that they are not
+legal at the end of an identifier: That is, C<$'foo> and C<$foo'bar> are
+legal, but C<$foo'bar'> is not.
+
+Additionally, if the identifier is preceded by a sigil --
+that is, if the identifier is part of a variable name -- it
+may optionally be enclosed in braces.
+
+While you can mix double colons with single quotes, the quotes must come
+after the colons: C<$::::'foo> and C<$foo::'bar> are legal, but C<$::'::foo>
+and C<$foo'::bar> are not.
+
+Put together, a grammar to match a basic identifier becomes
+
+ /
+  (?(DEFINE)
+      (?<variable>
+          (?&sigil)
+          (?:
+                  (?&normal_identifier)
+              |   \{ \s* (?&normal_identifier) \s* \}
+          )
+      )
+      (?<normal_identifier>
+          (?: :: )* '?
+           (?&basic_identifier)
+           (?: (?= (?: :: )+ '? | (?: :: )* ' ) (?&normal_identifier) )?
+          (?: :: )*
+      )
+      (?<basic_identifier>
+        # is use utf8 on?
+          (?(?{ (caller(0))[8] & $utf8::hint_bits })
+              (?&Perl_XIDS) (?&Perl_XIDC)*
+            | (?aa) (?!\d) \w+
+          )
+      )
+      (?<sigil> [&*\$\@\%])
+      (?<Perl_XIDS> (?[ ( \p{Word} & \p{XID_Start} ) + [_] ]) )
+      (?<Perl_XIDC> (?[ \p{Word} & \p{XID_Continue} ]) )
+  )
+ /x
+
+Meanwhile, special identifiers don't follow the above rules.  For the most
+part, all of the identifiers in this category have a special meaning given
+by Perl.  Because they have special parsing rules, these generally can't be
+fully-qualified.  They come in four forms:
+
+=over
+
+=item *
+
+A sigil, followed solely by digits matching C<\p{POSIX_Digit}>, like
+C<$0>, C<$1>, or C<$10000>.
+
+=item *
+
+A sigil, followed by either a caret and a single POSIX uppercase letter,
+like C<$^V> or C<$^W>, or a sigil followed by a literal non-space,
+non-C<NUL> control character matching the C<\p{POSIX_Cntrl}> property.
+Due to a historical oddity, if not running under C<use utf8>, the 128
+characters in the C<[0x80-0xff]> range are considered to be controls,
+and may also be used in length-one variables.  However, the use of
+non-graphical characters is deprecated as of v5.22, and support for them
+will be removed in a future version of perl.  ASCII space characters and
+C<NUL> already aren't allowed, so this means that a single-character
+variable name with that name being any other C0 control C<[0x01-0x1F]>,
+or C<DEL> will generate a deprecated warning.  Already, under C<"use
+utf8">, non-ASCII characters must match C<Perl_XIDS>.  As of v5.22, when
+not under C<"use utf8"> C1 controls C<[0x80-0x9F]>, NO BREAK SPACE, and
+SOFT HYPHEN (C<SHY>) generate a deprecated warning.
+
+=item *
+
+Similar to the above, a sigil, followed by bareword text in braces,
+where the first character is either a caret followed by an uppercase
+letter, like C<${^GLOBAL_PHASE}> or a non-C<NUL>, non-space literal
+control like C<${\7LOBAL_PHASE}>.  Like the above, when not under
+C<"use utf8">, the characters in C<[0x80-0xFF]> are considered controls, but as
+of v5.22, the use of any that are non-graphical is deprecated, and as
+of v5.20 the use of any ASCII-range literal control is deprecated.
+Support for these will be removed in a future version of perl.
+
+=item *
+
+A sigil followed by a single character matching the C<\p{POSIX_Punct}>
+property, like C<$!> or C<%+>, except the character C<"{"> doesn't work.
+
+=back
+
+Note that as of Perl 5.20, literal control characters in variable names
+are deprecated; and as of Perl 5.22, any other non-graphic characters
+are also deprecated.
+
  =head2 Context
  X<context> X<scalar context> X<list context>
  
@@ -232,8 +360,7 @@ which is a different value since there is ordinarily a 0th element.
  Assigning to C<$#days> actually changes the length of the array.
  Shortening an array this way destroys intervening values.  Lengthening
  an array that was previously shortened does not recover values
-that were in those elements.  (It used to do so in Perl 4, but we
-had to break this to make sure destructors were called when expected.)
+that were in those elements.
  X<$#> X<array, length>
  
  You can also gain some minuscule measure of efficiency by pre-extending
@@ -284,15 +411,16 @@ X<scalar, literal> X<scalar, constant>
  Numeric literals are specified in any of the following floating point or
  integer formats:
  
-    12345
-    12345.67
-    .23E-10             # a very small number
-    3.14_15_92          # a very important number
-    4_294_967_296       # underscore for legibility
-    0xff                # hex
-    0xdead_beef         # more hex   
-    0377                # octal (only numbers, begins with 0)
-    0b011011            # binary
+ 12345
+ 12345.67
+ .23E-10             # a very small number
+ 3.14_15_92          # a very important number
+ 4_294_967_296       # underscore for legibility
+ 0xff                # hex
+ 0xdead_beef         # more hex
+ 0377                # octal (only numbers, begins with 0)
+ 0b011011            # binary
+ 0x1.999ap-4         # hexadecimal floating point (the 'p' is required)
  
  You are allowed to use underscores (underbars) in numeric literals
  between digits for legibility (but not multiple underscores in a row:
@@ -316,6 +444,17 @@ Hexadecimal, octal, or binary, representations in string literals
  representation.  The hex() and oct() functions make these conversions
  for you.  See L<perlfunc/hex> and L<perlfunc/oct> for more details.
  
+Hexadecimal floating point can start just like a hexadecimal literal,
+and it can be followed by an optional fractional hexadecimal part,
+but it must be followed by C<p>, an optional sign, and a power of two.
+The format is useful for accurately presenting floating point values,
+avoiding conversions to or from decimal floating point, and therefore
+avoiding possible loss in precision.  Notice that while most current
+platforms use the 64-bit IEEE 754 floating point, not all do.  Another
+potential source of (low-order) differences is the floating point
+rounding modes, which can differ between CPUs, operating systems,
+and compilers, and which Perl doesn't control.
+
  You can also embed newlines directly in your strings, i.e., they can end
  on a different line than they begin.  This is nice, but if you forget
  your trailing quote, the error will not be reported until Perl finds
@@ -355,8 +494,8 @@ C<$who::0>, and a C<$who's> variable.  The last two would be the
  $0 and the $s variables in the (presumably) non-existent package
  C<who>.
  
-In fact, an identifier within such curlies is forced to be a string,
-as is any simple identifier within a hash subscript.  Neither need
+In fact, a simple identifier within such curlies is forced to be
+a string, and likewise within a hash subscript.  Neither need
  quoting.  Our earlier example, C<$days{'Feb'}> can be written as
  C<$days{Feb}> and the quotes will be assumed automatically.  But
  anything more complicated in the subscript will be interpreted as an
@@ -579,6 +718,10 @@ function:
  
      ($dev, $ino, undef, undef, $uid, $gid) = stat($file);
  
+As of Perl 5.22, you can also use C<(undef)x2> instead of C<undef, undef>.
+(You can also do C<($x) x 2>, which is less useful, because it assigns to
+the same variable twice, clobbering the first value assigned.)
+
  List assignment in scalar context returns the number of elements
  produced by the expression on the right side of the assignment:
  
@@ -774,21 +917,21 @@ values of the array or hash.
          s/(\w+)/\u\L$1/g;   # "titlecase" words
      }
  
-A slice of an empty list is still an empty list.  Thus:
+As a special exception, when you slice a list (but not an array or a hash),
+if the list evaluates to empty, then taking a slice of that empty list will
+always yield the empty list in turn.  Thus:
  
-    @a = ()[1,0];           # @a has no elements
-    @b = (@a)[0,1];         # @b has no elements
-    @c = (0,1)[2,3];        # @c has no elements
-
-But:
-
-    @a = (1)[1,0];          # @a has two elements
-    @b = (1,undef)[1,0,2];  # @b has three elements
+    @a = ()[0,1];          # @a has no elements
+    @b = (@a)[0,1];        # @b has no elements
+    @c = (sub{}->())[0,1]; # @c has no elements
+    @d = ('a','b')[0,1];   # @d has two elements
+    @e = (@d)[0,1,8,9];    # @e has four elements
+    @f = (@d)[8,9];        # @f has two elements
  
  This makes it easy to write loops that terminate when a null list
  is returned:
  
-    while ( ($home, $user) = (getpwent)[7,0]) {
+    while ( ($home, $user) = (getpwent)[7,0] ) {
          printf "%-8s %s\n", $user, $home;
      }
  
@@ -811,6 +954,30 @@ On the other hand, the leading symbol ('$' or '@') on the array or
  hash indicates whether you are getting back a singular value (a
  scalar) or a plural one (a list).
  
+=head3 Key/Value Hash Slices
+
+Starting in Perl 5.20, a hash slice operation
+with the % symbol is a variant of slice operation
+returning a list of key/value pairs rather than just values:
+
+    %h = (blonk => 2, foo => 3, squink => 5, bar => 8);
+    %subset = %h{'foo', 'bar'}; # key/value hash slice
+    # %subset is now (foo => 3, bar => 8)
+
+However, the result of such a slice cannot be localized, deleted or used
+in assignment.  These are otherwise very much consistent with hash slices
+using the @ symbol.
+
+=head3 Index/Value Array Slices
+
+Similar to key/value hash slices (and also introduced
+in Perl 5.20), the % array slice syntax returns a list
+of index/value pairs:
+
+    @a = "a".."z";
+    @list = %a[3,4,6];
+    # @list is now (3, "d", 4, "e", 6, "g")
+
  =head2 Typeglobs and Filehandles
  X<typeglob> X<filehandle> X<*>