Unused 'cv'

[perl5.git] / pod / perlre.pod
diff --git a/pod/perlre.pod b/pod/perlre.pod

index 96ed872..ee1c2cb 100644 (file)
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -102,7 +102,7 @@ X</x>
  
  =head3 Metacharacters
  
-The patterns used in Perl pattern matching evolved from the ones supplied in
+The patterns used in Perl pattern matching evolved from those supplied in
  the Version 8 regex routines.  (The routines are derived
  (distantly) from Henry Spencer's freely redistributable reimplementation
  of the V8 routines.)  See L<Version 8 Regular Expressions> for
@@ -258,7 +258,7 @@ X<word> X<whitespace> X<character class> X<backreference>
      \pP             Match P, named property.  Use \p{Prop} for longer names.
      \PP             Match non-P
      \X      Match eXtended Unicode "combining character sequence",
-             equivalent to (?:\PM\pM*)
+             equivalent to (?>\PM\pM*)
      \C      Match a single C char (octet) even under Unicode.
              NOTE: breaks up characters into their UTF-8 bytes,
              so you may end up with malformed pieces of UTF-8.
@@ -271,6 +271,7 @@ X<word> X<whitespace> X<character class> X<backreference>
      \g{name} Named backreference
      \k<name> Named backreference
      \K       Keep the stuff left of the \K, don't include it in $&
+    \N       Any character but \n
      \v       Vertical whitespace
      \V       Not vertical whitespace
      \h       Horizontal whitespace
@@ -375,21 +376,61 @@ X<character class> X<\p> X<\p{}>
      digit       IsDigit        \d
      graph       IsGraph
      lower       IsLower
-    print       IsPrint
-    punct       IsPunct
+    print       IsPrint                (but see [2] below)
+    punct       IsPunct                (but see [3] below)
      space       IsSpace
                  IsSpacePerl    \s
      upper       IsUpper
-    word        IsWord
+    word        IsWord         \w
      xdigit      IsXDigit
  
  For example C<[[:lower:]]> and C<\p{IsLower}> are equivalent.
  
+However, the equivalence between C<[[:xxxxx:]]> and C<\p{IsXxxxx}>
+is not exact.
+
+=over 4
+
+=item [1]
+
  If the C<utf8> pragma is not used but the C<locale> pragma is, the
  classes correlate with the usual isalpha(3) interface (except for
  "word" and "blank").
  
-The assumedly non-obviously named classes are:
+But if the C<locale> or C<encoding> pragmas are not used and
+the string is not C<utf8>, then C<[[:xxxxx:]]> (and C<\w>, etc.)
+will not match characters 0x80-0xff; whereas C<\p{IsXxxxx}> will
+force the string to C<utf8> and can match these characters
+(as Unicode).
+
+=item [2]
+
+C<\p{IsPrint}> matches characters 0x09-0x0d but C<[[:print:]]> does not.
+
+=item [3]
+
+C<[[:punct:]]> matches the following but C<\p{IsPunct}> does not,
+because they are classed as symbols (not punctuation) in Unicode.
+
+=over 4
+
+=item C<$>
+
+Currency symbol
+
+=item C<+> C<< < >> C<=> C<< > >> C<|> C<~>
+
+Mathematical symbols
+
+=item C<^> C<`>
+
+Modifier symbols (accents)
+
+=back
+
+=back
+
+The other named classes are:
  
  =over 4
  
@@ -521,14 +562,14 @@ backreferences.
  
  X<\g{1}> X<\g{-1}> X<\g{name}> X<relative backreference> X<named backreference>
  In order to provide a safer and easier way to construct patterns using
-backreferences, Perl 5.10 provides the C<\g{N}> notation. The curly
-brackets are optional, however omitting them is less safe as the meaning
-of the pattern can be changed by text (such as digits) following it.
-When N is a positive integer the C<\g{N}> notation is exactly equivalent
-to using normal backreferences. When N is a negative integer then it is
-a relative backreference referring to the previous N'th capturing group.
-When the bracket form is used and N is not an integer, it is treated as a
-reference to a named buffer.
+backreferences, Perl provides the C<\g{N}> notation (starting with perl
+5.10.0). The curly brackets are optional; however, omitting them is less
+safe as the meaning of the pattern can be changed by text (such as digits)
+following it. When N is a positive integer the C<\g{N}> notation is
+exactly equivalent to using normal backreferences. When N is a negative
+integer then it is a relative backreference referring to the previous N'th
+capturing group. When the bracket form is used and N is not an integer, it
+is treated as a reference to a named buffer.
  
  Thus C<\g{-1}> refers to the last buffer, C<\g{-2}> refers to the
  buffer before that. For example:
@@ -544,7 +585,7 @@ buffer before that. For example:
  
  and would match the same as C</(Y) ( (X) \3 \1 )/x>.
  
-Additionally, as of Perl 5.10 you may use named capture buffers and named
+Additionally, as of Perl 5.10.0 you may use named capture buffers and named
  backreferences. The notation is C<< (?<name>...) >> to declare and C<< \k<name> >>
  to reference. You may also use apostrophes instead of angle brackets to delimit the
  name; and you may use the bracketed C<< \g{name} >> backreference syntax.
@@ -614,7 +655,7 @@ already paid the price.  As of 5.005, C<$&> is not so costly as the
  other two.
  X<$&> X<$`> X<$'>
  
-As a workaround for this problem, Perl 5.10 introduces C<${^PREMATCH}>,
+As a workaround for this problem, Perl 5.10.0 introduces C<${^PREMATCH}>,
  C<${^MATCH}> and C<${^POSTMATCH}>, which are equivalent to C<$`>, C<$&>
  and C<$'>, B<except> that they are only guaranteed to be defined after a
  successful match that was executed with the C</p> (preserve) modifier.
@@ -740,7 +781,7 @@ X<(?|)> X<Branch reset>
  
  This is the "branch reset" pattern, which has the special property
  that the capture buffers are numbered from the same starting point
-in each alternation branch. It is available starting from perl 5.10.
+in each alternation branch. It is available starting from perl 5.10.0.
  
  Capture buffers are numbered from left to right, but inside this
  construct the numbering is restarted for each branch.
@@ -761,6 +802,9 @@ which buffer the captured content will be stored.
      / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
      # 1            2         2  3        2     3     4  
  
+Note: as of Perl 5.10.0, branch resets interfere with the contents of
+the C<%+> hash, which holds named captures. Consider using C<%-> instead.
+
  =item Look-Around Assertions
  X<look-around assertion> X<lookaround assertion> X<look-around> X<lookaround>
  
@@ -837,9 +881,9 @@ only for fixed-width look-behind.
  X<< (?<NAME>) >> X<(?'NAME')> X<named capture> X<capture>
  
  A named capture buffer. Identical in every respect to normal capturing
-parentheses C<()> but for the additional fact that C<%+> may be used after
-a successful match to refer to a named buffer. See C<perlvar> for more
-details on the C<%+> hash.
+parentheses C<()> but for the additional fact that C<%+> or C<%-> may be
+used after a successful match to refer to a named buffer. See C<perlvar>
+for more details on the C<%+> and C<%-> hashes.
  
  If multiple distinct capture buffers have the same name then the
  $+{NAME} will refer to the leftmost defined buffer in the match.
@@ -862,7 +906,7 @@ its Unicode extension (see L<utf8>),
  though it isn't extended by the locale (see L<perllocale>).
  
  B<NOTE:> In order to make things easier for programmers with experience
-with the Python or PCRE regex engines, the pattern C<< (?P<NAME>pattern) >>
+with the Python or PCRE regex engines, the pattern C<< (?PE<lt>NAMEE<gt>pattern) >>
  may be used instead of C<< (?<NAME>pattern) >>; however this form does not
  support the use of single quotes as a delimiter for the name.
  
@@ -1386,7 +1430,7 @@ If we add a C<(*PRUNE)> before the count like the following
      print "Count=$count\n";
  
  we prevent backtracking and find the count of the longest matching
-at each matching startpoint like so:
+at each matching starting point like so:
  
      aaab
      aab
@@ -1432,7 +1476,7 @@ outputs
      Count=2
  
  Once the 'aaab' at the start of the string has matched, and the C<(*SKIP)>
-executed, the next startpoint will be where the cursor was when the
+executed, the next starting point will be where the cursor was when the
  C<(*SKIP)> was executed.
  
  =item C<(*MARK:NAME)> C<(*:NAME)>
@@ -2105,13 +2149,13 @@ part of this regular expression needs to be converted explicitly
  
  =head1 PCRE/Python Support
  
-As of Perl 5.10 Perl supports several Python/PCRE specific extensions
+As of Perl 5.10.0, Perl supports several Python/PCRE specific extensions
  to the regex syntax. While Perl programmers are encouraged to use the
-Perl specific syntax, the following are legal in Perl 5.10:
+Perl specific syntax, the following are also accepted:
  
  =over 4
  
-=item C<< (?P<NAME>pattern) >>
+=item C<< (?PE<lt>NAMEE<gt>pattern) >>
  
  Define a named capture buffer. Equivalent to C<< (?<NAME>pattern) >>.