Upgrade to Test::Simple 0.64_03

[perl5.git] / pod / perlreref.pod
diff --git a/pod/perlreref.pod b/pod/perlreref.pod

index 8aad327..8a793c8 100644 (file)
--- a/pod/perlreref.pod
+++ b/pod/perlreref.pod
@@ -6,15 +6,21 @@ perlreref - Perl Regular Expressions Reference
  
  This is a quick reference to Perl's regular expressions.
  For full information see L<perlre> and L<perlop>, as well
-as the L<references|/"SEE ALSO"> section in this document.
+as the L</"SEE ALSO"> section in this document.
  
-=head1 OPERATORS
+=head2 OPERATORS
  
    =~ determines to which variable the regex is applied.
-     In its absence, C<$_> is used.
+     In its absence, $_ is used.
  
          $var =~ /foo/;
  
+  !~ determines to which variable the regex is applied,
+     and negates the result of the match; it returns
+     false if the match succeeds, and true if it fails.
+
+       $var !~ /foo/;
+
    m/pattern/igmsoxc searches a string for a pattern match,
       applying the given options.
  
@@ -24,41 +30,42 @@ as the L<references|/"SEE ALSO"> section in this document.
          s  match as a Single line - . matches \n
          o  compile pattern Once
          x  eXtended legibility - free whitespace and comments
-        c  don't reset pos on fails when using /g
+        c  don't reset pos on failed matches when using /g
  
-     If C<pattern> is an empty string, the last I<successfully> match
-     regex is used. Delimiters other than C</> may be used for both this
+     If 'pattern' is an empty string, the last I<successfully> matched
+     regex is used. Delimiters other than '/' may be used for both this
       operator and the following ones.
  
    qr/pattern/imsox lets you store a regex in a variable,
-     or pass one around. Modifiers as for C<m//> and are stored
+     or pass one around. Modifiers are as for m//, and are stored
       within the regex.
  
    s/pattern/replacement/igmsoxe substitutes matches of
-     C<pattern> with C<replacement>. Modifiers as for C<m//>
-     with addition of C<e>:
+     'pattern' with 'replacement'. Modifiers as for m//
+     with one addition:
  
          e  Evaluate replacement as an expression
  
       'e' may be specified multiple times. 'replacement' is interpreted
       as a double quoted string unless a single-quote (') is the delimiter.
  
-  ?pattern? is like C<m/pattern/> but matches only once. No alternate
-     delimiters can be used. Must be reset with 'reset'.
-
-=head1 SYNTAX
-
-   \     Escapes the character(s) immediately following it
-   .     Matches any single character except a newline (unless /s is used)
-   ^     Matches at the beginning of the string (or line, if /m is used)
-   $     Matches at the end of the string (or line, if /m is used)
-   *     Matches the preceding element 0 or more times
-   +     Matches the preceding element 1 or more times
-   ?     Matches the preceding element 0 or 1 times
-   {...} Specifies a range of occurrences for the element preceding it
-   [...] Matches any one of the characters contained within the brackets
-   (...) Groups regular expressions
-   |     Matches either the expression preceding or following it
+  ?pattern? is like m/pattern/ but matches only once. No alternate
+      delimiters can be used. Must be reset with reset() (see perlfunc).
+
+=head2 SYNTAX
+
+   \       Escapes the character immediately following it
+   .       Matches any single character except a newline (unless /s is used)
+   ^       Matches at the beginning of the string (or line, if /m is used)
+   $       Matches at the end of the string (or line, if /m is used)
+   *       Matches the preceding element 0 or more times
+   +       Matches the preceding element 1 or more times
+   ?       Matches the preceding element 0 or 1 times
+   {...}   Specifies a range of occurrences for the element preceding it
+   [...]   Matches any one of the characters contained within the brackets
+   (...)   Groups subexpressions for capturing to $1, $2...
+   (?:...) Groups subexpressions without capturing (cluster)
+   |       Matches either the subexpression preceding or following it
     \1, \2 ...  The text from the Nth group
  
  =head2 ESCAPE SEQUENCES
@@ -71,19 +78,21 @@ These work as in normal strings.
     \n       Newline
     \r       Carriage return
     \t       Tab
-   \038     Any octal ASCII value
+   \037     Any octal ASCII value
     \x7f     Any hexadecimal ASCII value
     \x{263a} A wide hexadecimal value
     \cx      Control-x
     \N{name} A named character
  
-   \l  Lowercase until next character
-   \u  Uppercase until next character
+   \l  Lowercase next character
+   \u  Titlecase next character
     \L  Lowercase until \E
     \U  Uppercase until \E
     \Q  Disable pattern metacharacters until \E
     \E  End case modification
  
+For Titlecase, see L</Titlecase>.
+
  This one works differently from normal strings:
  
     \b  An assertion, not backspace, except in a character class
@@ -93,17 +102,21 @@ This one works differently from normal strings:
     [amy]    Match 'a', 'm' or 'y'
     [f-j]    Dash specifies "range"
     [f-j-]   Dash escaped or at start or end means 'dash'
-   [^f-j]   Caret indicates "match char any _except_ these"
+   [^f-j]   Caret indicates "match any character _except_ these"
  
-The following work within or without a character class:
+The following sequences work within or without a character class.
+The first six are locale aware; all are Unicode aware.  The default
+character class equivalents are given.  See L<perllocale> and
+L<perlunicode> for details.
  
-   \d      A digit, same as [0-9]
-   \D      A nondigit, same as [^0-9]
-   \w      A word character (alphanumeric), same as [a-zA-Z_0-9]
-   \W      A non-word character, [^a-zA-Z_0-9]
-   \s      A whitespace character, same as [ \t\n\r\f]
-   \S      A non-whitespace character, [^ \t\n\r\f]
-   \C      Match a byte (with Unicode. '.' matches char)
+   \d      A digit                     [0-9]
+   \D      A nondigit                  [^0-9]
+   \w      A word character            [a-zA-Z0-9_]
+   \W      A non-word character        [^a-zA-Z0-9_]
+   \s      A whitespace character      [ \t\n\r\f]
+   \S      A non-whitespace character  [^ \t\n\r\f]
+
+   \C      Match a byte (with Unicode, '.' matches a character)
     \pP     Match P-named (Unicode) property
     \p{...} Match Unicode property with long name
     \PP     Match non-P
@@ -112,21 +125,21 @@ The following work within or without a character class:
  
  POSIX character classes and their Unicode and Perl equivalents:
  
-   alnum   IsAlnum             Alphanumeric
-   alpha   IsAlpha             Alphabetic
-   ascii   IsASCII             Any ASCII char
-   blank   IsSpace  [ \t]      Horizontal whitespace (GNU)
-   cntrl   IsCntrl             Control characters
-   digit   IsDigit  \d         Digits
-   graph   IsGraph             Alphanumeric and punctuation
-   lower   IsLower             Lowercase chars (locale aware)
-   print   IsPrint             Alphanumeric, punct, and space
-   punct   IsPunct             Punctuation
-   space   IsSpace  [\s\ck]    Whitespace
-           IsSpacePerl   \s    Perl's whitespace definition
-   upper   IsUpper             Uppercase chars (locale aware)
-   word    IsWord   \w         Alphanumeric plus _ (Perl)
-   xdigit  IsXDigit [\dA-Fa-f] Hexadecimal digit
+   alnum   IsAlnum              Alphanumeric
+   alpha   IsAlpha              Alphabetic
+   ascii   IsASCII              Any ASCII char
+   blank   IsSpace  [ \t]       Horizontal whitespace (GNU extension)
+   cntrl   IsCntrl              Control characters
+   digit   IsDigit  \d          Digits
+   graph   IsGraph              Alphanumeric and punctuation
+   lower   IsLower              Lowercase chars (locale and Unicode aware)
+   print   IsPrint              Alphanumeric, punct, and space
+   punct   IsPunct              Punctuation
+   space   IsSpace  [\s\ck]     Whitespace
+           IsSpacePerl   \s     Perl's whitespace definition
+   upper   IsUpper              Uppercase chars (locale and Unicode aware)
+   word    IsWord   \w          Alphanumeric plus _ (Perl extension)
+   xdigit  IsXDigit [0-9A-Fa-f] Hexadecimal digit
  
  Within a character class:
  
@@ -141,46 +154,45 @@ All are zero-width assertions.
     ^  Match string start (or line, if /m is used)
     $  Match string end (or line, if /m is used) or before newline
     \b Match word boundary (between \w and \W)
-   \B Match except at word boundary
+   \B Match except at word boundary (between \w and \w or \W and \W)
     \A Match string start (regardless of /m)
-   \Z Match string end (preceding optional newline)
+   \Z Match string end (before optional newline)
     \z Match absolute string end
     \G Match where previous m//g left off
-   \c Suppresses resetting of search position when used with /g.
-      Without \c, search pattern is reset to the beginning of the string
  
  =head2 QUANTIFIERS
  
-Quantifiers are greedy by default --- match the B<longest> leftmost.
+Quantifiers are greedy by default -- match the B<longest> leftmost.
  
     Maximal Minimal Allowed range
     ------- ------- -------------
     {n,m}   {n,m}?  Must occur at least n times but no more than m times
     {n,}    {n,}?   Must occur at least n times
-   {n}     {n}?    Must match exactly n times
+   {n}     {n}?    Must occur exactly n times
     *       *?      0 or more times (same as {0,})
     +       +?      1 or more times (same as {1,})
     ?       ??      0 or 1 time (same as {0,1})
  
+There is no quantifier {,n} -- that gets understood as a literal string.
+
  =head2 EXTENDED CONSTRUCTS
  
     (?#text)         A comment
-   (?:...)          Cluster without capturing
-   (?imxs-imsx:...) Enable/disable option (as per m//)
+   (?imxs-imsx:...) Enable/disable option (as per m// modifiers)
     (?=...)          Zero-width positive lookahead assertion
     (?!...)          Zero-width negative lookahead assertion
-   (?<...)          Zero-width positive lookbehind assertion
+   (?<=...)         Zero-width positive lookbehind assertion
     (?<!...)         Zero-width negative lookbehind assertion
     (?>...)          Grab what we can, prohibit backtracking
     (?{ code })      Embedded code, return value becomes $^R
     (??{ code })     Dynamic regex, return value used as regex
-   (?(cond)yes|no)  cond being int corresponding to capturing parens
+   (?(cond)yes|no)  cond being integer corresponding to capturing parens
     (?(cond)yes)        or a lookaround/eval zero-width assertion
  
-=head1 VARIABLES
+=head2 VARIABLES
  
     $_    Default variable for operators to use
-   $*    Enable multiline matching (deprecated; not in 5.8.1+)
+   $*    Enable multiline matching (deprecated; not in 5.9.0 or later)
  
     $&    Entire matched string
     $`    Everything prior to matched string
@@ -195,17 +207,18 @@ See also L<Devel::SawAmpersand>.
     $+    Last parenthesized pattern match
     $^N   Holds the most recently closed capture
     $^R   Holds the result of the last (?{...}) expr
-   @-    Offsets of starts of groups. [0] holds start of whole match
-   @+    Offsets of ends of groups. [0] holds end of whole match
+   @-    Offsets of starts of groups. $-[0] holds start of whole match
+   @+    Offsets of ends of groups. $+[0] holds end of whole match
  
-Capture groups are numbered according to their I<opening> paren.
+Captured groups are numbered according to their I<opening> paren.
  
-=head1 FUNCTIONS
+=head2 FUNCTIONS
  
     lc          Lowercase a string
     lcfirst     Lowercase first char of a string
     uc          Uppercase a string
     ucfirst     Titlecase first char of a string
+
     pos         Return or set current match position
     quotemeta   Quote metacharacters
     reset       Reset ?pattern? status
@@ -213,6 +226,16 @@ Capture groups are numbered according to their I<opening> paren.
  
     split       Use regex to split a string into parts
  
+The first four of these are like the escape sequences C<\L>, C<\l>,
+C<\U>, and C<\u>.  For Titlecase, see L</Titlecase>.
+
+=head2 TERMINOLOGY
+
+=head3 Titlecase
+
+A Unicode concept that is most often equal to uppercase, but for
+certain characters, such as the German "sharp s", there is a difference.
+
  =head1 AUTHOR
  
  Iain Truskett.
@@ -279,6 +302,9 @@ David P.C. Wollmann,
  Richard Soderberg,
  Sean M. Burke,
  Tom Christiansen,
+Jim Cromie,
  and
  Jeffrey Goff
  for useful advice.
+
+=cut