Fix two broken links in perldelta.

[perl5.git] / pod / perlreref.pod
diff --git a/pod/perlreref.pod b/pod/perlreref.pod

index 5ddacc5..db7c173 100644 (file)
--- a/pod/perlreref.pod
+++ b/pod/perlreref.pod
@@ -21,7 +21,7 @@ false if the match succeeds, and true if it fails.
  
      $var !~ /foo/;
  
-C<m/pattern/msixpogc> searches a string for a pattern match,
+C<m/pattern/msixpogcdualn> searches a string for a pattern match,
  applying the given options.
  
      m  Multiline mode - ^ and $ match internal lines
@@ -33,17 +33,24 @@ applying the given options.
      o  compile pattern Once
      g  Global - all occurrences
      c  don't reset pos on failed matches when using /g
+    a  restrict \d, \s, \w and [:posix:] to match ASCII only
+    aa (two a's) also /i matches exclude ASCII/non-ASCII
+    l  match according to current locale
+    u  match according to Unicode rules
+    d  match according to native rules unless something indicates
+       Unicode
+    n  Non-capture mode. Don't let () fill in $1, $2, etc...
  
  If 'pattern' is an empty string, the last I<successfully> matched
  regex is used. Delimiters other than '/' may be used for both this
  operator and the following ones. The leading C<m> can be omitted
  if the delimiter is '/'.
  
-C<qr/pattern/msixpo> lets you store a regex in a variable,
+C<qr/pattern/msixpodualn> lets you store a regex in a variable,
  or pass one around. Modifiers as for C<m//>, and are stored
  within the regex.
  
-C<s/pattern/replacement/msixpogce> substitutes matches of
+C<s/pattern/replacement/msixpogcedualn> substitutes matches of
  'pattern' with 'replacement'. Modifiers as for C<m//>,
  with two additions:
  
@@ -71,8 +78,8 @@ delimiters can be used.  Must be reset with reset().
   (...)   Groups subexpressions for capturing to $1, $2...
   (?:...) Groups subexpressions without capturing (cluster)
   |       Matches either the subexpression preceding or following it
- \1, \2, \3 ...           Matches the text from the Nth group
   \g1 or \g{1}, \g2 ...    Matches the text from the Nth group
+ \1, \2, \3 ...           Matches the text from the Nth group
   \g-1 or \g{-1}, \g-2 ... Matches the text from the Nth previous group
   \g{name}     Named backreference
   \k<name>     Named backreference
@@ -89,17 +96,19 @@ These work as in normal strings.
     \n       Newline
     \r       Carriage return
     \t       Tab
-   \037     Any octal ASCII value
-   \x7f     Any hexadecimal ASCII value
-   \x{263a} A wide hexadecimal value
+   \037     Char whose ordinal is the 3 octal digits, max \777
+   \o{2307} Char whose ordinal is the octal number, unrestricted
+   \x7f     Char whose ordinal is the 2 hex digits, max \xFF
+   \x{263a} Char whose ordinal is the hex number, unrestricted
     \cx      Control-x
-   \N{name} A named character
+   \N{name} A named Unicode character or character sequence
     \N{U+263D} A Unicode character by hex ordinal
  
     \l  Lowercase next character
     \u  Titlecase next character
     \L  Lowercase until \E
     \U  Uppercase until \E
+   \F  Foldcase until \E
     \Q  Disable pattern metacharacters until \E
     \E  End modification
  
@@ -128,14 +137,13 @@ and L<perlunicode> for details.
     \S      A non-whitespace character
     \h      An horizontal whitespace
     \H      A non horizontal whitespace
-   \N      A non newline (when not followed by '{NAME}'; experimental;
+   \N      A non newline (when not followed by '{NAME}';
             not valid in a character class; equivalent to [^\n]; it's
             like '.' without /s modifier)
     \v      A vertical whitespace
     \V      A non vertical whitespace
     \R      A generic newline           (?>\v|\x0D\x0A)
  
-   \C      Match a byte (with Unicode, '.' matches a character)
     \pP     Match P-named (Unicode) property
     \p{...} Match Unicode property with name longer than 1 character
     \PP     Match non-P
@@ -144,44 +152,39 @@ and L<perlunicode> for details.
  
  POSIX character classes and their Unicode and Perl equivalents:
  
-           ASCII-         Full-
-           range          range   backslash
- POSIX    \p{...}         \p{}    sequence       Description
+            ASCII-         Full-
+   POSIX    range          range    backslash
+ [[:...:]]  \p{...}        \p{...}   sequence    Description
+
   -----------------------------------------------------------------------
- alnum   PosixAlnum       Alnum               Alpha plus Digit
- alpha   PosixAlpha       Alpha               Alphabetic characters
- ascii   ASCII                                Any ASCII character
- blank   PosixBlank       Blank     \h        Horizontal whitespace;
-                                                full-range also written
-                                                as \p{HorizSpace} (GNU
-                                                extension)
- cntrl   PosixCntrl       Cntrl               Control characters
- digit   PosixDigit       Digit     \d        Decimal digits
- graph   PosixGraph       Graph               Alnum plus Punct
- lower   PosixLower       Lower               Lowercase characters
- print   PosixPrint       Print               Graph plus Print, but not
-                                                any Cntrls
- punct   PosixPunct       Punct               These aren't precisely
-                                                equivalent.  See NOTE,
-                                                below.
- space   PosixSpace       Space     [\s\cK]   Whitespace
-         PerlSpace        SpacePerl \s        Perl's whitespace
-                                                definition
- upper   PosixUpper       Upper               Uppercase characters
- word    PerlWord         Word      \w        Alnum plus '_' (Perl
-                                                extension)
- xdigit  ASCII_Hex_Digit  XDigit              Hexadecimal digit,
-                                                ASCII-range is
-                                                [0-9A-Fa-f]
-
-NOTE on C<[[:punct:]]>, C<\p{PosixPunct}> and C<\p{Punct}>:
-In the ASCII range, C<[[:punct:]]> and C<\p{PosixPunct}> match
-C<[-!"#$%&'()*+,./:;<=E<gt>?@[\\\]^_`{|}~]> (although if a locale is in
-effect, it could alter the behavior of C<[[:punct:]]>); and C<\p{Punct}>
-matches C<[-!"#%&'()*,./:;?@[\\\]_{}]>.  When matching a UTF-8 string,
-C<[[:punct:]]> matches what it does in the ASCII range, plus what
-C<\p{Punct}> matches.  C<\p{Punct}> matches, anything that isn't a
-control, an alphanumeric, a space, nor a symbol.
+ alnum   PosixAlnum       XPosixAlnum            'alpha' plus 'digit'
+ alpha   PosixAlpha       XPosixAlpha            Alphabetic characters
+ ascii   ASCII                                   Any ASCII character
+ blank   PosixBlank       XPosixBlank   \h       Horizontal whitespace;
+                                                   full-range also
+                                                   written as
+                                                   \p{HorizSpace} (GNU
+                                                   extension)
+ cntrl   PosixCntrl       XPosixCntrl            Control characters
+ digit   PosixDigit       XPosixDigit   \d       Decimal digits
+ graph   PosixGraph       XPosixGraph            'alnum' plus 'punct'
+ lower   PosixLower       XPosixLower            Lowercase characters
+ print   PosixPrint       XPosixPrint            'graph' plus 'space',
+                                                   but not any Controls
+ punct   PosixPunct       XPosixPunct            Punctuation and Symbols
+                                                   in ASCII-range; just
+                                                   punct outside it
+ space   PosixSpace       XPosixSpace   \s       Whitespace
+ upper   PosixUpper       XPosixUpper            Uppercase characters
+ word    PosixWord        XPosixWord    \w       'alnum' + Unicode marks
+                                                    + connectors, like
+                                                    '_' (Perl extension)
+ xdigit  ASCII_Hex_Digit  XPosixXDigit           Hexadecimal digit,
+                                                    ASCII-range is
+                                                    [0-9A-Fa-f]
+
+Also, various synonyms like C<\p{Alpha}> for C<\p{XPosixAlpha}>; all listed
+in L<perluniprops/Properties accessible through \p{} and \P{}>.
  
  Within a character class:
  
@@ -195,6 +198,8 @@ All are zero-width assertions.
  
     ^  Match string start (or line, if /m is used)
     $  Match string end (or line, if /m is used) or before newline
+   \b{} Match boundary of type specified within the braces
+   \B{} Match wherever \b{} doesn't match
     \b Match word boundary (between \w and \W)
     \B Match except at word boundary (between \w and \w or \W and \W)
     \A Match string start (regardless of /m)
@@ -237,6 +242,7 @@ There is no quantifier C<{,n}>. That's interpreted as a literal string.
     (?<name>...)      Named capture
     (?'name'...)      Named capture
     (?P<name>...)     Named capture (python syntax)
+   (?[...])          Extended bracketed character class
     (?{ code })       Embedded code, return value becomes $^R
     (??{ code })      Dynamic regex, return value used as regex
     (?N)              Recurse into subpattern number N
@@ -246,6 +252,10 @@ There is no quantifier C<{,n}>. That's interpreted as a literal string.
     (?P>name)         Recurse into a named subpattern (python syntax)
     (?(cond)yes|no)
     (?(cond)yes)      Conditional expression, where "cond" can be:
+                     (?=pat)   lookahead
+                     (?!pat)   negative lookahead
+                     (?<=pat)  lookbehind
+                     (?<!pat)  negative lookbehind
                       (N)       subpattern N has matched something
                       (<name>)  named subpattern has matched something
                       ('name')  named subpattern has matched something
@@ -267,6 +277,7 @@ There is no quantifier C<{,n}>. That's interpreted as a literal string.
     ${^MATCH}      Entire matched string
     ${^POSTMATCH}  Everything after to matched string
  
+Note to those still using Perl 5.18 or earlier:
  The use of C<$`>, C<$&> or C<$'> will slow down B<all> regex use
  within your program. Consult L<perlvar> for C<@->
  to see equivalent expressions that won't cause slow down.
@@ -274,6 +285,7 @@ See also L<Devel::SawAmpersand>. Starting with Perl 5.10, you
  can also use the equivalent variables C<${^PREMATCH}>, C<${^MATCH}>
  and C<${^POSTMATCH}>, but for them to be defined, you have to
  specify the C</p> (preserve) modifier on your regular expression.
+In Perl 5.20, the use of C<$`>, C<$&> and C<$'> makes no speed difference.
  
     $1, $2 ...  hold the Xth captured expr
     $+    Last parenthesized pattern match
@@ -281,8 +293,8 @@ specify the C</p> (preserve) modifier on your regular expression.
     $^R   Holds the result of the last (?{...}) expr
     @-    Offsets of starts of groups. $-[0] holds start of whole match
     @+    Offsets of ends of groups. $+[0] holds end of whole match
-   %+    Named capture buffers
-   %-    Named capture buffers, as array refs
+   %+    Named capture groups
+   %-    Named capture groups, as array refs
  
  Captured groups are numbered according to their I<opening> paren.
  
@@ -292,6 +304,7 @@ Captured groups are numbered according to their I<opening> paren.
     lcfirst     Lowercase first char of a string
     uc          Uppercase a string
     ucfirst     Titlecase first char of a string
+   fc          Foldcase a string
  
     pos         Return or set current match position
     quotemeta   Quote metacharacters
@@ -300,8 +313,9 @@ Captured groups are numbered according to their I<opening> paren.
  
     split       Use a regex to split a string into parts
  
-The first four of these are like the escape sequences C<\L>, C<\l>,
-C<\U>, and C<\u>.  For Titlecase, see L</Titlecase>.
+The first five of these are like the escape sequences C<\L>, C<\l>,
+C<\U>, C<\u>, and C<\F>.  For Titlecase, see L</Titlecase>; for
+Foldcase, see L</Foldcase>.
  
  =head2 TERMINOLOGY
  
@@ -310,6 +324,12 @@ C<\U>, and C<\u>.  For Titlecase, see L</Titlecase>.
  Unicode concept which most often is equal to uppercase, but for
  certain characters like the German "sharp s" there is a difference.
  
+=head3 Foldcase
+
+Unicode form that is useful when comparing strings regardless of case,
+as certain characters have complex one-to-many case mappings. Primarily a
+variant of lowercase.
+
  =head1 AUTHOR
  
  Iain Truskett. Updated by the Perl 5 Porters.
@@ -363,7 +383,7 @@ debugging.
  
  =item *
  
-L<perldebug/"Debugging regular expressions">
+L<perldebug/"Debugging Regular Expressions">
  
  =item *