Merge branch 'blead' of ssh://perl5.git.perl.org/gitroot/perl into blead

[perl5.git] / pod / perlrebackslash.pod
diff --git a/pod/perlrebackslash.pod b/pod/perlrebackslash.pod

index 40f73fc..3d3a76f 100644 (file)
--- a/pod/perlrebackslash.pod
+++ b/pod/perlrebackslash.pod
@@ -85,8 +85,8 @@ quoted constructs>.
   \n                (Logical) newline character.
   \N                Any character but newline.
   \N{}              Named (Unicode) character.
- \p{}, \pP         Character with a Unicode property.
- \P{}, \PP         Character without a Unicode property.
+ \p{}, \pP         Character with the given Unicode property.
+ \P{}, \PP         Character without the given Unicode property.
   \Q                Quotemeta till \E.
   \r                Return character.
   \R                Generic new line.
@@ -100,7 +100,7 @@ quoted constructs>.
   \w                Character class for word characters.
   \W                Character class for non-word characters.
   \x{}, \x00        Hexadecimal escape sequence.
- \X                Extended Unicode "combining character sequence".
+ \X                Unicode "extended grapheme cluster".
   \z                End of string.
   \Z                End of string.
  
@@ -392,7 +392,7 @@ contain a hyphen, so there is no ambiguity.
  
  =head2 Assertions
  
-Assertions are conditions that have to be true -- they don't actually
+Assertions are conditions that have to be true; they don't actually
  match parts of the substring. There are six assertions that are written as
  backslash sequences.
  
@@ -507,18 +507,14 @@ metacharacter, and suggests C<\R> as the notation.
  
  =item \X
  
-This matches an extended Unicode I<combining character sequence>, and
-is equivalent to C<< (?>\PM\pM*) >>. C<\PM> matches any character that is
-not considered a Unicode mark character, while C<\pM> matches any character
-that is considered a Unicode mark character; so C<\X> matches any non
-mark character followed by zero or more mark characters. Mark characters
-include (but are not restricted to) I<combining characters> and
-I<vowel signs>.
+This matches a Unicode I<extended grapheme cluster>.
  
  C<\X> matches quite well what normal (non-Unicode-programmer) usage
-would consider a single character: for example a base character
-(the C<\PM> above), for example a letter, followed by zero or more
-diacritics, which are I<combining characters> (the C<\pM*> above).
+would consider a single character.  As an example, consider a G with some sort
+of diacritic mark, such as an arrow.  There is no such single character in
+Unicode, but one can be composed using a G followed by a Unicode "COMBINING
+UPWARDS ARROW BELOW", and the result would be displayed by Unicode-aware
+software as if it were a single character.
  
  Mnemonic: eI<X>tended Unicode character.