This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
current status on Unicode Regular Expressions
[perl5.git] / lib / unicore / mktables
index 006b9ef..72f33cb 100644 (file)
@@ -1500,41 +1500,63 @@ sub PropList_txt()
        );
     }
 
-    # Alphabetic is L and Other_Alphabetic.
+    # Alphabetic is L, Nl, and Other_Alphabetic.
     New_Prop(Is    => 'Alphabetic',
-             Table->Merge($Cat{L}, $Prop{Other_Alphabetic}),
-             Desc  => '[\p{L}\p{OtherAlphabetic}]', # use canonical names here
+             Table->Merge($Cat{L}, $Cat{Nl}, $Prop{Other_Alphabetic}),
+             Desc  => '[\p{L}\p{Nl}\p{OtherAlphabetic}]', # canonical names
              Fuzzy => 1);
 
     # Lowercase is Ll and Other_Lowercase.
     New_Prop(Is    => 'Lowercase',
              Table->Merge($Cat{Ll}, $Prop{Other_Lowercase}),
-             Desc  => '[\p{Ll}\p{OtherLowercase}]', # use canonical names here
+             Desc  => '[\p{Ll}\p{OtherLowercase}]', # canonical names
              Fuzzy => 1);
 
     # Uppercase is Lu and Other_Uppercase.
     New_Prop(Is => 'Uppercase',
              Table->Merge($Cat{Lu}, $Prop{Other_Uppercase}),
-             Desc  => '[\p{Lu}\p{Other_Uppercase}]', # use canonical names here
+             Desc  => '[\p{Lu}\p{OtherUppercase}]', # canonical names
              Fuzzy => 1);
 
     # Math is Sm and Other_Math.
     New_Prop(Is => 'Math',
              Table->Merge($Cat{Sm}, $Prop{Other_Math}),
-             Desc  => '[\p{Sm}\p{OtherMath}]', # use canonical names here
+             Desc  => '[\p{Sm}\p{OtherMath}]', # canonical names
              Fuzzy => 1);
 
-    # ID_Start is Ll, Lu, Lt, Lm, Lo, and Nl.
+    # ID_Start is Ll, Lu, Lt, Lm, Lo, Nl, and Other_ID_Start.
     New_Prop(Is => 'ID_Start',
-             Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl]}),
-             Desc  => '[\p{Ll}\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]',
+             Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl]}, $Prop{Other_ID_Start}),
+             Desc  => '[\p{Ll}\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{Nl}\p{OtherIDStart}]',
              Fuzzy => 1);
 
-    # ID_Continue is ID_Start, Mn, Mc, Nd, and Pc.
+    # ID_Continue is ID_Start, Mn, Mc, Nd, Pc, and Other_ID_Continue.
     New_Prop(Is => 'ID_Continue',
-             Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl Mn Mc Nd Pc ]}),
-             Desc  => '[\p{ID_Start}\p{Mn}\p{Mc}\p{Nd}\p{Pc}]',
+             Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl Mn Mc Nd Pc ]},
+                          @Prop{qw[Other_ID_Start Other_ID_Continue]}),
+             Desc  => '[\p{ID_Start}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\p{OtherIDContinue}]',
              Fuzzy => 1);
+
+    # Default_Ignorable_Code_Point = Other_Default_Ignorable_Code_Point
+    #           + Cf + Cc + Cs + Noncharacter_Code_Point + Variation_Selector
+    #           - White_Space - FFF9..FFFB (Annotation Characters)
+
+    my $Annotation = Table->New();
+    $Annotation->RawAppendRange(0xFFF9, 0xFFFB);
+
+    New_Prop(Is => 'Default_Ignorable_Code_Point',
+             Table->Merge(@Cat{qw[Cf Cc Cs]},
+                          $Prop{Noncharacter_Code_Point},
+                          $Prop{Variation_Selector},
+                          $Prop{Other_Default_Ignorable_Code_Point})
+                  ->Invert
+                  ->Merge($Prop{White_Space}, $Annotation)
+                  ->Invert,
+             Desc  => '(?![\p{WhiteSpace}\x{FFF9}-\x{FFFB}])[\p{Cf}\p{Cc}'.
+                      '\p{Cs}\p{NoncharacterCodePoint}\p{VariationSelector}'.
+                      '\p{OtherDefaultIgnorableCodePoint}]',
+             Fuzzy => 1);
+
 }