B::Deparse: Padrange deparse fix

[perl5.git] / lib / _charnames.pm
diff --git a/lib/_charnames.pm b/lib/_charnames.pm

index ad7684d..8955b6f 100644 (file)
--- a/lib/_charnames.pm
+++ b/lib/_charnames.pm
@@ -7,7 +7,7 @@ package _charnames;
  use strict;
  use warnings;
  use File::Spec;
-our $VERSION = '1.33';
+our $VERSION = '1.39';
  use unicore::Name;    # mktables-generated algorithmically-defined names
  
  use bytes ();          # for $bytes::hint_bits
@@ -66,10 +66,10 @@ $Carp::Internal{ (__PACKAGE__) } = 1;
  
  my %system_aliases = (
  
-    'SINGLE-SHIFT 2'                => pack("U", 0x8E),
-    'SINGLE-SHIFT 3'                => pack("U", 0x8F),
-    'PRIVATE USE 1'                 => pack("U", 0x91),
-    'PRIVATE USE 2'                 => pack("U", 0x92),
+    'SINGLE-SHIFT 2'                => pack("U", utf8::unicode_to_native(0x8E)),
+    'SINGLE-SHIFT 3'                => pack("U", utf8::unicode_to_native(0x8F)),
+    'PRIVATE USE 1'                 => pack("U", utf8::unicode_to_native(0x91)),
+    'PRIVATE USE 2'                 => pack("U", utf8::unicode_to_native(0x92)),
  );
  
  # These are the aliases above that differ under :loose and :full matching
@@ -78,7 +78,7 @@ my %system_aliases = (
  #);
  
  #my %deprecated_aliases;
-#$deprecated_aliases{'BELL'} = pack("U", 0x07) if $^V lt v5.17.0;
+#$deprecated_aliases{'BELL'} = pack("U", utf8::unicode_to_native(0x07)) if $^V lt v5.17.0;
  
  #my %loose_deprecated_aliases = (
  #);
@@ -157,24 +157,41 @@ sub alias (@) # Set up a single alias
      # hex, but makes the code easier to maintain, and is called
      # infrequently, only at compile-time
      if ($value !~ $decimal_qr && $value =~ $hex_qr) {
-      $value = CORE::hex $1;
+      my $temp = CORE::hex $1;
+      $temp = utf8::unicode_to_native($temp) if $value =~ /^[Uu]\+/;
+      $value = $temp;
      }
      if ($value =~ $decimal_qr) {
-        no warnings qw(non_unicode surrogate nonchar); # Allow any non-malformed
+        no warnings qw(non_unicode surrogate nonchar); # Allow any of these
          $^H{charnames_ord_aliases}{$name} = pack("U", $value);
  
          # Use a canonical form.
          $^H{charnames_inverse_ords}{sprintf("%05X", $value)} = $name;
      }
      else {
+        # This regex needs to be sync'd with the code in toke.c that checks
+        # for the same thing
          if ($name !~ / ^
                         \p{_Perl_Charname_Begin}
                         \p{_Perl_Charname_Continue}*
                         $ /x) {
+
            push @errors, $name;
          }
          else {
            $^H{charnames_name_aliases}{$name} = $value;
+
+          if (warnings::enabled('deprecated')) {
+            if ($name =~ / ( .* \s ) ( \s* ) $ /x) {
+              carp "Trailing white-space in a charnames alias definition is deprecated; marked by <-- HERE in '$1 <-- HERE " . $2 . "'";
+            }
+
+            # Use '+' instead of '*' in this regex, because any trailing
+            # blanks have already been warned about.
+            if ($name =~ / ( .*? \s{2} ) ( .+ ) /x) {
+              carp "A sequence of multiple spaces in a charnames alias definition is deprecated; marked by <-- HERE in '$1 <-- HERE " . $2 . "'";
+            }
+          }
          }
      }
    }
@@ -184,7 +201,8 @@ sub alias (@) # Set up a single alias
    if (@errors) {
      foreach my $name (@errors) {
        my $ok = "";
-      $ok = $1 if $name =~ / ^ ( \p{Alpha} [-\p{XPosixWord} ():\xa0]* ) /x;
+      my $nbsp = chr utf8::unicode_to_native(0xa0);
+      $ok = $1 if $name =~ / ^ ( \p{Alpha} [-\p{XPosixWord} ():$nbsp]* ) /x;
        my $first_bad = substr($name, length($ok), 1);
        $name = "Invalid character in charnames alias definition; marked by <-- HERE in '$ok$first_bad<-- HERE " . substr($name, length($ok) + 1) . "'";
      }
@@ -212,11 +230,11 @@ sub alias_file ($)  # Reads a file containing alias definitions
    if (-f $arg && File::Spec->file_name_is_absolute ($arg)) {
      $file = $arg;
    }
-  elsif ($arg =~ m/^\w+$/) {
+  elsif ($arg =~ m/ ^ \p{_Perl_IDStart} \p{_Perl_IDCont}* $/x) {
      $file = "unicore/${arg}_alias.pl";
    }
    else {
-    croak "Charnames alias files can only have identifier characters";
+    croak "Charnames alias file names can only have identifier characters";
    }
    if (my @alias = do $file) {
      @alias == 1 && !defined $alias[0] and
@@ -460,7 +478,7 @@ sub lookup_name ($$$) {
                                    /xs)
            {
                # Even in non-loose matching, the script traditionally has been
-              # case insensitve
+              # case insensitive
                $scripts_trie = "\U$1";
                $lookup_name = $2;
  
@@ -610,6 +628,7 @@ sub import
          ref $alias eq "HASH" or
            croak "Only HASH reference supported as argument to :alias";
          alias ($alias);
+        $promote = 1;
          next;
        }
        if ($alias =~ m{:(\w+)$}) {
@@ -618,7 +637,7 @@ sub import
          alias_file ($1) and $promote = 1;
          next;
        }
-      alias_file ($alias);
+      alias_file ($alias) and $promote = 1;
        next;
      }
      if (substr($arg, 0, 1) eq ':'
@@ -681,6 +700,11 @@ sub import
  # not an issue.
  my %viacode;
  
+my $no_name_code_points_re = join "|", map { sprintf("%05X",
+                                             utf8::unicode_to_native($_)) }
+                                            0x80, 0x81, 0x84, 0x99;
+$no_name_code_points_re = qr/$no_name_code_points_re/;
+
  sub viacode {
  
    # Returns the name of the code point argument
@@ -701,8 +725,10 @@ sub viacode {
    if ($arg =~ $decimal_qr) {
      $hex = sprintf "%05X", $arg;
    } elsif ($arg =~ $hex_qr) {
+    $hex = CORE::hex $1;
+    $hex = utf8::unicode_to_native($hex) if $arg =~ /^[Uu]\+/;
      # Below is the line that differs from the _getcode() source
-    $hex = sprintf "%05X", hex $1;
+    $hex = sprintf "%05X", $hex;
    } else {
      carp("unexpected arg \"$arg\" to charnames::viacode()");
      return;
@@ -735,7 +761,7 @@ sub viacode {
          $return = substr($txt, $+[0], index($txt, "\n", $+[0]) - $+[0]);
  
          # If not one of these 4 code points, return what we've found.
-        if ($hex !~ / ^ 000 (?: 8[014] | 99 ) $ /x) {
+        if ($hex !~ / ^ $no_name_code_points_re $ /x) {
            $viacode{$hex} = $return;
            return $return;
          }
@@ -763,12 +789,14 @@ sub viacode {
    # Here there is no user-defined alias, return any official one.
    return $return if defined $return;
  
-  if (CORE::hex($hex) > 0x10FFFF) {
+  if (CORE::hex($hex) > 0x10FFFF
+      && warnings::enabled('non_unicode'))
+  {
        carp "Unicode characters only allocated up to U+10FFFF (you asked for U+$hex)";
    }
    return;
  
-} # _viacode
+} # viacode
  
  1;