[perl5.git] / t / op / utfhash.t

#!./perl -w

# Compile-time bootstrap: move into t/ when it is present, point @INC at
# ../lib only, load the shared test helpers and announce the test count.
BEGIN {
    if (-d 't') {
        chdir 't';
    }
    @INC = ('../lib');
    require './test.pl';

    # The count must match the number of checks this script runs,
    # including those evaluated from the DATA section.
    plan(tests => 99);
}

use strict;

# Two hashes: one whose keys can all be held in 8 bits (initially), the
# other with a key that requires utf8 from the outset.

my %hash8 = ( "\xff" => 0xff,
              "\x7f" => 0x7f,
            );
my %hashu = ( "\xff" => 0xff,
              "\x7f" => 0x7f,
              "\x{1ff}" => 0x1ff,
            );

# Check that we can find the 8-bit things by various literals.
# "\x{00ff}" and "\xff" (likewise "\x{007f}" and "\x7f") are two spellings
# of the same character, so both spellings must find the same entry in
# both hashes.
is($hash8{"\x{00ff}"},0xFF);
is($hash8{"\x{007f}"},0x7F);
is($hash8{"\xff"},0xFF);
is($hash8{"\x7f"},0x7F);
is($hashu{"\x{00ff}"},0xFF);
is($hashu{"\x{007f}"},0x7F);
is($hashu{"\xff"},0xFF);
is($hashu{"\x7f"},0x7F);

# Repeat the lookups with the key held in a variable that is pushed
# through several forms; every form must find the same entry.
foreach ("\x7f", "\xff") {
    my $key = $_;    # Force a copy

    utf8::upgrade($key);
    is($hash8{$key}, ord($key));
    is($hashu{$key}, ord($key));

    utf8::downgrade($key);
    is($hash8{$key}, ord($key));
    is($hashu{$key}, ord($key));

    # Build another copy by appending a character and chopping it off again.
    my $rebuilt = $key . chr(100);
    chop($rebuilt);
    is($hash8{$rebuilt}, ord($rebuilt));
    is($hashu{$rebuilt}, ord($rebuilt));
}

# Make sure the lookups so far have not created any spurious extra keys.
# Keys are ordered by character ordinal before being joined.
is(join('', sort { ord($a) <=> ord($b) } keys %hash8), "\x7f\xff");
is(join('', sort { ord($a) <=> ord($b) } keys %hashu), "\x7f\xff\x{1ff}");

# Store a key that needs utf8 in the hash that so far held only 8-bit keys.
$hash8{ chr(0x1ff) } = 0x1ff;

# The new key must be the only addition.
is(join('', sort { ord($a) <=> ord($b) } keys %hash8), "\x7f\xff\x{1ff}");

# All three keys of %hash8 must be found through an upgraded variable and
# through a copy rebuilt by append-and-chop.
foreach ("\x7f", "\xff", "\x{1ff}") {
    my $key = $_;
    utf8::upgrade($key);
    is($hash8{$key}, ord($key));

    my $rebuilt = $key . chr(100);
    chop($rebuilt);
    is($hash8{$rebuilt}, ord($rebuilt));
}

# Take the utf8-requiring key out of the other hash: delete must hand back
# the stored value, and only the two 8-bit keys may remain.
my $deleted_value = delete $hashu{ chr(0x1ff) };
is($deleted_value, 0x1ff);
is(join('', sort keys %hashu), "\x7f\xff");

# The remaining keys of %hashu must still be found whichever form the
# lookup variable is in.
foreach ("\x7f", "\xff") {
    my $key = $_;

    utf8::upgrade($key);
    is($hashu{$key}, ord($key));

    utf8::downgrade($key);
    is($hashu{$key}, ord($key));

    my $rebuilt = $key . chr(100);
    chop($rebuilt);
    is($hashu{$rebuilt}, ord($rebuilt));
}



{
  print "# Unicode hash keys and \\w\n";
  # Not strictly a regex test, but matching against \w is a convenient
  # way to expose the issue.
  use strict;
  my $u3 = "f\x{df}\x{100}";
  my $u2 = substr($u3, 0, 2);
  my $u1 = substr($u2, 0, 1);
  # Four characters long, so that all four strings have distinct lengths.
  my $u0 = chr(0xdf) x 4;

  my @u = ($u0, $u1, $u2, $u3);

  # Each pass builds a hash mapping every remaining string to itself,
  # compares how the key and its value match \w, then drops the last string.
  while (@u) {
    my %u = map { ($_, $_) } @u;
    my $count = scalar(@u);
    my $keys = $count . ($count == 1 ? " key" : " keys");

    # Keys as returned by keys().
    for (keys %u) {
      my $key_is_word   = 0 + /^\w+$/;
      my $value_is_word = 0 + ($u{$_} =~ /^\w+$/);
      is($key_is_word, $value_is_word,
         "\\w on keys with $keys, key of length " . length($_));
    }

    my $more;
    do {
      $more = 0;
      # Loop over what each returns directly rather than copying it into a
      # temporary variable.  While pairs remain, each yields a key and its
      # value; after the last pair it yields the empty list, the loop body
      # is skipped, $more stays 0 and the do-while ends.
      for (each %u) {
        $more = 1;
        my $item_is_word  = 0 + /^\w+$/;
        my $value_is_word = 0 + ($u{$_} =~ /^\w+$/);
        is($item_is_word, $value_is_word,
           "\\w on each, with $keys, key of length " . length($_));
      }
    } while ($more);

    # The hash flattened in list context (keys and values interleaved).
    for (%u) {
      my $item_is_word  = 0 + /^\w+$/;
      my $value_is_word = 0 + ($u{$_} =~ /^\w+$/);
      is($item_is_word, $value_is_word,
         "\\w on hash with $keys, key of length " . length($_));
    }

    pop @u;
    undef %u;
  }
}

{
  # Two copies of the same one-character string.  The second has chr 256
  # appended and chopped off again; judging by the variable names this is
  # meant to leave it stored as utf8 while still holding the same character.
  my $bytes_sz = "\x{df}";
  my $utf8_sz  = $bytes_sz;
  $utf8_sz .= chr(256);
  chop($utf8_sz);

  my (%bytes_first, %utf8_first);

  # Hash that sees the bytes form first ...
  $bytes_first{$bytes_sz} = $bytes_sz;

  for (keys %bytes_first) {
    my $key_is_word   = 0 + /^\w+$/;
    my $value_is_word = 0 + ($bytes_first{$_} =~ /^\w+$/);
    is($key_is_word, $value_is_word, "\\w on each, bytes");
  }

  # ... and then has the same key stored again in its utf8 form.
  $bytes_first{$utf8_sz} = $utf8_sz;

  for (keys %bytes_first) {
    my $key_is_word   = 0 + /^\w+$/;
    my $value_is_word = 0 + ($bytes_first{$_} =~ /^\w+$/);
    is($key_is_word, $value_is_word, "\\w on each, bytes now utf8");
  }

  # Hash that sees the utf8 form first ...
  $utf8_first{$utf8_sz} = $utf8_sz;

  for (keys %utf8_first) {
    my $key_is_word   = 0 + /^\w+$/;
    my $value_is_word = 0 + ($utf8_first{$_} =~ /^\w+$/);
    is($key_is_word, $value_is_word, "\\w on each, utf8");
  }

  # ... and then has the same key stored again in its bytes form.
  $utf8_first{$bytes_sz} = $bytes_sz;

  for (keys %utf8_first) {
    my $key_is_word   = 0 + /^\w+$/;
    my $value_is_word = 0 + ($utf8_first{$_} =~ /^\w+$/);
    is($key_is_word, $value_is_word, "\\w on each, utf8 now bytes");
  }

}

{
    # The DATA section holds two variants of the same tests separated by a
    # line consisting of __END__: the first is evaluated on ASCII platforms
    # (ord('A') == 65), the second on EBCDIC ones (ord('A') == 193).
    #
    # With $/ undefined a single read returns the whole section and any
    # further read returns undef, so the section is read once and split on
    # the separator line instead of being read twice.
    local $/; # Slurp.
    my $data = <DATA>;
    $data = '' unless defined $data;
    my ($utf8, $utfebcdic) = split /^__END__\n/m, $data;

    my $code;
    if (ord('A') == 65) {
        $code = $utf8;
    } elsif (ord('A') == 193) {
        $code = $utfebcdic;
    }

    if (defined $code) {
        eval $code;
        if ($@) {
            # Surface compile/run-time errors from the evaluated tests as
            # comment lines instead of dropping them silently.
            (my $error = $@) =~ s/^/# /mg;
            $error .= "\n" unless $error =~ /\n\z/;
            print $error;
        }
    }
}
__END__
{
  # See if utf8 barewords work [perl #22969]
  use utf8;
  my %hash = (тест => 123);
  is($hash{тест}, $hash{'тест'});
  is($hash{тест}, 123);
  is($hash{'тест'}, 123);
  %hash = (тест => 123);
  is($hash{тест}, $hash{'тест'});
  is($hash{тест}, 123);
  is($hash{'тест'}, 123);

  # See if plain ASCII strings quoted with '=>' erroneously get utf8 flag [perl #68812]
  my %foo = (a => 'b', 'c' => 'd');
  for my $key (keys %foo) {
    ok !utf8::is_utf8($key), "'$key' shouldn't have utf8 flag";
  }
}
__END__
{
  # See if utf8 barewords work [perl #22969]
  use utf8; # UTF-EBCDIC, really.
  my %hash = (½ää½âÀ½äâ½ää => 123);
  is($hash{½ää½âÀ½äâ½ää}, $hash{'½ää½âÀ½äâ½ää'});
  is($hash{½ää½âÀ½äâ½ää}, 123);
  is($hash{'½ää½âÀ½äâ½ää'}, 123);
  %hash = (½ää½âÀ½äâ½ää => 123);
  is($hash{½ää½âÀ½äâ½ää}, $hash{'½ää½âÀ½äâ½ää'});
  is($hash{½ää½âÀ½äâ½ää}, 123);
  is($hash{'½ää½âÀ½äâ½ää'}, 123);

  # See if plain ASCII strings quoted with '=>' erroneously get utf8 flag [perl #68812]
  my %foo = (a => 'b', 'c' => 'd');
  for my $key (keys %foo) {
    ok !utf8::is_utf8($key), "'$key' shouldn't have utf8 flag";
  }
}
Commit	Line	Data
19692e8d NC	1	#!./perl -w
19692e8d NC	2
cb0a5b5c NIS	3	BEGIN {
	4	chdir 't' if -d 't';
	5	@INC = '../lib';
	6	require './test.pl';
	7
eaf7a4d2	8	plan(tests => 99);
cb0a5b5c NIS	9	}
cb0a5b5c NIS	10
19692e8d NC	11	use strict;
19692e8d NC	12
cb0a5b5c NIS	13	# Two hashes one will all keys 8-bit possible (initially), other
	14	# with a utf8 requiring key from the outset.
	15
	16	my %hash8 = ( "\xff" => 0xff,
	17	"\x7f" => 0x7f,
	18	);
	19	my %hashu = ( "\xff" => 0xff,
	20	"\x7f" => 0x7f,
	21	"\x{1ff}" => 0x1ff,
	22	);
	23
93f09d7b	24	# Check that we can find the 8-bit things by various literals
cb0a5b5c NIS	25	is($hash8{"\x{00ff}"},0xFF);
	26	is($hash8{"\x{007f}"},0x7F);
	27	is($hash8{"\xff"},0xFF);
	28	is($hash8{"\x7f"},0x7F);
	29	is($hashu{"\x{00ff}"},0xFF);
	30	is($hashu{"\x{007f}"},0x7F);
	31	is($hashu{"\xff"},0xFF);
	32	is($hashu{"\x7f"},0x7F);
	33
	34	# Now try same thing with variables forced into various forms.
5fec3b1d	35	foreach ("\x7f","\xff")
cb0a5b5c	36	{
5fec3b1d	37	my $a = $_; # Force a copy
cb0a5b5c NIS	38	utf8::upgrade($a);
	39	is($hash8{$a},ord($a));
	40	is($hashu{$a},ord($a));
	41	utf8::downgrade($a);
	42	is($hash8{$a},ord($a));
	43	is($hashu{$a},ord($a));
	44	my $b = $a.chr(100);
	45	chop($b);
	46	is($hash8{$b},ord($b));
	47	is($hashu{$b},ord($b));
	48	}
	49
	50	# Check we have not got an spurious extra keys
20b5b8d0 JH	51	is(join('',sort { ord $a <=> ord $b } keys %hash8),"\x7f\xff");
20b5b8d0 JH	52	is(join('',sort { ord $a <=> ord $b } keys %hashu),"\x7f\xff\x{1ff}");
cb0a5b5c NIS	53
	54	# Now add a utf8 key to the 8-bit hash
	55	$hash8{chr(0x1ff)} = 0x1ff;
	56
	57	# Check we have not got an spurious extra keys
20b5b8d0	58	is(join('',sort { ord $a <=> ord $b } keys %hash8),"\x7f\xff\x{1ff}");
cb0a5b5c	59
5fec3b1d	60	foreach ("\x7f","\xff","\x{1ff}")
cb0a5b5c	61	{
5fec3b1d	62	my $a = $_;
cb0a5b5c NIS	63	utf8::upgrade($a);
	64	is($hash8{$a},ord($a));
	65	my $b = $a.chr(100);
	66	chop($b);
	67	is($hash8{$b},ord($b));
	68	}
	69
	70	# and remove utf8 from the other hash
	71	is(delete $hashu{chr(0x1ff)},0x1ff);
	72	is(join('',sort keys %hashu),"\x7f\xff");
	73
5fec3b1d	74	foreach ("\x7f","\xff")
cb0a5b5c	75	{
5fec3b1d	76	my $a = $_;
cb0a5b5c NIS	77	utf8::upgrade($a);
	78	is($hashu{$a},ord($a));
	79	utf8::downgrade($a);
	80	is($hashu{$a},ord($a));
	81	my $b = $a.chr(100);
	82	chop($b);
	83	is($hashu{$b},ord($b));
	84	}
	85
	86
4c79aee6 NC	87
4c79aee6 NC	88	{
19692e8d NC	89	print "# Unicode hash keys and \\w\n";
	90	# This is not really a regex test but regexes bring
	91	# out the issue nicely.
	92	use strict;
	93	my $u3 = "f\x{df}\x{100}";
	94	my $u2 = substr($u3,0,2);
	95	my $u1 = substr($u2,0,1);
	96	my $u0 = chr (0xdf)x4; # Make this 4 chars so that all lengths are distinct.
	97
	98	my @u = ($u0, $u1, $u2, $u3);
	99
	100	while (@u) {
	101	my %u = (map {( $_, $_)} @u);
	102	my $keys = scalar @u;
	103	$keys .= ($keys == 1) ? " key" : " keys";
4c79aee6 NC	104
4c79aee6 NC	105	for (keys %u) {
19692e8d NC	106	my $l = 0 + /^\w+$/;
	107	my $r = 0 + $u{$_} =~ /^\w+$/;
	108	is ($l, $r, "\\w on keys with $keys, key of length " . length $_);
	109	}
	110
	111	my $more;
	112	do {
	113	$more = 0;
	114	# Want to do this direct, rather than copying to a temporary variable
	115	# The first time each will return key and value at the start of the hash.
	116	# each will return () after we've done the last pair. $more won't get
	117	# set then, and the do will exit.
	118	for (each %u) {
	119	$more = 1;
	120	my $l = 0 + /^\w+$/;
	121	my $r = 0 + $u{$_} =~ /^\w+$/;
	122	is ($l, $r, "\\w on each, with $keys, key of length " . length $_);
	123	}
	124	} while ($more);
4c79aee6 NC	125
4c79aee6 NC	126	for (%u) {
19692e8d NC	127	my $l = 0 + /^\w+$/;
	128	my $r = 0 + $u{$_} =~ /^\w+$/;
	129	is ($l, $r, "\\w on hash with $keys, key of length " . length $_);
	130	}
	131	pop @u;
	132	undef %u;
	133	}
	134	}
	135
	136	{
	137	my $utf8_sz = my $bytes_sz = "\x{df}";
	138	$utf8_sz .= chr 256;
	139	chop ($utf8_sz);
	140
	141	my (%bytes_first, %utf8_first);
	142
	143	$bytes_first{$bytes_sz} = $bytes_sz;
	144
	145	for (keys %bytes_first) {
	146	my $l = 0 + /^\w+$/;
	147	my $r = 0 + $bytes_first{$_} =~ /^\w+$/;
	148	is ($l, $r, "\\w on each, bytes");
	149	}
	150
	151	$bytes_first{$utf8_sz} = $utf8_sz;
	152
	153	for (keys %bytes_first) {
	154	my $l = 0 + /^\w+$/;
	155	my $r = 0 + $bytes_first{$_} =~ /^\w+$/;
	156	is ($l, $r, "\\w on each, bytes now utf8");
	157	}
	158
	159	$utf8_first{$utf8_sz} = $utf8_sz;
	160
	161	for (keys %utf8_first) {
	162	my $l = 0 + /^\w+$/;
	163	my $r = 0 + $utf8_first{$_} =~ /^\w+$/;
	164	is ($l, $r, "\\w on each, utf8");
	165	}
	166
	167	$utf8_first{$bytes_sz} = $bytes_sz;
	168
	169	for (keys %utf8_first) {
	170	my $l = 0 + /^\w+$/;
	171	my $r = 0 + $utf8_first{$_} =~ /^\w+$/;
	172	is ($l, $r, "\\w on each, utf8 now bytes");
	173	}
	174
4c79aee6	175	}
5464dbd2 RGS	176
5464dbd2 RGS	177	{
250d67eb JH	178	local $/; # Slurp.
	179	my $utf8 = <DATA>;
	180	my $utfebcdic = <DATA>;
	181	if (ord('A') == 65) {
	182	eval $utf8;
	183	} elsif (ord('A') == 193) {
	184	eval $utfebcdic;
	185	}
	186	}
	187	__END__
	188	{
5464dbd2 RGS	189	# See if utf8 barewords work [perl #22969]
	190	use utf8;
	191	my %hash = (тест => 123);
	192	is($hash{тест}, $hash{'тест'});
	193	is($hash{тест}, 123);
	194	is($hash{'тест'}, 123);
	195	%hash = (тест => 123);
	196	is($hash{тест}, $hash{'тест'});
	197	is($hash{тест}, 123);
	198	is($hash{'тест'}, 123);
eaf7a4d2 CS	199
	200	# See if plain ASCII strings quoted with '=>' erroneously get utf8 flag [perl #68812]
	201	my %foo = (a => 'b', 'c' => 'd');
	202	for my $key (keys %foo) {
	203	ok !utf8::is_utf8($key), "'$key' shouldn't have utf8 flag";
	204	}
5464dbd2	205	}
250d67eb JH	206	__END__
	207	{
	208	# See if utf8 barewords work [perl #22969]
	209	use utf8; # UTF-EBCDIC, really.
	210	my %hash = (½ää½âÀ½äâ½ää => 123);
	211	is($hash{½ää½âÀ½äâ½ää}, $hash{'½ää½âÀ½äâ½ää'});
	212	is($hash{½ää½âÀ½äâ½ää}, 123);
	213	is($hash{'½ää½âÀ½äâ½ää'}, 123);
	214	%hash = (½ää½âÀ½äâ½ää => 123);
	215	is($hash{½ää½âÀ½äâ½ää}, $hash{'½ää½âÀ½äâ½ää'});
	216	is($hash{½ää½âÀ½äâ½ää}, 123);
	217	is($hash{'½ää½âÀ½äâ½ää'}, 123);
eaf7a4d2 CS	218
	219	# See if plain ASCII strings quoted with '=>' erroneously get utf8 flag [perl #68812]
	220	my %foo = (a => 'b', 'c' => 'd');
	221	for my $key (keys %foo) {
	222	ok !utf8::is_utf8($key), "'$key' shouldn't have utf8 flag";
	223	}
250d67eb	224	}