This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: Compress::Zlib, pack "C" and utf-8 [PATCH]
[perl5.git] / ext / Storable / t / utf8hash.t
CommitLineData
e16e2ff8 1#!./perl
e16e2ff8
NC
2
3sub BEGIN {
4 if ($] < 5.007) {
5 print "1..0 # Skip: no utf8 hash key support\n";
6 exit 0;
7 }
8 if ($ENV{PERL_CORE}){
9 chdir('t') if -d 't';
372cb964 10 @INC = ('.', '../lib');
fcaa57e7
AMS
11 if ($^O eq 'MacOS') {
12 # Look, I'm using this fully-qualified variable more than once!
13 my $arch = $MacPerl::Architecture;
14 push @INC, "::lib:${MacPerl::Architecture}:";
15 }
372cb964
NC
16 } else {
17 unshift @INC, 't';
e16e2ff8
NC
18 }
19 require Config; import Config;
20 if ($ENV{PERL_CORE}){
21 if($Config{'extensions'} !~ /\bStorable\b/) {
22 print "1..0 # Skip: Storable was not built\n";
23 exit 0;
24 }
25 }
e16e2ff8
NC
26}
27
28use strict;
29our $DEBUGME = shift || 0;
30use Storable qw(store nstore retrieve thaw freeze);
31{
32 no warnings;
33 $Storable::DEBUGME = ($DEBUGME > 1);
34}
35# Better than no plan, because I was getting out of memory errors, at which
36# point Test::More tidily prints up 1..79 as if I meant to finish there.
1651fc44 37use Test::More tests=>144;
e16e2ff8 38use bytes ();
e16e2ff8
NC
39my %utf8hash;
40
530b72ba
NC
41$Storable::canonical = $Storable::canonical; # Shut up a used only once warning.
42
e16e2ff8
NC
43for $Storable::canonical (0, 1) {
44
45# first we generate a nasty hash which keys include both utf8
46# on and off with identical PVs
47
de726223
JH
48no utf8; # we have a naked 8-bit byte below (in Latin 1, anyway)
49
50# In Latin 1 -ese the below ord() should end up 0xc0 (192),
51# in EBCDIC 0x64 (100). Both should end up being UTF-8/UTF-EBCDIC.
e16e2ff8 52my @ords = (
de726223 53 ord("Á"), # LATIN CAPITAL LETTER A WITH GRAVE
e16e2ff8
NC
54 0x3000, #IDEOGRAPHIC SPACE
55 );
56
57foreach my $i (@ords){
58 my $u = chr($i); utf8::upgrade($u);
59 # warn sprintf "%d,%d", bytes::length($u), is_utf8($u);
1651fc44 60 my $b = chr($i); utf8::encode($b);
e16e2ff8
NC
61 # warn sprintf "%d,%d" ,bytes::length($b), is_utf8($b);
62
1651fc44 63 isnt($u, $b, "equivalence - with utf8flag");
e16e2ff8
NC
64
65 $utf8hash{$u} = $utf8hash{$b} = $i;
66}
67
68sub nkeys($){
69 my $href = shift;
70 return scalar keys %$href;
71}
72
73my $nk;
74is($nk = nkeys(\%utf8hash), scalar(@ords)*2,
75 "nasty hash generated (nkeys=$nk)");
76
77# now let the show begin!
78
79my $thawed = thaw(freeze(\%utf8hash));
80
81is($nk = nkeys($thawed),
82 nkeys(\%utf8hash),
83 "scalar keys \%{\$thawed} (nkeys=$nk)");
84for my $k (sort keys %$thawed){
85 is($utf8hash{$k}, $thawed->{$k}, "frozen item chr($utf8hash{$k})");
86}
87
88my $storage = "utfhash.po"; # po = perl object!
89my $retrieved;
90
91ok((nstore \%utf8hash, $storage), "nstore to $storage");
92ok(($retrieved = retrieve($storage)), "retrieve from $storage");
93
94is($nk = nkeys($retrieved),
95 nkeys(\%utf8hash),
96 "scalar keys \%{\$retrieved} (nkeys=$nk)");
97for my $k (sort keys %$retrieved){
98 is($utf8hash{$k}, $retrieved->{$k}, "nstored item chr($utf8hash{$k})");
99}
100unlink $storage;
101
102
103ok((store \%utf8hash, $storage), "store to $storage");
104ok(($retrieved = retrieve($storage)), "retrieve from $storage");
105is($nk = nkeys($retrieved),
106 nkeys(\%utf8hash),
107 "scalar keys \%{\$retrieved} (nkeys=$nk)");
108for my $k (sort keys %$retrieved){
109 is($utf8hash{$k}, $retrieved->{$k}, "stored item chr($utf8hash{$k})");
110}
111$DEBUGME or unlink $storage;
112
113# On the premis that more tests are good, here are NWC's tests:
114
115package Hash_Test;
116
117sub me_second {
118 return (undef, $_[0]);
119}
120
121package main;
122
123my $utf8 = "Schlo\xdf" . chr 256;
124chop $utf8;
125
126# Set this to 1 to test the test by bypassing Storable.
127my $bypass = 0;
128
129sub class_test {
130 my ($object, $package) = @_;
131 unless ($package) {
132 is ref $object, 'HASH', "$object is unblessed";
133 return;
134 }
135 isa_ok ($object, $package);
136 my ($garbage, $copy) = eval {$object->me_second};
137 is $@, "", "check it has correct method";
138 cmp_ok $copy, '==', $object, "and that it returns the same object";
139}
140
141# Thanks to Dan Kogai for the Kanji for "castle" (which he informs me also
142# means 'a city' in Mandarin).
143my %hash = (map {$_, $_} 'castle', "ch\xe5teau", $utf8, "\x{57CE}");
144
145for my $package ('', 'Hash_Test') {
146 # Run through and sanity check these.
147 if ($package) {
148 bless \%hash, $package;
149 }
150 for (keys %hash) {
151 my $l = 0 + /^\w+$/;
152 my $r = 0 + $hash{$_} =~ /^\w+$/;
153 cmp_ok ($l, '==', $r);
154 }
155
156 # Grr. This cperl mode thinks that ${ is a punctuation variable.
157 # I presume it's punishment for using xemacs rather than emacs. Or OS/2 :-)
158 my $copy = $bypass ? \%hash : ${thaw freeze \\%hash};
159 class_test ($copy, $package);
160
161 for (keys %$copy) {
162 my $l = 0 + /^\w+$/;
163 my $r = 0 + $copy->{$_} =~ /^\w+$/;
164 cmp_ok ($l, '==', $r, sprintf "key length %d", length $_);
165 }
166
167
168 my $bytes = my $char = chr 27182;
169 utf8::encode ($bytes);
170
171 my $orig = {$char => 1};
172 if ($package) {
173 bless $orig, $package;
174 }
175 my $just_utf8 = $bypass ? $orig : ${thaw freeze \$orig};
176 class_test ($just_utf8, $package);
177 cmp_ok (scalar keys %$just_utf8, '==', 1, "1 key in utf8?");
178 cmp_ok ($just_utf8->{$char}, '==', 1, "utf8 key present?");
179 ok (!exists $just_utf8->{$bytes}, "bytes key absent?");
180
181 $orig = {$bytes => 1};
182 if ($package) {
183 bless $orig, $package;
184 }
185 my $just_bytes = $bypass ? $orig : ${thaw freeze \$orig};
186 class_test ($just_bytes, $package);
187
188 cmp_ok (scalar keys %$just_bytes, '==', 1, "1 key in bytes?");
189 cmp_ok ($just_bytes->{$bytes}, '==', 1, "bytes key present?");
190 ok (!exists $just_bytes->{$char}, "utf8 key absent?");
191
192 die sprintf "Both have length %d, which is crazy", length $char
193 if length $char == length $bytes;
194
195 $orig = {$bytes => length $bytes, $char => length $char};
196 if ($package) {
197 bless $orig, $package;
198 }
199 my $both = $bypass ? $orig : ${thaw freeze \$orig};
200 class_test ($both, $package);
201
202 cmp_ok (scalar keys %$both, '==', 2, "2 keys?");
203 cmp_ok ($both->{$bytes}, '==', length $bytes, "bytes key present?");
204 cmp_ok ($both->{$char}, '==', length $char, "utf8 key present?");
205}
206
207}