This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
netbsd-vax: customized.dat update for S-L-U
[perl5.git] / t / uni / lex_utf8.t
CommitLineData
e4206093 1#!./perl -w
77a135fe
KW
2#
3# This script is written intentionally in UTF-8
4
5BEGIN {
77a135fe 6 $| = 1;
e4206093 7
a817e89d 8 chdir 't' if -d 't';
e4206093 9 require './test.pl';
624c42e2 10 set_up_inc('../lib');
5d508e6c 11 require './charset_tools.pl';
b51555b9 12 skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
f6807ef7 13 skip_all_without_unicode_tables();
77a135fe
KW
14}
15
16use strict;
17
42d532a6 18plan (tests => 16);
77a135fe
KW
19use charnames ':full';
20
21use utf8;
22
23my $A_with_ogonek = "Ą";
24my $micro_sign = "µ";
25my $hex_first = "a\x{A2}Ą";
26my $hex_last = "aĄ\x{A2}";
27my $name_first = "b\N{MICRO SIGN}Ɓ";
28my $name_last = "bƁ\N{MICRO SIGN}";
29my $uname_first = "b\N{U+00B5}Ɓ";
30my $uname_last = "bƁ\N{U+00B5}";
31my $octal_first = "c\377Ć";
32my $octal_last = "cĆ\377";
33
5d508e6c
KW
34sub fixup (@) {
35 # @_ is a list of strings. Each string is comprised of the digits that
36 # form a byte of the UTF-8 representation of a character, or sequence of
37 # characters
38
39 my $string = join "", map { chr 0 + $_ } @_;
40 $string = byte_utf8a_to_utf8n($string);
41
42 # Return the concatenation of each byte of $string converted to a string of
43 # its decimal ordinal value. This is just the input array converted to
44 # native, and joined together.
45 return join "", map { sprintf "%d", ord $_ } split "", $string;
46}
47
77a135fe
KW
48do {
49 use bytes;
5d508e6c
KW
50 is((join "", unpack("C*", $A_with_ogonek)), fixup("196", "132"), 'single char above 0x100');
51 is((join "", unpack("C*", $micro_sign)), fixup("194", "181"), 'single char in 0x80 .. 0xFF');
52 SKIP: {
53 skip("ASCII-centric tests", 2) if $::IS_EBCDIC;
54 is((join "", unpack("C*", $hex_first)), fixup("97", "194", "162", "196", "132"), 'a, \x{A2}, char above 0x100');
55 is((join "", unpack("C*", $hex_last)), fixup("97", "196", "132", "194", "162"), 'a, char above 0x100, \x{A2}');
56 }
57 is((join "", unpack("C*", $name_first)), fixup("98", "194", "181", "198", "129"), 'b, \N{MICRO SIGN}, char above 0x100');
58 is((join "", unpack("C*", $name_last)), fixup("98", "198", "129", "194", "181"), 'b, char above 0x100, \N{MICRO SIGN}');
59 is((join "", unpack("C*", $uname_first)), fixup("98", "194", "181", "198", "129"), 'b, \N{U+00B5}, char above 0x100');
60 is((join "", unpack("C*", $uname_last)), fixup("98", "198", "129", "194", "181"), 'b, char above 0x100, \N{U+00B5}');
61 SKIP: {
62 skip("ASCII-centric tests", 2) if $::IS_EBCDIC;
63 is((join "", unpack("C*", $octal_first)), fixup("99", "195", "191", "196", "134"), 'c, \377, char above 0x100');
64 is((join "", unpack("C*", $octal_last)), fixup("99", "196", "134", "195", "191"), 'c, char above 0x100, \377');
65 }
ba6ff154
FC
66};
67
68{
69 local $SIG{__WARN__} = sub {};
70 eval "our $::\xe9; $\xe9";
71 unlike $@, qr/utf8_heavy/,
72 'No utf8_heavy errors with our() syntax errors';
77a135fe 73}
e68dd03a
FC
74
75# [perl #120463]
76$_ = "a";
77eval 's αaαbα';
78is $@, "", 's/// compiles, where / is actually a wide character';
79is $_, "b", 'substitution worked';
80$_ = "a";
81eval 'tr νaνbν';
82is $@, "", 'y/// compiles, where / is actually a wide character';
83is $_, "b", 'transliteration worked';
84
5d508e6c
KW
85SKIP: {
86 skip("ASCII-centric test", 1) if $::IS_EBCDIC;
87 use constant foofoo=>qq|\xc4\xb5|;
88 { no strict; ()=${"\xc4\xb5::foo"} } # vivify ĵ package
89 eval 'my foofoo $dog'; # foofoo was resolving to ĵ, not ĵ
90 is $@, '', 'my constant $var in utf8 scope where constant is not utf8';
91}
42d532a6 92
77a135fe
KW
93__END__
94