use charnames ':full';
my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
- is($text, latin1_to_native("\xc4"), 'Verify \N{} returns correct string under "no utf8"');
+ is($text, chr utf8::unicode_to_native(0xc4), 'Verify \N{} returns correct string under "no utf8"');
# I'm not sure that this tests anything different from the above.
- cmp_ok(ord($text), '==', ord(latin1_to_native("\xc4")), '... and ords are ok');
+ cmp_ok(ord($text), '==', utf8::unicode_to_native(0xc4), '... and ords are ok');
}
{
is("\N{LINE FEED}", "\n", 'Verify "\N{LINE FEED}" eq "\n"');
is("\N{LF}", "\n", 'Verify "\N{LF}" eq "\n"');
-my $nel = latin1_to_native("\x85");
+my $nel = chr utf8::unicode_to_native(0x85);
$nel = qr/^$nel$/;
like("\N{NEXT LINE (NEL)}", $nel, 'Verify "\N{NEXT LINE (NEL)}" is correct');
# Override the elements in the to_lower arrays that have different standard
# lower case mappings. (standard meaning they are 32 numbers apart)
for my $i (0x41 .. 0x5A, 0xC0 .. 0xD6, 0xD8 .. 0xDE) {
- my $upper_ord = ord_latin1_to_native $i;
- my $lower_ord = ord_latin1_to_native($i + 32);
+ my $upper_ord = utf8::unicode_to_native $i;
+ my $lower_ord = utf8::unicode_to_native($i + 32);
$latin1_to_lower[$upper_ord] = chr($lower_ord);
# Same for upper and title
for my $i (0x61 .. 0x7A, 0xE0 .. 0xF6, 0xF8 .. 0xFE) {
- my $lower_ord = ord_latin1_to_native $i;
- my $upper_ord = ord_latin1_to_native($i - 32);
+ my $lower_ord = utf8::unicode_to_native $i;
+ my $upper_ord = utf8::unicode_to_native($i - 32);
$latin1_to_upper[$lower_ord] = chr($upper_ord);
$latin1_to_title[$lower_ord] = chr($upper_ord);
}
# Override the abnormal cases.
-$latin1_to_upper[ord_latin1_to_native 0xB5] = chr(0x39C);
-$latin1_to_title[ord_latin1_to_native 0xB5] = chr(0x39C);
-$latin1_to_upper[ord_latin1_to_native 0xDF] = 'SS';
-$latin1_to_title[ord_latin1_to_native 0xDF] = 'Ss';
-$latin1_to_upper[ord_latin1_to_native 0xFF] = chr(0x178);
-$latin1_to_title[ord_latin1_to_native 0xFF] = chr(0x178);
+$latin1_to_upper[utf8::unicode_to_native 0xB5] = chr(0x39C);
+$latin1_to_title[utf8::unicode_to_native 0xB5] = chr(0x39C);
+$latin1_to_upper[utf8::unicode_to_native 0xDF] = 'SS';
+$latin1_to_title[utf8::unicode_to_native 0xDF] = 'Ss';
+$latin1_to_upper[utf8::unicode_to_native 0xFF] = chr(0x178);
+$latin1_to_title[utf8::unicode_to_native 0xFF] = chr(0x178);
my $repeat = 25; # Length to make strings.
$cyrillic{'lc'} = chr(0x44F) x $repeat;
my %latin1;
-$latin1{'uc'} = chr(ord_latin1_to_native 0xD8) x $repeat;
-$latin1{'lc'} = chr(ord_latin1_to_native 0xF8) x $repeat;
+$latin1{'uc'} = chr(utf8::unicode_to_native 0xD8) x $repeat;
+$latin1{'lc'} = chr(utf8::unicode_to_native 0xF8) x $repeat;
my %empty;
$empty{'lc'} = $empty{'uc'} = "";
0xF8 .. 0xFF, # various
)
{
- $w[ord_latin1_to_native $i] = 1;
+ $w[utf8::unicode_to_native $i] = 1;
}
# Boolean: is s[$i] a \s character?
my @s = (0) x 256;
-$s[ord_latin1_to_native 0x09] = 1; # Tab
-$s[ord_latin1_to_native 0x0A] = 1; # LF
-$s[ord_latin1_to_native 0x0B] = 1; # VT
-$s[ord_latin1_to_native 0x0C] = 1; # FF
-$s[ord_latin1_to_native 0x0D] = 1; # CR
-$s[ord_latin1_to_native 0x20] = 1; # SPACE
-$s[ord_latin1_to_native 0x85] = 1; # NEL
-$s[ord_latin1_to_native 0xA0] = 1; # NO BREAK SPACE
+$s[utf8::unicode_to_native 0x09] = 1; # Tab
+$s[utf8::unicode_to_native 0x0A] = 1; # LF
+$s[utf8::unicode_to_native 0x0B] = 1; # VT
+$s[utf8::unicode_to_native 0x0C] = 1; # FF
+$s[utf8::unicode_to_native 0x0D] = 1; # CR
+$s[utf8::unicode_to_native 0x20] = 1; # SPACE
+$s[utf8::unicode_to_native 0x85] = 1; # NEL
+$s[utf8::unicode_to_native 0xA0] = 1; # NO BREAK SPACE
for my $i (0 .. 255) {
my $char = chr($i);
if (! $target_has_ascii) {
foreach my $cp (@target) {
if (exists $folds{$cp}
- && grep { ord_native_to_latin1($_) < 128 } @{$folds{$cp}} )
+ && grep { utf8::native_to_unicode($_) < 128 } @{$folds{$cp}} )
{
$target_has_ascii = 1;
last;
if (! $pattern_has_ascii) {
foreach my $cp (@pattern) {
if (exists $folds{$cp}
- && grep { ord_native_to_latin1($_) < 128 } @{$folds{$cp}} )
+ && grep { utf8::native_to_unicode($_) < 128 } @{$folds{$cp}} )
{
$pattern_has_ascii = 1;
last;
# characters, but in posix anything outside ASCII maps to itself, as we've
# already set up.
for my $i (0x41 .. 0x5A, 0xC0 .. 0xD6, 0xD8 .. 0xDE) {
- my $upper_ord = ord_latin1_to_native($i);
- my $lower_ord = ord_latin1_to_native($i + 32);
+ my $upper_ord = utf8::unicode_to_native($i);
+ my $lower_ord = utf8::unicode_to_native($i + 32);
$fold_latin1[$upper_ord] = $lower_ord;
# Same for folding lower to the upper equivalents
for my $i (0x61 .. 0x7A, 0xE0 .. 0xF6, 0xF8 .. 0xFE) {
- my $lower_ord = ord_latin1_to_native($i);
- my $upper_ord = ord_latin1_to_native($i - 32);
+ my $lower_ord = utf8::unicode_to_native($i);
+ my $upper_ord = utf8::unicode_to_native($i - 32);
$fold_latin1[$lower_ord] = $upper_ord;
return $string if $::IS_ASCII;
my $output = "";
for my $i (0 .. length($string) - 1) {
- $output .= chr(ord_native_to_latin1(ord(substr($string, $i, 1))));
+ $output .= chr(utf8::native_to_unicode(ord(substr($string, $i, 1))));
}
# Preserve utf8ness of input onto the output, even if it didn't need to be
# utf8
return $string if $::IS_ASCII;
my $output = "";
for my $i (0 .. length($string) - 1) {
- $output .= chr(ord_latin1_to_native(ord(substr($string, $i, 1))));
+ $output .= chr(utf8::unicode_to_native(ord(substr($string, $i, 1))));
}
# Preserve utf8ness of input onto the output, even if it didn't need to be
# utf8
return $output;
}
-sub ord_latin1_to_native {
- # given an input code point, return the platform's native
- # equivalent value. Anything above latin1 is itself.
-
- my $ord = shift;
- return $ord if $::IS_ASCII;
- return utf8::unicode_to_native($ord);
-}
-
-sub ord_native_to_latin1 {
- # given an input platform code point, return the latin1 equivalent value.
- # Anything above latin1 is itself.
-
- my $ord = shift;
- return $ord if ord('^') == 94; # ASCII, Latin1
- return utf8::native_to_unicode($ord);
-}
-
1;
plan tests => 11;
-my $str = join "", map latin1_to_native(chr($_)), 0x20 .. 0x6F;
+
+my $str = join "", map { chr utf8::unicode_to_native($_) } 0x20 .. 0x6F;
is(($str =~ /(\p{IsMyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO',
'user-defined class compiled before defined');