[perl5.git] / lib / feature / unicode_strings.t

use warnings;
use strict;

BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
    require './test.pl';
}

plan(13312);    # Determined by experimentation

# Test the upper/lower/title case mappings for all characters 0-255.

# First compute the case mappings without resorting to the functions we're
# testing.

# Initialize the arrays so each $i maps to itself.
my @posix_to_upper;
for my $i (0 .. 255) {
    $posix_to_upper[$i] = chr($i);
}
my @posix_to_lower
= my @posix_to_title
= my @latin1_to_upper
= my @latin1_to_lower
= my @latin1_to_title
= @posix_to_upper;

# Override the elements in the to_lower arrays that have different lower case 
# mappings
for my $i (0x41 .. 0x5A) {
    $posix_to_lower[$i] = chr(ord($posix_to_lower[$i]) + 32);
    $latin1_to_lower[$i] = chr(ord($latin1_to_lower[$i]) + 32);
}

# Same for upper and title
for my $i (0x61 .. 0x7A) {
    $posix_to_upper[$i] = chr(ord($posix_to_upper[$i]) - 32);
    $latin1_to_upper[$i] = chr(ord($latin1_to_upper[$i]) - 32);
    $posix_to_title[$i] = chr(ord($posix_to_title[$i]) - 32);
    $latin1_to_title[$i] = chr(ord($latin1_to_title[$i]) - 32);
}

# And the same for those in the latin1 range
for my $i (0xC0 .. 0xD6, 0xD8 .. 0xDE) {
    $latin1_to_lower[$i] = chr(ord($latin1_to_lower[$i]) + 32);
}
for my $i (0xE0 .. 0xF6, 0xF8 .. 0xFE) {
    $latin1_to_upper[$i] = chr(ord($latin1_to_upper[$i]) - 32);
    $latin1_to_title[$i] = chr(ord($latin1_to_title[$i]) - 32);
}

# Override the abnormal cases.
$latin1_to_upper[0xB5] = chr(0x39C);
$latin1_to_title[0xB5] = chr(0x39C);
$latin1_to_upper[0xDF] = 'SS';
$latin1_to_title[0xDF] = 'Ss';
$latin1_to_upper[0xFF] = chr(0x178);
$latin1_to_title[0xFF] = chr(0x178);

my $repeat = 25;    # Length to make strings.

# Create hashes of strings in several ranges, both for uc and lc.
my %posix;
$posix{'uc'} = 'A' x $repeat;
$posix{'lc'} = 'a' x $repeat ;

my %cyrillic;
$cyrillic{'uc'} = chr(0x42F) x $repeat;
$cyrillic{'lc'} = chr(0x44F) x $repeat;

my %latin1;
$latin1{'uc'} = chr(0xD8) x $repeat;
$latin1{'lc'} = chr(0xF8) x $repeat;

my %empty;
$empty{'lc'} = $empty{'uc'} = "";

# Loop so prefix each character being tested with nothing, and the various
# strings; then loop for suffixes of those strings as well.
for my  $prefix (\%empty, \%posix, \%cyrillic, \%latin1) {
    for my  $suffix (\%empty, \%posix, \%cyrillic, \%latin1) {
        for my $i (0 .. 255) {  # For each possible posix or latin1 character
            my $cp = sprintf "U+%04X", $i;

            # First try using latin1 (Unicode) semantics.
            use feature "unicode_strings";    

            my $phrase = 'with uni8bit';
            my $char = chr($i);
            my $pre_lc = $prefix->{'lc'};
            my $pre_uc = $prefix->{'uc'};
            my $post_lc = $suffix->{'lc'};
            my $post_uc = $suffix->{'uc'};
            my $to_upper = $pre_lc . $char . $post_lc;
            my $expected_upper = $pre_uc . $latin1_to_upper[$i] . $post_uc;
            my $to_lower = $pre_uc . $char . $post_uc;
            my $expected_lower = $pre_lc . $latin1_to_lower[$i] . $post_lc;

            is (uc($to_upper), $expected_upper,
                display("$cp: $phrase: uc($to_upper) eq $expected_upper"));
            is (lc($to_lower), $expected_lower,
                display("$cp: $phrase: lc($to_lower) eq $expected_lower"));

            if ($pre_uc eq "") {    # Title case if null prefix.
                my $expected_title = $latin1_to_title[$i] . $post_lc;
                is (ucfirst($to_upper), $expected_title,
                    display("$cp: $phrase: ucfirst($to_upper) eq $expected_title"));
                my $expected_lcfirst = $latin1_to_lower[$i] . $post_uc;
                is (lcfirst($to_lower), $expected_lcfirst,
                    display("$cp: $phrase: lcfirst($to_lower) eq $expected_lcfirst"));
            }

            # Then try with posix semantics.
            no feature "unicode_strings";
            $phrase = 'no uni8bit';

            # These don't contribute anything in this case.
            next if $suffix == \%cyrillic;
            next if $suffix == \%latin1;
            next if $prefix == \%cyrillic;
            next if $prefix == \%latin1;

            $expected_upper = $pre_uc . $posix_to_upper[$i] . $post_uc;
            $expected_lower = $pre_lc . $posix_to_lower[$i] . $post_lc;

            is (uc($to_upper), $expected_upper,
                display("$cp: $phrase: uc($to_upper) eq $expected_upper"));
            is (lc($to_lower), $expected_lower,
                display("$cp: $phrase: lc($to_lower) eq $expected_lower"));

            if ($pre_uc eq "") {
                my $expected_title = $posix_to_title[$i] . $post_lc;
                is (ucfirst($to_upper), $expected_title,
                    display("$cp: $phrase: ucfirst($to_upper) eq $expected_title"));
                my $expected_lcfirst = $posix_to_lower[$i] . $post_uc;
                is (lcfirst($to_lower), $expected_lcfirst,
                    display("$cp: $phrase: lcfirst($to_lower) eq $expected_lcfirst"));
            }
        }
    }
}
Commit	Line	Data
00f254e2 KW	1	use warnings;
	2	use strict;
	3
	4	BEGIN {
	5	chdir 't' if -d 't';
	6	@INC = '../lib';
	7	require './test.pl';
	8	}
	9
61fc5122	10	plan(13312); # Determined by experimentation
00f254e2	11
61fc5122	12	# Test the upper/lower/title case mappings for all characters 0-255.
00f254e2 KW	13
	14	# First compute the case mappings without resorting to the functions we're
	15	# testing.
	16
	17	# Initialize the arrays so each $i maps to itself.
	18	my @posix_to_upper;
	19	for my $i (0 .. 255) {
	20	$posix_to_upper[$i] = chr($i);
	21	}
	22	my @posix_to_lower
	23	= my @posix_to_title
	24	= my @latin1_to_upper
	25	= my @latin1_to_lower
	26	= my @latin1_to_title
	27	= @posix_to_upper;
	28
	29	# Override the elements in the to_lower arrays that have different lower case
61fc5122	30	# mappings
00f254e2 KW	31	for my $i (0x41 .. 0x5A) {
	32	$posix_to_lower[$i] = chr(ord($posix_to_lower[$i]) + 32);
	33	$latin1_to_lower[$i] = chr(ord($latin1_to_lower[$i]) + 32);
	34	}
	35
	36	# Same for upper and title
	37	for my $i (0x61 .. 0x7A) {
	38	$posix_to_upper[$i] = chr(ord($posix_to_upper[$i]) - 32);
	39	$latin1_to_upper[$i] = chr(ord($latin1_to_upper[$i]) - 32);
	40	$posix_to_title[$i] = chr(ord($posix_to_title[$i]) - 32);
	41	$latin1_to_title[$i] = chr(ord($latin1_to_title[$i]) - 32);
	42	}
	43
	44	# And the same for those in the latin1 range
	45	for my $i (0xC0 .. 0xD6, 0xD8 .. 0xDE) {
	46	$latin1_to_lower[$i] = chr(ord($latin1_to_lower[$i]) + 32);
	47	}
	48	for my $i (0xE0 .. 0xF6, 0xF8 .. 0xFE) {
	49	$latin1_to_upper[$i] = chr(ord($latin1_to_upper[$i]) - 32);
	50	$latin1_to_title[$i] = chr(ord($latin1_to_title[$i]) - 32);
	51	}
	52
	53	# Override the abnormal cases.
	54	$latin1_to_upper[0xB5] = chr(0x39C);
	55	$latin1_to_title[0xB5] = chr(0x39C);
	56	$latin1_to_upper[0xDF] = 'SS';
	57	$latin1_to_title[0xDF] = 'Ss';
	58	$latin1_to_upper[0xFF] = chr(0x178);
	59	$latin1_to_title[0xFF] = chr(0x178);
	60
	61	my $repeat = 25; # Length to make strings.
	62
	63	# Create hashes of strings in several ranges, both for uc and lc.
	64	my %posix;
	65	$posix{'uc'} = 'A' x $repeat;
	66	$posix{'lc'} = 'a' x $repeat ;
	67
	68	my %cyrillic;
	69	$cyrillic{'uc'} = chr(0x42F) x $repeat;
	70	$cyrillic{'lc'} = chr(0x44F) x $repeat;
	71
	72	my %latin1;
	73	$latin1{'uc'} = chr(0xD8) x $repeat;
	74	$latin1{'lc'} = chr(0xF8) x $repeat;
	75
	76	my %empty;
	77	$empty{'lc'} = $empty{'uc'} = "";
	78
	79	# Loop so prefix each character being tested with nothing, and the various
	80	# strings; then loop for suffixes of those strings as well.
	81	for my $prefix (\%empty, \%posix, \%cyrillic, \%latin1) {
	82	for my $suffix (\%empty, \%posix, \%cyrillic, \%latin1) {
	83	for my $i (0 .. 255) { # For each possible posix or latin1 character
61fc5122	84	my $cp = sprintf "U+%04X", $i;
00f254e2 KW	85
00f254e2 KW	86	# First try using latin1 (Unicode) semantics.
1863b879	87	use feature "unicode_strings";
00f254e2	88
61fc5122	89	my $phrase = 'with uni8bit';
00f254e2 KW	90	my $char = chr($i);
	91	my $pre_lc = $prefix->{'lc'};
	92	my $pre_uc = $prefix->{'uc'};
	93	my $post_lc = $suffix->{'lc'};
	94	my $post_uc = $suffix->{'uc'};
	95	my $to_upper = $pre_lc . $char . $post_lc;
	96	my $expected_upper = $pre_uc . $latin1_to_upper[$i] . $post_uc;
	97	my $to_lower = $pre_uc . $char . $post_uc;
	98	my $expected_lower = $pre_lc . $latin1_to_lower[$i] . $post_lc;
	99
	100	is (uc($to_upper), $expected_upper,
61fc5122	101	display("$cp: $phrase: uc($to_upper) eq $expected_upper"));
00f254e2	102	is (lc($to_lower), $expected_lower,
61fc5122	103	display("$cp: $phrase: lc($to_lower) eq $expected_lower"));
00f254e2 KW	104
	105	if ($pre_uc eq "") { # Title case if null prefix.
	106	my $expected_title = $latin1_to_title[$i] . $post_lc;
	107	is (ucfirst($to_upper), $expected_title,
61fc5122	108	display("$cp: $phrase: ucfirst($to_upper) eq $expected_title"));
00f254e2 KW	109	my $expected_lcfirst = $latin1_to_lower[$i] . $post_uc;
00f254e2 KW	110	is (lcfirst($to_lower), $expected_lcfirst,
61fc5122	111	display("$cp: $phrase: lcfirst($to_lower) eq $expected_lcfirst"));
00f254e2 KW	112	}
	113
	114	# Then try with posix semantics.
1863b879	115	no feature "unicode_strings";
61fc5122	116	$phrase = 'no uni8bit';
00f254e2 KW	117
	118	# These don't contribute anything in this case.
	119	next if $suffix == \%cyrillic;
	120	next if $suffix == \%latin1;
	121	next if $prefix == \%cyrillic;
	122	next if $prefix == \%latin1;
	123
	124	$expected_upper = $pre_uc . $posix_to_upper[$i] . $post_uc;
	125	$expected_lower = $pre_lc . $posix_to_lower[$i] . $post_lc;
	126
	127	is (uc($to_upper), $expected_upper,
61fc5122	128	display("$cp: $phrase: uc($to_upper) eq $expected_upper"));
00f254e2	129	is (lc($to_lower), $expected_lower,
61fc5122	130	display("$cp: $phrase: lc($to_lower) eq $expected_lower"));
00f254e2 KW	131
	132	if ($pre_uc eq "") {
	133	my $expected_title = $posix_to_title[$i] . $post_lc;
	134	is (ucfirst($to_upper), $expected_title,
61fc5122	135	display("$cp: $phrase: ucfirst($to_upper) eq $expected_title"));
00f254e2 KW	136	my $expected_lcfirst = $posix_to_lower[$i] . $post_uc;
00f254e2 KW	137	is (lcfirst($to_lower), $expected_lcfirst,
61fc5122	138	display("$cp: $phrase: lcfirst($to_lower) eq $expected_lcfirst"));
00f254e2 KW	139	}
	140	}
	141	}
	142	}