X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/96980024b32bc912ece29c8bb2f36420dce8d27c..dd9a180e74e86e123c4a37c05803e14975ecbb69:/t/test.pl diff --git a/t/test.pl b/t/test.pl index a3bab73..5b1ee18 100644 --- a/t/test.pl +++ b/t/test.pl @@ -24,6 +24,10 @@ my $planned; my $noplan; my $Perl; # Safer version of $^X set by which_perl() +# This defines ASCII/UTF-8 vs EBCDIC/UTF-EBCDIC +$::IS_ASCII = ord 'A' == 65; +$::IS_EBCDIC = ord 'A' == 193; + $TODO = 0; $NO_ENDING = 0; $Tests_Are_Passing = 1; @@ -105,6 +109,16 @@ sub _comment { map { split /\n/ } @_; } +sub _have_dynamic_extension { + my $extension = shift; + unless (eval {require Config; 1}) { + warn "test.pl had problems loading Config: $@"; + return 1; + } + $extension =~ s!::!/!g; + return 1 if ($Config::Config{extensions} =~ /\b$extension\b/); +} + sub skip_all { if (@_) { _print "1..0 # Skip @_\n"; @@ -118,6 +132,74 @@ sub skip_all_if_miniperl { skip_all(@_) if is_miniperl(); } +sub skip_all_without_dynamic_extension { + my ($extension) = @_; + skip_all("no dynamic loading on miniperl, no $extension") if is_miniperl(); + return if &_have_dynamic_extension; + skip_all("$extension was not built"); +} + +sub skip_all_without_perlio { + skip_all('no PerlIO') unless PerlIO::Layer->find('perlio'); +} + +sub skip_all_without_config { + unless (eval {require Config; 1}) { + warn "test.pl had problems loading Config: $@"; + return; + } + foreach (@_) { + next if $Config::Config{$_}; + my $key = $_; # Need to copy, before trying to modify. + $key =~ s/^use//; + $key =~ s/^d_//; + skip_all("no $key"); + } +} + +sub find_git_or_skip { + my ($source_dir, $reason); + if (-d '.git') { + $source_dir = '.'; + } elsif (-l 'MANIFEST' && -l 'AUTHORS') { + my $where = readlink 'MANIFEST'; + die "Can't readling MANIFEST: $!" unless defined $where; + die "Confusing symlink target for MANIFEST, '$where'" + unless $where =~ s!/MANIFEST\z!!; + if (-d "$where/.git") { + # Looks like we are in a symlink tree + if (exists $ENV{GIT_DIR}) { + diag("Found source tree at $where, but \$ENV{GIT_DIR} is $ENV{GIT_DIR}. Not changing it"); + } else { + note("Found source tree at $where, setting \$ENV{GIT_DIR}"); + $ENV{GIT_DIR} = "$where/.git"; + } + $source_dir = $where; + } + } + if ($source_dir) { + my $version_string = `git --version`; + if (defined $version_string + && $version_string =~ /\Agit version (\d+\.\d+\.\d+)(.*)/) { + return $source_dir if eval "v$1 ge v1.5.0"; + # If you have earlier than 1.5.0 and it works, change this test + $reason = "in git checkout, but git version '$1$2' too old"; + } else { + $reason = "in git checkout, but cannot run git"; + } + } else { + $reason = 'not being run from a git checkout'; + } + skip_all($reason) if $_[0] && $_[0] eq 'all'; + skip($reason, @_); +} + +sub BAIL_OUT { + my ($reason) = @_; + _print("Bail out! $reason\n"); + exit 255; +} + sub _ok { my ($pass, $where, $name, @mess) = @_; # Do not try to microoptimize by factoring out the "not ". @@ -143,7 +225,10 @@ sub _ok { note @mess; # Ensure that the message is properly escaped. } else { - _diag "# Failed $where\n"; + my $msg = "# Failed test $test - "; + $msg.= "$name " if $name; + $msg .= "$where\n"; + _diag $msg; _diag @mess; } @@ -199,7 +284,9 @@ sub display { if ($z =~ /[[:^print:]]/) { # Use octal for characters traditionally expressed as - # such: the low controls + # such: the low controls, which on EBCDIC aren't + # necessarily the same ones as on ASCII platforms, but + # are small ordinals, nonetheless if ($c <= 037) { $z = sprintf "\\%03o", $c; } else { @@ -374,6 +461,13 @@ sub skip_if_miniperl { skip(@_) if is_miniperl(); } +sub skip_without_dynamic_extension { + my ($extension) = @_; + skip("no dynamic loading on miniperl, no $extension") if is_miniperl(); + return if &_have_dynamic_extension; + skip("$extension was not built"); +} + sub todo_skip { my $why = shift; my $n = @_ ? shift : 1; @@ -405,7 +499,10 @@ sub eq_hash { # Force a hash recompute if this perl's internals can cache the hash key. $key = "" . $key; if (exists $orig->{$key}) { - if ($orig->{$key} ne $value) { + if ( + defined $orig->{$key} != defined $value + || (defined $value && $orig->{$key} ne $value) + ) { _print "# key ", _qq($key), " was ", _qq($orig->{$key}), " now ", _qq($value), "\n"; $fail = 1; @@ -451,7 +548,7 @@ USE_OK } } -# runperl - Runs a separate perl interpreter. +# runperl - Runs a separate perl interpreter and returns its output. # Arguments : # switches => [ command-line switches ] # nolib => 1 # don't use -I../lib (included by default) @@ -459,7 +556,7 @@ USE_OK # prog => one-liner (avoid quotes) # progs => [ multi-liner (avoid quotes) ] # progfile => perl script -# stdin => string to feed the stdin +# stdin => string to feed the stdin (or undef to redirect from /dev/null) # stderr => redirect stderr to stdout # args => [ command-line arguments to the perl program ] # verbose => print the command line @@ -509,8 +606,16 @@ sub _create_runperl { # Create the string to qx in runperl(). die "test.pl:runperl(): 'progs' must be an ARRAYREF " . _where() unless ref $args{progs} eq "ARRAY"; foreach my $prog (@{$args{progs}}) { - if ($prog =~ tr/'"// && !$args{non_portable}) { - warn "quotes in prog >>$prog<< are not portable"; + if (!$args{non_portable}) { + if ($prog =~ tr/'"//) { + warn "quotes in prog >>$prog<< are not portable"; + } + if ($prog =~ /^([<>|]|2>)/) { + warn "Initial $1 in prog >>$prog<< is not portable"; + } + if ($prog =~ /&\z/) { + warn "Trailing & in prog >>$prog<< is not portable"; + } } if ($is_mswin || $is_netware || $is_vms) { $runperl = $runperl . qq ( -e "$prog" ); @@ -542,6 +647,28 @@ sub _create_runperl { # Create the string to qx in runperl(). $runperl = qq{$Perl -e 'print qq(} . $args{stdin} . q{)' | } . $runperl; } + } elsif (exists $args{stdin}) { + # Using the pipe construction above can cause fun on systems which use + # ksh as /bin/sh, as ksh does pipes differently (with one less process) + # With sh, for the command line 'perl -e 'print qq()' | perl -e ...' + # the sh process forks two children, which use exec to start the two + # perl processes. The parent shell process persists for the duration of + # the pipeline, and the second perl process starts with no children. + # With ksh (and zsh), the shell saves a process by forking a child for + # just the first perl process, and execing itself to start the second. + # This means that the second perl process starts with one child which + # it didn't create. This causes "fun" when if the tests assume that + # wait (or waitpid) will only return information about processes + # started within the test. + # They also cause fun on VMS, where the pipe implementation returns + # the exit code of the process at the front of the pipeline, not the + # end. This messes up any test using OPTION FATAL. + # Hence it's useful to have a way to make STDIN be at eof without + # needing a pipeline, so that the fork tests have a sane environment + # without these surprises. + + # /dev/null appears to be surprisingly portable. + $runperl = $runperl . ($is_mswin ? ' = 0; + my $alpha = ''; + my $char_count = 0; + $max_char = 0 if $max_char < 0; + + while( 1 ){ + $alpha = $letters[ $num % 26 ] . $alpha; + $num = int( $num / 26 ); + last if $num == 0; + $num = $num - 1; + + # char limit + next unless $max_char; + $char_count = $char_count + 1; + return if $char_count == $max_char; + } + return $alpha; +} + my %tmpfiles; END { unlink_all keys %tmpfiles } @@ -677,26 +842,24 @@ END { unlink_all keys %tmpfiles } $::tempfile_regexp = 'tmp\d+[A-Z][A-Z]?'; # Avoid ++, avoid ranges, avoid split // -my @letters = qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z); +my $tempfile_count = 0; sub tempfile { - my $count = 0; - do { - my $temp = $count; + while(1){ my $try = "tmp$$"; - do { - $try = $try . $letters[$temp % 26]; - $temp = int ($temp / 26); - } while $temp; + my $alpha = _num_to_alpha($tempfile_count,2); + last unless defined $alpha; + $try = $try . $alpha; + $tempfile_count = $tempfile_count + 1; + # Need to note all the file names we allocated, as a second request may # come before the first is created. - if (!-e $try && !$tmpfiles{$try}) { + if (!$tmpfiles{$try} && !-e $try) { # We have a winner $tmpfiles{$try} = 1; return $try; } - $count = $count + 1; - } while $count < 26 * 26; - die "Can't find temporary file name starting 'tmp$$'"; + } + die "Can't find temporary file name starting \"tmp$$\""; } # This is the temporary file for _fresh_perl @@ -713,20 +876,10 @@ sub _fresh_perl { # it feels like the least-worse thing is to assume that auto-vivification # works. At least, this is only going to be a run-time failure, so won't # affect tests using this file but not this function. - $runperl_args->{progfile} = $tmpfile; - $runperl_args->{stderr} = 1; + $runperl_args->{progfile} ||= $tmpfile; + $runperl_args->{stderr} = 1 unless exists $runperl_args->{stderr}; open TEST, ">$tmpfile" or die "Cannot open $tmpfile: $!"; - - # VMS adjustments - if( $is_vms ) { - $prog =~ s#/dev/null#NL:#; - - # VMS file locking - $prog =~ s{if \(-e _ and -f _ and -r _\)} - {if (-e _ and -f _)} - } - print TEST $prog; close TEST or die "Cannot close $tmpfile: $!"; @@ -809,6 +962,96 @@ sub fresh_perl_like { # Many tests use the same format in __DATA__ or external files to specify a # sequence of (fresh) tests to run, extra files they may temporarily need, and # what the expected output is. So have excatly one copy of the code to run that +# +# Each program is source code to run followed by an "EXPECT" line, followed +# by the expected output. +# +# The code to run may begin with a command line switch such as -w or -0777 +# (alphanumerics only), and may contain (note the '# ' on each): +# # TODO reason for todo +# # SKIP reason for skip +# # SKIP ?code to test if this should be skipped +# # NAME name of the test (as with ok($ok, $name)) +# +# The expected output may contain: +# OPTION list of options +# OPTIONS list of options +# +# The possible options for OPTION may be: +# regex - the expected output is a regular expression +# random - all lines match but in any order +# fatal - the code will fail fatally (croak, die) +# +# If the actual output contains a line "SKIPPED" the test will be +# skipped. +# +# If the actual output contains a line "PREFIX", any output starting with that +# line will be ignored when comparing with the expected output +# +# If the global variable $FATAL is true then OPTION fatal is the +# default. + +sub _setup_one_file { + my $fh = shift; + # Store the filename as a program that started at line 0. + # Real files count lines starting at line 1. + my @these = (0, shift); + my ($lineno, $current); + while (<$fh>) { + if ($_ eq "########\n") { + if (defined $current) { + push @these, $lineno, $current; + } + undef $current; + } else { + if (!defined $current) { + $lineno = $.; + } + $current .= $_; + } + } + if (defined $current) { + push @these, $lineno, $current; + } + ((scalar @these) / 2 - 1, @these); +} + +sub setup_multiple_progs { + my ($tests, @prgs); + foreach my $file (@_) { + next if $file =~ /(?:~|\.orig|,v)$/; + next if $file =~ /perlio$/ && !PerlIO::Layer->find('perlio'); + next if -d $file; + + open my $fh, '<', $file or die "Cannot open $file: $!\n" ; + my $found; + while (<$fh>) { + if (/^__END__/) { + ++$found; + last; + } + } + # This is an internal error, and should never happen. All bar one of + # the files had an __END__ marker to signal the end of their preamble, + # although for some it wasn't technically necessary as they have no + # tests. It might be possible to process files without an __END__ by + # seeking back to the start and treating the whole file as tests, but + # it's simpler and more reliable just to make the rule that all files + # must have __END__ in. This should never fail - a file without an + # __END__ should not have been checked in, because the regression tests + # would not have passed. + die "Could not find '__END__' in $file" + unless $found; + + my ($t, @p) = _setup_one_file($fh, $file); + $tests += $t; + push @prgs, @p; + + close $fh + or die "Cannot close $file: $!\n"; + } + return ($tests, @prgs); +} sub run_multiple_progs { my $up = shift; @@ -818,17 +1061,31 @@ sub run_multiple_progs { # pass in a list of "programs" to run @prgs = @_; } else { - # The tests below t run in t and pass in a file handle. - my $fh = shift; - local $/; - @prgs = split "\n########\n", <$fh>; + # The tests below t run in t and pass in a file handle. In theory we + # can pass (caller)[1] as the second argument to report errors with + # the filename of our caller, as the handle is always DATA. However, + # line numbers in DATA count from the __END__ token, so will be wrong. + # Which is more confusing than not providing line numbers. So, for now, + # don't provide line numbers. No obvious clean solution - one hack + # would be to seek DATA back to the start and read to the __END__ token, + # but that feels almost like we should just open $0 instead. + + # Not going to rely on undef in list assignment. + my $dummy; + ($dummy, @prgs) = _setup_one_file(shift); } my $tmpfile = tempfile(); - for (@prgs){ - unless (/\n/) { - print "# From $_\n"; + my ($file, $line); + PROGRAM: + while (defined ($line = shift @prgs)) { + $_ = shift @prgs; + unless ($line) { + $file = $_; + if (defined $file) { + print "# From $file\n"; + } next; } my $switch = ""; @@ -853,8 +1110,21 @@ sub run_multiple_progs { } } + my $name = ''; + if ($prog =~ s/^#\s*NAME\s+(.+)\n//m) { + $name = $1; + } + + if ($reason{skip}) { + SKIP: + { + skip($name ? "$name - $reason{skip}" : $reason{skip}, 1); + } + next PROGRAM; + } + if ($prog =~ /--FILE--/) { - my @files = split(/\n--FILE--\s*([^\s\n]*)\s*\n/, $prog) ; + my @files = split(/\n?--FILE--\s*([^\s\n]*)\s*\n/, $prog) ; shift @files ; die "Internal error: test $_ didn't split into pairs, got " . scalar(@files) . "[" . join("%%%%", @files) ."]\n" @@ -886,7 +1156,8 @@ sub run_multiple_progs { print $fh "\n#line 1\n"; # So the line numbers don't get messed up. print $fh $prog,"\n"; close $fh or die "Cannot close $tmpfile: $!"; - my $results = runperl( stderr => 1, progfile => $tmpfile, $up + my $results = runperl( stderr => 1, progfile => $tmpfile, + stdin => undef, $up ? (switches => ["-I$up/lib", $switch], nolib => 1) : (switches => [$switch]) ); @@ -912,6 +1183,7 @@ sub run_multiple_progs { # any special options? (OPTIONS foo bar zap) my $option_regex = 0; my $option_random = 0; + my $fatal = $FATAL; if ($expected =~ s/^OPTIONS? (.+)\n//) { foreach my $option (split(' ', $1)) { if ($option eq 'regex') { # allow regular expressions @@ -920,6 +1192,9 @@ sub run_multiple_progs { elsif ($option eq 'random') { # all lines match, but in any order $option_random = 1; } + elsif ($option eq 'fatal') { # perl should fail + $fatal = 1; + } else { die "$0: Unknown OPTION '$option'\n"; } @@ -932,28 +1207,36 @@ sub run_multiple_progs { print "$results\n" ; $ok = 1; } - elsif ($option_random) { - my @got = sort split "\n", $results; - my @expected = sort split "\n", $expected; - - $ok = "@got" eq "@expected"; - } - elsif ($option_regex) { - $ok = $results =~ /^$expected/; - } - elsif ($prefix) { - $ok = $results =~ /^\Q$expected/; - } else { - $ok = $results eq $expected; + if ($option_random) { + my @got = sort split "\n", $results; + my @expected = sort split "\n", $expected; + + $ok = "@got" eq "@expected"; + } + elsif ($option_regex) { + $ok = $results =~ /^$expected/; + } + elsif ($prefix) { + $ok = $results =~ /^\Q$expected/; + } + else { + $ok = $results eq $expected; + } + + if ($ok && $fatal && !($status >> 8)) { + $ok = 0; + } } local $::TODO = $reason{todo}; unless ($ok) { my $err_line = "PROG: $switch\n$prog\n" . - "EXPECTED:\n$expected\n" . - "GOT:\n$results\n"; + "EXPECTED:\n$expected\n"; + $err_line .= "EXIT STATUS: != 0\n" if $fatal; + $err_line .= "GOT:\n$results\n"; + $err_line .= "EXIT STATUS: " . ($status >> 8) . "\n" if $fatal; if ($::TODO) { $err_line =~ s/^/# /mg; print $err_line; # Harness can't filter it out from STDERR. @@ -963,7 +1246,14 @@ sub run_multiple_progs { } } - ok($ok); + if (defined $file) { + _ok($ok, "at $file line $line", $name); + } else { + # We don't have file and line number data for the test, so report + # errors as coming from our caller. + local $Level = $Level + 1; + ok($ok, $name); + } foreach (@temps) { unlink $_ if $_; @@ -997,7 +1287,7 @@ sub can_ok ($@) { } -# Call $class->new( @$args ); and run the result through isa_ok. +# Call $class->new( @$args ); and run the result through object_ok. # See Test::More::new_ok sub new_ok { my($class, $args, $obj_name) = @_; @@ -1011,7 +1301,7 @@ sub new_ok { my $error = $@; if($ok) { - isa_ok($obj, $class, $object_name); + object_ok($obj, $class, $object_name); } else { ok( 0, "new() died" ); @@ -1032,20 +1322,29 @@ sub isa_ok ($$;$) { if( !defined $object ) { $diag = "$obj_name isn't defined"; } - elsif( !ref $object ) { - $diag = "$obj_name isn't a reference"; - } else { + my $whatami = ref $object ? 'object' : 'class'; + # We can't use UNIVERSAL::isa because we want to honor isa() overrides local($@, $!); # eval sometimes resets $! my $rslt = eval { $object->isa($class) }; - if( $@ ) { - if( $@ =~ /^Can't call method "isa" on unblessed reference/ ) { + my $error = $@; # in case something else blows away $@ + + if( $error ) { + if( $error =~ /^Can't call method "isa" on unblessed reference/ ) { + # It's an unblessed reference + $obj_name = 'The reference' unless defined $obj_name; if( !UNIVERSAL::isa($object, $class) ) { my $ref = ref $object; $diag = "$obj_name isn't a '$class' it's a '$ref'"; } - } else { + } + elsif( $error =~ /Can't call method "isa" without a package/ ) { + # It's something that can't even be a class + $obj_name = 'The thing' unless defined $obj_name; + $diag = "$obj_name isn't a class or reference"; + } + else { die <isa on your object and got some weird error. This should never happen. Please contact the author immediately. @@ -1055,6 +1354,7 @@ WHOA } } elsif( !$rslt ) { + $obj_name = "The $whatami" unless defined $obj_name; my $ref = ref $object; $diag = "$obj_name isn't a '$class' it's a '$ref'"; } @@ -1063,23 +1363,102 @@ WHOA _ok( !$diag, _where(), $name ); } -sub warning_is { + +sub class_ok { + my($class, $isa, $class_name) = @_; + + # Written so as to count as one test + local $Level = $Level + 1; + if( ref $class ) { + ok( 0, "$class is a refrence, not a class name" ); + } + else { + isa_ok($class, $isa, $class_name); + } +} + + +sub object_ok { + my($obj, $isa, $obj_name) = @_; + + local $Level = $Level + 1; + if( !ref $obj ) { + ok( 0, "$obj is not a reference" ); + } + else { + isa_ok($obj, $isa, $obj_name); + } +} + + +# Purposefully avoiding a closure. +sub __capture { + push @::__capture, join "", @_; +} + +sub capture_warnings { + my $code = shift; + + local @::__capture; + local $SIG {__WARN__} = \&__capture; + &$code; + return @::__capture; +} + +# This will generate a variable number of tests. +# Use done_testing() instead of a fixed plan. +sub warnings_like { my ($code, $expect, $name) = @_; - my @w; - local $SIG {__WARN__} = sub {push @w, join "", @_}; - { - use warnings 'all'; - &$code; + local $Level = $Level + 1; + + my @w = capture_warnings($code); + + cmp_ok(scalar @w, '==', scalar @$expect, $name); + foreach my $e (@$expect) { + if (ref $e) { + like(shift @w, $e, $name); + } else { + is(shift @w, $e, $name); + } + } + if (@w) { + diag("Saw these additional warnings:"); + diag($_) foreach @w; } +} + +sub _fail_excess_warnings { + my($expect, $got, $name) = @_; + local $Level = $Level + 1; + # This will fail, and produce diagnostics + is($expect, scalar @$got, $name); + diag("Saw these warnings:"); + diag($_) foreach @$got; +} + +sub warning_is { + my ($code, $expect, $name) = @_; + die sprintf "Expect must be a string or undef, not a %s reference", ref $expect + if ref $expect; local $Level = $Level + 1; - if(!defined $expect) { - is("@w", '', $name); - } elsif (@w == 1) { + my @w = capture_warnings($code); + if (@w > 1) { + _fail_excess_warnings(0 + defined $expect, \@w, $name); + } else { is($w[0], $expect, $name); + } +} + +sub warning_like { + my ($code, $expect, $name) = @_; + die sprintf "Expect must be a regexp object" + unless ref $expect eq 'Regexp'; + local $Level = $Level + 1; + my @w = capture_warnings($code); + if (@w > 1) { + _fail_excess_warnings(0 + defined $expect, \@w, $name); } else { - # This will fail, generating diagnostics - cmp_ok(scalar @w, '==', 1, $name); - diag("Warning: $_") foreach @w; + like($w[0], $expect, $name); } } @@ -1106,7 +1485,7 @@ sub watchdog ($;$) # Don't use a watchdog process if 'threads' is loaded - # use a watchdog thread instead - if (!$threads_on) { + if (!$threads_on || $method eq "process") { # On Windows and VMS, try launching a watchdog process # using system(1, ...) (see perlport.pod) @@ -1173,6 +1552,11 @@ sub watchdog ($;$) if (kill(0, $pid_to_kill)) { _diag($timeout_msg); kill('KILL', $pid_to_kill); + if ($is_cygwin) { + # sometimes the above isn't enough on cygwin + sleep 1; # wait a little, it might have worked after all + system("/bin/kill -f $pid_to_kill"); + } } # Don't execute END block (added at beginning of this file) @@ -1188,7 +1572,7 @@ sub watchdog ($;$) # Use a watchdog thread because either 'threads' is loaded, # or fork() failed - if (eval 'require threads; 1') { + if (eval {require threads; 1}) { 'threads'->create(sub { # Load POSIX if available eval { require POSIX; }; @@ -1226,127 +1610,39 @@ WATCHDOG_VIA_ALARM: } } -my $cp_0037 = # EBCDIC code page 0037 - '\x00\x01\x02\x03\x37\x2D\x2E\x2F\x16\x05\x25\x0B\x0C\x0D\x0E\x0F' . - '\x10\x11\x12\x13\x3C\x3D\x32\x26\x18\x19\x3F\x27\x1C\x1D\x1E\x1F' . - '\x40\x5A\x7F\x7B\x5B\x6C\x50\x7D\x4D\x5D\x5C\x4E\x6B\x60\x4B\x61' . - '\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\x7A\x5E\x4C\x7E\x6E\x6F' . - '\x7C\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xD1\xD2\xD3\xD4\xD5\xD6' . - '\xD7\xD8\xD9\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xBA\xE0\xBB\xB0\x6D' . - '\x79\x81\x82\x83\x84\x85\x86\x87\x88\x89\x91\x92\x93\x94\x95\x96' . - '\x97\x98\x99\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xC0\x4F\xD0\xA1\x07' . - '\x20\x21\x22\x23\x24\x15\x06\x17\x28\x29\x2A\x2B\x2C\x09\x0A\x1B' . - '\x30\x31\x1A\x33\x34\x35\x36\x08\x38\x39\x3A\x3B\x04\x14\x3E\xFF' . - '\x41\xAA\x4A\xB1\x9F\xB2\x6A\xB5\xBD\xB4\x9A\x8A\x5F\xCA\xAF\xBC' . - '\x90\x8F\xEA\xFA\xBE\xA0\xB6\xB3\x9D\xDA\x9B\x8B\xB7\xB8\xB9\xAB' . - '\x64\x65\x62\x66\x63\x67\x9E\x68\x74\x71\x72\x73\x78\x75\x76\x77' . - '\xAC\x69\xED\xEE\xEB\xEF\xEC\xBF\x80\xFD\xFE\xFB\xFC\xAD\xAE\x59' . - '\x44\x45\x42\x46\x43\x47\x9C\x48\x54\x51\x52\x53\x58\x55\x56\x57' . - '\x8C\x49\xCD\xCE\xCB\xCF\xCC\xE1\x70\xDD\xDE\xDB\xDC\x8D\x8E\xDF'; - -my $cp_1047 = # EBCDIC code page 1047 - '\x00\x01\x02\x03\x37\x2D\x2E\x2F\x16\x05\x15\x0B\x0C\x0D\x0E\x0F' . - '\x10\x11\x12\x13\x3C\x3D\x32\x26\x18\x19\x3F\x27\x1C\x1D\x1E\x1F' . - '\x40\x5A\x7F\x7B\x5B\x6C\x50\x7D\x4D\x5D\x5C\x4E\x6B\x60\x4B\x61' . - '\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\x7A\x5E\x4C\x7E\x6E\x6F' . - '\x7C\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xD1\xD2\xD3\xD4\xD5\xD6' . - '\xD7\xD8\xD9\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xAD\xE0\xBD\x5F\x6D' . - '\x79\x81\x82\x83\x84\x85\x86\x87\x88\x89\x91\x92\x93\x94\x95\x96' . - '\x97\x98\x99\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xC0\x4F\xD0\xA1\x07' . - '\x20\x21\x22\x23\x24\x25\x06\x17\x28\x29\x2A\x2B\x2C\x09\x0A\x1B' . - '\x30\x31\x1A\x33\x34\x35\x36\x08\x38\x39\x3A\x3B\x04\x14\x3E\xFF' . - '\x41\xAA\x4A\xB1\x9F\xB2\x6A\xB5\xBB\xB4\x9A\x8A\xB0\xCA\xAF\xBC' . - '\x90\x8F\xEA\xFA\xBE\xA0\xB6\xB3\x9D\xDA\x9B\x8B\xB7\xB8\xB9\xAB' . - '\x64\x65\x62\x66\x63\x67\x9E\x68\x74\x71\x72\x73\x78\x75\x76\x77' . - '\xAC\x69\xED\xEE\xEB\xEF\xEC\xBF\x80\xFD\xFE\xFB\xFC\xBA\xAE\x59' . - '\x44\x45\x42\x46\x43\x47\x9C\x48\x54\x51\x52\x53\x58\x55\x56\x57' . - '\x8C\x49\xCD\xCE\xCB\xCF\xCC\xE1\x70\xDD\xDE\xDB\xDC\x8D\x8E\xDF'; - -my $cp_bc = # EBCDIC code page POSiX-BC - '\x00\x01\x02\x03\x37\x2D\x2E\x2F\x16\x05\x15\x0B\x0C\x0D\x0E\x0F' . - '\x10\x11\x12\x13\x3C\x3D\x32\x26\x18\x19\x3F\x27\x1C\x1D\x1E\x1F' . - '\x40\x5A\x7F\x7B\x5B\x6C\x50\x7D\x4D\x5D\x5C\x4E\x6B\x60\x4B\x61' . - '\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\x7A\x5E\x4C\x7E\x6E\x6F' . - '\x7C\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xD1\xD2\xD3\xD4\xD5\xD6' . - '\xD7\xD8\xD9\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xBB\xBC\xBD\x6A\x6D' . - '\x4A\x81\x82\x83\x84\x85\x86\x87\x88\x89\x91\x92\x93\x94\x95\x96' . - '\x97\x98\x99\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xFB\x4F\xFD\xFF\x07' . - '\x20\x21\x22\x23\x24\x25\x06\x17\x28\x29\x2A\x2B\x2C\x09\x0A\x1B' . - '\x30\x31\x1A\x33\x34\x35\x36\x08\x38\x39\x3A\x3B\x04\x14\x3E\x5F' . - '\x41\xAA\xB0\xB1\x9F\xB2\xD0\xB5\x79\xB4\x9A\x8A\xBA\xCA\xAF\xA1' . - '\x90\x8F\xEA\xFA\xBE\xA0\xB6\xB3\x9D\xDA\x9B\x8B\xB7\xB8\xB9\xAB' . - '\x64\x65\x62\x66\x63\x67\x9E\x68\x74\x71\x72\x73\x78\x75\x76\x77' . - '\xAC\x69\xED\xEE\xEB\xEF\xEC\xBF\x80\xE0\xFE\xDD\xFC\xAD\xAE\x59' . - '\x44\x45\x42\x46\x43\x47\x9C\x48\x54\x51\x52\x53\x58\x55\x56\x57' . - '\x8C\x49\xCD\xCE\xCB\xCF\xCC\xE1\x70\xC0\xDE\xDB\xDC\x8D\x8E\xDF'; - -my $straight = # Avoid ranges - '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F' . - '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' . - '\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F' . - '\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F' . - '\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F' . - '\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F' . - '\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F' . - '\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F' . - '\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F' . - '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F' . - '\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF' . - '\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF' . - '\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF' . - '\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF' . - '\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF' . - '\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF'; - # The following 2 functions allow tests to work on both EBCDIC and # ASCII-ish platforms. They convert string scalars between the native # character set and the set of 256 characters which is usually called # Latin1. -# -# These routines don't work on UTF-EBCDIC and UTF-8. sub native_to_latin1($) { my $string = shift; return $string if ord('^') == 94; # ASCII, Latin1 - my $cp; - if (ord('^') == 95) { # EBCDIC 1047 - $cp = \$cp_1047; - } - elsif (ord('^') == 106) { # EBCDIC POSIX-BC - $cp = \$cp_bc; - } - elsif (ord('^') == 176) { # EBCDIC 037 */ - $cp = \$cp_0037; - } - else { - die "Unknown native character set"; + my $output = ""; + for my $i (0 .. length($string) - 1) { + $output .= chr(ord_native_to_latin1(ord(substr($string, $i, 1)))); } + # Preserve utf8ness of input onto the output, even if it didn't need to be + # utf8 + utf8::upgrade($output) if utf8::is_utf8($string); - eval '$string =~ tr/' . $$cp . '/' . $straight . '/'; - return $string; + return $output; } sub latin1_to_native($) { my $string = shift; return $string if ord('^') == 94; # ASCII, Latin1 - my $cp; - if (ord('^') == 95) { # EBCDIC 1047 - $cp = \$cp_1047; - } - elsif (ord('^') == 106) { # EBCDIC POSIX-BC - $cp = \$cp_bc; - } - elsif (ord('^') == 176) { # EBCDIC 037 */ - $cp = \$cp_0037; - } - else { - die "Unknown native character set"; + my $output = ""; + for my $i (0 .. length($string) - 1) { + $output .= chr(ord_latin1_to_native(ord(substr($string, $i, 1)))); } + # Preserve utf8ness of input onto the output, even if it didn't need to be + # utf8 + utf8::upgrade($output) if utf8::is_utf8($string); - eval '$string =~ tr/' . $straight . '/' . $$cp . '/'; - return $string; + return $output; } sub ord_latin1_to_native { @@ -1354,8 +1650,8 @@ sub ord_latin1_to_native { # equivalent value. Anything above latin1 is itself. my $ord = shift; - return $ord if $ord > 255; - return ord latin1_to_native(chr $ord); + return $ord if ord('^') == 94; # ASCII, Latin1 + return utf8::unicode_to_native($ord); } sub ord_native_to_latin1 { @@ -1363,8 +1659,8 @@ sub ord_native_to_latin1 { # Anything above latin1 is itself. my $ord = shift; - return $ord if $ord > 255; - return ord native_to_latin1(chr $ord); + return $ord if ord('^') == 94; # ASCII, Latin1 + return utf8::native_to_unicode($ord); } 1;