From 425224e1aa3143d7b00f920815709497b1ad8120 Mon Sep 17 00:00:00 2001
From: Karl Williamson
Date: Wed, 26 Jun 2013 18:23:27 -0600
Subject: [PATCH] regexp_unicode_prop.t: Generalize for non-ASCII platforms

---
 t/re/regexp_unicode_prop.t | 46 ++++++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/t/re/regexp_unicode_prop.t b/t/re/regexp_unicode_prop.t
index 617f14e..8e68ab1 100644
--- a/t/re/regexp_unicode_prop.t
+++ b/t/re/regexp_unicode_prop.t
@@ -65,20 +65,19 @@ my @CLASSES = (
     # It's ok to repeat class names.
     #
     InLatin1Supplement =>
-        $::IS_EBCDIC ? ['!\x{7f}', '\x{80}', '!\x{100}']
-                     : ['!\x{7f}', '\x{80}', '\x{ff}', '!\x{100}'],
+        ['!\N{U+7f}', '\N{U+80}', '\N{U+ff}', '!\x{100}'],
     InLatinExtendedA =>
-        ['!\x{7f}', '!\x{80}', '!\x{ff}', '\x{100}'],
+        ['!\N{U+7f}', '!\N{U+80}', '!\N{U+ff}', '\x{100}'],
 
     #
     # Properties are case-insensitive, and may have whitespace,
     # dashes and underscores.
     #
-    'in-latin1_SUPPLEMENT' => ['\x{80}',
+    'in-latin1_SUPPLEMENT' => ['\N{U+80}',
         '\N{LATIN SMALL LETTER Y WITH DIAERESIS}'],
     ' ^ In Latin 1 Supplement '
-        => ['!\x{80}', '\N{COFFIN}'],
-    'latin-1 supplement' => ['\x{80}', "0xDF"],
+        => ['!\N{U+80}', '\N{COFFIN}'],
+    'latin-1 supplement' => ['\N{U+80}', "0xDF"],
 );
 
 
@@ -154,13 +153,11 @@ while (my ($class, $chars) = each %SHORT_PROPERTIES) {
     push @{$d {IsWord}} => map {$class =~ /^[LMN]/ || $_ eq "_"
         ? $_ : "!$_"} @$chars;
     push @{$d {IsSpace}} => map {$class =~ /^Z/ ||
-        length ($_) == 1 && ord ($_) >= 0x09
-        && ord ($_) <= 0x0D
+        length ($_) == 1 && utf8::native_to_unicode(ord ($_)) >= 0x09
+        && utf8::native_to_unicode(ord ($_)) <= 0x0D
         ? $_ : "!$_"} @$chars;
 }
 
-delete $d {IsASCII} if $::IS_EBCDIC;
-
 push @CLASSES => "# Short properties" => %SHORT_PROPERTIES,
     "# POSIX like properties" => %d,
     "# User defined properties" => @USER_DEFINED_PROPERTIES;
@@ -313,14 +310,16 @@ sub InNotKana {<<'--'}
 +utf8::IsCn
 --
 
-sub InConsonant {<<'--'} # Not EBCDIC-aware.
-0061 007f
--0061
--0065
--0069
--006f
--0075
---
+sub InConsonant {
+
+    my $return = "+utf8::Lowercase\n&utf8::ASCII\n";
+    $return .= sprintf("-%X\n", ord "a");
+    $return .= sprintf("-%X\n", ord "e");
+    $return .= sprintf("-%X\n", ord "i");
+    $return .= sprintf("-%X\n", ord "o");
+    $return .= sprintf("-%X\n", ord "u");
+    return $return;
+}
 
 sub IsSyriac1 {<<'--'}
 0712 072C
@@ -337,12 +336,11 @@ sub IsAsciiHexAndDash {<<'--'}
 
 sub IsMyUpper {
     my $caseless = shift;
-    if ($caseless) {
-        return "0041\t005A\n0061\t007A"
-    }
-    else {
-        return "0041\t005A"
-    }
+    return "+utf8::"
+        . (($caseless)
+        ? 'Alphabetic'
+        : 'Uppercase')
+        . "\n&utf8::ASCII";
 }
 
 # Verify that can use user-defined properties inside another one
-- 
1.8.3.1