X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/d82cefba2906b5813c94d895f9182c8b3d304c97..3b071feee62d0713bd7e9f33098c084e3ee4fdeb:/t/re/regexp_unicode_prop.t diff --git a/t/re/regexp_unicode_prop.t b/t/re/regexp_unicode_prop.t index 711a0eb..06c30e0 100644 --- a/t/re/regexp_unicode_prop.t +++ b/t/re/regexp_unicode_prop.t @@ -6,7 +6,11 @@ use strict; use warnings; -use 5.010; +use v5.16; +use utf8; + +# To verify that messages containing the expansions work on UTF-8 +my $utf8_comment; my @warnings; local $SIG {__WARN__} = sub {push @warnings, "@_"}; @@ -105,7 +109,22 @@ my @CLASSES = ( ); - my @USER_DEFINED_PROPERTIES = ( +my @USER_DEFINED_PROPERTIES; +my @USER_CASELESS_PROPERTIES; +my @USER_ERROR_PROPERTIES; +my @DEFERRED; +my $overflow; +BEGIN { + $utf8_comment = "#\N{U+30CD}"; + + use Config; + $overflow = $Config{uvsize} < 8 ? "80000000" : "80000000000000000"; + + # We defined these at compile time, so that the subroutines that they + # refer to aren't known, so that we can test properties not known until + # runtime + + @USER_DEFINED_PROPERTIES = ( # # User defined properties # @@ -124,19 +143,62 @@ my @CLASSES = ( Dash => ['-'], ASCII_Hex_Digit => ['!-', 'A'], IsAsciiHexAndDash => ['-', 'A'], - - # This overrides the official one - InLatin1 => ['\x{0100}', '!\x{00FF}'], ); - my @USER_CASELESS_PROPERTIES = ( + @USER_CASELESS_PROPERTIES = ( # # User defined properties which differ depending on /i. Second entry # is false normally, true under /i # 'IsMyUpper' => ["M", "!m" ], + 'pkg1::pkg2::IsMyLower' => ["a", "!A" ], ); + @USER_ERROR_PROPERTIES = ( + 'IsOverflow' => qr/Code point too large in (?# + )"0\t$overflow$utf8_comment" in expansion of (?# + )main::IsOverflow/, + 'InRecursedA' => qr/Infinite recursion in user-defined property (?# + )"main::InRecursedA" in expansion of (?# + )main::InRecursedC in expansion of (?# + )main::InRecursedB in expansion of (?# + )main::InRecursedA/, + 'IsRangeReversed' => qr/Illegal range in "200 100$utf8_comment" in (?# + )expansion of main::IsRangeReversed/, + 'IsNonHex' => qr/Can't find Unicode property definition (?# + )"BEEF CAGED" in expansion of main::IsNonHex/, + + # Could have \n, hence /s + 'IsDeath' => qr/Died.* in expansion of main::IsDeath/s, + ); + + # Now create a list of properties whose definitions won't be known at + # runtime. The qr// below thus will have forward references to them, and + # when matched at runtime will not know what's in the property definition + my @DEFERRABLE_USER_DEFINED_PROPERTIES; + push @DEFERRABLE_USER_DEFINED_PROPERTIES, @USER_DEFINED_PROPERTIES; + push @DEFERRABLE_USER_DEFINED_PROPERTIES, @USER_CASELESS_PROPERTIES; + unshift @DEFERRABLE_USER_DEFINED_PROPERTIES, @USER_ERROR_PROPERTIES; + for (my $i = 0; $i < @DEFERRABLE_USER_DEFINED_PROPERTIES; $i+=2) { + my $property = $DEFERRABLE_USER_DEFINED_PROPERTIES[$i]; + if ($property =~ / ^ \# /x) { + $i++; + redo; + } + + # Only do this for the properties in the list that are user-defined + next if ($property !~ / ( ^ | :: ) I[ns] /x); + + push @DEFERRED, qr/\p{$property}/, + $DEFERRABLE_USER_DEFINED_PROPERTIES[$i+1]; + } +} + +# These override the official ones, so if found before defined, the official +# ones prevail, so can't test deferred definition +my @OVERRIDING_USER_DEFINED_PROPERTIES = ( + InLatin1 => ['\x{0100}', '!\x{00FF}'], +); # # From the short properties we populate POSIX-like classes. @@ -187,7 +249,8 @@ while (my ($class, $chars) = each %SHORT_PROPERTIES) { push @CLASSES => "# Short properties" => %SHORT_PROPERTIES, "# POSIX like properties" => %d, - "# User defined properties" => @USER_DEFINED_PROPERTIES; + "# User defined properties" => @USER_DEFINED_PROPERTIES, + "# Overriding user defined properties" => @OVERRIDING_USER_DEFINED_PROPERTIES; # @@ -201,7 +264,8 @@ for (my $i = 0; $i < @CLASSES; $i += 2) { $count += 4 * @ILLEGAL_PROPERTIES; $count += 4 * grep {length $_ == 1} @ILLEGAL_PROPERTIES; $count += 8 * @USER_CASELESS_PROPERTIES; -$count += 1; # Test for pkg:IsMyLower +$count += 1 * (@DEFERRED - @USER_ERROR_PROPERTIES) / 2; +$count += 1 * @USER_ERROR_PROPERTIES; $count += 1; # No warnings generated plan(tests => $count); @@ -218,18 +282,37 @@ sub match { undef $@; my $pat = "qr/$match/$caseless"; my $match_pat = eval $pat; - is($@, '', "$pat compiled correctly to a regexp: $@"); - like($str, $match_pat, "$name correctly matched"); + if (is($@, '', "$pat compiled correctly to a regexp: $@")) { + like($str, $match_pat, "$name correctly matched"); + } undef $@; $pat = "qr/$nomatch/$caseless"; my $nomatch_pat = eval $pat; - is($@, '', "$pat compiled correctly to a regexp: $@"); - unlike($str, $nomatch_pat, "$name correctly did not match"); + if (is($@, '', "$pat compiled correctly to a regexp: $@")) { + unlike($str, $nomatch_pat, "$name correctly did not match"); + } } sub run_tests { + for (my $i = 0; $i < @DEFERRED; $i+=2) { + if (ref $DEFERRED[$i+1] eq 'ARRAY') { + my ($str, $name) = get_str_name($DEFERRED[$i+1][0]); + like($str, $DEFERRED[$i], + "$name correctly matched $DEFERRED[$i] (defn. not known until runtime)"); + } + else { # Single entry rhs indicates a property that is an error + undef $@; + + # Using block eval causes the pattern to not be recompiled, so it + # retains its deferred status until this is executed. + eval { 'A' =~ $DEFERRED[$i] }; + like($@, $DEFERRED[$i+1], + "$DEFERRED[$i] gave correct failure message (defn. not known until runtime)"); + } + } + while (@CLASSES) { my $class = shift @CLASSES; if ($class =~ /^\h*#\h*(.*)/) { @@ -294,15 +377,24 @@ sub run_tests { my $in_pat = eval qq ['\\p{$class}']; my $out_pat = eval qq ['\\P{$class}']; + # Verify that adding /i does change the out set to match. + match $_, $in_pat, $out_pat, 'i' for @out; + + # Verify that adding /i doesn't change the in set. + match $_, $in_pat, $out_pat, 'i' for @in; + # Verify works as regularly for not /i match $_, $in_pat, $out_pat for @in; match $_, $out_pat, $in_pat for @out; + } - # Verify that adding /i doesn't change the in set. - match $_, $in_pat, $out_pat, 'i' for @in; + print "# User-defined properties with errors in their definition\n"; + while (my $error_property = shift @USER_ERROR_PROPERTIES) { + my $error_re = shift @USER_ERROR_PROPERTIES; - # Verify that adding /i does change the out set to match. - match $_, $in_pat, $out_pat, 'i' for @out; + undef $@; + eval { 'A' =~ /\p{$error_property}/; }; + like($@, $error_re, "$error_property gave correct failure message"); } } @@ -312,8 +404,8 @@ sub run_tests { # sub InKana1 {<<'--'} -3040 309F -30A0 30FF +3040 309F # A comment; next line has trailing spaces +30A0 30FF -- sub InKana2 {<<'--'} @@ -322,15 +414,18 @@ sub InKana2 {<<'--'} -- sub InKana3 {<<'--'} +# First line comment +utf8::InHiragana +# Full line comment +utf8::InKatakana -utf8::IsCn -- sub InNotKana {<<'--'} -!utf8::InHiragana --utf8::InKatakana +!utf8::InHiragana # A comment; next line has trailing spaces +-utf8::InKatakana +utf8::IsCn +# Final line comment -- sub InConsonant { @@ -349,6 +444,18 @@ sub IsSyriac1 {<<'--'} 0730 074A -- +sub InRecursedA { + return "+main::InRecursedB\n"; +} + +sub InRecursedB { + return "+main::InRecursedC\n"; +} + +sub InRecursedC { + return "+main::InRecursedA\n"; +} + sub InGreekSmall {return "03B1\t03C9"} sub InGreekCapital {return "0391\t03A9\n-03A2"} @@ -370,14 +477,7 @@ sub IsMyUpper { . "\n&utf8::ASCII"; } -{ # This has to be done here and not like the others, because we have to - # make sure that the property is not known until after the regex is - # compiled. It was previously getting confused about the pkg and /i - # combination - - my $mylower = qr/\p{pkg::IsMyLower}/i; - -sub pkg::IsMyLower { +sub pkg1::pkg2::IsMyLower { my $caseless = shift; return "+utf8::" . (($caseless) @@ -386,8 +486,16 @@ sub pkg::IsMyLower { . "\n&utf8::ASCII"; } - like("A", $mylower, "Not available until runtime user-defined property with pkg:: and /i works"); +sub IsRangeReversed { + return "200 100$utf8_comment"; +} + +sub IsNonHex { + return "BEEF CAGED$utf8_comment"; +} +sub IsDeath { + die; } # Verify that can use user-defined properties inside another one @@ -409,6 +517,10 @@ sub INfoo { die } sub Is::foo { die } sub In::foo { die } +sub IsOverflow { + return "0\t$overflow$utf8_comment"; +} + if (! is(@warnings, 0, "No warnings were generated")) { diag join "\n", @warnings, "\n"; }