This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
t/re/pat.t: Skip tests that don't work on EBCDIC
[perl5.git] / t / re / pat.t
index b046638..338bf8d 100644 (file)
@@ -22,10 +22,9 @@ BEGIN {
     set_up_inc('../lib', '.', '../ext/re');
 }
 
-skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
 skip_all_without_unicode_tables();
 
-plan tests => 965;  # Update this when adding/deleting tests.
+plan tests => 1011;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -1430,56 +1429,65 @@ EOP
     {   # Test that it avoids spllitting a multi-char fold across nodes.
         # These all fold to things that are like 'ss', which, if split across
         # nodes could fail to match a single character that folds to the
-        # combination.
+        # combination.  1F0 byte expands when folded;
         my $utf8_locale = find_utf8_ctype_locale();
-        for my $char('F', $sharp_s, "\x{FB00}") {
+        for my $char('F', $sharp_s, "\x{1F0}", "\x{FB00}") {
             my $length = 260;    # Long enough to overflow an EXACTFish regnode
             my $p = $char x $length;
-            my $s = ($char eq $sharp_s) ? 'ss' : 'ff';
+            my $s = ($char eq $sharp_s) ? 'ss'
+                                        : $char eq "\x{1F0}"
+                                          ? "j\x{30c}"
+                                          : 'ff';
             $s = $s x $length;
             for my $charset (qw(u d l aa)) {
                 for my $utf8 (0..1) {
-                  SKIP:
                     for my $locale ('C', $utf8_locale) {
-                        skip "test skipped for non-C locales", 2
+                      SKIP:
+                        {
+                            skip "test skipped for non-C locales", 2
                                     if $charset ne 'l'
                                     && (! defined $locale || $locale ne 'C');
-                        if ($charset eq 'l') {
-                            if (! defined $locale) {
-                                skip "No UTF-8 locale", 2;
+                            if ($charset eq 'l') {
+                                if (! defined $locale) {
+                                    skip "No UTF-8 locale", 2;
+                                }
+                                skip "Can't test in miniperl",2
+                                  if is_miniperl();
+
+                                require POSIX;
+                                POSIX::setlocale(&LC_CTYPE, $locale);
                             }
 
-                            use POSIX;
-                            POSIX::setlocale(&LC_CTYPE, $locale);
+                            my $pat = $p;
+                            utf8::upgrade($pat) if $utf8;
+                            my $should_pass =
+                              (    $charset eq 'u'
+                                   || ($charset eq 'd' && $utf8)
+                                   || ($charset eq 'd' && (   $char =~ /[[:ascii:]]/
+                                                              || ord $char > 255))
+                                   || ($charset eq 'aa' && $char =~ /[[:ascii:]]/)
+                                   || ($charset eq 'l' && $locale ne 'C')
+                                   || ($charset eq 'l' && $char =~ /[[:ascii:]]/)
+                                  );
+                            my $name = "(?i$charset), utf8=$utf8, locale=$locale,"
+                              . " char=" . sprintf "%x", ord $char;
+                            no warnings 'locale';
+                            is (eval " '$s' =~ qr/(?i$charset)$pat/;",
+                                $should_pass, $name);
+                            fail "$name: $@" if $@;
+                            is (eval " 'a$s' =~ qr/(?i$charset)a$pat/;",
+                                $should_pass, "extra a, $name");
+                            fail "$name: $@" if $@;
                         }
-
-                        my $pat = $p;
-                        utf8::upgrade($pat) if $utf8;
-                        my $should_pass =
-                            (    $charset eq 'u'
-                             || ($charset eq 'd' && $utf8)
-                             || ($charset eq 'd' && (   $char =~ /[[:ascii:]]/
-                                                     || ord $char > 255))
-                             || ($charset eq 'aa' && $char =~ /[[:ascii:]]/)
-                             || ($charset eq 'l' && $locale ne 'C')
-                             || ($charset eq 'l' && $char =~ /[[:ascii:]]/)
-                            );
-                        my $name = "(?i$charset), utf8=$utf8, locale=$locale,"
-                                 . " char=" . sprintf "%x", ord $char;
-                        no warnings 'locale';
-                        is (eval " '$s' =~ qr/(?i$charset)$pat/;",
-                            $should_pass, $name);
-                        fail "$name: $@" if $@;
-                        is (eval " 'a$s' =~ qr/(?i$charset)a$pat/;",
-                            $should_pass, "extra a, $name");
-                        fail "$name: $@" if $@;
                     }
                 }
             }
         }
     }
 
+    SKIP:
     {
+        skip "no re debug", 5 if is_miniperl;
         my $s = ("0123456789" x 26214) x 2; # Should fill 2 LEXACTS, plus
                                             # small change
         my $pattern_prefix = "use utf8; use re qw(Debug COMPILE)";
@@ -1500,9 +1508,9 @@ EOP
             fail($pattern);
             fresh_perl($pattern, { stderr => 0, verbose => 1 });
         }
-        like($result, qr/Final program[^X]*\bLEXACT_ONLY8\b[^X]*\bLEXACT\b[^X]*\bEXACT\b[^X]*\bEND\b/s,
+        like($result, qr/Final program[^X]*\bLEXACT_REQ8\b[^X]*\bLEXACT\b[^X]*\bEXACT\b[^X]*\bEND\b/s,
              "Check that an LEXACT_ONLY node is generated with a \\x{100}");
-        like($s, qr/$s/, "Check that LEXACT_ONLY8 nodes match");
+        like($s, qr/$s/, "Check that LEXACT_REQ8 nodes match");
     }
 
     {
@@ -1745,27 +1753,6 @@ EOP
         like("X", qr/$x/, "UTF-8 of /[x]/i matches upper case");
     }
 
-SKIP: {   # make sure we get an error when \p{} cannot load Unicode tables
-        skip("Unicode tables always now loaded", 1);
-        fresh_perl_like(<<'        prog that cannot load uni tables',
-            BEGIN {
-                @INC = '../lib';
-                require utf8; require 'utf8_heavy.pl';
-                @INC = ();
-            }
-            $name = 'A B';
-            if ($name =~ /(\p{IsUpper}) (\p{IsUpper})/){
-                print "It's good! >$1< >$2<\n";
-            } else {
-                print "It's not good...\n";
-            }
-        prog that cannot load uni tables
-                  qr/^Can't locate unicore\/Heavy\.pl(?x:
-                   )|^Can't find Unicode property definition/,
-                  undef,
-                 '\p{} should not fail silently when uni tables evanesce');
-    }
-
     {   # Special handling of literal-ended ranges in [...] was breaking this
         use utf8;
         like("ÿ", qr/[ÿ-ÿ]/, "\"ÿ\" should match [ÿ-ÿ]");
@@ -1827,9 +1814,10 @@ SKIP: {   # make sure we get an error when \p{} cannot load Unicode tables
                 "test that we handle things like m/\\888888888/ without infinite loops" );
         }
 
+        SKIP:
         {   # Test that we handle some malformed UTF-8 without looping [perl
             # #123562]
-
+            skip "no Encode", 1 if is_miniperl;
             my $code='
                 BEGIN{require q(./test.pl);}
                 use Encode qw(_utf8_on);
@@ -2090,7 +2078,10 @@ CODE
     {   # [perl #133871], ASAN/valgrind out-of-bounds access
         fresh_perl_like('qr/(?|(())|())|//', qr/syntax error/, {}, "[perl #133871]");
     }
+  SKIP:
     {   # [perl #133921], segfault
+        skip "Not valid for EBCDIC", 5 if $::IS_EBCDIC;
+
         fresh_perl_is('qr\ 40||ß+p00000F00000ù\Q00000ÿ00000x00000x0c0e0\Qx0\Qx0\x{0c!}\;\;î0\x\0ÿÿÿþ\0\0\0ù\Q`\Qx`\0\ 1{0c!}\ 1e;\0\0\0ù\ò`\Qm`\x{0c!}\;\;îçÿ \0\7fç\0\0\0!\0F\ 5\0\0/;îçÿù\Q\0\ 1\0\0x\10ÿÿÿÿ\0\0\0ù\0\0\0\7f`x{0c!}\ 1e;\0\0\0ù\Q`\Qx`\x{c!}\;\;îç!}\;îçÿù\Q\87 \x\0ÿÿÿÿ\0\0>=\Qx`\Qx`\0\0ù\ò`\Qx`\x{0c!};\;îçÿ \0F\ 5\0n0t0\0\0\80\ 1d;t \0\0\0ù \0\7fç\80\0\0!00000000000000000000000m/00000000000000000000\ e00000000000m/\10\10\10\10\x{){} \10\10\10\10)|\10\10\ 4i', "", {}, "[perl #133921]");
         fresh_perl_is('\ 4|ß+W0ü0r0\Qx0\Qx0x0c0G00000000000000000O000000000x0x0x0c!}\;îçÿù\Q0 \x\0ÿÿÿÿ\0\0\0ù\Q`\Qx`\0\ 1{0d ;\0\0\0ù\ò`\Qm`\x{0c!}\;\;îçÿ \0\7fç\0\0\0!\0F\ 5\0\0/;îçÿù\Q\0\ 1\0\0x\10ÿÿÿÿ\0\0\0ù\0\0\0\7f`x{0c!}\ 1;\0\0\0ù\Q`\Qq`\x{c!}\;\;îç!}\;îçÿù\Q\87 \x\0ÿÿÿÿ\0\0>=\Qx`\Qx`\0\0ù\ò`\Qx`\x{0c!};\;îçÿ \00000000F\ 5\0m0t0\0\0\80\ 1d;t \0\0\0ù \0\7fç\80\0\0!00000000000000000000000m/00000000000000000000\ e00000000000m/\10\10\10\10\x{){} \10\10\10\10)|\10\10\ 4\ 4i', "", {}, "[perl #133921]");
 
@@ -2199,6 +2190,43 @@ x{0c!}\;\;îçÿ \0\7f/0f/!\0F\ 5\0\0/;îçÿù\Q\0\ 1\0\0x\10ÿÿÿÿ\0\0\0ù\0\0\0\7f`x{0c!}\ 1;\0\0\0ù\Q
         like(runperl(prog => "$s", stderr => 1), qr/Unmatched \(/);
    }
 
+   {    # GitHub #17196, caused assertion failure
+        fresh_perl_like('("0" x 258) =~ /(?l)0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000/',
+                        qr/^$/,
+                        {},
+                        "Assertion failure with /l exact string longer than a single node");
+    }
+
+SKIP:
+    {   # [perl #134334], Assertion failure
+        my $utf8_locale = find_utf8_ctype_locale();
+        skip "no UTF-8 locale available" unless $utf8_locale;
+        fresh_perl_like("use POSIX; POSIX::setlocale(&LC_CTYPE, '$utf8_locale'); 'ssss' =~ /\xDF+?sX/il;",
+                        qr/^$/,
+                        {},
+                        "Assertion failure matching /il on single char folding to multi");
+    }
+
+    {   # Test ANYOFHs
+        my $pat = qr/[\x{4000001}\x{4000003}\x{4000005}]+/;
+        unlike("\x{4000000}", $pat, "4000000 isn't in pattern");
+        like("\x{4000001}", $pat, "4000001 is in pattern");
+        unlike("\x{4000002}", $pat, "4000002 isn't in pattern");
+        like("\x{4000003}", $pat, "4000003 is in pattern");
+        unlike("\x{4000004}", $pat, "4000004 isn't in pattern");
+        like("\x{4000005}", $pat, "4000005 is in pattern");
+        unlike("\x{4000006}", $pat, "4000006 isn't in pattern");
+
+        # gh #17319
+        $pat = qr/[\N{U+200D}\N{U+2000}]()/;
+        unlike("\x{1FFF}", $pat, "1FFF isn't in pattern");
+        like("\x{2000}", $pat, "2000 is in pattern");
+        unlike("\x{2001}", $pat, "2001 isn't in pattern");
+        unlike("\x{200C}", $pat, "200C isn't in pattern");
+        like("\x{200D}", $pat, "200 is in pattern");
+        unlike("\x{200E}", $pat, "200E isn't in pattern");
+    }
+
 } # End of sub run_tests
 
 1;