This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcharclass.h: Simplify some expressions
author Karl Williamson <khw@cpan.org>
Sun, 22 Nov 2020 22:45:05 +0000 (15:45 -0700)
committer Karl Williamson <khw@cpan.org>
Sun, 22 Nov 2020 23:01:08 +0000 (16:01 -0700)
The regen script was improperly collapsing two-element ranges into two
separate elements, which caused extraneous code to be generated.

regcharclass.h
regen/regcharclass.pl

index d6688b2..3067ea8 100644 (file)
 
 /*** GENERATED CODE ***/
 #define is_VERTWS_cp_high(cp)                                               \
-( 0x2028 == cp || 0x2029 == cp )
+( inRANGE(cp, 0x2028, 0x2029) )
 
 /*
        XDIGIT: Hexadecimal digits
 #define is_XPERLSPACE_cp_high(cp)                                           \
 ( 0x1680 == cp || ( 0x1680 < cp &&                                          \
 ( inRANGE(cp, 0x2000, 0x200A) || ( 0x200A < cp &&                           \
-( 0x2028 == cp || ( 0x2028 < cp &&                                          \
-( 0x2029 == cp || ( 0x2029 < cp &&                                          \
+( inRANGE(cp, 0x2028, 0x2029) || ( 0x2029 < cp &&                           \
 ( 0x202F == cp || ( 0x202F < cp &&                                          \
-( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) ) ) )
+( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) )
 
 /*
        NONCHAR: Non character code points
 /*** GENERATED CODE ***/
 #define is_PROBLEMATIC_LOCALE_FOLD_cp(cp)                                   \
 ( cp <= 0xFF || ( 0xFF < cp &&                                              \
-( 0x130 == cp || ( 0x130 < cp &&                                            \
-( 0x131 == cp || ( 0x131 < cp &&                                            \
+( inRANGE(cp, 0x130, 0x131) || ( 0x131 < cp &&                              \
 ( 0x149 == cp || ( 0x149 < cp &&                                            \
 ( 0x178 == cp || ( 0x178 < cp &&                                            \
 ( 0x17F == cp || ( 0x17F < cp &&                                            \
 ( 0x3BC == cp || ( 0x3BC < cp &&                                            \
 ( inRANGE(cp, 0x1E96, 0x1E9A) || ( 0x1E9A < cp &&                           \
 ( 0x1E9E == cp || ( 0x1E9E < cp &&                                          \
-( 0x212A == cp || ( 0x212A < cp &&                                          \
-( 0x212B == cp || inRANGE(cp, 0xFB00, 0xFB06) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
+( inRANGE(cp, 0x212A, 0x212B) || inRANGE(cp, 0xFB00, 0xFB06) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 
 /*
        PROBLEMATIC_LOCALE_FOLDEDS_START: The first folded character of folds which are problematic under locale
 /*** GENERATED CODE ***/
 #define is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(cp)                          \
 ( cp <= 0xFF || ( 0xFF < cp &&                                              \
-( 0x130 == cp || ( 0x130 < cp &&                                            \
-( 0x131 == cp || ( 0x131 < cp &&                                            \
+( inRANGE(cp, 0x130, 0x131) || ( 0x131 < cp &&                              \
 ( 0x149 == cp || ( 0x149 < cp &&                                            \
 ( 0x178 == cp || ( 0x178 < cp &&                                            \
 ( 0x17F == cp || ( 0x17F < cp &&                                            \
 ( 0x3BC == cp || ( 0x3BC < cp &&                                            \
 ( inRANGE(cp, 0x1E96, 0x1E9A) || ( 0x1E9A < cp &&                           \
 ( 0x1E9E == cp || ( 0x1E9E < cp &&                                          \
-( 0x212A == cp || ( 0x212A < cp &&                                          \
-( 0x212B == cp || inRANGE(cp, 0xFB00, 0xFB06) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
+( inRANGE(cp, 0x212A, 0x212B) || inRANGE(cp, 0xFB00, 0xFB06) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 
 /*
        PATWS: pattern white space
 
 /*** GENERATED CODE ***/
 #define is_VERTWS_cp_high(cp)                                               \
-( 0x2028 == cp || 0x2029 == cp )
+( inRANGE(cp, 0x2028, 0x2029) )
 
 /*
        XDIGIT: Hexadecimal digits
 #define is_XPERLSPACE_cp_high(cp)                                           \
 ( 0x1680 == cp || ( 0x1680 < cp &&                                          \
 ( inRANGE(cp, 0x2000, 0x200A) || ( 0x200A < cp &&                           \
-( 0x2028 == cp || ( 0x2028 < cp &&                                          \
-( 0x2029 == cp || ( 0x2029 < cp &&                                          \
+( inRANGE(cp, 0x2028, 0x2029) || ( 0x2029 < cp &&                           \
 ( 0x202F == cp || ( 0x202F < cp &&                                          \
-( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) ) ) )
+( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) )
 
 /*
        NONCHAR: Non character code points
 /*** GENERATED CODE ***/
 #define is_PROBLEMATIC_LOCALE_FOLD_cp(cp)                                   \
 ( cp <= 0xFF || ( 0xFF < cp &&                                              \
-( 0x130 == cp || ( 0x130 < cp &&                                            \
-( 0x131 == cp || ( 0x131 < cp &&                                            \
+( inRANGE(cp, 0x130, 0x131) || ( 0x131 < cp &&                              \
 ( 0x149 == cp || ( 0x149 < cp &&                                            \
 ( 0x178 == cp || ( 0x178 < cp &&                                            \
 ( 0x17F == cp || ( 0x17F < cp &&                                            \
 ( 0x3BC == cp || ( 0x3BC < cp &&                                            \
 ( inRANGE(cp, 0x1E96, 0x1E9A) || ( 0x1E9A < cp &&                           \
 ( 0x1E9E == cp || ( 0x1E9E < cp &&                                          \
-( 0x212A == cp || ( 0x212A < cp &&                                          \
-( 0x212B == cp || inRANGE(cp, 0xFB00, 0xFB06) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
+( inRANGE(cp, 0x212A, 0x212B) || inRANGE(cp, 0xFB00, 0xFB06) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 
 /*
        PROBLEMATIC_LOCALE_FOLDEDS_START: The first folded character of folds which are problematic under locale
 /*** GENERATED CODE ***/
 #define is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(cp)                          \
 ( cp <= 0xFF || ( 0xFF < cp &&                                              \
-( 0x130 == cp || ( 0x130 < cp &&                                            \
-( 0x131 == cp || ( 0x131 < cp &&                                            \
+( inRANGE(cp, 0x130, 0x131) || ( 0x131 < cp &&                              \
 ( 0x149 == cp || ( 0x149 < cp &&                                            \
 ( 0x178 == cp || ( 0x178 < cp &&                                            \
 ( 0x17F == cp || ( 0x17F < cp &&                                            \
 ( 0x3BC == cp || ( 0x3BC < cp &&                                            \
 ( inRANGE(cp, 0x1E96, 0x1E9A) || ( 0x1E9A < cp &&                           \
 ( 0x1E9E == cp || ( 0x1E9E < cp &&                                          \
-( 0x212A == cp || ( 0x212A < cp &&                                          \
-( 0x212B == cp || inRANGE(cp, 0xFB00, 0xFB06) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
+( inRANGE(cp, 0x212A, 0x212B) || inRANGE(cp, 0xFB00, 0xFB06) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 
 /*
        PATWS: pattern white space
 
 /*** GENERATED CODE ***/
 #define is_VERTWS_cp_high(cp)                                               \
-( 0x2028 == cp || 0x2029 == cp )
+( inRANGE(cp, 0x2028, 0x2029) )
 
 /*
        XDIGIT: Hexadecimal digits
 #define is_XPERLSPACE_cp_high(cp)                                           \
 ( 0x1680 == cp || ( 0x1680 < cp &&                                          \
 ( inRANGE(cp, 0x2000, 0x200A) || ( 0x200A < cp &&                           \
-( 0x2028 == cp || ( 0x2028 < cp &&                                          \
-( 0x2029 == cp || ( 0x2029 < cp &&                                          \
+( inRANGE(cp, 0x2028, 0x2029) || ( 0x2029 < cp &&                           \
 ( 0x202F == cp || ( 0x202F < cp &&                                          \
-( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) ) ) )
+( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) )
 
 /*
        NONCHAR: Non character code points
 /*** GENERATED CODE ***/
 #define is_PROBLEMATIC_LOCALE_FOLD_cp(cp)                                   \
 ( cp <= 0xFF || ( 0xFF < cp &&                                              \
-( 0x130 == cp || ( 0x130 < cp &&                                            \
-( 0x131 == cp || ( 0x131 < cp &&                                            \
+( inRANGE(cp, 0x130, 0x131) || ( 0x131 < cp &&                              \
 ( 0x149 == cp || ( 0x149 < cp &&                                            \
 ( 0x178 == cp || ( 0x178 < cp &&                                            \
 ( 0x17F == cp || ( 0x17F < cp &&                                            \
 ( 0x3BC == cp || ( 0x3BC < cp &&                                            \
 ( inRANGE(cp, 0x1E96, 0x1E9A) || ( 0x1E9A < cp &&                           \
 ( 0x1E9E == cp || ( 0x1E9E < cp &&                                          \
-( 0x212A == cp || ( 0x212A < cp &&                                          \
-( 0x212B == cp || inRANGE(cp, 0xFB00, 0xFB06) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
+( inRANGE(cp, 0x212A, 0x212B) || inRANGE(cp, 0xFB00, 0xFB06) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 
 /*
        PROBLEMATIC_LOCALE_FOLDEDS_START: The first folded character of folds which are problematic under locale
 /*** GENERATED CODE ***/
 #define is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(cp)                          \
 ( cp <= 0xFF || ( 0xFF < cp &&                                              \
-( 0x130 == cp || ( 0x130 < cp &&                                            \
-( 0x131 == cp || ( 0x131 < cp &&                                            \
+( inRANGE(cp, 0x130, 0x131) || ( 0x131 < cp &&                              \
 ( 0x149 == cp || ( 0x149 < cp &&                                            \
 ( 0x178 == cp || ( 0x178 < cp &&                                            \
 ( 0x17F == cp || ( 0x17F < cp &&                                            \
 ( 0x3BC == cp || ( 0x3BC < cp &&                                            \
 ( inRANGE(cp, 0x1E96, 0x1E9A) || ( 0x1E9A < cp &&                           \
 ( 0x1E9E == cp || ( 0x1E9E < cp &&                                          \
-( 0x212A == cp || ( 0x212A < cp &&                                          \
-( 0x212B == cp || inRANGE(cp, 0xFB00, 0xFB06) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
+( inRANGE(cp, 0x212A, 0x212B) || inRANGE(cp, 0xFB00, 0xFB06) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 
 /*
        PATWS: pattern white space
  * ee0dd174fd5b158d82dfea95d7d822ca0bfcd490182669353dca3ab39a8ee807 lib/unicore/mktables
  * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
- * 60185ff63360b1d3fc0c8df02a8493e63ea0283966612be245c30ff8f05b48db regen/regcharclass.pl
+ * acef4a732cb0cf63f720e29d8f25b0574a8ba18d553920197d459ad7950c3fd9 regen/regcharclass.pl
  * c0a5e4cb2b9ffad78691938e122c1310bbc98aca2364af243e5c6b2ec0f59dc3 regen/regcharclass_multi_char_folds.pl
  * ex: set ro: */
index 56fa7dd..d0d80d8 100755 (executable)
@@ -1049,25 +1049,26 @@ sub _cond_as_str {
     my $is_cp_ret = $opts_ref->{ret_type} eq "cp";
     return "( $test )" if !defined $cond;
 
-    # rangify the list.
+    # rangify the list.  As we encounter a new value, it is placed in a new
+    # subarray by itself.  If the next value is adjacent to it, the end point
+    # of the subarray is merely incremented; and so on.  When the next value
+    # that isn't adjacent to the previous one is encountered, Update() is
+    # called to hoist any single-element subarray to be a scalar.
     my @ranges;
     my $Update= sub {
         # We skip this if there are optimizations that
         # we can apply (below) to the individual ranges
         if ( ($is_cp_ret || $combine) && @ranges && ref $ranges[-1]) {
-            if ( $ranges[-1][0] == $ranges[-1][1] ) {
-                $ranges[-1]= $ranges[-1][0];
-            } elsif ( $ranges[-1][0] + 1 == $ranges[-1][1] ) {
-                $ranges[-1]= $ranges[-1][0];
-                push @ranges, $ranges[-1] + 1;
-            }
+            $ranges[-1] = $ranges[-1][0] if $ranges[-1][0] == $ranges[-1][1];
         }
     };
     for my $condition ( @$cond ) {
         if ( !@ranges || $condition != $ranges[-1][1] + 1 ) {
+            # Not adjacent to the existing range.  Remove that from being a
+            # range if only a single value;
             $Update->();
             push @ranges, [ $condition, $condition ];
-        } else {
+        } else {    # Adjacent to the existing range; add to the range
             $ranges[-1][1]++;
         }
     }