This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regen/regcharclass.pl: Improve the generated code
author Karl Williamson <khw@cpan.org>
Sat, 3 May 2014 21:38:27 +0000 (15:38 -0600)
committer Karl Williamson <khw@cpan.org>
Fri, 30 May 2014 22:11:28 +0000 (16:11 -0600)
This is a small improvement for when a consecutive group of U8 code points
begins at 0 or ends at 255.  A U8 value physically cannot be below 0 or
above 255, so there is no need to test that end of the range.  In several
places this means a mask operation is no longer generated.

regcharclass.h
regen/regcharclass.pl

index 5b04492..f7ca758 100644 (file)
 */
 /*** GENERATED CODE ***/
 #define is_PROBLEMATIC_LOCALE_FOLD_utf8(s)                                  \
-( ( ( NATIVE_TO_LATIN1(((U8*)s)[0]) & 0x80 ) == 0x00 ) ? 1                  \
+( ( NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x7F ) ? 1                             \
 : ( ( NATIVE_TO_LATIN1(((U8*)s)[0]) & 0xFE ) == 0xC2 ) ?                    \
     2                                                                       \
 : ( 0xC4 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0xC7 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ?\
 */
 /*** GENERATED CODE ***/
 #define is_PROBLEMATIC_LOCALE_FOLDEDS_START_utf8(s)                         \
-( ( ( NATIVE_TO_LATIN1(((U8*)s)[0]) & 0x80 ) == 0x00 ) ? 1                  \
+( ( NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x7F ) ? 1                             \
 : ( ( NATIVE_TO_LATIN1(((U8*)s)[0]) & 0xFE ) == 0xC2 ) ?                    \
     2                                                                       \
 : ( 0xC4 == NATIVE_TO_LATIN1(((U8*)s)[0]) || 0xC7 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ?\
index 002cd0f..1f780a4 100755 (executable)
@@ -1097,6 +1097,21 @@ sub _cond_as_str {
             $ranges[$i] =           # Trivial case: single element range
                     sprintf "$self->{val_fmt} == $test", $ranges[$i]->[0];
         }
+        elsif ($ranges[$i]->[0] == 0) {
+            # If the range matches all 256 possible bytes, it is trivially
+            # true.
+            return 1 if $ranges[0]->[1] == 0xFF;    # @ranges must be 1 in
+                                                    # this case
+            $ranges[$i] = sprintf "( $test <= $self->{val_fmt} )",
+                                                               $ranges[$i]->[1];
+        }
+        elsif ($ranges[$i]->[1] == 255) {
+
+            # Similarly the max possible is 255, so can omit an upper bound
+            # test if the calculated max is the max possible one.
+            $ranges[$i] = sprintf "( $test >= $self->{val_fmt} )",
+                                                                $ranges[0]->[0];
+        }
         else {
             my $output = "";