This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8.c: Handle qr!\p{nv=6/8}!
authorKarl Williamson <khw@cpan.org>
Tue, 1 May 2018 01:05:54 +0000 (19:05 -0600)
committerKarl Williamson <khw@cpan.org>
Mon, 25 Jun 2018 13:33:30 +0000 (07:33 -0600)
I thought this worked before, but it turns out it never did.  This
commit allows the rational number specified in looking up the Numeric
Value property to not be in lowest possible terms.  Unicode even
furnishes some of its data in non-lowest form, so we should accept
this.

charclass_invlists.h
lib/unicore/mktables
regcharclass.h
uni_keywords.h
utf8.c

index 25030d2..3b7d387 100644 (file)
@@ -374323,7 +374323,7 @@ static const U8 WB_table[24][24] = {
  * be0f129691d479aa38646e4ca0ec1ee576ae7f75b0300a5624a7fa862fa8abba lib/unicore/extracted/DLineBreak.txt
  * 92449d354d9f6b6f2f97a292ebb59f6344ffdeb83d120d7d23e569c43ba67cd5 lib/unicore/extracted/DNumType.txt
  * e3a319527153b0c6c0c549b40fc6f3a01a7a0dcd6620784391db25901df3b154 lib/unicore/extracted/DNumValues.txt
- * c237f9e6bda604db4388693b42a20ee0d5c2cf9c08152beca27aa0e1ee735550 lib/unicore/mktables
+ * 32b3fd3722ec94c467104bb1f24e7ac3a3f0929531b96b744aa2746ecca3595b lib/unicore/mktables
  * 21653d2744fdd071f9ef138c805393901bb9547cf3e777ebf50215a191f986ea lib/unicore/version
  * 4bb677187a1a64e39d48f2e341b5ecb6c99857e49d7a79cf503bd8a3c709999b regen/charset_translations.pl
  * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl
index aa6c9fc..f526060 100644 (file)
@@ -19132,10 +19132,22 @@ EOF_CODE
                                                  $warning,
                                              );
 
-                    # If the name is a rational number, add tests for the
-                    # floating point equivalent.
+                    # If the name is a rational number, add tests for a
+                    # non-reduced form, and for a floating point equivalent.
                     if ($table_name =~ qr{/}) {
 
+                        # 60 is a number divisible by a bunch of things
+                        my ($numerator, $denominator) = $table_name
+                                                        =~ m! (.+) / (.+) !x;
+                        $numerator *= 60;
+                        $denominator *= 60;
+                        push @output, generate_tests($property_name,
+                                                    "$numerator/$denominator",
+                                                    $valid,
+                                                    $invalid,
+                                                    $warning,
+                                    );
+
                         # Calculate the float, and the %e representation
                         my $float = eval $table_name;
                         my $e_representation = sprintf("%.*e",
index a1a67df..8e4b028 100644 (file)
  * be0f129691d479aa38646e4ca0ec1ee576ae7f75b0300a5624a7fa862fa8abba lib/unicore/extracted/DLineBreak.txt
  * 92449d354d9f6b6f2f97a292ebb59f6344ffdeb83d120d7d23e569c43ba67cd5 lib/unicore/extracted/DNumType.txt
  * e3a319527153b0c6c0c549b40fc6f3a01a7a0dcd6620784391db25901df3b154 lib/unicore/extracted/DNumValues.txt
- * c237f9e6bda604db4388693b42a20ee0d5c2cf9c08152beca27aa0e1ee735550 lib/unicore/mktables
+ * 32b3fd3722ec94c467104bb1f24e7ac3a3f0929531b96b744aa2746ecca3595b lib/unicore/mktables
  * 21653d2744fdd071f9ef138c805393901bb9547cf3e777ebf50215a191f986ea lib/unicore/version
  * 4bb677187a1a64e39d48f2e341b5ecb6c99857e49d7a79cf503bd8a3c709999b regen/charset_translations.pl
  * 9ea6338945a7d70e5ea4b31ac7856c0b521df96be002e94b4b3b7d31debbf3ab regen/regcharclass.pl
index 2c303e6..8bc3de4 100644 (file)
@@ -6834,7 +6834,7 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
  * be0f129691d479aa38646e4ca0ec1ee576ae7f75b0300a5624a7fa862fa8abba lib/unicore/extracted/DLineBreak.txt
  * 92449d354d9f6b6f2f97a292ebb59f6344ffdeb83d120d7d23e569c43ba67cd5 lib/unicore/extracted/DNumType.txt
  * e3a319527153b0c6c0c549b40fc6f3a01a7a0dcd6620784391db25901df3b154 lib/unicore/extracted/DNumValues.txt
- * c237f9e6bda604db4388693b42a20ee0d5c2cf9c08152beca27aa0e1ee735550 lib/unicore/mktables
+ * 32b3fd3722ec94c467104bb1f24e7ac3a3f0929531b96b744aa2746ecca3595b lib/unicore/mktables
  * 21653d2744fdd071f9ef138c805393901bb9547cf3e777ebf50215a191f986ea lib/unicore/version
  * 4bb677187a1a64e39d48f2e341b5ecb6c99857e49d7a79cf503bd8a3c709999b regen/charset_translations.pl
  * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl
diff --git a/utf8.c b/utf8.c
index 26fea53..51b37c1 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -5933,6 +5933,7 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t len, const
     unsigned int i;
     unsigned int j = 0, lookup_len;
     int equals_pos = -1;        /* Where the '=' is found, or negative if none */
+    int slash_pos = -1;        /* Where the '/' is found, or negative if none */
     int table_index = 0;
     bool starts_with_In_or_Is = FALSE;
     Size_t lookup_offset = 0;
@@ -6129,6 +6130,8 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t len, const
             continue;
         }
 
+        slash_pos = j;
+
         /* A slash in the 'numeric value' property indicates that what follows
          * is a denominator.  It can have a leading '+' and '0's that should be
          * skipped.  But we have never allowed a negative denominator, so treat
@@ -6194,6 +6197,7 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t len, const
             lookup_name += 2;
             lookup_len -= 2;
             equals_pos -= 2;
+            slash_pos -= 2;
 
             table_index = match_uniprop((U8 *) lookup_name, lookup_len);
         }
@@ -6211,9 +6215,9 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t len, const
              * we do is make sure we have the number in canonical form and look
              * that up. */
 
-            {
+            if (slash_pos < 0) {    /* No slash */
 
-                /* Take the input, convert it to a
+                /* When it isn't a rational, take the input, convert it to a
                  * NV, then create a canonical string representation of that
                  * NV. */
 
@@ -6236,6 +6240,83 @@ Perl_parse_uniprop_string(pTHX_ const char * const name, const Size_t len, const
                                                 PL_E_FORMAT_PRECISION, value);
                 }
             }
+            else {  /* Has a slash.  Create a rational in canonical form  */
+                UV numerator, denominator, gcd, trial;
+                const char * end_ptr;
+                const char * sign = "";
+
+                /* We can't just find the numerator, denominator, and do the
+                 * division, then use the method above, because that is
+                 * inexact.  And the input could be a rational that is within
+                 * epsilon (given our precision) of a valid rational, and would
+                 * then incorrectly compare valid.
+                 *
+                 * We're only interested in the part after the '=' */
+                lookup_name += equals_pos;
+                lookup_len -= equals_pos;
+                slash_pos -= equals_pos;
+
+                /* Handle any leading minus */
+                if (lookup_name[0] == '-') {
+                    sign = "-";
+                    lookup_name++;
+                    lookup_len--;
+                    slash_pos--;
+                }
+
+                /* Convert the numerator to numeric */
+                end_ptr = lookup_name + slash_pos;
+                if (! grok_atoUV(lookup_name, &numerator, &end_ptr)) {
+                    return NULL;
+                }
+
+                /* It better have included all characters before the slash */
+                if (*end_ptr != '/') {
+                    return NULL;
+                }
+
+                /* Set to look at just the denominator */
+                lookup_name += slash_pos;
+                lookup_len -= slash_pos;
+                end_ptr = lookup_name + lookup_len;
+
+                /* Convert the denominator to numeric */
+                if (! grok_atoUV(lookup_name, &denominator, &end_ptr)) {
+                    return NULL;
+                }
+
+                /* It better be the rest of the characters, and don't divide by
+                 * 0 */
+                if (   end_ptr != lookup_name + lookup_len
+                    || denominator == 0)
+                {
+                    return NULL;
+                }
+
+                /* Get the greatest common denominator using
+                   http://en.wikipedia.org/wiki/Euclidean_algorithm */
+                gcd = numerator;
+                trial = denominator;
+                while (trial != 0) {
+                    UV temp = trial;
+                    trial = gcd % trial;
+                    gcd = temp;
+                }
+
+                /* If already in lowest possible terms, we have already tried
+                 * looking this up */
+                if (gcd == 1) {
+                    return NULL;
+                }
+
+                /* Reduce the rational, which should put it in canonical form.
+                 * Then look it up */
+                numerator /= gcd;
+                denominator /= gcd;
+
+                canonical = Perl_form(aTHX_ "nv=%s%" UVuf "/%" UVuf,
+                                             sign, numerator, denominator);
+            }
 
             /* Here, we have the number in canonical form.  Try that */
             table_index = match_uniprop((U8 *) canonical, strlen(canonical));