This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
PATCH: [perl #133942] BBC: BKB/Lingua-JA-Moji
author: Karl Williamson <khw@cpan.org>
Thu, 28 Mar 2019 18:26:43 +0000 (12:26 -0600)
committer: Karl Williamson <khw@cpan.org>
Thu, 28 Mar 2019 19:27:04 +0000 (13:27 -0600)
This ticket was originally fixed by a PR being issued on the
distribution, which was merged and a new version placed on CPAN.

But before Slaven had a chance to test it, commit
765e6ecf32a570694dcff91c1c72f98306a9390e came along, and happened to
break it again from a totally independent cause.

So we have two separate bugs on the same ticket, only one of which was
perl's fault; this commit fixes that one.  The problematic commit was
still looking in the regnode FLAGS field in one function, whereas that
field has been repurposed for ANYOFH nodes, so should be ignored for
them.

regcomp.c
t/re/re_tests

index aff83ef..d98ad1a 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1574,6 +1574,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
     unsigned int i;
     const U32 n = ARG(node);
     bool new_node_has_latin1 = FALSE;
+    const U8 flags = OP(node) == ANYOFH ? 0 : ANYOF_FLAGS(node);
 
     PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;
 
@@ -1598,7 +1599,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
         }
 
         /* Get the code points valid only under UTF-8 locales */
-        if (   (ANYOF_FLAGS(node) & ANYOFL_FOLD)
+        if (   (flags & ANYOFL_FOLD)
             &&  av_tindex_skip_len_mg(av) >= ONLY_LOCALE_MATCHES_INDEX)
         {
             only_utf8_locale_invlist = ary[ONLY_LOCALE_MATCHES_INDEX];
@@ -1619,7 +1620,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      * actually does include them.  (Think about "\xe0" =~ /[^\xc0]/di;).  We
      * have to do this here before we add the unconditionally matched code
      * points */
-    if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
+    if (flags & ANYOF_INVERT) {
         _invlist_intersection_complement_2nd(invlist,
                                              PL_UpperLatin1,
                                              &invlist);
@@ -1646,21 +1647,21 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      * as well.  But don't add them if inverting, as when that gets done below,
      * it would exclude all these characters, including the ones it shouldn't
      * that were added just above */
-    if (! (ANYOF_FLAGS(node) & ANYOF_INVERT) && OP(node) == ANYOFD
-        && (ANYOF_FLAGS(node) & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
+    if (! (flags & ANYOF_INVERT) && OP(node) == ANYOFD
+        && (flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
     {
         _invlist_union(invlist, PL_UpperLatin1, &invlist);
     }
 
     /* Similarly for these */
-    if (ANYOF_FLAGS(node) & ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+    if (flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
         _invlist_union_complement_2nd(invlist, PL_InBitmap, &invlist);
     }
 
-    if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
+    if (flags & ANYOF_INVERT) {
         _invlist_invert(invlist);
     }
-    else if (ANYOF_FLAGS(node) & ANYOFL_FOLD) {
+    else if (flags & ANYOFL_FOLD) {
         if (new_node_has_latin1) {
 
             /* Under /li, any 0-255 could fold to any other 0-255, depending on
@@ -1688,7 +1689,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
     if (only_utf8_locale_invlist) {
         _invlist_union_maybe_complement_2nd(invlist,
                                             only_utf8_locale_invlist,
-                                            ANYOF_FLAGS(node) & ANYOF_INVERT,
+                                            flags & ANYOF_INVERT,
                                             &invlist);
     }
 
index bd7fc8f..991cde6 100644 (file)
@@ -2014,6 +2014,7 @@ AB\s+\x{100}      AB \x{100}X     y       -       -
 /[\xdf-/i      -       c       -       Invalid [] range        # [perl #133620] likely only fails under valgrind
 /\1a(b)/       bab     n       -       -               # This compiles but fails to match as \1 is not set when parsed.
 /(?iu)(?<=\xdf)hbase/  sshbase y       $&      hbase
+/\x{30c3}?[\x{30a2}\x{30a4}\x{30a6}\x{30a8}\x{30aa}-\x{30e2}\x{30e4}\x{30e6}\x{30e8}-\x{30f4}](?:[\x{30e3}\x{30e5}\x{30e7}\x{30a1}\x{30a3}\x{30a5}\x{30a7}\x{30a9}])?\x{30fc}?\x{30f3}?/       \x{30de}\x{30fc}\x{30af}\x{30b5}\x{30fc}\x{30d3}\x{30b9}        y       $&      \x{30de}\x{30fc}        # part of [perl #133942
 
 # Keep these lines at the end of the file
 # vim: softtabstop=0 noexpandtab