regcomp.c: Fix so it works on Unicode 5.2
author: Karl Williamson <khw@cpan.org>
Fri, 20 Mar 2015 03:49:30 +0000 (21:49 -0600)
committer: Karl Williamson <khw@cpan.org>
Fri, 20 Mar 2015 04:53:01 +0000 (22:53 -0600)
Unicode 5.2 had an anomalous situation, fixed in the next release, which
runs afoul of an assert() in regcomp.c.  This commit just modifies the
assert so that it does not fail in this situation.

regcomp.c

index 51065d5..8c3dffd 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -15093,9 +15093,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          * same element, neither should be a digit. */
                         if (index_start == index_final) {
                             assert(! ELEMENT_RANGE_MATCHES_INVLIST(index_start)
-                            || invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start+1]
-                            - invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
-                            == 10);
+                            || (invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start+1]
+                               - invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
+                               == 10)
+                               /* But actually Unicode did have one group of 11
+                                * 'digits' in 5.2, so in case we are operating
+                                * on that version, let that pass */
+                            || (invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start+1]
+                               - invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
+                                == 11
+                               && invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
+                                == 0x19D0)
+                            );
                         }
                         else if ((index_start >= 0
                                   && ELEMENT_RANGE_MATCHES_INVLIST(index_start))