Porting/sync-with-cpan: apply --jobs=N when running module tests

[perl5.git] / regexec.c
diff --git a/regexec.c b/regexec.c

index 5c5241c..0b5c847 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -445,6 +445,8 @@ S_regcp_restore(pTHX_ regexp *rex, I32 ix, U32 *maxopenparen_p _pDEPTH)
  
  #define regcpblow(cp) LEAVE_SCOPE(cp)  /* Ignores regcppush()ed data. */
  
+#ifndef PERL_IN_XSUB_RE
+
  bool
  Perl_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
  {
@@ -486,6 +488,8 @@ Perl_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
      return FALSE;
  }
  
+#endif
+
  STATIC bool
  S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character)
  {
@@ -9704,6 +9708,64 @@ S_to_byte_substr(pTHX_ regexp *prog)
      return TRUE;
  }
  
+bool
+Perl__is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, const UV cp)
+{
+    /* Temporary helper function for toke.c.  Returns TRUE if the code point
+     * 'cp' is a stand-alone grapheme, and FALSE otherwise.  The UTF-8 for 'cp'
+     * begins at position 's' in the larger string bounded by 'strbeg' and
+     * 'strend'.
+     *
+     * 'cp' needs to be assigned (if not, a future version of the Unicode
+     * Standard could make it something that combines with adjacent characters,
+     * so code using it would then break), and there has to be a GCB break both
+     * before and after the character; a string edge is treated as GCB_EDGE. */
+
+    GCB_enum cp_gcb_val, prev_cp_gcb_val, next_cp_gcb_val;
+    const U8 * prev_cp_start;
+
+    PERL_ARGS_ASSERT__IS_GRAPHEME;
+
+    /* Unassigned code points are forbidden, so fail immediately on one */
+    if (UNLIKELY(! ELEMENT_RANGE_MATCHES_INVLIST(
+                                    _invlist_search(PL_Assigned_invlist, cp))))
+    {
+        return FALSE;
+    }
+
+    cp_gcb_val = getGCB_VAL_CP(cp);
+
+    /* Find the GCB value of the previous code point in the input */
+    prev_cp_start = utf8_hop_back(s, -1, strbeg);
+    if (UNLIKELY(prev_cp_start == s)) { /* Hop didn't move: no previous char */
+        prev_cp_gcb_val = GCB_EDGE;
+    }
+    else {
+        prev_cp_gcb_val = getGCB_VAL_UTF8(prev_cp_start, strend);
+    }
+
+    /* And check that there is a grapheme boundary between it and 'cp' */
+    if (! isGCB(prev_cp_gcb_val, cp_gcb_val, strbeg, s,
+                TRUE /* is UTF-8 encoded */ ))
+    {
+        return FALSE;
+    }
+
+    /* Similarly verify there is a break between the current character and the
+     * following one */
+    s += UTF8SKIP(s);   /* 's' now points just past the UTF-8 for 'cp' */
+    if (s >= strend) {  /* Nothing follows 'cp' in the string */
+        next_cp_gcb_val = GCB_EDGE;
+    }
+    else {
+        next_cp_gcb_val = getGCB_VAL_UTF8(s, strend);
+    }
+
+    return isGCB(cp_gcb_val, next_cp_gcb_val, strbeg, s, TRUE);
+}
+
+
+
+
  /*
   * ex: set ts=8 sts=4 sw=4 et:
   */