-Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const char *s2, char **pe2, register UV l2, bool u2)
-{
- dVAR;
- register const U8 *p1 = (const U8*)s1;
- register const U8 *p2 = (const U8*)s2;
- register const U8 *f1 = NULL;
- register const U8 *f2 = NULL;
- register U8 *e1 = NULL;
- register U8 *q1 = NULL;
- register U8 *e2 = NULL;
- register U8 *q2 = NULL;
- STRLEN n1 = 0, n2 = 0;
- U8 foldbuf1[UTF8_MAXBYTES_CASE+1];
- U8 foldbuf2[UTF8_MAXBYTES_CASE+1];
- U8 natbuf[1+1];
- STRLEN foldlen1, foldlen2;
- bool match;
-
- if (pe1)
- e1 = *(U8**)pe1;
- if (e1 == 0 || (l1 && l1 < (UV)(e1 - (const U8*)s1)))
- f1 = (const U8*)s1 + l1;
- if (pe2)
- e2 = *(U8**)pe2;
- if (e2 == 0 || (l2 && l2 < (UV)(e2 - (const U8*)s2)))
- f2 = (const U8*)s2 + l2;
-
- if ((e1 == 0 && f1 == 0) || (e2 == 0 && f2 == 0) || (f1 == 0 && f2 == 0))
- return 1; /* mismatch; possible infinite loop or false positive */
-
- if (!u1 || !u2)
- natbuf[1] = 0; /* Need to terminate the buffer. */
-
- while ((e1 == 0 || p1 < e1) &&
- (f1 == 0 || p1 < f1) &&
- (e2 == 0 || p2 < e2) &&
- (f2 == 0 || p2 < f2)) {
- if (n1 == 0) {
- if (u1)
- to_utf8_fold(p1, foldbuf1, &foldlen1);
- else {
- uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p1)));
- to_utf8_fold(natbuf, foldbuf1, &foldlen1);
- }
- q1 = foldbuf1;
- n1 = foldlen1;
- }
- if (n2 == 0) {
- if (u2)
- to_utf8_fold(p2, foldbuf2, &foldlen2);
- else {
- uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p2)));
- to_utf8_fold(natbuf, foldbuf2, &foldlen2);
- }
- q2 = foldbuf2;
- n2 = foldlen2;
- }
- while (n1 && n2) {
- if ( UTF8SKIP(q1) != UTF8SKIP(q2) ||
- (UTF8SKIP(q1) == 1 && *q1 != *q2) ||
- memNE((char*)q1, (char*)q2, UTF8SKIP(q1)) )
- return 1; /* mismatch */
- n1 -= UTF8SKIP(q1);
- q1 += UTF8SKIP(q1);
- n2 -= UTF8SKIP(q2);
- q2 += UTF8SKIP(q2);
- }
- if (n1 == 0)
- p1 += u1 ? UTF8SKIP(p1) : 1;
- if (n2 == 0)
- p2 += u2 ? UTF8SKIP(p2) : 1;
-
- }
-
- /* A match is defined by all the scans that specified
- * an explicit length reaching their final goals. */
- match = (f1 == 0 || p1 == f1) && (f2 == 0 || p2 == f2);
-
- if (match) {
- if (pe1)
- *pe1 = (char*)p1;
- if (pe2)
- *pe2 = (char*)p2;
- }
-
- return match ? 0 : 1; /* 0 match, 1 mismatch */
+Perl_foldEQ_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const char *s2, char **pe2, register UV l2, bool u2)
+{
+ dVAR;
+ register const U8 *p1 = (const U8*)s1; /* Point to current char */
+ register const U8 *p2 = (const U8*)s2;
+ register const U8 *g1 = NULL; /* goal for s1 */
+ register const U8 *g2 = NULL;
+ register const U8 *e1 = NULL; /* Don't scan s1 past this */
+ register U8 *f1 = NULL; /* Point to current folded */
+ register const U8 *e2 = NULL;
+ register U8 *f2 = NULL;
+ STRLEN n1 = 0, n2 = 0; /* Number of bytes in current char */
+ U8 foldbuf1[UTF8_MAXBYTES_CASE+1];
+ U8 foldbuf2[UTF8_MAXBYTES_CASE+1];
+ U8 natbuf[2]; /* Holds native 8-bit char converted to utf8;
+ these always fit in 2 bytes */
+
+ PERL_ARGS_ASSERT_FOLDEQ_UTF8;
+
+ if (pe1) {
+ e1 = *(U8**)pe1;
+ }
+
+ if (l1) {
+ g1 = (const U8*)s1 + l1;
+ }
+
+ if (pe2) {
+ e2 = *(U8**)pe2;
+ }
+
+ if (l2) {
+ g2 = (const U8*)s2 + l2;
+ }
+
+ /* Must have at least one goal */
+ assert(g1 || g2);
+
+ if (g1) {
+
+ /* Will never match if goal is out-of-bounds */
+ assert(! e1 || e1 >= g1);
+
+ /* Here, there isn't an end pointer, or it is at or beyond the goal. We
+ * only go as far as the goal */
+ e1 = g1;
+ }
+ else {
+ assert(e1); /* Must have an end for looking at s1 */
+ }
+
+ /* Same for goal for s2 */
+ if (g2) {
+ assert(! e2 || e2 >= g2);
+ e2 = g2;
+ }
+ else {
+ assert(e2);
+ }
+
+ /* Look through both strings, a character at a time */
+ while (p1 < e1 && p2 < e2) {
+
+ /* If at the beginning of a new character in s1, get its fold to use
+ * and the length of the fold */
+ if (n1 == 0) {
+ if (u1) {
+ to_utf8_fold(p1, foldbuf1, &n1);
+ }
+ else { /* Not utf8, convert to it first and then get fold */
+ uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p1)));
+ to_utf8_fold(natbuf, foldbuf1, &n1);
+ }
+ f1 = foldbuf1;
+ }
+
+ if (n2 == 0) { /* Same for s2 */
+ if (u2) {
+ to_utf8_fold(p2, foldbuf2, &n2);
+ }
+ else {
+ uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p2)));
+ to_utf8_fold(natbuf, foldbuf2, &n2);
+ }
+ f2 = foldbuf2;
+ }
+
+ /* While there is more to look for in both folds, see if they
+ * continue to match */
+ while (n1 && n2) {
+ U8 fold_length = UTF8SKIP(f1);
+ if (fold_length != UTF8SKIP(f2)
+ || (fold_length == 1 && *f1 != *f2) /* Short circuit memNE
+ function call for single
+ character */
+ || memNE((char*)f1, (char*)f2, fold_length))
+ {
+ return 0; /* mismatch */
+ }
+
+ /* Here, they matched, advance past them */
+ n1 -= fold_length;
+ f1 += fold_length;
+ n2 -= fold_length;
+ f2 += fold_length;
+ }
+
+ /* When the end of a fold is reached, advance its input past the character */
+ if (n1 == 0) {
+ p1 += u1 ? UTF8SKIP(p1) : 1;
+ }
+ if (n2 == 0) {
+ p2 += u2 ? UTF8SKIP(p2) : 1;
+ }
+ } /* End of loop through both strings */
+
+ /* A match is defined by each scan that specified an explicit length
+ * reaching its final goal, and neither scan having stopped partway
+ * through a fold (which can happen when the fold of a character is more
+ * than one character). */
+ if (! ((g1 == 0 || p1 == g1) && (g2 == 0 || p2 == g2)) || n1 || n2) {
+ return 0;
+ }
+
+ /* Successful match. Set output pointers */
+ if (pe1) {
+ *pe1 = (char*)p1;
+ }
+ if (pe2) {
+ *pe2 = (char*)p2;
+ }
+ return 1;