/* Add 1 for the trailing NUL */
PL_collxfrm_base = base + 1;
}
+
+#ifdef DEBUGGING
+ if (DEBUG_L_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log,
+ "%s:%d: ?UTF-8 locale=%d; x_len_shorter=%"UVuf", "
+ "x_len_longer=%"UVuf","
+ " collate multipler=%"UVuf", collate base=%"UVuf"\n",
+ __FILE__, __LINE__,
+ PL_in_utf8_COLLATE_locale,
+ x_len_shorter, x_len_longer,
+ PL_collxfrm_mult, PL_collxfrm_base);
+ }
+#endif
}
}
/* _mem_collxfrm() is a bit like strxfrm() but with two important
* differences. First, it handles embedded NULs. Second, it allocates a bit
* more memory than needed for the transformed data itself. The real
- * transformed data begins at offset sizeof(collationix). *xlen is set to
+ * transformed data begins at offset COLLXFRM_HDR_LEN. *xlen is set to
* the length of that, and doesn't include the collation index size.
* Please see sv_collxfrm() to see how this is used. */
+#define COLLXFRM_HDR_LEN sizeof(PL_collation_ix)
+
char * s = (char *) input_string;
STRLEN s_strlen = strlen(input_string);
char *xbuf = NULL;
- STRLEN xAlloc, xout; /* xalloc is a reserved word in VC */
+ STRLEN xAlloc; /* xalloc is a reserved word in VC */
bool first_time = TRUE; /* Cleared after first loop iteration */
PERL_ARGS_ASSERT__MEM_COLLXFRM;
* locale, find it */
if (*PL_strxfrm_min_char == '\0') {
int j;
- char * cur_min_x = NULL; /* Cur cp's xfrm, (except it also
+#ifdef DEBUGGING
+ U8 cur_min_cp = 1; /* The code point that sorts lowest, so far */
+#endif
+ char * cur_min_x = NULL; /* And its xfrm, (except it also
includes the collation index
prefixed. */
/* If something went wrong (which it shouldn't), just
* ignore this code point */
if ( x_len == 0
- || strlen(x + sizeof(PL_collation_ix)) < x_len)
+ || strlen(x + COLLXFRM_HDR_LEN) < x_len)
{
continue;
}
/* If this character's transformation is lower than
* the current lowest, this one becomes the lowest */
if ( cur_min_x == NULL
- || strLT(x + sizeof(PL_collation_ix),
- cur_min_x + sizeof(PL_collation_ix)))
+ || strLT(x + COLLXFRM_HDR_LEN,
+ cur_min_x + COLLXFRM_HDR_LEN))
{
strcpy(PL_strxfrm_min_char, cur_source);
cur_min_x = x;
+#ifdef DEBUGGING
+ cur_min_cp = j;
+#endif
}
else {
Safefree(x);
/* cur_min_cp was already initialized to 1 */
}
+ DEBUG_L(PerlIO_printf(Perl_debug_log,
+ "_mem_collxfrm: lowest collating control in the 0-255 "
+ "range in locale %s is 0x%02X\n",
+ PL_collation_name,
+ cur_min_cp));
+ if (DEBUG_Lv_TEST) {
+ unsigned i;
+ PerlIO_printf(Perl_debug_log, "Its xfrm is");
+ for (i = 0; i < strlen(cur_min_x + COLLXFRM_HDR_LEN); i ++) {
+ PerlIO_printf(Perl_debug_log, " %02x",
+ (U8) *(cur_min_x + COLLXFRM_HDR_LEN + i));
+ }
+ PerlIO_printf(Perl_debug_log, "\n");
+ }
+
Safefree(cur_min_x);
}
/* If this character's transformation is higher than
* the current highest, this one becomes the highest */
if ( cur_max_x == NULL
- || strGT(x + sizeof(PL_collation_ix),
- cur_max_x + sizeof(PL_collation_ix)))
+ || strGT(x + COLLXFRM_HDR_LEN,
+ cur_max_x + COLLXFRM_HDR_LEN))
{
PL_strxfrm_max_cp = j;
cur_max_x = x;
}
}
+ DEBUG_L(PerlIO_printf(Perl_debug_log,
+ "_mem_collxfrm: highest 1-byte collating character"
+ " in locale %s is 0x%02X\n",
+ PL_collation_name,
+ PL_strxfrm_max_cp));
+ if (DEBUG_Lv_TEST) {
+ unsigned i;
+ PerlIO_printf(Perl_debug_log, "Its xfrm is ");
+ for (i = 0;
+ i < strlen(cur_max_x + COLLXFRM_HDR_LEN);
+ i++)
+ {
+ PerlIO_printf(Perl_debug_log, " %02x",
+ (U8) cur_max_x[i + COLLXFRM_HDR_LEN]);
+ }
+ PerlIO_printf(Perl_debug_log, "\n");
+ }
+
Safefree(cur_max_x);
}
/* The first element in the output is the collation id, used by
* sv_collxfrm(); then comes the space for the transformed string. The
* equation should give us a good estimate as to how much is needed */
- xAlloc = sizeof(PL_collation_ix)
+ xAlloc = COLLXFRM_HDR_LEN
+ PL_collxfrm_base
+ (PL_collxfrm_mult * ((utf8)
? utf8_length((U8 *) s, (U8 *) s + len)
/* Store the collation id */
*(U32*)xbuf = PL_collation_ix;
- xout = sizeof(PL_collation_ix);
/* Then the transformation of the input. We loop until successful, or we
* give up */
for (;;) {
- STRLEN xused = strxfrm(xbuf + xout, s, xAlloc - xout);
+ *xlen = strxfrm(xbuf + COLLXFRM_HDR_LEN, s, xAlloc - COLLXFRM_HDR_LEN);
/* If the transformed string occupies less space than we told strxfrm()
* was available, it means it successfully transformed the whole
* string. */
- if (xused < xAlloc - xout) {
- xout += xused;
+ if (*xlen < xAlloc - COLLXFRM_HDR_LEN) {
break;
}
- if (UNLIKELY(xused >= PERL_INT_MAX))
+ if (UNLIKELY(*xlen >= PERL_INT_MAX))
goto bad;
/* A well-behaved strxfrm() returns exactly how much space it needs
* space being provided. Assume that this is the case unless it's been
* proven otherwise */
if (LIKELY(PL_strxfrm_is_behaved) && first_time) {
- xAlloc = xused + sizeof(PL_collation_ix) + 1;
+ xAlloc = *xlen + COLLXFRM_HDR_LEN + 1;
}
else { /* Here, either:
* 1) The strxfrm() has previously shown bad behavior; or
* isn't sufficient, they return the input size instead of
* how much is needed.)
* Increase the buffer size by a fixed percentage and try again. */
- xAlloc = (2 * xAlloc) + 1;
+ xAlloc += (xAlloc / 4) + 1;
PL_strxfrm_is_behaved = FALSE;
- }
+#ifdef DEBUGGING
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log,
+ "_mem_collxfrm required more space than previously calculated"
+ " for locale %s, trying again with new guess=%d+%"UVuf"\n",
+ PL_collation_name, (int) COLLXFRM_HDR_LEN,
+ (UV) xAlloc - COLLXFRM_HDR_LEN);
+ }
+#endif
+ }
Renew(xbuf, xAlloc, char);
if (UNLIKELY(! xbuf))
first_time = FALSE;
}
- *xlen = xout - sizeof(PL_collation_ix);
+
+#ifdef DEBUGGING
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ unsigned i;
+ PerlIO_printf(Perl_debug_log,
+ "_mem_collxfrm[%d]: returning %"UVuf" for locale %s '%s'\n",
+ PL_collation_ix, *xlen, PL_collation_name, input_string);
+ PerlIO_printf(Perl_debug_log, "Its xfrm is");
+ for (i = COLLXFRM_HDR_LEN; i < *xlen + COLLXFRM_HDR_LEN; i++) {
+ PerlIO_printf(Perl_debug_log, " %02x", (U8) xbuf[i]);
+ }
+ PerlIO_printf(Perl_debug_log, "\n");
+ }
+#endif
/* Free up unneeded space; retain ehough for trailing NUL */
- Renew(xbuf, xout + 1, char);
+ Renew(xbuf, COLLXFRM_HDR_LEN + *xlen + 1, char);
if (s != input_string) {
Safefree(s);
Safefree(s);
}
*xlen = 0;
+#ifdef DEBUGGING
+ if (DEBUG_Lv_TEST || debug_initialization) {
+ PerlIO_printf(Perl_debug_log, "_mem_collxfrm[%d] returning NULL\n",
+ PL_collation_ix);
+ }
+#endif
return NULL;
}
#endif /* USE_LOCALE_COLLATE */
+
#ifdef USE_LOCALE
bool