From c664130fefeaef47ddc7dcbf7ec1830d04af8ea7 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 11 Apr 2016 19:11:07 -0600 Subject: [PATCH] locale.c: Change algorithm for strxfrm() trials It's kind of guesswork deciding how big a buffer to give to strxfrm(). If you give it one that is too small, it will fail. Prior to this commit, the buffer size was doubled and then strxfrm() was called again, looping until it worked or we used too much memory. Each time a new locale is made, we try to minimize the necessity of doing this by calculating numbers 'm' and 'b' that can be plugged into the linear expression mx + b, where 'x' is the size of the string passed to strxfrm(). strxfrm() is roughly linear with respect to its input's length, so this generally works without us having to do many loops to get a large enough size. But on many systems, strxfrm(), in failing, returns how much space you should have given it. On such systems, we can just use that number on the 2nd try and not have to keep guessing. This commit changes the code to do that. But on other systems this doesn't work. 
So the original method is retained if we determine that there are problems with strxfrm(), either from previous experience, or because using the size returned from the first trial didn't work --- embedvar.h | 1 + intrpvar.h | 2 ++ locale.c | 28 +++++++++++++++++++++++++--- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/embedvar.h b/embedvar.h index 794ed9a..6738368 100644 --- a/embedvar.h +++ b/embedvar.h @@ -309,6 +309,7 @@ #define PL_stderrgv (vTHX->Istderrgv) #define PL_stdingv (vTHX->Istdingv) #define PL_strtab (vTHX->Istrtab) +#define PL_strxfrm_is_behaved (vTHX->Istrxfrm_is_behaved) #define PL_strxfrm_min_char (vTHX->Istrxfrm_min_char) #define PL_sub_generation (vTHX->Isub_generation) #define PL_subline (vTHX->Isubline) diff --git a/intrpvar.h b/intrpvar.h index 42872e5..f540a9d 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -565,6 +565,8 @@ PERLVAR(I, collxfrm_base, Size_t) /* Basic overhead in *xfrm() */ PERLVARI(I, collxfrm_mult,Size_t, 2) /* Expansion factor in *xfrm() */ PERLVARI(I, collation_ix, U32, 0) /* Collation generation index */ PERLVARA(I, strxfrm_min_char, 3, char) +PERLVARI(I, strxfrm_is_behaved, bool, TRUE) + /* Assume until proven otherwise that it works */ PERLVARI(I, collation_standard, bool, TRUE) /* Assume simple collation */ #endif /* USE_LOCALE_COLLATE */ diff --git a/locale.c b/locale.c index fd46a77..a534cdf 100644 --- a/locale.c +++ b/locale.c @@ -1306,6 +1306,7 @@ Perl_mem_collxfrm(pTHX_ const char *input_string, STRLEN s_strlen = strlen(input_string); char *xbuf; STRLEN xAlloc, xout; /* xalloc is a reserved word in VC */ + bool first_time = TRUE; /* Cleared after first loop iteration */ PERL_ARGS_ASSERT_MEM_COLLXFRM; @@ -1455,12 +1456,33 @@ Perl_mem_collxfrm(pTHX_ const char *input_string, if (UNLIKELY(xused >= PERL_INT_MAX)) goto bad; - /* Otherwise it should be that the transformation stopped in the middle - * because it ran out of space. Malloc more, and try again. 
*/ - xAlloc = (2 * xAlloc) + 1; + /* A well-behaved strxfrm() returns exactly how much space it needs + * (not including the trailing NUL) when it fails due to not enough + * space being provided. Assume that this is the case unless it's been + * proven otherwise */ + if (LIKELY(PL_strxfrm_is_behaved) && first_time) { + xAlloc = xused + sizeof(PL_collation_ix) + 1; + } + else { /* Here, either: + * 1) The strxfrm() has previously shown bad behavior; or + * 2) It isn't the first time through the loop, which means + * that the strxfrm() is now showing bad behavior, because + * we gave it what it said was needed in the previous + * iteration, and it came back saying it needed still more. + * (Many versions of cygwin fit this. When the buffer size + * isn't sufficient, they return the input size instead of + * how much is needed.) + * Increase the buffer size by a fixed percentage and try again. */ + xAlloc = (2 * xAlloc) + 1; + PL_strxfrm_is_behaved = FALSE; + } + + Renew(xbuf, xAlloc, char); if (UNLIKELY(! xbuf)) goto bad; + + first_time = FALSE; } *xlen = xout - sizeof(PL_collation_ix); -- 1.8.3.1