U8 * s = (U8 *) SvPVX_const(sv);
U8 * e = (U8 *) SvEND(sv);
U8 *t = s;
- STRLEN two_byte_count = 0;
+ STRLEN two_byte_count;
- if (flags & SV_FORCE_UTF8_UPGRADE) goto must_be_utf8;
-
- /* See if really will need to convert to utf8. We mustn't rely on our
- * incoming SV being well formed and having a trailing '\0', as certain
- * code in pp_formline can send us partially built SVs. */
-
- while (t < e) {
- const U8 ch = *t++;
- if (NATIVE_BYTE_IS_INVARIANT(ch)) continue;
-
- t--; /* t already incremented; re-point to first variant */
- two_byte_count = 1;
- goto must_be_utf8;
- }
+ if (flags & SV_FORCE_UTF8_UPGRADE) {
+ two_byte_count = 0;
+ }
+ else {
+ if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) {
- /* utf8 conversion not needed because all are invariants. Mark as
- * UTF-8 even if no variant - saves scanning loop */
- SvUTF8_on(sv);
- if (extra) SvGROW(sv, SvCUR(sv) + extra);
- return SvCUR(sv);
+ /* utf8 conversion not needed because all are invariants. Mark
+ * as UTF-8 even if no variant - saves scanning loop */
+ SvUTF8_on(sv);
+ if (extra) SvGROW(sv, SvCUR(sv) + extra);
+ return SvCUR(sv);
+ }
- must_be_utf8:
+ /* Here, there is at least one variant, and t points to the first
+ * one */
+ two_byte_count = 1;
+ }
- /* Here, the string should be converted to utf8, either because of an
- * input flag (two_byte_count = 0), or because a character that
- * requires 2 bytes was found (two_byte_count = 1). t points either to
- * the beginning of the string (if we didn't examine anything), or to
- * the first variant. In either case, everything from s to t - 1 will
- * occupy only 1 byte each on output.
+ /* Note that the incoming SV may not have a trailing '\0', as certain
+ * code in pp_formline can send us partially built SVs.
+ *
+ * Here, the string should be converted to utf8, either because of an
+ * input flag (which causes two_byte_count to be set to 0), or because
+ * a character that requires 2 bytes was found (two_byte_count = 1). t
+ * points either to the beginning of the string (if we didn't examine
+ * anything), or to the first variant. In either case, everything from
+ * s to t - 1 will occupy only 1 byte each on output.
*
* There are two main ways to convert. One is to create a new string
* and go through the input starting from the beginning, appending each
* from s to t - 1 is invariant, the destination can be initialized
* with these using a fast memory copy
*
- * The other way is to figure out exactly how big the string should be
+ * The other way is to figure out exactly how big the string should be,
* by parsing the entire input. Then you don't have to make it big
* enough to handle the worst possible case, and more importantly, if
* the string you already have is large enough, you don't have to
* value. We go backwards through the string, converting until we
* get to the position we are at now, and then stop. If this
* position is far enough along in the string, this method is
- * faster than the other method. If the memory copy were the same
- * speed as the byte-by-byte loop, that position would be about
- * half-way, as at the half-way mark, parsing to the end and back
- * is one complete string's parse, the same amount as starting
- * over and going all the way through. Actually, it would be
- * somewhat less than half-way, as it's faster to just count bytes
- * than to also copy, and we don't have the overhead of allocating
- * a new string, changing the scalar to use it, and freeing the
- * existing one. But if the memory copy is fast, the break-even
- * point is somewhere after half way. The counting loop could be
- * sped up by vectorization, etc, to move the break-even point
- * further towards the beginning.
+ * faster than the first method above. If the memory copy were
+ * the same speed as the byte-by-byte loop, that position would be
+ * about half-way, as at the half-way mark, parsing to the end and
+ * back is one complete string's parse, the same amount as
+ * starting over and going all the way through. Actually, it
+ * would be somewhat less than half-way, as it's faster to just
+ * count bytes than to also copy, and we don't have the overhead
+ * of allocating a new string, changing the scalar to use it, and
+ * freeing the existing one. But if the memory copy is fast, the
+ * break-even point is somewhere after half way. The counting
+ * loop could be sped up by vectorization, etc, to move the
+ * break-even point further towards the beginning.
* 2) if the string doesn't have enough space to handle the converted
* value. A new string will have to be allocated, and one might
* as well, given that, start from the beginning doing the first
/* Perl_sv_vsetpvfn: calls SvPVCLEAR() on sv and then hands every argument
 * straight on to sv_vcatpvfn_flags() with a flags value of 0 (i.e. neither
 * SV_GMAGIC nor SV_SMAGIC is requested here, unlike Perl_sv_vcatpvfn). */
void
Perl_sv_vsetpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
- va_list *const args, SV **const svargs, const Size_t svmax, bool *const maybe_tainted)
+ va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted)
{
PERL_ARGS_ASSERT_SV_VSETPVFN;
SvPVCLEAR(sv);
- sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, 0);
+ sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, 0);
+}
+
+
+/* simplified inline Perl_sv_catpvn_nomg() when you know the SV's SvPOK.
+ *
+ * Appends the len bytes starting at buf to the end of sv's string buffer.
+ * The buffer is first grown with SvGROW() so that it can hold the
+ * existing SvCUR(sv) bytes, the len new bytes and one trailing '\0';
+ * the bytes are then copied to SvEND(sv), the '\0' is written after
+ * them, and SvCUR is set to the new length (excluding the '\0').
+ *
+ * The SvPOK precondition is only checked by an assert(). */
+
+PERL_STATIC_INLINE void
+S_sv_catpvn_simple(pTHX_ SV *const sv, const char* const buf, const STRLEN len)
+{
+ STRLEN const need = len + SvCUR(sv) + 1; /* the + 1 is the trailing '\0' */
+ char *end;
+
+ /* can't wrap as both len and SvCUR() are allocated in
+ * memory and together can't consume all the address space.
+ * If the sum had wrapped, need would be <= len, which is what the
+ * assert below would catch.
+ */
+ assert(need > len);
+
+ assert(SvPOK(sv));
+ SvGROW(sv, need);
+ end = SvEND(sv); /* taken after SvGROW(), before SvCUR is updated */
+ Copy(buf, end, len, char);
+ end += len;
+ *end = '\0';
+ SvCUR_set(sv, need - 1); /* need counted the '\0'; SvCUR must not */
}
/* Perl_sv_vcatpvfn: thin wrapper that forwards all of its arguments to
 * sv_vcatpvfn_flags(), supplying SV_GMAGIC|SV_SMAGIC as the flags. */
void
Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
- va_list *const args, SV **const svargs, const Size_t svmax, bool *const maybe_tainted)
+ va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted)
{
PERL_ARGS_ASSERT_SV_VCATPVFN;
- sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, SV_GMAGIC|SV_SMAGIC);
+ sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, SV_GMAGIC|SV_SMAGIC);
}
void
Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
- va_list *const args, SV **const svargs, const Size_t svmax, bool *const maybe_tainted,
+ va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted,
const U32 flags)
{
const char *fmtstart; /* character following the current '%' */
SV *argsv = NULL;
bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */
const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */
- SV *nsv = NULL;
/* Times 4: a decimal digit takes more than 3 binary digits.
* NV_DIG: mantissa takes that many decimal digits.
* Plus 32: Playing safe. */
* warnings etc.
*/
- if (patlen == 0 && (args || svmax == 0))
+ if (patlen == 0 && (args || sv_count == 0))
return;
- if (patlen <= 4 && pat[0] == '%' && (args || svmax == 1)) {
+ if (patlen <= 4 && pat[0] == '%' && (args || sv_count == 1)) {
/* "%s" */
if (patlen == 2 && pat[1] == 's') {
{};
if (q > fmtstart) {
- if (has_utf8 && !pat_utf8)
- sv_catpvn_nomg_utf8_upgrade(sv, fmtstart, q - fmtstart, nsv);
+ if (has_utf8 && !pat_utf8) {
+ /* upgrade and copy the bytes of fmtstart..q-1 to utf8 on
+ * the fly */
+ const char *p;
+ char *dst;
+ STRLEN need = SvCUR(sv) + (q - fmtstart) + 1;
+
+ for (p = fmtstart; p < q; p++)
+ if (!NATIVE_BYTE_IS_INVARIANT(*p))
+ need++;
+ SvGROW(sv, need);
+
+ dst = SvEND(sv);
+ for (p = fmtstart; p < q; p++)
+ append_utf8_from_native_byte((U8)*p, (U8**)&dst);
+ *dst = '\0';
+ SvCUR_set(sv, need - 1);
+ }
else
- sv_catpvn_nomg(sv, fmtstart, q - fmtstart);
+ S_sv_catpvn_simple(aTHX_ sv, fmtstart, q - fmtstart);
}
if (q++ >= patend)
break;
vecsv = va_arg(*args, SV*);
else {
ix = ix ? ix - 1 : svix++;
- vecsv = ix < svmax ? svargs[ix]
+ vecsv = ix < sv_count ? svargs[ix]
: (arg_missing = TRUE, &PL_sv_no);
}
dotstr = SvPV_const(vecsv, dotstrlen);
i = va_arg(*args, int);
else {
ix = ix ? ix - 1 : svix++;
- sv = (ix < svmax) ? svargs[ix]
+ sv = (ix < sv_count) ? svargs[ix]
: (arg_missing = TRUE, (SV*)NULL);
}
width = S_sprintf_arg_num_val(aTHX_ args, i, sv, &left);
i = va_arg(*args, int);
else {
ix = ix ? ix - 1 : svix++;
- sv = (ix < svmax) ? svargs[ix]
+ sv = (ix < sv_count) ? svargs[ix]
: (arg_missing = TRUE, (SV*)NULL);
}
precis = S_sprintf_arg_num_val(aTHX_ args, i, sv, &neg);
if (!args) {
efix = efix ? efix - 1 : svix++;
- argsv = efix < svmax ? svargs[efix]
+ argsv = efix < sv_count ? svargs[efix]
: (arg_missing = TRUE, &PL_sv_no);
}
* over the individual characters of a vector arg */
vector:
if (!veclen)
- goto donevalidconversion;
+ goto done_valid_conversion;
if (vec_utf8)
uv = utf8n_to_uvchr(vecstr, veclen, &ulen,
UTF8_ALLOW_ANYUV);
if (float_need < width)
float_need = width;
- if (PL_efloatsize < float_need) {
+ if (PL_efloatsize <= float_need) {
+ /* PL_efloatbuf should be at least 1 greater than
+ * float_need to allow a trailing \0 to be returned by
+ * snprintf(). If we need to grow, overgrow for the
+ * benefit of future generations */
+ const STRLEN extra = 0x20;
+ if (float_need >= ((STRLEN)~0) - extra)
+ croak_memory_wrap();
+ float_need += extra;
Safefree(PL_efloatbuf);
PL_efloatsize = float_need;
Newx(PL_efloatbuf, PL_efloatsize, char);
assert(elen);
assert(elen >= width);
+ S_sv_catpvn_simple(aTHX_ sv, eptr, elen);
- {
- /* unrolled Perl_sv_catpvn */
- STRLEN need = elen + SvCUR(sv) + 1;
- char *end;
- /* can't wrap as both elen and SvCUR() are allocated in
- * memory and together can't consume all the address space
- */
- assert(need > elen);
- SvGROW(sv, need);
- end = SvEND(sv);
- Copy(eptr, end, elen, char);
- end += elen;
- *end = '\0';
- SvCUR_set(sv, need - 1);
- }
-
- goto donevalidconversion;
+ goto done_valid_conversion;
}
/* SPECIAL */
PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)len);
}
- goto donevalidconversion;
+ goto done_valid_conversion;
}
/* UNKNOWN */
goto vector; /* do next iteration */
}
- donevalidconversion:
+ done_valid_conversion:
if (arg_missing)
S_warn_vcatpvfn_missing_argument(aTHX);
/* Now that we've consumed all our printf format arguments (svix)
* do we have things left on the stack that we didn't use?
*/
- if (!no_redundant_warning && svmax >= svix + 1 && ckWARN(WARN_REDUNDANT)) {
+ if (!no_redundant_warning && sv_count >= svix + 1 && ckWARN(WARN_REDUNDANT)) {
Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s",
PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
}