U8 * s = (U8 *) SvPVX_const(sv);
U8 * e = (U8 *) SvEND(sv);
U8 *t = s;
- STRLEN two_byte_count = 0;
+ STRLEN two_byte_count;
- if (flags & SV_FORCE_UTF8_UPGRADE) goto must_be_utf8;
-
- /* See if really will need to convert to utf8. We mustn't rely on our
- * incoming SV being well formed and having a trailing '\0', as certain
- * code in pp_formline can send us partially built SVs. */
-
- while (t < e) {
- const U8 ch = *t++;
- if (NATIVE_BYTE_IS_INVARIANT(ch)) continue;
-
- t--; /* t already incremented; re-point to first variant */
- two_byte_count = 1;
- goto must_be_utf8;
- }
+ if (flags & SV_FORCE_UTF8_UPGRADE) {
+ two_byte_count = 0;
+ }
+ else {
+ if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) {
- /* utf8 conversion not needed because all are invariants. Mark as
- * UTF-8 even if no variant - saves scanning loop */
- SvUTF8_on(sv);
- if (extra) SvGROW(sv, SvCUR(sv) + extra);
- return SvCUR(sv);
+ /* utf8 conversion not needed because all bytes are invariants. Mark
+ * as UTF-8 even though there are no variants - saves a scanning loop */
+ SvUTF8_on(sv);
+ if (extra) SvGROW(sv, SvCUR(sv) + extra);
+ return SvCUR(sv);
+ }
- must_be_utf8:
+ /* Here, there is at least one variant, and t points to the first
+ * one */
+ two_byte_count = 1;
+ }
- /* Here, the string should be converted to utf8, either because of an
- * input flag (two_byte_count = 0), or because a character that
- * requires 2 bytes was found (two_byte_count = 1). t points either to
- * the beginning of the string (if we didn't examine anything), or to
- * the first variant. In either case, everything from s to t - 1 will
- * occupy only 1 byte each on output.
+ /* Note that the incoming SV may not have a trailing '\0', as certain
+ * code in pp_formline can send us partially built SVs.
+ *
+ * Here, the string should be converted to utf8, either because of an
+ * input flag (which causes two_byte_count to be set to 0), or because
+ * a character that requires 2 bytes was found (two_byte_count = 1). t
+ * points either to the beginning of the string (if we didn't examine
+ * anything), or to the first variant. In either case, everything from
+ * s to t - 1 will occupy only 1 byte each on output.
*
* There are two main ways to convert. One is to create a new string
* and go through the input starting from the beginning, appending each
* from s to t - 1 is invariant, the destination can be initialized
* with these using a fast memory copy
*
- * The other way is to figure out exactly how big the string should be
+ * The other way is to figure out exactly how big the string should be,
* by parsing the entire input. Then you don't have to make it big
* enough to handle the worst possible case, and more importantly, if
* the string you already have is large enough, you don't have to
* value. We go backwards through the string, converting until we
* get to the position we are at now, and then stop. If this
* position is far enough along in the string, this method is
- * faster than the other method. If the memory copy were the same
- * speed as the byte-by-byte loop, that position would be about
- * half-way, as at the half-way mark, parsing to the end and back
- * is one complete string's parse, the same amount as starting
- * over and going all the way through. Actually, it would be
- * somewhat less than half-way, as it's faster to just count bytes
- * than to also copy, and we don't have the overhead of allocating
- * a new string, changing the scalar to use it, and freeing the
- * existing one. But if the memory copy is fast, the break-even
- * point is somewhere after half way. The counting loop could be
- * sped up by vectorization, etc, to move the break-even point
- * further towards the beginning.
+ * faster than the first method above. If the memory copy were
+ * the same speed as the byte-by-byte loop, that position would be
+ * about half-way, as at the half-way mark, parsing to the end and
+ * back is one complete string's parse, the same amount as
+ * starting over and going all the way through. Actually, it
+ * would be somewhat less than half-way, as it's faster to just
+ * count bytes than to also copy, and we don't have the overhead
+ * of allocating a new string, changing the scalar to use it, and
+ * freeing the existing one. But if the memory copy is fast, the
+ * break-even point is somewhere after half-way. The counting
+ * loop could be sped up by vectorization, etc, to move the
+ * break-even point further towards the beginning.
* 2) if the string doesn't have enough space to handle the converted
* value. A new string will have to be allocated, and one might
* as well, given that, start from the beginning doing the first
* over the individual characters of a vector arg */
vector:
if (!veclen)
- goto donevalidconversion;
+ goto done_valid_conversion;
if (vec_utf8)
uv = utf8n_to_uvchr(vecstr, veclen, &ulen,
UTF8_ALLOW_ANYUV);
if (float_need < width)
float_need = width;
- if (PL_efloatsize < float_need) {
+ if (PL_efloatsize <= float_need) {
+ /* PL_efloatbuf should be at least 1 greater than
+ * float_need to allow a trailing \0 to be returned by
+ * snprintf(). If we need to grow, overgrow for the
+ * benefit of future generations */
+ const STRLEN extra = 0x20;
+ if (float_need >= ((STRLEN)~0) - extra)
+ croak_memory_wrap();
+ float_need += extra;
Safefree(PL_efloatbuf);
PL_efloatsize = float_need;
Newx(PL_efloatbuf, PL_efloatsize, char);
S_sv_catpvn_simple(aTHX_ sv, eptr, elen);
- goto donevalidconversion;
+ goto done_valid_conversion;
}
/* SPECIAL */
PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)len);
}
- goto donevalidconversion;
+ goto done_valid_conversion;
}
/* UNKNOWN */
goto vector; /* do next iteration */
}
- donevalidconversion:
+ done_valid_conversion:
if (arg_missing)
S_warn_vcatpvfn_missing_argument(aTHX);