# error "Unsupported byteorder"
/* Need to add code here to re-instate mixed endian support.
NEEDS_SWAP would need to hold a flag indicating which action to
- take, and S_reverse_copy and the code in uni_to_bytes would need
+ take, and S_reverse_copy and the code in S_utf8_to_bytes would need
logic adding to deal with any mixed-endian transformations needed.
*/
#endif
#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \
STMT_START { \
if (UNLIKELY(utf8)) { \
- if (!uni_to_bytes(aTHX_ &s, strend, \
+ if (!S_utf8_to_bytes(aTHX_ &s, strend, \
(char *) (buf), len, datumtype)) break; \
} else { \
if (UNLIKELY(needs_swap)) \
}
/*
 * utf8_to_byte (called uni_to_byte before this rename): helper with a U8
 * result that reads from UTF-8 encoded input.
 *
 * s         address of the current read pointer into the input
 * end       upper bound of the input; the decoder is given end - *s bytes
 * datumtype the pack/unpack type code of the item being processed
 *
 * The visible part of the body decodes the sequence at *s with
 * utf8n_to_uvchr(), passing &retlen to receive the sequence length.
 *
 * NOTE(review): the rest of the body (decoder flags, any range check on
 * val, how *s is advanced, the return statement) is not shown in this
 * excerpt -- confirm against the full source.
 */
STATIC U8
-uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
+utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
{
STRLEN retlen;
UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
}
/*
 * SHIFT_BYTE(utf8, s, strend, datumtype): expression macro yielding the next
 * input byte.
 *
 *  - utf8 true:  the byte comes from utf8_to_byte() (uni_to_byte() before the
 *                rename), which is handed the address of s plus strend and
 *                datumtype.  Presumably that helper advances s itself --
 *                TODO confirm, its full body is not visible here.
 *  - utf8 false: the byte at s is read as a U8 and s is post-incremented.
 *
 * s is used as an lvalue in both arms, so the caller must pass a modifiable
 * pointer variable.  The expansion uses aTHX_, so it relies on whatever that
 * macro requires being in scope at the point of use.
 */
#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
- uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
+ utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
*(U8 *)(s)++)
/*
 * S_utf8_to_bytes (called uni_to_bytes before this rename): multi-byte
 * counterpart of the single-byte helper; takes a buffer and a byte count in
 * addition to the read pointer, its bound and the datum type.
 *
 * Callers visible in this excerpt test the result with `!`, and on a false
 * return either break out of their loop or croak, so false signals failure.
 *
 * NOTE(review): buf is declared `const char *`, yet callers pass what look
 * like destination buffers (e.g. a local `char buffer[64]`); confirm whether
 * the const qualifier is intended.
 * NOTE(review): only the declarations of val and retlen are shown in this
 * excerpt; the rest of the body is elided.
 */
STATIC bool
-uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
+S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
{
UV val;
STRLEN retlen;
}
/*
 * next_utf8_uu (called next_uni_uu before this rename): decodes one UTF-8
 * sequence at *s, bounded by end, using utf8n_to_uvchr() with the
 * UTF8_CHECK_ONLY flag, and reports success as a bool.
 *
 * The uudecode loop visible elsewhere in this excerpt uses the result as a
 * loop condition when reading the length and ignores it when reading the
 * four values of a group, passing an I32 to receive each value via out.
 *
 * NOTE(review): how val is validated, what is stored through out, and how
 * *s is advanced are not shown in this excerpt -- confirm against the full
 * source.
 */
STATIC bool
-next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
+next_utf8_uu(pTHX_ const char **s, const char *end, I32 *out)
{
STRLEN retlen;
const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
}
/*
 * S_bytes_to_utf8 (called S_bytes_to_uni before this rename): takes len
 * bytes starting at start and a destination pointer dest, and returns a
 * char *.  Callers in this excerpt assign the result back to their write
 * cursor, so the return value is presumably the position just past the
 * written output -- TODO confirm, the body is cut off here.
 *
 * needs_swap selects a separate branch at the top of the body; its contents
 * are not visible in this excerpt.
 *
 * NOTE(review): the argument-assert macro below still carries the old
 * BYTES_TO_UNI name although the function was renamed; confirm that the
 * macro is still defined under that name or update it along with the
 * rename.
 */
STATIC char *
-S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
+S_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
PERL_ARGS_ASSERT_BYTES_TO_UNI;
if (UNLIKELY(needs_swap)) {
#define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \
STMT_START { \
if (UNLIKELY(utf8)) \
- (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \
+ (cur) = S_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \
else { \
if (UNLIKELY(needs_swap)) \
S_reverse_copy((char *)(buf), cur, len); \
STMT_START { \
if (utf8) { \
const U8 au8 = (byte); \
- (s) = S_bytes_to_uni(&au8, 1, (s), 0); \
+ (s) = S_bytes_to_utf8(&au8, 1, (s), 0); \
} else *(U8 *)(s)++ = (byte); \
} STMT_END
if (checksum) {
if (utf8)
while (len >= 8 && s < strend) {
- cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
+ cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
len -= 8;
}
else
if (len & 7) bits >>= 1;
else if (utf8) {
if (s >= strend) break;
- bits = uni_to_byte(aTHX_ &s, strend, datumtype);
+ bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
} else bits = *(U8 *) s++;
*str++ = bits & 1 ? '1' : '0';
}
if (len & 7) bits <<= 1;
else if (utf8) {
if (s >= strend) break;
- bits = uni_to_byte(aTHX_ &s, strend, datumtype);
+ bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
} else bits = *(U8 *) s++;
*str++ = bits & 0x80 ? '1' : '0';
}
if (len & 1) bits >>= 4;
else if (utf8) {
if (s >= strend) break;
- bits = uni_to_byte(aTHX_ &s, strend, datumtype);
+ bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
} else bits = * (U8 *) s++;
if (!checksum)
*str++ = PL_hexdigit[bits & 15];
if (len & 1) bits <<= 4;
else if (utf8) {
if (s >= strend) break;
- bits = uni_to_byte(aTHX_ &s, strend, datumtype);
+ bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
} else bits = *(U8 *) s++;
if (!checksum)
*str++ = PL_hexdigit[(bits >> 4) & 15];
STRLEN len;
/* Bug: a bad-UTF-8 warning is issued even when we are merely short on
bytes and are about to break out of the loop anyway */
- if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
+ if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
'U'))
break;
len = UTF8SKIP(result);
- if (!uni_to_bytes(aTHX_ &ptr, strend,
+ if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
(char *) &result[1], len-1, 'U')) break;
auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
len,
if (l) SvPOK_on(sv);
}
if (utf8) {
- while (next_uni_uu(aTHX_ &s, strend, &len)) {
+ while (next_utf8_uu(aTHX_ &s, strend, &len)) {
I32 a, b, c, d;
char hunk[3];
while (len > 0) {
- next_uni_uu(aTHX_ &s, strend, &a);
- next_uni_uu(aTHX_ &s, strend, &b);
- next_uni_uu(aTHX_ &s, strend, &c);
- next_uni_uu(aTHX_ &s, strend, &d);
+ next_utf8_uu(aTHX_ &s, strend, &a);
+ next_utf8_uu(aTHX_ &s, strend, &b);
+ next_utf8_uu(aTHX_ &s, strend, &c);
+ next_utf8_uu(aTHX_ &s, strend, &d);
hunk[0] = (char)((a << 2) | (b >> 4));
hunk[1] = (char)((b << 4) | (c >> 2));
hunk[2] = (char)((c << 6) | d);
if (datumtype == 'Z') len++;
}
GROWING(0, cat, start, cur, len);
- if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
+ if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
datumtype | TYPE_IS_PACK))
Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
"for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
len+(endb-buffer)*UTF8_EXPAND);
end = start+SvLEN(cat);
}
- cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
+ cur = S_bytes_to_utf8(buffer, endb-buffer, cur, 0);
} else {
if (cur >= end) {
*cur = '\0';
todo = fromlen;
if (from_utf8) {
char buffer[64];
- if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
+ if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
'u' | TYPE_IS_PACK)) {
*cur = '\0';
SvCUR_set(cat, cur - start);