strlen(value),
/* We mark it as UTF-8 if a utf8 locale
- * and is valid, non-ascii UTF-8 */
+ * and is valid UTF-8 with at least one variant character */
is_utf8_locale
- && ! is_ascii_string((U8 *) value, 0)
+ && ! is_invariant_string((U8 *) value, 0)
&& is_utf8_string((U8 *) value, 0)),
0);
}
STRLEN len = strlen(buf);
sv_usepvn_flags(sv, buf, len, SV_HAS_TRAILING_NUL);
if (SvUTF8(fmt)
- || (! is_ascii_string((U8*) buf, len)
+ || (! is_invariant_string((U8*) buf, len)
&& is_utf8_string((U8*) buf, len)
#ifdef USE_LOCALE_TIME
&& _is_cur_LC_category_utf8(LC_TIME)
sv_setpv(PL_numeric_radix_sv, lc->decimal_point);
else
PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0);
- if (! is_ascii_string((U8 *) lc->decimal_point, 0)
+ if (! is_invariant_string((U8 *) lc->decimal_point, 0)
&& is_utf8_string((U8 *) lc->decimal_point, 0)
&& _is_cur_LC_category_utf8(LC_NUMERIC))
{
lc = localeconv();
if (! lc
|| ! lc->currency_symbol
- || is_ascii_string((U8 *) lc->currency_symbol, 0))
+ || is_invariant_string((U8 *) lc->currency_symbol, 0))
{
DEBUG_L(PerlIO_printf(Perl_debug_log, "Couldn't get currency symbol for %s, or contains only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
only_ascii = TRUE;
/* Here the current LC_TIME is set to the locale of the category
* whose information is desired. Look at all the days of the week and
- * month names, and the timezone and am/pm indicator for non-ASCII
+ * month names, and the timezone and am/pm indicator for UTF-8 variant
* characters. The first such a one found will tell us if the locale
* is UTF-8 or not */
for (i = 0; i < 7 + 12; i++) { /* 7 days; 12 months */
formatted_time = my_strftime("%A %B %Z %p",
0, 0, hour, dom, month, 112, 0, 0, is_dst);
- if (! formatted_time || is_ascii_string((U8 *) formatted_time, 0)) {
+ if (! formatted_time || is_invariant_string((U8 *) formatted_time, 0)) {
/* Here, we didn't find a non-ASCII. Try the next time through
* with the complemented dst and am/pm, and try with the next
break;
}
errmsg = savepv(errmsg);
- if (! is_ascii_string((U8 *) errmsg, 0)) {
+ if (! is_invariant_string((U8 *) errmsg, 0)) {
non_ascii = TRUE;
is_utf8 = is_utf8_string((U8 *) errmsg, 0);
break;
* case we should turn on that flag. This didn't use to happen, and to
* avoid as many possible backward compatibility issues as possible, we
* don't turn on the flag unless we have to. So the flag stays off for
- * an entirely ASCII string. We assume that if the string looks like
- * UTF-8, it really is UTF-8: "text in any other encoding that uses
- * bytes with the high bit set is extremely unlikely to pass a UTF-8
- * validity test" (http://en.wikipedia.org/wiki/Charset_detection).
- * There is a potential that we will get it wrong however, especially
- * on short error message text. (If it turns out to be necessary, we
- * could also keep track if the current LC_MESSAGES locale is UTF-8) */
+ * an entirely invariant string. We assume that if the string looks
+ * like UTF-8, it really is UTF-8: "text in any other encoding that
+ * uses bytes with the high bit set is extremely unlikely to pass a
+ * UTF-8 validity test"
+ * (http://en.wikipedia.org/wiki/Charset_detection). There is a
+ * potential that we will get it wrong however, especially on short
+ * error message text. (If it turns out to be necessary, we could also
+ * keep track if the current LC_MESSAGES locale is UTF-8) */
if (! IN_BYTES /* respect 'use bytes' */
- && ! is_ascii_string((U8*) SvPVX_const(sv), SvCUR(sv))
+ && ! is_invariant_string((U8*) SvPVX_const(sv), SvCUR(sv))
&& is_utf8_string((U8*) SvPVX_const(sv), SvCUR(sv)))
{
SvUTF8_on(sv);
#endif
assert(len);
- if (! is_ascii_string((U8 *) s, len)) {
+ if (! is_invariant_string((U8 *) s, len)) {
const U8 *ep;
- /* Here contains a non-ASCII. See if the entire string is UTF-8. But
- * the buffer may end in a partial character, so consider it UTF-8 if
- * the first non-UTF8 char is an ending partial */
+ /* Here it contains a UTF-8 variant. See if the entire string is
+ * UTF-8. But the buffer may end in a partial character, so consider
+ * it UTF-8 if the first non-UTF8 char is an ending partial */
if (is_utf8_string_loc((U8 *) s, len, &ep)
|| ep + UTF8SKIP(ep) > (U8 *) (s + len))
{
SV * const sv = newSVpvn_utf8(start, len,
!IN_BYTES
&& UTF
- && !is_ascii_string((const U8*)start, len)
+ && !is_invariant_string((const U8*)start, len)
&& is_utf8_string((const U8*)start, len));
return sv;
}