STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
* lowercased) character stored in tmpbuf. May be either
* UTF-8 or not, but in either case is the number of bytes */
- bool tainted = FALSE;
s = (const U8*)SvPV_const(source, slen);
doing_utf8 = TRUE;
ulen = UTF8SKIP(s);
if (op_type == OP_UCFIRST) {
- _to_utf8_title_flags(s, tmpbuf, &tculen,
- cBOOL(IN_LOCALE_RUNTIME), &tainted);
+ _to_utf8_title_flags(s, tmpbuf, &tculen, IN_LOCALE_RUNTIME);
}
else {
- _to_utf8_lower_flags(s, tmpbuf, &tculen,
- cBOOL(IN_LOCALE_RUNTIME), &tainted);
+ _to_utf8_lower_flags(s, tmpbuf, &tculen, IN_LOCALE_RUNTIME);
}
/* we can't do in-place if the length changes. */
}
/* is ucfirst() */
else if (IN_LOCALE_RUNTIME) {
- *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
- * have upper and title case different
- */
+ if (IN_UTF8_CTYPE_LOCALE) {
+ goto do_uni_rules;
+ }
+
+ *tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any
+ locales have upper and title case
+ different */
}
else if (! IN_UNI_8_BIT) {
*tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
* on EBCDIC machines whatever the
* native function does */
}
- else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
- UV title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
+ else {
+ /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is
+ * UTF-8, which we treat as not in locale), and cased latin1 */
+ UV title_ord;
+
+ do_uni_rules:
+
+ title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
if (tculen > 1) {
assert(tculen == 2);
SvCUR_set(dest, need - 1);
}
- if (tainted) {
- TAINT;
- SvTAINTED_on(dest);
- }
}
else { /* Neither source nor dest are in or need to be UTF-8 */
if (slen) {
- if (IN_LOCALE_RUNTIME) {
- TAINT;
- SvTAINTED_on(dest);
- }
if (inplace) { /* in-place, only need to change the 1st char */
*d = *tmpbuf;
}
SvCUR_set(dest, need - 1);
}
}
+ if (IN_LOCALE_RUNTIME) {
+ TAINT;
+ SvTAINTED_on(dest);
+ }
if (dest != source && SvTAINTED(source))
SvTAINT(dest);
SvSETMAGIC(dest);
(SvTEMP(source) && !SvSMAGICAL(source) && SvREFCNT(source) == 1))
&& !SvREADONLY(source) && SvPOK(source)
&& !DO_UTF8(source)
- && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
-
- /* We can convert in place. The reason we can't if in UNI_8_BIT is to
- * make the loop tight, so we overwrite the source with the dest before
- * looking at it, and we need to look at the original source
- * afterwards. There would also need to be code added to handle
- * switching to not in-place in midstream if we run into characters
- * that change the length.
- */
+ && ((IN_LOCALE_RUNTIME)
+ ? ! IN_UTF8_CTYPE_LOCALE
+ : ! IN_UNI_8_BIT))
+ {
+
+ /* We can convert in place. The reason we can't if in UNI_8_BIT is to
+ * make the loop tight, so we overwrite the source with the dest before
+ * looking at it, and we need to look at the original source
+ * afterwards. There would also need to be code added to handle
+ * switching to not in-place in midstream if we run into characters
+ * that change the length. Since being in locale overrides UNI_8_BIT,
+ * that latter becomes irrelevant in the above test; instead for
+ * locale, the size can't normally change, except if the locale is a
+ * UTF-8 one */
dest = source;
s = d = (U8*)SvPV_force_nomg(source, len);
min = len + 1;
if (DO_UTF8(source)) {
const U8 *const send = s + len;
U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- bool tainted = FALSE;
/* All occurrences of these are to be moved to follow any other marks.
* This is context-dependent. We may not be passed enough context to
* and copy it to the output buffer */
u = UTF8SKIP(s);
- uv = _to_utf8_upper_flags(s, tmpbuf, &ulen,
- cBOOL(IN_LOCALE_RUNTIME), &tainted);
+ uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, IN_LOCALE_RUNTIME);
#define GREEK_CAPITAL_LETTER_IOTA 0x0399
#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
if (uv == GREEK_CAPITAL_LETTER_IOTA
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
- if (tainted) {
- TAINT;
- SvTAINTED_on(dest);
- }
}
else { /* Not UTF-8 */
if (len) {
* latin1 as having case; otherwise the latin1 casing. Do the
* whole thing in a tight loop, for speed, */
if (IN_LOCALE_RUNTIME) {
- TAINT;
- SvTAINTED_on(dest);
+ if (IN_UTF8_CTYPE_LOCALE) {
+ goto do_uni_rules;
+ }
for (; s < send; d++, s++)
- *d = toUPPER_LC(*s);
+ *d = (U8) toUPPER_LC(*s);
}
else if (! IN_UNI_8_BIT) {
for (; s < send; d++, s++) {
}
}
else {
+ do_uni_rules:
for (; s < send; d++, s++) {
*d = toUPPER_LATIN1_MOD(*s);
if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
}
} /* End of isn't utf8 */
+ if (IN_LOCALE_RUNTIME) {
+ TAINT;
+ SvTAINTED_on(dest);
+ }
if (dest != source && SvTAINTED(source))
SvTAINT(dest);
SvSETMAGIC(dest);
if (DO_UTF8(source)) {
const U8 *const send = s + len;
U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- bool tainted = FALSE;
while (s < send) {
const STRLEN u = UTF8SKIP(s);
STRLEN ulen;
- _to_utf8_lower_flags(s, tmpbuf, &ulen,
- cBOOL(IN_LOCALE_RUNTIME), &tainted);
+ _to_utf8_lower_flags(s, tmpbuf, &ulen, IN_LOCALE_RUNTIME);
/* Here is where we would do context-sensitive actions. See the
* commit message for 86510fb15 for why there isn't any */
SvUTF8_on(dest);
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
- if (tainted) {
- TAINT;
- SvTAINTED_on(dest);
- }
} else { /* Not utf8 */
if (len) {
const U8 *const send = s + len;
/* Use locale casing if in locale; regular style if not treating
* latin1 as having case; otherwise the latin1 casing. Do the
* whole thing in a tight loop, for speed, */
- if (IN_LOCALE_RUNTIME) {
- TAINT;
- SvTAINTED_on(dest);
+ if (IN_LOCALE_RUNTIME) {
for (; s < send; d++, s++)
*d = toLOWER_LC(*s);
- }
+ }
else if (! IN_UNI_8_BIT) {
for (; s < send; d++, s++) {
*d = toLOWER(*s);
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
}
}
+ if (IN_LOCALE_RUNTIME) {
+ TAINT;
+ SvTAINTED_on(dest);
+ }
if (dest != source && SvTAINTED(source))
SvTAINT(dest);
SvSETMAGIC(dest);
send = s + len;
if (DO_UTF8(source)) { /* UTF-8 flagged string. */
- bool tainted = FALSE;
while (s < send) {
const STRLEN u = UTF8SKIP(s);
STRLEN ulen;
- _to_utf8_fold_flags(s, tmpbuf, &ulen, flags, &tainted);
+ _to_utf8_fold_flags(s, tmpbuf, &ulen, flags);
if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
const UV o = d - (U8*)SvPVX_const(dest);
s += u;
}
SvUTF8_on(dest);
- if (tainted) {
- TAINT;
- SvTAINTED_on(dest);
- }
} /* Unflagged string */
else if (len) {
if ( IN_LOCALE_RUNTIME ) { /* Under locale */
- TAINT;
- SvTAINTED_on(dest);
+ if (IN_UTF8_CTYPE_LOCALE) {
+ goto do_uni_folding;
+ }
for (; s < send; d++, s++)
- *d = toFOLD_LC(*s);
+ *d = (U8) toFOLD_LC(*s);
}
else if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
for (; s < send; d++, s++)
*d = toFOLD(*s);
}
else {
+ do_uni_folding:
/* For ASCII and the Latin-1 range, there's only two troublesome
* folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
* casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
+ if (IN_LOCALE_RUNTIME) {
+ TAINT;
+ SvTAINTED_on(dest);
+ }
if (SvTAINTED(source))
SvTAINT(dest);
SvSETMAGIC(dest);
else if (do_utf8 == (RX_UTF8(rx) != 0) &&
(RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
&& (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
- && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
+ && !(RX_EXTFLAGS(rx) & RXf_IS_ANCHORED)) {
const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
SV * const csv = CALLREG_INTUIT_STRING(rx);