SvTEMP_off(sv);
SvREFCNT_inc_void_NN(sv);
}
- else if (SvPADTMP(sv) && !IS_PADGV(sv))
+ else if (SvPADTMP(sv)) {
+ assert(!IS_PADGV(sv));
sv = newSVsv(sv);
+ }
else {
SvTEMP_off(sv);
SvREFCNT_inc_void_NN(sv);
else stash = NULL;
}
+ SvREFCNT_inc_simple_void_NN(sv_2mortal(sv));
gp_free(MUTABLE_GV(sv));
Newxz(gp, 1, GP);
GvGP_set(sv, gp_ref(gp));
SvREADONLY_on(*SP);
}
#else
- if (*SP)
- {
- if (mod && SvPADTMP(*SP) && !IS_PADGV(*SP))
+ if (*SP) {
+ if (mod && SvPADTMP(*SP)) {
+ assert(!IS_PADGV(*SP));
*SP = sv_mortalcopy(*SP);
+ }
SvTEMP_off((*SP));
}
#endif
tryAMAGICbin_MG(amg_type, AMGf_set);
{
dPOPTOPssrl;
- const int cmp = (IN_LOCALE_RUNTIME
- ? sv_cmp_locale_flags(left, right, 0)
- : sv_cmp_flags(left, right, 0));
+ const int cmp =
+#ifdef USE_LC_COLLATE
+ (IN_LC_RUNTIME(LC_COLLATE))
+ ? sv_cmp_locale_flags(left, right, 0)
+ :
+#endif
+ sv_cmp_flags(left, right, 0);
SETs(boolSV(cmp * multiplier < rhs));
RETURN;
}
tryAMAGICbin_MG(scmp_amg, 0);
{
dPOPTOPssrl;
- const int cmp = (IN_LOCALE_RUNTIME
- ? sv_cmp_locale_flags(left, right, 0)
- : sv_cmp_flags(left, right, 0));
+ const int cmp =
+#ifdef USE_LC_COLLATE
+ (IN_LC_RUNTIME(LC_COLLATE))
+ ? sv_cmp_locale_flags(left, right, 0)
+ :
+#endif
+ sv_cmp_flags(left, right, 0);
SETi( cmp );
RETURN;
}
int pos2_is_uv;
PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
+ PERL_UNUSED_CONTEXT;
if (!pos1_is_uv && pos1_iv < 0 && curlen) {
pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
SV *temp = NULL;
STRLEN biglen;
STRLEN llen = 0;
- I32 offset;
- I32 retval;
+ SSize_t offset = 0;
+ SSize_t retval;
const char *big_p;
const char *little_p;
bool big_utf8;
offset = is_index ? 0 : biglen;
else {
if (big_utf8 && offset > 0)
- sv_pos_u2b(big, &offset, 0);
+ offset = sv_pos_u2b_flags(big, offset, 0, SV_CONST_RETURN);
if (!is_index)
offset += llen;
}
if (offset < 0)
offset = 0;
- else if (offset > (I32)biglen)
+ else if (offset > (SSize_t)biglen)
offset = biglen;
if (!(little_p = is_index
? fbm_instr((unsigned char*)big_p + offset,
else {
retval = little_p - big_p;
if (retval > 0 && big_utf8)
- sv_pos_b2u(big, &retval);
+ retval = sv_pos_b2u_flags(big, retval, SV_CONST_RETURN);
}
SvREFCNT_dec(temp);
fail:
if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
SV * const tmpsv = sv_2mortal(newSVsv(argsv));
s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
+ len = UTF8SKIP(s); /* Should be well-formed; so this is its length */
argsv = tmpsv;
}
XPUSHu(DO_UTF8(argsv)
- ? utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV)
- : (UV)(*s & 0xff));
+ ? utf8n_to_uvchr(s, len, 0, UTF8_ALLOW_ANYUV)
+ : (UV)(*s));
RETURN;
}
top = top2;
}
Perl_warner(aTHX_ packWARN(WARN_UTF8),
- "Invalid negative number (%"SVf") in chr", top);
+ "Invalid negative number (%"SVf") in chr", SVfARG(top));
}
value = UNICODE_REPLACEMENT;
} else {
STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
* lowercased) character stored in tmpbuf. May be either
* UTF-8 or not, but in either case is the number of bytes */
- bool tainted = FALSE;
- SvGETMAGIC(source);
- if (SvOK(source)) {
- s = (const U8*)SvPV_nomg_const(source, slen);
- } else {
- if (ckWARN(WARN_UNINITIALIZED))
- report_uninit(source);
- s = (const U8*)"";
- slen = 0;
- }
+ s = (const U8*)SvPV_const(source, slen);
/* We may be able to get away with changing only the first character, in
* place, but not if read-only, etc. Later we may discover more reasons to
doing_utf8 = TRUE;
ulen = UTF8SKIP(s);
if (op_type == OP_UCFIRST) {
- _to_utf8_title_flags(s, tmpbuf, &tculen,
- cBOOL(IN_LOCALE_RUNTIME), &tainted);
+#ifdef USE_LOCALE_CTYPE
+ _to_utf8_title_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
+#else
+ _to_utf8_title_flags(s, tmpbuf, &tculen, 0);
+#endif
}
else {
- _to_utf8_lower_flags(s, tmpbuf, &tculen,
- cBOOL(IN_LOCALE_RUNTIME), &tainted);
+#ifdef USE_LOCALE_CTYPE
+ _to_utf8_lower_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
+#else
+ _to_utf8_lower_flags(s, tmpbuf, &tculen, 0);
+#endif
}
/* we can't do in-place if the length changes. */
if (op_type == OP_LCFIRST) {
/* lower case the first letter: no trickiness for any character */
- *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
- ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
+ *tmpbuf =
+#ifdef USE_LOCALE_CTYPE
+ (IN_LC_RUNTIME(LC_CTYPE))
+ ? toLOWER_LC(*s)
+ :
+#endif
+ (IN_UNI_8_BIT)
+ ? toLOWER_LATIN1(*s)
+ : toLOWER(*s);
}
/* is ucfirst() */
- else if (IN_LOCALE_RUNTIME) {
- *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
- * have upper and title case different
- */
+#ifdef USE_LOCALE_CTYPE
+ else if (IN_LC_RUNTIME(LC_CTYPE)) {
+ if (IN_UTF8_CTYPE_LOCALE) {
+ goto do_uni_rules;
+ }
+
+ *tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any
+ locales have upper and title case
+ different */
}
+#endif
else if (! IN_UNI_8_BIT) {
*tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
* on EBCDIC machines whatever the
* native function does */
}
- else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
- UV title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
+ else {
+ /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is
+ * UTF-8, which we treat as not in locale), and cased latin1 */
+ UV title_ord;
+#ifdef USE_LOCALE_CTYPE
+ do_uni_rules:
+#endif
+
+ title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
if (tculen > 1) {
assert(tculen == 2);
SvCUR_set(dest, need - 1);
}
- if (tainted) {
- TAINT;
- SvTAINTED_on(dest);
- }
}
else { /* Neither source nor dest are in or need to be UTF-8 */
if (slen) {
- if (IN_LOCALE_RUNTIME) {
- TAINT;
- SvTAINTED_on(dest);
- }
if (inplace) { /* in-place, only need to change the 1st char */
*d = *tmpbuf;
}
SvCUR_set(dest, need - 1);
}
}
+#ifdef USE_LOCALE_CTYPE
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
+ TAINT;
+ SvTAINTED_on(dest);
+ }
+#endif
if (dest != source && SvTAINTED(source))
SvTAINT(dest);
SvSETMAGIC(dest);
(SvTEMP(source) && !SvSMAGICAL(source) && SvREFCNT(source) == 1))
&& !SvREADONLY(source) && SvPOK(source)
&& !DO_UTF8(source)
- && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
-
- /* We can convert in place. The reason we can't if in UNI_8_BIT is to
- * make the loop tight, so we overwrite the source with the dest before
- * looking at it, and we need to look at the original source
- * afterwards. There would also need to be code added to handle
- * switching to not in-place in midstream if we run into characters
- * that change the length.
- */
+ && (
+#ifdef USE_LOCALE_CTYPE
+ (IN_LC_RUNTIME(LC_CTYPE))
+ ? ! IN_UTF8_CTYPE_LOCALE
+ :
+#endif
+ ! IN_UNI_8_BIT))
+ {
+
+ /* We can convert in place. The reason we can't if in UNI_8_BIT is to
+ * make the loop tight, so we overwrite the source with the dest before
+ * looking at it, and we need to look at the original source
+ * afterwards. There would also need to be code added to handle
+ * switching to not in-place in midstream if we run into characters
+ * that change the length. Since being in locale overrides UNI_8_BIT,
+ * that latter becomes irrelevant in the above test; instead for
+ * locale, the size can't normally change, except if the locale is a
+ * UTF-8 one */
dest = source;
s = d = (U8*)SvPV_force_nomg(source, len);
min = len + 1;
dest = TARG;
- /* The old implementation would copy source into TARG at this point.
- This had the side effect that if source was undef, TARG was now
- an undefined SV with PADTMP set, and they don't warn inside
- sv_2pv_flags(). However, we're now getting the PV direct from
- source, which doesn't have PADTMP set, so it would warn. Hence the
- little games. */
-
- if (SvOK(source)) {
- s = (const U8*)SvPV_nomg_const(source, len);
- } else {
- if (ckWARN(WARN_UNINITIALIZED))
- report_uninit(source);
- s = (const U8*)"";
- len = 0;
- }
+ s = (const U8*)SvPV_nomg_const(source, len);
min = len + 1;
SvUPGRADE(dest, SVt_PV);
if (DO_UTF8(source)) {
const U8 *const send = s + len;
U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- bool tainted = FALSE;
/* All occurrences of these are to be moved to follow any other marks.
* This is context-dependent. We may not be passed enough context to
* and copy it to the output buffer */
u = UTF8SKIP(s);
- uv = _to_utf8_upper_flags(s, tmpbuf, &ulen,
- cBOOL(IN_LOCALE_RUNTIME), &tainted);
+#ifdef USE_LOCALE_CTYPE
+ uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
+#else
+ uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, 0);
+#endif
#define GREEK_CAPITAL_LETTER_IOTA 0x0399
#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
if (uv == GREEK_CAPITAL_LETTER_IOTA
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
- if (tainted) {
- TAINT;
- SvTAINTED_on(dest);
- }
}
else { /* Not UTF-8 */
if (len) {
/* Use locale casing if in locale; regular style if not treating
* latin1 as having case; otherwise the latin1 casing. Do the
* whole thing in a tight loop, for speed, */
- if (IN_LOCALE_RUNTIME) {
- TAINT;
- SvTAINTED_on(dest);
+#ifdef USE_LOCALE_CTYPE
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
+ if (IN_UTF8_CTYPE_LOCALE) {
+ goto do_uni_rules;
+ }
for (; s < send; d++, s++)
- *d = toUPPER_LC(*s);
+ *d = (U8) toUPPER_LC(*s);
}
- else if (! IN_UNI_8_BIT) {
+ else
+#endif
+ if (! IN_UNI_8_BIT) {
for (; s < send; d++, s++) {
*d = toUPPER(*s);
}
}
else {
+#ifdef USE_LOCALE_CTYPE
+ do_uni_rules:
+#endif
for (; s < send; d++, s++) {
*d = toUPPER_LATIN1_MOD(*s);
if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
}
} /* End of isn't utf8 */
+#ifdef USE_LOCALE_CTYPE
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
+ TAINT;
+ SvTAINTED_on(dest);
+ }
+#endif
if (dest != source && SvTAINTED(source))
SvTAINT(dest);
SvSETMAGIC(dest);
dest = TARG;
- /* The old implementation would copy source into TARG at this point.
- This had the side effect that if source was undef, TARG was now
- an undefined SV with PADTMP set, and they don't warn inside
- sv_2pv_flags(). However, we're now getting the PV direct from
- source, which doesn't have PADTMP set, so it would warn. Hence the
- little games. */
-
- if (SvOK(source)) {
- s = (const U8*)SvPV_nomg_const(source, len);
- } else {
- if (ckWARN(WARN_UNINITIALIZED))
- report_uninit(source);
- s = (const U8*)"";
- len = 0;
- }
+ s = (const U8*)SvPV_nomg_const(source, len);
min = len + 1;
SvUPGRADE(dest, SVt_PV);
if (DO_UTF8(source)) {
const U8 *const send = s + len;
U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
- bool tainted = FALSE;
while (s < send) {
const STRLEN u = UTF8SKIP(s);
STRLEN ulen;
- _to_utf8_lower_flags(s, tmpbuf, &ulen,
- cBOOL(IN_LOCALE_RUNTIME), &tainted);
+#ifdef USE_LOCALE_CTYPE
+ _to_utf8_lower_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
+#else
+ _to_utf8_lower_flags(s, tmpbuf, &ulen, 0);
+#endif
/* Here is where we would do context-sensitive actions. See the
- * commit message for this comment for why there isn't any */
+ * commit message for 86510fb15 for why there isn't any */
if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
SvUTF8_on(dest);
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
- if (tainted) {
- TAINT;
- SvTAINTED_on(dest);
- }
} else { /* Not utf8 */
if (len) {
const U8 *const send = s + len;
/* Use locale casing if in locale; regular style if not treating
* latin1 as having case; otherwise the latin1 casing. Do the
* whole thing in a tight loop, for speed, */
- if (IN_LOCALE_RUNTIME) {
- TAINT;
- SvTAINTED_on(dest);
+#ifdef USE_LOCALE_CTYPE
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
for (; s < send; d++, s++)
*d = toLOWER_LC(*s);
- }
- else if (! IN_UNI_8_BIT) {
+ }
+ else
+#endif
+ if (! IN_UNI_8_BIT) {
for (; s < send; d++, s++) {
*d = toLOWER(*s);
}
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
}
}
+#ifdef USE_LOCALE_CTYPE
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
+ TAINT;
+ SvTAINTED_on(dest);
+ }
+#endif
if (dest != source && SvTAINTED(source))
SvTAINT(dest);
SvSETMAGIC(dest);
}
}
else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
-
+#ifdef USE_LOCALE_CTYPE
/* In locale, we quote all non-ASCII Latin1 chars.
* Otherwise use the quoting rules */
- if (IN_LOCALE_RUNTIME
+ if (IN_LC_RUNTIME(LC_CTYPE)
|| _isQUOTEMETA(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s + 1))))
{
to_quote = TRUE;
}
+#endif
}
else if (is_QUOTEMETA_high(s)) {
to_quote = TRUE;
const U8 *send;
U8 *d;
U8 tmpbuf[UTF8_MAXBYTES_CASE + 1];
- const bool full_folding = TRUE;
+ const bool full_folding = TRUE; /* This variable is here so we can easily
+ move to more generality later */
const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 )
- | ( IN_LOCALE_RUNTIME ? FOLD_FLAGS_LOCALE : 0 );
+#ifdef USE_LOCALE_CTYPE
+ | ( IN_LC_RUNTIME(LC_CTYPE) ? FOLD_FLAGS_LOCALE : 0 )
+#endif
+ ;
/* This is a facsimile of pp_lc, but with a thousand bugs thanks to me.
* You are welcome(?) -Hugmeir
send = s + len;
if (DO_UTF8(source)) { /* UTF-8 flagged string. */
- bool tainted = FALSE;
while (s < send) {
const STRLEN u = UTF8SKIP(s);
STRLEN ulen;
- _to_utf8_fold_flags(s, tmpbuf, &ulen, flags, &tainted);
+ _to_utf8_fold_flags(s, tmpbuf, &ulen, flags);
if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
const UV o = d - (U8*)SvPVX_const(dest);
s += u;
}
SvUTF8_on(dest);
- if (tainted) {
- TAINT;
- SvTAINTED_on(dest);
- }
} /* Unflagged string */
else if (len) {
- if ( IN_LOCALE_RUNTIME ) { /* Under locale */
- TAINT;
- SvTAINTED_on(dest);
+#ifdef USE_LOCALE_CTYPE
+ if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */
+ if (IN_UTF8_CTYPE_LOCALE) {
+ goto do_uni_folding;
+ }
for (; s < send; d++, s++)
- *d = toFOLD_LC(*s);
+ *d = (U8) toFOLD_LC(*s);
}
- else if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
+ else
+#endif
+ if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
for (; s < send; d++, s++)
*d = toFOLD(*s);
}
else {
+#ifdef USE_LOCALE_CTYPE
+ do_uni_folding:
+#endif
/* For ASCII and the Latin-1 range, there's only two troublesome
* folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
* casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
+#ifdef USE_LOCALE_CTYPE
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
+ TAINT;
+ SvTAINTED_on(dest);
+ }
+#endif
if (SvTAINTED(source))
SvTAINT(dest);
SvSETMAGIC(dest);
IV *iterp = Perl_av_iter_p(aTHX_ array);
const IV current = (*iterp)++;
- if (current > av_len(array)) {
+ if (current > av_tindex(array)) {
*iterp = 0;
if (gimme == G_SCALAR)
RETPUSHUNDEF;
if (gimme == G_SCALAR) {
dTARGET;
- PUSHi(av_len(array) + 1);
+ PUSHi(av_tindex(array) + 1);
}
else if (gimme == G_ARRAY) {
IV n = Perl_av_len(aTHX_ array);
const MAGIC *mg;
HV *stash;
const bool sliced = !!(PL_op->op_private & OPpSLICE);
- SV *unsliced_keysv = sliced ? NULL : POPs;
+ SV **unsliced_keysv = sliced ? NULL : sp--;
SV * const osv = POPs;
- SV **mark = sliced ? PL_stack_base + POPMARK : &unsliced_keysv-1;
+ SV **mark = sliced ? PL_stack_base + POPMARK : unsliced_keysv-1;
dORIGMARK;
const bool tied = SvRMAGICAL(osv)
&& mg_find((const SV *)osv, PERL_MAGIC_tied);
const bool can_preserve = SvCANEXISTDELETE(osv);
const U32 type = SvTYPE(osv);
- SV ** const end = sliced ? SP : &unsliced_keysv;
+ SV ** const end = sliced ? SP : unsliced_keysv;
if (type == SVt_PVHV) { /* hash element */
HV * const hv = MUTABLE_HV(osv);
}
}
else if (gimme != G_VOID)
- PUSHs(unsliced_keysv);
+ PUSHs(*unsliced_keysv);
RETURN;
}
PP(pp_list)
{
- dVAR; dSP; dMARK;
+ dVAR;
+ I32 markidx = POPMARK;
if (GIMME != G_ARRAY) {
+ SV **mark = PL_stack_base + markidx;
+ dSP;
if (++MARK <= SP)
*MARK = *SP; /* unwanted list, return last item */
else
*MARK = &PL_sv_undef;
SP = MARK;
+ PUTBACK;
}
- RETURN;
+ return NORMAL;
}
PP(pp_lslice)
is_something_there = TRUE;
if (!(*lelem = firstrelem[ix]))
*lelem = &PL_sv_undef;
- else if (mod && SvPADTMP(*lelem) && !IS_PADGV(*lelem))
+ else if (mod && SvPADTMP(*lelem)) {
+ assert(!IS_PADGV(*lelem));
*lelem = firstrelem[ix] = sv_mortalcopy(*lelem);
+ }
}
}
if (is_something_there)
const MAGIC *mg;
bool can_preserve = SvCANEXISTDELETE(av);
- for (i = 0, j = av_len(av); i < j; ++i, --j) {
+ for (i = 0, j = av_tindex(av); i < j; ++i, --j) {
SV *begin, *end;
if (can_preserve) {
else if (do_utf8 == (RX_UTF8(rx) != 0) &&
(RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
&& (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
- && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
+ && !(RX_EXTFLAGS(rx) & RXf_IS_ANCHORED)) {
const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
SV * const csv = CALLREG_INTUIT_STRING(rx);