#include "perl.h"
#include "keywords.h"
+#include "invlist_inline.h"
#include "reentr.h"
#include "regcharclass.h"
NV nr = SvNVX(svr);
NV result;
- if (
-#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
- !Perl_isnan(nl) && nl == (NV)(il = (IV)nl)
- && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr)
-#else
- nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr)
-#endif
- )
+ if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) {
/* nothing was lost by converting to IVs */
goto do_iv;
+ }
SP--;
result = nl * nr;
# if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
alow = aiv;
auvok = TRUE; /* effectively it's a UV now */
} else {
- /* abs, auvok == false records sign */
- alow = -(UV)aiv;
+ /* abs, auvok == false records sign; Using 0- here and
+ * later to silence bogus warning from MS VC */
+ alow = (UV) (0 - (UV) aiv);
}
}
if (buvok) {
buvok = TRUE; /* effectively it's a UV now */
} else {
/* abs, buvok == false records sign */
- blow = -(UV)biv;
+ blow = (UV) (0 - (UV) biv);
}
}
right = biv;
right_neg = FALSE; /* effectively it's a UV now */
} else {
- right = -(UV)biv;
+ right = (UV) (0 - (UV) biv);
}
}
}
left = aiv;
left_neg = FALSE; /* effectively it's a UV now */
} else {
- left = -(UV)aiv;
+ left = (UV) (0 - (UV) aiv);
}
}
}
else {
dTOPss;
ASSUME(MARK + 1 == SP);
- XPUSHs(sv);
+ MEXTEND(SP, 1);
+ PUSHs(sv);
MARK[1] = &PL_sv_undef;
}
SP = MARK + 2;
NV nl = SvNVX(svl);
NV nr = SvNVX(svr);
- if (
-#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
- !Perl_isnan(nl) && nl == (NV)(il = (IV)nl)
- && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr)
-#else
- nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr)
-#endif
- )
+ if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) {
/* nothing was lost by converting to IVs */
goto do_iv;
+ }
SP--;
TARGn(nl - nr, 0); /* args not GMG, so can't be tainted */
SETs(TARG);
auv = aiv;
auvok = 1; /* Now acting as a sign flag. */
} else {
- auv = -(UV)aiv;
+ auv = (UV) (0 - (UV) aiv);
}
}
a_valid = 1;
buv = biv;
buvok = 1;
} else
- buv = -(UV)biv;
+ buv = (UV) (0 - (UV) biv);
}
/* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
else "IV" now, independent of how it came in.
shift = -shift;
left = !left;
}
- if (shift >= IV_BITS) {
+ if (UNLIKELY(shift >= IV_BITS)) {
return 0;
}
return left ? uv << shift : uv >> shift;
static IV S_iv_shift(IV iv, int shift, bool left)
{
- if (shift < 0) {
- shift = -shift;
- left = !left;
- }
- if (shift >= IV_BITS) {
- return iv < 0 && !left ? -1 : 0;
- }
- return left ? iv << shift : iv >> shift;
+ if (shift < 0) {
+ shift = -shift;
+ left = !left;
+ }
+
+ if (UNLIKELY(shift >= IV_BITS)) {
+ return iv < 0 && !left ? -1 : 0;
+ }
+
+ /* For left shifts, perl 5 has chosen to treat the value as unsigned for
+ * the * purposes of shifting, then cast back to signed. This is very
+ * different from Raku:
+ *
+ * $ raku -e 'say -2 +< 5'
+ * -64
+ *
+ * $ ./perl -le 'print -2 << 5'
+ * 18446744073709551552
+ * */
+ if (left) {
+ if (iv == IV_MIN) { /* Casting this to a UV is undefined behavior */
+ return 0;
+ }
+ return (IV) (((UV) iv) << shift);
+ }
+
+ /* Here is right shift */
+ return iv >> shift;
}
#define UV_LEFT_SHIFT(uv, shift) S_uv_shift(uv, shift, TRUE)
{
dSP;
SV *left, *right;
+ U32 flags_and, flags_or;
- tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
+ tryAMAGICbin_MG(lt_amg, AMGf_numeric);
right = POPs;
left = TOPs;
+ flags_and = SvFLAGS(left) & SvFLAGS(right);
+ flags_or = SvFLAGS(left) | SvFLAGS(right);
+
SETs(boolSV(
- (SvIOK_notUV(left) && SvIOK_notUV(right))
- ? (SvIVX(left) < SvIVX(right))
- : (do_ncmp(left, right) == -1)
+ ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+ ? (SvIVX(left) < SvIVX(right))
+ : (flags_and & SVf_NOK)
+ ? (SvNVX(left) < SvNVX(right))
+ : (do_ncmp(left, right) == -1)
));
RETURN;
}
{
dSP;
SV *left, *right;
+ U32 flags_and, flags_or;
- tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
+ tryAMAGICbin_MG(gt_amg, AMGf_numeric);
right = POPs;
left = TOPs;
+ flags_and = SvFLAGS(left) & SvFLAGS(right);
+ flags_or = SvFLAGS(left) | SvFLAGS(right);
+
SETs(boolSV(
- (SvIOK_notUV(left) && SvIOK_notUV(right))
- ? (SvIVX(left) > SvIVX(right))
- : (do_ncmp(left, right) == 1)
+ ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+ ? (SvIVX(left) > SvIVX(right))
+ : (flags_and & SVf_NOK)
+ ? (SvNVX(left) > SvNVX(right))
+ : (do_ncmp(left, right) == 1)
));
RETURN;
}
{
dSP;
SV *left, *right;
+ U32 flags_and, flags_or;
- tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
+ tryAMAGICbin_MG(le_amg, AMGf_numeric);
right = POPs;
left = TOPs;
+ flags_and = SvFLAGS(left) & SvFLAGS(right);
+ flags_or = SvFLAGS(left) | SvFLAGS(right);
+
SETs(boolSV(
- (SvIOK_notUV(left) && SvIOK_notUV(right))
- ? (SvIVX(left) <= SvIVX(right))
- : (do_ncmp(left, right) <= 0)
+ ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+ ? (SvIVX(left) <= SvIVX(right))
+ : (flags_and & SVf_NOK)
+ ? (SvNVX(left) <= SvNVX(right))
+ : (do_ncmp(left, right) <= 0)
));
RETURN;
}
{
dSP;
SV *left, *right;
+ U32 flags_and, flags_or;
- tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
+ tryAMAGICbin_MG(ge_amg, AMGf_numeric);
right = POPs;
left = TOPs;
+ flags_and = SvFLAGS(left) & SvFLAGS(right);
+ flags_or = SvFLAGS(left) | SvFLAGS(right);
+
SETs(boolSV(
- (SvIOK_notUV(left) && SvIOK_notUV(right))
- ? (SvIVX(left) >= SvIVX(right))
- : ( (do_ncmp(left, right) & 2) == 0)
+ ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+ ? (SvIVX(left) >= SvIVX(right))
+ : (flags_and & SVf_NOK)
+ ? (SvNVX(left) >= SvNVX(right))
+ : ( (do_ncmp(left, right) & 2) == 0)
));
RETURN;
}
{
dSP;
SV *left, *right;
+ U32 flags_and, flags_or;
- tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
+ tryAMAGICbin_MG(ne_amg, AMGf_numeric);
right = POPs;
left = TOPs;
+ flags_and = SvFLAGS(left) & SvFLAGS(right);
+ flags_or = SvFLAGS(left) | SvFLAGS(right);
+
SETs(boolSV(
- (SvIOK_notUV(left) && SvIOK_notUV(right))
- ? (SvIVX(left) != SvIVX(right))
- : (do_ncmp(left, right) != 0)
+ ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+ ? (SvIVX(left) != SvIVX(right))
+ : (flags_and & SVf_NOK)
+ ? (SvNVX(left) != SvNVX(right))
+ : (do_ncmp(left, right) != 0)
));
RETURN;
}
break;
}
- tryAMAGICbin_MG(amg_type, AMGf_set);
+ tryAMAGICbin_MG(amg_type, 0);
{
dPOPTOPssrl;
const int cmp =
PP(pp_seq)
{
dSP;
- tryAMAGICbin_MG(seq_amg, AMGf_set);
+ tryAMAGICbin_MG(seq_amg, 0);
{
dPOPTOPssrl;
SETs(boolSV(sv_eq_flags(left, right, 0)));
PP(pp_sne)
{
dSP;
- tryAMAGICbin_MG(sne_amg, AMGf_set);
+ tryAMAGICbin_MG(sne_amg, 0);
{
dPOPTOPssrl;
SETs(boolSV(!sv_eq_flags(left, right, 0)));
dSP;
SV *sv;
- tryAMAGICun_MG(not_amg, AMGf_set);
+ tryAMAGICun_MG(not_amg, 0);
sv = *PL_stack_sp;
*PL_stack_sp = boolSV(!SvTRUE_nomg_NN(sv));
return NORMAL;
if (len && ! utf8_to_bytes(tmps, &len)) {
Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[PL_op->op_type]);
}
- SvCUR(TARG) = len;
+ SvCUR_set(TARG, len);
SvUTF8_off(TARG);
}
anum = len;
-#ifdef LIBERAL
{
long *tmpl;
- for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
+ for ( ; anum && PTR2nat(tmps) % sizeof(long); anum--, tmps++)
*tmps = ~*tmps;
tmpl = (long*)tmps;
for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
*tmpl = ~*tmpl;
tmps = (U8*)tmpl;
}
-#endif
+
for ( ; anum > 0; anum--, tmps++)
*tmps = ~*tmps;
}
PP(pp_i_modulo)
{
- /* This is the vanilla old i_modulo. */
dSP; dATARGET;
tryAMAGICbin_MG(modulo_amg, AMGf_assign);
{
}
}
-#if defined(__GLIBC__) && IVSIZE == 8 \
- && ( __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8))
-
-PP(pp_i_modulo_glibc_bugfix)
-{
- /* This is the i_modulo with the workaround for the _moddi3 bug
- * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
- * See below for pp_i_modulo. */
- dSP; dATARGET;
- tryAMAGICbin_MG(modulo_amg, AMGf_assign);
- {
- dPOPTOPiirl_nomg;
- if (!right)
- DIE(aTHX_ "Illegal modulus zero");
- /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
- if (right == -1)
- SETi( 0 );
- else
- SETi( left % PERL_ABS(right) );
- RETURN;
- }
-}
-#endif
-
PP(pp_i_add)
{
dSP; dATARGET;
PP(pp_i_lt)
{
dSP;
- tryAMAGICbin_MG(lt_amg, AMGf_set);
+ tryAMAGICbin_MG(lt_amg, 0);
{
dPOPTOPiirl_nomg;
SETs(boolSV(left < right));
PP(pp_i_gt)
{
dSP;
- tryAMAGICbin_MG(gt_amg, AMGf_set);
+ tryAMAGICbin_MG(gt_amg, 0);
{
dPOPTOPiirl_nomg;
SETs(boolSV(left > right));
PP(pp_i_le)
{
dSP;
- tryAMAGICbin_MG(le_amg, AMGf_set);
+ tryAMAGICbin_MG(le_amg, 0);
{
dPOPTOPiirl_nomg;
SETs(boolSV(left <= right));
PP(pp_i_ge)
{
dSP;
- tryAMAGICbin_MG(ge_amg, AMGf_set);
+ tryAMAGICbin_MG(ge_amg, 0);
{
dPOPTOPiirl_nomg;
SETs(boolSV(left >= right));
PP(pp_i_eq)
{
dSP;
- tryAMAGICbin_MG(eq_amg, AMGf_set);
+ tryAMAGICbin_MG(eq_amg, 0);
{
dPOPTOPiirl_nomg;
SETs(boolSV(left == right));
PP(pp_i_ne)
{
dSP;
- tryAMAGICbin_MG(ne_amg, AMGf_set);
+ tryAMAGICbin_MG(ne_amg, 0);
{
dPOPTOPiirl_nomg;
SETs(boolSV(left != right));
if (*tmps == '0')
tmps++, len--;
if (isALPHA_FOLD_EQ(*tmps, 'x')) {
+ tmps++, len--;
+ flags |= PERL_SCAN_DISALLOW_PREFIX;
hex:
result_uv = grok_hex (tmps, &len, &flags, &result_nv);
}
- else if (isALPHA_FOLD_EQ(*tmps, 'b'))
+ else if (isALPHA_FOLD_EQ(*tmps, 'b')) {
+ tmps++, len--;
+ flags |= PERL_SCAN_DISALLOW_PREFIX;
result_uv = grok_bin (tmps, &len, &flags, &result_nv);
+ }
else
result_uv = grok_oct (tmps, &len, &flags, &result_nv);
push_result:
/* OPpTRUEBOOL indicates an '== -1' has been optimised away */
if (PL_op->op_private & OPpTRUEBOOL) {
- PUSHs( ((retval != -1) ^ cBOOL(PL_op->op_private & OPpINDEX_BOOLNEG))
- ? &PL_sv_yes : &PL_sv_no);
- if (PL_op->op_private & OPpTARGET_MY)
+ SV *result = ((retval != -1) ^ cBOOL(PL_op->op_private & OPpINDEX_BOOLNEG))
+ ? &PL_sv_yes : &PL_sv_no;
+ if (PL_op->op_private & OPpTARGET_MY) {
/* $lex = (index() == -1) */
- sv_setsv(TARG, TOPs);
+ sv_setsv_mg(TARG, result);
+ PUSHs(TARG);
+ }
+ else {
+ PUSHs(result);
+ }
}
else
PUSHi(retval);
sv_utf8_downgrade(tsv, FALSE);
tmps = SvPV_const(tsv, len);
}
-# ifdef USE_ITHREADS
-# ifdef HAS_CRYPT_R
+# ifdef USE_ITHREADS
+# ifdef HAS_CRYPT_R
if (!PL_reentrant_buffer->_crypt_struct_buffer) {
/* This should be threadsafe because in ithreads there is only
* one thread per interpreter. If this would not be true,
* we would need a mutex to protect this malloc. */
PL_reentrant_buffer->_crypt_struct_buffer =
(struct crypt_data *)safemalloc(sizeof(struct crypt_data));
-#if defined(__GLIBC__) || defined(__EMX__)
+# if defined(__GLIBC__) || defined(__EMX__)
if (PL_reentrant_buffer->_crypt_struct_buffer) {
PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
-#if (defined(__GLIBC__) && __GLIBC__ == 2) && \
- (defined(__GLIBC_MINOR__) && __GLIBC_MINOR__ >= 2 && __GLIBC_MINOR__ < 4)
- /* work around glibc-2.2.5 bug, has been fixed at some
- * time in glibc-2.3.X */
- PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
-#endif
}
-#endif
+# endif
}
-# endif /* HAS_CRYPT_R */
-# endif /* USE_ITHREADS */
-# ifdef FCRYPT
- sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
-# else
+# endif /* HAS_CRYPT_R */
+# endif /* USE_ITHREADS */
+
sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
-# endif
+
SvUTF8_off(TARG);
SETTARG;
RETURN;
STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
* lowercased) character stored in tmpbuf. May be either
* UTF-8 or not, but in either case is the number of bytes */
+ bool remove_dot_above = FALSE;
s = (const U8*)SvPV_const(source, slen);
else if (DO_UTF8(source)) { /* Is the source utf8? */
doing_utf8 = TRUE;
ulen = UTF8SKIP(s);
+
if (op_type == OP_UCFIRST) {
#ifdef USE_LOCALE_CTYPE
_toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
#ifdef USE_LOCALE_CTYPE
_toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
+
+ /* In turkic locales, lower casing an 'I' normally yields U+0131,
+ * LATIN SMALL LETTER DOTLESS I, but not if the grapheme also
+ * contains a COMBINING DOT ABOVE. Instead it is treated like
+ * LATIN CAPITAL LETTER I WITH DOT ABOVE lowercased to 'i'. The
+ * call to lowercase above has handled this. But SpecialCasing.txt
+ * says we are supposed to remove the COMBINING DOT ABOVE. We can
+ * tell if we have this situation if I ==> i in a turkic locale. */
+ if ( UNLIKELY(PL_in_utf8_turkic_locale)
+ && IN_LC_RUNTIME(LC_CTYPE)
+ && (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i')))
+ {
+ /* Here, we know there was a COMBINING DOT ABOVE. We won't be
+ * able to handle this in-place. */
+ inplace = FALSE;
+
+ /* It seems likely that the DOT will immediately follow the
+ * 'I'. If so, we can remove it simply by indicating to the
+ * code below to start copying the source just beyond the DOT.
+ * We know its length is 2 */
+ if (LIKELY(memBEGINs(s + 1, s + slen, COMBINING_DOT_ABOVE_UTF8))) {
+ ulen += 2;
+ }
+ else { /* But if it doesn't follow immediately, set a flag for
+ the code below */
+ remove_dot_above = TRUE;
+ }
+ }
#else
+ PERL_UNUSED_VAR(remove_dot_above);
+
_toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, 0);
#endif
* need to be overridden for the tricky ones */
need = slen + 1;
- if (op_type == OP_LCFIRST) {
- /* lower case the first letter: no trickiness for any character */
#ifdef USE_LOCALE_CTYPE
- if (IN_LC_RUNTIME(LC_CTYPE)) {
- *tmpbuf = toLOWER_LC(*s);
- }
- else
-#endif
+
+ if (IN_LC_RUNTIME(LC_CTYPE)) {
+ if ( UNLIKELY(PL_in_utf8_turkic_locale)
+ && ( (op_type == OP_LCFIRST && UNLIKELY(*s == 'I'))
+ || (op_type == OP_UCFIRST && UNLIKELY(*s == 'i'))))
{
- *tmpbuf = (IN_UNI_8_BIT)
- ? toLOWER_LATIN1(*s)
- : toLOWER(*s);
+ if (*s == 'I') { /* lcfirst('I') */
+ tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
+ tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I);
+ }
+ else { /* ucfirst('i') */
+ tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+ tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+ }
+ tculen = 2;
+ inplace = FALSE;
+ doing_utf8 = TRUE;
+ convert_source_to_utf8 = TRUE;
+ need += variant_under_utf8_count(s, s + slen);
}
- }
-#ifdef USE_LOCALE_CTYPE
- /* is ucfirst() */
- else if (IN_LC_RUNTIME(LC_CTYPE)) {
- if (IN_UTF8_CTYPE_LOCALE) {
- goto do_uni_rules;
+ else if (op_type == OP_LCFIRST) {
+
+ /* For lc, there are no gotchas for UTF-8 locales (other than
+ * the turkish ones already handled above) */
+ *tmpbuf = toLOWER_LC(*s);
}
+ else { /* ucfirst */
- *tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any
- locales have upper and title case
- different */
- }
+ /* But for uc, some characters require special handling */
+ if (IN_UTF8_CTYPE_LOCALE) {
+ goto do_uni_rules;
+ }
+
+ /* This would be a bug if any locales have upper and title case
+ * different */
+ *tmpbuf = (U8) toUPPER_LC(*s);
+ }
+ }
+ else
#endif
- else if (! IN_UNI_8_BIT) {
- *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
- * on EBCDIC machines whatever the
- * native function does */
- }
+ /* Here, not in locale. If not using Unicode rules, is a simple
+ * lower/upper, depending */
+ if (! IN_UNI_8_BIT) {
+ *tmpbuf = (op_type == OP_LCFIRST)
+ ? toLOWER(*s)
+ : toUPPER(*s);
+ }
+ else if (op_type == OP_LCFIRST) {
+ /* lower case the first letter: no trickiness for any character */
+ *tmpbuf = toLOWER_LATIN1(*s);
+ }
else {
/* Here, is ucfirst non-UTF-8, not in locale (unless that locale is
- * UTF-8, which we treat as not in locale), and cased latin1 */
+ * non-turkic UTF-8, which we treat as not in locale), and cased
+ * latin1 */
UV title_ord;
#ifdef USE_LOCALE_CTYPE
do_uni_rules:
inplace = FALSE;
/* If the result won't fit in a byte, the entire result
- * will have to be in UTF-8. Assume worst case sizing in
- * conversion. (all latin1 characters occupy at most two
- * bytes in utf8) */
+ * will have to be in UTF-8. Allocate enough space for the
+ * expanded first byte, and if UTF-8, the rest of the input
+ * string, some or all of which may also expand to two
+ * bytes, plus the terminating NUL. */
if (title_ord > 255) {
doing_utf8 = TRUE;
convert_source_to_utf8 = TRUE;
- need = slen * 2 + 1;
+ need = slen
+ + variant_under_utf8_count(s, s + slen)
+ + 1;
/* The (converted) UTF-8 and UTF-EBCDIC lengths of all
- * (both) characters whose title case is above 255 is
+ * characters whose title case is above 255 is
* 2. */
ulen = 2;
}
* of the string. */
sv_setpvn(dest, (char*)tmpbuf, tculen);
if (slen > ulen) {
+
+ /* But this boolean being set means we are in a turkic
+ * locale, and there is a DOT character that needs to be
+ * removed, and it isn't immediately after the current
+ * character. Keep concatenating characters to the output
+ * one at a time, until we find the DOT, which we simply
+ * skip */
+ if (UNLIKELY(remove_dot_above)) {
+ do {
+ Size_t this_len = UTF8SKIP(s + ulen);
+
+ sv_catpvn(dest, (char*)(s + ulen), this_len);
+
+ ulen += this_len;
+ if (memBEGINs(s + ulen, s + slen, COMBINING_DOT_ABOVE_UTF8)) {
+ ulen += 2;
+ break;
+ }
+ } while (s + ulen < s + slen);
+ }
+
+ /* The rest of the string can be concatenated unchanged,
+ * all at once */
sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
}
}
* into tmpbuf. First put that into dest, and then append the
* rest of the source, converting it to UTF-8 as we go. */
- /* Assert tculen is 2 here because the only two characters that
+ /* Assert tculen is 2 here because the only characters that
* get to this part of the code have 2-byte UTF-8 equivalents */
assert(tculen == 2);
*d++ = *tmpbuf;
/* In a "use bytes" we don't treat the source as UTF-8, but, still want
* the destination to retain that flag */
- if (SvUTF8(source) && ! IN_BYTES)
+ if (DO_UTF8(source))
SvUTF8_on(dest);
if (!inplace) { /* Finish the rest of the string, unchanged */
STRLEN u;
STRLEN ulen;
UV uv;
- if (in_iota_subscript && ! _is_utf8_mark(s)) {
+ if (UNLIKELY(in_iota_subscript)) {
+ UV cp = utf8_to_uvchr_buf(s, send, NULL);
- /* A non-mark. Time to output the iota subscript */
- *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA);
- *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA);
- in_iota_subscript = FALSE;
+ if (! _invlist_contains_cp(PL_utf8_mark, cp)) {
+
+ /* A non-mark. Time to output the iota subscript */
+ *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA);
+ *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA);
+ in_iota_subscript = FALSE;
+ }
}
/* Then handle the current character. Get the changed case value
do_uni_rules:
#endif
for (; s < send; d++, s++) {
+ Size_t extra;
+
*d = toUPPER_LATIN1_MOD(*s);
- if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
+ if ( LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)
+
+#ifdef USE_LOCALE_CTYPE
+
+ && (LIKELY( ! PL_in_utf8_turkic_locale
+ || ! IN_LC_RUNTIME(LC_CTYPE))
+ || *s != 'i')
+#endif
+
+ ) {
continue;
}
/* The mainstream case is the tight loop above. To avoid
- * extra tests in that, all three characters that require
- * special handling are mapped by the MOD to the one tested
- * just above.
- * Use the source to distinguish between the three cases */
+ * extra tests in that, all three characters that always
+ * require special handling are mapped by the MOD to the
+ * one tested just above. Use the source to distinguish
+ * between those cases */
#if UNICODE_MAJOR_VERSION > 2 \
|| (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \
}
#endif
- /* The other two special handling characters have their
+ /* The other special handling characters have their
* upper cases outside the latin1 range, hence need to be
* in UTF-8, so the whole result needs to be in UTF-8.
*
* come separately to UTF-8, then jump into the loop that
* handles UTF-8. But the most efficient time-wise of the
* ones I could think of is what follows, and turned out to
- * not require much extra code. */
+ * not require much extra code.
+ *
+ * First, calculate the extra space needed for the
+ * remainder of the source needing to be in UTF-8. Except
+ * for the 'i' in Turkic locales, in UTF-8 strings, the
+ * uppercase of a character below 256 occupies the same
+ * number of bytes as the original. Therefore, the space
+ * needed is the that number plus the number of characters
+ * that become two bytes when converted to UTF-8, plus, in
+ * turkish locales, the number of 'i's. */
+
+ extra = send - s + variant_under_utf8_count(s, send);
+
+#ifdef USE_LOCALE_CTYPE
+
+ if (UNLIKELY(*s == 'i')) { /* We wouldn't get an 'i' here
+ unless are in a Turkic
+ locale */
+ const U8 * s_peek = s;
+
+ do {
+ extra++;
+
+ s_peek = (U8 *) memchr(s_peek + 1, 'i',
+ send - (s_peek + 1));
+ } while (s_peek != NULL);
+ }
+#endif
/* Convert what we have so far into UTF-8, telling the
* function that we know it should be converted, and to
* allow extra space for what we haven't processed yet.
- * Assume the worst case space requirements for converting
- * what we haven't processed so far: that it will require
- * two bytes for each remaining source character, plus the
- * NUL at the end. This may cause the string pointer to
- * move, so re-find it. */
+ *
+ * This may cause the string pointer to move, so need to
+ * save and re-find it. */
len = d - (U8*)SvPVX_const(dest);
SvCUR_set(dest, len);
len = sv_utf8_upgrade_flags_grow(dest,
SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- (send -s) * 2 + 1);
+ extra
+ + 1 /* trailing NUL */ );
d = (U8*)SvPVX(dest) + len;
/* Now process the remainder of the source, simultaneously
- * converting to upper and UTF-8. */
- for (; s < send; s++) {
- (void) _to_upper_title_latin1(*s, d, &len, 'S');
- d += len;
- }
-
- /* Here have processed the whole source; no need to continue
- * with the outer loop. Each character has been converted
- * to upper case and converted to UTF-8 */
+ * converting to upper and UTF-8.
+ *
+ * To avoid extra tests in the loop body, and since the
+ * loop is so simple, split out the rare Turkic case into
+ * its own loop */
+#ifdef USE_LOCALE_CTYPE
+ if ( UNLIKELY(PL_in_utf8_turkic_locale)
+ && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE)))
+ {
+ for (; s < send; s++) {
+ if (*s == 'i') {
+ *d++ = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+ *d++ = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+ }
+ else {
+ (void) _to_upper_title_latin1(*s, d, &len, 'S');
+ d += len;
+ }
+ }
+ }
+ else
+#endif
+ for (; s < send; s++) {
+ (void) _to_upper_title_latin1(*s, d, &len, 'S');
+ d += len;
+ }
+
+ /* Here have processed the whole source; no need to
+ * continue with the outer loop. Each character has been
+ * converted to upper case and converted to UTF-8. */
break;
} /* End of processing all latin1-style chars */
} /* End of processing all chars */
SV *dest;
const U8 *s;
U8 *d;
+ bool has_turkic_I = FALSE;
SvGETMAGIC(source);
if ( SvPADTMP(source)
&& !SvREADONLY(source) && SvPOK(source)
- && !DO_UTF8(source)) {
+ && !DO_UTF8(source)
+
+#ifdef USE_LOCALE_CTYPE
+
+ && ( LIKELY(! IN_LC_RUNTIME(LC_CTYPE))
+ || LIKELY(! PL_in_utf8_turkic_locale))
- /* We can convert in place, as lowercasing anything in the latin1 range
- * (or else DO_UTF8 would have been on) doesn't lengthen it */
+#endif
+
+ ) {
+
+ /* We can convert in place, as, outside of Turkic UTF-8 locales,
+ * lowercasing anything in the latin1 range (or else DO_UTF8 would have
+ * been on) doesn't lengthen it. */
dest = source;
s = d = (U8*)SvPV_force_nomg(source, len);
min = len + 1;
#ifdef USE_LOCALE_CTYPE
if (IN_LC_RUNTIME(LC_CTYPE)) {
+ const U8 * next_I;
+
_CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+
+ /* Lowercasing in a Turkic locale can cause non-UTF-8 to need to become
+ * UTF-8 for the single case of the character 'I' */
+ if ( UNLIKELY(PL_in_utf8_turkic_locale)
+ && ! DO_UTF8(source)
+ && (next_I = (U8 *) memchr(s, 'I', len)))
+ {
+ Size_t I_count = 0;
+ const U8 *const send = s + len;
+
+ do {
+ I_count++;
+
+ next_I = (U8 *) memchr(next_I + 1, 'I',
+ send - (next_I + 1));
+ } while (next_I != NULL);
+
+ /* Except for the 'I', in UTF-8 strings, the lower case of a
+ * character below 256 occupies the same number of bytes as the
+ * original. Therefore, the space needed is the original length
+ * plus I_count plus the number of characters that become two bytes
+ * when converted to UTF-8 */
+ sv_utf8_upgrade_flags_grow(dest, 0, len
+ + I_count
+ + variant_under_utf8_count(s, send)
+ + 1 /* Trailing NUL */ );
+ d = (U8*)SvPVX(dest);
+ has_turkic_I = TRUE;
+ }
}
#endif
if (DO_UTF8(source)) {
const U8 *const send = s + len;
U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
+ bool remove_dot_above = FALSE;
while (s < send) {
const STRLEN u = UTF8SKIP(s);
#ifdef USE_LOCALE_CTYPE
_toLOWER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
+
+ /* If we are in a Turkic locale, we have to do more work. As noted
+ * in the comments for lcfirst, there is a special case if a 'I'
+ * is in a grapheme with COMBINING DOT ABOVE UTF8. It turns into a
+ * 'i', and the DOT must be removed. We check for that situation,
+ * and set a flag if the DOT is there. Then each time through the
+ * loop, we have to see if we need to remove the next DOT above,
+ * and if so, do it. We know that there is a DOT because
+ * _toLOWER_utf8_flags() wouldn't have returned 'i' unless there
+ * was one in a proper position. */
+ if ( UNLIKELY(PL_in_utf8_turkic_locale)
+ && IN_LC_RUNTIME(LC_CTYPE))
+ {
+ if ( UNLIKELY(remove_dot_above)
+ && memBEGINs(tmpbuf, sizeof(tmpbuf), COMBINING_DOT_ABOVE_UTF8))
+ {
+ s += u;
+ remove_dot_above = FALSE;
+ continue;
+ }
+ else if (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i')) {
+ remove_dot_above = TRUE;
+ }
+ }
#else
+ PERL_UNUSED_VAR(remove_dot_above);
+
_toLOWER_utf8_flags(s, send, tmpbuf, &ulen, 0);
#endif
SvUTF8_on(dest);
*d = '\0';
SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
- } else { /* Not utf8 */
+ } else { /* 'source' not utf8 */
if (len) {
const U8 *const send = s + len;
* whole thing in a tight loop, for speed, */
#ifdef USE_LOCALE_CTYPE
if (IN_LC_RUNTIME(LC_CTYPE)) {
- for (; s < send; d++, s++)
- *d = toLOWER_LC(*s);
+ if (LIKELY( ! has_turkic_I)) {
+ for (; s < send; d++, s++)
+ *d = toLOWER_LC(*s);
+ }
+ else { /* This is the only case where lc() converts 'dest'
+ into UTF-8 from a non-UTF-8 'source' */
+ for (; s < send; s++) {
+ if (*s == 'I') {
+ *d++ = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
+ *d++ = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I);
+ }
+ else {
+ append_utf8_from_native_byte(toLOWER_LATIN1(*s), &d);
+ }
+ }
+ }
}
else
#endif
#ifdef USE_LOCALE_CTYPE
do_uni_folding:
#endif
- /* For ASCII and the Latin-1 range, there's two
+ /* For ASCII and the Latin-1 range, there's potentially three
* troublesome folds:
* \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
* casefolding becomes 'ss';
* \x{B5} (\N{MICRO SIGN}), which under any fold becomes
* \x{3BC} (\N{GREEK SMALL LETTER MU})
+ * I only in Turkic locales, this folds to \x{131}
+ * \N{LATIN SMALL LETTER DOTLESS I}
* For the rest, the casefold is their lowercase. */
for (; s < send; d++, s++) {
- if (*s == MICRO_SIGN) {
+ if ( UNLIKELY(*s == MICRO_SIGN)
+#ifdef USE_LOCALE_CTYPE
+ || ( UNLIKELY(PL_in_utf8_turkic_locale)
+ && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE))
+ && UNLIKELY(*s == 'I'))
+#endif
+ ) {
+ Size_t extra = send - s
+ + variant_under_utf8_count(s, send);
+
/* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU},
- * which is outside of the latin-1 range. There's a couple
- * of ways to deal with this -- khw discusses them in
- * pp_lc/uc, so go there :) What we do here is upgrade what
- * we had already casefolded, then enter an inner loop that
- * appends the rest of the characters as UTF-8. */
+ * and 'I' in Turkic locales is \N{LATIN SMALL LETTER
+ * DOTLESS I} both of which are outside of the latin-1
+ * range. There's a couple of ways to deal with this -- khw
+ * discusses them in pp_lc/uc, so go there :) What we do
+ * here is upgrade what we had already casefolded, then
+ * enter an inner loop that appends the rest of the
+ * characters as UTF-8.
+ *
+ * First we calculate the needed size of the upgraded dest
+ * beyond what's been processed already (the upgrade
+ * function figures that out). Except for the 'I' in
+ * Turkic locales, in UTF-8 strings, the fold case of a
+ * character below 256 occupies the same number of bytes as
+ * the original (even the Sharp S). Therefore, the space
+ * needed is the number of bytes remaining plus the number
+ * of characters that become two bytes when converted to
+ * UTF-8 plus, in turkish locales, the number of 'I's */
+
+ if (UNLIKELY(*s == 'I')) {
+ const U8 * s_peek = s;
+
+ do {
+ extra++;
+
+ s_peek = (U8 *) memchr(s_peek + 1, 'i',
+ send - (s_peek + 1));
+ } while (s_peek != NULL);
+ }
+
+ /* Growing may move things, so have to save and recalculate
+ * 'd' */
len = d - (U8*)SvPVX_const(dest);
SvCUR_set(dest, len);
len = sv_utf8_upgrade_flags_grow(dest,
SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- /* The max expansion for latin1
- * chars is 1 byte becomes 2 */
- (send -s) * 2 + 1);
+ extra
+ + 1 /* Trailing NUL */ );
d = (U8*)SvPVX(dest) + len;
*d++ = UTF8_TWO_BYTE_HI(GREEK_SMALL_LETTER_MU);
IV *iterp = Perl_av_iter_p(aTHX_ array);
const IV current = (*iterp)++;
- if (current > av_tindex(array)) {
+ if (current > av_top_index(array)) {
*iterp = 0;
if (gimme == G_SCALAR)
RETPUSHUNDEF;
if (gimme == G_SCALAR) {
dTARGET;
- PUSHi(av_tindex(array) + 1);
+ PUSHi(av_count(array));
}
else if (gimme == G_ARRAY) {
if (UNLIKELY(PL_op->op_private & OPpMAYBE_LVSUB)) {
"Can't modify keys on array in list assignment");
}
{
- IV n = Perl_av_len(aTHX_ array);
+ IV n = av_top_index(array);
IV i;
EXTEND(SP, n + 1);
const MAGIC *mg;
bool can_preserve = SvCANEXISTDELETE(av);
- for (i = 0, j = av_tindex(av); i < j; ++i, --j) {
+ for (i = 0, j = av_top_index(av); i < j; ++i, --j) {
SV *begin, *end;
if (can_preserve) {
sv_setsv(TARG, DEFSV);
XPUSHs(TARG);
}
+ SvSETMAGIC(TARG); /* remove any utf8 length magic */
up = SvPV_force(TARG, len);
if (len > 1) {
/* handle @ary = split(...) optimisation */
if (PL_op->op_private & OPpSPLIT_ASSIGN) {
+ realarray = 1;
if (!(PL_op->op_flags & OPf_STACKED)) {
if (PL_op->op_private & OPpSPLIT_LEX) {
if (PL_op->op_private & OPpLVAL_INTRO)
oldsave = PL_savestack_ix;
}
- realarray = 1;
- PUTBACK;
- av_extend(ary,0);
- (void)sv_2mortal(SvREFCNT_inc_simple_NN(sv));
- av_clear(ary);
- SPAGAIN;
+ /* Some defence against stack-not-refcounted bugs */
+ (void)sv_2mortal(SvREFCNT_inc_simple_NN(ary));
+
if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
PUSHMARK(SP);
XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
- }
- else {
- if (!AvREAL(ary)) {
- I32 i;
- AvREAL_on(ary);
- AvREIFY_off(ary);
- for (i = AvFILLp(ary); i >= 0; i--)
- AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
- }
- /* temporarily switch stacks */
- SAVESWITCHSTACK(PL_curstack, ary);
+ } else {
make_mortal = 0;
}
}
}
}
else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
- /*
- Pre-extend the stack, either the number of bytes or
- characters in the string or a limited amount, triggered by:
-
- my ($x, $y) = split //, $str;
- or
- split //, $str, $i;
- */
- if (!gimme_scalar) {
- const IV items = limit - 1;
- /* setting it to -1 will trigger a panic in EXTEND() */
- const SSize_t sslen = slen > SSize_t_MAX ? -1 : (SSize_t)slen;
- if (items >=0 && items < sslen)
- EXTEND(SP, items);
- else
- EXTEND(SP, sslen);
- }
-
- if (do_utf8) {
- while (--limit) {
- /* keep track of how many bytes we skip over */
- m = s;
- s += UTF8SKIP(s);
- if (gimme_scalar) {
- iters++;
- if (s-m == 0)
- trailing_empty++;
- else
- trailing_empty = 0;
- } else {
- dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
-
- PUSHs(dstr);
- }
-
- if (s >= strend)
- break;
+ /* This case boils down to deciding which is the smaller of:
+ * limit - effectively a number of characters
+ * slen - which already contains the number of characters in s
+ *
+ * The resulting number is the number of iters (for gimme_scalar)
+ * or the number of SVs to create (!gimme_scalar). */
+
+ /* setting it to -1 will trigger a panic in EXTEND() */
+ const SSize_t sslen = slen > SSize_t_MAX ? -1 : (SSize_t)slen;
+ const IV items = limit - 1;
+ if (sslen < items || items < 0) {
+ iters = slen -1;
+ limit = slen + 1;
+ /* Note: The same result is returned if the following block
+ * is removed, because of the "keep field after final delim?"
+ * adjustment, but having the following makes the "correct"
+ * behaviour more apparent. */
+ if (gimme_scalar) {
+ s = strend;
+ iters++;
}
} else {
- while (--limit) {
- if (gimme_scalar) {
- iters++;
- } else {
- dstr = newSVpvn(s, 1);
-
-
- if (make_mortal)
- sv_2mortal(dstr);
-
- PUSHs(dstr);
- }
-
- s++;
-
- if (s >= strend)
- break;
+ iters = items;
+ }
+ if (!gimme_scalar) {
+ /*
+ Pre-extend the stack, either the number of bytes or
+ characters in the string or a limited amount, triggered by:
+ my ($x, $y) = split //, $str;
+ or
+ split //, $str, $i;
+ */
+ EXTEND(SP, limit);
+ if (do_utf8) {
+ while (--limit) {
+ m = s;
+ s += UTF8SKIP(s);
+ dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
+ PUSHs(dstr);
+ }
+ } else {
+ while (--limit) {
+ dstr = newSVpvn_flags(s, 1, make_mortal);
+ PUSHs(dstr);
+ s++;
+ }
}
}
}
/* The rx->minlen is in characters but we want to step
* s ahead by bytes. */
if (do_utf8)
- s = (char*)utf8_hop((U8*)m, len);
+ s = (char*)utf8_hop_forward((U8*) m, len, (U8*) strend);
else
s = m + len; /* Fake \n at the end */
}
/* The rx->minlen is in characters but we want to step
* s ahead by bytes. */
if (do_utf8)
- s = (char*)utf8_hop((U8*)m, len);
+ s = (char*)utf8_hop_forward((U8*)m, len, (U8 *) strend);
else
s = m + len; /* Fake \n at the end */
}
}
PUTBACK;
- LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
+ LEAVE_SCOPE(oldsave);
SPAGAIN;
if (realarray) {
- if (!mg) {
- if (SvSMAGICAL(ary)) {
- PUTBACK;
+ if (!mg) {
+ PUTBACK;
+ if(AvREAL(ary)) {
+ if (av_count(ary) > 0)
+ av_clear(ary);
+ } else {
+ AvREAL_on(ary);
+ AvREIFY_off(ary);
+
+ if (AvMAX(ary) > -1) {
+ /* don't free mere refs */
+ Zero(AvARRAY(ary), AvMAX(ary), SV*);
+ }
+ }
+ if(AvMAX(ary) < iters)
+ av_extend(ary,iters);
+ SPAGAIN;
+
+ /* Need to copy the SV*s from the stack into ary */
+ Copy(SP + 1 - iters, AvARRAY(ary), iters, SV*);
+ AvFILLp(ary) = iters - 1;
+
+ if (SvSMAGICAL(ary)) {
+ PUTBACK;
mg_set(MUTABLE_SV(ary));
SPAGAIN;
- }
- if (gimme == G_ARRAY) {
- EXTEND(SP, iters);
- Copy(AvARRAY(ary), SP + 1, iters, SV*);
- SP += iters;
- RETURN;
- }
+ }
+
+ if (gimme != G_ARRAY) {
+ /* SP points to the final SV* pushed to the stack. But the SV* */
+ /* are not going to be used from the stack. Point SP to below */
+ /* the first of these SV*. */
+ SP -= iters;
+ PUTBACK;
+ }
}
else {
- PUTBACK;
- ENTER_with_name("call_PUSH");
- call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
- LEAVE_with_name("call_PUSH");
- SPAGAIN;
+ PUTBACK;
+ av_extend(ary,iters);
+ av_clear(ary);
+
+ ENTER_with_name("call_PUSH");
+ call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
+ LEAVE_with_name("call_PUSH");
+ SPAGAIN;
+
if (gimme == G_ARRAY) {
SSize_t i;
/* EXTEND should not be needed - we just popped them */
- EXTEND(SP, iters);
+ EXTEND_SKIP(SP, iters);
for (i=0; i < iters; i++) {
SV **svp = av_fetch(ary, i, FALSE);
PUSHs((svp) ? *svp : &PL_sv_undef);
}
}
}
- else {
- if (gimme == G_ARRAY)
- RETURN;
- }
- GETTARGET;
- XPUSHi(iters);
+ if (gimme != G_ARRAY) {
+ GETTARGET;
+ XPUSHi(iters);
+ }
+
RETURN;
}
Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
if we get here for a custom op then that means that the custom op didn't
have an implementation. Given that OP_NAME() looks up the custom op
- by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
- registers &PL_unimplemented_op as the address of their custom op.
+ by its op_ppaddr, likely it will return NULL, unless someone (unhelpfully)
+ registers &Perl_unimplemented_op as the address of their custom op.
NULL doesn't generate a useful error message. "custom" does. */
const char *const name = op_type >= OP_max
- ? "[out of range]" : PL_op_name[PL_op->op_type];
+ ? "[out of range]" : PL_op_name[op_type];
if(OP_IS_SOCKET(op_type))
DIE(aTHX_ PL_no_sock_func, name);
DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
PP(pp_avhvswitch)
{
- dVAR; dSP;
+ dSP;
return PL_ppaddr[
(SvTYPE(TOPs) == SVt_PVAV ? OP_AEACH : OP_EACH)
+ (PL_op->op_private & OPpAVHVSWITCH_MASK)
return sv;
}
-/* Check a a subs arguments - i.e. that it has the correct number of args
+/* Check a sub's arguments - i.e. that it has the correct number of args
* (and anything else we might think of in future). Typically used with
* signatured subs.
*/
PP(pp_argcheck)
{
OP * const o = PL_op;
- UNOP_AUX_item *aux = cUNOP_AUXo->op_aux;
- IV params = aux[0].iv;
- IV opt_params = aux[1].iv;
- char slurpy = (char)(aux[2].iv);
+ struct op_argcheck_aux *aux = (struct op_argcheck_aux *)cUNOP_AUXo->op_aux;
+ UV params = aux->params;
+ UV opt_params = aux->opt_params;
+ char slurpy = aux->slurpy;
AV *defav = GvAV(PL_defgv); /* @_ */
- IV argc;
+ UV argc;
bool too_few;
assert(!SvMAGICAL(defav));
- argc = (AvFILLp(defav) + 1);
+ argc = (UV)(AvFILLp(defav) + 1);
too_few = (argc < (params - opt_params));
if (UNLIKELY(too_few || (!slurpy && argc > params)))
return NORMAL;
}
+PP(pp_isa)
+{
+ dSP;
+ SV *left, *right;
+
+ right = POPs;
+ left = TOPs;
+
+ SETs(boolSV(sv_isa_sv(left, right)));
+ RETURN;
+}
+
+PP(pp_cmpchain_and)
+{
+ dSP;
+ SV *result = POPs;
+ PUTBACK;
+ if (SvTRUE_NN(result)) {
+ return cLOGOP->op_other;
+ } else {
+ TOPs = result;
+ return NORMAL;
+ }
+}
+
+PP(pp_cmpchain_dup)
+{
+ dSP;
+ SV *right = TOPs;
+ SV *left = TOPm1s;
+ TOPm1s = right;
+ TOPs = left;
+ XPUSHs(right);
+ RETURN;
+}
+
/*
* ex: set ts=8 sts=4 sw=4 et:
*/