X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/78ed8e3629d58d11345e4367dbe14b9603e8c84b..6987f4434e4dfee71506125954ee1ae41c46f1cb:/pp.c diff --git a/pp.c b/pp.c index 01b979e..5b5e163 100644 --- a/pp.c +++ b/pp.c @@ -28,6 +28,7 @@ #include "perl.h" #include "keywords.h" +#include "invlist_inline.h" #include "reentr.h" #include "regcharclass.h" @@ -1267,16 +1268,10 @@ PP(pp_multiply) NV nr = SvNVX(svr); NV result; - if ( -#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) - !Perl_isnan(nl) && nl == (NV)(il = (IV)nl) - && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr) -#else - nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr) -#endif - ) + if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) { /* nothing was lost by converting to IVs */ goto do_iv; + } SP--; result = nl * nr; # if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16 @@ -1315,8 +1310,9 @@ PP(pp_multiply) alow = aiv; auvok = TRUE; /* effectively it's a UV now */ } else { - /* abs, auvok == false records sign */ - alow = -(UV)aiv; + /* abs, auvok == false records sign; Using 0- here and + * later to silence bogus warning from MS VC */ + alow = (UV) (0 - (UV) aiv); } } if (buvok) { @@ -1328,7 +1324,7 @@ PP(pp_multiply) buvok = TRUE; /* effectively it's a UV now */ } else { /* abs, buvok == false records sign */ - blow = -(UV)biv; + blow = (UV) (0 - (UV) biv); } } @@ -1563,7 +1559,7 @@ PP(pp_modulo) right = biv; right_neg = FALSE; /* effectively it's a UV now */ } else { - right = -(UV)biv; + right = (UV) (0 - (UV) biv); } } } @@ -1593,7 +1589,7 @@ PP(pp_modulo) left = aiv; left_neg = FALSE; /* effectively it's a UV now */ } else { - left = -(UV)aiv; + left = (UV) (0 - (UV) aiv); } } } @@ -1693,7 +1689,8 @@ PP(pp_repeat) else { dTOPss; ASSUME(MARK + 1 == SP); - XPUSHs(sv); + MEXTEND(SP, 1); + PUSHs(sv); MARK[1] = &PL_sv_undef; } SP = MARK + 2; @@ -1846,16 +1843,10 @@ PP(pp_subtract) NV nl = SvNVX(svl); NV nr = SvNVX(svr); - if ( -#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) - !Perl_isnan(nl) && nl == (NV)(il = (IV)nl) - && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr) -#else - nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr) -#endif - ) + if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) { /* nothing was lost by converting to IVs */ goto do_iv; + } SP--; TARGn(nl - nr, 0); /* args not GMG, so can't be tainted */ SETs(TARG); @@ -1891,7 +1882,7 @@ PP(pp_subtract) auv = aiv; auvok = 1; /* Now acting as a sign flag. */ } else { - auv = -(UV)aiv; + auv = (UV) (0 - (UV) aiv); } } a_valid = 1; @@ -1911,7 +1902,7 @@ PP(pp_subtract) buv = biv; buvok = 1; } else - buv = -(UV)biv; + buv = (UV) (0 - (UV) biv); } /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve, else "IV" now, independent of how it came in. @@ -1988,7 +1979,7 @@ static UV S_uv_shift(UV uv, int shift, bool left) shift = -shift; left = !left; } - if (shift >= IV_BITS) { + if (UNLIKELY(shift >= IV_BITS)) { return 0; } return left ? uv << shift : uv >> shift; @@ -1996,14 +1987,34 @@ static UV S_uv_shift(UV uv, int shift, bool left) static IV S_iv_shift(IV iv, int shift, bool left) { - if (shift < 0) { - shift = -shift; - left = !left; - } - if (shift >= IV_BITS) { - return iv < 0 && !left ? -1 : 0; - } - return left ? iv << shift : iv >> shift; + if (shift < 0) { + shift = -shift; + left = !left; + } + + if (UNLIKELY(shift >= IV_BITS)) { + return iv < 0 && !left ? -1 : 0; + } + + /* For left shifts, perl 5 has chosen to treat the value as unsigned for + * the * purposes of shifting, then cast back to signed. This is very + * different from Raku: + * + * $ raku -e 'say -2 +< 5' + * -64 + * + * $ ./perl -le 'print -2 << 5' + * 18446744073709551552 + * */ + if (left) { + if (iv == IV_MIN) { /* Casting this to a UV is undefined behavior */ + return 0; + } + return (IV) (((UV) iv) << shift); + } + + /* Here is right shift */ + return iv >> shift; } #define UV_LEFT_SHIFT(uv, shift) S_uv_shift(uv, shift, TRUE) @@ -2051,14 +2062,20 @@ PP(pp_lt) { dSP; SV *left, *right; + U32 flags_and, flags_or; - tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric); + tryAMAGICbin_MG(lt_amg, AMGf_numeric); right = POPs; left = TOPs; + flags_and = SvFLAGS(left) & SvFLAGS(right); + flags_or = SvFLAGS(left) | SvFLAGS(right); + SETs(boolSV( - (SvIOK_notUV(left) && SvIOK_notUV(right)) - ? (SvIVX(left) < SvIVX(right)) - : (do_ncmp(left, right) == -1) + ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) ) + ? (SvIVX(left) < SvIVX(right)) + : (flags_and & SVf_NOK) + ? (SvNVX(left) < SvNVX(right)) + : (do_ncmp(left, right) == -1) )); RETURN; } @@ -2067,14 +2084,20 @@ PP(pp_gt) { dSP; SV *left, *right; + U32 flags_and, flags_or; - tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric); + tryAMAGICbin_MG(gt_amg, AMGf_numeric); right = POPs; left = TOPs; + flags_and = SvFLAGS(left) & SvFLAGS(right); + flags_or = SvFLAGS(left) | SvFLAGS(right); + SETs(boolSV( - (SvIOK_notUV(left) && SvIOK_notUV(right)) - ? (SvIVX(left) > SvIVX(right)) - : (do_ncmp(left, right) == 1) + ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) ) + ? (SvIVX(left) > SvIVX(right)) + : (flags_and & SVf_NOK) + ? (SvNVX(left) > SvNVX(right)) + : (do_ncmp(left, right) == 1) )); RETURN; } @@ -2083,14 +2106,20 @@ PP(pp_le) { dSP; SV *left, *right; + U32 flags_and, flags_or; - tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric); + tryAMAGICbin_MG(le_amg, AMGf_numeric); right = POPs; left = TOPs; + flags_and = SvFLAGS(left) & SvFLAGS(right); + flags_or = SvFLAGS(left) | SvFLAGS(right); + SETs(boolSV( - (SvIOK_notUV(left) && SvIOK_notUV(right)) - ? (SvIVX(left) <= SvIVX(right)) - : (do_ncmp(left, right) <= 0) + ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) ) + ? (SvIVX(left) <= SvIVX(right)) + : (flags_and & SVf_NOK) + ? (SvNVX(left) <= SvNVX(right)) + : (do_ncmp(left, right) <= 0) )); RETURN; } @@ -2099,14 +2128,20 @@ PP(pp_ge) { dSP; SV *left, *right; + U32 flags_and, flags_or; - tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric); + tryAMAGICbin_MG(ge_amg, AMGf_numeric); right = POPs; left = TOPs; + flags_and = SvFLAGS(left) & SvFLAGS(right); + flags_or = SvFLAGS(left) | SvFLAGS(right); + SETs(boolSV( - (SvIOK_notUV(left) && SvIOK_notUV(right)) - ? (SvIVX(left) >= SvIVX(right)) - : ( (do_ncmp(left, right) & 2) == 0) + ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) ) + ? (SvIVX(left) >= SvIVX(right)) + : (flags_and & SVf_NOK) + ? (SvNVX(left) >= SvNVX(right)) + : ( (do_ncmp(left, right) & 2) == 0) )); RETURN; } @@ -2115,14 +2150,20 @@ PP(pp_ne) { dSP; SV *left, *right; + U32 flags_and, flags_or; - tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric); + tryAMAGICbin_MG(ne_amg, AMGf_numeric); right = POPs; left = TOPs; + flags_and = SvFLAGS(left) & SvFLAGS(right); + flags_or = SvFLAGS(left) | SvFLAGS(right); + SETs(boolSV( - (SvIOK_notUV(left) && SvIOK_notUV(right)) - ? (SvIVX(left) != SvIVX(right)) - : (do_ncmp(left, right) != 0) + ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) ) + ? (SvIVX(left) != SvIVX(right)) + : (flags_and & SVf_NOK) + ? (SvNVX(left) != SvNVX(right)) + : (do_ncmp(left, right) != 0) )); RETURN; } @@ -2248,7 +2289,7 @@ PP(pp_sle) break; } - tryAMAGICbin_MG(amg_type, AMGf_set); + tryAMAGICbin_MG(amg_type, 0); { dPOPTOPssrl; const int cmp = @@ -2266,7 +2307,7 @@ PP(pp_sle) PP(pp_seq) { dSP; - tryAMAGICbin_MG(seq_amg, AMGf_set); + tryAMAGICbin_MG(seq_amg, 0); { dPOPTOPssrl; SETs(boolSV(sv_eq_flags(left, right, 0))); @@ -2277,7 +2318,7 @@ PP(pp_seq) PP(pp_sne) { dSP; - tryAMAGICbin_MG(sne_amg, AMGf_set); + tryAMAGICbin_MG(sne_amg, 0); { dPOPTOPssrl; SETs(boolSV(!sv_eq_flags(left, right, 0))); @@ -2512,7 +2553,7 @@ PP(pp_not) dSP; SV *sv; - tryAMAGICun_MG(not_amg, AMGf_set); + tryAMAGICun_MG(not_amg, 0); sv = *PL_stack_sp; *PL_stack_sp = boolSV(!SvTRUE_nomg_NN(sv)); return NORMAL; @@ -2532,23 +2573,22 @@ S_scomplement(pTHX_ SV *targ, SV *sv) if (len && ! utf8_to_bytes(tmps, &len)) { Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[PL_op->op_type]); } - SvCUR(TARG) = len; + SvCUR_set(TARG, len); SvUTF8_off(TARG); } anum = len; -#ifdef LIBERAL { long *tmpl; - for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++) + for ( ; anum && PTR2nat(tmps) % sizeof(long); anum--, tmps++) *tmps = ~*tmps; tmpl = (long*)tmps; for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++) *tmpl = ~*tmpl; tmps = (U8*)tmpl; } -#endif + for ( ; anum > 0; anum--, tmps++) *tmps = ~*tmps; } @@ -2644,7 +2684,6 @@ PP(pp_i_divide) PP(pp_i_modulo) { - /* This is the vanilla old i_modulo. */ dSP; dATARGET; tryAMAGICbin_MG(modulo_amg, AMGf_assign); { @@ -2660,30 +2699,6 @@ PP(pp_i_modulo) } } -#if defined(__GLIBC__) && IVSIZE == 8 \ - && ( __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8)) - -PP(pp_i_modulo_glibc_bugfix) -{ - /* This is the i_modulo with the workaround for the _moddi3 bug - * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround). - * See below for pp_i_modulo. */ - dSP; dATARGET; - tryAMAGICbin_MG(modulo_amg, AMGf_assign); - { - dPOPTOPiirl_nomg; - if (!right) - DIE(aTHX_ "Illegal modulus zero"); - /* avoid FPE_INTOVF on some platforms when left is IV_MIN */ - if (right == -1) - SETi( 0 ); - else - SETi( left % PERL_ABS(right) ); - RETURN; - } -} -#endif - PP(pp_i_add) { dSP; dATARGET; @@ -2709,7 +2724,7 @@ PP(pp_i_subtract) PP(pp_i_lt) { dSP; - tryAMAGICbin_MG(lt_amg, AMGf_set); + tryAMAGICbin_MG(lt_amg, 0); { dPOPTOPiirl_nomg; SETs(boolSV(left < right)); @@ -2720,7 +2735,7 @@ PP(pp_i_lt) PP(pp_i_gt) { dSP; - tryAMAGICbin_MG(gt_amg, AMGf_set); + tryAMAGICbin_MG(gt_amg, 0); { dPOPTOPiirl_nomg; SETs(boolSV(left > right)); @@ -2731,7 +2746,7 @@ PP(pp_i_gt) PP(pp_i_le) { dSP; - tryAMAGICbin_MG(le_amg, AMGf_set); + tryAMAGICbin_MG(le_amg, 0); { dPOPTOPiirl_nomg; SETs(boolSV(left <= right)); @@ -2742,7 +2757,7 @@ PP(pp_i_le) PP(pp_i_ge) { dSP; - tryAMAGICbin_MG(ge_amg, AMGf_set); + tryAMAGICbin_MG(ge_amg, 0); { dPOPTOPiirl_nomg; SETs(boolSV(left >= right)); @@ -2753,7 +2768,7 @@ PP(pp_i_ge) PP(pp_i_eq) { dSP; - tryAMAGICbin_MG(eq_amg, AMGf_set); + tryAMAGICbin_MG(eq_amg, 0); { dPOPTOPiirl_nomg; SETs(boolSV(left == right)); @@ -2764,7 +2779,7 @@ PP(pp_i_eq) PP(pp_i_ne) { dSP; - tryAMAGICbin_MG(ne_amg, AMGf_set); + tryAMAGICbin_MG(ne_amg, 0); { dPOPTOPiirl_nomg; SETs(boolSV(left != right)); @@ -3074,11 +3089,16 @@ PP(pp_oct) if (*tmps == '0') tmps++, len--; if (isALPHA_FOLD_EQ(*tmps, 'x')) { + tmps++, len--; + flags |= PERL_SCAN_DISALLOW_PREFIX; hex: result_uv = grok_hex (tmps, &len, &flags, &result_nv); } - else if (isALPHA_FOLD_EQ(*tmps, 'b')) + else if (isALPHA_FOLD_EQ(*tmps, 'b')) { + tmps++, len--; + flags |= PERL_SCAN_DISALLOW_PREFIX; result_uv = grok_bin (tmps, &len, &flags, &result_nv); + } else result_uv = grok_oct (tmps, &len, &flags, &result_nv); @@ -3530,11 +3550,16 @@ PP(pp_index) push_result: /* OPpTRUEBOOL indicates an '== -1' has been optimised away */ if (PL_op->op_private & OPpTRUEBOOL) { - PUSHs( ((retval != -1) ^ cBOOL(PL_op->op_private & OPpINDEX_BOOLNEG)) - ? &PL_sv_yes : &PL_sv_no); - if (PL_op->op_private & OPpTARGET_MY) + SV *result = ((retval != -1) ^ cBOOL(PL_op->op_private & OPpINDEX_BOOLNEG)) + ? &PL_sv_yes : &PL_sv_no; + if (PL_op->op_private & OPpTARGET_MY) { /* $lex = (index() == -1) */ - sv_setsv(TARG, TOPs); + sv_setsv_mg(TARG, result); + PUSHs(TARG); + } + else { + PUSHs(result); + } } else PUSHi(retval); @@ -3642,33 +3667,25 @@ PP(pp_crypt) sv_utf8_downgrade(tsv, FALSE); tmps = SvPV_const(tsv, len); } -# ifdef USE_ITHREADS -# ifdef HAS_CRYPT_R +# ifdef USE_ITHREADS +# ifdef HAS_CRYPT_R if (!PL_reentrant_buffer->_crypt_struct_buffer) { /* This should be threadsafe because in ithreads there is only * one thread per interpreter. If this would not be true, * we would need a mutex to protect this malloc. */ PL_reentrant_buffer->_crypt_struct_buffer = (struct crypt_data *)safemalloc(sizeof(struct crypt_data)); -#if defined(__GLIBC__) || defined(__EMX__) +# if defined(__GLIBC__) || defined(__EMX__) if (PL_reentrant_buffer->_crypt_struct_buffer) { PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0; -#if (defined(__GLIBC__) && __GLIBC__ == 2) && \ - (defined(__GLIBC_MINOR__) && __GLIBC_MINOR__ >= 2 && __GLIBC_MINOR__ < 4) - /* work around glibc-2.2.5 bug, has been fixed at some - * time in glibc-2.3.X */ - PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0; -#endif } -#endif +# endif } -# endif /* HAS_CRYPT_R */ -# endif /* USE_ITHREADS */ -# ifdef FCRYPT - sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right))); -# else +# endif /* HAS_CRYPT_R */ +# endif /* USE_ITHREADS */ + sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right))); -# endif + SvUTF8_off(TARG); SETTARG; RETURN; @@ -3708,6 +3725,7 @@ PP(pp_ucfirst) STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or * lowercased) character stored in tmpbuf. May be either * UTF-8 or not, but in either case is the number of bytes */ + bool remove_dot_above = FALSE; s = (const U8*)SvPV_const(source, slen); @@ -3736,6 +3754,7 @@ PP(pp_ucfirst) else if (DO_UTF8(source)) { /* Is the source utf8? */ doing_utf8 = TRUE; ulen = UTF8SKIP(s); + if (op_type == OP_UCFIRST) { #ifdef USE_LOCALE_CTYPE _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE)); @@ -3748,7 +3767,37 @@ PP(pp_ucfirst) #ifdef USE_LOCALE_CTYPE _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE)); + + /* In turkic locales, lower casing an 'I' normally yields U+0131, + * LATIN SMALL LETTER DOTLESS I, but not if the grapheme also + * contains a COMBINING DOT ABOVE. Instead it is treated like + * LATIN CAPITAL LETTER I WITH DOT ABOVE lowercased to 'i'. The + * call to lowercase above has handled this. But SpecialCasing.txt + * says we are supposed to remove the COMBINING DOT ABOVE. We can + * tell if we have this situation if I ==> i in a turkic locale. */ + if ( UNLIKELY(PL_in_utf8_turkic_locale) + && IN_LC_RUNTIME(LC_CTYPE) + && (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i'))) + { + /* Here, we know there was a COMBINING DOT ABOVE. We won't be + * able to handle this in-place. */ + inplace = FALSE; + + /* It seems likely that the DOT will immediately follow the + * 'I'. If so, we can remove it simply by indicating to the + * code below to start copying the source just beyond the DOT. + * We know its length is 2 */ + if (LIKELY(memBEGINs(s + 1, s + slen, COMBINING_DOT_ABOVE_UTF8))) { + ulen += 2; + } + else { /* But if it doesn't follow immediately, set a flag for + the code below */ + remove_dot_above = TRUE; + } + } #else + PERL_UNUSED_VAR(remove_dot_above); + _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, 0); #endif @@ -3766,41 +3815,63 @@ PP(pp_ucfirst) * need to be overridden for the tricky ones */ need = slen + 1; - if (op_type == OP_LCFIRST) { - /* lower case the first letter: no trickiness for any character */ #ifdef USE_LOCALE_CTYPE - if (IN_LC_RUNTIME(LC_CTYPE)) { - *tmpbuf = toLOWER_LC(*s); - } - else -#endif + + if (IN_LC_RUNTIME(LC_CTYPE)) { + if ( UNLIKELY(PL_in_utf8_turkic_locale) + && ( (op_type == OP_LCFIRST && UNLIKELY(*s == 'I')) + || (op_type == OP_UCFIRST && UNLIKELY(*s == 'i')))) { - *tmpbuf = (IN_UNI_8_BIT) - ? toLOWER_LATIN1(*s) - : toLOWER(*s); + if (*s == 'I') { /* lcfirst('I') */ + tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I); + tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I); + } + else { /* ucfirst('i') */ + tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); + tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); + } + tculen = 2; + inplace = FALSE; + doing_utf8 = TRUE; + convert_source_to_utf8 = TRUE; + need += variant_under_utf8_count(s, s + slen); } - } -#ifdef USE_LOCALE_CTYPE - /* is ucfirst() */ - else if (IN_LC_RUNTIME(LC_CTYPE)) { - if (IN_UTF8_CTYPE_LOCALE) { - goto do_uni_rules; + else if (op_type == OP_LCFIRST) { + + /* For lc, there are no gotchas for UTF-8 locales (other than + * the turkish ones already handled above) */ + *tmpbuf = toLOWER_LC(*s); } + else { /* ucfirst */ - *tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any - locales have upper and title case - different */ - } + /* But for uc, some characters require special handling */ + if (IN_UTF8_CTYPE_LOCALE) { + goto do_uni_rules; + } + + /* This would be a bug if any locales have upper and title case + * different */ + *tmpbuf = (U8) toUPPER_LC(*s); + } + } + else #endif - else if (! IN_UNI_8_BIT) { - *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or - * on EBCDIC machines whatever the - * native function does */ - } + /* Here, not in locale. If not using Unicode rules, is a simple + * lower/upper, depending */ + if (! IN_UNI_8_BIT) { + *tmpbuf = (op_type == OP_LCFIRST) + ? toLOWER(*s) + : toUPPER(*s); + } + else if (op_type == OP_LCFIRST) { + /* lower case the first letter: no trickiness for any character */ + *tmpbuf = toLOWER_LATIN1(*s); + } else { /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is - * UTF-8, which we treat as not in locale), and cased latin1 */ + * non-turkic UTF-8, which we treat as not in locale), and cased + * latin1 */ UV title_ord; #ifdef USE_LOCALE_CTYPE do_uni_rules: @@ -3824,16 +3895,19 @@ PP(pp_ucfirst) inplace = FALSE; /* If the result won't fit in a byte, the entire result - * will have to be in UTF-8. Assume worst case sizing in - * conversion. (all latin1 characters occupy at most two - * bytes in utf8) */ + * will have to be in UTF-8. Allocate enough space for the + * expanded first byte, and if UTF-8, the rest of the input + * string, some or all of which may also expand to two + * bytes, plus the terminating NUL. */ if (title_ord > 255) { doing_utf8 = TRUE; convert_source_to_utf8 = TRUE; - need = slen * 2 + 1; + need = slen + + variant_under_utf8_count(s, s + slen) + + 1; /* The (converted) UTF-8 and UTF-EBCDIC lengths of all - * (both) characters whose title case is above 255 is + * characters whose title case is above 255 is * 2. */ ulen = 2; } @@ -3877,6 +3951,29 @@ PP(pp_ucfirst) * of the string. */ sv_setpvn(dest, (char*)tmpbuf, tculen); if (slen > ulen) { + + /* But this boolean being set means we are in a turkic + * locale, and there is a DOT character that needs to be + * removed, and it isn't immediately after the current + * character. Keep concatenating characters to the output + * one at a time, until we find the DOT, which we simply + * skip */ + if (UNLIKELY(remove_dot_above)) { + do { + Size_t this_len = UTF8SKIP(s + ulen); + + sv_catpvn(dest, (char*)(s + ulen), this_len); + + ulen += this_len; + if (memBEGINs(s + ulen, s + slen, COMBINING_DOT_ABOVE_UTF8)) { + ulen += 2; + break; + } + } while (s + ulen < s + slen); + } + + /* The rest of the string can be concatenated unchanged, + * all at once */ sv_catpvn(dest, (char*)(s + ulen), slen - ulen); } } @@ -3888,7 +3985,7 @@ PP(pp_ucfirst) * into tmpbuf. First put that into dest, and then append the * rest of the source, converting it to UTF-8 as we go. */ - /* Assert tculen is 2 here because the only two characters that + /* Assert tculen is 2 here because the only characters that * get to this part of the code have 2-byte UTF-8 equivalents */ assert(tculen == 2); *d++ = *tmpbuf; @@ -3931,7 +4028,7 @@ PP(pp_ucfirst) /* In a "use bytes" we don't treat the source as UTF-8, but, still want * the destination to retain that flag */ - if (SvUTF8(source) && ! IN_BYTES) + if (DO_UTF8(source)) SvUTF8_on(dest); if (!inplace) { /* Finish the rest of the string, unchanged */ @@ -4036,12 +4133,16 @@ PP(pp_uc) STRLEN u; STRLEN ulen; UV uv; - if (in_iota_subscript && ! _is_utf8_mark(s)) { + if (UNLIKELY(in_iota_subscript)) { + UV cp = utf8_to_uvchr_buf(s, send, NULL); - /* A non-mark. Time to output the iota subscript */ - *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA); - *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA); - in_iota_subscript = FALSE; + if (! _invlist_contains_cp(PL_utf8_mark, cp)) { + + /* A non-mark. Time to output the iota subscript */ + *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA); + *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA); + in_iota_subscript = FALSE; + } } /* Then handle the current character. Get the changed case value @@ -4113,16 +4214,27 @@ PP(pp_uc) do_uni_rules: #endif for (; s < send; d++, s++) { + Size_t extra; + *d = toUPPER_LATIN1_MOD(*s); - if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) { + if ( LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) + +#ifdef USE_LOCALE_CTYPE + + && (LIKELY( ! PL_in_utf8_turkic_locale + || ! IN_LC_RUNTIME(LC_CTYPE)) + || *s != 'i') +#endif + + ) { continue; } /* The mainstream case is the tight loop above. To avoid - * extra tests in that, all three characters that require - * special handling are mapped by the MOD to the one tested - * just above. - * Use the source to distinguish between the three cases */ + * extra tests in that, all three characters that always + * require special handling are mapped by the MOD to the + * one tested just above. Use the source to distinguish + * between those cases */ #if UNICODE_MAJOR_VERSION > 2 \ || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \ @@ -4140,7 +4252,7 @@ PP(pp_uc) } #endif - /* The other two special handling characters have their + /* The other special handling characters have their * upper cases outside the latin1 range, hence need to be * in UTF-8, so the whole result needs to be in UTF-8. * @@ -4163,35 +4275,82 @@ PP(pp_uc) * come separately to UTF-8, then jump into the loop that * handles UTF-8. But the most efficient time-wise of the * ones I could think of is what follows, and turned out to - * not require much extra code. */ + * not require much extra code. + * + * First, calculate the extra space needed for the + * remainder of the source needing to be in UTF-8. Except + * for the 'i' in Turkic locales, in UTF-8 strings, the + * uppercase of a character below 256 occupies the same + * number of bytes as the original. Therefore, the space + * needed is the that number plus the number of characters + * that become two bytes when converted to UTF-8, plus, in + * turkish locales, the number of 'i's. */ + + extra = send - s + variant_under_utf8_count(s, send); + +#ifdef USE_LOCALE_CTYPE + + if (UNLIKELY(*s == 'i')) { /* We wouldn't get an 'i' here + unless are in a Turkic + locale */ + const U8 * s_peek = s; + + do { + extra++; + + s_peek = (U8 *) memchr(s_peek + 1, 'i', + send - (s_peek + 1)); + } while (s_peek != NULL); + } +#endif /* Convert what we have so far into UTF-8, telling the * function that we know it should be converted, and to * allow extra space for what we haven't processed yet. - * Assume the worst case space requirements for converting - * what we haven't processed so far: that it will require - * two bytes for each remaining source character, plus the - * NUL at the end. This may cause the string pointer to - * move, so re-find it. */ + * + * This may cause the string pointer to move, so need to + * save and re-find it. */ len = d - (U8*)SvPVX_const(dest); SvCUR_set(dest, len); len = sv_utf8_upgrade_flags_grow(dest, SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, - (send -s) * 2 + 1); + extra + + 1 /* trailing NUL */ ); d = (U8*)SvPVX(dest) + len; /* Now process the remainder of the source, simultaneously - * converting to upper and UTF-8. */ - for (; s < send; s++) { - (void) _to_upper_title_latin1(*s, d, &len, 'S'); - d += len; - } - - /* Here have processed the whole source; no need to continue - * with the outer loop. Each character has been converted - * to upper case and converted to UTF-8 */ + * converting to upper and UTF-8. + * + * To avoid extra tests in the loop body, and since the + * loop is so simple, split out the rare Turkic case into + * its own loop */ +#ifdef USE_LOCALE_CTYPE + if ( UNLIKELY(PL_in_utf8_turkic_locale) + && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE))) + { + for (; s < send; s++) { + if (*s == 'i') { + *d++ = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); + *d++ = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); + } + else { + (void) _to_upper_title_latin1(*s, d, &len, 'S'); + d += len; + } + } + } + else +#endif + for (; s < send; s++) { + (void) _to_upper_title_latin1(*s, d, &len, 'S'); + d += len; + } + + /* Here have processed the whole source; no need to + * continue with the outer loop. Each character has been + * converted to upper case and converted to UTF-8. */ break; } /* End of processing all latin1-style chars */ } /* End of processing all chars */ @@ -4223,15 +4382,26 @@ PP(pp_lc) SV *dest; const U8 *s; U8 *d; + bool has_turkic_I = FALSE; SvGETMAGIC(source); if ( SvPADTMP(source) && !SvREADONLY(source) && SvPOK(source) - && !DO_UTF8(source)) { + && !DO_UTF8(source) + +#ifdef USE_LOCALE_CTYPE + + && ( LIKELY(! IN_LC_RUNTIME(LC_CTYPE)) + || LIKELY(! PL_in_utf8_turkic_locale)) - /* We can convert in place, as lowercasing anything in the latin1 range - * (or else DO_UTF8 would have been on) doesn't lengthen it */ +#endif + + ) { + + /* We can convert in place, as, outside of Turkic UTF-8 locales, + * lowercasing anything in the latin1 range (or else DO_UTF8 would have + * been on) doesn't lengthen it. */ dest = source; s = d = (U8*)SvPV_force_nomg(source, len); min = len + 1; @@ -4253,7 +4423,38 @@ PP(pp_lc) #ifdef USE_LOCALE_CTYPE if (IN_LC_RUNTIME(LC_CTYPE)) { + const U8 * next_I; + _CHECK_AND_WARN_PROBLEMATIC_LOCALE; + + /* Lowercasing in a Turkic locale can cause non-UTF-8 to need to become + * UTF-8 for the single case of the character 'I' */ + if ( UNLIKELY(PL_in_utf8_turkic_locale) + && ! DO_UTF8(source) + && (next_I = (U8 *) memchr(s, 'I', len))) + { + Size_t I_count = 0; + const U8 *const send = s + len; + + do { + I_count++; + + next_I = (U8 *) memchr(next_I + 1, 'I', + send - (next_I + 1)); + } while (next_I != NULL); + + /* Except for the 'I', in UTF-8 strings, the lower case of a + * character below 256 occupies the same number of bytes as the + * original. Therefore, the space needed is the original length + * plus I_count plus the number of characters that become two bytes + * when converted to UTF-8 */ + sv_utf8_upgrade_flags_grow(dest, 0, len + + I_count + + variant_under_utf8_count(s, send) + + 1 /* Trailing NUL */ ); + d = (U8*)SvPVX(dest); + has_turkic_I = TRUE; + } } #endif @@ -4264,6 +4465,7 @@ PP(pp_lc) if (DO_UTF8(source)) { const U8 *const send = s + len; U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; + bool remove_dot_above = FALSE; while (s < send) { const STRLEN u = UTF8SKIP(s); @@ -4272,7 +4474,33 @@ PP(pp_lc) #ifdef USE_LOCALE_CTYPE _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); + + /* If we are in a Turkic locale, we have to do more work. As noted + * in the comments for lcfirst, there is a special case if a 'I' + * is in a grapheme with COMBINING DOT ABOVE UTF8. It turns into a + * 'i', and the DOT must be removed. We check for that situation, + * and set a flag if the DOT is there. Then each time through the + * loop, we have to see if we need to remove the next DOT above, + * and if so, do it. We know that there is a DOT because + * _toLOWER_utf8_flags() wouldn't have returned 'i' unless there + * was one in a proper position. */ + if ( UNLIKELY(PL_in_utf8_turkic_locale) + && IN_LC_RUNTIME(LC_CTYPE)) + { + if ( UNLIKELY(remove_dot_above) + && memBEGINs(tmpbuf, sizeof(tmpbuf), COMBINING_DOT_ABOVE_UTF8)) + { + s += u; + remove_dot_above = FALSE; + continue; + } + else if (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i')) { + remove_dot_above = TRUE; + } + } #else + PERL_UNUSED_VAR(remove_dot_above); + _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, 0); #endif @@ -4304,7 +4532,7 @@ PP(pp_lc) SvUTF8_on(dest); *d = '\0'; SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); - } else { /* Not utf8 */ + } else { /* 'source' not utf8 */ if (len) { const U8 *const send = s + len; @@ -4313,8 +4541,22 @@ PP(pp_lc) * whole thing in a tight loop, for speed, */ #ifdef USE_LOCALE_CTYPE if (IN_LC_RUNTIME(LC_CTYPE)) { - for (; s < send; d++, s++) - *d = toLOWER_LC(*s); + if (LIKELY( ! has_turkic_I)) { + for (; s < send; d++, s++) + *d = toLOWER_LC(*s); + } + else { /* This is the only case where lc() converts 'dest' + into UTF-8 from a non-UTF-8 'source' */ + for (; s < send; s++) { + if (*s == 'I') { + *d++ = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I); + *d++ = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I); + } + else { + append_utf8_from_native_byte(toLOWER_LATIN1(*s), &d); + } + } + } } else #endif @@ -4522,28 +4764,64 @@ PP(pp_fc) #ifdef USE_LOCALE_CTYPE do_uni_folding: #endif - /* For ASCII and the Latin-1 range, there's two + /* For ASCII and the Latin-1 range, there's potentially three * troublesome folds: * \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full * casefolding becomes 'ss'; * \x{B5} (\N{MICRO SIGN}), which under any fold becomes * \x{3BC} (\N{GREEK SMALL LETTER MU}) + * I only in Turkic locales, this folds to \x{131} + * \N{LATIN SMALL LETTER DOTLESS I} * For the rest, the casefold is their lowercase. */ for (; s < send; d++, s++) { - if (*s == MICRO_SIGN) { + if ( UNLIKELY(*s == MICRO_SIGN) +#ifdef USE_LOCALE_CTYPE + || ( UNLIKELY(PL_in_utf8_turkic_locale) + && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE)) + && UNLIKELY(*s == 'I')) +#endif + ) { + Size_t extra = send - s + + variant_under_utf8_count(s, send); + /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU}, - * which is outside of the latin-1 range. There's a couple - * of ways to deal with this -- khw discusses them in - * pp_lc/uc, so go there :) What we do here is upgrade what - * we had already casefolded, then enter an inner loop that - * appends the rest of the characters as UTF-8. */ + * and 'I' in Turkic locales is \N{LATIN SMALL LETTER + * DOTLESS I} both of which are outside of the latin-1 + * range. There's a couple of ways to deal with this -- khw + * discusses them in pp_lc/uc, so go there :) What we do + * here is upgrade what we had already casefolded, then + * enter an inner loop that appends the rest of the + * characters as UTF-8. + * + * First we calculate the needed size of the upgraded dest + * beyond what's been processed already (the upgrade + * function figures that out). Except for the 'I' in + * Turkic locales, in UTF-8 strings, the fold case of a + * character below 256 occupies the same number of bytes as + * the original (even the Sharp S). Therefore, the space + * needed is the number of bytes remaining plus the number + * of characters that become two bytes when converted to + * UTF-8 plus, in turkish locales, the number of 'I's */ + + if (UNLIKELY(*s == 'I')) { + const U8 * s_peek = s; + + do { + extra++; + + s_peek = (U8 *) memchr(s_peek + 1, 'i', + send - (s_peek + 1)); + } while (s_peek != NULL); + } + + /* Growing may move things, so have to save and recalculate + * 'd' */ len = d - (U8*)SvPVX_const(dest); SvCUR_set(dest, len); len = sv_utf8_upgrade_flags_grow(dest, SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, - /* The max expansion for latin1 - * chars is 1 byte becomes 2 */ - (send -s) * 2 + 1); + extra + + 1 /* Trailing NUL */ ); d = (U8*)SvPVX(dest) + len; *d++ = UTF8_TWO_BYTE_HI(GREEK_SMALL_LETTER_MU); @@ -4710,7 +4988,7 @@ PP(pp_aeach) IV *iterp = Perl_av_iter_p(aTHX_ array); const IV current = (*iterp)++; - if (current > av_tindex(array)) { + if (current > av_top_index(array)) { *iterp = 0; if (gimme == G_SCALAR) RETPUSHUNDEF; @@ -4738,7 +5016,7 @@ PP(pp_akeys) if (gimme == G_SCALAR) { dTARGET; - PUSHi(av_tindex(array) + 1); + PUSHi(av_count(array)); } else if (gimme == G_ARRAY) { if (UNLIKELY(PL_op->op_private & OPpMAYBE_LVSUB)) { @@ -4749,7 +5027,7 @@ PP(pp_akeys) "Can't modify keys on array in list assignment"); } { - IV n = Perl_av_len(aTHX_ array); + IV n = av_top_index(array); IV i; EXTEND(SP, n + 1); @@ -5581,7 +5859,7 @@ PP(pp_reverse) const MAGIC *mg; bool can_preserve = SvCANEXISTDELETE(av); - for (i = 0, j = av_tindex(av); i < j; ++i, --j) { + for (i = 0, j = av_top_index(av); i < j; ++i, --j) { SV *begin, *end; if (can_preserve) { @@ -5651,6 +5929,7 @@ PP(pp_reverse) sv_setsv(TARG, DEFSV); XPUSHs(TARG); } + SvSETMAGIC(TARG); /* remove any utf8 length magic */ up = SvPV_force(TARG, len); if (len > 1) { @@ -5732,6 +6011,7 @@ PP(pp_split) /* handle @ary = split(...) optimisation */ if (PL_op->op_private & OPpSPLIT_ASSIGN) { + realarray = 1; if (!(PL_op->op_flags & OPf_STACKED)) { if (PL_op->op_private & OPpSPLIT_LEX) { if (PL_op->op_private & OPpLVAL_INTRO) @@ -5754,26 +6034,13 @@ PP(pp_split) oldsave = PL_savestack_ix; } - realarray = 1; - PUTBACK; - av_extend(ary,0); - (void)sv_2mortal(SvREFCNT_inc_simple_NN(sv)); - av_clear(ary); - SPAGAIN; + /* Some defence against stack-not-refcounted bugs */ + (void)sv_2mortal(SvREFCNT_inc_simple_NN(ary)); + if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) { PUSHMARK(SP); XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg)); - } - else { - if (!AvREAL(ary)) { - I32 i; - AvREAL_on(ary); - AvREIFY_off(ary); - for (i = AvFILLp(ary); i >= 0; i--) - AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */ - } - /* temporarily switch stacks */ - SAVESWITCHSTACK(PL_curstack, ary); + } else { make_mortal = 0; } } @@ -5895,62 +6162,52 @@ PP(pp_split) } } else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) { - /* - Pre-extend the stack, either the number of bytes or - characters in the string or a limited amount, triggered by: - - my ($x, $y) = split //, $str; - or - split //, $str, $i; - */ - if (!gimme_scalar) { - const IV items = limit - 1; - /* setting it to -1 will trigger a panic in EXTEND() */ - const SSize_t sslen = slen > SSize_t_MAX ? -1 : (SSize_t)slen; - if (items >=0 && items < sslen) - EXTEND(SP, items); - else - EXTEND(SP, sslen); - } - - if (do_utf8) { - while (--limit) { - /* keep track of how many bytes we skip over */ - m = s; - s += UTF8SKIP(s); - if (gimme_scalar) { - iters++; - if (s-m == 0) - trailing_empty++; - else - trailing_empty = 0; - } else { - dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal); - - PUSHs(dstr); - } - - if (s >= strend) - break; + /* This case boils down to deciding which is the smaller of: + * limit - effectively a number of characters + * slen - which already contains the number of characters in s + * + * The resulting number is the number of iters (for gimme_scalar) + * or the number of SVs to create (!gimme_scalar). */ + + /* setting it to -1 will trigger a panic in EXTEND() */ + const SSize_t sslen = slen > SSize_t_MAX ? -1 : (SSize_t)slen; + const IV items = limit - 1; + if (sslen < items || items < 0) { + iters = slen -1; + limit = slen + 1; + /* Note: The same result is returned if the following block + * is removed, because of the "keep field after final delim?" + * adjustment, but having the following makes the "correct" + * behaviour more apparent. */ + if (gimme_scalar) { + s = strend; + iters++; } } else { - while (--limit) { - if (gimme_scalar) { - iters++; - } else { - dstr = newSVpvn(s, 1); - - - if (make_mortal) - sv_2mortal(dstr); - - PUSHs(dstr); - } - - s++; - - if (s >= strend) - break; + iters = items; + } + if (!gimme_scalar) { + /* + Pre-extend the stack, either the number of bytes or + characters in the string or a limited amount, triggered by: + my ($x, $y) = split //, $str; + or + split //, $str, $i; + */ + EXTEND(SP, limit); + if (do_utf8) { + while (--limit) { + m = s; + s += UTF8SKIP(s); + dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal); + PUSHs(dstr); + } + } else { + while (--limit) { + dstr = newSVpvn_flags(s, 1, make_mortal); + PUSHs(dstr); + s++; + } } } } @@ -5983,7 +6240,7 @@ PP(pp_split) /* The rx->minlen is in characters but we want to step * s ahead by bytes. */ if (do_utf8) - s = (char*)utf8_hop((U8*)m, len); + s = (char*)utf8_hop_forward((U8*) m, len, (U8*) strend); else s = m + len; /* Fake \n at the end */ } @@ -6007,7 +6264,7 @@ PP(pp_split) /* The rx->minlen is in characters but we want to step * s ahead by bytes. */ if (do_utf8) - s = (char*)utf8_hop((U8*)m, len); + s = (char*)utf8_hop_forward((U8*)m, len, (U8 *) strend); else s = m + len; /* Fake \n at the end */ } @@ -6102,32 +6359,59 @@ PP(pp_split) } PUTBACK; - LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */ + LEAVE_SCOPE(oldsave); SPAGAIN; if (realarray) { - if (!mg) { - if (SvSMAGICAL(ary)) { - PUTBACK; + if (!mg) { + PUTBACK; + if(AvREAL(ary)) { + if (av_count(ary) > 0) + av_clear(ary); + } else { + AvREAL_on(ary); + AvREIFY_off(ary); + + if (AvMAX(ary) > -1) { + /* don't free mere refs */ + Zero(AvARRAY(ary), AvMAX(ary), SV*); + } + } + if(AvMAX(ary) < iters) + av_extend(ary,iters); + SPAGAIN; + + /* Need to copy the SV*s from the stack into ary */ + Copy(SP + 1 - iters, AvARRAY(ary), iters, SV*); + AvFILLp(ary) = iters - 1; + + if (SvSMAGICAL(ary)) { + PUTBACK; mg_set(MUTABLE_SV(ary)); SPAGAIN; - } - if (gimme == G_ARRAY) { - EXTEND(SP, iters); - Copy(AvARRAY(ary), SP + 1, iters, SV*); - SP += iters; - RETURN; - } + } + + if (gimme != G_ARRAY) { + /* SP points to the final SV* pushed to the stack. But the SV* */ + /* are not going to be used from the stack. Point SP to below */ + /* the first of these SV*. */ + SP -= iters; + PUTBACK; + } } else { - PUTBACK; - ENTER_with_name("call_PUSH"); - call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED); - LEAVE_with_name("call_PUSH"); - SPAGAIN; + PUTBACK; + av_extend(ary,iters); + av_clear(ary); + + ENTER_with_name("call_PUSH"); + call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED); + LEAVE_with_name("call_PUSH"); + SPAGAIN; + if (gimme == G_ARRAY) { SSize_t i; /* EXTEND should not be needed - we just popped them */ - EXTEND(SP, iters); + EXTEND_SKIP(SP, iters); for (i=0; i < iters; i++) { SV **svp = av_fetch(ary, i, FALSE); PUSHs((svp) ? *svp : &PL_sv_undef); @@ -6136,13 +6420,12 @@ PP(pp_split) } } } - else { - if (gimme == G_ARRAY) - RETURN; - } - GETTARGET; - XPUSHi(iters); + if (gimme != G_ARRAY) { + GETTARGET; + XPUSHi(iters); + } + RETURN; } @@ -6185,11 +6468,11 @@ PP(unimplemented_op) Secondly, as the three ops we "panic" on are padmy, mapstart and custom, if we get here for a custom op then that means that the custom op didn't have an implementation. Given that OP_NAME() looks up the custom op - by its pp_addr, likely it will return NULL, unless someone (unhelpfully) - registers &PL_unimplemented_op as the address of their custom op. + by its op_ppaddr, likely it will return NULL, unless someone (unhelpfully) + registers &Perl_unimplemented_op as the address of their custom op. NULL doesn't generate a useful error message. "custom" does. */ const char *const name = op_type >= OP_max - ? "[out of range]" : PL_op_name[PL_op->op_type]; + ? "[out of range]" : PL_op_name[op_type]; if(OP_IS_SOCKET(op_type)) DIE(aTHX_ PL_no_sock_func, name); DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type); @@ -6385,7 +6668,7 @@ PP(pp_coreargs) PP(pp_avhvswitch) { - dVAR; dSP; + dSP; return PL_ppaddr[ (SvTYPE(TOPs) == SVt_PVAV ? OP_AEACH : OP_EACH) + (PL_op->op_private & OPpAVHVSWITCH_MASK) @@ -6837,7 +7120,7 @@ S_find_runcv_name(void) return sv; } -/* Check a a subs arguments - i.e. that it has the correct number of args +/* Check a sub's arguments - i.e. that it has the correct number of args * (and anything else we might think of in future). Typically used with * signatured subs. */ @@ -6845,16 +7128,16 @@ S_find_runcv_name(void) PP(pp_argcheck) { OP * const o = PL_op; - UNOP_AUX_item *aux = cUNOP_AUXo->op_aux; - IV params = aux[0].iv; - IV opt_params = aux[1].iv; - char slurpy = (char)(aux[2].iv); + struct op_argcheck_aux *aux = (struct op_argcheck_aux *)cUNOP_AUXo->op_aux; + UV params = aux->params; + UV opt_params = aux->opt_params; + char slurpy = aux->slurpy; AV *defav = GvAV(PL_defgv); /* @_ */ - IV argc; + UV argc; bool too_few; assert(!SvMAGICAL(defav)); - argc = (AvFILLp(defav) + 1); + argc = (UV)(AvFILLp(defav) + 1); too_few = (argc < (params - opt_params)); if (UNLIKELY(too_few || (!slurpy && argc > params))) @@ -6871,6 +7154,42 @@ PP(pp_argcheck) return NORMAL; } +PP(pp_isa) +{ + dSP; + SV *left, *right; + + right = POPs; + left = TOPs; + + SETs(boolSV(sv_isa_sv(left, right))); + RETURN; +} + +PP(pp_cmpchain_and) +{ + dSP; + SV *result = POPs; + PUTBACK; + if (SvTRUE_NN(result)) { + return cLOGOP->op_other; + } else { + TOPs = result; + return NORMAL; + } +} + +PP(pp_cmpchain_dup) +{ + dSP; + SV *right = TOPs; + SV *left = TOPm1s; + TOPm1s = right; + TOPs = left; + XPUSHs(right); + RETURN; +} + /* * ex: set ts=8 sts=4 sw=4 et: */