3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
32 #include "regcharclass.h"
34 /* XXX I can't imagine anyone who doesn't have this actually _needs_
35 it, since pid_t is an integral type.
38 #ifdef NEED_GETPID_PROTO
39 extern Pid_t getpid (void);
43 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
44 * This switches them over to IEEE.
46 #if defined(LIBM_LIB_VERSION)
47 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
50 static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1;
51 static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1;
53 /* variations on pp_null */
59 if (GIMME_V == G_SCALAR)
70 assert(SvTYPE(TARG) == SVt_PVAV);
71 if (PL_op->op_private & OPpLVAL_INTRO)
72 if (!(PL_op->op_private & OPpPAD_STATE))
73 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
75 if (PL_op->op_flags & OPf_REF) {
78 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
79 const I32 flags = is_lvalue_sub();
80 if (flags && !(flags & OPpENTERSUB_INARGS)) {
81 if (GIMME == G_SCALAR)
82 /* diag_listed_as: Can't return %s to lvalue scalar context */
83 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
89 if (gimme == G_ARRAY) {
90 /* XXX see also S_pushav in pp_hot.c */
91 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
93 if (SvMAGICAL(TARG)) {
95 for (i=0; i < (U32)maxarg; i++) {
96 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
97 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
101 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
105 else if (gimme == G_SCALAR) {
106 SV* const sv = sv_newmortal();
107 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
108 sv_setiv(sv, maxarg);
119 assert(SvTYPE(TARG) == SVt_PVHV);
121 if (PL_op->op_private & OPpLVAL_INTRO)
122 if (!(PL_op->op_private & OPpPAD_STATE))
123 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
124 if (PL_op->op_flags & OPf_REF)
126 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
127 const I32 flags = is_lvalue_sub();
128 if (flags && !(flags & OPpENTERSUB_INARGS)) {
129 if (GIMME == G_SCALAR)
130 /* diag_listed_as: Can't return %s to lvalue scalar context */
131 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
136 if (gimme == G_ARRAY) {
137 RETURNOP(Perl_do_kv(aTHX));
139 else if ((PL_op->op_private & OPpTRUEBOOL
140 || ( PL_op->op_private & OPpMAYBE_TRUEBOOL
141 && block_gimme() == G_VOID ))
142 && (!SvRMAGICAL(TARG) || !mg_find(TARG, PERL_MAGIC_tied)))
143 SETs(HvUSEDKEYS(TARG) ? &PL_sv_yes : sv_2mortal(newSViv(0)));
144 else if (gimme == G_SCALAR) {
145 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
154 assert(SvTYPE(TARG) == SVt_PVCV);
162 SvPADSTALE_off(TARG);
170 mg_find(PadlistNAMESARRAY(CvPADLIST(find_runcv(NULL)))[ARGTARG],
172 assert(SvTYPE(TARG) == SVt_PVCV);
175 if (CvISXSUB(mg->mg_obj)) { /* constant */
176 /* XXX Should we clone it here? */
177 /* If this changes to use SAVECLEARSV, we can move the SAVECLEARSV
178 to introcv and remove the SvPADSTALE_off. */
179 SAVEPADSVANDMORTALIZE(ARGTARG);
180 PAD_SVl(ARGTARG) = SvREFCNT_inc_simple_NN(mg->mg_obj);
183 if (CvROOT(mg->mg_obj)) {
184 assert(CvCLONE(mg->mg_obj));
185 assert(!CvCLONED(mg->mg_obj));
187 cv_clone_into((CV *)mg->mg_obj,(CV *)TARG);
188 SAVECLEARSV(PAD_SVl(ARGTARG));
195 static const char S_no_symref_sv[] =
196 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
198 /* In some cases this function inspects PL_op. If this function is called
199 for new op types, more bool parameters may need to be added in place of
202 When noinit is true, the absence of a gv will cause a retval of undef.
203 This is unrelated to the cv-to-gv assignment case.
207 S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
211 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
214 sv = amagic_deref_call(sv, to_gv_amg);
218 if (SvTYPE(sv) == SVt_PVIO) {
219 GV * const gv = MUTABLE_GV(sv_newmortal());
220 gv_init(gv, 0, "__ANONIO__", 10, 0);
221 GvIOp(gv) = MUTABLE_IO(sv);
222 SvREFCNT_inc_void_NN(sv);
225 else if (!isGV_with_GP(sv))
226 return (SV *)Perl_die(aTHX_ "Not a GLOB reference");
229 if (!isGV_with_GP(sv)) {
231 /* If this is a 'my' scalar and flag is set then vivify
234 if (vivify_sv && sv != &PL_sv_undef) {
237 Perl_croak_no_modify();
238 if (cUNOP->op_targ) {
239 SV * const namesv = PAD_SV(cUNOP->op_targ);
240 HV *stash = CopSTASH(PL_curcop);
241 if (SvTYPE(stash) != SVt_PVHV) stash = NULL;
242 gv = MUTABLE_GV(newSV(0));
243 gv_init_sv(gv, stash, namesv, 0);
246 const char * const name = CopSTASHPV(PL_curcop);
247 gv = newGVgen_flags(name,
248 HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
250 prepare_SV_for_RV(sv);
251 SvRV_set(sv, MUTABLE_SV(gv));
256 if (PL_op->op_flags & OPf_REF || strict)
257 return (SV *)Perl_die(aTHX_ PL_no_usym, "a symbol");
258 if (ckWARN(WARN_UNINITIALIZED))
264 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
265 sv, GV_ADDMG, SVt_PVGV
275 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""),
278 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
279 == OPpDONT_INIT_GV) {
280 /* We are the target of a coderef assignment. Return
281 the scalar unchanged, and let pp_sasssign deal with
285 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
287 /* FAKE globs in the symbol table cause weird bugs (#77810) */
291 if (SvFAKE(sv) && !(PL_op->op_private & OPpALLOW_FAKE)) {
292 SV *newsv = sv_newmortal();
293 sv_setsv_flags(newsv, sv, 0);
305 sv, PL_op->op_private & OPpDEREF,
306 PL_op->op_private & HINT_STRICT_REFS,
307 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
308 || PL_op->op_type == OP_READLINE
310 if (PL_op->op_private & OPpLVAL_INTRO)
311 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
316 /* Helper function for pp_rv2sv and pp_rv2av */
318 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
319 const svtype type, SV ***spp)
324 PERL_ARGS_ASSERT_SOFTREF2XV;
326 if (PL_op->op_private & HINT_STRICT_REFS) {
328 Perl_die(aTHX_ S_no_symref_sv, sv,
329 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), what);
331 Perl_die(aTHX_ PL_no_usym, what);
335 PL_op->op_flags & OPf_REF
337 Perl_die(aTHX_ PL_no_usym, what);
338 if (ckWARN(WARN_UNINITIALIZED))
340 if (type != SVt_PV && GIMME_V == G_ARRAY) {
344 **spp = &PL_sv_undef;
347 if ((PL_op->op_flags & OPf_SPECIAL) &&
348 !(PL_op->op_flags & OPf_MOD))
350 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
352 **spp = &PL_sv_undef;
357 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
370 sv = amagic_deref_call(sv, to_sv_amg);
374 switch (SvTYPE(sv)) {
380 DIE(aTHX_ "Not a SCALAR reference");
387 if (!isGV_with_GP(gv)) {
388 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
394 if (PL_op->op_flags & OPf_MOD) {
395 if (PL_op->op_private & OPpLVAL_INTRO) {
396 if (cUNOP->op_first->op_type == OP_NULL)
397 sv = save_scalar(MUTABLE_GV(TOPs));
399 sv = save_scalar(gv);
401 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
403 else if (PL_op->op_private & OPpDEREF)
404 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
413 AV * const av = MUTABLE_AV(TOPs);
414 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
416 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
418 *sv = newSV_type(SVt_PVMG);
419 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
423 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
432 if (PL_op->op_flags & OPf_MOD || LVRET) {
433 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));/* Not TARG RT#67838 */
434 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
436 LvTARG(ret) = SvREFCNT_inc_simple(sv);
437 PUSHs(ret); /* no SvSETMAGIC */
441 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
442 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
443 if (mg && mg->mg_len >= 0) {
461 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
463 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT))
464 == OPpMAY_RETURN_CONSTANT)
467 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
468 /* (But not in defined().) */
470 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
472 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
476 cv = MUTABLE_CV(&PL_sv_undef);
477 SETs(MUTABLE_SV(cv));
487 SV *ret = &PL_sv_undef;
489 if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs));
490 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
491 const char * s = SvPVX_const(TOPs);
492 if (strnEQ(s, "CORE::", 6)) {
493 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
494 if (!code || code == -KEY_CORE)
495 DIE(aTHX_ "Can't find an opnumber for \"%"SVf"\"",
496 SVfARG(newSVpvn_flags(
498 (SvFLAGS(TOPs) & SVf_UTF8)|SVs_TEMP
501 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
507 cv = sv_2cv(TOPs, &stash, &gv, 0);
509 ret = newSVpvn_flags(
510 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv)
520 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
522 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
524 PUSHs(MUTABLE_SV(cv));
538 if (GIMME != G_ARRAY) {
542 *MARK = &PL_sv_undef;
543 *MARK = refto(*MARK);
547 EXTEND_MORTAL(SP - MARK);
549 *MARK = refto(*MARK);
554 S_refto(pTHX_ SV *sv)
559 PERL_ARGS_ASSERT_REFTO;
561 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
564 if (!(sv = LvTARG(sv)))
567 SvREFCNT_inc_void_NN(sv);
569 else if (SvTYPE(sv) == SVt_PVAV) {
570 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
571 av_reify(MUTABLE_AV(sv));
573 SvREFCNT_inc_void_NN(sv);
575 else if (SvPADTMP(sv) && !IS_PADGV(sv))
579 SvREFCNT_inc_void_NN(sv);
582 sv_upgrade(rv, SVt_IV);
591 SV * const sv = POPs;
596 if (!sv || !SvROK(sv))
599 (void)sv_ref(TARG,SvRV(sv),TRUE);
612 stash = CopSTASH(PL_curcop);
613 if (SvTYPE(stash) != SVt_PVHV)
614 Perl_croak(aTHX_ "Attempt to bless into a freed package");
617 SV * const ssv = POPs;
621 if (!ssv) goto curstash;
622 if (!SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
623 Perl_croak(aTHX_ "Attempt to bless into a reference");
624 ptr = SvPV_const(ssv,len);
626 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
627 "Explicit blessing to '' (assuming package main)");
628 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv));
631 (void)sv_bless(TOPs, stash);
641 const char * const elem = SvPV_const(sv, len);
642 GV * const gv = MUTABLE_GV(POPs);
647 /* elem will always be NUL terminated. */
648 const char * const second_letter = elem + 1;
651 if (len == 5 && strEQ(second_letter, "RRAY"))
653 tmpRef = MUTABLE_SV(GvAV(gv));
654 if (tmpRef && !AvREAL((const AV *)tmpRef)
655 && AvREIFY((const AV *)tmpRef))
656 av_reify(MUTABLE_AV(tmpRef));
660 if (len == 4 && strEQ(second_letter, "ODE"))
661 tmpRef = MUTABLE_SV(GvCVu(gv));
664 if (len == 10 && strEQ(second_letter, "ILEHANDLE")) {
665 /* finally deprecated in 5.8.0 */
666 deprecate("*glob{FILEHANDLE}");
667 tmpRef = MUTABLE_SV(GvIOp(gv));
670 if (len == 6 && strEQ(second_letter, "ORMAT"))
671 tmpRef = MUTABLE_SV(GvFORM(gv));
674 if (len == 4 && strEQ(second_letter, "LOB"))
675 tmpRef = MUTABLE_SV(gv);
678 if (len == 4 && strEQ(second_letter, "ASH"))
679 tmpRef = MUTABLE_SV(GvHV(gv));
682 if (*second_letter == 'O' && !elem[2] && len == 2)
683 tmpRef = MUTABLE_SV(GvIOp(gv));
686 if (len == 4 && strEQ(second_letter, "AME"))
687 sv = newSVhek(GvNAME_HEK(gv));
690 if (len == 7 && strEQ(second_letter, "ACKAGE")) {
691 const HV * const stash = GvSTASH(gv);
692 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
693 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
697 if (len == 6 && strEQ(second_letter, "CALAR"))
712 /* Pattern matching */
720 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
721 /* Historically, study was skipped in these cases. */
725 /* Make study a no-op. It's no longer useful and its existence
726 complicates matters elsewhere. */
735 if (PL_op->op_flags & OPf_STACKED)
737 else if (PL_op->op_private & OPpTARGET_MY)
743 if(PL_op->op_type == OP_TRANSR) {
745 const char * const pv = SvPV(sv,len);
746 SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv));
751 TARG = sv_newmortal();
757 /* Lvalue operators. */
760 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
766 PERL_ARGS_ASSERT_DO_CHOMP;
768 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
770 if (SvTYPE(sv) == SVt_PVAV) {
772 AV *const av = MUTABLE_AV(sv);
773 const I32 max = AvFILL(av);
775 for (i = 0; i <= max; i++) {
776 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
777 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
778 do_chomp(retval, sv, chomping);
782 else if (SvTYPE(sv) == SVt_PVHV) {
783 HV* const hv = MUTABLE_HV(sv);
785 (void)hv_iterinit(hv);
786 while ((entry = hv_iternext(hv)))
787 do_chomp(retval, hv_iterval(hv,entry), chomping);
790 else if (SvREADONLY(sv)) {
791 Perl_croak_no_modify();
793 else if (SvIsCOW(sv)) {
794 sv_force_normal_flags(sv, 0);
799 /* XXX, here sv is utf8-ized as a side-effect!
800 If encoding.pm is used properly, almost string-generating
801 operations, including literal strings, chr(), input data, etc.
802 should have been utf8-ized already, right?
804 sv_recode_to_utf8(sv, PL_encoding);
810 char *temp_buffer = NULL;
819 while (len && s[-1] == '\n') {
826 STRLEN rslen, rs_charlen;
827 const char *rsptr = SvPV_const(PL_rs, rslen);
829 rs_charlen = SvUTF8(PL_rs)
833 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
834 /* Assumption is that rs is shorter than the scalar. */
836 /* RS is utf8, scalar is 8 bit. */
838 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
841 /* Cannot downgrade, therefore cannot possibly match
843 assert (temp_buffer == rsptr);
849 else if (PL_encoding) {
850 /* RS is 8 bit, encoding.pm is used.
851 * Do not recode PL_rs as a side-effect. */
852 svrecode = newSVpvn(rsptr, rslen);
853 sv_recode_to_utf8(svrecode, PL_encoding);
854 rsptr = SvPV_const(svrecode, rslen);
855 rs_charlen = sv_len_utf8(svrecode);
858 /* RS is 8 bit, scalar is utf8. */
859 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
873 if (memNE(s, rsptr, rslen))
875 SvIVX(retval) += rs_charlen;
878 s = SvPV_force_nomg_nolen(sv);
886 SvREFCNT_dec(svrecode);
888 Safefree(temp_buffer);
890 if (len && !SvPOK(sv))
891 s = SvPV_force_nomg(sv, len);
894 char * const send = s + len;
895 char * const start = s;
897 while (s > start && UTF8_IS_CONTINUATION(*s))
899 if (is_utf8_string((U8*)s, send - s)) {
900 sv_setpvn(retval, s, send - s);
902 SvCUR_set(sv, s - start);
908 sv_setpvs(retval, "");
912 sv_setpvn(retval, s, 1);
919 sv_setpvs(retval, "");
927 const bool chomping = PL_op->op_type == OP_SCHOMP;
931 do_chomp(TARG, TOPs, chomping);
938 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
939 const bool chomping = PL_op->op_type == OP_CHOMP;
944 do_chomp(TARG, *++MARK, chomping);
955 if (!PL_op->op_private) {
964 SV_CHECK_THINKFIRST_COW_DROP(sv);
966 switch (SvTYPE(sv)) {
970 av_undef(MUTABLE_AV(sv));
973 hv_undef(MUTABLE_HV(sv));
976 if (cv_const_sv((const CV *)sv))
977 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
978 "Constant subroutine %"SVf" undefined",
979 SVfARG(CvANON((const CV *)sv)
980 ? newSVpvs_flags("(anonymous)", SVs_TEMP)
981 : sv_2mortal(newSVhek(GvENAME_HEK(CvGV((const CV *)sv))))));
985 /* let user-undef'd sub keep its identity */
986 GV* const gv = CvGV((const CV *)sv);
987 HEK * const hek = CvNAME_HEK((CV *)sv);
988 if (hek) share_hek_hek(hek);
989 cv_undef(MUTABLE_CV(sv));
990 if (gv) CvGV_set(MUTABLE_CV(sv), gv);
992 SvANY((CV *)sv)->xcv_gv_u.xcv_hek = hek;
998 assert(isGV_with_GP(sv));
1004 /* undef *Pkg::meth_name ... */
1006 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1007 && HvENAME_get(stash);
1009 if((stash = GvHV((const GV *)sv))) {
1010 if(HvENAME_get(stash))
1011 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1015 gp_free(MUTABLE_GV(sv));
1017 GvGP_set(sv, gp_ref(gp));
1018 GvSV(sv) = newSV(0);
1019 GvLINE(sv) = CopLINE(PL_curcop);
1020 GvEGV(sv) = MUTABLE_GV(sv);
1024 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1026 /* undef *Foo::ISA */
1027 if( strEQ(GvNAME((const GV *)sv), "ISA")
1028 && (stash = GvSTASH((const GV *)sv))
1029 && (method_changed || HvENAME(stash)) )
1030 mro_isa_changed_in(stash);
1031 else if(method_changed)
1032 mro_method_changed_in(
1033 GvSTASH((const GV *)sv)
1039 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1055 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
1056 if (SvTYPE(TOPs) >= SVt_PVAV || (isGV_with_GP(TOPs) && !SvFAKE(TOPs)))
1057 Perl_croak_no_modify();
1059 TARG = sv_newmortal();
1060 sv_setsv(TARG, TOPs);
1061 if (!SvREADONLY(TOPs) && !SvGMAGICAL(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1062 && SvIVX(TOPs) != (inc ? IV_MAX : IV_MIN))
1064 SvIV_set(TOPs, SvIVX(TOPs) + (inc ? 1 : -1));
1065 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1069 else sv_dec_nomg(TOPs);
1071 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1072 if (inc && !SvOK(TARG))
1078 /* Ordinary operators. */
1082 dVAR; dSP; dATARGET; SV *svl, *svr;
1083 #ifdef PERL_PRESERVE_IVUV
1086 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1089 #ifdef PERL_PRESERVE_IVUV
1090 /* For integer to integer power, we do the calculation by hand wherever
1091 we're sure it is safe; otherwise we call pow() and try to convert to
1092 integer afterwards. */
1093 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1101 const IV iv = SvIVX(svr);
1105 goto float_it; /* Can't do negative powers this way. */
1109 baseuok = SvUOK(svl);
1111 baseuv = SvUVX(svl);
1113 const IV iv = SvIVX(svl);
1116 baseuok = TRUE; /* effectively it's a UV now */
1118 baseuv = -iv; /* abs, baseuok == false records sign */
1121 /* now we have integer ** positive integer. */
1124 /* foo & (foo - 1) is zero only for a power of 2. */
1125 if (!(baseuv & (baseuv - 1))) {
1126 /* We are raising power-of-2 to a positive integer.
1127 The logic here will work for any base (even non-integer
1128 bases) but it can be less accurate than
1129 pow (base,power) or exp (power * log (base)) when the
1130 intermediate values start to spill out of the mantissa.
1131 With powers of 2 we know this can't happen.
1132 And powers of 2 are the favourite thing for perl
1133 programmers to notice ** not doing what they mean. */
1135 NV base = baseuok ? baseuv : -(NV)baseuv;
1140 while (power >>= 1) {
1148 SvIV_please_nomg(svr);
1151 unsigned int highbit = 8 * sizeof(UV);
1152 unsigned int diff = 8 * sizeof(UV);
1153 while (diff >>= 1) {
1155 if (baseuv >> highbit) {
1159 /* we now have baseuv < 2 ** highbit */
1160 if (power * highbit <= 8 * sizeof(UV)) {
1161 /* result will definitely fit in UV, so use UV math
1162 on same algorithm as above */
1165 const bool odd_power = cBOOL(power & 1);
1169 while (power >>= 1) {
1176 if (baseuok || !odd_power)
1177 /* answer is positive */
1179 else if (result <= (UV)IV_MAX)
1180 /* answer negative, fits in IV */
1181 SETi( -(IV)result );
1182 else if (result == (UV)IV_MIN)
1183 /* 2's complement assumption: special case IV_MIN */
1186 /* answer negative, doesn't fit */
1187 SETn( -(NV)result );
1195 NV right = SvNV_nomg(svr);
1196 NV left = SvNV_nomg(svl);
1199 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1201 We are building perl with long double support and are on an AIX OS
1202 afflicted with a powl() function that wrongly returns NaNQ for any
1203 negative base. This was reported to IBM as PMR #23047-379 on
1204 03/06/2006. The problem exists in at least the following versions
1205 of AIX and the libm fileset, and no doubt others as well:
1207 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1208 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1209 AIX 5.2.0 bos.adt.libm 5.2.0.85
1211 So, until IBM fixes powl(), we provide the following workaround to
1212 handle the problem ourselves. Our logic is as follows: for
1213 negative bases (left), we use fmod(right, 2) to check if the
1214 exponent is an odd or even integer:
1216 - if odd, powl(left, right) == -powl(-left, right)
1217 - if even, powl(left, right) == powl(-left, right)
1219 If the exponent is not an integer, the result is rightly NaNQ, so
1220 we just return that (as NV_NAN).
1224 NV mod2 = Perl_fmod( right, 2.0 );
1225 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1226 SETn( -Perl_pow( -left, right) );
1227 } else if (mod2 == 0.0) { /* even integer */
1228 SETn( Perl_pow( -left, right) );
1229 } else { /* fractional power */
1233 SETn( Perl_pow( left, right) );
1236 SETn( Perl_pow( left, right) );
1237 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1239 #ifdef PERL_PRESERVE_IVUV
1241 SvIV_please_nomg(svr);
1249 dVAR; dSP; dATARGET; SV *svl, *svr;
1250 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1253 #ifdef PERL_PRESERVE_IVUV
1254 if (SvIV_please_nomg(svr)) {
1255 /* Unless the left argument is integer in range we are going to have to
1256 use NV maths. Hence only attempt to coerce the right argument if
1257 we know the left is integer. */
1258 /* Left operand is defined, so is it IV? */
1259 if (SvIV_please_nomg(svl)) {
1260 bool auvok = SvUOK(svl);
1261 bool buvok = SvUOK(svr);
1262 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1263 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1272 const IV aiv = SvIVX(svl);
1275 auvok = TRUE; /* effectively it's a UV now */
1277 alow = -aiv; /* abs, auvok == false records sign */
1283 const IV biv = SvIVX(svr);
1286 buvok = TRUE; /* effectively it's a UV now */
1288 blow = -biv; /* abs, buvok == false records sign */
1292 /* If this does sign extension on unsigned it's time for plan B */
1293 ahigh = alow >> (4 * sizeof (UV));
1295 bhigh = blow >> (4 * sizeof (UV));
1297 if (ahigh && bhigh) {
1299 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1300 which is overflow. Drop to NVs below. */
1301 } else if (!ahigh && !bhigh) {
1302 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1303 so the unsigned multiply cannot overflow. */
1304 const UV product = alow * blow;
1305 if (auvok == buvok) {
1306 /* -ve * -ve or +ve * +ve gives a +ve result. */
1310 } else if (product <= (UV)IV_MIN) {
1311 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1312 /* -ve result, which could overflow an IV */
1314 SETi( -(IV)product );
1316 } /* else drop to NVs below. */
1318 /* One operand is large, 1 small */
1321 /* swap the operands */
1323 bhigh = blow; /* bhigh now the temp var for the swap */
1327 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1328 multiplies can't overflow. shift can, add can, -ve can. */
1329 product_middle = ahigh * blow;
1330 if (!(product_middle & topmask)) {
1331 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1333 product_middle <<= (4 * sizeof (UV));
1334 product_low = alow * blow;
1336 /* as for pp_add, UV + something mustn't get smaller.
1337 IIRC ANSI mandates this wrapping *behaviour* for
1338 unsigned whatever the actual representation*/
1339 product_low += product_middle;
1340 if (product_low >= product_middle) {
1341 /* didn't overflow */
1342 if (auvok == buvok) {
1343 /* -ve * -ve or +ve * +ve gives a +ve result. */
1345 SETu( product_low );
1347 } else if (product_low <= (UV)IV_MIN) {
1348 /* 2s complement assumption again */
1349 /* -ve result, which could overflow an IV */
1351 SETi( -(IV)product_low );
1353 } /* else drop to NVs below. */
1355 } /* product_middle too large */
1356 } /* ahigh && bhigh */
1361 NV right = SvNV_nomg(svr);
1362 NV left = SvNV_nomg(svl);
1364 SETn( left * right );
1371 dVAR; dSP; dATARGET; SV *svl, *svr;
1372 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1375 /* Only try to do UV divide first
1376 if ((SLOPPYDIVIDE is true) or
1377 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1379 The assumption is that it is better to use floating point divide
1380 whenever possible, only doing integer divide first if we can't be sure.
1381 If NV_PRESERVES_UV is true then we know at compile time that no UV
1382 can be too large to preserve, so don't need to compile the code to
1383 test the size of UVs. */
1386 # define PERL_TRY_UV_DIVIDE
1387 /* ensure that 20./5. == 4. */
1389 # ifdef PERL_PRESERVE_IVUV
1390 # ifndef NV_PRESERVES_UV
1391 # define PERL_TRY_UV_DIVIDE
1396 #ifdef PERL_TRY_UV_DIVIDE
1397 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1398 bool left_non_neg = SvUOK(svl);
1399 bool right_non_neg = SvUOK(svr);
1403 if (right_non_neg) {
1407 const IV biv = SvIVX(svr);
1410 right_non_neg = TRUE; /* effectively it's a UV now */
1416 /* historically undef()/0 gives a "Use of uninitialized value"
1417 warning before dieing, hence this test goes here.
1418 If it were immediately before the second SvIV_please, then
1419 DIE() would be invoked before left was even inspected, so
1420 no inspection would give no warning. */
1422 DIE(aTHX_ "Illegal division by zero");
1428 const IV aiv = SvIVX(svl);
1431 left_non_neg = TRUE; /* effectively it's a UV now */
1440 /* For sloppy divide we always attempt integer division. */
1442 /* Otherwise we only attempt it if either or both operands
1443 would not be preserved by an NV. If both fit in NVs
1444 we fall through to the NV divide code below. However,
1445 as left >= right to ensure integer result here, we know that
1446 we can skip the test on the right operand - right big
1447 enough not to be preserved can't get here unless left is
1450 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1453 /* Integer division can't overflow, but it can be imprecise. */
1454 const UV result = left / right;
1455 if (result * right == left) {
1456 SP--; /* result is valid */
1457 if (left_non_neg == right_non_neg) {
1458 /* signs identical, result is positive. */
1462 /* 2s complement assumption */
1463 if (result <= (UV)IV_MIN)
1464 SETi( -(IV)result );
1466 /* It's exact but too negative for IV. */
1467 SETn( -(NV)result );
1470 } /* tried integer divide but it was not an integer result */
1471 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1472 } /* one operand wasn't SvIOK */
1473 #endif /* PERL_TRY_UV_DIVIDE */
1475 NV right = SvNV_nomg(svr);
1476 NV left = SvNV_nomg(svl);
1477 (void)POPs;(void)POPs;
1478 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1479 if (! Perl_isnan(right) && right == 0.0)
1483 DIE(aTHX_ "Illegal division by zero");
1484 PUSHn( left / right );
1491 dVAR; dSP; dATARGET;
1492 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1496 bool left_neg = FALSE;
1497 bool right_neg = FALSE;
1498 bool use_double = FALSE;
1499 bool dright_valid = FALSE;
1502 SV * const svr = TOPs;
1503 SV * const svl = TOPm1s;
1504 if (SvIV_please_nomg(svr)) {
1505 right_neg = !SvUOK(svr);
1509 const IV biv = SvIVX(svr);
1512 right_neg = FALSE; /* effectively it's a UV now */
1519 dright = SvNV_nomg(svr);
1520 right_neg = dright < 0;
1523 if (dright < UV_MAX_P1) {
1524 right = U_V(dright);
1525 dright_valid = TRUE; /* In case we need to use double below. */
1531 /* At this point use_double is only true if right is out of range for
1532 a UV. In range NV has been rounded down to nearest UV and
1533 use_double false. */
1534 if (!use_double && SvIV_please_nomg(svl)) {
1535 left_neg = !SvUOK(svl);
1539 const IV aiv = SvIVX(svl);
1542 left_neg = FALSE; /* effectively it's a UV now */
1549 dleft = SvNV_nomg(svl);
1550 left_neg = dleft < 0;
1554 /* This should be exactly the 5.6 behaviour - if left and right are
1555 both in range for UV then use U_V() rather than floor. */
1557 if (dleft < UV_MAX_P1) {
1558 /* right was in range, so is dleft, so use UVs not double.
1562 /* left is out of range for UV, right was in range, so promote
1563 right (back) to double. */
1565 /* The +0.5 is used in 5.6 even though it is not strictly
1566 consistent with the implicit +0 floor in the U_V()
1567 inside the #if 1. */
1568 dleft = Perl_floor(dleft + 0.5);
1571 dright = Perl_floor(dright + 0.5);
1582 DIE(aTHX_ "Illegal modulus zero");
1584 dans = Perl_fmod(dleft, dright);
1585 if ((left_neg != right_neg) && dans)
1586 dans = dright - dans;
1589 sv_setnv(TARG, dans);
1595 DIE(aTHX_ "Illegal modulus zero");
1598 if ((left_neg != right_neg) && ans)
1601 /* XXX may warn: unary minus operator applied to unsigned type */
1602 /* could change -foo to be (~foo)+1 instead */
1603 if (ans <= ~((UV)IV_MAX)+1)
1604 sv_setiv(TARG, ~ans+1);
1606 sv_setnv(TARG, -(NV)ans);
1609 sv_setuv(TARG, ans);
1618 dVAR; dSP; dATARGET;
1622 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1623 /* TODO: think of some way of doing list-repeat overloading ??? */
1628 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1634 const UV uv = SvUV_nomg(sv);
1636 count = IV_MAX; /* The best we can do? */
1640 const IV iv = SvIV_nomg(sv);
1647 else if (SvNOKp(sv)) {
1648 const NV nv = SvNV_nomg(sv);
1655 count = SvIV_nomg(sv);
1657 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1659 static const char* const oom_list_extend = "Out of memory during list extend";
1660 const I32 items = SP - MARK;
1661 const I32 max = items * count;
1663 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1664 /* Did the max computation overflow? */
1665 if (items > 0 && max > 0 && (max < items || max < count))
1666 Perl_croak(aTHX_ "%s", oom_list_extend);
1671 /* This code was intended to fix 20010809.028:
1674 for (($x =~ /./g) x 2) {
1675 print chop; # "abcdabcd" expected as output.
1678 * but that change (#11635) broke this code:
1680 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1682 * I can't think of a better fix that doesn't introduce
1683 * an efficiency hit by copying the SVs. The stack isn't
1684 * refcounted, and mortalisation obviously doesn't
1685 * Do The Right Thing when the stack has more than
1686 * one pointer to the same mortal value.
1690 *SP = sv_2mortal(newSVsv(*SP));
1700 repeatcpy((char*)(MARK + items), (char*)MARK,
1701 items * sizeof(const SV *), count - 1);
1704 else if (count <= 0)
1707 else { /* Note: mark already snarfed by pp_list */
1708 SV * const tmpstr = POPs;
1711 static const char* const oom_string_extend =
1712 "Out of memory during string extend";
1715 sv_setsv_nomg(TARG, tmpstr);
1716 SvPV_force_nomg(TARG, len);
1717 isutf = DO_UTF8(TARG);
1722 const STRLEN max = (UV)count * len;
1723 if (len > MEM_SIZE_MAX / count)
1724 Perl_croak(aTHX_ "%s", oom_string_extend);
1725 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1726 SvGROW(TARG, max + 1);
1727 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1728 SvCUR_set(TARG, SvCUR(TARG) * count);
1730 *SvEND(TARG) = '\0';
1733 (void)SvPOK_only_UTF8(TARG);
1735 (void)SvPOK_only(TARG);
1737 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1738 /* The parser saw this as a list repeat, and there
1739 are probably several items on the stack. But we're
1740 in scalar context, and there's no pp_list to save us
1741 now. So drop the rest of the items -- robin@kitsite.com
1753 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1754 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1757 useleft = USE_LEFT(svl);
1758 #ifdef PERL_PRESERVE_IVUV
1759 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1760 "bad things" happen if you rely on signed integers wrapping. */
1761 if (SvIV_please_nomg(svr)) {
1762 /* Unless the left argument is integer in range we are going to have to
1763 use NV maths. Hence only attempt to coerce the right argument if
1764 we know the left is integer. */
1771 a_valid = auvok = 1;
1772 /* left operand is undef, treat as zero. */
1774 /* Left operand is defined, so is it IV? */
1775 if (SvIV_please_nomg(svl)) {
1776 if ((auvok = SvUOK(svl)))
1779 const IV aiv = SvIVX(svl);
1782 auvok = 1; /* Now acting as a sign flag. */
1783 } else { /* 2s complement assumption for IV_MIN */
1791 bool result_good = 0;
1794 bool buvok = SvUOK(svr);
1799 const IV biv = SvIVX(svr);
1806 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1807 else "IV" now, independent of how it came in.
1808 if a, b represents positive, A, B negative, a maps to -A etc
1813 all UV maths. negate result if A negative.
1814 subtract if signs same, add if signs differ. */
1816 if (auvok ^ buvok) {
1825 /* Must get smaller */
1830 if (result <= buv) {
1831 /* result really should be -(auv-buv). as its negation
1832 of true value, need to swap our result flag */
1844 if (result <= (UV)IV_MIN)
1845 SETi( -(IV)result );
1847 /* result valid, but out of range for IV. */
1848 SETn( -(NV)result );
1852 } /* Overflow, drop through to NVs. */
1857 NV value = SvNV_nomg(svr);
1861 /* left operand is undef, treat as zero - value */
1865 SETn( SvNV_nomg(svl) - value );
1872 dVAR; dSP; dATARGET; SV *svl, *svr;
1873 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1877 const IV shift = SvIV_nomg(svr);
1878 if (PL_op->op_private & HINT_INTEGER) {
1879 const IV i = SvIV_nomg(svl);
1883 const UV u = SvUV_nomg(svl);
1892 dVAR; dSP; dATARGET; SV *svl, *svr;
1893 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
1897 const IV shift = SvIV_nomg(svr);
1898 if (PL_op->op_private & HINT_INTEGER) {
1899 const IV i = SvIV_nomg(svl);
1903 const UV u = SvUV_nomg(svl);
1915 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
1919 (SvIOK_notUV(left) && SvIOK_notUV(right))
1920 ? (SvIVX(left) < SvIVX(right))
1921 : (do_ncmp(left, right) == -1)
1931 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1935 (SvIOK_notUV(left) && SvIOK_notUV(right))
1936 ? (SvIVX(left) > SvIVX(right))
1937 : (do_ncmp(left, right) == 1)
1947 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
1951 (SvIOK_notUV(left) && SvIOK_notUV(right))
1952 ? (SvIVX(left) <= SvIVX(right))
1953 : (do_ncmp(left, right) <= 0)
1963 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
1967 (SvIOK_notUV(left) && SvIOK_notUV(right))
1968 ? (SvIVX(left) >= SvIVX(right))
1969 : ( (do_ncmp(left, right) & 2) == 0)
1979 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
1983 (SvIOK_notUV(left) && SvIOK_notUV(right))
1984 ? (SvIVX(left) != SvIVX(right))
1985 : (do_ncmp(left, right) != 0)
1990 /* compare left and right SVs. Returns:
1994 * 2: left or right was a NaN
1997 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2001 PERL_ARGS_ASSERT_DO_NCMP;
2002 #ifdef PERL_PRESERVE_IVUV
2003 /* Fortunately it seems NaN isn't IOK */
2004 if (SvIV_please_nomg(right) && SvIV_please_nomg(left)) {
2006 const IV leftiv = SvIVX(left);
2007 if (!SvUOK(right)) {
2008 /* ## IV <=> IV ## */
2009 const IV rightiv = SvIVX(right);
2010 return (leftiv > rightiv) - (leftiv < rightiv);
2012 /* ## IV <=> UV ## */
2014 /* As (b) is a UV, it's >=0, so it must be < */
2017 const UV rightuv = SvUVX(right);
2018 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2023 /* ## UV <=> UV ## */
2024 const UV leftuv = SvUVX(left);
2025 const UV rightuv = SvUVX(right);
2026 return (leftuv > rightuv) - (leftuv < rightuv);
2028 /* ## UV <=> IV ## */
2030 const IV rightiv = SvIVX(right);
2032 /* As (a) is a UV, it's >=0, so it cannot be < */
2035 const UV leftuv = SvUVX(left);
2036 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2039 assert(0); /* NOTREACHED */
2043 NV const rnv = SvNV_nomg(right);
2044 NV const lnv = SvNV_nomg(left);
2046 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2047 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2050 return (lnv > rnv) - (lnv < rnv);
2069 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2072 value = do_ncmp(left, right);
2087 int amg_type = sle_amg;
2091 switch (PL_op->op_type) {
2110 tryAMAGICbin_MG(amg_type, AMGf_set);
2113 const int cmp = (IN_LOCALE_RUNTIME
2114 ? sv_cmp_locale_flags(left, right, 0)
2115 : sv_cmp_flags(left, right, 0));
2116 SETs(boolSV(cmp * multiplier < rhs));
2124 tryAMAGICbin_MG(seq_amg, AMGf_set);
2127 SETs(boolSV(sv_eq_flags(left, right, 0)));
2135 tryAMAGICbin_MG(sne_amg, AMGf_set);
2138 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2146 tryAMAGICbin_MG(scmp_amg, 0);
2149 const int cmp = (IN_LOCALE_RUNTIME
2150 ? sv_cmp_locale_flags(left, right, 0)
2151 : sv_cmp_flags(left, right, 0));
2159 dVAR; dSP; dATARGET;
2160 tryAMAGICbin_MG(band_amg, AMGf_assign);
2163 if (SvNIOKp(left) || SvNIOKp(right)) {
2164 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2165 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2166 if (PL_op->op_private & HINT_INTEGER) {
2167 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2171 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2174 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2175 if (right_ro_nonnum) SvNIOK_off(right);
2178 do_vop(PL_op->op_type, TARG, left, right);
2187 dVAR; dSP; dATARGET;
2188 const int op_type = PL_op->op_type;
2190 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2193 if (SvNIOKp(left) || SvNIOKp(right)) {
2194 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2195 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2196 if (PL_op->op_private & HINT_INTEGER) {
2197 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2198 const IV r = SvIV_nomg(right);
2199 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2203 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2204 const UV r = SvUV_nomg(right);
2205 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2208 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2209 if (right_ro_nonnum) SvNIOK_off(right);
2212 do_vop(op_type, TARG, left, right);
2219 PERL_STATIC_INLINE bool
2220 S_negate_string(pTHX)
2225 SV * const sv = TOPs;
2226 if (!SvPOKp(sv) || SvNIOK(sv) || (!SvPOK(sv) && SvNIOKp(sv)))
2228 s = SvPV_nomg_const(sv, len);
2229 if (isIDFIRST(*s)) {
2230 sv_setpvs(TARG, "-");
2233 else if (*s == '+' || (*s == '-' && !looks_like_number(sv))) {
2234 sv_setsv_nomg(TARG, sv);
2235 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2245 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2246 if (S_negate_string(aTHX)) return NORMAL;
2248 SV * const sv = TOPs;
2251 /* It's publicly an integer */
2254 if (SvIVX(sv) == IV_MIN) {
2255 /* 2s complement assumption. */
2256 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) ==
2260 else if (SvUVX(sv) <= IV_MAX) {
2265 else if (SvIVX(sv) != IV_MIN) {
2269 #ifdef PERL_PRESERVE_IVUV
2276 if (SvNIOKp(sv) && (SvNIOK(sv) || !SvPOK(sv)))
2277 SETn(-SvNV_nomg(sv));
2278 else if (SvPOKp(sv) && SvIV_please_nomg(sv))
2279 goto oops_its_an_int;
2281 SETn(-SvNV_nomg(sv));
2289 tryAMAGICun_MG(not_amg, AMGf_set);
2290 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2297 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2301 if (PL_op->op_private & HINT_INTEGER) {
2302 const IV i = ~SvIV_nomg(sv);
2306 const UV u = ~SvUV_nomg(sv);
2315 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2316 sv_setsv_nomg(TARG, sv);
2317 tmps = (U8*)SvPV_force_nomg(TARG, len);
2320 /* Calculate exact length, let's not estimate. */
2325 U8 * const send = tmps + len;
2326 U8 * const origtmps = tmps;
2327 const UV utf8flags = UTF8_ALLOW_ANYUV;
2329 while (tmps < send) {
2330 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2332 targlen += UNISKIP(~c);
2338 /* Now rewind strings and write them. */
2345 Newx(result, targlen + 1, U8);
2347 while (tmps < send) {
2348 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2350 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2353 sv_usepvn_flags(TARG, (char*)result, targlen,
2354 SV_HAS_TRAILING_NUL);
2361 Newx(result, nchar + 1, U8);
2363 while (tmps < send) {
2364 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2369 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2378 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2381 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2386 for ( ; anum > 0; anum--, tmps++)
2394 /* integer versions of some of the above */
2398 dVAR; dSP; dATARGET;
2399 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2402 SETi( left * right );
2410 dVAR; dSP; dATARGET;
2411 tryAMAGICbin_MG(div_amg, AMGf_assign);
2414 IV value = SvIV_nomg(right);
2416 DIE(aTHX_ "Illegal division by zero");
2417 num = SvIV_nomg(left);
2419 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2423 value = num / value;
2429 #if defined(__GLIBC__) && IVSIZE == 8 && !defined(PERL_DEBUG_READONLY_OPS)
2436 /* This is the vanilla old i_modulo. */
2437 dVAR; dSP; dATARGET;
2438 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2442 DIE(aTHX_ "Illegal modulus zero");
2443 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2447 SETi( left % right );
2452 #if defined(__GLIBC__) && IVSIZE == 8 && !defined(PERL_DEBUG_READONLY_OPS)
2457 /* This is the i_modulo with the workaround for the _moddi3 bug
2458 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2459 * See below for pp_i_modulo. */
2460 dVAR; dSP; dATARGET;
2461 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2465 DIE(aTHX_ "Illegal modulus zero");
2466 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2470 SETi( left % PERL_ABS(right) );
2477 dVAR; dSP; dATARGET;
2478 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2482 DIE(aTHX_ "Illegal modulus zero");
2483 /* The assumption is to use hereafter the old vanilla version... */
2485 PL_ppaddr[OP_I_MODULO] =
2487 /* .. but if we have glibc, we might have a buggy _moddi3
2488 * (at least glicb 2.2.5 is known to have this bug), in other
2489 * words our integer modulus with negative quad as the second
2490 * argument might be broken. Test for this and re-patch the
2491 * opcode dispatch table if that is the case, remembering to
2492 * also apply the workaround so that this first round works
2493 * right, too. See [perl #9402] for more information. */
2497 /* Cannot do this check with inlined IV constants since
2498 * that seems to work correctly even with the buggy glibc. */
2500 /* Yikes, we have the bug.
2501 * Patch in the workaround version. */
2503 PL_ppaddr[OP_I_MODULO] =
2504 &Perl_pp_i_modulo_1;
2505 /* Make certain we work right this time, too. */
2506 right = PERL_ABS(right);
2509 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2513 SETi( left % right );
2521 dVAR; dSP; dATARGET;
2522 tryAMAGICbin_MG(add_amg, AMGf_assign);
2524 dPOPTOPiirl_ul_nomg;
2525 SETi( left + right );
2532 dVAR; dSP; dATARGET;
2533 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2535 dPOPTOPiirl_ul_nomg;
2536 SETi( left - right );
2544 tryAMAGICbin_MG(lt_amg, AMGf_set);
2547 SETs(boolSV(left < right));
2555 tryAMAGICbin_MG(gt_amg, AMGf_set);
2558 SETs(boolSV(left > right));
2566 tryAMAGICbin_MG(le_amg, AMGf_set);
2569 SETs(boolSV(left <= right));
2577 tryAMAGICbin_MG(ge_amg, AMGf_set);
2580 SETs(boolSV(left >= right));
2588 tryAMAGICbin_MG(eq_amg, AMGf_set);
2591 SETs(boolSV(left == right));
2599 tryAMAGICbin_MG(ne_amg, AMGf_set);
2602 SETs(boolSV(left != right));
2610 tryAMAGICbin_MG(ncmp_amg, 0);
2617 else if (left < right)
2629 tryAMAGICun_MG(neg_amg, 0);
2630 if (S_negate_string(aTHX)) return NORMAL;
2632 SV * const sv = TOPs;
2633 IV const i = SvIV_nomg(sv);
2639 /* High falutin' math. */
2644 tryAMAGICbin_MG(atan2_amg, 0);
2647 SETn(Perl_atan2(left, right));
2655 int amg_type = sin_amg;
2656 const char *neg_report = NULL;
2657 NV (*func)(NV) = Perl_sin;
2658 const int op_type = PL_op->op_type;
2675 amg_type = sqrt_amg;
2677 neg_report = "sqrt";
2682 tryAMAGICun_MG(amg_type, 0);
2684 SV * const arg = POPs;
2685 const NV value = SvNV_nomg(arg);
2687 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2688 SET_NUMERIC_STANDARD();
2689 /* diag_listed_as: Can't take log of %g */
2690 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2693 XPUSHn(func(value));
2698 /* Support Configure command-line overrides for rand() functions.
2699 After 5.005, perhaps we should replace this by Configure support
2700 for drand48(), random(), or rand(). For 5.005, though, maintain
2701 compatibility by calling rand() but allow the user to override it.
2702 See INSTALL for details. --Andy Dougherty 15 July 1998
2704 /* Now it's after 5.005, and Configure supports drand48() and random(),
2705 in addition to rand(). So the overrides should not be needed any more.
2706 --Jarkko Hietaniemi 27 September 1998
2709 #ifndef HAS_DRAND48_PROTO
2710 extern double drand48 (void);
2716 if (!PL_srand_called) {
2717 (void)seedDrand01((Rand_seed_t)seed());
2718 PL_srand_called = TRUE;
2728 SV * const sv = POPs;
2734 /* 1 of 2 things can be carried through SvNV, SP or TARG, SP was carried */
2742 sv_setnv_mg(TARG, value);
2753 if (MAXARG >= 1 && (TOPs || POPs)) {
2760 pv = SvPV(top, len);
2761 flags = grok_number(pv, len, &anum);
2763 if (!(flags & IS_NUMBER_IN_UV)) {
2764 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
2765 "Integer overflow in srand");
2773 (void)seedDrand01((Rand_seed_t)anum);
2774 PL_srand_called = TRUE;
2778 /* Historically srand always returned true. We can avoid breaking
2780 sv_setpvs(TARG, "0 but true");
2789 tryAMAGICun_MG(int_amg, AMGf_numeric);
2791 SV * const sv = TOPs;
2792 const IV iv = SvIV_nomg(sv);
2793 /* XXX it's arguable that compiler casting to IV might be subtly
2794 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2795 else preferring IV has introduced a subtle behaviour change bug. OTOH
2796 relying on floating point to be accurate is a bug. */
2801 else if (SvIOK(sv)) {
2803 SETu(SvUV_nomg(sv));
2808 const NV value = SvNV_nomg(sv);
2810 if (value < (NV)UV_MAX + 0.5) {
2813 SETn(Perl_floor(value));
2817 if (value > (NV)IV_MIN - 0.5) {
2820 SETn(Perl_ceil(value));
2831 tryAMAGICun_MG(abs_amg, AMGf_numeric);
2833 SV * const sv = TOPs;
2834 /* This will cache the NV value if string isn't actually integer */
2835 const IV iv = SvIV_nomg(sv);
2840 else if (SvIOK(sv)) {
2841 /* IVX is precise */
2843 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
2851 /* 2s complement assumption. Also, not really needed as
2852 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2858 const NV value = SvNV_nomg(sv);
2872 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
2876 SV* const sv = POPs;
2878 tmps = (SvPV_const(sv, len));
2880 /* If Unicode, try to downgrade
2881 * If not possible, croak. */
2882 SV* const tsv = sv_2mortal(newSVsv(sv));
2885 sv_utf8_downgrade(tsv, FALSE);
2886 tmps = SvPV_const(tsv, len);
2888 if (PL_op->op_type == OP_HEX)
2891 while (*tmps && len && isSPACE(*tmps))
2895 if (*tmps == 'x' || *tmps == 'X') {
2897 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
2899 else if (*tmps == 'b' || *tmps == 'B')
2900 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
2902 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2904 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2918 SV * const sv = TOPs;
2923 SETi(sv_len_utf8_nomg(sv));
2927 (void)SvPV_nomg_const(sv,len);
2931 if (!SvPADTMP(TARG)) {
2932 sv_setsv_nomg(TARG, &PL_sv_undef);
2940 /* Returns false if substring is completely outside original string.
2941 No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must
2942 always be true for an explicit 0.
2945 Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV pos1_iv,
2946 bool pos1_is_uv, IV len_iv,
2947 bool len_is_uv, STRLEN *posp,
2953 PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
2955 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
2956 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
2959 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
2962 if (len_iv || len_is_uv) {
2963 if (!len_is_uv && len_iv < 0) {
2964 pos2_iv = curlen + len_iv;
2966 pos2_is_uv = curlen-1 > ~(UV)len_iv;
2969 } else { /* len_iv >= 0 */
2970 if (!pos1_is_uv && pos1_iv < 0) {
2971 pos2_iv = pos1_iv + len_iv;
2972 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
2974 if ((UV)len_iv > curlen-(UV)pos1_iv)
2977 pos2_iv = pos1_iv+len_iv;
2987 if (!pos2_is_uv && pos2_iv < 0) {
2988 if (!pos1_is_uv && pos1_iv < 0)
2992 else if (!pos1_is_uv && pos1_iv < 0)
2995 if ((UV)pos2_iv < (UV)pos1_iv)
2997 if ((UV)pos2_iv > curlen)
3000 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3001 *posp = (STRLEN)( (UV)pos1_iv );
3002 *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3019 I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3020 const bool rvalue = (GIMME_V != G_VOID);
3023 const char *repl = NULL;
3025 int num_args = PL_op->op_private & 7;
3026 bool repl_need_utf8_upgrade = FALSE;
3030 if(!(repl_sv = POPs)) num_args--;
3032 if ((len_sv = POPs)) {
3033 len_iv = SvIV(len_sv);
3034 len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1;
3039 pos1_iv = SvIV(pos_sv);
3040 pos1_is_uv = SvIOK_UV(pos_sv);
3042 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) {
3047 if (lvalue && !repl_sv) {
3049 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3050 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3052 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3054 pos1_is_uv || pos1_iv >= 0
3055 ? (STRLEN)(UV)pos1_iv
3056 : (LvFLAGS(ret) |= 1, (STRLEN)(UV)-pos1_iv);
3058 len_is_uv || len_iv > 0
3059 ? (STRLEN)(UV)len_iv
3060 : (LvFLAGS(ret) |= 2, (STRLEN)(UV)-len_iv);
3063 PUSHs(ret); /* avoid SvSETMAGIC here */
3067 repl = SvPV_const(repl_sv, repl_len);
3070 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3071 "Attempt to use reference as lvalue in substr"
3073 tmps = SvPV_force_nomg(sv, curlen);
3074 if (DO_UTF8(repl_sv) && repl_len) {
3076 sv_utf8_upgrade_nomg(sv);
3080 else if (DO_UTF8(sv))
3081 repl_need_utf8_upgrade = TRUE;
3083 else tmps = SvPV_const(sv, curlen);
3085 utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen);
3086 if (utf8_curlen == curlen)
3089 curlen = utf8_curlen;
3095 STRLEN pos, len, byte_len, byte_pos;
3097 if (!translate_substr_offsets(
3098 curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len
3102 byte_pos = utf8_curlen
3103 ? sv_or_pv_pos_u2b(sv, tmps, pos, &byte_len) : pos;
3108 SvTAINTED_off(TARG); /* decontaminate */
3109 SvUTF8_off(TARG); /* decontaminate */
3110 sv_setpvn(TARG, tmps, byte_len);
3111 #ifdef USE_LOCALE_COLLATE
3112 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3119 SV* repl_sv_copy = NULL;
3121 if (repl_need_utf8_upgrade) {
3122 repl_sv_copy = newSVsv(repl_sv);
3123 sv_utf8_upgrade(repl_sv_copy);
3124 repl = SvPV_const(repl_sv_copy, repl_len);
3128 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3129 SvREFCNT_dec(repl_sv_copy);
3141 Perl_croak(aTHX_ "substr outside of string");
3142 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3149 const IV size = POPi;
3150 const IV offset = POPi;
3151 SV * const src = POPs;
3152 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3155 if (lvalue) { /* it's an lvalue! */
3156 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3157 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3159 LvTARG(ret) = SvREFCNT_inc_simple(src);
3160 LvTARGOFF(ret) = offset;
3161 LvTARGLEN(ret) = size;
3165 SvTAINTED_off(TARG); /* decontaminate */
3169 sv_setuv(ret, do_vecget(src, offset, size));
3185 const char *little_p;
3188 const bool is_index = PL_op->op_type == OP_INDEX;
3189 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
3195 big_p = SvPV_const(big, biglen);
3196 little_p = SvPV_const(little, llen);
3198 big_utf8 = DO_UTF8(big);
3199 little_utf8 = DO_UTF8(little);
3200 if (big_utf8 ^ little_utf8) {
3201 /* One needs to be upgraded. */
3202 if (little_utf8 && !PL_encoding) {
3203 /* Well, maybe instead we might be able to downgrade the small
3205 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3208 /* If the large string is ISO-8859-1, and it's not possible to
3209 convert the small string to ISO-8859-1, then there is no
3210 way that it could be found anywhere by index. */
3215 /* At this point, pv is a malloc()ed string. So donate it to temp
3216 to ensure it will get free()d */
3217 little = temp = newSV(0);
3218 sv_usepvn(temp, pv, llen);
3219 little_p = SvPVX(little);
3222 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3225 sv_recode_to_utf8(temp, PL_encoding);
3227 sv_utf8_upgrade(temp);
3232 big_p = SvPV_const(big, biglen);
3235 little_p = SvPV_const(little, llen);
3239 if (SvGAMAGIC(big)) {
3240 /* Life just becomes a lot easier if I use a temporary here.
3241 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3242 will trigger magic and overloading again, as will fbm_instr()
3244 big = newSVpvn_flags(big_p, biglen,
3245 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3248 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3249 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3250 warn on undef, and we've already triggered a warning with the
3251 SvPV_const some lines above. We can't remove that, as we need to
3252 call some SvPV to trigger overloading early and find out if the
3254 This is all getting to messy. The API isn't quite clean enough,
3255 because data access has side effects.
3257 little = newSVpvn_flags(little_p, llen,
3258 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3259 little_p = SvPVX(little);
3263 offset = is_index ? 0 : biglen;
3265 if (big_utf8 && offset > 0)
3266 sv_pos_u2b(big, &offset, 0);
3272 else if (offset > (I32)biglen)
3274 if (!(little_p = is_index
3275 ? fbm_instr((unsigned char*)big_p + offset,
3276 (unsigned char*)big_p + biglen, little, 0)
3277 : rninstr(big_p, big_p + offset,
3278 little_p, little_p + llen)))
3281 retval = little_p - big_p;
3282 if (retval > 0 && big_utf8)
3283 sv_pos_b2u(big, &retval);
3293 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3294 SvTAINTED_off(TARG);
3295 do_sprintf(TARG, SP-MARK, MARK+1);
3296 TAINT_IF(SvTAINTED(TARG));
3308 const U8 *s = (U8*)SvPV_const(argsv, len);
3310 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3311 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3312 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3316 XPUSHu(DO_UTF8(argsv) ?
3317 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3331 if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */
3332 && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0)
3334 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top)))
3335 && SvNV_nomg(top) < 0.0))) {
3336 if (ckWARN(WARN_UTF8)) {
3337 if (SvGMAGICAL(top)) {
3338 SV *top2 = sv_newmortal();
3339 sv_setsv_nomg(top2, top);
3342 Perl_warner(aTHX_ packWARN(WARN_UTF8),
3343 "Invalid negative number (%"SVf") in chr", top);
3345 value = UNICODE_REPLACEMENT;
3347 value = SvUV_nomg(top);
3350 SvUPGRADE(TARG,SVt_PV);
3352 if (value > 255 && !IN_BYTES) {
3353 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3354 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3355 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3357 (void)SvPOK_only(TARG);
3366 *tmps++ = (char)value;
3368 (void)SvPOK_only(TARG);
3370 if (PL_encoding && !IN_BYTES) {
3371 sv_recode_to_utf8(TARG, PL_encoding);
3373 if (SvCUR(TARG) == 0
3374 || ! is_utf8_string((U8*)tmps, SvCUR(TARG))
3375 || UTF8_IS_REPLACEMENT((U8*) tmps, (U8*) tmps + SvCUR(TARG)))
3380 *tmps++ = (char)value;
3396 const char *tmps = SvPV_const(left, len);
3398 if (DO_UTF8(left)) {
3399 /* If Unicode, try to downgrade.
3400 * If not possible, croak.
3401 * Yes, we made this up. */
3402 SV* const tsv = sv_2mortal(newSVsv(left));
3405 sv_utf8_downgrade(tsv, FALSE);
3406 tmps = SvPV_const(tsv, len);
3408 # ifdef USE_ITHREADS
3410 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3411 /* This should be threadsafe because in ithreads there is only
3412 * one thread per interpreter. If this would not be true,
3413 * we would need a mutex to protect this malloc. */
3414 PL_reentrant_buffer->_crypt_struct_buffer =
3415 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3416 #if defined(__GLIBC__) || defined(__EMX__)
3417 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3418 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3419 /* work around glibc-2.2.5 bug */
3420 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3424 # endif /* HAS_CRYPT_R */
3425 # endif /* USE_ITHREADS */
3427 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3429 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3435 "The crypt() function is unimplemented due to excessive paranoia.");
3439 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3440 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3444 /* Actually is both lcfirst() and ucfirst(). Only the first character
3445 * changes. This means that possibly we can change in-place, ie., just
3446 * take the source and change that one character and store it back, but not
3447 * if read-only etc, or if the length changes */
3452 STRLEN slen; /* slen is the byte length of the whole SV. */
3455 bool inplace; /* ? Convert first char only, in-place */
3456 bool doing_utf8 = FALSE; /* ? using utf8 */
3457 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3458 const int op_type = PL_op->op_type;
3461 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3462 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3463 * stored as UTF-8 at s. */
3464 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3465 * lowercased) character stored in tmpbuf. May be either
3466 * UTF-8 or not, but in either case is the number of bytes */
3467 bool tainted = FALSE;
3471 s = (const U8*)SvPV_nomg_const(source, slen);
3473 if (ckWARN(WARN_UNINITIALIZED))
3474 report_uninit(source);
3479 /* We may be able to get away with changing only the first character, in
3480 * place, but not if read-only, etc. Later we may discover more reasons to
3481 * not convert in-place. */
3482 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3484 /* First calculate what the changed first character should be. This affects
3485 * whether we can just swap it out, leaving the rest of the string unchanged,
3486 * or even if have to convert the dest to UTF-8 when the source isn't */
3488 if (! slen) { /* If empty */
3489 need = 1; /* still need a trailing NUL */
3492 else if (DO_UTF8(source)) { /* Is the source utf8? */
3495 if (op_type == OP_UCFIRST) {
3496 _to_utf8_title_flags(s, tmpbuf, &tculen,
3497 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3500 _to_utf8_lower_flags(s, tmpbuf, &tculen,
3501 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3504 /* we can't do in-place if the length changes. */
3505 if (ulen != tculen) inplace = FALSE;
3506 need = slen + 1 - ulen + tculen;
3508 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3509 * latin1 is treated as caseless. Note that a locale takes
3511 ulen = 1; /* Original character is 1 byte */
3512 tculen = 1; /* Most characters will require one byte, but this will
3513 * need to be overridden for the tricky ones */
3516 if (op_type == OP_LCFIRST) {
3518 /* lower case the first letter: no trickiness for any character */
3519 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3520 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3523 else if (IN_LOCALE_RUNTIME) {
3524 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3525 * have upper and title case different
3528 else if (! IN_UNI_8_BIT) {
3529 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3530 * on EBCDIC machines whatever the
3531 * native function does */
3533 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3534 UV title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
3536 assert(tculen == 2);
3538 /* If the result is an upper Latin1-range character, it can
3539 * still be represented in one byte, which is its ordinal */
3540 if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) {
3541 *tmpbuf = (U8) title_ord;
3545 /* Otherwise it became more than one ASCII character (in
3546 * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to
3547 * beyond Latin1, so the number of bytes changed, so can't
3548 * replace just the first character in place. */
3551 /* If the result won't fit in a byte, the entire result
3552 * will have to be in UTF-8. Assume worst case sizing in
3553 * conversion. (all latin1 characters occupy at most two
3555 if (title_ord > 255) {
3557 convert_source_to_utf8 = TRUE;
3558 need = slen * 2 + 1;
3560 /* The (converted) UTF-8 and UTF-EBCDIC lengths of all
3561 * (both) characters whose title case is above 255 is
3565 else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */
3566 need = slen + 1 + 1;
3570 } /* End of use Unicode (Latin1) semantics */
3571 } /* End of changing the case of the first character */
3573 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3574 * generate the result */
3577 /* We can convert in place. This means we change just the first
3578 * character without disturbing the rest; no need to grow */
3580 s = d = (U8*)SvPV_force_nomg(source, slen);
3586 /* Here, we can't convert in place; we earlier calculated how much
3587 * space we will need, so grow to accommodate that */
3588 SvUPGRADE(dest, SVt_PV);
3589 d = (U8*)SvGROW(dest, need);
3590 (void)SvPOK_only(dest);
3597 if (! convert_source_to_utf8) {
3599 /* Here both source and dest are in UTF-8, but have to create
3600 * the entire output. We initialize the result to be the
3601 * title/lower cased first character, and then append the rest
3603 sv_setpvn(dest, (char*)tmpbuf, tculen);
3605 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3609 const U8 *const send = s + slen;
3611 /* Here the dest needs to be in UTF-8, but the source isn't,
3612 * except we earlier UTF-8'd the first character of the source
3613 * into tmpbuf. First put that into dest, and then append the
3614 * rest of the source, converting it to UTF-8 as we go. */
3616 /* Assert tculen is 2 here because the only two characters that
3617 * get to this part of the code have 2-byte UTF-8 equivalents */
3619 *d++ = *(tmpbuf + 1);
3620 s++; /* We have just processed the 1st char */
3622 for (; s < send; s++) {
3623 d = uvchr_to_utf8(d, *s);
3626 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3630 else { /* in-place UTF-8. Just overwrite the first character */
3631 Copy(tmpbuf, d, tculen, U8);
3632 SvCUR_set(dest, need - 1);
3640 else { /* Neither source nor dest are in or need to be UTF-8 */
3642 if (IN_LOCALE_RUNTIME) {
3646 if (inplace) { /* in-place, only need to change the 1st char */
3649 else { /* Not in-place */
3651 /* Copy the case-changed character(s) from tmpbuf */
3652 Copy(tmpbuf, d, tculen, U8);
3653 d += tculen - 1; /* Code below expects d to point to final
3654 * character stored */
3657 else { /* empty source */
3658 /* See bug #39028: Don't taint if empty */
3662 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3663 * the destination to retain that flag */
3667 if (!inplace) { /* Finish the rest of the string, unchanged */
3668 /* This will copy the trailing NUL */
3669 Copy(s + 1, d + 1, slen, U8);
3670 SvCUR_set(dest, need - 1);
3673 if (dest != source && SvTAINTED(source))
3679 /* There's so much setup/teardown code common between uc and lc, I wonder if
3680 it would be worth merging the two, and just having a switch outside each
3681 of the three tight loops. There is less and less commonality though */
3695 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3696 && SvTEMP(source) && !DO_UTF8(source)
3697 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3699 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3700 * make the loop tight, so we overwrite the source with the dest before
3701 * looking at it, and we need to look at the original source
3702 * afterwards. There would also need to be code added to handle
3703 * switching to not in-place in midstream if we run into characters
3704 * that change the length.
3707 s = d = (U8*)SvPV_force_nomg(source, len);
3714 /* The old implementation would copy source into TARG at this point.
3715 This had the side effect that if source was undef, TARG was now
3716 an undefined SV with PADTMP set, and they don't warn inside
3717 sv_2pv_flags(). However, we're now getting the PV direct from
3718 source, which doesn't have PADTMP set, so it would warn. Hence the
3722 s = (const U8*)SvPV_nomg_const(source, len);
3724 if (ckWARN(WARN_UNINITIALIZED))
3725 report_uninit(source);
3731 SvUPGRADE(dest, SVt_PV);
3732 d = (U8*)SvGROW(dest, min);
3733 (void)SvPOK_only(dest);
3738 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3739 to check DO_UTF8 again here. */
3741 if (DO_UTF8(source)) {
3742 const U8 *const send = s + len;
3743 U8 tmpbuf[UTF8_MAXBYTES+1];
3744 bool tainted = FALSE;
3746 /* All occurrences of these are to be moved to follow any other marks.
3747 * This is context-dependent. We may not be passed enough context to
3748 * move the iota subscript beyond all of them, but we do the best we can
3749 * with what we're given. The result is always better than if we
3750 * hadn't done this. And, the problem would only arise if we are
3751 * passed a character without all its combining marks, which would be
3752 * the caller's mistake. The information this is based on comes from a
3753 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3754 * itself) and so can't be checked properly to see if it ever gets
3755 * revised. But the likelihood of it changing is remote */
3756 bool in_iota_subscript = FALSE;
3762 if (in_iota_subscript && ! _is_utf8_mark(s)) {
3764 /* A non-mark. Time to output the iota subscript */
3765 Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
3766 d += capital_iota_len;
3767 in_iota_subscript = FALSE;
3770 /* Then handle the current character. Get the changed case value
3771 * and copy it to the output buffer */
3774 uv = _to_utf8_upper_flags(s, tmpbuf, &ulen,
3775 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3776 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
3777 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3778 if (uv == GREEK_CAPITAL_LETTER_IOTA
3779 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
3781 in_iota_subscript = TRUE;
3784 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3785 /* If the eventually required minimum size outgrows the
3786 * available space, we need to grow. */
3787 const UV o = d - (U8*)SvPVX_const(dest);
3789 /* If someone uppercases one million U+03B0s we SvGROW()
3790 * one million times. Or we could try guessing how much to
3791 * allocate without allocating too much. Such is life.
3792 * See corresponding comment in lc code for another option
3795 d = (U8*)SvPVX(dest) + o;
3797 Copy(tmpbuf, d, ulen, U8);
3802 if (in_iota_subscript) {
3803 Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
3804 d += capital_iota_len;
3809 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3815 else { /* Not UTF-8 */
3817 const U8 *const send = s + len;
3819 /* Use locale casing if in locale; regular style if not treating
3820 * latin1 as having case; otherwise the latin1 casing. Do the
3821 * whole thing in a tight loop, for speed, */
3822 if (IN_LOCALE_RUNTIME) {
3825 for (; s < send; d++, s++)
3826 *d = toUPPER_LC(*s);
3828 else if (! IN_UNI_8_BIT) {
3829 for (; s < send; d++, s++) {
3834 for (; s < send; d++, s++) {
3835 *d = toUPPER_LATIN1_MOD(*s);
3836 if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
3840 /* The mainstream case is the tight loop above. To avoid
3841 * extra tests in that, all three characters that require
3842 * special handling are mapped by the MOD to the one tested
3844 * Use the source to distinguish between the three cases */
3846 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3848 /* uc() of this requires 2 characters, but they are
3849 * ASCII. If not enough room, grow the string */
3850 if (SvLEN(dest) < ++min) {
3851 const UV o = d - (U8*)SvPVX_const(dest);
3853 d = (U8*)SvPVX(dest) + o;
3855 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
3856 continue; /* Back to the tight loop; still in ASCII */
3859 /* The other two special handling characters have their
3860 * upper cases outside the latin1 range, hence need to be
3861 * in UTF-8, so the whole result needs to be in UTF-8. So,
3862 * here we are somewhere in the middle of processing a
3863 * non-UTF-8 string, and realize that we will have to convert
3864 * the whole thing to UTF-8. What to do? There are
3865 * several possibilities. The simplest to code is to
3866 * convert what we have so far, set a flag, and continue on
3867 * in the loop. The flag would be tested each time through
3868 * the loop, and if set, the next character would be
3869 * converted to UTF-8 and stored. But, I (khw) didn't want
3870 * to slow down the mainstream case at all for this fairly
3871 * rare case, so I didn't want to add a test that didn't
3872 * absolutely have to be there in the loop, besides the
3873 * possibility that it would get too complicated for
3874 * optimizers to deal with. Another possibility is to just
3875 * give up, convert the source to UTF-8, and restart the
3876 * function that way. Another possibility is to convert
3877 * both what has already been processed and what is yet to
3878 * come separately to UTF-8, then jump into the loop that
3879 * handles UTF-8. But the most efficient time-wise of the
3880 * ones I could think of is what follows, and turned out to
3881 * not require much extra code. */
3883 /* Convert what we have so far into UTF-8, telling the
3884 * function that we know it should be converted, and to
3885 * allow extra space for what we haven't processed yet.
3886 * Assume the worst case space requirements for converting
3887 * what we haven't processed so far: that it will require
3888 * two bytes for each remaining source character, plus the
3889 * NUL at the end. This may cause the string pointer to
3890 * move, so re-find it. */
3892 len = d - (U8*)SvPVX_const(dest);
3893 SvCUR_set(dest, len);
3894 len = sv_utf8_upgrade_flags_grow(dest,
3895 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3897 d = (U8*)SvPVX(dest) + len;
3899 /* Now process the remainder of the source, converting to
3900 * upper and UTF-8. If a resulting byte is invariant in
3901 * UTF-8, output it as-is, otherwise convert to UTF-8 and
3902 * append it to the output. */
3903 for (; s < send; s++) {
3904 (void) _to_upper_title_latin1(*s, d, &len, 'S');
3908 /* Here have processed the whole source; no need to continue
3909 * with the outer loop. Each character has been converted
3910 * to upper case and converted to UTF-8 */
3913 } /* End of processing all latin1-style chars */
3914 } /* End of processing all chars */
3915 } /* End of source is not empty */
3917 if (source != dest) {
3918 *d = '\0'; /* Here d points to 1 after last char, add NUL */
3919 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3921 } /* End of isn't utf8 */
3922 if (dest != source && SvTAINTED(source))
3941 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3942 && SvTEMP(source) && !DO_UTF8(source)) {
3944 /* We can convert in place, as lowercasing anything in the latin1 range
3945 * (or else DO_UTF8 would have been on) doesn't lengthen it */
3947 s = d = (U8*)SvPV_force_nomg(source, len);
3954 /* The old implementation would copy source into TARG at this point.
3955 This had the side effect that if source was undef, TARG was now
3956 an undefined SV with PADTMP set, and they don't warn inside
3957 sv_2pv_flags(). However, we're now getting the PV direct from
3958 source, which doesn't have PADTMP set, so it would warn. Hence the
3962 s = (const U8*)SvPV_nomg_const(source, len);
3964 if (ckWARN(WARN_UNINITIALIZED))
3965 report_uninit(source);
3971 SvUPGRADE(dest, SVt_PV);
3972 d = (U8*)SvGROW(dest, min);
3973 (void)SvPOK_only(dest);
3978 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3979 to check DO_UTF8 again here. */
3981 if (DO_UTF8(source)) {
3982 const U8 *const send = s + len;
3983 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3984 bool tainted = FALSE;
3987 const STRLEN u = UTF8SKIP(s);
3990 _to_utf8_lower_flags(s, tmpbuf, &ulen,
3991 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3993 /* Here is where we would do context-sensitive actions. See the
3994 * commit message for this comment for why there isn't any */
3996 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3998 /* If the eventually required minimum size outgrows the
3999 * available space, we need to grow. */
4000 const UV o = d - (U8*)SvPVX_const(dest);
4002 /* If someone lowercases one million U+0130s we SvGROW() one
4003 * million times. Or we could try guessing how much to
4004 * allocate without allocating too much. Such is life.
4005 * Another option would be to grow an extra byte or two more
4006 * each time we need to grow, which would cut down the million
4007 * to 500K, with little waste */
4009 d = (U8*)SvPVX(dest) + o;
4012 /* Copy the newly lowercased letter to the output buffer we're
4014 Copy(tmpbuf, d, ulen, U8);
4017 } /* End of looping through the source string */
4020 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4025 } else { /* Not utf8 */
4027 const U8 *const send = s + len;
4029 /* Use locale casing if in locale; regular style if not treating
4030 * latin1 as having case; otherwise the latin1 casing. Do the
4031 * whole thing in a tight loop, for speed, */
4032 if (IN_LOCALE_RUNTIME) {
4035 for (; s < send; d++, s++)
4036 *d = toLOWER_LC(*s);
4038 else if (! IN_UNI_8_BIT) {
4039 for (; s < send; d++, s++) {
4044 for (; s < send; d++, s++) {
4045 *d = toLOWER_LATIN1(*s);
4049 if (source != dest) {
4051 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4054 if (dest != source && SvTAINTED(source))
4063 SV * const sv = TOPs;
4065 const char *s = SvPV_const(sv,len);
4067 SvUTF8_off(TARG); /* decontaminate */
4070 SvUPGRADE(TARG, SVt_PV);
4071 SvGROW(TARG, (len * 2) + 1);
4075 STRLEN ulen = UTF8SKIP(s);
4076 bool to_quote = FALSE;
4078 if (UTF8_IS_INVARIANT(*s)) {
4079 if (_isQUOTEMETA(*s)) {
4083 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4085 /* In locale, we quote all non-ASCII Latin1 chars.
4086 * Otherwise use the quoting rules */
4087 if (IN_LOCALE_RUNTIME
4088 || _isQUOTEMETA(TWO_BYTE_UTF8_TO_UNI(*s, *(s + 1))))
4093 else if (is_QUOTEMETA_high(s)) {
4108 else if (IN_UNI_8_BIT) {
4110 if (_isQUOTEMETA(*s))
4116 /* For non UNI_8_BIT (and hence in locale) just quote all \W
4117 * including everything above ASCII */
4119 if (!isWORDCHAR_A(*s))
4125 SvCUR_set(TARG, d - SvPVX_const(TARG));
4126 (void)SvPOK_only_UTF8(TARG);
4129 sv_setpvn(TARG, s, len);
4146 U8 tmpbuf[UTF8_MAXBYTES * UTF8_MAX_FOLD_CHAR_EXPAND + 1];
4147 const bool full_folding = TRUE;
4148 const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 )
4149 | ( IN_LOCALE_RUNTIME ? FOLD_FLAGS_LOCALE : 0 );
4151 /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me.
4152 * You are welcome(?) -Hugmeir
4160 s = (const U8*)SvPV_nomg_const(source, len);
4162 if (ckWARN(WARN_UNINITIALIZED))
4163 report_uninit(source);
4170 SvUPGRADE(dest, SVt_PV);
4171 d = (U8*)SvGROW(dest, min);
4172 (void)SvPOK_only(dest);
4177 if (DO_UTF8(source)) { /* UTF-8 flagged string. */
4178 bool tainted = FALSE;
4180 const STRLEN u = UTF8SKIP(s);
4183 _to_utf8_fold_flags(s, tmpbuf, &ulen, flags, &tainted);
4185 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4186 const UV o = d - (U8*)SvPVX_const(dest);
4188 d = (U8*)SvPVX(dest) + o;
4191 Copy(tmpbuf, d, ulen, U8);
4200 } /* Unflagged string */
4202 if ( IN_LOCALE_RUNTIME ) { /* Under locale */
4205 for (; s < send; d++, s++)
4208 else if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
4209 for (; s < send; d++, s++)
4213 /* For ASCII and the Latin-1 range, there's only two troublesome
4214 * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
4215 * casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which
4216 * under any fold becomes \x{3BC} (\N{GREEK SMALL LETTER MU}) --
4217 * For the rest, the casefold is their lowercase. */
4218 for (; s < send; d++, s++) {
4219 if (*s == MICRO_SIGN) {
4220 /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU},
4221 * which is outside of the latin-1 range. There's a couple
4222 * of ways to deal with this -- khw discusses them in
4223 * pp_lc/uc, so go there :) What we do here is upgrade what
4224 * we had already casefolded, then enter an inner loop that
4225 * appends the rest of the characters as UTF-8. */
4226 len = d - (U8*)SvPVX_const(dest);
4227 SvCUR_set(dest, len);
4228 len = sv_utf8_upgrade_flags_grow(dest,
4229 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4230 /* The max expansion for latin1
4231 * chars is 1 byte becomes 2 */
4233 d = (U8*)SvPVX(dest) + len;
4235 Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8);
4238 for (; s < send; s++) {
4240 UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags);
4241 if UNI_IS_INVARIANT(fc) {
4243 && *s == LATIN_SMALL_LETTER_SHARP_S)
4252 Copy(tmpbuf, d, ulen, U8);
4258 else if (full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) {
4259 /* Under full casefolding, LATIN SMALL LETTER SHARP S
4260 * becomes "ss", which may require growing the SV. */
4261 if (SvLEN(dest) < ++min) {
4262 const UV o = d - (U8*)SvPVX_const(dest);
4264 d = (U8*)SvPVX(dest) + o;
4269 else { /* If it's not one of those two, the fold is their lower
4271 *d = toLOWER_LATIN1(*s);
4277 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4279 if (SvTAINTED(source))
4289 dVAR; dSP; dMARK; dORIGMARK;
4290 AV *const av = MUTABLE_AV(POPs);
4291 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4293 if (SvTYPE(av) == SVt_PVAV) {
4294 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4295 bool can_preserve = FALSE;
4301 can_preserve = SvCANEXISTDELETE(av);
4304 if (lval && localizing) {
4307 for (svp = MARK + 1; svp <= SP; svp++) {
4308 const I32 elem = SvIV(*svp);
4312 if (max > AvMAX(av))
4316 while (++MARK <= SP) {
4318 I32 elem = SvIV(*MARK);
4319 bool preeminent = TRUE;
4321 if (localizing && can_preserve) {
4322 /* If we can determine whether the element exist,
4323 * Try to preserve the existenceness of a tied array
4324 * element by using EXISTS and DELETE if possible.
4325 * Fallback to FETCH and STORE otherwise. */
4326 preeminent = av_exists(av, elem);
4329 svp = av_fetch(av, elem, lval);
4331 if (!svp || *svp == &PL_sv_undef)
4332 DIE(aTHX_ PL_no_aelem, elem);
4335 save_aelem(av, elem, svp);
4337 SAVEADELETE(av, elem);
4340 *MARK = svp ? *svp : &PL_sv_undef;
4343 if (GIMME != G_ARRAY) {
4345 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4351 /* Smart dereferencing for keys, values and each */
4363 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4368 "Type of argument to %s must be unblessed hashref or arrayref",
4369 PL_op_desc[PL_op->op_type] );
4372 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4374 "Can't modify %s in %s",
4375 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4378 /* Delegate to correct function for op type */
4380 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4381 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4384 return (SvTYPE(sv) == SVt_PVHV)
4385 ? Perl_pp_each(aTHX)
4386 : Perl_pp_aeach(aTHX);
4394 AV *array = MUTABLE_AV(POPs);
4395 const I32 gimme = GIMME_V;
4396 IV *iterp = Perl_av_iter_p(aTHX_ array);
4397 const IV current = (*iterp)++;
4399 if (current > av_len(array)) {
4401 if (gimme == G_SCALAR)
4409 if (gimme == G_ARRAY) {
4410 SV **const element = av_fetch(array, current, 0);
4411 PUSHs(element ? *element : &PL_sv_undef);
4420 AV *array = MUTABLE_AV(POPs);
4421 const I32 gimme = GIMME_V;
4423 *Perl_av_iter_p(aTHX_ array) = 0;
4425 if (gimme == G_SCALAR) {
4427 PUSHi(av_len(array) + 1);
4429 else if (gimme == G_ARRAY) {
4430 IV n = Perl_av_len(aTHX_ array);
4435 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
4436 for (i = 0; i <= n; i++) {
4441 for (i = 0; i <= n; i++) {
4442 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4443 PUSHs(elem ? *elem : &PL_sv_undef);
4450 /* Associative arrays. */
4456 HV * hash = MUTABLE_HV(POPs);
4458 const I32 gimme = GIMME_V;
4461 /* might clobber stack_sp */
4462 entry = hv_iternext(hash);
4467 SV* const sv = hv_iterkeysv(entry);
4468 PUSHs(sv); /* won't clobber stack_sp */
4469 if (gimme == G_ARRAY) {
4472 /* might clobber stack_sp */
4473 val = hv_iterval(hash, entry);
4478 else if (gimme == G_SCALAR)
4485 S_do_delete_local(pTHX)
4489 const I32 gimme = GIMME_V;
4492 const bool sliced = !!(PL_op->op_private & OPpSLICE);
4493 SV *unsliced_keysv = sliced ? NULL : POPs;
4494 SV * const osv = POPs;
4495 SV **mark = sliced ? PL_stack_base + POPMARK : &unsliced_keysv-1;
4497 const bool tied = SvRMAGICAL(osv)
4498 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4499 const bool can_preserve = SvCANEXISTDELETE(osv);
4500 const U32 type = SvTYPE(osv);
4501 SV ** const end = sliced ? SP : &unsliced_keysv;
4503 if (type == SVt_PVHV) { /* hash element */
4504 HV * const hv = MUTABLE_HV(osv);
4505 while (++MARK <= end) {
4506 SV * const keysv = *MARK;
4508 bool preeminent = TRUE;
4510 preeminent = hv_exists_ent(hv, keysv, 0);
4512 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4519 sv = hv_delete_ent(hv, keysv, 0, 0);
4521 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4524 if (!sv) DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
4525 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4527 *MARK = sv_mortalcopy(sv);
4533 SAVEHDELETE(hv, keysv);
4534 *MARK = &PL_sv_undef;
4538 else if (type == SVt_PVAV) { /* array element */
4539 if (PL_op->op_flags & OPf_SPECIAL) {
4540 AV * const av = MUTABLE_AV(osv);
4541 while (++MARK <= end) {
4542 I32 idx = SvIV(*MARK);
4544 bool preeminent = TRUE;
4546 preeminent = av_exists(av, idx);
4548 SV **svp = av_fetch(av, idx, 1);
4555 sv = av_delete(av, idx, 0);
4557 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4560 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4562 *MARK = sv_mortalcopy(sv);
4568 SAVEADELETE(av, idx);
4569 *MARK = &PL_sv_undef;
4574 DIE(aTHX_ "panic: avhv_delete no longer supported");
4577 DIE(aTHX_ "Not a HASH reference");
4579 if (gimme == G_VOID)
4581 else if (gimme == G_SCALAR) {
4586 *++MARK = &PL_sv_undef;
4590 else if (gimme != G_VOID)
4591 PUSHs(unsliced_keysv);
4603 if (PL_op->op_private & OPpLVAL_INTRO)
4604 return do_delete_local();
4607 discard = (gimme == G_VOID) ? G_DISCARD : 0;
4609 if (PL_op->op_private & OPpSLICE) {
4611 HV * const hv = MUTABLE_HV(POPs);
4612 const U32 hvtype = SvTYPE(hv);
4613 if (hvtype == SVt_PVHV) { /* hash element */
4614 while (++MARK <= SP) {
4615 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
4616 *MARK = sv ? sv : &PL_sv_undef;
4619 else if (hvtype == SVt_PVAV) { /* array element */
4620 if (PL_op->op_flags & OPf_SPECIAL) {
4621 while (++MARK <= SP) {
4622 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
4623 *MARK = sv ? sv : &PL_sv_undef;
4628 DIE(aTHX_ "Not a HASH reference");
4631 else if (gimme == G_SCALAR) {