3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
32 #include "regcharclass.h"
34 /* XXX I can't imagine anyone who doesn't have this actually _needs_
35 it, since pid_t is an integral type.
38 #ifdef NEED_GETPID_PROTO
39 extern Pid_t getpid (void);
42 static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1;
43 static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1;
45 /* variations on pp_null */
50 if (GIMME_V == G_SCALAR)
62 assert(SvTYPE(TARG) == SVt_PVCV);
77 CV * const protocv = PadnamePROTOCV(
78 PadlistNAMESARRAY(CvPADLIST(find_runcv(NULL)))[ARGTARG]
80 assert(SvTYPE(TARG) == SVt_PVCV);
82 if (CvISXSUB(protocv)) { /* constant */
83 /* XXX Should we clone it here? */
84 /* If this changes to use SAVECLEARSV, we can move the SAVECLEARSV
85 to introcv and remove the SvPADSTALE_off. */
86 SAVEPADSVANDMORTALIZE(ARGTARG);
87 PAD_SVl(ARGTARG) = SvREFCNT_inc_simple_NN(protocv);
90 if (CvROOT(protocv)) {
91 assert(CvCLONE(protocv));
92 assert(!CvCLONED(protocv));
94 cv_clone_into(protocv,(CV *)TARG);
95 SAVECLEARSV(PAD_SVl(ARGTARG));
102 /* In some cases this function inspects PL_op. If this function is called
103 for new op types, more bool parameters may need to be added in place of
106 When noinit is true, the absence of a gv will cause a retval of undef.
107 This is unrelated to the cv-to-gv assignment case.
111 S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
114 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
117 sv = amagic_deref_call(sv, to_gv_amg);
121 if (SvTYPE(sv) == SVt_PVIO) {
122 GV * const gv = MUTABLE_GV(sv_newmortal());
123 gv_init(gv, 0, "__ANONIO__", 10, 0);
124 GvIOp(gv) = MUTABLE_IO(sv);
125 SvREFCNT_inc_void_NN(sv);
128 else if (!isGV_with_GP(sv)) {
129 Perl_die(aTHX_ "Not a GLOB reference");
133 if (!isGV_with_GP(sv)) {
135 /* If this is a 'my' scalar and flag is set then vivify
138 if (vivify_sv && sv != &PL_sv_undef) {
141 Perl_croak_no_modify();
142 if (cUNOP->op_targ) {
143 SV * const namesv = PAD_SV(cUNOP->op_targ);
144 HV *stash = CopSTASH(PL_curcop);
145 if (SvTYPE(stash) != SVt_PVHV) stash = NULL;
146 gv = MUTABLE_GV(newSV(0));
147 gv_init_sv(gv, stash, namesv, 0);
150 const char * const name = CopSTASHPV(PL_curcop);
151 gv = newGVgen_flags(name,
152 HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
153 SvREFCNT_inc_simple_void_NN(gv);
155 prepare_SV_for_RV(sv);
156 SvRV_set(sv, MUTABLE_SV(gv));
161 if (PL_op->op_flags & OPf_REF || strict) {
162 Perl_die(aTHX_ PL_no_usym, "a symbol");
164 if (ckWARN(WARN_UNINITIALIZED))
170 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
171 sv, GV_ADDMG, SVt_PVGV
180 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""),
184 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
185 == OPpDONT_INIT_GV) {
186 /* We are the target of a coderef assignment. Return
187 the scalar unchanged, and let pp_sasssign deal with
191 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
193 /* FAKE globs in the symbol table cause weird bugs (#77810) */
197 if (SvFAKE(sv) && !(PL_op->op_private & OPpALLOW_FAKE)) {
198 SV *newsv = sv_newmortal();
199 sv_setsv_flags(newsv, sv, 0);
211 sv, PL_op->op_private & OPpDEREF,
212 PL_op->op_private & HINT_STRICT_REFS,
213 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
214 || PL_op->op_type == OP_READLINE
216 if (PL_op->op_private & OPpLVAL_INTRO)
217 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
222 /* Helper function for pp_rv2sv and pp_rv2av */
224 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
225 const svtype type, SV ***spp)
229 PERL_ARGS_ASSERT_SOFTREF2XV;
231 if (PL_op->op_private & HINT_STRICT_REFS) {
233 Perl_die(aTHX_ PL_no_symref_sv, sv,
234 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), what);
236 Perl_die(aTHX_ PL_no_usym, what);
240 PL_op->op_flags & OPf_REF
242 Perl_die(aTHX_ PL_no_usym, what);
243 if (ckWARN(WARN_UNINITIALIZED))
245 if (type != SVt_PV && GIMME_V == G_ARRAY) {
249 **spp = &PL_sv_undef;
252 if ((PL_op->op_flags & OPf_SPECIAL) &&
253 !(PL_op->op_flags & OPf_MOD))
255 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
257 **spp = &PL_sv_undef;
262 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
275 sv = amagic_deref_call(sv, to_sv_amg);
279 if (SvTYPE(sv) >= SVt_PVAV)
280 DIE(aTHX_ "Not a SCALAR reference");
285 if (!isGV_with_GP(gv)) {
286 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
292 if (PL_op->op_flags & OPf_MOD) {
293 if (PL_op->op_private & OPpLVAL_INTRO) {
294 if (cUNOP->op_first->op_type == OP_NULL)
295 sv = save_scalar(MUTABLE_GV(TOPs));
297 sv = save_scalar(gv);
299 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
301 else if (PL_op->op_private & OPpDEREF)
302 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
304 SPAGAIN; /* in case chasing soft refs reallocated the stack */
312 AV * const av = MUTABLE_AV(TOPs);
313 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
315 SV ** const svp = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
317 *svp = newSV_type(SVt_PVMG);
318 sv_magic(*svp, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
322 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
331 if (PL_op->op_flags & OPf_MOD || LVRET) {
332 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));/* Not TARG RT#67838 */
333 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
335 LvTARG(ret) = SvREFCNT_inc_simple(sv);
336 SETs(ret); /* no SvSETMAGIC */
339 const MAGIC * const mg = mg_find_mglob(sv);
340 if (mg && mg->mg_len != -1) {
341 STRLEN i = mg->mg_len;
342 if (PL_op->op_private & OPpTRUEBOOL)
343 SETs(i ? &PL_sv_yes : &PL_sv_zero);
346 if (mg->mg_flags & MGf_BYTES && DO_UTF8(sv))
347 i = sv_pos_b2u_flags(sv, i, SV_GMAGIC|SV_CONST_RETURN);
362 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
364 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT))
365 == OPpMAY_RETURN_CONSTANT)
368 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
369 /* (But not in defined().) */
371 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
373 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
374 cv = SvTYPE(SvRV(gv)) == SVt_PVCV
375 ? MUTABLE_CV(SvRV(gv))
379 cv = MUTABLE_CV(&PL_sv_undef);
380 SETs(MUTABLE_SV(cv));
390 SV *ret = &PL_sv_undef;
392 if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs));
393 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
394 const char * s = SvPVX_const(TOPs);
395 if (strnEQ(s, "CORE::", 6)) {
396 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
398 DIE(aTHX_ "Can't find an opnumber for \"%" UTF8f "\"",
399 UTF8fARG(SvFLAGS(TOPs) & SVf_UTF8, SvCUR(TOPs)-6, s+6));
401 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
407 cv = sv_2cv(TOPs, &stash, &gv, 0);
409 ret = newSVpvn_flags(
410 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv)
420 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
422 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
424 PUSHs(MUTABLE_SV(cv));
438 if (GIMME_V != G_ARRAY) {
444 *MARK = &PL_sv_undef;
446 *MARK = refto(*MARK);
450 EXTEND_MORTAL(SP - MARK);
452 *MARK = refto(*MARK);
457 S_refto(pTHX_ SV *sv)
461 PERL_ARGS_ASSERT_REFTO;
463 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
466 if (!(sv = LvTARG(sv)))
469 SvREFCNT_inc_void_NN(sv);
471 else if (SvTYPE(sv) == SVt_PVAV) {
472 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
473 av_reify(MUTABLE_AV(sv));
475 SvREFCNT_inc_void_NN(sv);
477 else if (SvPADTMP(sv)) {
482 SvREFCNT_inc_void_NN(sv);
485 sv_upgrade(rv, SVt_IV);
494 SV * const sv = TOPs;
502 /* op is in boolean context? */
503 if ( (PL_op->op_private & OPpTRUEBOOL)
504 || ( (PL_op->op_private & OPpMAYBE_TRUEBOOL)
505 && block_gimme() == G_VOID))
507 /* refs are always true - unless it's to an object blessed into a
508 * class with a false name, i.e. "0". So we have to check for
509 * that remote possibility. The following is is basically an
510 * unrolled SvTRUE(sv_reftype(rv)) */
511 SV * const rv = SvRV(sv);
513 HV *stash = SvSTASH(rv);
514 HEK *hek = HvNAME_HEK(stash);
516 I32 len = HEK_LEN(hek);
517 /* bail out and do it the hard way? */
520 || (len == 1 && HEK_KEY(hek)[0] == '0')
533 sv_ref(TARG, SvRV(sv), TRUE);
549 stash = CopSTASH(PL_curcop);
550 if (SvTYPE(stash) != SVt_PVHV)
551 Perl_croak(aTHX_ "Attempt to bless into a freed package");
554 SV * const ssv = POPs;
558 if (!ssv) goto curstash;
561 if (!SvAMAGIC(ssv)) {
563 Perl_croak(aTHX_ "Attempt to bless into a reference");
565 /* SvAMAGIC is on here, but it only means potentially overloaded,
566 so after stringification: */
567 ptr = SvPV_nomg_const(ssv,len);
568 /* We need to check the flag again: */
569 if (!SvAMAGIC(ssv)) goto frog;
571 else ptr = SvPV_nomg_const(ssv,len);
573 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
574 "Explicit blessing to '' (assuming package main)");
575 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv));
578 (void)sv_bless(TOPs, stash);
588 const char * const elem = SvPV_const(sv, len);
589 GV * const gv = MUTABLE_GV(TOPs);
594 /* elem will always be NUL terminated. */
597 if (memEQs(elem, len, "ARRAY"))
599 tmpRef = MUTABLE_SV(GvAV(gv));
600 if (tmpRef && !AvREAL((const AV *)tmpRef)
601 && AvREIFY((const AV *)tmpRef))
602 av_reify(MUTABLE_AV(tmpRef));
606 if (memEQs(elem, len, "CODE"))
607 tmpRef = MUTABLE_SV(GvCVu(gv));
610 if (memEQs(elem, len, "FILEHANDLE")) {
611 tmpRef = MUTABLE_SV(GvIOp(gv));
614 if (memEQs(elem, len, "FORMAT"))
615 tmpRef = MUTABLE_SV(GvFORM(gv));
618 if (memEQs(elem, len, "GLOB"))
619 tmpRef = MUTABLE_SV(gv);
622 if (memEQs(elem, len, "HASH"))
623 tmpRef = MUTABLE_SV(GvHV(gv));
626 if (memEQs(elem, len, "IO"))
627 tmpRef = MUTABLE_SV(GvIOp(gv));
630 if (memEQs(elem, len, "NAME"))
631 sv = newSVhek(GvNAME_HEK(gv));
634 if (memEQs(elem, len, "PACKAGE")) {
635 const HV * const stash = GvSTASH(gv);
636 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
637 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
641 if (memEQs(elem, len, "SCALAR"))
656 /* Pattern matching */
664 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
665 /* Historically, study was skipped in these cases. */
670 /* Make study a no-op. It's no longer useful and its existence
671 complicates matters elsewhere. */
677 /* also used for: pp_transr() */
684 if (PL_op->op_flags & OPf_STACKED)
689 sv = PAD_SV(ARGTARG);
694 if(PL_op->op_type == OP_TRANSR) {
696 const char * const pv = SvPV(sv,len);
697 SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv));
702 I32 i = do_trans(sv);
708 /* Lvalue operators. */
711 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
717 PERL_ARGS_ASSERT_DO_CHOMP;
719 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
721 if (SvTYPE(sv) == SVt_PVAV) {
723 AV *const av = MUTABLE_AV(sv);
724 const I32 max = AvFILL(av);
726 for (i = 0; i <= max; i++) {
727 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
728 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
729 count += do_chomp(retval, sv, chomping);
733 else if (SvTYPE(sv) == SVt_PVHV) {
734 HV* const hv = MUTABLE_HV(sv);
736 (void)hv_iterinit(hv);
737 while ((entry = hv_iternext(hv)))
738 count += do_chomp(retval, hv_iterval(hv,entry), chomping);
741 else if (SvREADONLY(sv)) {
742 Perl_croak_no_modify();
748 char *temp_buffer = NULL;
753 goto nope_free_nothing;
755 while (len && s[-1] == '\n') {
762 STRLEN rslen, rs_charlen;
763 const char *rsptr = SvPV_const(PL_rs, rslen);
765 rs_charlen = SvUTF8(PL_rs)
769 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
770 /* Assumption is that rs is shorter than the scalar. */
772 /* RS is utf8, scalar is 8 bit. */
774 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
777 /* Cannot downgrade, therefore cannot possibly match.
778 At this point, temp_buffer is not alloced, and
779 is the buffer inside PL_rs, so dont free it.
781 assert (temp_buffer == rsptr);
787 /* RS is 8 bit, scalar is utf8. */
788 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
802 if (memNE(s, rsptr, rslen))
807 SvPV_force_nomg_nolen(sv);
814 Safefree(temp_buffer);
816 SvREFCNT_dec(svrecode);
820 if (len && (!SvPOK(sv) || SvIsCOW(sv)))
821 s = SvPV_force_nomg(sv, len);
824 char * const send = s + len;
825 char * const start = s;
827 while (s > start && UTF8_IS_CONTINUATION(*s))
829 if (is_utf8_string((U8*)s, send - s)) {
830 sv_setpvn(retval, s, send - s);
832 SvCUR_set(sv, s - start);
842 sv_setpvn(retval, s, 1);
856 /* also used for: pp_schomp() */
861 const bool chomping = PL_op->op_type == OP_SCHOMP;
863 const size_t count = do_chomp(TARG, TOPs, chomping);
865 sv_setiv(TARG, count);
871 /* also used for: pp_chomp() */
875 dSP; dMARK; dTARGET; dORIGMARK;
876 const bool chomping = PL_op->op_type == OP_CHOMP;
880 count += do_chomp(TARG, *++MARK, chomping);
882 sv_setiv(TARG, count);
893 if (!PL_op->op_private) {
905 if (SvTHINKFIRST(sv))
906 sv_force_normal_flags(sv, SV_COW_DROP_PV|SV_IMMEDIATE_UNREF);
908 switch (SvTYPE(sv)) {
912 av_undef(MUTABLE_AV(sv));
915 hv_undef(MUTABLE_HV(sv));
918 if (cv_const_sv((const CV *)sv))
919 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
920 "Constant subroutine %" SVf " undefined",
921 SVfARG(CvANON((const CV *)sv)
922 ? newSVpvs_flags("(anonymous)", SVs_TEMP)
923 : sv_2mortal(newSVhek(
925 ? CvNAME_HEK((CV *)sv)
926 : GvENAME_HEK(CvGV((const CV *)sv))
931 /* let user-undef'd sub keep its identity */
932 cv_undef_flags(MUTABLE_CV(sv), CV_UNDEF_KEEP_NAME);
935 assert(isGV_with_GP(sv));
941 /* undef *Pkg::meth_name ... */
943 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
944 && HvENAME_get(stash);
946 if((stash = GvHV((const GV *)sv))) {
947 if(HvENAME_get(stash))
948 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
952 SvREFCNT_inc_simple_void_NN(sv_2mortal(sv));
953 gp_free(MUTABLE_GV(sv));
955 GvGP_set(sv, gp_ref(gp));
956 #ifndef PERL_DONT_CREATE_GVSV
959 GvLINE(sv) = CopLINE(PL_curcop);
960 GvEGV(sv) = MUTABLE_GV(sv);
964 mro_package_moved(NULL, stash, (const GV *)sv, 0);
966 /* undef *Foo::ISA */
967 if( strEQ(GvNAME((const GV *)sv), "ISA")
968 && (stash = GvSTASH((const GV *)sv))
969 && (method_changed || HvENAME(stash)) )
970 mro_isa_changed_in(stash);
971 else if(method_changed)
972 mro_method_changed_in(
973 GvSTASH((const GV *)sv)
979 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
993 /* common "slow" code for pp_postinc and pp_postdec */
996 S_postincdec_common(pTHX_ SV *sv, SV *targ)
1000 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
1003 TARG = sv_newmortal();
1010 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1011 if (inc && !SvOK(TARG))
1018 /* also used for: pp_i_postinc() */
1025 /* special-case sv being a simple integer */
1026 if (LIKELY(((sv->sv_flags &
1027 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV|
1028 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK))
1030 && SvIVX(sv) != IV_MAX)
1033 SvIV_set(sv, iv + 1);
1034 TARGi(iv, 0); /* arg not GMG, so can't be tainted */
1039 return S_postincdec_common(aTHX_ sv, TARG);
1043 /* also used for: pp_i_postdec() */
1050 /* special-case sv being a simple integer */
1051 if (LIKELY(((sv->sv_flags &
1052 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV|
1053 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK))
1055 && SvIVX(sv) != IV_MIN)
1058 SvIV_set(sv, iv - 1);
1059 TARGi(iv, 0); /* arg not GMG, so can't be tainted */
1064 return S_postincdec_common(aTHX_ sv, TARG);
1068 /* Ordinary operators. */
1072 dSP; dATARGET; SV *svl, *svr;
1073 #ifdef PERL_PRESERVE_IVUV
1076 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1079 #ifdef PERL_PRESERVE_IVUV
1080 /* For integer to integer power, we do the calculation by hand wherever
1081 we're sure it is safe; otherwise we call pow() and try to convert to
1082 integer afterwards. */
1083 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1091 const IV iv = SvIVX(svr);
1095 goto float_it; /* Can't do negative powers this way. */
1099 baseuok = SvUOK(svl);
1101 baseuv = SvUVX(svl);
1103 const IV iv = SvIVX(svl);
1106 baseuok = TRUE; /* effectively it's a UV now */
1108 baseuv = -iv; /* abs, baseuok == false records sign */
1111 /* now we have integer ** positive integer. */
1114 /* foo & (foo - 1) is zero only for a power of 2. */
1115 if (!(baseuv & (baseuv - 1))) {
1116 /* We are raising power-of-2 to a positive integer.
1117 The logic here will work for any base (even non-integer
1118 bases) but it can be less accurate than
1119 pow (base,power) or exp (power * log (base)) when the
1120 intermediate values start to spill out of the mantissa.
1121 With powers of 2 we know this can't happen.
1122 And powers of 2 are the favourite thing for perl
1123 programmers to notice ** not doing what they mean. */
1125 NV base = baseuok ? baseuv : -(NV)baseuv;
1130 while (power >>= 1) {
1138 SvIV_please_nomg(svr);
1141 unsigned int highbit = 8 * sizeof(UV);
1142 unsigned int diff = 8 * sizeof(UV);
1143 while (diff >>= 1) {
1145 if (baseuv >> highbit) {
1149 /* we now have baseuv < 2 ** highbit */
1150 if (power * highbit <= 8 * sizeof(UV)) {
1151 /* result will definitely fit in UV, so use UV math
1152 on same algorithm as above */
1155 const bool odd_power = cBOOL(power & 1);
1159 while (power >>= 1) {
1166 if (baseuok || !odd_power)
1167 /* answer is positive */
1169 else if (result <= (UV)IV_MAX)
1170 /* answer negative, fits in IV */
1171 SETi( -(IV)result );
1172 else if (result == (UV)IV_MIN)
1173 /* 2's complement assumption: special case IV_MIN */
1176 /* answer negative, doesn't fit */
1177 SETn( -(NV)result );
1185 NV right = SvNV_nomg(svr);
1186 NV left = SvNV_nomg(svl);
1189 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1191 We are building perl with long double support and are on an AIX OS
1192 afflicted with a powl() function that wrongly returns NaNQ for any
1193 negative base. This was reported to IBM as PMR #23047-379 on
1194 03/06/2006. The problem exists in at least the following versions
1195 of AIX and the libm fileset, and no doubt others as well:
1197 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1198 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1199 AIX 5.2.0 bos.adt.libm 5.2.0.85
1201 So, until IBM fixes powl(), we provide the following workaround to
1202 handle the problem ourselves. Our logic is as follows: for
1203 negative bases (left), we use fmod(right, 2) to check if the
1204 exponent is an odd or even integer:
1206 - if odd, powl(left, right) == -powl(-left, right)
1207 - if even, powl(left, right) == powl(-left, right)
1209 If the exponent is not an integer, the result is rightly NaNQ, so
1210 we just return that (as NV_NAN).
1214 NV mod2 = Perl_fmod( right, 2.0 );
1215 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1216 SETn( -Perl_pow( -left, right) );
1217 } else if (mod2 == 0.0) { /* even integer */
1218 SETn( Perl_pow( -left, right) );
1219 } else { /* fractional power */
1223 SETn( Perl_pow( left, right) );
1226 SETn( Perl_pow( left, right) );
1227 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1229 #ifdef PERL_PRESERVE_IVUV
1231 SvIV_please_nomg(svr);
1239 dSP; dATARGET; SV *svl, *svr;
1240 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1244 #ifdef PERL_PRESERVE_IVUV
1246 /* special-case some simple common cases */
1247 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) {
1249 U32 flags = (svl->sv_flags & svr->sv_flags);
1250 if (flags & SVf_IOK) {
1251 /* both args are simple IVs */
1256 topl = ((UV)il) >> (UVSIZE * 4 - 1);
1257 topr = ((UV)ir) >> (UVSIZE * 4 - 1);
1259 /* if both are in a range that can't under/overflow, do a
1260 * simple integer multiply: if the top halves(*) of both numbers
1261 * are 00...00 or 11...11, then it's safe.
1262 * (*) for 32-bits, the "top half" is the top 17 bits,
1263 * for 64-bits, its 33 bits */
1265 ((topl+1) | (topr+1))
1266 & ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */
1269 TARGi(il * ir, 0); /* args not GMG, so can't be tainted */
1275 else if (flags & SVf_NOK) {
1276 /* both args are NVs */
1282 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1283 !Perl_isnan(nl) && nl == (NV)(il = (IV)nl)
1284 && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr)
1286 nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr)
1289 /* nothing was lost by converting to IVs */
1293 # if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
1294 if (Perl_isinf(result)) {
1295 Zero((U8*)&result + 8, 8, U8);
1298 TARGn(result, 0); /* args not GMG, so can't be tainted */
1306 if (SvIV_please_nomg(svr)) {
1307 /* Unless the left argument is integer in range we are going to have to
1308 use NV maths. Hence only attempt to coerce the right argument if
1309 we know the left is integer. */
1310 /* Left operand is defined, so is it IV? */
1311 if (SvIV_please_nomg(svl)) {
1312 bool auvok = SvUOK(svl);
1313 bool buvok = SvUOK(svr);
1314 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1315 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1324 const IV aiv = SvIVX(svl);
1327 auvok = TRUE; /* effectively it's a UV now */
1329 /* abs, auvok == false records sign */
1330 alow = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
1336 const IV biv = SvIVX(svr);
1339 buvok = TRUE; /* effectively it's a UV now */
1341 /* abs, buvok == false records sign */
1342 blow = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
1346 /* If this does sign extension on unsigned it's time for plan B */
1347 ahigh = alow >> (4 * sizeof (UV));
1349 bhigh = blow >> (4 * sizeof (UV));
1351 if (ahigh && bhigh) {
1353 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1354 which is overflow. Drop to NVs below. */
1355 } else if (!ahigh && !bhigh) {
1356 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1357 so the unsigned multiply cannot overflow. */
1358 const UV product = alow * blow;
1359 if (auvok == buvok) {
1360 /* -ve * -ve or +ve * +ve gives a +ve result. */
1364 } else if (product <= (UV)IV_MIN) {
1365 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1366 /* -ve result, which could overflow an IV */
1368 /* can't negate IV_MIN, but there are aren't two
1369 * integers such that !ahigh && !bhigh, where the
1370 * product equals 0x800....000 */
1371 assert(product != (UV)IV_MIN);
1372 SETi( -(IV)product );
1374 } /* else drop to NVs below. */
1376 /* One operand is large, 1 small */
1379 /* swap the operands */
1381 bhigh = blow; /* bhigh now the temp var for the swap */
1385 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1386 multiplies can't overflow. shift can, add can, -ve can. */
1387 product_middle = ahigh * blow;
1388 if (!(product_middle & topmask)) {
1389 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1391 product_middle <<= (4 * sizeof (UV));
1392 product_low = alow * blow;
1394 /* as for pp_add, UV + something mustn't get smaller.
1395 IIRC ANSI mandates this wrapping *behaviour* for
1396 unsigned whatever the actual representation*/
1397 product_low += product_middle;
1398 if (product_low >= product_middle) {
1399 /* didn't overflow */
1400 if (auvok == buvok) {
1401 /* -ve * -ve or +ve * +ve gives a +ve result. */
1403 SETu( product_low );
1405 } else if (product_low <= (UV)IV_MIN) {
1406 /* 2s complement assumption again */
1407 /* -ve result, which could overflow an IV */
1409 SETi(product_low == (UV)IV_MIN
1410 ? IV_MIN : -(IV)product_low);
1412 } /* else drop to NVs below. */
1414 } /* product_middle too large */
1415 } /* ahigh && bhigh */
1420 NV right = SvNV_nomg(svr);
1421 NV left = SvNV_nomg(svl);
1422 NV result = left * right;
1425 #if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
1426 if (Perl_isinf(result)) {
1427 Zero((U8*)&result + 8, 8, U8);
1437 dSP; dATARGET; SV *svl, *svr;
1438 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1441 /* Only try to do UV divide first
1442 if ((SLOPPYDIVIDE is true) or
1443 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1445 The assumption is that it is better to use floating point divide
1446 whenever possible, only doing integer divide first if we can't be sure.
1447 If NV_PRESERVES_UV is true then we know at compile time that no UV
1448 can be too large to preserve, so don't need to compile the code to
1449 test the size of UVs. */
1452 # define PERL_TRY_UV_DIVIDE
1453 /* ensure that 20./5. == 4. */
1455 # ifdef PERL_PRESERVE_IVUV
1456 # ifndef NV_PRESERVES_UV
1457 # define PERL_TRY_UV_DIVIDE
1462 #ifdef PERL_TRY_UV_DIVIDE
1463 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1464 bool left_non_neg = SvUOK(svl);
1465 bool right_non_neg = SvUOK(svr);
1469 if (right_non_neg) {
1473 const IV biv = SvIVX(svr);
1476 right_non_neg = TRUE; /* effectively it's a UV now */
1479 right = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
1482 /* historically undef()/0 gives a "Use of uninitialized value"
1483 warning before dieing, hence this test goes here.
1484 If it were immediately before the second SvIV_please, then
1485 DIE() would be invoked before left was even inspected, so
1486 no inspection would give no warning. */
1488 DIE(aTHX_ "Illegal division by zero");
1494 const IV aiv = SvIVX(svl);
1497 left_non_neg = TRUE; /* effectively it's a UV now */
1500 left = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
1506 /* For sloppy divide we always attempt integer division. */
1508 /* Otherwise we only attempt it if either or both operands
1509 would not be preserved by an NV. If both fit in NVs
1510 we fall through to the NV divide code below. However,
1511 as left >= right to ensure integer result here, we know that
1512 we can skip the test on the right operand - right big
1513 enough not to be preserved can't get here unless left is
1516 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1519 /* Integer division can't overflow, but it can be imprecise. */
1520 const UV result = left / right;
1521 if (result * right == left) {
1522 SP--; /* result is valid */
1523 if (left_non_neg == right_non_neg) {
1524 /* signs identical, result is positive. */
1528 /* 2s complement assumption */
1529 if (result <= (UV)IV_MIN)
1530 SETi(result == (UV)IV_MIN ? IV_MIN : -(IV)result);
1532 /* It's exact but too negative for IV. */
1533 SETn( -(NV)result );
1536 } /* tried integer divide but it was not an integer result */
1537 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1538 } /* one operand wasn't SvIOK */
1539 #endif /* PERL_TRY_UV_DIVIDE */
1541 NV right = SvNV_nomg(svr);
1542 NV left = SvNV_nomg(svl);
1543 (void)POPs;(void)POPs;
1544 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1545 if (! Perl_isnan(right) && right == 0.0)
1549 DIE(aTHX_ "Illegal division by zero");
1550 PUSHn( left / right );
1558 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1562 bool left_neg = FALSE;
1563 bool right_neg = FALSE;
1564 bool use_double = FALSE;
1565 bool dright_valid = FALSE;
1568 SV * const svr = TOPs;
1569 SV * const svl = TOPm1s;
1570 if (SvIV_please_nomg(svr)) {
1571 right_neg = !SvUOK(svr);
1575 const IV biv = SvIVX(svr);
1578 right_neg = FALSE; /* effectively it's a UV now */
1580 right = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
1585 dright = SvNV_nomg(svr);
1586 right_neg = dright < 0;
1589 if (dright < UV_MAX_P1) {
1590 right = U_V(dright);
1591 dright_valid = TRUE; /* In case we need to use double below. */
1597 /* At this point use_double is only true if right is out of range for
1598 a UV. In range NV has been rounded down to nearest UV and
1599 use_double false. */
1600 if (!use_double && SvIV_please_nomg(svl)) {
1601 left_neg = !SvUOK(svl);
1605 const IV aiv = SvIVX(svl);
1608 left_neg = FALSE; /* effectively it's a UV now */
1610 left = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
1615 dleft = SvNV_nomg(svl);
1616 left_neg = dleft < 0;
1620 /* This should be exactly the 5.6 behaviour - if left and right are
1621 both in range for UV then use U_V() rather than floor. */
1623 if (dleft < UV_MAX_P1) {
1624 /* right was in range, so is dleft, so use UVs not double.
1628 /* left is out of range for UV, right was in range, so promote
1629 right (back) to double. */
1631 /* The +0.5 is used in 5.6 even though it is not strictly
1632 consistent with the implicit +0 floor in the U_V()
1633 inside the #if 1. */
1634 dleft = Perl_floor(dleft + 0.5);
1637 dright = Perl_floor(dright + 0.5);
1648 DIE(aTHX_ "Illegal modulus zero");
1650 dans = Perl_fmod(dleft, dright);
1651 if ((left_neg != right_neg) && dans)
1652 dans = dright - dans;
1655 sv_setnv(TARG, dans);
1661 DIE(aTHX_ "Illegal modulus zero");
1664 if ((left_neg != right_neg) && ans)
1667 /* XXX may warn: unary minus operator applied to unsigned type */
1668 /* could change -foo to be (~foo)+1 instead */
1669 if (ans <= ~((UV)IV_MAX)+1)
1670 sv_setiv(TARG, ~ans+1);
1672 sv_setnv(TARG, -(NV)ans);
1675 sv_setuv(TARG, ans);
1687 bool infnan = FALSE;
1689 if (GIMME_V == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1690 /* TODO: think of some way of doing list-repeat overloading ??? */
1695 if (UNLIKELY(PL_op->op_private & OPpREPEAT_DOLIST)) {
1696 /* The parser saw this as a list repeat, and there
1697 are probably several items on the stack. But we're
1698 in scalar/void context, and there's no pp_list to save us
1699 now. So drop the rest of the items -- robin@kitsite.com
1702 if (MARK + 1 < SP) {
1708 ASSUME(MARK + 1 == SP);
1710 MARK[1] = &PL_sv_undef;
1714 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1720 const UV uv = SvUV_nomg(sv);
1722 count = IV_MAX; /* The best we can do? */
1726 count = SvIV_nomg(sv);
1729 else if (SvNOKp(sv)) {
1730 const NV nv = SvNV_nomg(sv);
1731 infnan = Perl_isinfnan(nv);
1732 if (UNLIKELY(infnan)) {
1736 count = -1; /* An arbitrary negative integer */
1742 count = SvIV_nomg(sv);
1745 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC),
1746 "Non-finite repeat count does nothing");
1747 } else if (count < 0) {
1749 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC),
1750 "Negative repeat count does nothing");
1753 if (GIMME_V == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1755 const SSize_t items = SP - MARK;
1756 const U8 mod = PL_op->op_flags & OPf_MOD;
1761 if ( items > SSize_t_MAX / count /* max would overflow */
1762 /* repeatcpy would overflow */
1763 || items > I32_MAX / (I32)sizeof(SV *)
1765 Perl_croak(aTHX_ "%s","Out of memory during list extend");
1766 max = items * count;
1771 if (mod && SvPADTMP(*SP)) {
1772 *SP = sv_mortalcopy(*SP);
1779 repeatcpy((char*)(MARK + items), (char*)MARK,
1780 items * sizeof(const SV *), count - 1);
1783 else if (count <= 0)
1786 else { /* Note: mark already snarfed by pp_list */
1787 SV * const tmpstr = POPs;
1792 sv_setsv_nomg(TARG, tmpstr);
1793 SvPV_force_nomg(TARG, len);
1794 isutf = DO_UTF8(TARG);
1801 if ( len > (MEM_SIZE_MAX-1) / (UV)count /* max would overflow */
1802 || len > (U32)I32_MAX /* repeatcpy would overflow */
1804 Perl_croak(aTHX_ "%s",
1805 "Out of memory during string extend");
1806 max = (UV)count * len + 1;
1809 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1810 SvCUR_set(TARG, SvCUR(TARG) * count);
1812 *SvEND(TARG) = '\0';
1815 (void)SvPOK_only_UTF8(TARG);
1817 (void)SvPOK_only(TARG);
1826 dSP; dATARGET; bool useleft; SV *svl, *svr;
1827 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1831 #ifdef PERL_PRESERVE_IVUV
1833 /* special-case some simple common cases */
1834 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) {
1836 U32 flags = (svl->sv_flags & svr->sv_flags);
1837 if (flags & SVf_IOK) {
1838 /* both args are simple IVs */
1843 topl = ((UV)il) >> (UVSIZE * 8 - 2);
1844 topr = ((UV)ir) >> (UVSIZE * 8 - 2);
1846 /* if both are in a range that can't under/overflow, do a
1847 * simple integer subtract: if the top of both numbers
1848 * are 00 or 11, then it's safe */
1849 if (!( ((topl+1) | (topr+1)) & 2)) {
1851 TARGi(il - ir, 0); /* args not GMG, so can't be tainted */
1857 else if (flags & SVf_NOK) {
1858 /* both args are NVs */
1863 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1864 !Perl_isnan(nl) && nl == (NV)(il = (IV)nl)
1865 && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr)
1867 nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr)
1870 /* nothing was lost by converting to IVs */
1873 TARGn(nl - nr, 0); /* args not GMG, so can't be tainted */
1881 useleft = USE_LEFT(svl);
1882 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1883 "bad things" happen if you rely on signed integers wrapping. */
1884 if (SvIV_please_nomg(svr)) {
1885 /* Unless the left argument is integer in range we are going to have to
1886 use NV maths. Hence only attempt to coerce the right argument if
1887 we know the left is integer. */
1894 a_valid = auvok = 1;
1895 /* left operand is undef, treat as zero. */
1897 /* Left operand is defined, so is it IV? */
1898 if (SvIV_please_nomg(svl)) {
1899 if ((auvok = SvUOK(svl)))
1902 const IV aiv = SvIVX(svl);
1905 auvok = 1; /* Now acting as a sign flag. */
1906 } else { /* 2s complement assumption for IV_MIN */
1907 auv = (aiv == IV_MIN) ? (UV)aiv : (UV)-aiv;
1914 bool result_good = 0;
1917 bool buvok = SvUOK(svr);
1922 const IV biv = SvIVX(svr);
1927 buv = (biv == IV_MIN) ? (UV)biv : (UV)-biv;
1929 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1930 else "IV" now, independent of how it came in.
1931 if a, b represents positive, A, B negative, a maps to -A etc
1936 all UV maths. negate result if A negative.
1937 subtract if signs same, add if signs differ. */
1939 if (auvok ^ buvok) {
1948 /* Must get smaller */
1953 if (result <= buv) {
1954 /* result really should be -(auv-buv). as its negation
1955 of true value, need to swap our result flag */
1967 if (result <= (UV)IV_MIN)
1968 SETi(result == (UV)IV_MIN
1969 ? IV_MIN : -(IV)result);
1971 /* result valid, but out of range for IV. */
1972 SETn( -(NV)result );
1976 } /* Overflow, drop through to NVs. */
1980 useleft = USE_LEFT(svl);
1983 NV value = SvNV_nomg(svr);
1987 /* left operand is undef, treat as zero - value */
1991 SETn( SvNV_nomg(svl) - value );
1996 #define IV_BITS (IVSIZE * 8)
1998 static UV S_uv_shift(UV uv, int shift, bool left)
2004 if (shift >= IV_BITS) {
2007 return left ? uv << shift : uv >> shift;
2010 static IV S_iv_shift(IV iv, int shift, bool left)
2016 if (shift >= IV_BITS) {
2017 return iv < 0 && !left ? -1 : 0;
2019 return left ? iv << shift : iv >> shift;
2022 #define UV_LEFT_SHIFT(uv, shift) S_uv_shift(uv, shift, TRUE)
2023 #define UV_RIGHT_SHIFT(uv, shift) S_uv_shift(uv, shift, FALSE)
2024 #define IV_LEFT_SHIFT(iv, shift) S_iv_shift(iv, shift, TRUE)
2025 #define IV_RIGHT_SHIFT(iv, shift) S_iv_shift(iv, shift, FALSE)
2029 dSP; dATARGET; SV *svl, *svr;
2030 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
2034 const IV shift = SvIV_nomg(svr);
2035 if (PL_op->op_private & HINT_INTEGER) {
2036 SETi(IV_LEFT_SHIFT(SvIV_nomg(svl), shift));
2039 SETu(UV_LEFT_SHIFT(SvUV_nomg(svl), shift));
2047 dSP; dATARGET; SV *svl, *svr;
2048 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
2052 const IV shift = SvIV_nomg(svr);
2053 if (PL_op->op_private & HINT_INTEGER) {
2054 SETi(IV_RIGHT_SHIFT(SvIV_nomg(svl), shift));
2057 SETu(UV_RIGHT_SHIFT(SvUV_nomg(svl), shift));
2068 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
2072 (SvIOK_notUV(left) && SvIOK_notUV(right))
2073 ? (SvIVX(left) < SvIVX(right))
2074 : (do_ncmp(left, right) == -1)
2084 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
2088 (SvIOK_notUV(left) && SvIOK_notUV(right))
2089 ? (SvIVX(left) > SvIVX(right))
2090 : (do_ncmp(left, right) == 1)
2100 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
2104 (SvIOK_notUV(left) && SvIOK_notUV(right))
2105 ? (SvIVX(left) <= SvIVX(right))
2106 : (do_ncmp(left, right) <= 0)
2116 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2120 (SvIOK_notUV(left) && SvIOK_notUV(right))
2121 ? (SvIVX(left) >= SvIVX(right))
2122 : ( (do_ncmp(left, right) & 2) == 0)
2132 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2136 (SvIOK_notUV(left) && SvIOK_notUV(right))
2137 ? (SvIVX(left) != SvIVX(right))
2138 : (do_ncmp(left, right) != 0)
2143 /* compare left and right SVs. Returns:
2147 * 2: left or right was a NaN
2150 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2152 PERL_ARGS_ASSERT_DO_NCMP;
2153 #ifdef PERL_PRESERVE_IVUV
2154 /* Fortunately it seems NaN isn't IOK */
2155 if (SvIV_please_nomg(right) && SvIV_please_nomg(left)) {
2157 const IV leftiv = SvIVX(left);
2158 if (!SvUOK(right)) {
2159 /* ## IV <=> IV ## */
2160 const IV rightiv = SvIVX(right);
2161 return (leftiv > rightiv) - (leftiv < rightiv);
2163 /* ## IV <=> UV ## */
2165 /* As (b) is a UV, it's >=0, so it must be < */
2168 const UV rightuv = SvUVX(right);
2169 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2174 /* ## UV <=> UV ## */
2175 const UV leftuv = SvUVX(left);
2176 const UV rightuv = SvUVX(right);
2177 return (leftuv > rightuv) - (leftuv < rightuv);
2179 /* ## UV <=> IV ## */
2181 const IV rightiv = SvIVX(right);
2183 /* As (a) is a UV, it's >=0, so it cannot be < */
2186 const UV leftuv = SvUVX(left);
2187 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2190 NOT_REACHED; /* NOTREACHED */
2194 NV const rnv = SvNV_nomg(right);
2195 NV const lnv = SvNV_nomg(left);
2197 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2198 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2201 return (lnv > rnv) - (lnv < rnv);
2220 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2223 value = do_ncmp(left, right);
2235 /* also used for: pp_sge() pp_sgt() pp_slt() */
2241 int amg_type = sle_amg;
2245 switch (PL_op->op_type) {
2264 tryAMAGICbin_MG(amg_type, AMGf_set);
2268 #ifdef USE_LOCALE_COLLATE
2269 (IN_LC_RUNTIME(LC_COLLATE))
2270 ? sv_cmp_locale_flags(left, right, 0)
2273 sv_cmp_flags(left, right, 0);
2274 SETs(boolSV(cmp * multiplier < rhs));
2282 tryAMAGICbin_MG(seq_amg, AMGf_set);
2285 SETs(boolSV(sv_eq_flags(left, right, 0)));
2293 tryAMAGICbin_MG(sne_amg, AMGf_set);
2296 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2304 tryAMAGICbin_MG(scmp_amg, 0);
2308 #ifdef USE_LOCALE_COLLATE
2309 (IN_LC_RUNTIME(LC_COLLATE))
2310 ? sv_cmp_locale_flags(left, right, 0)
2313 sv_cmp_flags(left, right, 0);
2322 tryAMAGICbin_MG(band_amg, AMGf_assign);
2325 if (SvNIOKp(left) || SvNIOKp(right)) {
2326 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2327 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2328 if (PL_op->op_private & HINT_INTEGER) {
2329 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2333 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2336 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2337 if (right_ro_nonnum) SvNIOK_off(right);
2340 do_vop(PL_op->op_type, TARG, left, right);
2350 tryAMAGICbin_MG(band_amg, AMGf_assign|AMGf_numarg);
2352 dATARGET; dPOPTOPssrl;
2353 if (PL_op->op_private & HINT_INTEGER) {
2354 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2358 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2368 tryAMAGICbin_MG(sband_amg, AMGf_assign);
2370 dATARGET; dPOPTOPssrl;
2371 do_vop(OP_BIT_AND, TARG, left, right);
2376 /* also used for: pp_bit_xor() */
2381 const int op_type = PL_op->op_type;
2383 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2386 if (SvNIOKp(left) || SvNIOKp(right)) {
2387 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2388 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2389 if (PL_op->op_private & HINT_INTEGER) {
2390 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2391 const IV r = SvIV_nomg(right);
2392 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2396 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2397 const UV r = SvUV_nomg(right);
2398 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2401 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2402 if (right_ro_nonnum) SvNIOK_off(right);
2405 do_vop(op_type, TARG, left, right);
2412 /* also used for: pp_nbit_xor() */
2417 const int op_type = PL_op->op_type;
2419 tryAMAGICbin_MG((op_type == OP_NBIT_OR ? bor_amg : bxor_amg),
2420 AMGf_assign|AMGf_numarg);
2422 dATARGET; dPOPTOPssrl;
2423 if (PL_op->op_private & HINT_INTEGER) {
2424 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2425 const IV r = SvIV_nomg(right);
2426 const IV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r);
2430 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2431 const UV r = SvUV_nomg(right);
2432 const UV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r);
2439 /* also used for: pp_sbit_xor() */
2444 const int op_type = PL_op->op_type;
2446 tryAMAGICbin_MG((op_type == OP_SBIT_OR ? sbor_amg : sbxor_amg),
2449 dATARGET; dPOPTOPssrl;
2450 do_vop(op_type == OP_SBIT_OR ? OP_BIT_OR : OP_BIT_XOR, TARG, left,
2456 PERL_STATIC_INLINE bool
2457 S_negate_string(pTHX)
2462 SV * const sv = TOPs;
2463 if (!SvPOKp(sv) || SvNIOK(sv) || (!SvPOK(sv) && SvNIOKp(sv)))
2465 s = SvPV_nomg_const(sv, len);
2466 if (isIDFIRST(*s)) {
2467 sv_setpvs(TARG, "-");
2470 else if (*s == '+' || (*s == '-' && !looks_like_number(sv))) {
2471 sv_setsv_nomg(TARG, sv);
2472 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2482 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2483 if (S_negate_string(aTHX)) return NORMAL;
2485 SV * const sv = TOPs;
2488 /* It's publicly an integer */
2491 if (SvIVX(sv) == IV_MIN) {
2492 /* 2s complement assumption. */
2493 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) ==
2497 else if (SvUVX(sv) <= IV_MAX) {
2502 else if (SvIVX(sv) != IV_MIN) {
2506 #ifdef PERL_PRESERVE_IVUV
2513 if (SvNIOKp(sv) && (SvNIOK(sv) || !SvPOK(sv)))
2514 SETn(-SvNV_nomg(sv));
2515 else if (SvPOKp(sv) && SvIV_please_nomg(sv))
2516 goto oops_its_an_int;
2518 SETn(-SvNV_nomg(sv));
2528 tryAMAGICun_MG(not_amg, AMGf_set);
2530 *PL_stack_sp = boolSV(!SvTRUE_nomg_NN(sv));
2535 S_scomplement(pTHX_ SV *targ, SV *sv)
2541 sv_copypv_nomg(TARG, sv);
2542 tmps = (U8*)SvPV_nomg(TARG, len);
2545 if (len && ! utf8_to_bytes(tmps, &len)) {
2546 Perl_croak(aTHX_ fatal_above_ff_msg, PL_op_desc[PL_op->op_type]);
2557 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2560 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2565 for ( ; anum > 0; anum--, tmps++)
2572 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2576 if (PL_op->op_private & HINT_INTEGER) {
2577 const IV i = ~SvIV_nomg(sv);
2581 const UV u = ~SvUV_nomg(sv);
2586 S_scomplement(aTHX_ TARG, sv);
2596 tryAMAGICun_MG(compl_amg, AMGf_numeric|AMGf_numarg);
2599 if (PL_op->op_private & HINT_INTEGER) {
2600 const IV i = ~SvIV_nomg(sv);
2604 const UV u = ~SvUV_nomg(sv);
2614 tryAMAGICun_MG(scompl_amg, AMGf_numeric);
2617 S_scomplement(aTHX_ TARG, sv);
2623 /* integer versions of some of the above */
2628 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2631 SETi( left * right );
2640 tryAMAGICbin_MG(div_amg, AMGf_assign);
2643 IV value = SvIV_nomg(right);
2645 DIE(aTHX_ "Illegal division by zero");
2646 num = SvIV_nomg(left);
2648 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2652 value = num / value;
2660 /* This is the vanilla old i_modulo. */
2662 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2666 DIE(aTHX_ "Illegal modulus zero");
2667 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2671 SETi( left % right );
2676 #if defined(__GLIBC__) && IVSIZE == 8 \
2677 && ( __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8))
2679 PP(pp_i_modulo_glibc_bugfix)
2681 /* This is the i_modulo with the workaround for the _moddi3 bug
2682 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2683 * See below for pp_i_modulo. */
2685 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2689 DIE(aTHX_ "Illegal modulus zero");
2690 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2694 SETi( left % PERL_ABS(right) );
2703 tryAMAGICbin_MG(add_amg, AMGf_assign);
2705 dPOPTOPiirl_ul_nomg;
2706 SETi( left + right );
2714 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2716 dPOPTOPiirl_ul_nomg;
2717 SETi( left - right );
2725 tryAMAGICbin_MG(lt_amg, AMGf_set);
2728 SETs(boolSV(left < right));
2736 tryAMAGICbin_MG(gt_amg, AMGf_set);
2739 SETs(boolSV(left > right));
2747 tryAMAGICbin_MG(le_amg, AMGf_set);
2750 SETs(boolSV(left <= right));
2758 tryAMAGICbin_MG(ge_amg, AMGf_set);
2761 SETs(boolSV(left >= right));
2769 tryAMAGICbin_MG(eq_amg, AMGf_set);
2772 SETs(boolSV(left == right));
2780 tryAMAGICbin_MG(ne_amg, AMGf_set);
2783 SETs(boolSV(left != right));
2791 tryAMAGICbin_MG(ncmp_amg, 0);
2798 else if (left < right)
2810 tryAMAGICun_MG(neg_amg, 0);
2811 if (S_negate_string(aTHX)) return NORMAL;
2813 SV * const sv = TOPs;
2814 IV const i = SvIV_nomg(sv);
2820 /* High falutin' math. */
2825 tryAMAGICbin_MG(atan2_amg, 0);
2828 SETn(Perl_atan2(left, right));
2834 /* also used for: pp_cos() pp_exp() pp_log() pp_sqrt() */
2839 int amg_type = fallback_amg;
2840 const char *neg_report = NULL;
2841 const int op_type = PL_op->op_type;
2844 case OP_SIN: amg_type = sin_amg; break;
2845 case OP_COS: amg_type = cos_amg; break;
2846 case OP_EXP: amg_type = exp_amg; break;
2847 case OP_LOG: amg_type = log_amg; neg_report = "log"; break;
2848 case OP_SQRT: amg_type = sqrt_amg; neg_report = "sqrt"; break;
2851 assert(amg_type != fallback_amg);
2853 tryAMAGICun_MG(amg_type, 0);
2855 SV * const arg = TOPs;
2856 const NV value = SvNV_nomg(arg);
2862 if (neg_report) { /* log or sqrt */
2864 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2865 ! Perl_isnan(value) &&
2867 (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0))) {
2868 SET_NUMERIC_STANDARD();
2869 /* diag_listed_as: Can't take log of %g */
2870 DIE(aTHX_ "Can't take %s of %" NVgf, neg_report, value);
2875 case OP_SIN: result = Perl_sin(value); break;
2876 case OP_COS: result = Perl_cos(value); break;
2877 case OP_EXP: result = Perl_exp(value); break;
2878 case OP_LOG: result = Perl_log(value); break;
2879 case OP_SQRT: result = Perl_sqrt(value); break;
2886 /* Support Configure command-line overrides for rand() functions.
2887 After 5.005, perhaps we should replace this by Configure support
2888 for drand48(), random(), or rand(). For 5.005, though, maintain
2889 compatibility by calling rand() but allow the user to override it.
2890 See INSTALL for details. --Andy Dougherty 15 July 1998
2892 /* Now it's after 5.005, and Configure supports drand48() and random(),
2893 in addition to rand(). So the overrides should not be needed any more.
2894 --Jarkko Hietaniemi 27 September 1998
2899 if (!PL_srand_called) {
2900 (void)seedDrand01((Rand_seed_t)seed());
2901 PL_srand_called = TRUE;
2913 SV * const sv = POPs;
2919 /* 1 of 2 things can be carried through SvNV, SP or TARG, SP was carried */
2920 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2921 if (! Perl_isnan(value) && value == 0.0)
2931 sv_setnv_mg(TARG, value);
2942 if (MAXARG >= 1 && (TOPs || POPs)) {
2949 pv = SvPV(top, len);
2950 flags = grok_number(pv, len, &anum);
2952 if (!(flags & IS_NUMBER_IN_UV)) {
2953 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
2954 "Integer overflow in srand");
2962 (void)seedDrand01((Rand_seed_t)anum);
2963 PL_srand_called = TRUE;
2967 /* Historically srand always returned true. We can avoid breaking
2969 sv_setpvs(TARG, "0 but true");
2978 tryAMAGICun_MG(int_amg, AMGf_numeric);
2980 SV * const sv = TOPs;
2981 const IV iv = SvIV_nomg(sv);
2982 /* XXX it's arguable that compiler casting to IV might be subtly
2983 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2984 else preferring IV has introduced a subtle behaviour change bug. OTOH
2985 relying on floating point to be accurate is a bug. */
2990 else if (SvIOK(sv)) {
2992 SETu(SvUV_nomg(sv));
2997 const NV value = SvNV_nomg(sv);
2998 if (UNLIKELY(Perl_isinfnan(value)))
3000 else if (value >= 0.0) {
3001 if (value < (NV)UV_MAX + 0.5) {
3004 SETn(Perl_floor(value));
3008 if (value > (NV)IV_MIN - 0.5) {
3011 SETn(Perl_ceil(value));
3022 tryAMAGICun_MG(abs_amg, AMGf_numeric);
3024 SV * const sv = TOPs;
3025 /* This will cache the NV value if string isn't actually integer */
3026 const IV iv = SvIV_nomg(sv);
3031 else if (SvIOK(sv)) {
3032 /* IVX is precise */
3034 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
3042 /* 2s complement assumption. Also, not really needed as
3043 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
3049 const NV value = SvNV_nomg(sv);
3060 /* also used for: pp_hex() */
3066 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
3070 SV* const sv = TOPs;
3072 tmps = (SvPV_const(sv, len));
3074 /* If Unicode, try to downgrade
3075 * If not possible, croak. */
3076 SV* const tsv = sv_2mortal(newSVsv(sv));
3079 sv_utf8_downgrade(tsv, FALSE);
3080 tmps = SvPV_const(tsv, len);
3082 if (PL_op->op_type == OP_HEX)
3085 while (*tmps && len && isSPACE(*tmps))
3089 if (isALPHA_FOLD_EQ(*tmps, 'x')) {
3091 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
3093 else if (isALPHA_FOLD_EQ(*tmps, 'b'))
3094 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
3096 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
3098 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
3113 SV * const sv = TOPs;
3115 U32 in_bytes = IN_BYTES;
3116 /* Simplest case shortcut:
3117 * set svflags to just the SVf_POK|SVs_GMG|SVf_UTF8 from the SV,
3118 * with the SVf_UTF8 flag inverted if under 'use bytes' (HINT_BYTES
3121 U32 svflags = (SvFLAGS(sv) ^ (in_bytes << 26)) & (SVf_POK|SVs_GMG|SVf_UTF8);
3123 STATIC_ASSERT_STMT(SVf_UTF8 == (HINT_BYTES << 26));
3126 if (LIKELY(svflags == SVf_POK))
3129 if (svflags & SVs_GMG)
3134 if (!IN_BYTES) { /* reread to avoid using an C auto/register */
3135 if ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8)) == SVf_POK)
3137 if ( SvPOK(sv) && (PL_op->op_private & OPpTRUEBOOL)) {
3138 /* no need to convert from bytes to chars */
3142 len = sv_len_utf8_nomg(sv);
3145 /* unrolled SvPV_nomg_const(sv,len) */
3146 if (SvPOK_nog(sv)) {
3149 if (PL_op->op_private & OPpTRUEBOOL) {
3151 SETs(len ? &PL_sv_yes : &PL_sv_zero);
3156 (void)sv_2pv_flags(sv, &len, 0|SV_CONST_RETURN);
3159 TARGi((IV)(len), 1);
3162 if (!SvPADTMP(TARG)) {
3163 /* OPpTARGET_MY: targ is var in '$lex = length()' */
3168 /* TARG is on stack at this point and is overwriten by SETs.
3169 * This branch is the odd one out, so put TARG by default on
3170 * stack earlier to let local SP go out of liveness sooner */
3173 return NORMAL; /* no putback, SP didn't move in this opcode */
3177 /* Returns false if substring is completely outside original string.
3178 No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must
3179 always be true for an explicit 0.
3182 Perl_translate_substr_offsets( STRLEN curlen, IV pos1_iv,
3183 bool pos1_is_uv, IV len_iv,
3184 bool len_is_uv, STRLEN *posp,
3190 PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
3192 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
3193 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3196 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
3199 if (len_iv || len_is_uv) {
3200 if (!len_is_uv && len_iv < 0) {
3201 pos2_iv = curlen + len_iv;
3203 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3206 } else { /* len_iv >= 0 */
3207 if (!pos1_is_uv && pos1_iv < 0) {
3208 pos2_iv = pos1_iv + len_iv;
3209 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3211 if ((UV)len_iv > curlen-(UV)pos1_iv)
3214 pos2_iv = pos1_iv+len_iv;
3224 if (!pos2_is_uv && pos2_iv < 0) {
3225 if (!pos1_is_uv && pos1_iv < 0)
3229 else if (!pos1_is_uv && pos1_iv < 0)
3232 if ((UV)pos2_iv < (UV)pos1_iv)
3234 if ((UV)pos2_iv > curlen)
3237 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3238 *posp = (STRLEN)( (UV)pos1_iv );
3239 *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3256 I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3257 const bool rvalue = (GIMME_V != G_VOID);
3260 const char *repl = NULL;
3262 int num_args = PL_op->op_private & 7;
3263 bool repl_need_utf8_upgrade = FALSE;
3267 if(!(repl_sv = POPs)) num_args--;
3269 if ((len_sv = POPs)) {
3270 len_iv = SvIV(len_sv);
3271 len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1;
3276 pos1_iv = SvIV(pos_sv);
3277 pos1_is_uv = SvIOK_UV(pos_sv);
3279 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) {
3283 if (lvalue && !repl_sv) {
3285 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3286 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3288 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3290 pos1_is_uv || pos1_iv >= 0
3291 ? (STRLEN)(UV)pos1_iv
3292 : (LvFLAGS(ret) |= LVf_NEG_OFF, (STRLEN)(UV)-pos1_iv);
3294 len_is_uv || len_iv > 0
3295 ? (STRLEN)(UV)len_iv
3296 : (LvFLAGS(ret) |= LVf_NEG_LEN, (STRLEN)(UV)-len_iv);
3298 PUSHs(ret); /* avoid SvSETMAGIC here */
3302 repl = SvPV_const(repl_sv, repl_len);
3305 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3306 "Attempt to use reference as lvalue in substr"
3308 tmps = SvPV_force_nomg(sv, curlen);
3309 if (DO_UTF8(repl_sv) && repl_len) {
3311 /* Upgrade the dest, and recalculate tmps in case the buffer
3312 * got reallocated; curlen may also have been changed */
3313 sv_utf8_upgrade_nomg(sv);
3314 tmps = SvPV_nomg(sv, curlen);
3317 else if (DO_UTF8(sv))
3318 repl_need_utf8_upgrade = TRUE;
3320 else tmps = SvPV_const(sv, curlen);
3322 utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen);
3323 if (utf8_curlen == curlen)
3326 curlen = utf8_curlen;
3332 STRLEN pos, len, byte_len, byte_pos;
3334 if (!translate_substr_offsets(
3335 curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len
3339 byte_pos = utf8_curlen
3340 ? sv_or_pv_pos_u2b(sv, tmps, pos, &byte_len) : pos;
3345 SvTAINTED_off(TARG); /* decontaminate */
3346 SvUTF8_off(TARG); /* decontaminate */
3347 sv_setpvn(TARG, tmps, byte_len);
3348 #ifdef USE_LOCALE_COLLATE
3349 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3356 SV* repl_sv_copy = NULL;
3358 if (repl_need_utf8_upgrade) {
3359 repl_sv_copy = newSVsv(repl_sv);
3360 sv_utf8_upgrade(repl_sv_copy);
3361 repl = SvPV_const(repl_sv_copy, repl_len);
3365 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3366 SvREFCNT_dec(repl_sv_copy);
3369 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST)
3379 Perl_croak(aTHX_ "substr outside of string");
3380 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3387 const IV size = POPi;
3388 SV* offsetsv = POPs;
3389 SV * const src = POPs;
3390 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3396 /* extract a STRLEN-ranged integer value from offsetsv into offset,
3397 * or flag that its out of range */
3399 IV iv = SvIV(offsetsv);
3401 /* avoid a large UV being wrapped to a negative value */
3402 if (SvIOK_UV(offsetsv) && SvUVX(offsetsv) > (UV)IV_MAX)
3403 errflags = LVf_OUT_OF_RANGE;
3405 errflags = (LVf_NEG_OFF|LVf_OUT_OF_RANGE);
3406 #if PTRSIZE < IVSIZE
3407 else if (iv > Size_t_MAX)
3408 errflags = LVf_OUT_OF_RANGE;
3411 offset = (STRLEN)iv;
3414 retuv = errflags ? 0 : do_vecget(src, offset, size);
3416 if (lvalue) { /* it's an lvalue! */
3417 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3418 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3420 LvTARG(ret) = SvREFCNT_inc_simple(src);
3421 LvTARGOFF(ret) = offset;
3422 LvTARGLEN(ret) = size;
3423 LvFLAGS(ret) = errflags;
3427 SvTAINTED_off(TARG); /* decontaminate */
3431 sv_setuv(ret, retuv);
3439 /* also used for: pp_rindex() */
3452 const char *little_p;
3455 const bool is_index = PL_op->op_type == OP_INDEX;
3456 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
3462 big_p = SvPV_const(big, biglen);
3463 little_p = SvPV_const(little, llen);
3465 big_utf8 = DO_UTF8(big);
3466 little_utf8 = DO_UTF8(little);
3467 if (big_utf8 ^ little_utf8) {
3468 /* One needs to be upgraded. */
3470 /* Well, maybe instead we might be able to downgrade the small
3472 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3475 /* If the large string is ISO-8859-1, and it's not possible to
3476 convert the small string to ISO-8859-1, then there is no
3477 way that it could be found anywhere by index. */
3482 /* At this point, pv is a malloc()ed string. So donate it to temp
3483 to ensure it will get free()d */
3484 little = temp = newSV(0);
3485 sv_usepvn(temp, pv, llen);
3486 little_p = SvPVX(little);
3488 temp = newSVpvn(little_p, llen);
3490 sv_utf8_upgrade(temp);
3492 little_p = SvPV_const(little, llen);
3495 if (SvGAMAGIC(big)) {
3496 /* Life just becomes a lot easier if I use a temporary here.
3497 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3498 will trigger magic and overloading again, as will fbm_instr()
3500 big = newSVpvn_flags(big_p, biglen,
3501 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3504 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3505 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3506 warn on undef, and we've already triggered a warning with the
3507 SvPV_const some lines above. We can't remove that, as we need to
3508 call some SvPV to trigger overloading early and find out if the
3510 This is all getting too messy. The API isn't quite clean enough,
3511 because data access has side effects.
3513 little = newSVpvn_flags(little_p, llen,
3514 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3515 little_p = SvPVX(little);
3519 offset = is_index ? 0 : biglen;
3521 if (big_utf8 && offset > 0)
3522 offset = sv_pos_u2b_flags(big, offset, 0, SV_CONST_RETURN);
3528 else if (offset > (SSize_t)biglen)
3530 if (!(little_p = is_index
3531 ? fbm_instr((unsigned char*)big_p + offset,
3532 (unsigned char*)big_p + biglen, little, 0)
3533 : rninstr(big_p, big_p + offset,
3534 little_p, little_p + llen)))
3537 retval = little_p - big_p;
3538 if (retval > 1 && big_utf8)
3539 retval = sv_pos_b2u_flags(big, retval, SV_CONST_RETURN);
3544 /* OPpTRUEBOOL indicates an '== -1' has been optimised away */
3545 if (PL_op->op_private & OPpTRUEBOOL) {
3546 PUSHs( ((retval != -1) ^ cBOOL(PL_op->op_private & OPpINDEX_BOOLNEG))
3547 ? &PL_sv_yes : &PL_sv_no);
3548 if (PL_op->op_private & OPpTARGET_MY)
3549 /* $lex = (index() == -1) */
3550 sv_setsv(TARG, TOPs);
3559 dSP; dMARK; dORIGMARK; dTARGET;
3560 SvTAINTED_off(TARG);
3561 do_sprintf(TARG, SP-MARK, MARK+1);
3562 TAINT_IF(SvTAINTED(TARG));
3574 const U8 *s = (U8*)SvPV_const(argsv, len);
3577 ? (len ? utf8n_to_uvchr(s, len, 0, UTF8_ALLOW_ANYUV) : 0)
3591 if (UNLIKELY(SvAMAGIC(top)))
3593 if (UNLIKELY(isinfnansv(top)))
3594 Perl_croak(aTHX_ "Cannot chr %" NVgf, SvNV(top));
3596 if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */
3597 && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0)
3599 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top)))
3600 && SvNV_nomg(top) < 0.0)))
3602 if (ckWARN(WARN_UTF8)) {
3603 if (SvGMAGICAL(top)) {
3604 SV *top2 = sv_newmortal();
3605 sv_setsv_nomg(top2, top);
3608 Perl_warner(aTHX_ packWARN(WARN_UTF8),
3609 "Invalid negative number (%" SVf ") in chr", SVfARG(top));
3611 value = UNICODE_REPLACEMENT;
3613 value = SvUV_nomg(top);
3617 SvUPGRADE(TARG,SVt_PV);
3619 if (value > 255 && !IN_BYTES) {
3620 SvGROW(TARG, (STRLEN)UVCHR_SKIP(value)+1);
3621 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3622 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3624 (void)SvPOK_only(TARG);
3633 *tmps++ = (char)value;
3635 (void)SvPOK_only(TARG);
3647 const char *tmps = SvPV_const(left, len);
3649 if (DO_UTF8(left)) {
3650 /* If Unicode, try to downgrade.
3651 * If not possible, croak.
3652 * Yes, we made this up. */
3653 SV* const tsv = newSVpvn_flags(tmps, len, SVf_UTF8|SVs_TEMP);
3655 sv_utf8_downgrade(tsv, FALSE);
3656 tmps = SvPV_const(tsv, len);
3658 # ifdef USE_ITHREADS
3660 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3661 /* This should be threadsafe because in ithreads there is only
3662 * one thread per interpreter. If this would not be true,
3663 * we would need a mutex to protect this malloc. */
3664 PL_reentrant_buffer->_crypt_struct_buffer =
3665 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3666 #if defined(__GLIBC__) || defined(__EMX__)
3667 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3668 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3669 /* work around glibc-2.2.5 bug */
3670 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3674 # endif /* HAS_CRYPT_R */
3675 # endif /* USE_ITHREADS */
3677 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3679 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3686 "The crypt() function is unimplemented due to excessive paranoia.");
3690 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3691 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3694 /* also used for: pp_lcfirst() */
3698 /* Actually is both lcfirst() and ucfirst(). Only the first character
3699 * changes. This means that possibly we can change in-place, ie., just
3700 * take the source and change that one character and store it back, but not
3701 * if read-only etc, or if the length changes */
3705 STRLEN slen; /* slen is the byte length of the whole SV. */
3708 bool inplace; /* ? Convert first char only, in-place */
3709 bool doing_utf8 = FALSE; /* ? using utf8 */
3710 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3711 const int op_type = PL_op->op_type;
3714 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3715 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3716 * stored as UTF-8 at s. */
3717 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3718 * lowercased) character stored in tmpbuf. May be either
3719 * UTF-8 or not, but in either case is the number of bytes */
3721 s = (const U8*)SvPV_const(source, slen);
3723 /* We may be able to get away with changing only the first character, in
3724 * place, but not if read-only, etc. Later we may discover more reasons to
3725 * not convert in-place. */
3726 inplace = !SvREADONLY(source) && SvPADTMP(source);
3728 /* First calculate what the changed first character should be. This affects
3729 * whether we can just swap it out, leaving the rest of the string unchanged,
3730 * or even if have to convert the dest to UTF-8 when the source isn't */
3732 if (! slen) { /* If empty */
3733 need = 1; /* still need a trailing NUL */
3736 else if (DO_UTF8(source)) { /* Is the source utf8? */
3739 if (op_type == OP_UCFIRST) {
3740 #ifdef USE_LOCALE_CTYPE
3741 _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
3743 _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, 0);
3747 #ifdef USE_LOCALE_CTYPE
3748 _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
3750 _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, 0);
3754 /* we can't do in-place if the length changes. */
3755 if (ulen != tculen) inplace = FALSE;
3756 need = slen + 1 - ulen + tculen;
3758 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3759 * latin1 is treated as caseless. Note that a locale takes
3761 ulen = 1; /* Original character is 1 byte */
3762 tculen = 1; /* Most characters will require one byte, but this will
3763 * need to be overridden for the tricky ones */
3766 if (op_type == OP_LCFIRST) {
3768 /* lower case the first letter: no trickiness for any character */
3769 #ifdef USE_LOCALE_CTYPE
3770 if (IN_LC_RUNTIME(LC_CTYPE)) {
3771 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
3772 *tmpbuf = toLOWER_LC(*s);
3777 *tmpbuf = (IN_UNI_8_BIT)
3778 ? toLOWER_LATIN1(*s)
3782 #ifdef USE_LOCALE_CTYPE
3784 else if (IN_LC_RUNTIME(LC_CTYPE)) {
3785 if (IN_UTF8_CTYPE_LOCALE) {
3789 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
3790 *tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any
3791 locales have upper and title case
3795 else if (! IN_UNI_8_BIT) {
3796 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3797 * on EBCDIC machines whatever the
3798 * native function does */
3801 /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is
3802 * UTF-8, which we treat as not in locale), and cased latin1 */
3804 #ifdef USE_LOCALE_CTYPE
3808 title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
3810 assert(tculen == 2);
3812 /* If the result is an upper Latin1-range character, it can
3813 * still be represented in one byte, which is its ordinal */
3814 if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) {
3815 *tmpbuf = (U8) title_ord;
3819 /* Otherwise it became more than one ASCII character (in
3820 * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to
3821 * beyond Latin1, so the number of bytes changed, so can't
3822 * replace just the first character in place. */
3825 /* If the result won't fit in a byte, the entire result
3826 * will have to be in UTF-8. Assume worst case sizing in
3827 * conversion. (all latin1 characters occupy at most two
3829 if (title_ord > 255) {
3831 convert_source_to_utf8 = TRUE;
3832 need = slen * 2 + 1;
3834 /* The (converted) UTF-8 and UTF-EBCDIC lengths of all
3835 * (both) characters whose title case is above 255 is
3839 else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */
3840 need = slen + 1 + 1;
3844 } /* End of use Unicode (Latin1) semantics */
3845 } /* End of changing the case of the first character */
3847 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3848 * generate the result */
3851 /* We can convert in place. This means we change just the first
3852 * character without disturbing the rest; no need to grow */
3854 s = d = (U8*)SvPV_force_nomg(source, slen);
3860 /* Here, we can't convert in place; we earlier calculated how much
3861 * space we will need, so grow to accommodate that */
3862 SvUPGRADE(dest, SVt_PV);
3863 d = (U8*)SvGROW(dest, need);
3864 (void)SvPOK_only(dest);
3871 if (! convert_source_to_utf8) {
3873 /* Here both source and dest are in UTF-8, but have to create
3874 * the entire output. We initialize the result to be the
3875 * title/lower cased first character, and then append the rest
3877 sv_setpvn(dest, (char*)tmpbuf, tculen);
3879 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3883 const U8 *const send = s + slen;
3885 /* Here the dest needs to be in UTF-8, but the source isn't,
3886 * except we earlier UTF-8'd the first character of the source
3887 * into tmpbuf. First put that into dest, and then append the
3888 * rest of the source, converting it to UTF-8 as we go. */
3890 /* Assert tculen is 2 here because the only two characters that
3891 * get to this part of the code have 2-byte UTF-8 equivalents */
3893 *d++ = *(tmpbuf + 1);
3894 s++; /* We have just processed the 1st char */
3896 for (; s < send; s++) {
3897 d = uvchr_to_utf8(d, *s);
3900 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3904 else { /* in-place UTF-8. Just overwrite the first character */
3905 Copy(tmpbuf, d, tculen, U8);
3906 SvCUR_set(dest, need - 1);
3910 else { /* Neither source nor dest are in or need to be UTF-8 */
3912 if (inplace) { /* in-place, only need to change the 1st char */
3915 else { /* Not in-place */
3917 /* Copy the case-changed character(s) from tmpbuf */
3918 Copy(tmpbuf, d, tculen, U8);
3919 d += tculen - 1; /* Code below expects d to point to final
3920 * character stored */
3923 else { /* empty source */
3924 /* See bug #39028: Don't taint if empty */
3928 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3929 * the destination to retain that flag */
3930 if (SvUTF8(source) && ! IN_BYTES)
3933 if (!inplace) { /* Finish the rest of the string, unchanged */
3934 /* This will copy the trailing NUL */
3935 Copy(s + 1, d + 1, slen, U8);
3936 SvCUR_set(dest, need - 1);
3939 #ifdef USE_LOCALE_CTYPE
3940 if (IN_LC_RUNTIME(LC_CTYPE)) {
3945 if (dest != source && SvTAINTED(source))
3951 /* There's so much setup/teardown code common between uc and lc, I wonder if
3952 it would be worth merging the two, and just having a switch outside each
3953 of the three tight loops. There is less and less commonality though */
3966 if ( SvPADTMP(source)
3967 && !SvREADONLY(source) && SvPOK(source)
3970 #ifdef USE_LOCALE_CTYPE
3971 (IN_LC_RUNTIME(LC_CTYPE))
3972 ? ! IN_UTF8_CTYPE_LOCALE
3978 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3979 * make the loop tight, so we overwrite the source with the dest before
3980 * looking at it, and we need to look at the original source
3981 * afterwards. There would also need to be code added to handle
3982 * switching to not in-place in midstream if we run into characters
3983 * that change the length. Since being in locale overrides UNI_8_BIT,
3984 * that latter becomes irrelevant in the above test; instead for
3985 * locale, the size can't normally change, except if the locale is a
3988 s = d = (U8*)SvPV_force_nomg(source, len);
3995 s = (const U8*)SvPV_nomg_const(source, len);
3998 SvUPGRADE(dest, SVt_PV);
3999 d = (U8*)SvGROW(dest, min);
4000 (void)SvPOK_only(dest);
4005 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4006 to check DO_UTF8 again here. */
4008 if (DO_UTF8(source)) {
4009 const U8 *const send = s + len;
4010 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4012 /* All occurrences of these are to be moved to follow any other marks.
4013 * This is context-dependent. We may not be passed enough context to
4014 * move the iota subscript beyond all of them, but we do the best we can
4015 * with what we're given. The result is always better than if we
4016 * hadn't done this. And, the problem would only arise if we are
4017 * passed a character without all its combining marks, which would be
4018 * the caller's mistake. The information this is based on comes from a
4019 * comment in Unicode SpecialCasing.txt, (and the Standard's text
4020 * itself) and so can't be checked properly to see if it ever gets
4021 * revised. But the likelihood of it changing is remote */
4022 bool in_iota_subscript = FALSE;
4028 if (in_iota_subscript && ! _is_utf8_mark(s)) {
4030 /* A non-mark. Time to output the iota subscript */
4031 Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
4032 d += capital_iota_len;
4033 in_iota_subscript = FALSE;
4036 /* Then handle the current character. Get the changed case value
4037 * and copy it to the output buffer */
4040 #ifdef USE_LOCALE_CTYPE
4041 uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
4043 uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, 0);
4045 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
4046 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
4047 if (uv == GREEK_CAPITAL_LETTER_IOTA
4048 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
4050 in_iota_subscript = TRUE;
4053 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4054 /* If the eventually required minimum size outgrows the
4055 * available space, we need to grow. */
4056 const UV o = d - (U8*)SvPVX_const(dest);
4058 /* If someone uppercases one million U+03B0s we SvGROW()
4059 * one million times. Or we could try guessing how much to
4060 * allocate without allocating too much. Such is life.
4061 * See corresponding comment in lc code for another option
4063 d = o + (U8*) SvGROW(dest, min);
4065 Copy(tmpbuf, d, ulen, U8);
4070 if (in_iota_subscript) {
4071 Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
4072 d += capital_iota_len;
4077 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4079 else { /* Not UTF-8 */
4081 const U8 *const send = s + len;
4083 /* Use locale casing if in locale; regular style if not treating
4084 * latin1 as having case; otherwise the latin1 casing. Do the
4085 * whole thing in a tight loop, for speed, */
4086 #ifdef USE_LOCALE_CTYPE
4087 if (IN_LC_RUNTIME(LC_CTYPE)) {
4088 if (IN_UTF8_CTYPE_LOCALE) {
4091 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4092 for (; s < send; d++, s++)
4093 *d = (U8) toUPPER_LC(*s);
4097 if (! IN_UNI_8_BIT) {
4098 for (; s < send; d++, s++) {
4103 #ifdef USE_LOCALE_CTYPE
4106 for (; s < send; d++, s++) {
4107 *d = toUPPER_LATIN1_MOD(*s);
4108 if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
4112 /* The mainstream case is the tight loop above. To avoid
4113 * extra tests in that, all three characters that require
4114 * special handling are mapped by the MOD to the one tested
4116 * Use the source to distinguish between the three cases */
4118 #if UNICODE_MAJOR_VERSION > 2 \
4119 || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \
4120 && UNICODE_DOT_DOT_VERSION >= 8)
4121 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4123 /* uc() of this requires 2 characters, but they are
4124 * ASCII. If not enough room, grow the string */
4125 if (SvLEN(dest) < ++min) {
4126 const UV o = d - (U8*)SvPVX_const(dest);
4127 d = o + (U8*) SvGROW(dest, min);
4129 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4130 continue; /* Back to the tight loop; still in ASCII */
4134 /* The other two special handling characters have their
4135 * upper cases outside the latin1 range, hence need to be
4136 * in UTF-8, so the whole result needs to be in UTF-8. So,
4137 * here we are somewhere in the middle of processing a
4138 * non-UTF-8 string, and realize that we will have to convert
4139 * the whole thing to UTF-8. What to do? There are
4140 * several possibilities. The simplest to code is to
4141 * convert what we have so far, set a flag, and continue on
4142 * in the loop. The flag would be tested each time through
4143 * the loop, and if set, the next character would be
4144 * converted to UTF-8 and stored. But, I (khw) didn't want
4145 * to slow down the mainstream case at all for this fairly
4146 * rare case, so I didn't want to add a test that didn't
4147 * absolutely have to be there in the loop, besides the
4148 * possibility that it would get too complicated for
4149 * optimizers to deal with. Another possibility is to just
4150 * give up, convert the source to UTF-8, and restart the
4151 * function that way. Another possibility is to convert
4152 * both what has already been processed and what is yet to
4153 * come separately to UTF-8, then jump into the loop that
4154 * handles UTF-8. But the most efficient time-wise of the
4155 * ones I could think of is what follows, and turned out to
4156 * not require much extra code. */
4158 /* Convert what we have so far into UTF-8, telling the
4159 * function that we know it should be converted, and to
4160 * allow extra space for what we haven't processed yet.
4161 * Assume the worst case space requirements for converting
4162 * what we haven't processed so far: that it will require
4163 * two bytes for each remaining source character, plus the
4164 * NUL at the end. This may cause the string pointer to
4165 * move, so re-find it. */
4167 len = d - (U8*)SvPVX_const(dest);
4168 SvCUR_set(dest, len);
4169 len = sv_utf8_upgrade_flags_grow(dest,
4170 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4172 d = (U8*)SvPVX(dest) + len;
4174 /* Now process the remainder of the source, converting to
4175 * upper and UTF-8. If a resulting byte is invariant in
4176 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4177 * append it to the output. */
4178 for (; s < send; s++) {
4179 (void) _to_upper_title_latin1(*s, d, &len, 'S');
4183 /* Here have processed the whole source; no need to continue
4184 * with the outer loop. Each character has been converted
4185 * to upper case and converted to UTF-8 */
4188 } /* End of processing all latin1-style chars */
4189 } /* End of processing all chars */
4190 } /* End of source is not empty */
4192 if (source != dest) {
4193 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4194 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4196 } /* End of isn't utf8 */
4197 #ifdef USE_LOCALE_CTYPE
4198 if (IN_LC_RUNTIME(LC_CTYPE)) {
4203 if (dest != source && SvTAINTED(source))
4221 if ( SvPADTMP(source)
4222 && !SvREADONLY(source) && SvPOK(source)
4223 && !DO_UTF8(source)) {
4225 /* We can convert in place, as lowercasing anything in the latin1 range
4226 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4228 s = d = (U8*)SvPV_force_nomg(source, len);
4235 s = (const U8*)SvPV_nomg_const(source, len);
4238 SvUPGRADE(dest, SVt_PV);
4239 d = (U8*)SvGROW(dest, min);
4240 (void)SvPOK_only(dest);
4245 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4246 to check DO_UTF8 again here. */
4248 if (DO_UTF8(source)) {
4249 const U8 *const send = s + len;
4250 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4253 const STRLEN u = UTF8SKIP(s);
4256 #ifdef USE_LOCALE_CTYPE
4257 _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
4259 _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, 0);
4262 /* Here is where we would do context-sensitive actions. See the
4263 * commit message for 86510fb15 for why there isn't any */
4265 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4267 /* If the eventually required minimum size outgrows the
4268 * available space, we need to grow. */
4269 const UV o = d - (U8*)SvPVX_const(dest);
4271 /* If someone lowercases one million U+0130s we SvGROW() one
4272 * million times. Or we could try guessing how much to
4273 * allocate without allocating too much. Such is life.
4274 * Another option would be to grow an extra byte or two more
4275 * each time we need to grow, which would cut down the million
4276 * to 500K, with little waste */
4277 d = o + (U8*) SvGROW(dest, min);
4280 /* Copy the newly lowercased letter to the output buffer we're
4282 Copy(tmpbuf, d, ulen, U8);
4285 } /* End of looping through the source string */
4288 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4289 } else { /* Not utf8 */
4291 const U8 *const send = s + len;
4293 /* Use locale casing if in locale; regular style if not treating
4294 * latin1 as having case; otherwise the latin1 casing. Do the
4295 * whole thing in a tight loop, for speed, */
4296 #ifdef USE_LOCALE_CTYPE
4297 if (IN_LC_RUNTIME(LC_CTYPE)) {
4298 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4299 for (; s < send; d++, s++)
4300 *d = toLOWER_LC(*s);
4304 if (! IN_UNI_8_BIT) {
4305 for (; s < send; d++, s++) {
4310 for (; s < send; d++, s++) {
4311 *d = toLOWER_LATIN1(*s);
4315 if (source != dest) {
4317 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4320 #ifdef USE_LOCALE_CTYPE
4321 if (IN_LC_RUNTIME(LC_CTYPE)) {
4326 if (dest != source && SvTAINTED(source))
4335 SV * const sv = TOPs;
4337 const char *s = SvPV_const(sv,len);
4339 SvUTF8_off(TARG); /* decontaminate */
4342 SvUPGRADE(TARG, SVt_PV);
4343 SvGROW(TARG, (len * 2) + 1);
4347 STRLEN ulen = UTF8SKIP(s);
4348 bool to_quote = FALSE;
4350 if (UTF8_IS_INVARIANT(*s)) {
4351 if (_isQUOTEMETA(*s)) {
4355 else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, s + len)) {
4357 #ifdef USE_LOCALE_CTYPE
4358 /* In locale, we quote all non-ASCII Latin1 chars.
4359 * Otherwise use the quoting rules */
4361 IN_LC_RUNTIME(LC_CTYPE)
4364 _isQUOTEMETA(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s + 1))))
4369 else if (is_QUOTEMETA_high(s)) {
4384 else if (IN_UNI_8_BIT) {
4386 if (_isQUOTEMETA(*s))
4392 /* For non UNI_8_BIT (and hence in locale) just quote all \W
4393 * including everything above ASCII */
4395 if (!isWORDCHAR_A(*s))
4401 SvCUR_set(TARG, d - SvPVX_const(TARG));
4402 (void)SvPOK_only_UTF8(TARG);
4405 sv_setpvn(TARG, s, len);
4421 U8 tmpbuf[UTF8_MAXBYTES_CASE + 1];
4422 #if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \
4423 || (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \
4424 || UNICODE_DOT_DOT_VERSION > 0)
4425 const bool full_folding = TRUE; /* This variable is here so we can easily
4426 move to more generality later */
4428 const bool full_folding = FALSE;
4430 const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 )
4431 #ifdef USE_LOCALE_CTYPE
4432 | ( IN_LC_RUNTIME(LC_CTYPE) ? FOLD_FLAGS_LOCALE : 0 )
4436 /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me.
4437 * You are welcome(?) -Hugmeir
4445 s = (const U8*)SvPV_nomg_const(source, len);
4447 if (ckWARN(WARN_UNINITIALIZED))
4448 report_uninit(source);
4455 SvUPGRADE(dest, SVt_PV);
4456 d = (U8*)SvGROW(dest, min);
4457 (void)SvPOK_only(dest);
4462 if (DO_UTF8(source)) { /* UTF-8 flagged string. */
4464 const STRLEN u = UTF8SKIP(s);
4467 _toFOLD_utf8_flags(s, send, tmpbuf, &ulen, flags);
4469 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4470 const UV o = d - (U8*)SvPVX_const(dest);
4471 d = o + (U8*) SvGROW(dest, min);
4474 Copy(tmpbuf, d, ulen, U8);
4479 } /* Unflagged string */
4481 #ifdef USE_LOCALE_CTYPE
4482 if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */
4483 if (IN_UTF8_CTYPE_LOCALE) {
4484 goto do_uni_folding;
4486 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4487 for (; s < send; d++, s++)
4488 *d = (U8) toFOLD_LC(*s);
4492 if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
4493 for (; s < send; d++, s++)
4497 #ifdef USE_LOCALE_CTYPE
4500 /* For ASCII and the Latin-1 range, there's only two troublesome
4501 * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
4502 * casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which
4503 * under any fold becomes \x{3BC} (\N{GREEK SMALL LETTER MU}) --
4504 * For the rest, the casefold is their lowercase. */
4505 for (; s < send; d++, s++) {
4506 if (*s == MICRO_SIGN) {
4507 /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU},
4508 * which is outside of the latin-1 range. There's a couple
4509 * of ways to deal with this -- khw discusses them in
4510 * pp_lc/uc, so go there :) What we do here is upgrade what
4511 * we had already casefolded, then enter an inner loop that
4512 * appends the rest of the characters as UTF-8. */
4513 len = d - (U8*)SvPVX_const(dest);
4514 SvCUR_set(dest, len);
4515 len = sv_utf8_upgrade_flags_grow(dest,
4516 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4517 /* The max expansion for latin1
4518 * chars is 1 byte becomes 2 */
4520 d = (U8*)SvPVX(dest) + len;
4522 Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8);
4525 for (; s < send; s++) {
4527 UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags);
4528 if UVCHR_IS_INVARIANT(fc) {
4530 && *s == LATIN_SMALL_LETTER_SHARP_S)
4539 Copy(tmpbuf, d, ulen, U8);
4545 else if (full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) {
4546 /* Under full casefolding, LATIN SMALL LETTER SHARP S
4547 * becomes "ss", which may require growing the SV. */
4548 if (SvLEN(dest) < ++min) {
4549 const UV o = d - (U8*)SvPVX_const(dest);
4550 d = o + (U8*) SvGROW(dest, min);
4555 else { /* If it's not one of those two, the fold is their lower
4557 *d = toLOWER_LATIN1(*s);
4563 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4565 #ifdef USE_LOCALE_CTYPE
4566 if (IN_LC_RUNTIME(LC_CTYPE)) {
4571 if (SvTAINTED(source))
4581 dSP; dMARK; dORIGMARK;
4582 AV *const av = MUTABLE_AV(POPs);
4583 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4585 if (SvTYPE(av) == SVt_PVAV) {
4586 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4587 bool can_preserve = FALSE;
4593 can_preserve = SvCANEXISTDELETE(av);
4596 if (lval && localizing) {
4599 for (svp = MARK + 1; svp <= SP; svp++) {
4600 const SSize_t elem = SvIV(*svp);
4604 if (max > AvMAX(av))
4608 while (++MARK <= SP) {
4610 SSize_t elem = SvIV(*MARK);
4611 bool preeminent = TRUE;
4613 if (localizing && can_preserve) {
4614 /* If we can determine whether the element exist,
4615 * Try to preserve the existenceness of a tied array
4616 * element by using EXISTS and DELETE if possible.
4617 * Fallback to FETCH and STORE otherwise. */
4618 preeminent = av_exists(av, elem);
4621 svp = av_fetch(av, elem, lval);
4624 DIE(aTHX_ PL_no_aelem, elem);
4627 save_aelem(av, elem, svp);
4629 SAVEADELETE(av, elem);
4632 *MARK = svp ? *svp : &PL_sv_undef;
4635 if (GIMME_V != G_ARRAY) {
4637 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4646 AV *const av = MUTABLE_AV(POPs);
4647 I32 lval = (PL_op->op_flags & OPf_MOD);
4648 SSize_t items = SP - MARK;
4650 if (PL_op->op_private & OPpMAYBE_LVSUB) {
4651 const I32 flags = is_lvalue_sub();
4653 if (!(flags & OPpENTERSUB_INARGS))
4654 /* diag_listed_as: Can't modify %s in %s */
4655 Perl_croak(aTHX_ "Can't modify index/value array slice in list assignment");
4662 *(MARK+items*2-1) = *(MARK+items);
4668 while (++MARK <= SP) {
4671 svp = av_fetch(av, SvIV(*MARK), lval);
4673 if (!svp || !*svp || *svp == &PL_sv_undef) {
4674 DIE(aTHX_ PL_no_aelem, SvIV(*MARK));