3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
32 #include "regcharclass.h"
34 /* XXX I can't imagine anyone who doesn't have this actually _needs_
35 it, since pid_t is an integral type.
38 #ifdef NEED_GETPID_PROTO
39 extern Pid_t getpid (void);
43 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
44 * This switches them over to IEEE.
46 #if defined(LIBM_LIB_VERSION)
47 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
50 static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1;
51 static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1;
53 /* variations on pp_null */
58 if (GIMME_V == G_SCALAR)
65 /* This is also called directly by pp_lvavref. */
70 assert(SvTYPE(TARG) == SVt_PVAV);
71 if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO ))
72 if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) ))
73 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
76 if (PL_op->op_flags & OPf_REF) {
80 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
81 const I32 flags = is_lvalue_sub();
82 if (flags && !(flags & OPpENTERSUB_INARGS)) {
83 if (GIMME_V == G_SCALAR)
84 /* diag_listed_as: Can't return %s to lvalue scalar context */
85 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
92 if (gimme == G_ARRAY) {
93 /* XXX see also S_pushav in pp_hot.c */
94 const SSize_t maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
96 if (SvMAGICAL(TARG)) {
98 for (i=0; i < maxarg; i++) {
99 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
100 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
105 for (i=0; i < maxarg; i++) {
106 SV * const sv = AvARRAY((const AV *)TARG)[i];
107 SP[i+1] = sv ? sv : &PL_sv_undef;
112 else if (gimme == G_SCALAR) {
113 SV* const sv = sv_newmortal();
114 const SSize_t maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
115 sv_setiv(sv, maxarg);
126 assert(SvTYPE(TARG) == SVt_PVHV);
128 if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO ))
129 if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) ))
130 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
132 if (PL_op->op_flags & OPf_REF)
134 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
135 const I32 flags = is_lvalue_sub();
136 if (flags && !(flags & OPpENTERSUB_INARGS)) {
137 if (GIMME_V == G_SCALAR)
138 /* diag_listed_as: Can't return %s to lvalue scalar context */
139 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
145 if (gimme == G_ARRAY) {
146 RETURNOP(Perl_do_kv(aTHX));
148 else if ((PL_op->op_private & OPpTRUEBOOL
149 || ( PL_op->op_private & OPpMAYBE_TRUEBOOL
150 && block_gimme() == G_VOID ))
151 && (!SvRMAGICAL(TARG) || !mg_find(TARG, PERL_MAGIC_tied))
153 SETs(HvUSEDKEYS(TARG) ? &PL_sv_yes : &PL_sv_no);
154 else if (gimme == G_SCALAR) {
155 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
164 assert(SvTYPE(TARG) == SVt_PVCV);
172 SvPADSTALE_off(TARG);
179 CV * const protocv = PadnamePROTOCV(
180 PadlistNAMESARRAY(CvPADLIST(find_runcv(NULL)))[ARGTARG]
182 assert(SvTYPE(TARG) == SVt_PVCV);
184 if (CvISXSUB(protocv)) { /* constant */
185 /* XXX Should we clone it here? */
186 /* If this changes to use SAVECLEARSV, we can move the SAVECLEARSV
187 to introcv and remove the SvPADSTALE_off. */
188 SAVEPADSVANDMORTALIZE(ARGTARG);
189 PAD_SVl(ARGTARG) = SvREFCNT_inc_simple_NN(protocv);
192 if (CvROOT(protocv)) {
193 assert(CvCLONE(protocv));
194 assert(!CvCLONED(protocv));
196 cv_clone_into(protocv,(CV *)TARG);
197 SAVECLEARSV(PAD_SVl(ARGTARG));
204 /* In some cases this function inspects PL_op. If this function is called
205 for new op types, more bool parameters may need to be added in place of
208 When noinit is true, the absence of a gv will cause a retval of undef.
209 This is unrelated to the cv-to-gv assignment case.
213 S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
216 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
219 sv = amagic_deref_call(sv, to_gv_amg);
223 if (SvTYPE(sv) == SVt_PVIO) {
224 GV * const gv = MUTABLE_GV(sv_newmortal());
225 gv_init(gv, 0, "__ANONIO__", 10, 0);
226 GvIOp(gv) = MUTABLE_IO(sv);
227 SvREFCNT_inc_void_NN(sv);
230 else if (!isGV_with_GP(sv)) {
231 Perl_die(aTHX_ "Not a GLOB reference");
235 if (!isGV_with_GP(sv)) {
237 /* If this is a 'my' scalar and flag is set then vivify
240 if (vivify_sv && sv != &PL_sv_undef) {
243 Perl_croak_no_modify();
244 if (cUNOP->op_targ) {
245 SV * const namesv = PAD_SV(cUNOP->op_targ);
246 HV *stash = CopSTASH(PL_curcop);
247 if (SvTYPE(stash) != SVt_PVHV) stash = NULL;
248 gv = MUTABLE_GV(newSV(0));
249 gv_init_sv(gv, stash, namesv, 0);
252 const char * const name = CopSTASHPV(PL_curcop);
253 gv = newGVgen_flags(name,
254 HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
255 SvREFCNT_inc_simple_void_NN(gv);
257 prepare_SV_for_RV(sv);
258 SvRV_set(sv, MUTABLE_SV(gv));
263 if (PL_op->op_flags & OPf_REF || strict) {
264 Perl_die(aTHX_ PL_no_usym, "a symbol");
266 if (ckWARN(WARN_UNINITIALIZED))
272 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
273 sv, GV_ADDMG, SVt_PVGV
282 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""),
286 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
287 == OPpDONT_INIT_GV) {
288 /* We are the target of a coderef assignment. Return
289 the scalar unchanged, and let pp_sasssign deal with
293 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
295 /* FAKE globs in the symbol table cause weird bugs (#77810) */
299 if (SvFAKE(sv) && !(PL_op->op_private & OPpALLOW_FAKE)) {
300 SV *newsv = sv_newmortal();
301 sv_setsv_flags(newsv, sv, 0);
313 sv, PL_op->op_private & OPpDEREF,
314 PL_op->op_private & HINT_STRICT_REFS,
315 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
316 || PL_op->op_type == OP_READLINE
318 if (PL_op->op_private & OPpLVAL_INTRO)
319 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
324 /* Helper function for pp_rv2sv and pp_rv2av */
326 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
327 const svtype type, SV ***spp)
331 PERL_ARGS_ASSERT_SOFTREF2XV;
333 if (PL_op->op_private & HINT_STRICT_REFS) {
335 Perl_die(aTHX_ PL_no_symref_sv, sv,
336 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), what);
338 Perl_die(aTHX_ PL_no_usym, what);
342 PL_op->op_flags & OPf_REF
344 Perl_die(aTHX_ PL_no_usym, what);
345 if (ckWARN(WARN_UNINITIALIZED))
347 if (type != SVt_PV && GIMME_V == G_ARRAY) {
351 **spp = &PL_sv_undef;
354 if ((PL_op->op_flags & OPf_SPECIAL) &&
355 !(PL_op->op_flags & OPf_MOD))
357 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
359 **spp = &PL_sv_undef;
364 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
377 sv = amagic_deref_call(sv, to_sv_amg);
381 if (SvTYPE(sv) >= SVt_PVAV)
382 DIE(aTHX_ "Not a SCALAR reference");
387 if (!isGV_with_GP(gv)) {
388 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
394 if (PL_op->op_flags & OPf_MOD) {
395 if (PL_op->op_private & OPpLVAL_INTRO) {
396 if (cUNOP->op_first->op_type == OP_NULL)
397 sv = save_scalar(MUTABLE_GV(TOPs));
399 sv = save_scalar(gv);
401 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
403 else if (PL_op->op_private & OPpDEREF)
404 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
406 SPAGAIN; /* in case chasing soft refs reallocated the stack */
414 AV * const av = MUTABLE_AV(TOPs);
415 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
417 SV ** const svp = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
419 *svp = newSV_type(SVt_PVMG);
420 sv_magic(*svp, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
424 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
433 if (PL_op->op_flags & OPf_MOD || LVRET) {
434 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));/* Not TARG RT#67838 */
435 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
437 LvTARG(ret) = SvREFCNT_inc_simple(sv);
438 SETs(ret); /* no SvSETMAGIC */
441 const MAGIC * const mg = mg_find_mglob(sv);
442 if (mg && mg->mg_len != -1) {
444 STRLEN i = mg->mg_len;
445 if (mg->mg_flags & MGf_BYTES && DO_UTF8(sv))
446 i = sv_pos_b2u_flags(sv, i, SV_GMAGIC|SV_CONST_RETURN);
460 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
462 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT))
463 == OPpMAY_RETURN_CONSTANT)
466 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
467 /* (But not in defined().) */
469 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
471 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
472 cv = SvTYPE(SvRV(gv)) == SVt_PVCV
473 ? MUTABLE_CV(SvRV(gv))
477 cv = MUTABLE_CV(&PL_sv_undef);
478 SETs(MUTABLE_SV(cv));
488 SV *ret = &PL_sv_undef;
490 if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs));
491 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
492 const char * s = SvPVX_const(TOPs);
493 if (strnEQ(s, "CORE::", 6)) {
494 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
496 DIE(aTHX_ "Can't find an opnumber for \"%" UTF8f "\"",
497 UTF8fARG(SvFLAGS(TOPs) & SVf_UTF8, SvCUR(TOPs)-6, s+6));
499 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
505 cv = sv_2cv(TOPs, &stash, &gv, 0);
507 ret = newSVpvn_flags(
508 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv)
518 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
520 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
522 PUSHs(MUTABLE_SV(cv));
536 if (GIMME_V != G_ARRAY) {
542 *MARK = &PL_sv_undef;
544 *MARK = refto(*MARK);
548 EXTEND_MORTAL(SP - MARK);
550 *MARK = refto(*MARK);
555 S_refto(pTHX_ SV *sv)
559 PERL_ARGS_ASSERT_REFTO;
561 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
564 if (!(sv = LvTARG(sv)))
567 SvREFCNT_inc_void_NN(sv);
569 else if (SvTYPE(sv) == SVt_PVAV) {
570 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
571 av_reify(MUTABLE_AV(sv));
573 SvREFCNT_inc_void_NN(sv);
575 else if (SvPADTMP(sv)) {
580 SvREFCNT_inc_void_NN(sv);
583 sv_upgrade(rv, SVt_IV);
592 SV * const sv = TOPs;
600 /* use the return value that is in a register, its the same as TARG */
601 TARG = sv_ref(TARG,SvRV(sv),TRUE);
616 stash = CopSTASH(PL_curcop);
617 if (SvTYPE(stash) != SVt_PVHV)
618 Perl_croak(aTHX_ "Attempt to bless into a freed package");
621 SV * const ssv = POPs;
625 if (!ssv) goto curstash;
628 if (!SvAMAGIC(ssv)) {
630 Perl_croak(aTHX_ "Attempt to bless into a reference");
632 /* SvAMAGIC is on here, but it only means potentially overloaded,
633 so after stringification: */
634 ptr = SvPV_nomg_const(ssv,len);
635 /* We need to check the flag again: */
636 if (!SvAMAGIC(ssv)) goto frog;
638 else ptr = SvPV_nomg_const(ssv,len);
640 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
641 "Explicit blessing to '' (assuming package main)");
642 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv));
645 (void)sv_bless(TOPs, stash);
655 const char * const elem = SvPV_const(sv, len);
656 GV * const gv = MUTABLE_GV(TOPs);
661 /* elem will always be NUL terminated. */
664 if (memEQs(elem, len, "ARRAY"))
666 tmpRef = MUTABLE_SV(GvAV(gv));
667 if (tmpRef && !AvREAL((const AV *)tmpRef)
668 && AvREIFY((const AV *)tmpRef))
669 av_reify(MUTABLE_AV(tmpRef));
673 if (memEQs(elem, len, "CODE"))
674 tmpRef = MUTABLE_SV(GvCVu(gv));
677 if (memEQs(elem, len, "FILEHANDLE")) {
678 tmpRef = MUTABLE_SV(GvIOp(gv));
681 if (memEQs(elem, len, "FORMAT"))
682 tmpRef = MUTABLE_SV(GvFORM(gv));
685 if (memEQs(elem, len, "GLOB"))
686 tmpRef = MUTABLE_SV(gv);
689 if (memEQs(elem, len, "HASH"))
690 tmpRef = MUTABLE_SV(GvHV(gv));
693 if (memEQs(elem, len, "IO"))
694 tmpRef = MUTABLE_SV(GvIOp(gv));
697 if (memEQs(elem, len, "NAME"))
698 sv = newSVhek(GvNAME_HEK(gv));
701 if (memEQs(elem, len, "PACKAGE")) {
702 const HV * const stash = GvSTASH(gv);
703 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
704 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
708 if (memEQs(elem, len, "SCALAR"))
723 /* Pattern matching */
731 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
732 /* Historically, study was skipped in these cases. */
737 /* Make study a no-op. It's no longer useful and its existence
738 complicates matters elsewhere. */
744 /* also used for: pp_transr() */
751 if (PL_op->op_flags & OPf_STACKED)
756 sv = PAD_SV(ARGTARG);
761 if(PL_op->op_type == OP_TRANSR) {
763 const char * const pv = SvPV(sv,len);
764 SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv));
769 I32 i = do_trans(sv);
775 /* Lvalue operators. */
778 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
784 PERL_ARGS_ASSERT_DO_CHOMP;
786 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
788 if (SvTYPE(sv) == SVt_PVAV) {
790 AV *const av = MUTABLE_AV(sv);
791 const I32 max = AvFILL(av);
793 for (i = 0; i <= max; i++) {
794 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
795 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
796 count += do_chomp(retval, sv, chomping);
800 else if (SvTYPE(sv) == SVt_PVHV) {
801 HV* const hv = MUTABLE_HV(sv);
803 (void)hv_iterinit(hv);
804 while ((entry = hv_iternext(hv)))
805 count += do_chomp(retval, hv_iterval(hv,entry), chomping);
808 else if (SvREADONLY(sv)) {
809 Perl_croak_no_modify();
815 char *temp_buffer = NULL;
820 goto nope_free_nothing;
822 while (len && s[-1] == '\n') {
829 STRLEN rslen, rs_charlen;
830 const char *rsptr = SvPV_const(PL_rs, rslen);
832 rs_charlen = SvUTF8(PL_rs)
836 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
837 /* Assumption is that rs is shorter than the scalar. */
839 /* RS is utf8, scalar is 8 bit. */
841 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
844 /* Cannot downgrade, therefore cannot possibly match.
845 At this point, temp_buffer is not alloced, and
846 is the buffer inside PL_rs, so dont free it.
848 assert (temp_buffer == rsptr);
854 /* RS is 8 bit, scalar is utf8. */
855 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
869 if (memNE(s, rsptr, rslen))
874 SvPV_force_nomg_nolen(sv);
881 Safefree(temp_buffer);
883 SvREFCNT_dec(svrecode);
887 if (len && (!SvPOK(sv) || SvIsCOW(sv)))
888 s = SvPV_force_nomg(sv, len);
891 char * const send = s + len;
892 char * const start = s;
894 while (s > start && UTF8_IS_CONTINUATION(*s))
896 if (is_utf8_string((U8*)s, send - s)) {
897 sv_setpvn(retval, s, send - s);
899 SvCUR_set(sv, s - start);
909 sv_setpvn(retval, s, 1);
923 /* also used for: pp_schomp() */
928 const bool chomping = PL_op->op_type == OP_SCHOMP;
930 const size_t count = do_chomp(TARG, TOPs, chomping);
932 sv_setiv(TARG, count);
938 /* also used for: pp_chomp() */
942 dSP; dMARK; dTARGET; dORIGMARK;
943 const bool chomping = PL_op->op_type == OP_CHOMP;
947 count += do_chomp(TARG, *++MARK, chomping);
949 sv_setiv(TARG, count);
960 if (!PL_op->op_private) {
972 if (SvTHINKFIRST(sv))
973 sv_force_normal_flags(sv, SV_COW_DROP_PV|SV_IMMEDIATE_UNREF);
975 switch (SvTYPE(sv)) {
979 av_undef(MUTABLE_AV(sv));
982 hv_undef(MUTABLE_HV(sv));
985 if (cv_const_sv((const CV *)sv))
986 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
987 "Constant subroutine %" SVf " undefined",
988 SVfARG(CvANON((const CV *)sv)
989 ? newSVpvs_flags("(anonymous)", SVs_TEMP)
990 : sv_2mortal(newSVhek(
992 ? CvNAME_HEK((CV *)sv)
993 : GvENAME_HEK(CvGV((const CV *)sv))
998 /* let user-undef'd sub keep its identity */
999 cv_undef_flags(MUTABLE_CV(sv), CV_UNDEF_KEEP_NAME);
1002 assert(isGV_with_GP(sv));
1003 assert(!SvFAKE(sv));
1008 /* undef *Pkg::meth_name ... */
1010 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1011 && HvENAME_get(stash);
1013 if((stash = GvHV((const GV *)sv))) {
1014 if(HvENAME_get(stash))
1015 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1019 SvREFCNT_inc_simple_void_NN(sv_2mortal(sv));
1020 gp_free(MUTABLE_GV(sv));
1022 GvGP_set(sv, gp_ref(gp));
1023 #ifndef PERL_DONT_CREATE_GVSV
1024 GvSV(sv) = newSV(0);
1026 GvLINE(sv) = CopLINE(PL_curcop);
1027 GvEGV(sv) = MUTABLE_GV(sv);
1031 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1033 /* undef *Foo::ISA */
1034 if( strEQ(GvNAME((const GV *)sv), "ISA")
1035 && (stash = GvSTASH((const GV *)sv))
1036 && (method_changed || HvENAME(stash)) )
1037 mro_isa_changed_in(stash);
1038 else if(method_changed)
1039 mro_method_changed_in(
1040 GvSTASH((const GV *)sv)
1046 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1060 /* common "slow" code for pp_postinc and pp_postdec */
1063 S_postincdec_common(pTHX_ SV *sv, SV *targ)
1067 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
1070 TARG = sv_newmortal();
1077 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1078 if (inc && !SvOK(TARG))
1085 /* also used for: pp_i_postinc() */
1092 /* special-case sv being a simple integer */
1093 if (LIKELY(((sv->sv_flags &
1094 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV|
1095 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK))
1097 && SvIVX(sv) != IV_MAX)
1100 SvIV_set(sv, iv + 1);
1101 TARGi(iv, 0); /* arg not GMG, so can't be tainted */
1106 return S_postincdec_common(aTHX_ sv, TARG);
1110 /* also used for: pp_i_postdec() */
1117 /* special-case sv being a simple integer */
1118 if (LIKELY(((sv->sv_flags &
1119 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV|
1120 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK))
1122 && SvIVX(sv) != IV_MIN)
1125 SvIV_set(sv, iv - 1);
1126 TARGi(iv, 0); /* arg not GMG, so can't be tainted */
1131 return S_postincdec_common(aTHX_ sv, TARG);
1135 /* Ordinary operators. */
1139 dSP; dATARGET; SV *svl, *svr;
1140 #ifdef PERL_PRESERVE_IVUV
1143 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1146 #ifdef PERL_PRESERVE_IVUV
1147 /* For integer to integer power, we do the calculation by hand wherever
1148 we're sure it is safe; otherwise we call pow() and try to convert to
1149 integer afterwards. */
1150 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1158 const IV iv = SvIVX(svr);
1162 goto float_it; /* Can't do negative powers this way. */
1166 baseuok = SvUOK(svl);
1168 baseuv = SvUVX(svl);
1170 const IV iv = SvIVX(svl);
1173 baseuok = TRUE; /* effectively it's a UV now */
1175 baseuv = -iv; /* abs, baseuok == false records sign */
1178 /* now we have integer ** positive integer. */
1181 /* foo & (foo - 1) is zero only for a power of 2. */
1182 if (!(baseuv & (baseuv - 1))) {
1183 /* We are raising power-of-2 to a positive integer.
1184 The logic here will work for any base (even non-integer
1185 bases) but it can be less accurate than
1186 pow (base,power) or exp (power * log (base)) when the
1187 intermediate values start to spill out of the mantissa.
1188 With powers of 2 we know this can't happen.
1189 And powers of 2 are the favourite thing for perl
1190 programmers to notice ** not doing what they mean. */
1192 NV base = baseuok ? baseuv : -(NV)baseuv;
1197 while (power >>= 1) {
1205 SvIV_please_nomg(svr);
1208 unsigned int highbit = 8 * sizeof(UV);
1209 unsigned int diff = 8 * sizeof(UV);
1210 while (diff >>= 1) {
1212 if (baseuv >> highbit) {
1216 /* we now have baseuv < 2 ** highbit */
1217 if (power * highbit <= 8 * sizeof(UV)) {
1218 /* result will definitely fit in UV, so use UV math
1219 on same algorithm as above */
1222 const bool odd_power = cBOOL(power & 1);
1226 while (power >>= 1) {
1233 if (baseuok || !odd_power)
1234 /* answer is positive */
1236 else if (result <= (UV)IV_MAX)
1237 /* answer negative, fits in IV */
1238 SETi( -(IV)result );
1239 else if (result == (UV)IV_MIN)
1240 /* 2's complement assumption: special case IV_MIN */
1243 /* answer negative, doesn't fit */
1244 SETn( -(NV)result );
1252 NV right = SvNV_nomg(svr);
1253 NV left = SvNV_nomg(svl);
1256 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1258 We are building perl with long double support and are on an AIX OS
1259 afflicted with a powl() function that wrongly returns NaNQ for any
1260 negative base. This was reported to IBM as PMR #23047-379 on
1261 03/06/2006. The problem exists in at least the following versions
1262 of AIX and the libm fileset, and no doubt others as well:
1264 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1265 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1266 AIX 5.2.0 bos.adt.libm 5.2.0.85
1268 So, until IBM fixes powl(), we provide the following workaround to
1269 handle the problem ourselves. Our logic is as follows: for
1270 negative bases (left), we use fmod(right, 2) to check if the
1271 exponent is an odd or even integer:
1273 - if odd, powl(left, right) == -powl(-left, right)
1274 - if even, powl(left, right) == powl(-left, right)
1276 If the exponent is not an integer, the result is rightly NaNQ, so
1277 we just return that (as NV_NAN).
1281 NV mod2 = Perl_fmod( right, 2.0 );
1282 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1283 SETn( -Perl_pow( -left, right) );
1284 } else if (mod2 == 0.0) { /* even integer */
1285 SETn( Perl_pow( -left, right) );
1286 } else { /* fractional power */
1290 SETn( Perl_pow( left, right) );
1293 SETn( Perl_pow( left, right) );
1294 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1296 #ifdef PERL_PRESERVE_IVUV
1298 SvIV_please_nomg(svr);
1306 dSP; dATARGET; SV *svl, *svr;
1307 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1311 #ifdef PERL_PRESERVE_IVUV
1313 /* special-case some simple common cases */
1314 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) {
1316 U32 flags = (svl->sv_flags & svr->sv_flags);
1317 if (flags & SVf_IOK) {
1318 /* both args are simple IVs */
1323 topl = ((UV)il) >> (UVSIZE * 4 - 1);
1324 topr = ((UV)ir) >> (UVSIZE * 4 - 1);
1326 /* if both are in a range that can't under/overflow, do a
1327 * simple integer multiply: if the top halves(*) of both numbers
1328 * are 00...00 or 11...11, then it's safe.
1329 * (*) for 32-bits, the "top half" is the top 17 bits,
1330 * for 64-bits, its 33 bits */
1332 ((topl+1) | (topr+1))
1333 & ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */
1336 TARGi(il * ir, 0); /* args not GMG, so can't be tainted */
1342 else if (flags & SVf_NOK) {
1343 /* both args are NVs */
1349 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1350 !Perl_isnan(nl) && nl == (NV)(il = (IV)nl)
1351 && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr)
1353 nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr)
1356 /* nothing was lost by converting to IVs */
1360 # if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
1361 if (Perl_isinf(result)) {
1362 Zero((U8*)&result + 8, 8, U8);
1365 TARGn(result, 0); /* args not GMG, so can't be tainted */
1373 if (SvIV_please_nomg(svr)) {
1374 /* Unless the left argument is integer in range we are going to have to
1375 use NV maths. Hence only attempt to coerce the right argument if
1376 we know the left is integer. */
1377 /* Left operand is defined, so is it IV? */
1378 if (SvIV_please_nomg(svl)) {
1379 bool auvok = SvUOK(svl);
1380 bool buvok = SvUOK(svr);
1381 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1382 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1391 const IV aiv = SvIVX(svl);
1394 auvok = TRUE; /* effectively it's a UV now */
1396 /* abs, auvok == false records sign */
1397 alow = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
1403 const IV biv = SvIVX(svr);
1406 buvok = TRUE; /* effectively it's a UV now */
1408 /* abs, buvok == false records sign */
1409 blow = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
1413 /* If this does sign extension on unsigned it's time for plan B */
1414 ahigh = alow >> (4 * sizeof (UV));
1416 bhigh = blow >> (4 * sizeof (UV));
1418 if (ahigh && bhigh) {
1420 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1421 which is overflow. Drop to NVs below. */
1422 } else if (!ahigh && !bhigh) {
1423 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1424 so the unsigned multiply cannot overflow. */
1425 const UV product = alow * blow;
1426 if (auvok == buvok) {
1427 /* -ve * -ve or +ve * +ve gives a +ve result. */
1431 } else if (product <= (UV)IV_MIN) {
1432 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1433 /* -ve result, which could overflow an IV */
1435 /* can't negate IV_MIN, but there are aren't two
1436 * integers such that !ahigh && !bhigh, where the
1437 * product equals 0x800....000 */
1438 assert(product != (UV)IV_MIN);
1439 SETi( -(IV)product );
1441 } /* else drop to NVs below. */
1443 /* One operand is large, 1 small */
1446 /* swap the operands */
1448 bhigh = blow; /* bhigh now the temp var for the swap */
1452 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1453 multiplies can't overflow. shift can, add can, -ve can. */
1454 product_middle = ahigh * blow;
1455 if (!(product_middle & topmask)) {
1456 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1458 product_middle <<= (4 * sizeof (UV));
1459 product_low = alow * blow;
1461 /* as for pp_add, UV + something mustn't get smaller.
1462 IIRC ANSI mandates this wrapping *behaviour* for
1463 unsigned whatever the actual representation*/
1464 product_low += product_middle;
1465 if (product_low >= product_middle) {
1466 /* didn't overflow */
1467 if (auvok == buvok) {
1468 /* -ve * -ve or +ve * +ve gives a +ve result. */
1470 SETu( product_low );
1472 } else if (product_low <= (UV)IV_MIN) {
1473 /* 2s complement assumption again */
1474 /* -ve result, which could overflow an IV */
1476 SETi(product_low == (UV)IV_MIN
1477 ? IV_MIN : -(IV)product_low);
1479 } /* else drop to NVs below. */
1481 } /* product_middle too large */
1482 } /* ahigh && bhigh */
1487 NV right = SvNV_nomg(svr);
1488 NV left = SvNV_nomg(svl);
1489 NV result = left * right;
1492 #if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
1493 if (Perl_isinf(result)) {
1494 Zero((U8*)&result + 8, 8, U8);
1504 dSP; dATARGET; SV *svl, *svr;
1505 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1508 /* Only try to do UV divide first
1509 if ((SLOPPYDIVIDE is true) or
1510 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1512 The assumption is that it is better to use floating point divide
1513 whenever possible, only doing integer divide first if we can't be sure.
1514 If NV_PRESERVES_UV is true then we know at compile time that no UV
1515 can be too large to preserve, so don't need to compile the code to
1516 test the size of UVs. */
1519 # define PERL_TRY_UV_DIVIDE
1520 /* ensure that 20./5. == 4. */
1522 # ifdef PERL_PRESERVE_IVUV
1523 # ifndef NV_PRESERVES_UV
1524 # define PERL_TRY_UV_DIVIDE
1529 #ifdef PERL_TRY_UV_DIVIDE
1530 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1531 bool left_non_neg = SvUOK(svl);
1532 bool right_non_neg = SvUOK(svr);
1536 if (right_non_neg) {
1540 const IV biv = SvIVX(svr);
1543 right_non_neg = TRUE; /* effectively it's a UV now */
1546 right = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
1549 /* historically undef()/0 gives a "Use of uninitialized value"
1550 warning before dieing, hence this test goes here.
1551 If it were immediately before the second SvIV_please, then
1552 DIE() would be invoked before left was even inspected, so
1553 no inspection would give no warning. */
1555 DIE(aTHX_ "Illegal division by zero");
1561 const IV aiv = SvIVX(svl);
1564 left_non_neg = TRUE; /* effectively it's a UV now */
1567 left = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
1573 /* For sloppy divide we always attempt integer division. */
1575 /* Otherwise we only attempt it if either or both operands
1576 would not be preserved by an NV. If both fit in NVs
1577 we fall through to the NV divide code below. However,
1578 as left >= right to ensure integer result here, we know that
1579 we can skip the test on the right operand - right big
1580 enough not to be preserved can't get here unless left is
1583 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1586 /* Integer division can't overflow, but it can be imprecise. */
1587 const UV result = left / right;
1588 if (result * right == left) {
1589 SP--; /* result is valid */
1590 if (left_non_neg == right_non_neg) {
1591 /* signs identical, result is positive. */
1595 /* 2s complement assumption */
1596 if (result <= (UV)IV_MIN)
1597 SETi(result == (UV)IV_MIN ? IV_MIN : -(IV)result);
1599 /* It's exact but too negative for IV. */
1600 SETn( -(NV)result );
1603 } /* tried integer divide but it was not an integer result */
1604 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1605 } /* one operand wasn't SvIOK */
1606 #endif /* PERL_TRY_UV_DIVIDE */
1608 NV right = SvNV_nomg(svr);
1609 NV left = SvNV_nomg(svl);
1610 (void)POPs;(void)POPs;
1611 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1612 if (! Perl_isnan(right) && right == 0.0)
1616 DIE(aTHX_ "Illegal division by zero");
1617 PUSHn( left / right );
1625 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1629 bool left_neg = FALSE;
1630 bool right_neg = FALSE;
1631 bool use_double = FALSE;
1632 bool dright_valid = FALSE;
1635 SV * const svr = TOPs;
1636 SV * const svl = TOPm1s;
1637 if (SvIV_please_nomg(svr)) {
1638 right_neg = !SvUOK(svr);
1642 const IV biv = SvIVX(svr);
1645 right_neg = FALSE; /* effectively it's a UV now */
1647 right = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
1652 dright = SvNV_nomg(svr);
1653 right_neg = dright < 0;
1656 if (dright < UV_MAX_P1) {
1657 right = U_V(dright);
1658 dright_valid = TRUE; /* In case we need to use double below. */
1664 /* At this point use_double is only true if right is out of range for
1665 a UV. In range NV has been rounded down to nearest UV and
1666 use_double false. */
1667 if (!use_double && SvIV_please_nomg(svl)) {
1668 left_neg = !SvUOK(svl);
1672 const IV aiv = SvIVX(svl);
1675 left_neg = FALSE; /* effectively it's a UV now */
1677 left = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
1682 dleft = SvNV_nomg(svl);
1683 left_neg = dleft < 0;
1687 /* This should be exactly the 5.6 behaviour - if left and right are
1688 both in range for UV then use U_V() rather than floor. */
1690 if (dleft < UV_MAX_P1) {
1691 /* right was in range, so is dleft, so use UVs not double.
1695 /* left is out of range for UV, right was in range, so promote
1696 right (back) to double. */
1698 /* The +0.5 is used in 5.6 even though it is not strictly
1699 consistent with the implicit +0 floor in the U_V()
1700 inside the #if 1. */
1701 dleft = Perl_floor(dleft + 0.5);
1704 dright = Perl_floor(dright + 0.5);
1715 DIE(aTHX_ "Illegal modulus zero");
1717 dans = Perl_fmod(dleft, dright);
1718 if ((left_neg != right_neg) && dans)
1719 dans = dright - dans;
1722 sv_setnv(TARG, dans);
1728 DIE(aTHX_ "Illegal modulus zero");
1731 if ((left_neg != right_neg) && ans)
1734 /* XXX may warn: unary minus operator applied to unsigned type */
1735 /* could change -foo to be (~foo)+1 instead */
1736 if (ans <= ~((UV)IV_MAX)+1)
1737 sv_setiv(TARG, ~ans+1);
1739 sv_setnv(TARG, -(NV)ans);
1742 sv_setuv(TARG, ans);
1754 bool infnan = FALSE;
1756 if (GIMME_V == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1757 /* TODO: think of some way of doing list-repeat overloading ??? */
1762 if (UNLIKELY(PL_op->op_private & OPpREPEAT_DOLIST)) {
1763 /* The parser saw this as a list repeat, and there
1764 are probably several items on the stack. But we're
1765 in scalar/void context, and there's no pp_list to save us
1766 now. So drop the rest of the items -- robin@kitsite.com
1769 if (MARK + 1 < SP) {
1775 ASSUME(MARK + 1 == SP);
1777 MARK[1] = &PL_sv_undef;
1781 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1787 const UV uv = SvUV_nomg(sv);
1789 count = IV_MAX; /* The best we can do? */
1793 count = SvIV_nomg(sv);
1796 else if (SvNOKp(sv)) {
1797 const NV nv = SvNV_nomg(sv);
1798 infnan = Perl_isinfnan(nv);
1799 if (UNLIKELY(infnan)) {
1803 count = -1; /* An arbitrary negative integer */
1809 count = SvIV_nomg(sv);
1812 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC),
1813 "Non-finite repeat count does nothing");
1814 } else if (count < 0) {
1816 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC),
1817 "Negative repeat count does nothing");
1820 if (GIMME_V == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1822 const SSize_t items = SP - MARK;
1823 const U8 mod = PL_op->op_flags & OPf_MOD;
1828 if ( items > SSize_t_MAX / count /* max would overflow */
1829 /* repeatcpy would overflow */
1830 || items > I32_MAX / (I32)sizeof(SV *)
1832 Perl_croak(aTHX_ "%s","Out of memory during list extend");
1833 max = items * count;
1838 if (mod && SvPADTMP(*SP)) {
1839 *SP = sv_mortalcopy(*SP);
1846 repeatcpy((char*)(MARK + items), (char*)MARK,
1847 items * sizeof(const SV *), count - 1);
1850 else if (count <= 0)
1853 else { /* Note: mark already snarfed by pp_list */
1854 SV * const tmpstr = POPs;
1859 sv_setsv_nomg(TARG, tmpstr);
1860 SvPV_force_nomg(TARG, len);
1861 isutf = DO_UTF8(TARG);
1868 if ( len > (MEM_SIZE_MAX-1) / (UV)count /* max would overflow */
1869 || len > (U32)I32_MAX /* repeatcpy would overflow */
1871 Perl_croak(aTHX_ "%s",
1872 "Out of memory during string extend");
1873 max = (UV)count * len + 1;
1876 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1877 SvCUR_set(TARG, SvCUR(TARG) * count);
1879 *SvEND(TARG) = '\0';
1882 (void)SvPOK_only_UTF8(TARG);
1884 (void)SvPOK_only(TARG);
1893 dSP; dATARGET; bool useleft; SV *svl, *svr;
1894 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1898 #ifdef PERL_PRESERVE_IVUV
1900 /* special-case some simple common cases */
1901 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) {
1903 U32 flags = (svl->sv_flags & svr->sv_flags);
1904 if (flags & SVf_IOK) {
1905 /* both args are simple IVs */
1910 topl = ((UV)il) >> (UVSIZE * 8 - 2);
1911 topr = ((UV)ir) >> (UVSIZE * 8 - 2);
1913 /* if both are in a range that can't under/overflow, do a
1914 * simple integer subtract: if the top of both numbers
1915 * are 00 or 11, then it's safe */
1916 if (!( ((topl+1) | (topr+1)) & 2)) {
1918 TARGi(il - ir, 0); /* args not GMG, so can't be tainted */
1924 else if (flags & SVf_NOK) {
1925 /* both args are NVs */
1930 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1931 !Perl_isnan(nl) && nl == (NV)(il = (IV)nl)
1932 && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr)
1934 nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr)
1937 /* nothing was lost by converting to IVs */
1940 TARGn(nl - nr, 0); /* args not GMG, so can't be tainted */
1948 useleft = USE_LEFT(svl);
1949 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1950 "bad things" happen if you rely on signed integers wrapping. */
1951 if (SvIV_please_nomg(svr)) {
1952 /* Unless the left argument is integer in range we are going to have to
1953 use NV maths. Hence only attempt to coerce the right argument if
1954 we know the left is integer. */
1961 a_valid = auvok = 1;
1962 /* left operand is undef, treat as zero. */
1964 /* Left operand is defined, so is it IV? */
1965 if (SvIV_please_nomg(svl)) {
1966 if ((auvok = SvUOK(svl)))
1969 const IV aiv = SvIVX(svl);
1972 auvok = 1; /* Now acting as a sign flag. */
1973 } else { /* 2s complement assumption for IV_MIN */
1974 auv = (aiv == IV_MIN) ? (UV)aiv : (UV)-aiv;
1981 bool result_good = 0;
1984 bool buvok = SvUOK(svr);
1989 const IV biv = SvIVX(svr);
1994 buv = (biv == IV_MIN) ? (UV)biv : (UV)-biv;
1996 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1997 else "IV" now, independent of how it came in.
1998 if a, b represents positive, A, B negative, a maps to -A etc
2003 all UV maths. negate result if A negative.
2004 subtract if signs same, add if signs differ. */
2006 if (auvok ^ buvok) {
2015 /* Must get smaller */
2020 if (result <= buv) {
2021 /* result really should be -(auv-buv). as its negation
2022 of true value, need to swap our result flag */
2034 if (result <= (UV)IV_MIN)
2035 SETi(result == (UV)IV_MIN
2036 ? IV_MIN : -(IV)result);
2038 /* result valid, but out of range for IV. */
2039 SETn( -(NV)result );
2043 } /* Overflow, drop through to NVs. */
2047 useleft = USE_LEFT(svl);
2050 NV value = SvNV_nomg(svr);
2054 /* left operand is undef, treat as zero - value */
2058 SETn( SvNV_nomg(svl) - value );
2063 #define IV_BITS (IVSIZE * 8)
2065 static UV S_uv_shift(UV uv, int shift, bool left)
2071 if (shift >= IV_BITS) {
2074 return left ? uv << shift : uv >> shift;
2077 static IV S_iv_shift(IV iv, int shift, bool left)
2083 if (shift >= IV_BITS) {
2084 return iv < 0 && !left ? -1 : 0;
2086 return left ? iv << shift : iv >> shift;
2089 #define UV_LEFT_SHIFT(uv, shift) S_uv_shift(uv, shift, TRUE)
2090 #define UV_RIGHT_SHIFT(uv, shift) S_uv_shift(uv, shift, FALSE)
2091 #define IV_LEFT_SHIFT(iv, shift) S_iv_shift(iv, shift, TRUE)
2092 #define IV_RIGHT_SHIFT(iv, shift) S_iv_shift(iv, shift, FALSE)
2096 dSP; dATARGET; SV *svl, *svr;
2097 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
2101 const IV shift = SvIV_nomg(svr);
2102 if (PL_op->op_private & HINT_INTEGER) {
2103 SETi(IV_LEFT_SHIFT(SvIV_nomg(svl), shift));
2106 SETu(UV_LEFT_SHIFT(SvUV_nomg(svl), shift));
2114 dSP; dATARGET; SV *svl, *svr;
2115 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
2119 const IV shift = SvIV_nomg(svr);
2120 if (PL_op->op_private & HINT_INTEGER) {
2121 SETi(IV_RIGHT_SHIFT(SvIV_nomg(svl), shift));
2124 SETu(UV_RIGHT_SHIFT(SvUV_nomg(svl), shift));
2135 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
2139 (SvIOK_notUV(left) && SvIOK_notUV(right))
2140 ? (SvIVX(left) < SvIVX(right))
2141 : (do_ncmp(left, right) == -1)
2151 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
2155 (SvIOK_notUV(left) && SvIOK_notUV(right))
2156 ? (SvIVX(left) > SvIVX(right))
2157 : (do_ncmp(left, right) == 1)
2167 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
2171 (SvIOK_notUV(left) && SvIOK_notUV(right))
2172 ? (SvIVX(left) <= SvIVX(right))
2173 : (do_ncmp(left, right) <= 0)
2183 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2187 (SvIOK_notUV(left) && SvIOK_notUV(right))
2188 ? (SvIVX(left) >= SvIVX(right))
2189 : ( (do_ncmp(left, right) & 2) == 0)
2199 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2203 (SvIOK_notUV(left) && SvIOK_notUV(right))
2204 ? (SvIVX(left) != SvIVX(right))
2205 : (do_ncmp(left, right) != 0)
2210 /* compare left and right SVs. Returns:
2214 * 2: left or right was a NaN
2217 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2219 PERL_ARGS_ASSERT_DO_NCMP;
2220 #ifdef PERL_PRESERVE_IVUV
2221 /* Fortunately it seems NaN isn't IOK */
2222 if (SvIV_please_nomg(right) && SvIV_please_nomg(left)) {
2224 const IV leftiv = SvIVX(left);
2225 if (!SvUOK(right)) {
2226 /* ## IV <=> IV ## */
2227 const IV rightiv = SvIVX(right);
2228 return (leftiv > rightiv) - (leftiv < rightiv);
2230 /* ## IV <=> UV ## */
2232 /* As (b) is a UV, it's >=0, so it must be < */
2235 const UV rightuv = SvUVX(right);
2236 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2241 /* ## UV <=> UV ## */
2242 const UV leftuv = SvUVX(left);
2243 const UV rightuv = SvUVX(right);
2244 return (leftuv > rightuv) - (leftuv < rightuv);
2246 /* ## UV <=> IV ## */
2248 const IV rightiv = SvIVX(right);
2250 /* As (a) is a UV, it's >=0, so it cannot be < */
2253 const UV leftuv = SvUVX(left);
2254 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2257 NOT_REACHED; /* NOTREACHED */
2261 NV const rnv = SvNV_nomg(right);
2262 NV const lnv = SvNV_nomg(left);
2264 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2265 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2268 return (lnv > rnv) - (lnv < rnv);
2287 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2290 value = do_ncmp(left, right);
2302 /* also used for: pp_sge() pp_sgt() pp_slt() */
2308 int amg_type = sle_amg;
2312 switch (PL_op->op_type) {
2331 tryAMAGICbin_MG(amg_type, AMGf_set);
2335 #ifdef USE_LOCALE_COLLATE
2336 (IN_LC_RUNTIME(LC_COLLATE))
2337 ? sv_cmp_locale_flags(left, right, 0)
2340 sv_cmp_flags(left, right, 0);
2341 SETs(boolSV(cmp * multiplier < rhs));
2349 tryAMAGICbin_MG(seq_amg, AMGf_set);
2352 SETs(boolSV(sv_eq_flags(left, right, 0)));
2360 tryAMAGICbin_MG(sne_amg, AMGf_set);
2363 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2371 tryAMAGICbin_MG(scmp_amg, 0);
2375 #ifdef USE_LOCALE_COLLATE
2376 (IN_LC_RUNTIME(LC_COLLATE))
2377 ? sv_cmp_locale_flags(left, right, 0)
2380 sv_cmp_flags(left, right, 0);
2389 tryAMAGICbin_MG(band_amg, AMGf_assign);
2392 if (SvNIOKp(left) || SvNIOKp(right)) {
2393 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2394 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2395 if (PL_op->op_private & HINT_INTEGER) {
2396 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2400 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2403 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2404 if (right_ro_nonnum) SvNIOK_off(right);
2407 do_vop(PL_op->op_type, TARG, left, right);
2417 tryAMAGICbin_MG(band_amg, AMGf_assign|AMGf_numarg);
2419 dATARGET; dPOPTOPssrl;
2420 if (PL_op->op_private & HINT_INTEGER) {
2421 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2425 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2435 tryAMAGICbin_MG(sband_amg, AMGf_assign);
2437 dATARGET; dPOPTOPssrl;
2438 do_vop(OP_BIT_AND, TARG, left, right);
2443 /* also used for: pp_bit_xor() */
2448 const int op_type = PL_op->op_type;
2450 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2453 if (SvNIOKp(left) || SvNIOKp(right)) {
2454 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2455 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2456 if (PL_op->op_private & HINT_INTEGER) {
2457 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2458 const IV r = SvIV_nomg(right);
2459 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2463 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2464 const UV r = SvUV_nomg(right);
2465 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2468 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2469 if (right_ro_nonnum) SvNIOK_off(right);
2472 do_vop(op_type, TARG, left, right);
2479 /* also used for: pp_nbit_xor() */
2484 const int op_type = PL_op->op_type;
2486 tryAMAGICbin_MG((op_type == OP_NBIT_OR ? bor_amg : bxor_amg),
2487 AMGf_assign|AMGf_numarg);
2489 dATARGET; dPOPTOPssrl;
2490 if (PL_op->op_private & HINT_INTEGER) {
2491 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2492 const IV r = SvIV_nomg(right);
2493 const IV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r);
2497 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2498 const UV r = SvUV_nomg(right);
2499 const UV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r);
2506 /* also used for: pp_sbit_xor() */
2511 const int op_type = PL_op->op_type;
2513 tryAMAGICbin_MG((op_type == OP_SBIT_OR ? sbor_amg : sbxor_amg),
2516 dATARGET; dPOPTOPssrl;
2517 do_vop(op_type == OP_SBIT_OR ? OP_BIT_OR : OP_BIT_XOR, TARG, left,
2523 PERL_STATIC_INLINE bool
2524 S_negate_string(pTHX)
2529 SV * const sv = TOPs;
2530 if (!SvPOKp(sv) || SvNIOK(sv) || (!SvPOK(sv) && SvNIOKp(sv)))
2532 s = SvPV_nomg_const(sv, len);
2533 if (isIDFIRST(*s)) {
2534 sv_setpvs(TARG, "-");
2537 else if (*s == '+' || (*s == '-' && !looks_like_number(sv))) {
2538 sv_setsv_nomg(TARG, sv);
2539 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2549 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2550 if (S_negate_string(aTHX)) return NORMAL;
2552 SV * const sv = TOPs;
2555 /* It's publicly an integer */
2558 if (SvIVX(sv) == IV_MIN) {
2559 /* 2s complement assumption. */
2560 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) ==
2564 else if (SvUVX(sv) <= IV_MAX) {
2569 else if (SvIVX(sv) != IV_MIN) {
2573 #ifdef PERL_PRESERVE_IVUV
2580 if (SvNIOKp(sv) && (SvNIOK(sv) || !SvPOK(sv)))
2581 SETn(-SvNV_nomg(sv));
2582 else if (SvPOKp(sv) && SvIV_please_nomg(sv))
2583 goto oops_its_an_int;
2585 SETn(-SvNV_nomg(sv));
2593 tryAMAGICun_MG(not_amg, AMGf_set);
2594 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2599 S_scomplement(pTHX_ SV *targ, SV *sv)
2605 sv_copypv_nomg(TARG, sv);
2606 tmps = (U8*)SvPV_nomg(TARG, len);
2609 /* Calculate exact length, let's not estimate. */
2614 U8 * const send = tmps + len;
2615 U8 * const origtmps = tmps;
2616 const UV utf8flags = UTF8_ALLOW_ANYUV;
2618 while (tmps < send) {
2619 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2621 targlen += UVCHR_SKIP(~c);
2627 /* Now rewind strings and write them. */
2634 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
2635 deprecated_above_ff_msg, PL_op_desc[PL_op->op_type]);
2636 Newx(result, targlen + 1, U8);
2638 while (tmps < send) {
2639 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2641 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2644 sv_usepvn_flags(TARG, (char*)result, targlen,
2645 SV_HAS_TRAILING_NUL);
2652 Newx(result, nchar + 1, U8);
2654 while (tmps < send) {
2655 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2660 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2668 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2671 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2676 for ( ; anum > 0; anum--, tmps++)
2683 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2687 if (PL_op->op_private & HINT_INTEGER) {
2688 const IV i = ~SvIV_nomg(sv);
2692 const UV u = ~SvUV_nomg(sv);
2697 S_scomplement(aTHX_ TARG, sv);
2707 tryAMAGICun_MG(compl_amg, AMGf_numeric|AMGf_numarg);
2710 if (PL_op->op_private & HINT_INTEGER) {
2711 const IV i = ~SvIV_nomg(sv);
2715 const UV u = ~SvUV_nomg(sv);
2725 tryAMAGICun_MG(scompl_amg, AMGf_numeric);
2728 S_scomplement(aTHX_ TARG, sv);
2734 /* integer versions of some of the above */
2739 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2742 SETi( left * right );
2751 tryAMAGICbin_MG(div_amg, AMGf_assign);
2754 IV value = SvIV_nomg(right);
2756 DIE(aTHX_ "Illegal division by zero");
2757 num = SvIV_nomg(left);
2759 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2763 value = num / value;
2771 /* This is the vanilla old i_modulo. */
2773 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2777 DIE(aTHX_ "Illegal modulus zero");
2778 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2782 SETi( left % right );
2787 #if defined(__GLIBC__) && IVSIZE == 8 \
2788 && ( __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8))
2790 PP(pp_i_modulo_glibc_bugfix)
2792 /* This is the i_modulo with the workaround for the _moddi3 bug
2793 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2794 * See below for pp_i_modulo. */
2796 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2800 DIE(aTHX_ "Illegal modulus zero");
2801 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2805 SETi( left % PERL_ABS(right) );
2814 tryAMAGICbin_MG(add_amg, AMGf_assign);
2816 dPOPTOPiirl_ul_nomg;
2817 SETi( left + right );
2825 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2827 dPOPTOPiirl_ul_nomg;
2828 SETi( left - right );
2836 tryAMAGICbin_MG(lt_amg, AMGf_set);
2839 SETs(boolSV(left < right));
2847 tryAMAGICbin_MG(gt_amg, AMGf_set);
2850 SETs(boolSV(left > right));
2858 tryAMAGICbin_MG(le_amg, AMGf_set);
2861 SETs(boolSV(left <= right));
2869 tryAMAGICbin_MG(ge_amg, AMGf_set);
2872 SETs(boolSV(left >= right));
2880 tryAMAGICbin_MG(eq_amg, AMGf_set);
2883 SETs(boolSV(left == right));
2891 tryAMAGICbin_MG(ne_amg, AMGf_set);
2894 SETs(boolSV(left != right));
2902 tryAMAGICbin_MG(ncmp_amg, 0);
2909 else if (left < right)
2921 tryAMAGICun_MG(neg_amg, 0);
2922 if (S_negate_string(aTHX)) return NORMAL;
2924 SV * const sv = TOPs;
2925 IV const i = SvIV_nomg(sv);
2931 /* High falutin' math. */
2936 tryAMAGICbin_MG(atan2_amg, 0);
2939 SETn(Perl_atan2(left, right));
2945 /* also used for: pp_cos() pp_exp() pp_log() pp_sqrt() */
2950 int amg_type = fallback_amg;
2951 const char *neg_report = NULL;
2952 const int op_type = PL_op->op_type;
2955 case OP_SIN: amg_type = sin_amg; break;
2956 case OP_COS: amg_type = cos_amg; break;
2957 case OP_EXP: amg_type = exp_amg; break;
2958 case OP_LOG: amg_type = log_amg; neg_report = "log"; break;
2959 case OP_SQRT: amg_type = sqrt_amg; neg_report = "sqrt"; break;
2962 assert(amg_type != fallback_amg);
2964 tryAMAGICun_MG(amg_type, 0);
2966 SV * const arg = TOPs;
2967 const NV value = SvNV_nomg(arg);
2973 if (neg_report) { /* log or sqrt */
2975 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2976 ! Perl_isnan(value) &&
2978 (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0))) {
2979 SET_NUMERIC_STANDARD();
2980 /* diag_listed_as: Can't take log of %g */
2981 DIE(aTHX_ "Can't take %s of %" NVgf, neg_report, value);
2986 case OP_SIN: result = Perl_sin(value); break;
2987 case OP_COS: result = Perl_cos(value); break;
2988 case OP_EXP: result = Perl_exp(value); break;
2989 case OP_LOG: result = Perl_log(value); break;
2990 case OP_SQRT: result = Perl_sqrt(value); break;
2997 /* Support Configure command-line overrides for rand() functions.
2998 After 5.005, perhaps we should replace this by Configure support
2999 for drand48(), random(), or rand(). For 5.005, though, maintain
3000 compatibility by calling rand() but allow the user to override it.
3001 See INSTALL for details. --Andy Dougherty 15 July 1998
3003 /* Now it's after 5.005, and Configure supports drand48() and random(),
3004 in addition to rand(). So the overrides should not be needed any more.
3005 --Jarkko Hietaniemi 27 September 1998
3010 if (!PL_srand_called) {
3011 (void)seedDrand01((Rand_seed_t)seed());
3012 PL_srand_called = TRUE;
3024 SV * const sv = POPs;
3030 /* 1 of 2 things can be carried through SvNV, SP or TARG, SP was carried */
3031 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
3032 if (! Perl_isnan(value) && value == 0.0)
3042 sv_setnv_mg(TARG, value);
3053 if (MAXARG >= 1 && (TOPs || POPs)) {
3060 pv = SvPV(top, len);
3061 flags = grok_number(pv, len, &anum);
3063 if (!(flags & IS_NUMBER_IN_UV)) {
3064 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
3065 "Integer overflow in srand");
3073 (void)seedDrand01((Rand_seed_t)anum);
3074 PL_srand_called = TRUE;
3078 /* Historically srand always returned true. We can avoid breaking
3080 sv_setpvs(TARG, "0 but true");
3089 tryAMAGICun_MG(int_amg, AMGf_numeric);
3091 SV * const sv = TOPs;
3092 const IV iv = SvIV_nomg(sv);
3093 /* XXX it's arguable that compiler casting to IV might be subtly
3094 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
3095 else preferring IV has introduced a subtle behaviour change bug. OTOH
3096 relying on floating point to be accurate is a bug. */
3101 else if (SvIOK(sv)) {
3103 SETu(SvUV_nomg(sv));
3108 const NV value = SvNV_nomg(sv);
3109 if (UNLIKELY(Perl_isinfnan(value)))
3111 else if (value >= 0.0) {
3112 if (value < (NV)UV_MAX + 0.5) {
3115 SETn(Perl_floor(value));
3119 if (value > (NV)IV_MIN - 0.5) {
3122 SETn(Perl_ceil(value));
3133 tryAMAGICun_MG(abs_amg, AMGf_numeric);
3135 SV * const sv = TOPs;
3136 /* This will cache the NV value if string isn't actually integer */
3137 const IV iv = SvIV_nomg(sv);
3142 else if (SvIOK(sv)) {
3143 /* IVX is precise */
3145 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
3153 /* 2s complement assumption. Also, not really needed as
3154 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
3160 const NV value = SvNV_nomg(sv);
3171 /* also used for: pp_hex() */
3177 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
3181 SV* const sv = TOPs;
3183 tmps = (SvPV_const(sv, len));
3185 /* If Unicode, try to downgrade
3186 * If not possible, croak. */
3187 SV* const tsv = sv_2mortal(newSVsv(sv));
3190 sv_utf8_downgrade(tsv, FALSE);
3191 tmps = SvPV_const(tsv, len);
3193 if (PL_op->op_type == OP_HEX)
3196 while (*tmps && len && isSPACE(*tmps))
3200 if (isALPHA_FOLD_EQ(*tmps, 'x')) {
3202 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
3204 else if (isALPHA_FOLD_EQ(*tmps, 'b'))
3205 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
3207 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
3209 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
3223 SV * const sv = TOPs;
3225 U32 in_bytes = IN_BYTES;
3226 /* simplest case shortcut */
3227 /* turn off SVf_UTF8 in tmp flags if HINT_BYTES on*/
3228 U32 svflags = (SvFLAGS(sv) ^ (in_bytes << 26)) & (SVf_POK|SVs_GMG|SVf_UTF8);
3229 STATIC_ASSERT_STMT(HINT_BYTES == 0x00000008 && SVf_UTF8 == 0x20000000 && (SVf_UTF8 == HINT_BYTES << 26));
3232 if(LIKELY(svflags == SVf_POK))
3234 if(svflags & SVs_GMG)
3237 if (!IN_BYTES) /* reread to avoid using an C auto/register */
3238 sv_setiv(TARG, (IV)sv_len_utf8_nomg(sv));
3242 /* unrolled SvPV_nomg_const(sv,len) */
3247 (void)sv_2pv_flags(sv, &len, 0|SV_CONST_RETURN);
3249 sv_setiv(TARG, (IV)(len));
3252 if (!SvPADTMP(TARG)) {
3254 } else { /* TARG is on stack at this point and is overwriten by SETs.
3255 This branch is the odd one out, so put TARG by default on
3256 stack earlier to let local SP go out of liveness sooner */
3263 return NORMAL; /* no putback, SP didn't move in this opcode */
3266 /* Returns false if substring is completely outside original string.
3267 No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must
3268 always be true for an explicit 0.
3271 Perl_translate_substr_offsets( STRLEN curlen, IV pos1_iv,
3272 bool pos1_is_uv, IV len_iv,
3273 bool len_is_uv, STRLEN *posp,
3279 PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
3281 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
3282 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3285 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
3288 if (len_iv || len_is_uv) {
3289 if (!len_is_uv && len_iv < 0) {
3290 pos2_iv = curlen + len_iv;
3292 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3295 } else { /* len_iv >= 0 */
3296 if (!pos1_is_uv && pos1_iv < 0) {
3297 pos2_iv = pos1_iv + len_iv;
3298 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3300 if ((UV)len_iv > curlen-(UV)pos1_iv)
3303 pos2_iv = pos1_iv+len_iv;
3313 if (!pos2_is_uv && pos2_iv < 0) {
3314 if (!pos1_is_uv && pos1_iv < 0)
3318 else if (!pos1_is_uv && pos1_iv < 0)
3321 if ((UV)pos2_iv < (UV)pos1_iv)
3323 if ((UV)pos2_iv > curlen)
3326 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3327 *posp = (STRLEN)( (UV)pos1_iv );
3328 *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3345 I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3346 const bool rvalue = (GIMME_V != G_VOID);
3349 const char *repl = NULL;
3351 int num_args = PL_op->op_private & 7;
3352 bool repl_need_utf8_upgrade = FALSE;
3356 if(!(repl_sv = POPs)) num_args--;
3358 if ((len_sv = POPs)) {
3359 len_iv = SvIV(len_sv);
3360 len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1;
3365 pos1_iv = SvIV(pos_sv);
3366 pos1_is_uv = SvIOK_UV(pos_sv);
3368 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) {
3372 if (lvalue && !repl_sv) {
3374 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3375 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3377 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3379 pos1_is_uv || pos1_iv >= 0
3380 ? (STRLEN)(UV)pos1_iv
3381 : (LvFLAGS(ret) |= LVf_NEG_OFF, (STRLEN)(UV)-pos1_iv);
3383 len_is_uv || len_iv > 0
3384 ? (STRLEN)(UV)len_iv
3385 : (LvFLAGS(ret) |= LVf_NEG_LEN, (STRLEN)(UV)-len_iv);
3387 PUSHs(ret); /* avoid SvSETMAGIC here */
3391 repl = SvPV_const(repl_sv, repl_len);
3394 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3395 "Attempt to use reference as lvalue in substr"
3397 tmps = SvPV_force_nomg(sv, curlen);
3398 if (DO_UTF8(repl_sv) && repl_len) {
3400 /* Upgrade the dest, and recalculate tmps in case the buffer
3401 * got reallocated; curlen may also have been changed */
3402 sv_utf8_upgrade_nomg(sv);
3403 tmps = SvPV_nomg(sv, curlen);
3406 else if (DO_UTF8(sv))
3407 repl_need_utf8_upgrade = TRUE;
3409 else tmps = SvPV_const(sv, curlen);
3411 utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen);
3412 if (utf8_curlen == curlen)
3415 curlen = utf8_curlen;
3421 STRLEN pos, len, byte_len, byte_pos;
3423 if (!translate_substr_offsets(
3424 curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len
3428 byte_pos = utf8_curlen
3429 ? sv_or_pv_pos_u2b(sv, tmps, pos, &byte_len) : pos;
3434 SvTAINTED_off(TARG); /* decontaminate */
3435 SvUTF8_off(TARG); /* decontaminate */
3436 sv_setpvn(TARG, tmps, byte_len);
3437 #ifdef USE_LOCALE_COLLATE
3438 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3445 SV* repl_sv_copy = NULL;
3447 if (repl_need_utf8_upgrade) {
3448 repl_sv_copy = newSVsv(repl_sv);
3449 sv_utf8_upgrade(repl_sv_copy);
3450 repl = SvPV_const(repl_sv_copy, repl_len);
3454 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3455 SvREFCNT_dec(repl_sv_copy);
3458 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST)
3468 Perl_croak(aTHX_ "substr outside of string");
3469 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3476 const IV size = POPi;
3477 SV* offsetsv = POPs;
3478 SV * const src = POPs;
3479 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3485 /* extract a STRLEN-ranged integer value from offsetsv into offset,
3486 * or flag that its out of range */
3488 IV iv = SvIV(offsetsv);
3490 /* avoid a large UV being wrapped to a negative value */
3491 if (SvIOK_UV(offsetsv) && SvUVX(offsetsv) > (UV)IV_MAX)
3492 errflags = LVf_OUT_OF_RANGE;
3494 errflags = (LVf_NEG_OFF|LVf_OUT_OF_RANGE);
3495 #if PTRSIZE < IVSIZE
3496 else if (iv > Size_t_MAX)
3497 errflags = LVf_OUT_OF_RANGE;
3500 offset = (STRLEN)iv;
3503 retuv = errflags ? 0 : do_vecget(src, offset, size);
3505 if (lvalue) { /* it's an lvalue! */
3506 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3507 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3509 LvTARG(ret) = SvREFCNT_inc_simple(src);
3510 LvTARGOFF(ret) = offset;
3511 LvTARGLEN(ret) = size;
3512 LvFLAGS(ret) = errflags;
3516 SvTAINTED_off(TARG); /* decontaminate */
3520 sv_setuv(ret, retuv);
3528 /* also used for: pp_rindex() */
3541 const char *little_p;
3544 const bool is_index = PL_op->op_type == OP_INDEX;
3545 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
3551 big_p = SvPV_const(big, biglen);
3552 little_p = SvPV_const(little, llen);
3554 big_utf8 = DO_UTF8(big);
3555 little_utf8 = DO_UTF8(little);
3556 if (big_utf8 ^ little_utf8) {
3557 /* One needs to be upgraded. */
3559 /* Well, maybe instead we might be able to downgrade the small
3561 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3564 /* If the large string is ISO-8859-1, and it's not possible to
3565 convert the small string to ISO-8859-1, then there is no
3566 way that it could be found anywhere by index. */
3571 /* At this point, pv is a malloc()ed string. So donate it to temp
3572 to ensure it will get free()d */
3573 little = temp = newSV(0);
3574 sv_usepvn(temp, pv, llen);
3575 little_p = SvPVX(little);
3577 temp = newSVpvn(little_p, llen);
3579 sv_utf8_upgrade(temp);
3581 little_p = SvPV_const(little, llen);
3584 if (SvGAMAGIC(big)) {
3585 /* Life just becomes a lot easier if I use a temporary here.
3586 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3587 will trigger magic and overloading again, as will fbm_instr()
3589 big = newSVpvn_flags(big_p, biglen,
3590 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3593 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3594 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3595 warn on undef, and we've already triggered a warning with the
3596 SvPV_const some lines above. We can't remove that, as we need to
3597 call some SvPV to trigger overloading early and find out if the
3599 This is all getting too messy. The API isn't quite clean enough,
3600 because data access has side effects.
3602 little = newSVpvn_flags(little_p, llen,
3603 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3604 little_p = SvPVX(little);
3608 offset = is_index ? 0 : biglen;
3610 if (big_utf8 && offset > 0)
3611 offset = sv_pos_u2b_flags(big, offset, 0, SV_CONST_RETURN);
3617 else if (offset > (SSize_t)biglen)
3619 if (!(little_p = is_index
3620 ? fbm_instr((unsigned char*)big_p + offset,
3621 (unsigned char*)big_p + biglen, little, 0)
3622 : rninstr(big_p, big_p + offset,
3623 little_p, little_p + llen)))
3626 retval = little_p - big_p;
3627 if (retval > 1 && big_utf8)
3628 retval = sv_pos_b2u_flags(big, retval, SV_CONST_RETURN);
3638 dSP; dMARK; dORIGMARK; dTARGET;
3639 SvTAINTED_off(TARG);
3640 do_sprintf(TARG, SP-MARK, MARK+1);
3641 TAINT_IF(SvTAINTED(TARG));
3653 const U8 *s = (U8*)SvPV_const(argsv, len);
3656 ? (len ? utf8n_to_uvchr(s, len, 0, UTF8_ALLOW_ANYUV) : 0)
3670 if (UNLIKELY(SvAMAGIC(top)))
3672 if (UNLIKELY(isinfnansv(top)))
3673 Perl_croak(aTHX_ "Cannot chr %" NVgf, SvNV(top));
3675 if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */
3676 && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0)
3678 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top)))
3679 && SvNV_nomg(top) < 0.0)))
3681 if (ckWARN(WARN_UTF8)) {
3682 if (SvGMAGICAL(top)) {
3683 SV *top2 = sv_newmortal();
3684 sv_setsv_nomg(top2, top);
3687 Perl_warner(aTHX_ packWARN(WARN_UTF8),
3688 "Invalid negative number (%" SVf ") in chr", SVfARG(top));
3690 value = UNICODE_REPLACEMENT;
3692 value = SvUV_nomg(top);
3696 SvUPGRADE(TARG,SVt_PV);
3698 if (value > 255 && !IN_BYTES) {
3699 SvGROW(TARG, (STRLEN)UVCHR_SKIP(value)+1);
3700 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3701 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3703 (void)SvPOK_only(TARG);
3712 *tmps++ = (char)value;
3714 (void)SvPOK_only(TARG);
3726 const char *tmps = SvPV_const(left, len);
3728 if (DO_UTF8(left)) {
3729 /* If Unicode, try to downgrade.
3730 * If not possible, croak.
3731 * Yes, we made this up. */
3732 SV* const tsv = newSVpvn_flags(tmps, len, SVf_UTF8|SVs_TEMP);
3734 sv_utf8_downgrade(tsv, FALSE);
3735 tmps = SvPV_const(tsv, len);
3737 # ifdef USE_ITHREADS
3739 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3740 /* This should be threadsafe because in ithreads there is only
3741 * one thread per interpreter. If this would not be true,
3742 * we would need a mutex to protect this malloc. */
3743 PL_reentrant_buffer->_crypt_struct_buffer =
3744 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3745 #if defined(__GLIBC__) || defined(__EMX__)
3746 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3747 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3748 /* work around glibc-2.2.5 bug */
3749 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3753 # endif /* HAS_CRYPT_R */
3754 # endif /* USE_ITHREADS */
3756 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3758 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3765 "The crypt() function is unimplemented due to excessive paranoia.");
3769 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3770 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3773 /* also used for: pp_lcfirst() */
3777 /* Actually is both lcfirst() and ucfirst(). Only the first character
3778 * changes. This means that possibly we can change in-place, ie., just
3779 * take the source and change that one character and store it back, but not
3780 * if read-only etc, or if the length changes */
3784 STRLEN slen; /* slen is the byte length of the whole SV. */
3787 bool inplace; /* ? Convert first char only, in-place */
3788 bool doing_utf8 = FALSE; /* ? using utf8 */
3789 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3790 const int op_type = PL_op->op_type;
3793 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3794 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3795 * stored as UTF-8 at s. */
3796 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3797 * lowercased) character stored in tmpbuf. May be either
3798 * UTF-8 or not, but in either case is the number of bytes */
3800 s = (const U8*)SvPV_const(source, slen);
3802 /* We may be able to get away with changing only the first character, in
3803 * place, but not if read-only, etc. Later we may discover more reasons to
3804 * not convert in-place. */
3805 inplace = !SvREADONLY(source) && SvPADTMP(source);
3807 /* First calculate what the changed first character should be. This affects
3808 * whether we can just swap it out, leaving the rest of the string unchanged,
3809 * or even if have to convert the dest to UTF-8 when the source isn't */
3811 if (! slen) { /* If empty */
3812 need = 1; /* still need a trailing NUL */
3815 else if (DO_UTF8(source)) { /* Is the source utf8? */
3818 if (op_type == OP_UCFIRST) {
3819 #ifdef USE_LOCALE_CTYPE
3820 _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
3822 _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, 0);
3826 #ifdef USE_LOCALE_CTYPE
3827 _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
3829 _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, 0);
3833 /* we can't do in-place if the length changes. */
3834 if (ulen != tculen) inplace = FALSE;
3835 need = slen + 1 - ulen + tculen;
3837 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3838 * latin1 is treated as caseless. Note that a locale takes
3840 ulen = 1; /* Original character is 1 byte */
3841 tculen = 1; /* Most characters will require one byte, but this will
3842 * need to be overridden for the tricky ones */
3845 if (op_type == OP_LCFIRST) {
3847 /* lower case the first letter: no trickiness for any character */
3848 #ifdef USE_LOCALE_CTYPE
3849 if (IN_LC_RUNTIME(LC_CTYPE)) {
3850 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
3851 *tmpbuf = toLOWER_LC(*s);
3856 *tmpbuf = (IN_UNI_8_BIT)
3857 ? toLOWER_LATIN1(*s)
3861 #ifdef USE_LOCALE_CTYPE
3863 else if (IN_LC_RUNTIME(LC_CTYPE)) {
3864 if (IN_UTF8_CTYPE_LOCALE) {
3868 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
3869 *tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any
3870 locales have upper and title case
3874 else if (! IN_UNI_8_BIT) {
3875 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3876 * on EBCDIC machines whatever the
3877 * native function does */
3880 /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is
3881 * UTF-8, which we treat as not in locale), and cased latin1 */
3883 #ifdef USE_LOCALE_CTYPE
3887 title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
3889 assert(tculen == 2);
3891 /* If the result is an upper Latin1-range character, it can
3892 * still be represented in one byte, which is its ordinal */
3893 if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) {
3894 *tmpbuf = (U8) title_ord;
3898 /* Otherwise it became more than one ASCII character (in
3899 * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to
3900 * beyond Latin1, so the number of bytes changed, so can't
3901 * replace just the first character in place. */
3904 /* If the result won't fit in a byte, the entire result
3905 * will have to be in UTF-8. Assume worst case sizing in
3906 * conversion. (all latin1 characters occupy at most two
3908 if (title_ord > 255) {
3910 convert_source_to_utf8 = TRUE;
3911 need = slen * 2 + 1;
3913 /* The (converted) UTF-8 and UTF-EBCDIC lengths of all
3914 * (both) characters whose title case is above 255 is
3918 else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */
3919 need = slen + 1 + 1;
3923 } /* End of use Unicode (Latin1) semantics */
3924 } /* End of changing the case of the first character */
3926 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3927 * generate the result */
3930 /* We can convert in place. This means we change just the first
3931 * character without disturbing the rest; no need to grow */
3933 s = d = (U8*)SvPV_force_nomg(source, slen);
3939 /* Here, we can't convert in place; we earlier calculated how much
3940 * space we will need, so grow to accommodate that */
3941 SvUPGRADE(dest, SVt_PV);
3942 d = (U8*)SvGROW(dest, need);
3943 (void)SvPOK_only(dest);
3950 if (! convert_source_to_utf8) {
3952 /* Here both source and dest are in UTF-8, but have to create
3953 * the entire output. We initialize the result to be the
3954 * title/lower cased first character, and then append the rest
3956 sv_setpvn(dest, (char*)tmpbuf, tculen);
3958 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3962 const U8 *const send = s + slen;
3964 /* Here the dest needs to be in UTF-8, but the source isn't,
3965 * except we earlier UTF-8'd the first character of the source
3966 * into tmpbuf. First put that into dest, and then append the
3967 * rest of the source, converting it to UTF-8 as we go. */
3969 /* Assert tculen is 2 here because the only two characters that
3970 * get to this part of the code have 2-byte UTF-8 equivalents */
3972 *d++ = *(tmpbuf + 1);
3973 s++; /* We have just processed the 1st char */
3975 for (; s < send; s++) {
3976 d = uvchr_to_utf8(d, *s);
3979 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3983 else { /* in-place UTF-8. Just overwrite the first character */
3984 Copy(tmpbuf, d, tculen, U8);
3985 SvCUR_set(dest, need - 1);
3989 else { /* Neither source nor dest are in or need to be UTF-8 */
3991 if (inplace) { /* in-place, only need to change the 1st char */
3994 else { /* Not in-place */
3996 /* Copy the case-changed character(s) from tmpbuf */
3997 Copy(tmpbuf, d, tculen, U8);
3998 d += tculen - 1; /* Code below expects d to point to final
3999 * character stored */
4002 else { /* empty source */
4003 /* See bug #39028: Don't taint if empty */
4007 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
4008 * the destination to retain that flag */
4009 if (SvUTF8(source) && ! IN_BYTES)
4012 if (!inplace) { /* Finish the rest of the string, unchanged */
4013 /* This will copy the trailing NUL */
4014 Copy(s + 1, d + 1, slen, U8);
4015 SvCUR_set(dest, need - 1);
4018 #ifdef USE_LOCALE_CTYPE
4019 if (IN_LC_RUNTIME(LC_CTYPE)) {
4024 if (dest != source && SvTAINTED(source))
4030 /* There's so much setup/teardown code common between uc and lc, I wonder if
4031 it would be worth merging the two, and just having a switch outside each
4032 of the three tight loops. There is less and less commonality though */
4045 if ( SvPADTMP(source)
4046 && !SvREADONLY(source) && SvPOK(source)
4049 #ifdef USE_LOCALE_CTYPE
4050 (IN_LC_RUNTIME(LC_CTYPE))
4051 ? ! IN_UTF8_CTYPE_LOCALE
4057 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
4058 * make the loop tight, so we overwrite the source with the dest before
4059 * looking at it, and we need to look at the original source
4060 * afterwards. There would also need to be code added to handle
4061 * switching to not in-place in midstream if we run into characters
4062 * that change the length. Since being in locale overrides UNI_8_BIT,
4063 * that latter becomes irrelevant in the above test; instead for
4064 * locale, the size can't normally change, except if the locale is a
4067 s = d = (U8*)SvPV_force_nomg(source, len);
4074 s = (const U8*)SvPV_nomg_const(source, len);
4077 SvUPGRADE(dest, SVt_PV);
4078 d = (U8*)SvGROW(dest, min);
4079 (void)SvPOK_only(dest);
4084 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4085 to check DO_UTF8 again here. */
4087 if (DO_UTF8(source)) {
4088 const U8 *const send = s + len;
4089 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4091 /* All occurrences of these are to be moved to follow any other marks.
4092 * This is context-dependent. We may not be passed enough context to
4093 * move the iota subscript beyond all of them, but we do the best we can
4094 * with what we're given. The result is always better than if we
4095 * hadn't done this. And, the problem would only arise if we are
4096 * passed a character without all its combining marks, which would be
4097 * the caller's mistake. The information this is based on comes from a
4098 * comment in Unicode SpecialCasing.txt, (and the Standard's text
4099 * itself) and so can't be checked properly to see if it ever gets
4100 * revised. But the likelihood of it changing is remote */
4101 bool in_iota_subscript = FALSE;
4107 if (in_iota_subscript && ! _is_utf8_mark(s)) {
4109 /* A non-mark. Time to output the iota subscript */
4110 Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
4111 d += capital_iota_len;
4112 in_iota_subscript = FALSE;
4115 /* Then handle the current character. Get the changed case value
4116 * and copy it to the output buffer */
4119 #ifdef USE_LOCALE_CTYPE
4120 uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
4122 uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, 0);
4124 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
4125 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
4126 if (uv == GREEK_CAPITAL_LETTER_IOTA
4127 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
4129 in_iota_subscript = TRUE;
4132 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4133 /* If the eventually required minimum size outgrows the
4134 * available space, we need to grow. */
4135 const UV o = d - (U8*)SvPVX_const(dest);
4137 /* If someone uppercases one million U+03B0s we SvGROW()
4138 * one million times. Or we could try guessing how much to
4139 * allocate without allocating too much. Such is life.
4140 * See corresponding comment in lc code for another option
4142 d = o + (U8*) SvGROW(dest, min);
4144 Copy(tmpbuf, d, ulen, U8);
4149 if (in_iota_subscript) {
4150 Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
4151 d += capital_iota_len;
4156 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4158 else { /* Not UTF-8 */
4160 const U8 *const send = s + len;
4162 /* Use locale casing if in locale; regular style if not treating
4163 * latin1 as having case; otherwise the latin1 casing. Do the
4164 * whole thing in a tight loop, for speed, */
4165 #ifdef USE_LOCALE_CTYPE
4166 if (IN_LC_RUNTIME(LC_CTYPE)) {
4167 if (IN_UTF8_CTYPE_LOCALE) {
4170 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4171 for (; s < send; d++, s++)
4172 *d = (U8) toUPPER_LC(*s);
4176 if (! IN_UNI_8_BIT) {
4177 for (; s < send; d++, s++) {
4182 #ifdef USE_LOCALE_CTYPE
4185 for (; s < send; d++, s++) {
4186 *d = toUPPER_LATIN1_MOD(*s);
4187 if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
4191 /* The mainstream case is the tight loop above. To avoid
4192 * extra tests in that, all three characters that require
4193 * special handling are mapped by the MOD to the one tested
4195 * Use the source to distinguish between the three cases */
4197 #if UNICODE_MAJOR_VERSION > 2 \
4198 || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \
4199 && UNICODE_DOT_DOT_VERSION >= 8)
4200 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4202 /* uc() of this requires 2 characters, but they are
4203 * ASCII. If not enough room, grow the string */
4204 if (SvLEN(dest) < ++min) {
4205 const UV o = d - (U8*)SvPVX_const(dest);
4206 d = o + (U8*) SvGROW(dest, min);
4208 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4209 continue; /* Back to the tight loop; still in ASCII */
4213 /* The other two special handling characters have their
4214 * upper cases outside the latin1 range, hence need to be
4215 * in UTF-8, so the whole result needs to be in UTF-8. So,
4216 * here we are somewhere in the middle of processing a
4217 * non-UTF-8 string, and realize that we will have to convert
4218 * the whole thing to UTF-8. What to do? There are
4219 * several possibilities. The simplest to code is to
4220 * convert what we have so far, set a flag, and continue on
4221 * in the loop. The flag would be tested each time through
4222 * the loop, and if set, the next character would be
4223 * converted to UTF-8 and stored. But, I (khw) didn't want
4224 * to slow down the mainstream case at all for this fairly
4225 * rare case, so I didn't want to add a test that didn't
4226 * absolutely have to be there in the loop, besides the
4227 * possibility that it would get too complicated for
4228 * optimizers to deal with. Another possibility is to just
4229 * give up, convert the source to UTF-8, and restart the
4230 * function that way. Another possibility is to convert
4231 * both what has already been processed and what is yet to
4232 * come separately to UTF-8, then jump into the loop that
4233 * handles UTF-8. But the most efficient time-wise of the
4234 * ones I could think of is what follows, and turned out to
4235 * not require much extra code. */
4237 /* Convert what we have so far into UTF-8, telling the
4238 * function that we know it should be converted, and to
4239 * allow extra space for what we haven't processed yet.
4240 * Assume the worst case space requirements for converting
4241 * what we haven't processed so far: that it will require
4242 * two bytes for each remaining source character, plus the
4243 * NUL at the end. This may cause the string pointer to
4244 * move, so re-find it. */
4246 len = d - (U8*)SvPVX_const(dest);
4247 SvCUR_set(dest, len);
4248 len = sv_utf8_upgrade_flags_grow(dest,
4249 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4251 d = (U8*)SvPVX(dest) + len;
4253 /* Now process the remainder of the source, converting to
4254 * upper and UTF-8. If a resulting byte is invariant in
4255 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4256 * append it to the output. */
4257 for (; s < send; s++) {
4258 (void) _to_upper_title_latin1(*s, d, &len, 'S');
4262 /* Here have processed the whole source; no need to continue
4263 * with the outer loop. Each character has been converted
4264 * to upper case and converted to UTF-8 */
4267 } /* End of processing all latin1-style chars */
4268 } /* End of processing all chars */
4269 } /* End of source is not empty */
4271 if (source != dest) {
4272 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4273 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4275 } /* End of isn't utf8 */
4276 #ifdef USE_LOCALE_CTYPE
4277 if (IN_LC_RUNTIME(LC_CTYPE)) {
4282 if (dest != source && SvTAINTED(source))
4300 if ( SvPADTMP(source)
4301 && !SvREADONLY(source) && SvPOK(source)
4302 && !DO_UTF8(source)) {
4304 /* We can convert in place, as lowercasing anything in the latin1 range
4305 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4307 s = d = (U8*)SvPV_force_nomg(source, len);
4314 s = (const U8*)SvPV_nomg_const(source, len);
4317 SvUPGRADE(dest, SVt_PV);
4318 d = (U8*)SvGROW(dest, min);
4319 (void)SvPOK_only(dest);
4324 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4325 to check DO_UTF8 again here. */
4327 if (DO_UTF8(source)) {
4328 const U8 *const send = s + len;
4329 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4332 const STRLEN u = UTF8SKIP(s);
4335 #ifdef USE_LOCALE_CTYPE
4336 _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
4338 _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, 0);
4341 /* Here is where we would do context-sensitive actions. See the
4342 * commit message for 86510fb15 for why there isn't any */
4344 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4346 /* If the eventually required minimum size outgrows the
4347 * available space, we need to grow. */
4348 const UV o = d - (U8*)SvPVX_const(dest);
4350 /* If someone lowercases one million U+0130s we SvGROW() one
4351 * million times. Or we could try guessing how much to
4352 * allocate without allocating too much. Such is life.
4353 * Another option would be to grow an extra byte or two more
4354 * each time we need to grow, which would cut down the million
4355 * to 500K, with little waste */
4356 d = o + (U8*) SvGROW(dest, min);
4359 /* Copy the newly lowercased letter to the output buffer we're
4361 Copy(tmpbuf, d, ulen, U8);
4364 } /* End of looping through the source string */
4367 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4368 } else { /* Not utf8 */
4370 const U8 *const send = s + len;
4372 /* Use locale casing if in locale; regular style if not treating
4373 * latin1 as having case; otherwise the latin1 casing. Do the
4374 * whole thing in a tight loop, for speed, */
4375 #ifdef USE_LOCALE_CTYPE
4376 if (IN_LC_RUNTIME(LC_CTYPE)) {
4377 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4378 for (; s < send; d++, s++)
4379 *d = toLOWER_LC(*s);
4383 if (! IN_UNI_8_BIT) {
4384 for (; s < send; d++, s++) {
4389 for (; s < send; d++, s++) {
4390 *d = toLOWER_LATIN1(*s);
4394 if (source != dest) {
4396 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4399 #ifdef USE_LOCALE_CTYPE
4400 if (IN_LC_RUNTIME(LC_CTYPE)) {
4405 if (dest != source && SvTAINTED(source))
4414 SV * const sv = TOPs;
4416 const char *s = SvPV_const(sv,len);
4418 SvUTF8_off(TARG); /* decontaminate */
4421 SvUPGRADE(TARG, SVt_PV);
4422 SvGROW(TARG, (len * 2) + 1);
4426 STRLEN ulen = UTF8SKIP(s);
4427 bool to_quote = FALSE;
4429 if (UTF8_IS_INVARIANT(*s)) {
4430 if (_isQUOTEMETA(*s)) {
4434 else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, s + len)) {
4436 #ifdef USE_LOCALE_CTYPE
4437 /* In locale, we quote all non-ASCII Latin1 chars.
4438 * Otherwise use the quoting rules */
4440 IN_LC_RUNTIME(LC_CTYPE)
4443 _isQUOTEMETA(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s + 1))))
4448 else if (is_QUOTEMETA_high(s)) {
4463 else if (IN_UNI_8_BIT) {
4465 if (_isQUOTEMETA(*s))
4471 /* For non UNI_8_BIT (and hence in locale) just quote all \W
4472 * including everything above ASCII */
4474 if (!isWORDCHAR_A(*s))
4480 SvCUR_set(TARG, d - SvPVX_const(TARG));
4481 (void)SvPOK_only_UTF8(TARG);
4484 sv_setpvn(TARG, s, len);
4500 U8 tmpbuf[UTF8_MAXBYTES_CASE + 1];
4501 #if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \
4502 || (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \
4503 || UNICODE_DOT_DOT_VERSION > 0)
4504 const bool full_folding = TRUE; /* This variable is here so we can easily
4505 move to more generality later */
4507 const bool full_folding = FALSE;
4509 const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 )
4510 #ifdef USE_LOCALE_CTYPE
4511 | ( IN_LC_RUNTIME(LC_CTYPE) ? FOLD_FLAGS_LOCALE : 0 )
4515 /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me.
4516 * You are welcome(?) -Hugmeir
4524 s = (const U8*)SvPV_nomg_const(source, len);
4526 if (ckWARN(WARN_UNINITIALIZED))
4527 report_uninit(source);
4534 SvUPGRADE(dest, SVt_PV);
4535 d = (U8*)SvGROW(dest, min);
4536 (void)SvPOK_only(dest);
4541 if (DO_UTF8(source)) { /* UTF-8 flagged string. */
4543 const STRLEN u = UTF8SKIP(s);
4546 _toFOLD_utf8_flags(s, send, tmpbuf, &ulen, flags);
4548 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4549 const UV o = d - (U8*)SvPVX_const(dest);
4550 d = o + (U8*) SvGROW(dest, min);
4553 Copy(tmpbuf, d, ulen, U8);
4558 } /* Unflagged string */
4560 #ifdef USE_LOCALE_CTYPE
4561 if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */
4562 if (IN_UTF8_CTYPE_LOCALE) {
4563 goto do_uni_folding;
4565 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4566 for (; s < send; d++, s++)
4567 *d = (U8) toFOLD_LC(*s);
4571 if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
4572 for (; s < send; d++, s++)
4576 #ifdef USE_LOCALE_CTYPE
4579 /* For ASCII and the Latin-1 range, there's only two troublesome
4580 * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
4581 * casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which
4582 * under any fold becomes \x{3BC} (\N{GREEK SMALL LETTER MU}) --
4583 * For the rest, the casefold is their lowercase. */
4584 for (; s < send; d++, s++) {
4585 if (*s == MICRO_SIGN) {
4586 /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU},
4587 * which is outside of the latin-1 range. There's a couple
4588 * of ways to deal with this -- khw discusses them in
4589 * pp_lc/uc, so go there :) What we do here is upgrade what
4590 * we had already casefolded, then enter an inner loop that
4591 * appends the rest of the characters as UTF-8. */
4592 len = d - (U8*)SvPVX_const(dest);
4593 SvCUR_set(dest, len);
4594 len = sv_utf8_upgrade_flags_grow(dest,
4595 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4596 /* The max expansion for latin1
4597 * chars is 1 byte becomes 2 */
4599 d = (U8*)SvPVX(dest) + len;
4601 Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8);
4604 for (; s < send; s++) {
4606 UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags);
4607 if UVCHR_IS_INVARIANT(fc) {
4609 && *s == LATIN_SMALL_LETTER_SHARP_S)
4618 Copy(tmpbuf, d, ulen, U8);
4624 else if (full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) {
4625 /* Under full casefolding, LATIN SMALL LETTER SHARP S
4626 * becomes "ss", which may require growing the SV. */
4627 if (SvLEN(dest) < ++min) {
4628 const UV o = d - (U8*)SvPVX_const(dest);
4629 d = o + (U8*) SvGROW(dest, min);
4634 else { /* If it's not one of those two, the fold is their lower
4636 *d = toLOWER_LATIN1(*s);
4642 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4644 #ifdef USE_LOCALE_CTYPE
4645 if (IN_LC_RUNTIME(LC_CTYPE)) {
4650 if (SvTAINTED(source))
4660 dSP; dMARK; dORIGMARK;
4661 AV *const av = MUTABLE_AV(POPs);
4662 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4664 if (SvTYPE(av) == SVt_PVAV) {
4665 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4666 bool can_preserve = FALSE;
4672 can_preserve = SvCANEXISTDELETE(av);
4675 if (lval && localizing) {