3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
32 #include "regcharclass.h"
34 /* XXX I can't imagine anyone who doesn't have this actually _needs_
35 it, since pid_t is an integral type.
38 #ifdef NEED_GETPID_PROTO
39 extern Pid_t getpid (void);
43 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
44 * This switches them over to IEEE.
46 #if defined(LIBM_LIB_VERSION)
47 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
50 static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1;
51 static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1;
53 /* variations on pp_null */
58 if (GIMME_V == G_SCALAR)
65 /* This is also called directly by pp_lvavref. */
70 assert(SvTYPE(TARG) == SVt_PVAV);
71 if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO ))
72 if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) ))
73 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
76 if (PL_op->op_flags & OPf_REF) {
80 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
81 const I32 flags = is_lvalue_sub();
82 if (flags && !(flags & OPpENTERSUB_INARGS)) {
83 if (GIMME_V == G_SCALAR)
84 /* diag_listed_as: Can't return %s to lvalue scalar context */
85 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
92 if (gimme == G_ARRAY) {
93 /* XXX see also S_pushav in pp_hot.c */
94 const SSize_t maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
96 if (SvMAGICAL(TARG)) {
98 for (i=0; i < maxarg; i++) {
99 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
100 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
105 for (i=0; i < maxarg; i++) {
106 SV * const sv = AvARRAY((const AV *)TARG)[i];
107 SP[i+1] = sv ? sv : &PL_sv_undef;
112 else if (gimme == G_SCALAR) {
113 SV* const sv = sv_newmortal();
114 const SSize_t maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
115 sv_setiv(sv, maxarg);
126 assert(SvTYPE(TARG) == SVt_PVHV);
128 if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO ))
129 if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) ))
130 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
132 if (PL_op->op_flags & OPf_REF)
134 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
135 const I32 flags = is_lvalue_sub();
136 if (flags && !(flags & OPpENTERSUB_INARGS)) {
137 if (GIMME_V == G_SCALAR)
138 /* diag_listed_as: Can't return %s to lvalue scalar context */
139 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
145 if (gimme == G_ARRAY) {
146 RETURNOP(Perl_do_kv(aTHX));
148 else if ((PL_op->op_private & OPpTRUEBOOL
149 || ( PL_op->op_private & OPpMAYBE_TRUEBOOL
150 && block_gimme() == G_VOID ))
151 && (!SvRMAGICAL(TARG) || !mg_find(TARG, PERL_MAGIC_tied))
153 SETs(HvUSEDKEYS(TARG) ? &PL_sv_yes : &PL_sv_no);
154 else if (gimme == G_SCALAR) {
155 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
164 assert(SvTYPE(TARG) == SVt_PVCV);
172 SvPADSTALE_off(TARG);
179 CV * const protocv = PadnamePROTOCV(
180 PadlistNAMESARRAY(CvPADLIST(find_runcv(NULL)))[ARGTARG]
182 assert(SvTYPE(TARG) == SVt_PVCV);
184 if (CvISXSUB(protocv)) { /* constant */
185 /* XXX Should we clone it here? */
186 /* If this changes to use SAVECLEARSV, we can move the SAVECLEARSV
187 to introcv and remove the SvPADSTALE_off. */
188 SAVEPADSVANDMORTALIZE(ARGTARG);
189 PAD_SVl(ARGTARG) = SvREFCNT_inc_simple_NN(protocv);
192 if (CvROOT(protocv)) {
193 assert(CvCLONE(protocv));
194 assert(!CvCLONED(protocv));
196 cv_clone_into(protocv,(CV *)TARG);
197 SAVECLEARSV(PAD_SVl(ARGTARG));
204 /* In some cases this function inspects PL_op. If this function is called
205 for new op types, more bool parameters may need to be added in place of
208 When noinit is true, the absence of a gv will cause a retval of undef.
209 This is unrelated to the cv-to-gv assignment case.
213 S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
216 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
219 sv = amagic_deref_call(sv, to_gv_amg);
223 if (SvTYPE(sv) == SVt_PVIO) {
224 GV * const gv = MUTABLE_GV(sv_newmortal());
225 gv_init(gv, 0, "__ANONIO__", 10, 0);
226 GvIOp(gv) = MUTABLE_IO(sv);
227 SvREFCNT_inc_void_NN(sv);
230 else if (!isGV_with_GP(sv)) {
231 Perl_die(aTHX_ "Not a GLOB reference");
235 if (!isGV_with_GP(sv)) {
237 /* If this is a 'my' scalar and flag is set then vivify
240 if (vivify_sv && sv != &PL_sv_undef) {
243 Perl_croak_no_modify();
244 if (cUNOP->op_targ) {
245 SV * const namesv = PAD_SV(cUNOP->op_targ);
246 HV *stash = CopSTASH(PL_curcop);
247 if (SvTYPE(stash) != SVt_PVHV) stash = NULL;
248 gv = MUTABLE_GV(newSV(0));
249 gv_init_sv(gv, stash, namesv, 0);
252 const char * const name = CopSTASHPV(PL_curcop);
253 gv = newGVgen_flags(name,
254 HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
255 SvREFCNT_inc_simple_void_NN(gv);
257 prepare_SV_for_RV(sv);
258 SvRV_set(sv, MUTABLE_SV(gv));
263 if (PL_op->op_flags & OPf_REF || strict) {
264 Perl_die(aTHX_ PL_no_usym, "a symbol");
266 if (ckWARN(WARN_UNINITIALIZED))
272 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
273 sv, GV_ADDMG, SVt_PVGV
282 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""),
286 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
287 == OPpDONT_INIT_GV) {
288 /* We are the target of a coderef assignment. Return
289 the scalar unchanged, and let pp_sasssign deal with
293 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
295 /* FAKE globs in the symbol table cause weird bugs (#77810) */
299 if (SvFAKE(sv) && !(PL_op->op_private & OPpALLOW_FAKE)) {
300 SV *newsv = sv_newmortal();
301 sv_setsv_flags(newsv, sv, 0);
313 sv, PL_op->op_private & OPpDEREF,
314 PL_op->op_private & HINT_STRICT_REFS,
315 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
316 || PL_op->op_type == OP_READLINE
318 if (PL_op->op_private & OPpLVAL_INTRO)
319 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
324 /* Helper function for pp_rv2sv and pp_rv2av */
326 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
327 const svtype type, SV ***spp)
331 PERL_ARGS_ASSERT_SOFTREF2XV;
333 if (PL_op->op_private & HINT_STRICT_REFS) {
335 Perl_die(aTHX_ PL_no_symref_sv, sv,
336 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), what);
338 Perl_die(aTHX_ PL_no_usym, what);
342 PL_op->op_flags & OPf_REF
344 Perl_die(aTHX_ PL_no_usym, what);
345 if (ckWARN(WARN_UNINITIALIZED))
347 if (type != SVt_PV && GIMME_V == G_ARRAY) {
351 **spp = &PL_sv_undef;
354 if ((PL_op->op_flags & OPf_SPECIAL) &&
355 !(PL_op->op_flags & OPf_MOD))
357 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
359 **spp = &PL_sv_undef;
364 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
377 sv = amagic_deref_call(sv, to_sv_amg);
381 if (SvTYPE(sv) >= SVt_PVAV)
382 DIE(aTHX_ "Not a SCALAR reference");
387 if (!isGV_with_GP(gv)) {
388 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
394 if (PL_op->op_flags & OPf_MOD) {
395 if (PL_op->op_private & OPpLVAL_INTRO) {
396 if (cUNOP->op_first->op_type == OP_NULL)
397 sv = save_scalar(MUTABLE_GV(TOPs));
399 sv = save_scalar(gv);
401 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
403 else if (PL_op->op_private & OPpDEREF)
404 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
413 AV * const av = MUTABLE_AV(TOPs);
414 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
416 SV ** const svp = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
418 *svp = newSV_type(SVt_PVMG);
419 sv_magic(*svp, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
423 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
432 if (PL_op->op_flags & OPf_MOD || LVRET) {
433 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));/* Not TARG RT#67838 */
434 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
436 LvTARG(ret) = SvREFCNT_inc_simple(sv);
437 SETs(ret); /* no SvSETMAGIC */
440 const MAGIC * const mg = mg_find_mglob(sv);
441 if (mg && mg->mg_len != -1) {
443 STRLEN i = mg->mg_len;
444 if (mg->mg_flags & MGf_BYTES && DO_UTF8(sv))
445 i = sv_pos_b2u_flags(sv, i, SV_GMAGIC|SV_CONST_RETURN);
459 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
461 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT))
462 == OPpMAY_RETURN_CONSTANT)
465 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
466 /* (But not in defined().) */
468 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
470 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
471 cv = SvTYPE(SvRV(gv)) == SVt_PVCV
472 ? MUTABLE_CV(SvRV(gv))
476 cv = MUTABLE_CV(&PL_sv_undef);
477 SETs(MUTABLE_SV(cv));
487 SV *ret = &PL_sv_undef;
489 if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs));
490 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
491 const char * s = SvPVX_const(TOPs);
492 if (strnEQ(s, "CORE::", 6)) {
493 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
495 DIE(aTHX_ "Can't find an opnumber for \"%" UTF8f "\"",
496 UTF8fARG(SvFLAGS(TOPs) & SVf_UTF8, SvCUR(TOPs)-6, s+6));
498 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
504 cv = sv_2cv(TOPs, &stash, &gv, 0);
506 ret = newSVpvn_flags(
507 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv)
517 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
519 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
521 PUSHs(MUTABLE_SV(cv));
535 if (GIMME_V != G_ARRAY) {
541 *MARK = &PL_sv_undef;
543 *MARK = refto(*MARK);
547 EXTEND_MORTAL(SP - MARK);
549 *MARK = refto(*MARK);
554 S_refto(pTHX_ SV *sv)
558 PERL_ARGS_ASSERT_REFTO;
560 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
563 if (!(sv = LvTARG(sv)))
566 SvREFCNT_inc_void_NN(sv);
568 else if (SvTYPE(sv) == SVt_PVAV) {
569 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
570 av_reify(MUTABLE_AV(sv));
572 SvREFCNT_inc_void_NN(sv);
574 else if (SvPADTMP(sv)) {
579 SvREFCNT_inc_void_NN(sv);
582 sv_upgrade(rv, SVt_IV);
591 SV * const sv = TOPs;
599 /* use the return value that is in a register, its the same as TARG */
600 TARG = sv_ref(TARG,SvRV(sv),TRUE);
615 stash = CopSTASH(PL_curcop);
616 if (SvTYPE(stash) != SVt_PVHV)
617 Perl_croak(aTHX_ "Attempt to bless into a freed package");
620 SV * const ssv = POPs;
624 if (!ssv) goto curstash;
627 if (!SvAMAGIC(ssv)) {
629 Perl_croak(aTHX_ "Attempt to bless into a reference");
631 /* SvAMAGIC is on here, but it only means potentially overloaded,
632 so after stringification: */
633 ptr = SvPV_nomg_const(ssv,len);
634 /* We need to check the flag again: */
635 if (!SvAMAGIC(ssv)) goto frog;
637 else ptr = SvPV_nomg_const(ssv,len);
639 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
640 "Explicit blessing to '' (assuming package main)");
641 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv));
644 (void)sv_bless(TOPs, stash);
654 const char * const elem = SvPV_const(sv, len);
655 GV * const gv = MUTABLE_GV(TOPs);
660 /* elem will always be NUL terminated. */
663 if (memEQs(elem, len, "ARRAY"))
665 tmpRef = MUTABLE_SV(GvAV(gv));
666 if (tmpRef && !AvREAL((const AV *)tmpRef)
667 && AvREIFY((const AV *)tmpRef))
668 av_reify(MUTABLE_AV(tmpRef));
672 if (memEQs(elem, len, "CODE"))
673 tmpRef = MUTABLE_SV(GvCVu(gv));
676 if (memEQs(elem, len, "FILEHANDLE")) {
677 tmpRef = MUTABLE_SV(GvIOp(gv));
680 if (memEQs(elem, len, "FORMAT"))
681 tmpRef = MUTABLE_SV(GvFORM(gv));
684 if (memEQs(elem, len, "GLOB"))
685 tmpRef = MUTABLE_SV(gv);
688 if (memEQs(elem, len, "HASH"))
689 tmpRef = MUTABLE_SV(GvHV(gv));
692 if (memEQs(elem, len, "IO"))
693 tmpRef = MUTABLE_SV(GvIOp(gv));
696 if (memEQs(elem, len, "NAME"))
697 sv = newSVhek(GvNAME_HEK(gv));
700 if (memEQs(elem, len, "PACKAGE")) {
701 const HV * const stash = GvSTASH(gv);
702 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
703 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
707 if (memEQs(elem, len, "SCALAR"))
722 /* Pattern matching */
730 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
731 /* Historically, study was skipped in these cases. */
736 /* Make study a no-op. It's no longer useful and its existence
737 complicates matters elsewhere. */
743 /* also used for: pp_transr() */
750 if (PL_op->op_flags & OPf_STACKED)
755 sv = PAD_SV(ARGTARG);
760 if(PL_op->op_type == OP_TRANSR) {
762 const char * const pv = SvPV(sv,len);
763 SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv));
768 I32 i = do_trans(sv);
774 /* Lvalue operators. */
777 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
783 PERL_ARGS_ASSERT_DO_CHOMP;
785 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
787 if (SvTYPE(sv) == SVt_PVAV) {
789 AV *const av = MUTABLE_AV(sv);
790 const I32 max = AvFILL(av);
792 for (i = 0; i <= max; i++) {
793 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
794 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
795 count += do_chomp(retval, sv, chomping);
799 else if (SvTYPE(sv) == SVt_PVHV) {
800 HV* const hv = MUTABLE_HV(sv);
802 (void)hv_iterinit(hv);
803 while ((entry = hv_iternext(hv)))
804 count += do_chomp(retval, hv_iterval(hv,entry), chomping);
807 else if (SvREADONLY(sv)) {
808 Perl_croak_no_modify();
814 char *temp_buffer = NULL;
819 goto nope_free_nothing;
821 while (len && s[-1] == '\n') {
828 STRLEN rslen, rs_charlen;
829 const char *rsptr = SvPV_const(PL_rs, rslen);
831 rs_charlen = SvUTF8(PL_rs)
835 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
836 /* Assumption is that rs is shorter than the scalar. */
838 /* RS is utf8, scalar is 8 bit. */
840 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
843 /* Cannot downgrade, therefore cannot possibly match.
844 At this point, temp_buffer is not alloced, and
845 is the buffer inside PL_rs, so dont free it.
847 assert (temp_buffer == rsptr);
853 /* RS is 8 bit, scalar is utf8. */
854 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
868 if (memNE(s, rsptr, rslen))
873 SvPV_force_nomg_nolen(sv);
880 Safefree(temp_buffer);
882 SvREFCNT_dec(svrecode);
886 if (len && (!SvPOK(sv) || SvIsCOW(sv)))
887 s = SvPV_force_nomg(sv, len);
890 char * const send = s + len;
891 char * const start = s;
893 while (s > start && UTF8_IS_CONTINUATION(*s))
895 if (is_utf8_string((U8*)s, send - s)) {
896 sv_setpvn(retval, s, send - s);
898 SvCUR_set(sv, s - start);
908 sv_setpvn(retval, s, 1);
922 /* also used for: pp_schomp() */
927 const bool chomping = PL_op->op_type == OP_SCHOMP;
929 const size_t count = do_chomp(TARG, TOPs, chomping);
931 sv_setiv(TARG, count);
937 /* also used for: pp_chomp() */
941 dSP; dMARK; dTARGET; dORIGMARK;
942 const bool chomping = PL_op->op_type == OP_CHOMP;
946 count += do_chomp(TARG, *++MARK, chomping);
948 sv_setiv(TARG, count);
959 if (!PL_op->op_private) {
971 if (SvTHINKFIRST(sv))
972 sv_force_normal_flags(sv, SV_COW_DROP_PV|SV_IMMEDIATE_UNREF);
974 switch (SvTYPE(sv)) {
978 av_undef(MUTABLE_AV(sv));
981 hv_undef(MUTABLE_HV(sv));
984 if (cv_const_sv((const CV *)sv))
985 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
986 "Constant subroutine %" SVf " undefined",
987 SVfARG(CvANON((const CV *)sv)
988 ? newSVpvs_flags("(anonymous)", SVs_TEMP)
989 : sv_2mortal(newSVhek(
991 ? CvNAME_HEK((CV *)sv)
992 : GvENAME_HEK(CvGV((const CV *)sv))
997 /* let user-undef'd sub keep its identity */
998 cv_undef_flags(MUTABLE_CV(sv), CV_UNDEF_KEEP_NAME);
1001 assert(isGV_with_GP(sv));
1002 assert(!SvFAKE(sv));
1007 /* undef *Pkg::meth_name ... */
1009 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1010 && HvENAME_get(stash);
1012 if((stash = GvHV((const GV *)sv))) {
1013 if(HvENAME_get(stash))
1014 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1018 SvREFCNT_inc_simple_void_NN(sv_2mortal(sv));
1019 gp_free(MUTABLE_GV(sv));
1021 GvGP_set(sv, gp_ref(gp));
1022 #ifndef PERL_DONT_CREATE_GVSV
1023 GvSV(sv) = newSV(0);
1025 GvLINE(sv) = CopLINE(PL_curcop);
1026 GvEGV(sv) = MUTABLE_GV(sv);
1030 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1032 /* undef *Foo::ISA */
1033 if( strEQ(GvNAME((const GV *)sv), "ISA")
1034 && (stash = GvSTASH((const GV *)sv))
1035 && (method_changed || HvENAME(stash)) )
1036 mro_isa_changed_in(stash);
1037 else if(method_changed)
1038 mro_method_changed_in(
1039 GvSTASH((const GV *)sv)
1045 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1059 /* common "slow" code for pp_postinc and pp_postdec */
1062 S_postincdec_common(pTHX_ SV *sv, SV *targ)
1066 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
1069 TARG = sv_newmortal();
1076 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1077 if (inc && !SvOK(TARG))
1084 /* also used for: pp_i_postinc() */
1091 /* special-case sv being a simple integer */
1092 if (LIKELY(((sv->sv_flags &
1093 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV|
1094 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK))
1096 && SvIVX(sv) != IV_MAX)
1099 SvIV_set(sv, iv + 1);
1100 TARGi(iv, 0); /* arg not GMG, so can't be tainted */
1105 return S_postincdec_common(aTHX_ sv, TARG);
1109 /* also used for: pp_i_postdec() */
1116 /* special-case sv being a simple integer */
1117 if (LIKELY(((sv->sv_flags &
1118 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV|
1119 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK))
1121 && SvIVX(sv) != IV_MIN)
1124 SvIV_set(sv, iv - 1);
1125 TARGi(iv, 0); /* arg not GMG, so can't be tainted */
1130 return S_postincdec_common(aTHX_ sv, TARG);
1134 /* Ordinary operators. */
1138 dSP; dATARGET; SV *svl, *svr;
1139 #ifdef PERL_PRESERVE_IVUV
1142 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1145 #ifdef PERL_PRESERVE_IVUV
1146 /* For integer to integer power, we do the calculation by hand wherever
1147 we're sure it is safe; otherwise we call pow() and try to convert to
1148 integer afterwards. */
1149 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1157 const IV iv = SvIVX(svr);
1161 goto float_it; /* Can't do negative powers this way. */
1165 baseuok = SvUOK(svl);
1167 baseuv = SvUVX(svl);
1169 const IV iv = SvIVX(svl);
1172 baseuok = TRUE; /* effectively it's a UV now */
1174 baseuv = -iv; /* abs, baseuok == false records sign */
1177 /* now we have integer ** positive integer. */
1180 /* foo & (foo - 1) is zero only for a power of 2. */
1181 if (!(baseuv & (baseuv - 1))) {
1182 /* We are raising power-of-2 to a positive integer.
1183 The logic here will work for any base (even non-integer
1184 bases) but it can be less accurate than
1185 pow (base,power) or exp (power * log (base)) when the
1186 intermediate values start to spill out of the mantissa.
1187 With powers of 2 we know this can't happen.
1188 And powers of 2 are the favourite thing for perl
1189 programmers to notice ** not doing what they mean. */
1191 NV base = baseuok ? baseuv : -(NV)baseuv;
1196 while (power >>= 1) {
1204 SvIV_please_nomg(svr);
1207 unsigned int highbit = 8 * sizeof(UV);
1208 unsigned int diff = 8 * sizeof(UV);
1209 while (diff >>= 1) {
1211 if (baseuv >> highbit) {
1215 /* we now have baseuv < 2 ** highbit */
1216 if (power * highbit <= 8 * sizeof(UV)) {
1217 /* result will definitely fit in UV, so use UV math
1218 on same algorithm as above */
1221 const bool odd_power = cBOOL(power & 1);
1225 while (power >>= 1) {
1232 if (baseuok || !odd_power)
1233 /* answer is positive */
1235 else if (result <= (UV)IV_MAX)
1236 /* answer negative, fits in IV */
1237 SETi( -(IV)result );
1238 else if (result == (UV)IV_MIN)
1239 /* 2's complement assumption: special case IV_MIN */
1242 /* answer negative, doesn't fit */
1243 SETn( -(NV)result );
1251 NV right = SvNV_nomg(svr);
1252 NV left = SvNV_nomg(svl);
1255 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1257 We are building perl with long double support and are on an AIX OS
1258 afflicted with a powl() function that wrongly returns NaNQ for any
1259 negative base. This was reported to IBM as PMR #23047-379 on
1260 03/06/2006. The problem exists in at least the following versions
1261 of AIX and the libm fileset, and no doubt others as well:
1263 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1264 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1265 AIX 5.2.0 bos.adt.libm 5.2.0.85
1267 So, until IBM fixes powl(), we provide the following workaround to
1268 handle the problem ourselves. Our logic is as follows: for
1269 negative bases (left), we use fmod(right, 2) to check if the
1270 exponent is an odd or even integer:
1272 - if odd, powl(left, right) == -powl(-left, right)
1273 - if even, powl(left, right) == powl(-left, right)
1275 If the exponent is not an integer, the result is rightly NaNQ, so
1276 we just return that (as NV_NAN).
1280 NV mod2 = Perl_fmod( right, 2.0 );
1281 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1282 SETn( -Perl_pow( -left, right) );
1283 } else if (mod2 == 0.0) { /* even integer */
1284 SETn( Perl_pow( -left, right) );
1285 } else { /* fractional power */
1289 SETn( Perl_pow( left, right) );
1292 SETn( Perl_pow( left, right) );
1293 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1295 #ifdef PERL_PRESERVE_IVUV
1297 SvIV_please_nomg(svr);
1305 dSP; dATARGET; SV *svl, *svr;
1306 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1310 #ifdef PERL_PRESERVE_IVUV
1312 /* special-case some simple common cases */
1313 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) {
1315 U32 flags = (svl->sv_flags & svr->sv_flags);
1316 if (flags & SVf_IOK) {
1317 /* both args are simple IVs */
1322 topl = ((UV)il) >> (UVSIZE * 4 - 1);
1323 topr = ((UV)ir) >> (UVSIZE * 4 - 1);
1325 /* if both are in a range that can't under/overflow, do a
1326 * simple integer multiply: if the top halves(*) of both numbers
1327 * are 00...00 or 11...11, then it's safe.
1328 * (*) for 32-bits, the "top half" is the top 17 bits,
1329 * for 64-bits, its 33 bits */
1331 ((topl+1) | (topr+1))
1332 & ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */
1335 TARGi(il * ir, 0); /* args not GMG, so can't be tainted */
1341 else if (flags & SVf_NOK) {
1342 /* both args are NVs */
1348 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1349 !Perl_isnan(nl) && nl == (NV)(il = (IV)nl)
1350 && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr)
1352 nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr)
1355 /* nothing was lost by converting to IVs */
1359 # if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
1360 if (Perl_isinf(result)) {
1361 Zero((U8*)&result + 8, 8, U8);
1364 TARGn(result, 0); /* args not GMG, so can't be tainted */
1372 if (SvIV_please_nomg(svr)) {
1373 /* Unless the left argument is integer in range we are going to have to
1374 use NV maths. Hence only attempt to coerce the right argument if
1375 we know the left is integer. */
1376 /* Left operand is defined, so is it IV? */
1377 if (SvIV_please_nomg(svl)) {
1378 bool auvok = SvUOK(svl);
1379 bool buvok = SvUOK(svr);
1380 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1381 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1390 const IV aiv = SvIVX(svl);
1393 auvok = TRUE; /* effectively it's a UV now */
1395 /* abs, auvok == false records sign */
1396 alow = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
1402 const IV biv = SvIVX(svr);
1405 buvok = TRUE; /* effectively it's a UV now */
1407 /* abs, buvok == false records sign */
1408 blow = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
1412 /* If this does sign extension on unsigned it's time for plan B */
1413 ahigh = alow >> (4 * sizeof (UV));
1415 bhigh = blow >> (4 * sizeof (UV));
1417 if (ahigh && bhigh) {
1419 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1420 which is overflow. Drop to NVs below. */
1421 } else if (!ahigh && !bhigh) {
1422 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1423 so the unsigned multiply cannot overflow. */
1424 const UV product = alow * blow;
1425 if (auvok == buvok) {
1426 /* -ve * -ve or +ve * +ve gives a +ve result. */
1430 } else if (product <= (UV)IV_MIN) {
1431 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1432 /* -ve result, which could overflow an IV */
1434 /* can't negate IV_MIN, but there are aren't two
1435 * integers such that !ahigh && !bhigh, where the
1436 * product equals 0x800....000 */
1437 assert(product != (UV)IV_MIN);
1438 SETi( -(IV)product );
1440 } /* else drop to NVs below. */
1442 /* One operand is large, 1 small */
1445 /* swap the operands */
1447 bhigh = blow; /* bhigh now the temp var for the swap */
1451 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1452 multiplies can't overflow. shift can, add can, -ve can. */
1453 product_middle = ahigh * blow;
1454 if (!(product_middle & topmask)) {
1455 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1457 product_middle <<= (4 * sizeof (UV));
1458 product_low = alow * blow;
1460 /* as for pp_add, UV + something mustn't get smaller.
1461 IIRC ANSI mandates this wrapping *behaviour* for
1462 unsigned whatever the actual representation*/
1463 product_low += product_middle;
1464 if (product_low >= product_middle) {
1465 /* didn't overflow */
1466 if (auvok == buvok) {
1467 /* -ve * -ve or +ve * +ve gives a +ve result. */
1469 SETu( product_low );
1471 } else if (product_low <= (UV)IV_MIN) {
1472 /* 2s complement assumption again */
1473 /* -ve result, which could overflow an IV */
1475 SETi(product_low == (UV)IV_MIN
1476 ? IV_MIN : -(IV)product_low);
1478 } /* else drop to NVs below. */
1480 } /* product_middle too large */
1481 } /* ahigh && bhigh */
1486 NV right = SvNV_nomg(svr);
1487 NV left = SvNV_nomg(svl);
1488 NV result = left * right;
1491 #if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
1492 if (Perl_isinf(result)) {
1493 Zero((U8*)&result + 8, 8, U8);
1503 dSP; dATARGET; SV *svl, *svr;
1504 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1507 /* Only try to do UV divide first
1508 if ((SLOPPYDIVIDE is true) or
1509 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1511 The assumption is that it is better to use floating point divide
1512 whenever possible, only doing integer divide first if we can't be sure.
1513 If NV_PRESERVES_UV is true then we know at compile time that no UV
1514 can be too large to preserve, so don't need to compile the code to
1515 test the size of UVs. */
1518 # define PERL_TRY_UV_DIVIDE
1519 /* ensure that 20./5. == 4. */
1521 # ifdef PERL_PRESERVE_IVUV
1522 # ifndef NV_PRESERVES_UV
1523 # define PERL_TRY_UV_DIVIDE
1528 #ifdef PERL_TRY_UV_DIVIDE
1529 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1530 bool left_non_neg = SvUOK(svl);
1531 bool right_non_neg = SvUOK(svr);
1535 if (right_non_neg) {
1539 const IV biv = SvIVX(svr);
1542 right_non_neg = TRUE; /* effectively it's a UV now */
1545 right = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
1548 /* historically undef()/0 gives a "Use of uninitialized value"
1549 warning before dieing, hence this test goes here.
1550 If it were immediately before the second SvIV_please, then
1551 DIE() would be invoked before left was even inspected, so
1552 no inspection would give no warning. */
1554 DIE(aTHX_ "Illegal division by zero");
1560 const IV aiv = SvIVX(svl);
1563 left_non_neg = TRUE; /* effectively it's a UV now */
1566 left = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
1572 /* For sloppy divide we always attempt integer division. */
1574 /* Otherwise we only attempt it if either or both operands
1575 would not be preserved by an NV. If both fit in NVs
1576 we fall through to the NV divide code below. However,
1577 as left >= right to ensure integer result here, we know that
1578 we can skip the test on the right operand - right big
1579 enough not to be preserved can't get here unless left is
1582 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1585 /* Integer division can't overflow, but it can be imprecise. */
1586 const UV result = left / right;
1587 if (result * right == left) {
1588 SP--; /* result is valid */
1589 if (left_non_neg == right_non_neg) {
1590 /* signs identical, result is positive. */
1594 /* 2s complement assumption */
1595 if (result <= (UV)IV_MIN)
1596 SETi(result == (UV)IV_MIN ? IV_MIN : -(IV)result);
1598 /* It's exact but too negative for IV. */
1599 SETn( -(NV)result );
1602 } /* tried integer divide but it was not an integer result */
1603 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1604 } /* one operand wasn't SvIOK */
1605 #endif /* PERL_TRY_UV_DIVIDE */
1607 NV right = SvNV_nomg(svr);
1608 NV left = SvNV_nomg(svl);
1609 (void)POPs;(void)POPs;
1610 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1611 if (! Perl_isnan(right) && right == 0.0)
1615 DIE(aTHX_ "Illegal division by zero");
1616 PUSHn( left / right );
1624 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1628 bool left_neg = FALSE;
1629 bool right_neg = FALSE;
1630 bool use_double = FALSE;
1631 bool dright_valid = FALSE;
1634 SV * const svr = TOPs;
1635 SV * const svl = TOPm1s;
1636 if (SvIV_please_nomg(svr)) {
1637 right_neg = !SvUOK(svr);
1641 const IV biv = SvIVX(svr);
1644 right_neg = FALSE; /* effectively it's a UV now */
1646 right = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
1651 dright = SvNV_nomg(svr);
1652 right_neg = dright < 0;
1655 if (dright < UV_MAX_P1) {
1656 right = U_V(dright);
1657 dright_valid = TRUE; /* In case we need to use double below. */
1663 /* At this point use_double is only true if right is out of range for
1664 a UV. In range NV has been rounded down to nearest UV and
1665 use_double false. */
1666 if (!use_double && SvIV_please_nomg(svl)) {
1667 left_neg = !SvUOK(svl);
1671 const IV aiv = SvIVX(svl);
1674 left_neg = FALSE; /* effectively it's a UV now */
1676 left = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
1681 dleft = SvNV_nomg(svl);
1682 left_neg = dleft < 0;
1686 /* This should be exactly the 5.6 behaviour - if left and right are
1687 both in range for UV then use U_V() rather than floor. */
1689 if (dleft < UV_MAX_P1) {
1690 /* right was in range, so is dleft, so use UVs not double.
1694 /* left is out of range for UV, right was in range, so promote
1695 right (back) to double. */
1697 /* The +0.5 is used in 5.6 even though it is not strictly
1698 consistent with the implicit +0 floor in the U_V()
1699 inside the #if 1. */
1700 dleft = Perl_floor(dleft + 0.5);
1703 dright = Perl_floor(dright + 0.5);
1714 DIE(aTHX_ "Illegal modulus zero");
1716 dans = Perl_fmod(dleft, dright);
1717 if ((left_neg != right_neg) && dans)
1718 dans = dright - dans;
1721 sv_setnv(TARG, dans);
1727 DIE(aTHX_ "Illegal modulus zero");
1730 if ((left_neg != right_neg) && ans)
1733 /* XXX may warn: unary minus operator applied to unsigned type */
1734 /* could change -foo to be (~foo)+1 instead */
1735 if (ans <= ~((UV)IV_MAX)+1)
1736 sv_setiv(TARG, ~ans+1);
1738 sv_setnv(TARG, -(NV)ans);
1741 sv_setuv(TARG, ans);
1753 bool infnan = FALSE;
1755 if (GIMME_V == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1756 /* TODO: think of some way of doing list-repeat overloading ??? */
1761 if (UNLIKELY(PL_op->op_private & OPpREPEAT_DOLIST)) {
1762 /* The parser saw this as a list repeat, and there
1763 are probably several items on the stack. But we're
1764 in scalar/void context, and there's no pp_list to save us
1765 now. So drop the rest of the items -- robin@kitsite.com
1768 if (MARK + 1 < SP) {
1774 ASSUME(MARK + 1 == SP);
1776 MARK[1] = &PL_sv_undef;
1780 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1786 const UV uv = SvUV_nomg(sv);
1788 count = IV_MAX; /* The best we can do? */
1792 count = SvIV_nomg(sv);
1795 else if (SvNOKp(sv)) {
1796 const NV nv = SvNV_nomg(sv);
1797 infnan = Perl_isinfnan(nv);
1798 if (UNLIKELY(infnan)) {
1802 count = -1; /* An arbitrary negative integer */
1808 count = SvIV_nomg(sv);
1811 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC),
1812 "Non-finite repeat count does nothing");
1813 } else if (count < 0) {
1815 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC),
1816 "Negative repeat count does nothing");
1819 if (GIMME_V == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1821 const SSize_t items = SP - MARK;
1822 const U8 mod = PL_op->op_flags & OPf_MOD;
1827 if ( items > SSize_t_MAX / count /* max would overflow */
1828 /* repeatcpy would overflow */
1829 || items > I32_MAX / (I32)sizeof(SV *)
1831 Perl_croak(aTHX_ "%s","Out of memory during list extend");
1832 max = items * count;
1837 if (mod && SvPADTMP(*SP)) {
1838 *SP = sv_mortalcopy(*SP);
1845 repeatcpy((char*)(MARK + items), (char*)MARK,
1846 items * sizeof(const SV *), count - 1);
1849 else if (count <= 0)
1852 else { /* Note: mark already snarfed by pp_list */
1853 SV * const tmpstr = POPs;
1858 sv_setsv_nomg(TARG, tmpstr);
1859 SvPV_force_nomg(TARG, len);
1860 isutf = DO_UTF8(TARG);
1867 if ( len > (MEM_SIZE_MAX-1) / (UV)count /* max would overflow */
1868 || len > (U32)I32_MAX /* repeatcpy would overflow */
1870 Perl_croak(aTHX_ "%s",
1871 "Out of memory during string extend");
1872 max = (UV)count * len + 1;
1875 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1876 SvCUR_set(TARG, SvCUR(TARG) * count);
1878 *SvEND(TARG) = '\0';
1881 (void)SvPOK_only_UTF8(TARG);
1883 (void)SvPOK_only(TARG);
1892 dSP; dATARGET; bool useleft; SV *svl, *svr;
1893 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1897 #ifdef PERL_PRESERVE_IVUV
1899 /* special-case some simple common cases */
1900 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) {
1902 U32 flags = (svl->sv_flags & svr->sv_flags);
1903 if (flags & SVf_IOK) {
1904 /* both args are simple IVs */
1909 topl = ((UV)il) >> (UVSIZE * 8 - 2);
1910 topr = ((UV)ir) >> (UVSIZE * 8 - 2);
1912 /* if both are in a range that can't under/overflow, do a
1913 * simple integer subtract: if the top of both numbers
1914 * are 00 or 11, then it's safe */
1915 if (!( ((topl+1) | (topr+1)) & 2)) {
1917 TARGi(il - ir, 0); /* args not GMG, so can't be tainted */
1923 else if (flags & SVf_NOK) {
1924 /* both args are NVs */
1929 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1930 !Perl_isnan(nl) && nl == (NV)(il = (IV)nl)
1931 && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr)
1933 nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr)
1936 /* nothing was lost by converting to IVs */
1939 TARGn(nl - nr, 0); /* args not GMG, so can't be tainted */
1947 useleft = USE_LEFT(svl);
1948 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1949 "bad things" happen if you rely on signed integers wrapping. */
1950 if (SvIV_please_nomg(svr)) {
1951 /* Unless the left argument is integer in range we are going to have to
1952 use NV maths. Hence only attempt to coerce the right argument if
1953 we know the left is integer. */
1960 a_valid = auvok = 1;
1961 /* left operand is undef, treat as zero. */
1963 /* Left operand is defined, so is it IV? */
1964 if (SvIV_please_nomg(svl)) {
1965 if ((auvok = SvUOK(svl)))
1968 const IV aiv = SvIVX(svl);
1971 auvok = 1; /* Now acting as a sign flag. */
1972 } else { /* 2s complement assumption for IV_MIN */
1973 auv = (aiv == IV_MIN) ? (UV)aiv : (UV)-aiv;
1980 bool result_good = 0;
1983 bool buvok = SvUOK(svr);
1988 const IV biv = SvIVX(svr);
1993 buv = (biv == IV_MIN) ? (UV)biv : (UV)-biv;
1995 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1996 else "IV" now, independent of how it came in.
1997 if a, b represents positive, A, B negative, a maps to -A etc
2002 all UV maths. negate result if A negative.
2003 subtract if signs same, add if signs differ. */
2005 if (auvok ^ buvok) {
2014 /* Must get smaller */
2019 if (result <= buv) {
2020 /* result really should be -(auv-buv). as its negation
2021 of true value, need to swap our result flag */
2033 if (result <= (UV)IV_MIN)
2034 SETi(result == (UV)IV_MIN
2035 ? IV_MIN : -(IV)result);
2037 /* result valid, but out of range for IV. */
2038 SETn( -(NV)result );
2042 } /* Overflow, drop through to NVs. */
2046 useleft = USE_LEFT(svl);
2049 NV value = SvNV_nomg(svr);
2053 /* left operand is undef, treat as zero - value */
2057 SETn( SvNV_nomg(svl) - value );
2062 #define IV_BITS (IVSIZE * 8)
2064 static UV S_uv_shift(UV uv, int shift, bool left)
2070 if (shift >= IV_BITS) {
2073 return left ? uv << shift : uv >> shift;
2076 static IV S_iv_shift(IV iv, int shift, bool left)
2082 if (shift >= IV_BITS) {
2083 return iv < 0 && !left ? -1 : 0;
2085 return left ? iv << shift : iv >> shift;
2088 #define UV_LEFT_SHIFT(uv, shift) S_uv_shift(uv, shift, TRUE)
2089 #define UV_RIGHT_SHIFT(uv, shift) S_uv_shift(uv, shift, FALSE)
2090 #define IV_LEFT_SHIFT(iv, shift) S_iv_shift(iv, shift, TRUE)
2091 #define IV_RIGHT_SHIFT(iv, shift) S_iv_shift(iv, shift, FALSE)
2095 dSP; dATARGET; SV *svl, *svr;
2096 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
2100 const IV shift = SvIV_nomg(svr);
2101 if (PL_op->op_private & HINT_INTEGER) {
2102 SETi(IV_LEFT_SHIFT(SvIV_nomg(svl), shift));
2105 SETu(UV_LEFT_SHIFT(SvUV_nomg(svl), shift));
2113 dSP; dATARGET; SV *svl, *svr;
2114 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
2118 const IV shift = SvIV_nomg(svr);
2119 if (PL_op->op_private & HINT_INTEGER) {
2120 SETi(IV_RIGHT_SHIFT(SvIV_nomg(svl), shift));
2123 SETu(UV_RIGHT_SHIFT(SvUV_nomg(svl), shift));
2134 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
2138 (SvIOK_notUV(left) && SvIOK_notUV(right))
2139 ? (SvIVX(left) < SvIVX(right))
2140 : (do_ncmp(left, right) == -1)
2150 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
2154 (SvIOK_notUV(left) && SvIOK_notUV(right))
2155 ? (SvIVX(left) > SvIVX(right))
2156 : (do_ncmp(left, right) == 1)
2166 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
2170 (SvIOK_notUV(left) && SvIOK_notUV(right))
2171 ? (SvIVX(left) <= SvIVX(right))
2172 : (do_ncmp(left, right) <= 0)
2182 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2186 (SvIOK_notUV(left) && SvIOK_notUV(right))
2187 ? (SvIVX(left) >= SvIVX(right))
2188 : ( (do_ncmp(left, right) & 2) == 0)
2198 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2202 (SvIOK_notUV(left) && SvIOK_notUV(right))
2203 ? (SvIVX(left) != SvIVX(right))
2204 : (do_ncmp(left, right) != 0)
2209 /* compare left and right SVs. Returns:
2213 * 2: left or right was a NaN
2216 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2218 PERL_ARGS_ASSERT_DO_NCMP;
2219 #ifdef PERL_PRESERVE_IVUV
2220 /* Fortunately it seems NaN isn't IOK */
2221 if (SvIV_please_nomg(right) && SvIV_please_nomg(left)) {
2223 const IV leftiv = SvIVX(left);
2224 if (!SvUOK(right)) {
2225 /* ## IV <=> IV ## */
2226 const IV rightiv = SvIVX(right);
2227 return (leftiv > rightiv) - (leftiv < rightiv);
2229 /* ## IV <=> UV ## */
2231 /* As (b) is a UV, it's >=0, so it must be < */
2234 const UV rightuv = SvUVX(right);
2235 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2240 /* ## UV <=> UV ## */
2241 const UV leftuv = SvUVX(left);
2242 const UV rightuv = SvUVX(right);
2243 return (leftuv > rightuv) - (leftuv < rightuv);
2245 /* ## UV <=> IV ## */
2247 const IV rightiv = SvIVX(right);
2249 /* As (a) is a UV, it's >=0, so it cannot be < */
2252 const UV leftuv = SvUVX(left);
2253 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2256 NOT_REACHED; /* NOTREACHED */
2260 NV const rnv = SvNV_nomg(right);
2261 NV const lnv = SvNV_nomg(left);
2263 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2264 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2267 return (lnv > rnv) - (lnv < rnv);
2286 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2289 value = do_ncmp(left, right);
2301 /* also used for: pp_sge() pp_sgt() pp_slt() */
2307 int amg_type = sle_amg;
2311 switch (PL_op->op_type) {
2330 tryAMAGICbin_MG(amg_type, AMGf_set);
2334 #ifdef USE_LOCALE_COLLATE
2335 (IN_LC_RUNTIME(LC_COLLATE))
2336 ? sv_cmp_locale_flags(left, right, 0)
2339 sv_cmp_flags(left, right, 0);
2340 SETs(boolSV(cmp * multiplier < rhs));
2348 tryAMAGICbin_MG(seq_amg, AMGf_set);
2351 SETs(boolSV(sv_eq_flags(left, right, 0)));
2359 tryAMAGICbin_MG(sne_amg, AMGf_set);
2362 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2370 tryAMAGICbin_MG(scmp_amg, 0);
2374 #ifdef USE_LOCALE_COLLATE
2375 (IN_LC_RUNTIME(LC_COLLATE))
2376 ? sv_cmp_locale_flags(left, right, 0)
2379 sv_cmp_flags(left, right, 0);
2388 tryAMAGICbin_MG(band_amg, AMGf_assign);
2391 if (SvNIOKp(left) || SvNIOKp(right)) {
2392 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2393 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2394 if (PL_op->op_private & HINT_INTEGER) {
2395 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2399 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2402 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2403 if (right_ro_nonnum) SvNIOK_off(right);
2406 do_vop(PL_op->op_type, TARG, left, right);
2416 tryAMAGICbin_MG(band_amg, AMGf_assign|AMGf_numarg);
2418 dATARGET; dPOPTOPssrl;
2419 if (PL_op->op_private & HINT_INTEGER) {
2420 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2424 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2434 tryAMAGICbin_MG(sband_amg, AMGf_assign);
2436 dATARGET; dPOPTOPssrl;
2437 do_vop(OP_BIT_AND, TARG, left, right);
2442 /* also used for: pp_bit_xor() */
2447 const int op_type = PL_op->op_type;
2449 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2452 if (SvNIOKp(left) || SvNIOKp(right)) {
2453 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2454 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2455 if (PL_op->op_private & HINT_INTEGER) {
2456 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2457 const IV r = SvIV_nomg(right);
2458 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2462 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2463 const UV r = SvUV_nomg(right);
2464 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2467 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2468 if (right_ro_nonnum) SvNIOK_off(right);
2471 do_vop(op_type, TARG, left, right);
2478 /* also used for: pp_nbit_xor() */
2483 const int op_type = PL_op->op_type;
2485 tryAMAGICbin_MG((op_type == OP_NBIT_OR ? bor_amg : bxor_amg),
2486 AMGf_assign|AMGf_numarg);
2488 dATARGET; dPOPTOPssrl;
2489 if (PL_op->op_private & HINT_INTEGER) {
2490 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2491 const IV r = SvIV_nomg(right);
2492 const IV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r);
2496 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2497 const UV r = SvUV_nomg(right);
2498 const UV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r);
2505 /* also used for: pp_sbit_xor() */
2510 const int op_type = PL_op->op_type;
2512 tryAMAGICbin_MG((op_type == OP_SBIT_OR ? sbor_amg : sbxor_amg),
2515 dATARGET; dPOPTOPssrl;
2516 do_vop(op_type == OP_SBIT_OR ? OP_BIT_OR : OP_BIT_XOR, TARG, left,
2522 PERL_STATIC_INLINE bool
2523 S_negate_string(pTHX)
2528 SV * const sv = TOPs;
2529 if (!SvPOKp(sv) || SvNIOK(sv) || (!SvPOK(sv) && SvNIOKp(sv)))
2531 s = SvPV_nomg_const(sv, len);
2532 if (isIDFIRST(*s)) {
2533 sv_setpvs(TARG, "-");
2536 else if (*s == '+' || (*s == '-' && !looks_like_number(sv))) {
2537 sv_setsv_nomg(TARG, sv);
2538 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2548 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2549 if (S_negate_string(aTHX)) return NORMAL;
2551 SV * const sv = TOPs;
2554 /* It's publicly an integer */
2557 if (SvIVX(sv) == IV_MIN) {
2558 /* 2s complement assumption. */
2559 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) ==
2563 else if (SvUVX(sv) <= IV_MAX) {
2568 else if (SvIVX(sv) != IV_MIN) {
2572 #ifdef PERL_PRESERVE_IVUV
2579 if (SvNIOKp(sv) && (SvNIOK(sv) || !SvPOK(sv)))
2580 SETn(-SvNV_nomg(sv));
2581 else if (SvPOKp(sv) && SvIV_please_nomg(sv))
2582 goto oops_its_an_int;
2584 SETn(-SvNV_nomg(sv));
2592 tryAMAGICun_MG(not_amg, AMGf_set);
2593 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2598 S_scomplement(pTHX_ SV *targ, SV *sv)
2604 sv_copypv_nomg(TARG, sv);
2605 tmps = (U8*)SvPV_nomg(TARG, len);
2608 /* Calculate exact length, let's not estimate. */
2613 U8 * const send = tmps + len;
2614 U8 * const origtmps = tmps;
2615 const UV utf8flags = UTF8_ALLOW_ANYUV;
2617 while (tmps < send) {
2618 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2620 targlen += UVCHR_SKIP(~c);
2626 /* Now rewind strings and write them. */
2633 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
2634 deprecated_above_ff_msg, PL_op_desc[PL_op->op_type]);
2635 Newx(result, targlen + 1, U8);
2637 while (tmps < send) {
2638 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2640 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2643 sv_usepvn_flags(TARG, (char*)result, targlen,
2644 SV_HAS_TRAILING_NUL);
2651 Newx(result, nchar + 1, U8);
2653 while (tmps < send) {
2654 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2659 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2667 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2670 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2675 for ( ; anum > 0; anum--, tmps++)
2682 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2686 if (PL_op->op_private & HINT_INTEGER) {
2687 const IV i = ~SvIV_nomg(sv);
2691 const UV u = ~SvUV_nomg(sv);
2696 S_scomplement(aTHX_ TARG, sv);
2706 tryAMAGICun_MG(compl_amg, AMGf_numeric|AMGf_numarg);
2709 if (PL_op->op_private & HINT_INTEGER) {
2710 const IV i = ~SvIV_nomg(sv);
2714 const UV u = ~SvUV_nomg(sv);
2724 tryAMAGICun_MG(scompl_amg, AMGf_numeric);
2727 S_scomplement(aTHX_ TARG, sv);
2733 /* integer versions of some of the above */
2738 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2741 SETi( left * right );
2750 tryAMAGICbin_MG(div_amg, AMGf_assign);
2753 IV value = SvIV_nomg(right);
2755 DIE(aTHX_ "Illegal division by zero");
2756 num = SvIV_nomg(left);
2758 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2762 value = num / value;
2770 /* This is the vanilla old i_modulo. */
2772 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2776 DIE(aTHX_ "Illegal modulus zero");
2777 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2781 SETi( left % right );
2786 #if defined(__GLIBC__) && IVSIZE == 8 \
2787 && ( __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8))
2789 PP(pp_i_modulo_glibc_bugfix)
2791 /* This is the i_modulo with the workaround for the _moddi3 bug
2792 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2793 * See below for pp_i_modulo. */
2795 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2799 DIE(aTHX_ "Illegal modulus zero");
2800 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2804 SETi( left % PERL_ABS(right) );
2813 tryAMAGICbin_MG(add_amg, AMGf_assign);
2815 dPOPTOPiirl_ul_nomg;
2816 SETi( left + right );
2824 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2826 dPOPTOPiirl_ul_nomg;
2827 SETi( left - right );
2835 tryAMAGICbin_MG(lt_amg, AMGf_set);
2838 SETs(boolSV(left < right));
2846 tryAMAGICbin_MG(gt_amg, AMGf_set);
2849 SETs(boolSV(left > right));
2857 tryAMAGICbin_MG(le_amg, AMGf_set);
2860 SETs(boolSV(left <= right));
2868 tryAMAGICbin_MG(ge_amg, AMGf_set);
2871 SETs(boolSV(left >= right));
2879 tryAMAGICbin_MG(eq_amg, AMGf_set);
2882 SETs(boolSV(left == right));
2890 tryAMAGICbin_MG(ne_amg, AMGf_set);
2893 SETs(boolSV(left != right));
2901 tryAMAGICbin_MG(ncmp_amg, 0);
2908 else if (left < right)
2920 tryAMAGICun_MG(neg_amg, 0);
2921 if (S_negate_string(aTHX)) return NORMAL;
2923 SV * const sv = TOPs;
2924 IV const i = SvIV_nomg(sv);
2930 /* High falutin' math. */
2935 tryAMAGICbin_MG(atan2_amg, 0);
2938 SETn(Perl_atan2(left, right));
2944 /* also used for: pp_cos() pp_exp() pp_log() pp_sqrt() */
2949 int amg_type = fallback_amg;
2950 const char *neg_report = NULL;
2951 const int op_type = PL_op->op_type;
2954 case OP_SIN: amg_type = sin_amg; break;
2955 case OP_COS: amg_type = cos_amg; break;
2956 case OP_EXP: amg_type = exp_amg; break;
2957 case OP_LOG: amg_type = log_amg; neg_report = "log"; break;
2958 case OP_SQRT: amg_type = sqrt_amg; neg_report = "sqrt"; break;
2961 assert(amg_type != fallback_amg);
2963 tryAMAGICun_MG(amg_type, 0);
2965 SV * const arg = TOPs;
2966 const NV value = SvNV_nomg(arg);
2972 if (neg_report) { /* log or sqrt */
2974 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2975 ! Perl_isnan(value) &&
2977 (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0))) {
2978 SET_NUMERIC_STANDARD();
2979 /* diag_listed_as: Can't take log of %g */
2980 DIE(aTHX_ "Can't take %s of %" NVgf, neg_report, value);
2985 case OP_SIN: result = Perl_sin(value); break;
2986 case OP_COS: result = Perl_cos(value); break;
2987 case OP_EXP: result = Perl_exp(value); break;
2988 case OP_LOG: result = Perl_log(value); break;
2989 case OP_SQRT: result = Perl_sqrt(value); break;
2996 /* Support Configure command-line overrides for rand() functions.
2997 After 5.005, perhaps we should replace this by Configure support
2998 for drand48(), random(), or rand(). For 5.005, though, maintain
2999 compatibility by calling rand() but allow the user to override it.
3000 See INSTALL for details. --Andy Dougherty 15 July 1998
3002 /* Now it's after 5.005, and Configure supports drand48() and random(),
3003 in addition to rand(). So the overrides should not be needed any more.
3004 --Jarkko Hietaniemi 27 September 1998
3009 if (!PL_srand_called) {
3010 (void)seedDrand01((Rand_seed_t)seed());
3011 PL_srand_called = TRUE;
3023 SV * const sv = POPs;
3029 /* 1 of 2 things can be carried through SvNV, SP or TARG, SP was carried */
3030 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
3031 if (! Perl_isnan(value) && value == 0.0)
3041 sv_setnv_mg(TARG, value);
3052 if (MAXARG >= 1 && (TOPs || POPs)) {
3059 pv = SvPV(top, len);
3060 flags = grok_number(pv, len, &anum);
3062 if (!(flags & IS_NUMBER_IN_UV)) {
3063 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
3064 "Integer overflow in srand");
3072 (void)seedDrand01((Rand_seed_t)anum);
3073 PL_srand_called = TRUE;
3077 /* Historically srand always returned true. We can avoid breaking
3079 sv_setpvs(TARG, "0 but true");
3088 tryAMAGICun_MG(int_amg, AMGf_numeric);
3090 SV * const sv = TOPs;
3091 const IV iv = SvIV_nomg(sv);
3092 /* XXX it's arguable that compiler casting to IV might be subtly
3093 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
3094 else preferring IV has introduced a subtle behaviour change bug. OTOH
3095 relying on floating point to be accurate is a bug. */
3100 else if (SvIOK(sv)) {
3102 SETu(SvUV_nomg(sv));
3107 const NV value = SvNV_nomg(sv);
3108 if (UNLIKELY(Perl_isinfnan(value)))
3110 else if (value >= 0.0) {
3111 if (value < (NV)UV_MAX + 0.5) {
3114 SETn(Perl_floor(value));
3118 if (value > (NV)IV_MIN - 0.5) {
3121 SETn(Perl_ceil(value));
3132 tryAMAGICun_MG(abs_amg, AMGf_numeric);
3134 SV * const sv = TOPs;
3135 /* This will cache the NV value if string isn't actually integer */
3136 const IV iv = SvIV_nomg(sv);
3141 else if (SvIOK(sv)) {
3142 /* IVX is precise */
3144 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
3152 /* 2s complement assumption. Also, not really needed as
3153 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
3159 const NV value = SvNV_nomg(sv);
3170 /* also used for: pp_hex() */
3176 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
3180 SV* const sv = TOPs;
3182 tmps = (SvPV_const(sv, len));
3184 /* If Unicode, try to downgrade
3185 * If not possible, croak. */
3186 SV* const tsv = sv_2mortal(newSVsv(sv));
3189 sv_utf8_downgrade(tsv, FALSE);
3190 tmps = SvPV_const(tsv, len);
3192 if (PL_op->op_type == OP_HEX)
3195 while (*tmps && len && isSPACE(*tmps))
3199 if (isALPHA_FOLD_EQ(*tmps, 'x')) {
3201 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
3203 else if (isALPHA_FOLD_EQ(*tmps, 'b'))
3204 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
3206 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
3208 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
3222 SV * const sv = TOPs;
3224 U32 in_bytes = IN_BYTES;
3225 /* simplest case shortcut */
3226 /* turn off SVf_UTF8 in tmp flags if HINT_BYTES on*/
3227 U32 svflags = (SvFLAGS(sv) ^ (in_bytes << 26)) & (SVf_POK|SVs_GMG|SVf_UTF8);
3228 STATIC_ASSERT_STMT(HINT_BYTES == 0x00000008 && SVf_UTF8 == 0x20000000 && (SVf_UTF8 == HINT_BYTES << 26));
3231 if(LIKELY(svflags == SVf_POK))
3233 if(svflags & SVs_GMG)
3236 if (!IN_BYTES) /* reread to avoid using an C auto/register */
3237 sv_setiv(TARG, (IV)sv_len_utf8_nomg(sv));
3241 /* unrolled SvPV_nomg_const(sv,len) */
3246 (void)sv_2pv_flags(sv, &len, 0|SV_CONST_RETURN);
3248 sv_setiv(TARG, (IV)(len));
3251 if (!SvPADTMP(TARG)) {
3253 } else { /* TARG is on stack at this point and is overwriten by SETs.
3254 This branch is the odd one out, so put TARG by default on
3255 stack earlier to let local SP go out of liveness sooner */
3262 return NORMAL; /* no putback, SP didn't move in this opcode */
3265 /* Returns false if substring is completely outside original string.
3266 No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must
3267 always be true for an explicit 0.
3270 Perl_translate_substr_offsets( STRLEN curlen, IV pos1_iv,
3271 bool pos1_is_uv, IV len_iv,
3272 bool len_is_uv, STRLEN *posp,
3278 PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
3280 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
3281 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3284 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
3287 if (len_iv || len_is_uv) {
3288 if (!len_is_uv && len_iv < 0) {
3289 pos2_iv = curlen + len_iv;
3291 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3294 } else { /* len_iv >= 0 */
3295 if (!pos1_is_uv && pos1_iv < 0) {
3296 pos2_iv = pos1_iv + len_iv;
3297 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3299 if ((UV)len_iv > curlen-(UV)pos1_iv)
3302 pos2_iv = pos1_iv+len_iv;
3312 if (!pos2_is_uv && pos2_iv < 0) {
3313 if (!pos1_is_uv && pos1_iv < 0)
3317 else if (!pos1_is_uv && pos1_iv < 0)
3320 if ((UV)pos2_iv < (UV)pos1_iv)
3322 if ((UV)pos2_iv > curlen)
3325 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3326 *posp = (STRLEN)( (UV)pos1_iv );
3327 *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3344 I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3345 const bool rvalue = (GIMME_V != G_VOID);
3348 const char *repl = NULL;
3350 int num_args = PL_op->op_private & 7;
3351 bool repl_need_utf8_upgrade = FALSE;
3355 if(!(repl_sv = POPs)) num_args--;
3357 if ((len_sv = POPs)) {
3358 len_iv = SvIV(len_sv);
3359 len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1;
3364 pos1_iv = SvIV(pos_sv);
3365 pos1_is_uv = SvIOK_UV(pos_sv);
3367 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) {
3371 if (lvalue && !repl_sv) {
3373 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3374 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3376 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3378 pos1_is_uv || pos1_iv >= 0
3379 ? (STRLEN)(UV)pos1_iv
3380 : (LvFLAGS(ret) |= 1, (STRLEN)(UV)-pos1_iv);
3382 len_is_uv || len_iv > 0
3383 ? (STRLEN)(UV)len_iv
3384 : (LvFLAGS(ret) |= 2, (STRLEN)(UV)-len_iv);
3386 PUSHs(ret); /* avoid SvSETMAGIC here */
3390 repl = SvPV_const(repl_sv, repl_len);
3393 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3394 "Attempt to use reference as lvalue in substr"
3396 tmps = SvPV_force_nomg(sv, curlen);
3397 if (DO_UTF8(repl_sv) && repl_len) {
3399 /* Upgrade the dest, and recalculate tmps in case the buffer
3400 * got reallocated; curlen may also have been changed */
3401 sv_utf8_upgrade_nomg(sv);
3402 tmps = SvPV_nomg(sv, curlen);
3405 else if (DO_UTF8(sv))
3406 repl_need_utf8_upgrade = TRUE;
3408 else tmps = SvPV_const(sv, curlen);
3410 utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen);
3411 if (utf8_curlen == curlen)
3414 curlen = utf8_curlen;
3420 STRLEN pos, len, byte_len, byte_pos;
3422 if (!translate_substr_offsets(
3423 curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len
3427 byte_pos = utf8_curlen
3428 ? sv_or_pv_pos_u2b(sv, tmps, pos, &byte_len) : pos;
3433 SvTAINTED_off(TARG); /* decontaminate */
3434 SvUTF8_off(TARG); /* decontaminate */
3435 sv_setpvn(TARG, tmps, byte_len);
3436 #ifdef USE_LOCALE_COLLATE
3437 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3444 SV* repl_sv_copy = NULL;
3446 if (repl_need_utf8_upgrade) {
3447 repl_sv_copy = newSVsv(repl_sv);
3448 sv_utf8_upgrade(repl_sv_copy);
3449 repl = SvPV_const(repl_sv_copy, repl_len);
3453 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3454 SvREFCNT_dec(repl_sv_copy);
3457 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST)
3467 Perl_croak(aTHX_ "substr outside of string");
3468 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3475 const IV size = POPi;
3476 const IV offset = POPi;
3477 SV * const src = POPs;
3478 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3481 if (lvalue) { /* it's an lvalue! */
3482 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3483 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3485 LvTARG(ret) = SvREFCNT_inc_simple(src);
3486 LvTARGOFF(ret) = offset;
3487 LvTARGLEN(ret) = size;
3491 SvTAINTED_off(TARG); /* decontaminate */
3495 sv_setuv(ret, do_vecget(src, offset, size));
3503 /* also used for: pp_rindex() */
3516 const char *little_p;
3519 const bool is_index = PL_op->op_type == OP_INDEX;
3520 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
3526 big_p = SvPV_const(big, biglen);
3527 little_p = SvPV_const(little, llen);
3529 big_utf8 = DO_UTF8(big);
3530 little_utf8 = DO_UTF8(little);
3531 if (big_utf8 ^ little_utf8) {
3532 /* One needs to be upgraded. */
3534 /* Well, maybe instead we might be able to downgrade the small
3536 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3539 /* If the large string is ISO-8859-1, and it's not possible to
3540 convert the small string to ISO-8859-1, then there is no
3541 way that it could be found anywhere by index. */
3546 /* At this point, pv is a malloc()ed string. So donate it to temp
3547 to ensure it will get free()d */
3548 little = temp = newSV(0);
3549 sv_usepvn(temp, pv, llen);
3550 little_p = SvPVX(little);
3552 temp = newSVpvn(little_p, llen);
3554 sv_utf8_upgrade(temp);
3556 little_p = SvPV_const(little, llen);
3559 if (SvGAMAGIC(big)) {
3560 /* Life just becomes a lot easier if I use a temporary here.
3561 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3562 will trigger magic and overloading again, as will fbm_instr()
3564 big = newSVpvn_flags(big_p, biglen,
3565 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3568 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3569 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3570 warn on undef, and we've already triggered a warning with the
3571 SvPV_const some lines above. We can't remove that, as we need to
3572 call some SvPV to trigger overloading early and find out if the
3574 This is all getting too messy. The API isn't quite clean enough,
3575 because data access has side effects.
3577 little = newSVpvn_flags(little_p, llen,
3578 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3579 little_p = SvPVX(little);
3583 offset = is_index ? 0 : biglen;
3585 if (big_utf8 && offset > 0)
3586 offset = sv_pos_u2b_flags(big, offset, 0, SV_CONST_RETURN);
3592 else if (offset > (SSize_t)biglen)
3594 if (!(little_p = is_index
3595 ? fbm_instr((unsigned char*)big_p + offset,
3596 (unsigned char*)big_p + biglen, little, 0)
3597 : rninstr(big_p, big_p + offset,
3598 little_p, little_p + llen)))
3601 retval = little_p - big_p;
3602 if (retval > 1 && big_utf8)
3603 retval = sv_pos_b2u_flags(big, retval, SV_CONST_RETURN);
3613 dSP; dMARK; dORIGMARK; dTARGET;
3614 SvTAINTED_off(TARG);
3615 do_sprintf(TARG, SP-MARK, MARK+1);
3616 TAINT_IF(SvTAINTED(TARG));
3628 const U8 *s = (U8*)SvPV_const(argsv, len);
3631 ? (len ? utf8n_to_uvchr(s, len, 0, UTF8_ALLOW_ANYUV) : 0)
3645 if (UNLIKELY(SvAMAGIC(top)))
3647 if (UNLIKELY(isinfnansv(top)))
3648 Perl_croak(aTHX_ "Cannot chr %" NVgf, SvNV(top));
3650 if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */
3651 && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0)
3653 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top)))
3654 && SvNV_nomg(top) < 0.0)))
3656 if (ckWARN(WARN_UTF8)) {
3657 if (SvGMAGICAL(top)) {
3658 SV *top2 = sv_newmortal();
3659 sv_setsv_nomg(top2, top);
3662 Perl_warner(aTHX_ packWARN(WARN_UTF8),
3663 "Invalid negative number (%" SVf ") in chr", SVfARG(top));
3665 value = UNICODE_REPLACEMENT;
3667 value = SvUV_nomg(top);
3671 SvUPGRADE(TARG,SVt_PV);
3673 if (value > 255 && !IN_BYTES) {
3674 SvGROW(TARG, (STRLEN)UVCHR_SKIP(value)+1);
3675 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3676 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3678 (void)SvPOK_only(TARG);
3687 *tmps++ = (char)value;
3689 (void)SvPOK_only(TARG);
3701 const char *tmps = SvPV_const(left, len);
3703 if (DO_UTF8(left)) {
3704 /* If Unicode, try to downgrade.
3705 * If not possible, croak.
3706 * Yes, we made this up. */
3707 SV* const tsv = newSVpvn_flags(tmps, len, SVf_UTF8|SVs_TEMP);
3709 sv_utf8_downgrade(tsv, FALSE);
3710 tmps = SvPV_const(tsv, len);
3712 # ifdef USE_ITHREADS
3714 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3715 /* This should be threadsafe because in ithreads there is only
3716 * one thread per interpreter. If this would not be true,
3717 * we would need a mutex to protect this malloc. */
3718 PL_reentrant_buffer->_crypt_struct_buffer =
3719 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3720 #if defined(__GLIBC__) || defined(__EMX__)
3721 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3722 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3723 /* work around glibc-2.2.5 bug */
3724 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3728 # endif /* HAS_CRYPT_R */
3729 # endif /* USE_ITHREADS */
3731 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3733 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3740 "The crypt() function is unimplemented due to excessive paranoia.");
3744 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3745 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3748 /* also used for: pp_lcfirst() */
3752 /* Actually is both lcfirst() and ucfirst(). Only the first character
3753 * changes. This means that possibly we can change in-place, ie., just
3754 * take the source and change that one character and store it back, but not
3755 * if read-only etc, or if the length changes */
3759 STRLEN slen; /* slen is the byte length of the whole SV. */
3762 bool inplace; /* ? Convert first char only, in-place */
3763 bool doing_utf8 = FALSE; /* ? using utf8 */
3764 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3765 const int op_type = PL_op->op_type;
3768 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3769 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3770 * stored as UTF-8 at s. */
3771 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3772 * lowercased) character stored in tmpbuf. May be either
3773 * UTF-8 or not, but in either case is the number of bytes */
3775 s = (const U8*)SvPV_const(source, slen);
3777 /* We may be able to get away with changing only the first character, in
3778 * place, but not if read-only, etc. Later we may discover more reasons to
3779 * not convert in-place. */
3780 inplace = !SvREADONLY(source) && SvPADTMP(source);
3782 /* First calculate what the changed first character should be. This affects
3783 * whether we can just swap it out, leaving the rest of the string unchanged,
3784 * or even if have to convert the dest to UTF-8 when the source isn't */
3786 if (! slen) { /* If empty */
3787 need = 1; /* still need a trailing NUL */
3790 else if (DO_UTF8(source)) { /* Is the source utf8? */
3793 if (op_type == OP_UCFIRST) {
3794 #ifdef USE_LOCALE_CTYPE
3795 _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
3797 _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, 0);
3801 #ifdef USE_LOCALE_CTYPE
3802 _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
3804 _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, 0);
3808 /* we can't do in-place if the length changes. */
3809 if (ulen != tculen) inplace = FALSE;
3810 need = slen + 1 - ulen + tculen;
3812 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3813 * latin1 is treated as caseless. Note that a locale takes
3815 ulen = 1; /* Original character is 1 byte */
3816 tculen = 1; /* Most characters will require one byte, but this will
3817 * need to be overridden for the tricky ones */
3820 if (op_type == OP_LCFIRST) {
3822 /* lower case the first letter: no trickiness for any character */
3823 #ifdef USE_LOCALE_CTYPE
3824 if (IN_LC_RUNTIME(LC_CTYPE)) {
3825 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
3826 *tmpbuf = toLOWER_LC(*s);
3831 *tmpbuf = (IN_UNI_8_BIT)
3832 ? toLOWER_LATIN1(*s)
3836 #ifdef USE_LOCALE_CTYPE
3838 else if (IN_LC_RUNTIME(LC_CTYPE)) {
3839 if (IN_UTF8_CTYPE_LOCALE) {
3843 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
3844 *tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any
3845 locales have upper and title case
3849 else if (! IN_UNI_8_BIT) {
3850 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3851 * on EBCDIC machines whatever the
3852 * native function does */
3855 /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is
3856 * UTF-8, which we treat as not in locale), and cased latin1 */
3858 #ifdef USE_LOCALE_CTYPE
3862 title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
3864 assert(tculen == 2);
3866 /* If the result is an upper Latin1-range character, it can
3867 * still be represented in one byte, which is its ordinal */
3868 if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) {
3869 *tmpbuf = (U8) title_ord;
3873 /* Otherwise it became more than one ASCII character (in
3874 * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to
3875 * beyond Latin1, so the number of bytes changed, so can't
3876 * replace just the first character in place. */
3879 /* If the result won't fit in a byte, the entire result
3880 * will have to be in UTF-8. Assume worst case sizing in
3881 * conversion. (all latin1 characters occupy at most two
3883 if (title_ord > 255) {
3885 convert_source_to_utf8 = TRUE;
3886 need = slen * 2 + 1;
3888 /* The (converted) UTF-8 and UTF-EBCDIC lengths of all
3889 * (both) characters whose title case is above 255 is
3893 else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */
3894 need = slen + 1 + 1;
3898 } /* End of use Unicode (Latin1) semantics */
3899 } /* End of changing the case of the first character */
3901 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3902 * generate the result */
3905 /* We can convert in place. This means we change just the first
3906 * character without disturbing the rest; no need to grow */
3908 s = d = (U8*)SvPV_force_nomg(source, slen);
3914 /* Here, we can't convert in place; we earlier calculated how much
3915 * space we will need, so grow to accommodate that */
3916 SvUPGRADE(dest, SVt_PV);
3917 d = (U8*)SvGROW(dest, need);
3918 (void)SvPOK_only(dest);
3925 if (! convert_source_to_utf8) {
3927 /* Here both source and dest are in UTF-8, but have to create
3928 * the entire output. We initialize the result to be the
3929 * title/lower cased first character, and then append the rest
3931 sv_setpvn(dest, (char*)tmpbuf, tculen);
3933 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3937 const U8 *const send = s + slen;
3939 /* Here the dest needs to be in UTF-8, but the source isn't,
3940 * except we earlier UTF-8'd the first character of the source
3941 * into tmpbuf. First put that into dest, and then append the
3942 * rest of the source, converting it to UTF-8 as we go. */
3944 /* Assert tculen is 2 here because the only two characters that
3945 * get to this part of the code have 2-byte UTF-8 equivalents */
3947 *d++ = *(tmpbuf + 1);
3948 s++; /* We have just processed the 1st char */
3950 for (; s < send; s++) {
3951 d = uvchr_to_utf8(d, *s);
3954 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3958 else { /* in-place UTF-8. Just overwrite the first character */
3959 Copy(tmpbuf, d, tculen, U8);
3960 SvCUR_set(dest, need - 1);
3964 else { /* Neither source nor dest are in or need to be UTF-8 */
3966 if (inplace) { /* in-place, only need to change the 1st char */
3969 else { /* Not in-place */
3971 /* Copy the case-changed character(s) from tmpbuf */
3972 Copy(tmpbuf, d, tculen, U8);
3973 d += tculen - 1; /* Code below expects d to point to final
3974 * character stored */
3977 else { /* empty source */
3978 /* See bug #39028: Don't taint if empty */
3982 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3983 * the destination to retain that flag */
3984 if (SvUTF8(source) && ! IN_BYTES)
3987 if (!inplace) { /* Finish the rest of the string, unchanged */
3988 /* This will copy the trailing NUL */
3989 Copy(s + 1, d + 1, slen, U8);
3990 SvCUR_set(dest, need - 1);
3993 #ifdef USE_LOCALE_CTYPE
3994 if (IN_LC_RUNTIME(LC_CTYPE)) {
3999 if (dest != source && SvTAINTED(source))
4005 /* There's so much setup/teardown code common between uc and lc, I wonder if
4006 it would be worth merging the two, and just having a switch outside each
4007 of the three tight loops. There is less and less commonality though */
4020 if ( SvPADTMP(source)
4021 && !SvREADONLY(source) && SvPOK(source)
4024 #ifdef USE_LOCALE_CTYPE
4025 (IN_LC_RUNTIME(LC_CTYPE))
4026 ? ! IN_UTF8_CTYPE_LOCALE
4032 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
4033 * make the loop tight, so we overwrite the source with the dest before
4034 * looking at it, and we need to look at the original source
4035 * afterwards. There would also need to be code added to handle
4036 * switching to not in-place in midstream if we run into characters
4037 * that change the length. Since being in locale overrides UNI_8_BIT,
4038 * that latter becomes irrelevant in the above test; instead for
4039 * locale, the size can't normally change, except if the locale is a
4042 s = d = (U8*)SvPV_force_nomg(source, len);
4049 s = (const U8*)SvPV_nomg_const(source, len);
4052 SvUPGRADE(dest, SVt_PV);
4053 d = (U8*)SvGROW(dest, min);
4054 (void)SvPOK_only(dest);
4059 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4060 to check DO_UTF8 again here. */
4062 if (DO_UTF8(source)) {
4063 const U8 *const send = s + len;
4064 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4066 /* All occurrences of these are to be moved to follow any other marks.
4067 * This is context-dependent. We may not be passed enough context to
4068 * move the iota subscript beyond all of them, but we do the best we can
4069 * with what we're given. The result is always better than if we
4070 * hadn't done this. And, the problem would only arise if we are
4071 * passed a character without all its combining marks, which would be
4072 * the caller's mistake. The information this is based on comes from a
4073 * comment in Unicode SpecialCasing.txt, (and the Standard's text
4074 * itself) and so can't be checked properly to see if it ever gets
4075 * revised. But the likelihood of it changing is remote */
4076 bool in_iota_subscript = FALSE;
4082 if (in_iota_subscript && ! _is_utf8_mark(s)) {
4084 /* A non-mark. Time to output the iota subscript */
4085 Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
4086 d += capital_iota_len;
4087 in_iota_subscript = FALSE;
4090 /* Then handle the current character. Get the changed case value
4091 * and copy it to the output buffer */
4094 #ifdef USE_LOCALE_CTYPE
4095 uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
4097 uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, 0);
4099 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
4100 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
4101 if (uv == GREEK_CAPITAL_LETTER_IOTA
4102 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
4104 in_iota_subscript = TRUE;
4107 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4108 /* If the eventually required minimum size outgrows the
4109 * available space, we need to grow. */
4110 const UV o = d - (U8*)SvPVX_const(dest);
4112 /* If someone uppercases one million U+03B0s we SvGROW()
4113 * one million times. Or we could try guessing how much to
4114 * allocate without allocating too much. Such is life.
4115 * See corresponding comment in lc code for another option
4117 d = o + (U8*) SvGROW(dest, min);
4119 Copy(tmpbuf, d, ulen, U8);
4124 if (in_iota_subscript) {
4125 Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
4126 d += capital_iota_len;
4131 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4133 else { /* Not UTF-8 */
4135 const U8 *const send = s + len;
4137 /* Use locale casing if in locale; regular style if not treating
4138 * latin1 as having case; otherwise the latin1 casing. Do the
4139 * whole thing in a tight loop, for speed, */
4140 #ifdef USE_LOCALE_CTYPE
4141 if (IN_LC_RUNTIME(LC_CTYPE)) {
4142 if (IN_UTF8_CTYPE_LOCALE) {
4145 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4146 for (; s < send; d++, s++)
4147 *d = (U8) toUPPER_LC(*s);
4151 if (! IN_UNI_8_BIT) {
4152 for (; s < send; d++, s++) {
4157 #ifdef USE_LOCALE_CTYPE
4160 for (; s < send; d++, s++) {
4161 *d = toUPPER_LATIN1_MOD(*s);
4162 if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
4166 /* The mainstream case is the tight loop above. To avoid
4167 * extra tests in that, all three characters that require
4168 * special handling are mapped by the MOD to the one tested
4170 * Use the source to distinguish between the three cases */
4172 #if UNICODE_MAJOR_VERSION > 2 \
4173 || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \
4174 && UNICODE_DOT_DOT_VERSION >= 8)
4175 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4177 /* uc() of this requires 2 characters, but they are
4178 * ASCII. If not enough room, grow the string */
4179 if (SvLEN(dest) < ++min) {
4180 const UV o = d - (U8*)SvPVX_const(dest);
4181 d = o + (U8*) SvGROW(dest, min);
4183 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4184 continue; /* Back to the tight loop; still in ASCII */
4188 /* The other two special handling characters have their
4189 * upper cases outside the latin1 range, hence need to be
4190 * in UTF-8, so the whole result needs to be in UTF-8. So,
4191 * here we are somewhere in the middle of processing a
4192 * non-UTF-8 string, and realize that we will have to convert
4193 * the whole thing to UTF-8. What to do? There are
4194 * several possibilities. The simplest to code is to
4195 * convert what we have so far, set a flag, and continue on
4196 * in the loop. The flag would be tested each time through
4197 * the loop, and if set, the next character would be
4198 * converted to UTF-8 and stored. But, I (khw) didn't want
4199 * to slow down the mainstream case at all for this fairly
4200 * rare case, so I didn't want to add a test that didn't
4201 * absolutely have to be there in the loop, besides the
4202 * possibility that it would get too complicated for
4203 * optimizers to deal with. Another possibility is to just
4204 * give up, convert the source to UTF-8, and restart the
4205 * function that way. Another possibility is to convert
4206 * both what has already been processed and what is yet to
4207 * come separately to UTF-8, then jump into the loop that
4208 * handles UTF-8. But the most efficient time-wise of the
4209 * ones I could think of is what follows, and turned out to
4210 * not require much extra code. */
4212 /* Convert what we have so far into UTF-8, telling the
4213 * function that we know it should be converted, and to
4214 * allow extra space for what we haven't processed yet.
4215 * Assume the worst case space requirements for converting
4216 * what we haven't processed so far: that it will require
4217 * two bytes for each remaining source character, plus the
4218 * NUL at the end. This may cause the string pointer to
4219 * move, so re-find it. */
4221 len = d - (U8*)SvPVX_const(dest);
4222 SvCUR_set(dest, len);
4223 len = sv_utf8_upgrade_flags_grow(dest,
4224 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4226 d = (U8*)SvPVX(dest) + len;
4228 /* Now process the remainder of the source, converting to
4229 * upper and UTF-8. If a resulting byte is invariant in
4230 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4231 * append it to the output. */
4232 for (; s < send; s++) {
4233 (void) _to_upper_title_latin1(*s, d, &len, 'S');
4237 /* Here have processed the whole source; no need to continue
4238 * with the outer loop. Each character has been converted
4239 * to upper case and converted to UTF-8 */
4242 } /* End of processing all latin1-style chars */
4243 } /* End of processing all chars */
4244 } /* End of source is not empty */
4246 if (source != dest) {
4247 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4248 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4250 } /* End of isn't utf8 */
4251 #ifdef USE_LOCALE_CTYPE
4252 if (IN_LC_RUNTIME(LC_CTYPE)) {
4257 if (dest != source && SvTAINTED(source))
4275 if ( SvPADTMP(source)
4276 && !SvREADONLY(source) && SvPOK(source)
4277 && !DO_UTF8(source)) {
4279 /* We can convert in place, as lowercasing anything in the latin1 range
4280 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4282 s = d = (U8*)SvPV_force_nomg(source, len);
4289 s = (const U8*)SvPV_nomg_const(source, len);
4292 SvUPGRADE(dest, SVt_PV);
4293 d = (U8*)SvGROW(dest, min);
4294 (void)SvPOK_only(dest);
4299 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4300 to check DO_UTF8 again here. */
4302 if (DO_UTF8(source)) {
4303 const U8 *const send = s + len;
4304 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4307 const STRLEN u = UTF8SKIP(s);
4310 #ifdef USE_LOCALE_CTYPE
4311 _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
4313 _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, 0);
4316 /* Here is where we would do context-sensitive actions. See the
4317 * commit message for 86510fb15 for why there isn't any */
4319 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4321 /* If the eventually required minimum size outgrows the
4322 * available space, we need to grow. */
4323 const UV o = d - (U8*)SvPVX_const(dest);
4325 /* If someone lowercases one million U+0130s we SvGROW() one
4326 * million times. Or we could try guessing how much to
4327 * allocate without allocating too much. Such is life.
4328 * Another option would be to grow an extra byte or two more
4329 * each time we need to grow, which would cut down the million
4330 * to 500K, with little waste */
4331 d = o + (U8*) SvGROW(dest, min);
4334 /* Copy the newly lowercased letter to the output buffer we're
4336 Copy(tmpbuf, d, ulen, U8);
4339 } /* End of looping through the source string */
4342 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4343 } else { /* Not utf8 */
4345 const U8 *const send = s + len;
4347 /* Use locale casing if in locale; regular style if not treating
4348 * latin1 as having case; otherwise the latin1 casing. Do the
4349 * whole thing in a tight loop, for speed, */
4350 #ifdef USE_LOCALE_CTYPE
4351 if (IN_LC_RUNTIME(LC_CTYPE)) {
4352 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4353 for (; s < send; d++, s++)
4354 *d = toLOWER_LC(*s);
4358 if (! IN_UNI_8_BIT) {
4359 for (; s < send; d++, s++) {
4364 for (; s < send; d++, s++) {
4365 *d = toLOWER_LATIN1(*s);
4369 if (source != dest) {
4371 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4374 #ifdef USE_LOCALE_CTYPE
4375 if (IN_LC_RUNTIME(LC_CTYPE)) {
4380 if (dest != source && SvTAINTED(source))
4389 SV * const sv = TOPs;
4391 const char *s = SvPV_const(sv,len);
4393 SvUTF8_off(TARG); /* decontaminate */
4396 SvUPGRADE(TARG, SVt_PV);
4397 SvGROW(TARG, (len * 2) + 1);
4401 STRLEN ulen = UTF8SKIP(s);
4402 bool to_quote = FALSE;
4404 if (UTF8_IS_INVARIANT(*s)) {
4405 if (_isQUOTEMETA(*s)) {
4409 else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, s + len)) {
4411 #ifdef USE_LOCALE_CTYPE
4412 /* In locale, we quote all non-ASCII Latin1 chars.
4413 * Otherwise use the quoting rules */
4415 IN_LC_RUNTIME(LC_CTYPE)
4418 _isQUOTEMETA(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s + 1))))
4423 else if (is_QUOTEMETA_high(s)) {
4438 else if (IN_UNI_8_BIT) {
4440 if (_isQUOTEMETA(*s))
4446 /* For non UNI_8_BIT (and hence in locale) just quote all \W
4447 * including everything above ASCII */
4449 if (!isWORDCHAR_A(*s))
4455 SvCUR_set(TARG, d - SvPVX_const(TARG));
4456 (void)SvPOK_only_UTF8(TARG);
4459 sv_setpvn(TARG, s, len);
4475 U8 tmpbuf[UTF8_MAXBYTES_CASE + 1];
4476 #if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \
4477 || (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \
4478 || UNICODE_DOT_DOT_VERSION > 0)
4479 const bool full_folding = TRUE; /* This variable is here so we can easily
4480 move to more generality later */
4482 const bool full_folding = FALSE;
4484 const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 )
4485 #ifdef USE_LOCALE_CTYPE
4486 | ( IN_LC_RUNTIME(LC_CTYPE) ? FOLD_FLAGS_LOCALE : 0 )
4490 /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me.
4491 * You are welcome(?) -Hugmeir
4499 s = (const U8*)SvPV_nomg_const(source, len);
4501 if (ckWARN(WARN_UNINITIALIZED))
4502 report_uninit(source);
4509 SvUPGRADE(dest, SVt_PV);
4510 d = (U8*)SvGROW(dest, min);
4511 (void)SvPOK_only(dest);
4516 if (DO_UTF8(source)) { /* UTF-8 flagged string. */
4518 const STRLEN u = UTF8SKIP(s);
4521 _toFOLD_utf8_flags(s, send, tmpbuf, &ulen, flags);
4523 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4524 const UV o = d - (U8*)SvPVX_const(dest);
4525 d = o + (U8*) SvGROW(dest, min);
4528 Copy(tmpbuf, d, ulen, U8);
4533 } /* Unflagged string */
4535 #ifdef USE_LOCALE_CTYPE
4536 if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */
4537 if (IN_UTF8_CTYPE_LOCALE) {
4538 goto do_uni_folding;
4540 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4541 for (; s < send; d++, s++)
4542 *d = (U8) toFOLD_LC(*s);
4546 if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
4547 for (; s < send; d++, s++)
4551 #ifdef USE_LOCALE_CTYPE
4554 /* For ASCII and the Latin-1 range, there's only two troublesome
4555 * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
4556 * casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which
4557 * under any fold becomes \x{3BC} (\N{GREEK SMALL LETTER MU}) --
4558 * For the rest, the casefold is their lowercase. */
4559 for (; s < send; d++, s++) {
4560 if (*s == MICRO_SIGN) {
4561 /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU},
4562 * which is outside of the latin-1 range. There's a couple
4563 * of ways to deal with this -- khw discusses them in
4564 * pp_lc/uc, so go there :) What we do here is upgrade what
4565 * we had already casefolded, then enter an inner loop that
4566 * appends the rest of the characters as UTF-8. */
4567 len = d - (U8*)SvPVX_const(dest);
4568 SvCUR_set(dest, len);
4569 len = sv_utf8_upgrade_flags_grow(dest,
4570 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4571 /* The max expansion for latin1
4572 * chars is 1 byte becomes 2 */
4574 d = (U8*)SvPVX(dest) + len;
4576 Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8);
4579 for (; s < send; s++) {
4581 UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags);
4582 if UVCHR_IS_INVARIANT(fc) {
4584 && *s == LATIN_SMALL_LETTER_SHARP_S)
4593 Copy(tmpbuf, d, ulen, U8);
4599 else if (full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) {
4600 /* Under full casefolding, LATIN SMALL LETTER SHARP S
4601 * becomes "ss", which may require growing the SV. */
4602 if (SvLEN(dest) < ++min) {
4603 const UV o = d - (U8*)SvPVX_const(dest);
4604 d = o + (U8*) SvGROW(dest, min);
4609 else { /* If it's not one of those two, the fold is their lower
4611 *d = toLOWER_LATIN1(*s);
4617 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4619 #ifdef USE_LOCALE_CTYPE
4620 if (IN_LC_RUNTIME(LC_CTYPE)) {
4625 if (SvTAINTED(source))
4635 dSP; dMARK; dORIGMARK;
4636 AV *const av = MUTABLE_AV(POPs);
4637 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4639 if (SvTYPE(av) == SVt_PVAV) {
4640 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4641 bool can_preserve = FALSE;
4647 can_preserve = SvCANEXISTDELETE(av);
4650 if (lval && localizing) {
4653 for (svp = MARK + 1; svp <= SP; svp++) {
4654 const SSize_t elem = SvIV(*svp);
4658 if (max > AvMAX(av))
4662 while (++MARK <= SP) {
4664 SSize_t elem = SvIV(*MARK);
4665 bool preeminent = TRUE;
4667 if (localizing && can_preserve) {
4668 /* If we can determine whether the element exist,
4669 * Try to preserve the existenceness of a tied array
4670 * element by using EXISTS and DELETE if possible.
4671 * Fallback to FETCH and STORE otherwise. */
4672 preeminent = av_exists(av, elem);