3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
33 /* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
37 #ifdef NEED_GETPID_PROTO
38 extern Pid_t getpid (void);
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
45 #if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
49 /* variations on pp_null */
55 if (GIMME_V == G_SCALAR)
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
71 if (PL_op->op_flags & OPf_REF) {
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
84 if (gimme == G_ARRAY) {
85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
87 if (SvMAGICAL(TARG)) {
89 for (i=0; i < (U32)maxarg; i++) {
90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
99 else if (gimme == G_SCALAR) {
100 SV* const sv = sv_newmortal();
101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
102 sv_setiv(sv, maxarg);
113 assert(SvTYPE(TARG) == SVt_PVHV);
115 if (PL_op->op_private & OPpLVAL_INTRO)
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
118 if (PL_op->op_flags & OPf_REF)
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
129 if (gimme == G_ARRAY) {
130 RETURNOP(Perl_do_kv(aTHX));
132 else if (gimme == G_SCALAR) {
133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
141 static const char S_no_symref_sv[] =
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
148 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
151 sv = amagic_deref_call(sv, to_gv_amg);
156 if (SvTYPE(sv) == SVt_PVIO) {
157 GV * const gv = MUTABLE_GV(sv_newmortal());
158 gv_init(gv, 0, "", 0, 0);
159 GvIOp(gv) = MUTABLE_IO(sv);
160 SvREFCNT_inc_void_NN(sv);
163 else if (!isGV_with_GP(sv))
164 DIE(aTHX_ "Not a GLOB reference");
167 if (!isGV_with_GP(sv)) {
168 if (!SvOK(sv) && sv != &PL_sv_undef) {
169 /* If this is a 'my' scalar and flag is set then vivify
172 if (PL_op->op_private & OPpDEREF) {
175 Perl_croak_no_modify(aTHX);
176 if (cUNOP->op_targ) {
178 SV * const namesv = PAD_SV(cUNOP->op_targ);
179 const char * const name = SvPV(namesv, len);
180 gv = MUTABLE_GV(newSV(0));
181 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
184 const char * const name = CopSTASHPV(PL_curcop);
187 prepare_SV_for_RV(sv);
188 SvRV_set(sv, MUTABLE_SV(gv));
193 if (PL_op->op_flags & OPf_REF ||
194 PL_op->op_private & HINT_STRICT_REFS)
195 DIE(aTHX_ PL_no_usym, "a symbol");
196 if (ckWARN(WARN_UNINITIALIZED))
200 if ( ((PL_op->op_flags & OPf_SPECIAL) &&
201 !(PL_op->op_flags & OPf_MOD))
202 || PL_op->op_type == OP_READLINE )
205 const char * const nambeg = SvPV_nomg_const(sv, len);
206 SV * const temp = MUTABLE_SV(
207 gv_fetchpvn_flags(nambeg, len, SvUTF8(sv), SVt_PVGV)
210 /* !len to avoid an extra uninit warning */
211 && (!len || !is_gv_magical_sv(sv,0)
212 || !(sv = MUTABLE_SV(gv_fetchpvn_flags(
213 nambeg, len, GV_ADD | SvUTF8(sv),
220 if (PL_op->op_private & HINT_STRICT_REFS)
221 DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
222 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
223 == OPpDONT_INIT_GV) {
224 /* We are the target of a coderef assignment. Return
225 the scalar unchanged, and let pp_sasssign deal with
231 const char * const nambeg = SvPV_nomg_const(sv, len);
234 nambeg, len, GV_ADD | SvUTF8(sv), SVt_PVGV
239 /* FAKE globs in the symbol table cause weird bugs (#77810) */
244 SV *newsv = sv_newmortal();
245 sv_setsv_flags(newsv, sv, 0);
249 if (PL_op->op_private & OPpLVAL_INTRO)
250 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
255 /* Helper function for pp_rv2sv and pp_rv2av */
257 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
258 const svtype type, SV ***spp)
263 PERL_ARGS_ASSERT_SOFTREF2XV;
265 if (PL_op->op_private & HINT_STRICT_REFS) {
267 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
269 Perl_die(aTHX_ PL_no_usym, what);
273 PL_op->op_flags & OPf_REF &&
274 PL_op->op_next->op_type != OP_BOOLKEYS
276 Perl_die(aTHX_ PL_no_usym, what);
277 if (ckWARN(WARN_UNINITIALIZED))
279 if (type != SVt_PV && GIMME_V == G_ARRAY) {
283 **spp = &PL_sv_undef;
286 if ((PL_op->op_flags & OPf_SPECIAL) &&
287 !(PL_op->op_flags & OPf_MOD))
290 const char * const nambeg = SvPV_nomg_const(sv, len);
291 gv = gv_fetchpvn_flags(nambeg, len, SvUTF8(sv), type);
293 && (!is_gv_magical_sv(sv,0)
294 || !(gv = gv_fetchpvn_flags(
295 nambeg, len, GV_ADD|SvUTF8(sv), type
300 **spp = &PL_sv_undef;
306 const char * const nambeg = SvPV_nomg_const(sv, len);
307 gv = gv_fetchpvn_flags(nambeg, len, GV_ADD | SvUTF8(sv), type);
317 if (!(PL_op->op_private & OPpDEREFed))
321 sv = amagic_deref_call(sv, to_sv_amg);
326 switch (SvTYPE(sv)) {
332 DIE(aTHX_ "Not a SCALAR reference");
339 if (!isGV_with_GP(gv)) {
340 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
346 if (PL_op->op_flags & OPf_MOD) {
347 if (PL_op->op_private & OPpLVAL_INTRO) {
348 if (cUNOP->op_first->op_type == OP_NULL)
349 sv = save_scalar(MUTABLE_GV(TOPs));
351 sv = save_scalar(gv);
353 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
355 else if (PL_op->op_private & OPpDEREF)
356 vivify_ref(sv, PL_op->op_private & OPpDEREF);
365 AV * const av = MUTABLE_AV(TOPs);
366 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
368 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
370 *sv = newSV_type(SVt_PVMG);
371 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
375 SETs(sv_2mortal(newSViv(
376 AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
386 if (PL_op->op_flags & OPf_MOD || LVRET) {
387 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
388 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
390 LvTARG(ret) = SvREFCNT_inc_simple(sv);
391 PUSHs(ret); /* no SvSETMAGIC */
395 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
396 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
397 if (mg && mg->mg_len >= 0) {
402 PUSHi(i + CopARYBASE_get(PL_curcop));
415 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
417 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
420 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
421 /* (But not in defined().) */
423 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
426 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
427 if ((PL_op->op_private & OPpLVAL_INTRO)) {
428 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
431 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
434 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
438 cv = MUTABLE_CV(&PL_sv_undef);
439 SETs(MUTABLE_SV(cv));
449 SV *ret = &PL_sv_undef;
451 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
452 const char * s = SvPVX_const(TOPs);
453 if (strnEQ(s, "CORE::", 6)) {
454 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
455 if (!code || code == -KEY_CORE)
456 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
457 if (code < 0) { /* Overridable. */
458 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
464 cv = sv_2cv(TOPs, &stash, &gv, 0);
466 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
475 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
477 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
479 PUSHs(MUTABLE_SV(cv));
493 if (GIMME != G_ARRAY) {
497 *MARK = &PL_sv_undef;
498 *MARK = refto(*MARK);
502 EXTEND_MORTAL(SP - MARK);
504 *MARK = refto(*MARK);
509 S_refto(pTHX_ SV *sv)
514 PERL_ARGS_ASSERT_REFTO;
516 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
519 if (!(sv = LvTARG(sv)))
522 SvREFCNT_inc_void_NN(sv);
524 else if (SvTYPE(sv) == SVt_PVAV) {
525 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
526 av_reify(MUTABLE_AV(sv));
528 SvREFCNT_inc_void_NN(sv);
530 else if (SvPADTMP(sv) && !IS_PADGV(sv))
534 SvREFCNT_inc_void_NN(sv);
537 sv_upgrade(rv, SVt_IV);
547 SV * const sv = POPs;
552 if (!sv || !SvROK(sv))
555 pv = sv_reftype(SvRV(sv),TRUE);
556 PUSHp(pv, strlen(pv));
566 stash = CopSTASH(PL_curcop);
568 SV * const ssv = POPs;
572 if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
573 Perl_croak(aTHX_ "Attempt to bless into a reference");
574 ptr = SvPV_const(ssv,len);
576 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
577 "Explicit blessing to '' (assuming package main)");
578 stash = gv_stashpvn(ptr, len, GV_ADD);
581 (void)sv_bless(TOPs, stash);
590 const char * const elem = SvPV_nolen_const(sv);
591 GV * const gv = MUTABLE_GV(POPs);
596 /* elem will always be NUL terminated. */
597 const char * const second_letter = elem + 1;
600 if (strEQ(second_letter, "RRAY"))
601 tmpRef = MUTABLE_SV(GvAV(gv));
604 if (strEQ(second_letter, "ODE"))
605 tmpRef = MUTABLE_SV(GvCVu(gv));
608 if (strEQ(second_letter, "ILEHANDLE")) {
609 /* finally deprecated in 5.8.0 */
610 deprecate("*glob{FILEHANDLE}");
611 tmpRef = MUTABLE_SV(GvIOp(gv));
614 if (strEQ(second_letter, "ORMAT"))
615 tmpRef = MUTABLE_SV(GvFORM(gv));
618 if (strEQ(second_letter, "LOB"))
619 tmpRef = MUTABLE_SV(gv);
622 if (strEQ(second_letter, "ASH"))
623 tmpRef = MUTABLE_SV(GvHV(gv));
626 if (*second_letter == 'O' && !elem[2])
627 tmpRef = MUTABLE_SV(GvIOp(gv));
630 if (strEQ(second_letter, "AME"))
631 sv = newSVhek(GvNAME_HEK(gv));
634 if (strEQ(second_letter, "ACKAGE")) {
635 const HV * const stash = GvSTASH(gv);
636 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
637 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
641 if (strEQ(second_letter, "CALAR"))
656 /* Pattern matching */
661 register unsigned char *s;
664 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
668 if (mg && SvSCREAM(sv))
671 s = (unsigned char*)(SvPV(sv, len));
672 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
673 /* No point in studying a zero length string, and not safe to study
674 anything that doesn't appear to be a simple scalar (and hence might
675 change between now and when the regexp engine runs without our set
676 magic ever running) such as a reference to an object with overloaded
677 stringification. Also refuse to study an FBM scalar, as this gives
678 more flexibility in SV flag usage. No real-world code would ever
679 end up studying an FBM scalar, so this isn't a real pessimisation.
680 Endemic use of I32 in Perl_screaminstr makes it hard to safely push
681 the study length limit from I32_MAX to U32_MAX - 1.
688 } else if (len < 0xFFFF) {
693 size = (256 + len) * quanta;
694 sfirst_raw = (char *)safemalloc(size);
697 DIE(aTHX_ "do_study: out of memory");
701 mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
702 mg->mg_ptr = sfirst_raw;
704 mg->mg_private = quanta;
706 memset(sfirst_raw, ~0, 256 * quanta);
708 /* The assumption here is that most studied strings are fairly short, hence
709 the pain of the extra code is worth it, given the memory savings.
710 80 character string, 336 bytes as U8, down from 1344 as U32
711 800 character string, 2112 bytes as U16, down from 4224 as U32
715 U8 *const sfirst = (U8 *)sfirst_raw;
716 U8 *const snext = sfirst + 256;
718 const U8 ch = s[len];
719 snext[len] = sfirst[ch];
722 } else if (quanta == 2) {
723 U16 *const sfirst = (U16 *)sfirst_raw;
724 U16 *const snext = sfirst + 256;
726 const U8 ch = s[len];
727 snext[len] = sfirst[ch];
731 U32 *const sfirst = (U32 *)sfirst_raw;
732 U32 *const snext = sfirst + 256;
734 const U8 ch = s[len];
735 snext[len] = sfirst[ch];
748 if (PL_op->op_flags & OPf_STACKED)
750 else if (PL_op->op_private & OPpTARGET_MY)
756 TARG = sv_newmortal();
757 if(PL_op->op_type == OP_TRANSR) {
758 SV * const newsv = newSVsv(sv);
762 else PUSHi(do_trans(sv));
766 /* Lvalue operators. */
769 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
775 PERL_ARGS_ASSERT_DO_CHOMP;
777 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
779 if (SvTYPE(sv) == SVt_PVAV) {
781 AV *const av = MUTABLE_AV(sv);
782 const I32 max = AvFILL(av);
784 for (i = 0; i <= max; i++) {
785 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
786 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
787 do_chomp(retval, sv, chomping);
791 else if (SvTYPE(sv) == SVt_PVHV) {
792 HV* const hv = MUTABLE_HV(sv);
794 (void)hv_iterinit(hv);
795 while ((entry = hv_iternext(hv)))
796 do_chomp(retval, hv_iterval(hv,entry), chomping);
799 else if (SvREADONLY(sv)) {
801 /* SV is copy-on-write */
802 sv_force_normal_flags(sv, 0);
805 Perl_croak_no_modify(aTHX);
810 /* XXX, here sv is utf8-ized as a side-effect!
811 If encoding.pm is used properly, almost string-generating
812 operations, including literal strings, chr(), input data, etc.
813 should have been utf8-ized already, right?
815 sv_recode_to_utf8(sv, PL_encoding);
821 char *temp_buffer = NULL;
830 while (len && s[-1] == '\n') {
837 STRLEN rslen, rs_charlen;
838 const char *rsptr = SvPV_const(PL_rs, rslen);
840 rs_charlen = SvUTF8(PL_rs)
844 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
845 /* Assumption is that rs is shorter than the scalar. */
847 /* RS is utf8, scalar is 8 bit. */
849 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
852 /* Cannot downgrade, therefore cannot possibly match
854 assert (temp_buffer == rsptr);
860 else if (PL_encoding) {
861 /* RS is 8 bit, encoding.pm is used.
862 * Do not recode PL_rs as a side-effect. */
863 svrecode = newSVpvn(rsptr, rslen);
864 sv_recode_to_utf8(svrecode, PL_encoding);
865 rsptr = SvPV_const(svrecode, rslen);
866 rs_charlen = sv_len_utf8(svrecode);
869 /* RS is 8 bit, scalar is utf8. */
870 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
884 if (memNE(s, rsptr, rslen))
886 SvIVX(retval) += rs_charlen;
889 s = SvPV_force_nolen(sv);
897 SvREFCNT_dec(svrecode);
899 Safefree(temp_buffer);
901 if (len && !SvPOK(sv))
902 s = SvPV_force_nomg(sv, len);
905 char * const send = s + len;
906 char * const start = s;
908 while (s > start && UTF8_IS_CONTINUATION(*s))
910 if (is_utf8_string((U8*)s, send - s)) {
911 sv_setpvn(retval, s, send - s);
913 SvCUR_set(sv, s - start);
919 sv_setpvs(retval, "");
923 sv_setpvn(retval, s, 1);
930 sv_setpvs(retval, "");
938 const bool chomping = PL_op->op_type == OP_SCHOMP;
942 do_chomp(TARG, TOPs, chomping);
949 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
950 const bool chomping = PL_op->op_type == OP_CHOMP;
955 do_chomp(TARG, *++MARK, chomping);
966 if (!PL_op->op_private) {
975 SV_CHECK_THINKFIRST_COW_DROP(sv);
977 switch (SvTYPE(sv)) {
981 av_undef(MUTABLE_AV(sv));
984 hv_undef(MUTABLE_HV(sv));
987 if (cv_const_sv((const CV *)sv))
988 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
989 CvANON((const CV *)sv) ? "(anonymous)"
990 : GvENAME(CvGV((const CV *)sv)));
994 /* let user-undef'd sub keep its identity */
995 GV* const gv = CvGV((const CV *)sv);
996 cv_undef(MUTABLE_CV(sv));
997 CvGV_set(MUTABLE_CV(sv), gv);
1002 SvSetMagicSV(sv, &PL_sv_undef);
1005 else if (isGV_with_GP(sv)) {
1009 /* undef *Pkg::meth_name ... */
1011 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1012 && HvENAME_get(stash);
1014 if((stash = GvHV((const GV *)sv))) {
1015 if(HvENAME_get(stash))
1016 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1020 gp_free(MUTABLE_GV(sv));
1022 GvGP_set(sv, gp_ref(gp));
1023 GvSV(sv) = newSV(0);
1024 GvLINE(sv) = CopLINE(PL_curcop);
1025 GvEGV(sv) = MUTABLE_GV(sv);
1029 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1031 /* undef *Foo::ISA */
1032 if( strEQ(GvNAME((const GV *)sv), "ISA")
1033 && (stash = GvSTASH((const GV *)sv))
1034 && (method_changed || HvENAME(stash)) )
1035 mro_isa_changed_in(stash);
1036 else if(method_changed)
1037 mro_method_changed_in(
1038 GvSTASH((const GV *)sv)
1045 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1060 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1061 Perl_croak_no_modify(aTHX);
1062 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1063 && SvIVX(TOPs) != IV_MIN)
1065 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1066 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1077 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1078 Perl_croak_no_modify(aTHX);
1080 TARG = sv_newmortal();
1081 sv_setsv(TARG, TOPs);
1082 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1083 && SvIVX(TOPs) != IV_MAX)
1085 SvIV_set(TOPs, SvIVX(TOPs) + 1);
1086 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1091 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1101 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1102 Perl_croak_no_modify(aTHX);
1104 TARG = sv_newmortal();
1105 sv_setsv(TARG, TOPs);
1106 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1107 && SvIVX(TOPs) != IV_MIN)
1109 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1110 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1119 /* Ordinary operators. */
1123 dVAR; dSP; dATARGET; SV *svl, *svr;
1124 #ifdef PERL_PRESERVE_IVUV
1127 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1130 #ifdef PERL_PRESERVE_IVUV
1131 /* For integer to integer power, we do the calculation by hand wherever
1132 we're sure it is safe; otherwise we call pow() and try to convert to
1133 integer afterwards. */
1135 SvIV_please_nomg(svr);
1137 SvIV_please_nomg(svl);
1146 const IV iv = SvIVX(svr);
1150 goto float_it; /* Can't do negative powers this way. */
1154 baseuok = SvUOK(svl);
1156 baseuv = SvUVX(svl);
1158 const IV iv = SvIVX(svl);
1161 baseuok = TRUE; /* effectively it's a UV now */
1163 baseuv = -iv; /* abs, baseuok == false records sign */
1166 /* now we have integer ** positive integer. */
1169 /* foo & (foo - 1) is zero only for a power of 2. */
1170 if (!(baseuv & (baseuv - 1))) {
1171 /* We are raising power-of-2 to a positive integer.
1172 The logic here will work for any base (even non-integer
1173 bases) but it can be less accurate than
1174 pow (base,power) or exp (power * log (base)) when the
1175 intermediate values start to spill out of the mantissa.
1176 With powers of 2 we know this can't happen.
1177 And powers of 2 are the favourite thing for perl
1178 programmers to notice ** not doing what they mean. */
1180 NV base = baseuok ? baseuv : -(NV)baseuv;
1185 while (power >>= 1) {
1193 SvIV_please_nomg(svr);
1196 register unsigned int highbit = 8 * sizeof(UV);
1197 register unsigned int diff = 8 * sizeof(UV);
1198 while (diff >>= 1) {
1200 if (baseuv >> highbit) {
1204 /* we now have baseuv < 2 ** highbit */
1205 if (power * highbit <= 8 * sizeof(UV)) {
1206 /* result will definitely fit in UV, so use UV math
1207 on same algorithm as above */
1208 register UV result = 1;
1209 register UV base = baseuv;
1210 const bool odd_power = cBOOL(power & 1);
1214 while (power >>= 1) {
1221 if (baseuok || !odd_power)
1222 /* answer is positive */
1224 else if (result <= (UV)IV_MAX)
1225 /* answer negative, fits in IV */
1226 SETi( -(IV)result );
1227 else if (result == (UV)IV_MIN)
1228 /* 2's complement assumption: special case IV_MIN */
1231 /* answer negative, doesn't fit */
1232 SETn( -(NV)result );
1242 NV right = SvNV_nomg(svr);
1243 NV left = SvNV_nomg(svl);
1246 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1248 We are building perl with long double support and are on an AIX OS
1249 afflicted with a powl() function that wrongly returns NaNQ for any
1250 negative base. This was reported to IBM as PMR #23047-379 on
1251 03/06/2006. The problem exists in at least the following versions
1252 of AIX and the libm fileset, and no doubt others as well:
1254 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1255 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1256 AIX 5.2.0 bos.adt.libm 5.2.0.85
1258 So, until IBM fixes powl(), we provide the following workaround to
1259 handle the problem ourselves. Our logic is as follows: for
1260 negative bases (left), we use fmod(right, 2) to check if the
1261 exponent is an odd or even integer:
1263 - if odd, powl(left, right) == -powl(-left, right)
1264 - if even, powl(left, right) == powl(-left, right)
1266 If the exponent is not an integer, the result is rightly NaNQ, so
1267 we just return that (as NV_NAN).
1271 NV mod2 = Perl_fmod( right, 2.0 );
1272 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1273 SETn( -Perl_pow( -left, right) );
1274 } else if (mod2 == 0.0) { /* even integer */
1275 SETn( Perl_pow( -left, right) );
1276 } else { /* fractional power */
1280 SETn( Perl_pow( left, right) );
1283 SETn( Perl_pow( left, right) );
1284 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1286 #ifdef PERL_PRESERVE_IVUV
1288 SvIV_please_nomg(svr);
1296 dVAR; dSP; dATARGET; SV *svl, *svr;
1297 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1300 #ifdef PERL_PRESERVE_IVUV
1301 SvIV_please_nomg(svr);
1303 /* Unless the left argument is integer in range we are going to have to
1304 use NV maths. Hence only attempt to coerce the right argument if
1305 we know the left is integer. */
1306 /* Left operand is defined, so is it IV? */
1307 SvIV_please_nomg(svl);
1309 bool auvok = SvUOK(svl);
1310 bool buvok = SvUOK(svr);
1311 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1312 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1321 const IV aiv = SvIVX(svl);
1324 auvok = TRUE; /* effectively it's a UV now */
1326 alow = -aiv; /* abs, auvok == false records sign */
1332 const IV biv = SvIVX(svr);
1335 buvok = TRUE; /* effectively it's a UV now */
1337 blow = -biv; /* abs, buvok == false records sign */
1341 /* If this does sign extension on unsigned it's time for plan B */
1342 ahigh = alow >> (4 * sizeof (UV));
1344 bhigh = blow >> (4 * sizeof (UV));
1346 if (ahigh && bhigh) {
1348 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1349 which is overflow. Drop to NVs below. */
1350 } else if (!ahigh && !bhigh) {
1351 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1352 so the unsigned multiply cannot overflow. */
1353 const UV product = alow * blow;
1354 if (auvok == buvok) {
1355 /* -ve * -ve or +ve * +ve gives a +ve result. */
1359 } else if (product <= (UV)IV_MIN) {
1360 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1361 /* -ve result, which could overflow an IV */
1363 SETi( -(IV)product );
1365 } /* else drop to NVs below. */
1367 /* One operand is large, 1 small */
1370 /* swap the operands */
1372 bhigh = blow; /* bhigh now the temp var for the swap */
1376 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1377 multiplies can't overflow. shift can, add can, -ve can. */
1378 product_middle = ahigh * blow;
1379 if (!(product_middle & topmask)) {
1380 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1382 product_middle <<= (4 * sizeof (UV));
1383 product_low = alow * blow;
1385 /* as for pp_add, UV + something mustn't get smaller.
1386 IIRC ANSI mandates this wrapping *behaviour* for
1387 unsigned whatever the actual representation*/
1388 product_low += product_middle;
1389 if (product_low >= product_middle) {
1390 /* didn't overflow */
1391 if (auvok == buvok) {
1392 /* -ve * -ve or +ve * +ve gives a +ve result. */
1394 SETu( product_low );
1396 } else if (product_low <= (UV)IV_MIN) {
1397 /* 2s complement assumption again */
1398 /* -ve result, which could overflow an IV */
1400 SETi( -(IV)product_low );
1402 } /* else drop to NVs below. */
1404 } /* product_middle too large */
1405 } /* ahigh && bhigh */
1410 NV right = SvNV_nomg(svr);
1411 NV left = SvNV_nomg(svl);
1413 SETn( left * right );
1420 dVAR; dSP; dATARGET; SV *svl, *svr;
1421 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1424 /* Only try to do UV divide first
1425 if ((SLOPPYDIVIDE is true) or
1426 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1428 The assumption is that it is better to use floating point divide
1429 whenever possible, only doing integer divide first if we can't be sure.
1430 If NV_PRESERVES_UV is true then we know at compile time that no UV
1431 can be too large to preserve, so don't need to compile the code to
1432 test the size of UVs. */
1435 # define PERL_TRY_UV_DIVIDE
1436 /* ensure that 20./5. == 4. */
1438 # ifdef PERL_PRESERVE_IVUV
1439 # ifndef NV_PRESERVES_UV
1440 # define PERL_TRY_UV_DIVIDE
1445 #ifdef PERL_TRY_UV_DIVIDE
1446 SvIV_please_nomg(svr);
1448 SvIV_please_nomg(svl);
1450 bool left_non_neg = SvUOK(svl);
1451 bool right_non_neg = SvUOK(svr);
1455 if (right_non_neg) {
1459 const IV biv = SvIVX(svr);
1462 right_non_neg = TRUE; /* effectively it's a UV now */
1468 /* historically undef()/0 gives a "Use of uninitialized value"
1469 warning before dieing, hence this test goes here.
1470 If it were immediately before the second SvIV_please, then
1471 DIE() would be invoked before left was even inspected, so
1472 no inspection would give no warning. */
1474 DIE(aTHX_ "Illegal division by zero");
1480 const IV aiv = SvIVX(svl);
1483 left_non_neg = TRUE; /* effectively it's a UV now */
1492 /* For sloppy divide we always attempt integer division. */
1494 /* Otherwise we only attempt it if either or both operands
1495 would not be preserved by an NV. If both fit in NVs
1496 we fall through to the NV divide code below. However,
1497 as left >= right to ensure integer result here, we know that
1498 we can skip the test on the right operand - right big
1499 enough not to be preserved can't get here unless left is
1502 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1505 /* Integer division can't overflow, but it can be imprecise. */
1506 const UV result = left / right;
1507 if (result * right == left) {
1508 SP--; /* result is valid */
1509 if (left_non_neg == right_non_neg) {
1510 /* signs identical, result is positive. */
1514 /* 2s complement assumption */
1515 if (result <= (UV)IV_MIN)
1516 SETi( -(IV)result );
1518 /* It's exact but too negative for IV. */
1519 SETn( -(NV)result );
1522 } /* tried integer divide but it was not an integer result */
1523 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1524 } /* left wasn't SvIOK */
1525 } /* right wasn't SvIOK */
1526 #endif /* PERL_TRY_UV_DIVIDE */
1528 NV right = SvNV_nomg(svr);
1529 NV left = SvNV_nomg(svl);
1530 (void)POPs;(void)POPs;
1531 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1532 if (! Perl_isnan(right) && right == 0.0)
1536 DIE(aTHX_ "Illegal division by zero");
1537 PUSHn( left / right );
1544 dVAR; dSP; dATARGET;
1545 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1549 bool left_neg = FALSE;
1550 bool right_neg = FALSE;
1551 bool use_double = FALSE;
1552 bool dright_valid = FALSE;
1555 SV * const svr = TOPs;
1556 SV * const svl = TOPm1s;
1557 SvIV_please_nomg(svr);
1559 right_neg = !SvUOK(svr);
1563 const IV biv = SvIVX(svr);
1566 right_neg = FALSE; /* effectively it's a UV now */
1573 dright = SvNV_nomg(svr);
1574 right_neg = dright < 0;
1577 if (dright < UV_MAX_P1) {
1578 right = U_V(dright);
1579 dright_valid = TRUE; /* In case we need to use double below. */
1585 /* At this point use_double is only true if right is out of range for
1586 a UV. In range NV has been rounded down to nearest UV and
1587 use_double false. */
1588 SvIV_please_nomg(svl);
1589 if (!use_double && SvIOK(svl)) {
1591 left_neg = !SvUOK(svl);
1595 const IV aiv = SvIVX(svl);
1598 left_neg = FALSE; /* effectively it's a UV now */
1606 dleft = SvNV_nomg(svl);
1607 left_neg = dleft < 0;
1611 /* This should be exactly the 5.6 behaviour - if left and right are
1612 both in range for UV then use U_V() rather than floor. */
1614 if (dleft < UV_MAX_P1) {
1615 /* right was in range, so is dleft, so use UVs not double.
1619 /* left is out of range for UV, right was in range, so promote
1620 right (back) to double. */
1622 /* The +0.5 is used in 5.6 even though it is not strictly
1623 consistent with the implicit +0 floor in the U_V()
1624 inside the #if 1. */
1625 dleft = Perl_floor(dleft + 0.5);
1628 dright = Perl_floor(dright + 0.5);
1639 DIE(aTHX_ "Illegal modulus zero");
1641 dans = Perl_fmod(dleft, dright);
1642 if ((left_neg != right_neg) && dans)
1643 dans = dright - dans;
1646 sv_setnv(TARG, dans);
1652 DIE(aTHX_ "Illegal modulus zero");
1655 if ((left_neg != right_neg) && ans)
1658 /* XXX may warn: unary minus operator applied to unsigned type */
1659 /* could change -foo to be (~foo)+1 instead */
1660 if (ans <= ~((UV)IV_MAX)+1)
1661 sv_setiv(TARG, ~ans+1);
1663 sv_setnv(TARG, -(NV)ans);
1666 sv_setuv(TARG, ans);
1675 dVAR; dSP; dATARGET;
1679 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1680 /* TODO: think of some way of doing list-repeat overloading ??? */
1685 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1691 const UV uv = SvUV_nomg(sv);
1693 count = IV_MAX; /* The best we can do? */
1697 const IV iv = SvIV_nomg(sv);
1704 else if (SvNOKp(sv)) {
1705 const NV nv = SvNV_nomg(sv);
1712 count = SvIV_nomg(sv);
1714 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1716 static const char oom_list_extend[] = "Out of memory during list extend";
1717 const I32 items = SP - MARK;
1718 const I32 max = items * count;
1720 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1721 /* Did the max computation overflow? */
1722 if (items > 0 && max > 0 && (max < items || max < count))
1723 Perl_croak(aTHX_ oom_list_extend);
1728 /* This code was intended to fix 20010809.028:
1731 for (($x =~ /./g) x 2) {
1732 print chop; # "abcdabcd" expected as output.
1735 * but that change (#11635) broke this code:
1737 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1739 * I can't think of a better fix that doesn't introduce
1740 * an efficiency hit by copying the SVs. The stack isn't
1741 * refcounted, and mortalisation obviously doesn't
1742 * Do The Right Thing when the stack has more than
1743 * one pointer to the same mortal value.
1747 *SP = sv_2mortal(newSVsv(*SP));
1757 repeatcpy((char*)(MARK + items), (char*)MARK,
1758 items * sizeof(const SV *), count - 1);
1761 else if (count <= 0)
1764 else { /* Note: mark already snarfed by pp_list */
1765 SV * const tmpstr = POPs;
1768 static const char oom_string_extend[] =
1769 "Out of memory during string extend";
1772 sv_setsv_nomg(TARG, tmpstr);
1773 SvPV_force_nomg(TARG, len);
1774 isutf = DO_UTF8(TARG);
1779 const STRLEN max = (UV)count * len;
1780 if (len > MEM_SIZE_MAX / count)
1781 Perl_croak(aTHX_ oom_string_extend);
1782 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1783 SvGROW(TARG, max + 1);
1784 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1785 SvCUR_set(TARG, SvCUR(TARG) * count);
1787 *SvEND(TARG) = '\0';
1790 (void)SvPOK_only_UTF8(TARG);
1792 (void)SvPOK_only(TARG);
1794 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1795 /* The parser saw this as a list repeat, and there
1796 are probably several items on the stack. But we're
1797 in scalar context, and there's no pp_list to save us
1798 now. So drop the rest of the items -- robin@kitsite.com
1810 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1811 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1814 useleft = USE_LEFT(svl);
1815 #ifdef PERL_PRESERVE_IVUV
1816 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1817 "bad things" happen if you rely on signed integers wrapping. */
1818 SvIV_please_nomg(svr);
1820 /* Unless the left argument is integer in range we are going to have to
1821 use NV maths. Hence only attempt to coerce the right argument if
1822 we know the left is integer. */
1823 register UV auv = 0;
1829 a_valid = auvok = 1;
1830 /* left operand is undef, treat as zero. */
1832 /* Left operand is defined, so is it IV? */
1833 SvIV_please_nomg(svl);
1835 if ((auvok = SvUOK(svl)))
1838 register const IV aiv = SvIVX(svl);
1841 auvok = 1; /* Now acting as a sign flag. */
1842 } else { /* 2s complement assumption for IV_MIN */
1850 bool result_good = 0;
1853 bool buvok = SvUOK(svr);
1858 register const IV biv = SvIVX(svr);
1865 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1866 else "IV" now, independent of how it came in.
1867 if a, b represents positive, A, B negative, a maps to -A etc
1872 all UV maths. negate result if A negative.
1873 subtract if signs same, add if signs differ. */
1875 if (auvok ^ buvok) {
1884 /* Must get smaller */
1889 if (result <= buv) {
1890 /* result really should be -(auv-buv). as its negation
1891 of true value, need to swap our result flag */
1903 if (result <= (UV)IV_MIN)
1904 SETi( -(IV)result );
1906 /* result valid, but out of range for IV. */
1907 SETn( -(NV)result );
1911 } /* Overflow, drop through to NVs. */
1916 NV value = SvNV_nomg(svr);
1920 /* left operand is undef, treat as zero - value */
1924 SETn( SvNV_nomg(svl) - value );
1931 dVAR; dSP; dATARGET; SV *svl, *svr;
1932 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1936 const IV shift = SvIV_nomg(svr);
1937 if (PL_op->op_private & HINT_INTEGER) {
1938 const IV i = SvIV_nomg(svl);
1942 const UV u = SvUV_nomg(svl);
1951 dVAR; dSP; dATARGET; SV *svl, *svr;
1952 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
1956 const IV shift = SvIV_nomg(svr);
1957 if (PL_op->op_private & HINT_INTEGER) {
1958 const IV i = SvIV_nomg(svl);
1962 const UV u = SvUV_nomg(svl);
1974 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
1978 (SvIOK_notUV(left) && SvIOK_notUV(right))
1979 ? (SvIVX(left) < SvIVX(right))
1980 : (do_ncmp(left, right) == -1)
1990 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1994 (SvIOK_notUV(left) && SvIOK_notUV(right))
1995 ? (SvIVX(left) > SvIVX(right))
1996 : (do_ncmp(left, right) == 1)
2006 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
2010 (SvIOK_notUV(left) && SvIOK_notUV(right))
2011 ? (SvIVX(left) <= SvIVX(right))
2012 : (do_ncmp(left, right) <= 0)
2022 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2026 (SvIOK_notUV(left) && SvIOK_notUV(right))
2027 ? (SvIVX(left) >= SvIVX(right))
2028 : ( (do_ncmp(left, right) & 2) == 0)
2038 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2042 (SvIOK_notUV(left) && SvIOK_notUV(right))
2043 ? (SvIVX(left) != SvIVX(right))
2044 : (do_ncmp(left, right) != 0)
2049 /* compare left and right SVs. Returns:
2053 * 2: left or right was a NaN
2056 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2060 PERL_ARGS_ASSERT_DO_NCMP;
2061 #ifdef PERL_PRESERVE_IVUV
2062 SvIV_please_nomg(right);
2063 /* Fortunately it seems NaN isn't IOK */
2065 SvIV_please_nomg(left);
2068 const IV leftiv = SvIVX(left);
2069 if (!SvUOK(right)) {
2070 /* ## IV <=> IV ## */
2071 const IV rightiv = SvIVX(right);
2072 return (leftiv > rightiv) - (leftiv < rightiv);
2074 /* ## IV <=> UV ## */
2076 /* As (b) is a UV, it's >=0, so it must be < */
2079 const UV rightuv = SvUVX(right);
2080 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2085 /* ## UV <=> UV ## */
2086 const UV leftuv = SvUVX(left);
2087 const UV rightuv = SvUVX(right);
2088 return (leftuv > rightuv) - (leftuv < rightuv);
2090 /* ## UV <=> IV ## */
2092 const IV rightiv = SvIVX(right);
2094 /* As (a) is a UV, it's >=0, so it cannot be < */
2097 const UV leftuv = SvUVX(left);
2098 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2106 NV const rnv = SvNV_nomg(right);
2107 NV const lnv = SvNV_nomg(left);
2109 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2110 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2113 return (lnv > rnv) - (lnv < rnv);
2132 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2135 value = do_ncmp(left, right);
2150 int amg_type = sle_amg;
2154 switch (PL_op->op_type) {
2173 tryAMAGICbin_MG(amg_type, AMGf_set);
2176 const int cmp = (IN_LOCALE_RUNTIME
2177 ? sv_cmp_locale_flags(left, right, 0)
2178 : sv_cmp_flags(left, right, 0));
2179 SETs(boolSV(cmp * multiplier < rhs));
2187 tryAMAGICbin_MG(seq_amg, AMGf_set);
2190 SETs(boolSV(sv_eq_flags(left, right, 0)));
2198 tryAMAGICbin_MG(sne_amg, AMGf_set);
2201 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2209 tryAMAGICbin_MG(scmp_amg, 0);
2212 const int cmp = (IN_LOCALE_RUNTIME
2213 ? sv_cmp_locale_flags(left, right, 0)
2214 : sv_cmp_flags(left, right, 0));
2222 dVAR; dSP; dATARGET;
2223 tryAMAGICbin_MG(band_amg, AMGf_assign);
2226 if (SvNIOKp(left) || SvNIOKp(right)) {
2227 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2228 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2229 if (PL_op->op_private & HINT_INTEGER) {
2230 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2234 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2237 if (left_ro_nonnum) SvNIOK_off(left);
2238 if (right_ro_nonnum) SvNIOK_off(right);
2241 do_vop(PL_op->op_type, TARG, left, right);
2250 dVAR; dSP; dATARGET;
2251 const int op_type = PL_op->op_type;
2253 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2256 if (SvNIOKp(left) || SvNIOKp(right)) {
2257 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2258 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2259 if (PL_op->op_private & HINT_INTEGER) {
2260 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2261 const IV r = SvIV_nomg(right);
2262 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2266 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2267 const UV r = SvUV_nomg(right);
2268 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2271 if (left_ro_nonnum) SvNIOK_off(left);
2272 if (right_ro_nonnum) SvNIOK_off(right);
2275 do_vop(op_type, TARG, left, right);
2285 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2287 SV * const sv = TOPs;
2288 const int flags = SvFLAGS(sv);
2290 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
2294 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2295 /* It's publicly an integer, or privately an integer-not-float */
2298 if (SvIVX(sv) == IV_MIN) {
2299 /* 2s complement assumption. */
2300 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2303 else if (SvUVX(sv) <= IV_MAX) {
2308 else if (SvIVX(sv) != IV_MIN) {
2312 #ifdef PERL_PRESERVE_IVUV
2320 SETn(-SvNV_nomg(sv));
2321 else if (SvPOKp(sv)) {
2323 const char * const s = SvPV_nomg_const(sv, len);
2324 if (isIDFIRST(*s)) {
2325 sv_setpvs(TARG, "-");
2328 else if (*s == '+' || *s == '-') {
2329 sv_setsv_nomg(TARG, sv);
2330 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2332 else if (DO_UTF8(sv)) {
2333 SvIV_please_nomg(sv);
2335 goto oops_its_an_int;
2337 sv_setnv(TARG, -SvNV_nomg(sv));
2339 sv_setpvs(TARG, "-");
2344 SvIV_please_nomg(sv);
2346 goto oops_its_an_int;
2347 sv_setnv(TARG, -SvNV_nomg(sv));
2352 SETn(-SvNV_nomg(sv));
2360 tryAMAGICun_MG(not_amg, AMGf_set);
2361 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2368 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2372 if (PL_op->op_private & HINT_INTEGER) {
2373 const IV i = ~SvIV_nomg(sv);
2377 const UV u = ~SvUV_nomg(sv);
2386 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2387 sv_setsv_nomg(TARG, sv);
2388 tmps = (U8*)SvPV_force_nomg(TARG, len);
2391 /* Calculate exact length, let's not estimate. */
2396 U8 * const send = tmps + len;
2397 U8 * const origtmps = tmps;
2398 const UV utf8flags = UTF8_ALLOW_ANYUV;
2400 while (tmps < send) {
2401 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2403 targlen += UNISKIP(~c);
2409 /* Now rewind strings and write them. */
2416 Newx(result, targlen + 1, U8);
2418 while (tmps < send) {
2419 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2421 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2424 sv_usepvn_flags(TARG, (char*)result, targlen,
2425 SV_HAS_TRAILING_NUL);
2432 Newx(result, nchar + 1, U8);
2434 while (tmps < send) {
2435 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2440 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2448 register long *tmpl;
2449 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2452 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2457 for ( ; anum > 0; anum--, tmps++)
2465 /* integer versions of some of the above */
2469 dVAR; dSP; dATARGET;
2470 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2473 SETi( left * right );
2481 dVAR; dSP; dATARGET;
2482 tryAMAGICbin_MG(div_amg, AMGf_assign);
2485 IV value = SvIV_nomg(right);
2487 DIE(aTHX_ "Illegal division by zero");
2488 num = SvIV_nomg(left);
2490 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2494 value = num / value;
2500 #if defined(__GLIBC__) && IVSIZE == 8
2507 /* This is the vanilla old i_modulo. */
2508 dVAR; dSP; dATARGET;
2509 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2513 DIE(aTHX_ "Illegal modulus zero");
2514 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2518 SETi( left % right );
2523 #if defined(__GLIBC__) && IVSIZE == 8
2528 /* This is the i_modulo with the workaround for the _moddi3 bug
2529 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2530 * See below for pp_i_modulo. */
2531 dVAR; dSP; dATARGET;
2532 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2536 DIE(aTHX_ "Illegal modulus zero");
2537 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2541 SETi( left % PERL_ABS(right) );
2548 dVAR; dSP; dATARGET;
2549 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2553 DIE(aTHX_ "Illegal modulus zero");
2554 /* The assumption is to use hereafter the old vanilla version... */
2556 PL_ppaddr[OP_I_MODULO] =
2558 /* .. but if we have glibc, we might have a buggy _moddi3
2559 * (at least glicb 2.2.5 is known to have this bug), in other
2560 * words our integer modulus with negative quad as the second
2561 * argument might be broken. Test for this and re-patch the
2562 * opcode dispatch table if that is the case, remembering to
2563 * also apply the workaround so that this first round works
2564 * right, too. See [perl #9402] for more information. */
2568 /* Cannot do this check with inlined IV constants since
2569 * that seems to work correctly even with the buggy glibc. */
2571 /* Yikes, we have the bug.
2572 * Patch in the workaround version. */
2574 PL_ppaddr[OP_I_MODULO] =
2575 &Perl_pp_i_modulo_1;
2576 /* Make certain we work right this time, too. */
2577 right = PERL_ABS(right);
2580 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2584 SETi( left % right );
2592 dVAR; dSP; dATARGET;
2593 tryAMAGICbin_MG(add_amg, AMGf_assign);
2595 dPOPTOPiirl_ul_nomg;
2596 SETi( left + right );
2603 dVAR; dSP; dATARGET;
2604 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2606 dPOPTOPiirl_ul_nomg;
2607 SETi( left - right );
2615 tryAMAGICbin_MG(lt_amg, AMGf_set);
2618 SETs(boolSV(left < right));
2626 tryAMAGICbin_MG(gt_amg, AMGf_set);
2629 SETs(boolSV(left > right));
2637 tryAMAGICbin_MG(le_amg, AMGf_set);
2640 SETs(boolSV(left <= right));
2648 tryAMAGICbin_MG(ge_amg, AMGf_set);
2651 SETs(boolSV(left >= right));
2659 tryAMAGICbin_MG(eq_amg, AMGf_set);
2662 SETs(boolSV(left == right));
2670 tryAMAGICbin_MG(ne_amg, AMGf_set);
2673 SETs(boolSV(left != right));
2681 tryAMAGICbin_MG(ncmp_amg, 0);
2688 else if (left < right)
2700 tryAMAGICun_MG(neg_amg, 0);
2702 SV * const sv = TOPs;
2703 IV const i = SvIV_nomg(sv);
2709 /* High falutin' math. */
2714 tryAMAGICbin_MG(atan2_amg, 0);
2717 SETn(Perl_atan2(left, right));
2725 int amg_type = sin_amg;
2726 const char *neg_report = NULL;
2727 NV (*func)(NV) = Perl_sin;
2728 const int op_type = PL_op->op_type;
2745 amg_type = sqrt_amg;
2747 neg_report = "sqrt";
2752 tryAMAGICun_MG(amg_type, 0);
2754 SV * const arg = POPs;
2755 const NV value = SvNV_nomg(arg);
2757 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2758 SET_NUMERIC_STANDARD();
2759 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2762 XPUSHn(func(value));
2767 /* Support Configure command-line overrides for rand() functions.
2768 After 5.005, perhaps we should replace this by Configure support
2769 for drand48(), random(), or rand(). For 5.005, though, maintain
2770 compatibility by calling rand() but allow the user to override it.
2771 See INSTALL for details. --Andy Dougherty 15 July 1998
2773 /* Now it's after 5.005, and Configure supports drand48() and random(),
2774 in addition to rand(). So the overrides should not be needed any more.
2775 --Jarkko Hietaniemi 27 September 1998
2778 #ifndef HAS_DRAND48_PROTO
2779 extern double drand48 (void);
2792 if (!PL_srand_called) {
2793 (void)seedDrand01((Rand_seed_t)seed());
2794 PL_srand_called = TRUE;
2804 const UV anum = (MAXARG < 1) ? seed() : POPu;
2805 (void)seedDrand01((Rand_seed_t)anum);
2806 PL_srand_called = TRUE;
2810 /* Historically srand always returned true. We can avoid breaking
2812 sv_setpvs(TARG, "0 but true");
2821 tryAMAGICun_MG(int_amg, AMGf_numeric);
2823 SV * const sv = TOPs;
2824 const IV iv = SvIV_nomg(sv);
2825 /* XXX it's arguable that compiler casting to IV might be subtly
2826 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2827 else preferring IV has introduced a subtle behaviour change bug. OTOH
2828 relying on floating point to be accurate is a bug. */
2833 else if (SvIOK(sv)) {
2835 SETu(SvUV_nomg(sv));
2840 const NV value = SvNV_nomg(sv);
2842 if (value < (NV)UV_MAX + 0.5) {
2845 SETn(Perl_floor(value));
2849 if (value > (NV)IV_MIN - 0.5) {
2852 SETn(Perl_ceil(value));
2863 tryAMAGICun_MG(abs_amg, AMGf_numeric);
2865 SV * const sv = TOPs;
2866 /* This will cache the NV value if string isn't actually integer */
2867 const IV iv = SvIV_nomg(sv);
2872 else if (SvIOK(sv)) {
2873 /* IVX is precise */
2875 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
2883 /* 2s complement assumption. Also, not really needed as
2884 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2890 const NV value = SvNV_nomg(sv);
2904 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
2908 SV* const sv = POPs;
2910 tmps = (SvPV_const(sv, len));
2912 /* If Unicode, try to downgrade
2913 * If not possible, croak. */
2914 SV* const tsv = sv_2mortal(newSVsv(sv));
2917 sv_utf8_downgrade(tsv, FALSE);
2918 tmps = SvPV_const(tsv, len);
2920 if (PL_op->op_type == OP_HEX)
2923 while (*tmps && len && isSPACE(*tmps))
2927 if (*tmps == 'x' || *tmps == 'X') {
2929 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
2931 else if (*tmps == 'b' || *tmps == 'B')
2932 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
2934 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2936 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2950 SV * const sv = TOPs;
2952 if (SvGAMAGIC(sv)) {
2953 /* For an overloaded or magic scalar, we can't know in advance if
2954 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2955 it likes to cache the length. Maybe that should be a documented
2960 = sv_2pv_flags(sv, &len,
2961 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
2964 if (!SvPADTMP(TARG)) {
2965 sv_setsv(TARG, &PL_sv_undef);
2970 else if (DO_UTF8(sv)) {
2971 SETi(utf8_length((U8*)p, (U8*)p + len));
2975 } else if (SvOK(sv)) {
2976 /* Neither magic nor overloaded. */
2978 SETi(sv_len_utf8(sv));
2982 if (!SvPADTMP(TARG)) {
2983 sv_setsv_nomg(TARG, &PL_sv_undef);
3005 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3007 const IV arybase = CopARYBASE_get(PL_curcop);
3009 const char *repl = NULL;
3011 const int num_args = PL_op->op_private & 7;
3012 bool repl_need_utf8_upgrade = FALSE;
3013 bool repl_is_utf8 = FALSE;
3018 repl = SvPV_const(repl_sv, repl_len);
3019 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
3022 len_iv = SvIV(len_sv);
3023 len_is_uv = SvIOK_UV(len_sv);
3026 pos1_iv = SvIV(pos_sv);
3027 pos1_is_uv = SvIOK_UV(pos_sv);
3033 sv_utf8_upgrade(sv);
3035 else if (DO_UTF8(sv))
3036 repl_need_utf8_upgrade = TRUE;
3038 tmps = SvPV_const(sv, curlen);
3040 utf8_curlen = sv_len_utf8(sv);
3041 if (utf8_curlen == curlen)
3044 curlen = utf8_curlen;
3049 if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
3050 UV pos1_uv = pos1_iv-arybase;
3051 /* Overflow can occur when $[ < 0 */
3052 if (arybase < 0 && pos1_uv < (UV)pos1_iv)
3057 else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
3058 goto bound_fail; /* $[=3; substr($_,2,...) */
3060 else { /* pos < $[ */
3061 if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
3066 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3071 if (pos1_is_uv || pos1_iv > 0) {
3072 if ((UV)pos1_iv > curlen)
3077 if (!len_is_uv && len_iv < 0) {
3078 pos2_iv = curlen + len_iv;
3080 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3083 } else { /* len_iv >= 0 */
3084 if (!pos1_is_uv && pos1_iv < 0) {
3085 pos2_iv = pos1_iv + len_iv;
3086 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3088 if ((UV)len_iv > curlen-(UV)pos1_iv)
3091 pos2_iv = pos1_iv+len_iv;
3101 if (!pos2_is_uv && pos2_iv < 0) {
3102 if (!pos1_is_uv && pos1_iv < 0)
3106 else if (!pos1_is_uv && pos1_iv < 0)
3109 if ((UV)pos2_iv < (UV)pos1_iv)
3111 if ((UV)pos2_iv > curlen)
3115 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3116 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3117 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3118 STRLEN byte_len = len;
3119 STRLEN byte_pos = utf8_curlen
3120 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3122 if (lvalue && !repl) {
3125 if (!SvGMAGICAL(sv)) {
3127 SvPV_force_nolen(sv);
3128 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3129 "Attempt to use reference as lvalue in substr");
3131 if (isGV_with_GP(sv))
3132 SvPV_force_nolen(sv);
3133 else if (SvOK(sv)) /* is it defined ? */
3134 (void)SvPOK_only_UTF8(sv);
3136 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3139 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3140 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3142 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3143 LvTARGOFF(ret) = pos;
3144 LvTARGLEN(ret) = len;
3147 PUSHs(ret); /* avoid SvSETMAGIC here */
3151 SvTAINTED_off(TARG); /* decontaminate */
3152 SvUTF8_off(TARG); /* decontaminate */
3155 sv_setpvn(TARG, tmps, byte_len);
3156 #ifdef USE_LOCALE_COLLATE
3157 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3163 SV* repl_sv_copy = NULL;
3165 if (repl_need_utf8_upgrade) {
3166 repl_sv_copy = newSVsv(repl_sv);
3167 sv_utf8_upgrade(repl_sv_copy);
3168 repl = SvPV_const(repl_sv_copy, repl_len);
3169 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3173 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3176 SvREFCNT_dec(repl_sv_copy);
3186 Perl_croak(aTHX_ "substr outside of string");
3187 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3194 register const IV size = POPi;
3195 register const IV offset = POPi;
3196 register SV * const src = POPs;
3197 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3200 if (lvalue) { /* it's an lvalue! */
3201 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3202 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3204 LvTARG(ret) = SvREFCNT_inc_simple(src);
3205 LvTARGOFF(ret) = offset;
3206 LvTARGLEN(ret) = size;
3210 SvTAINTED_off(TARG); /* decontaminate */
3214 sv_setuv(ret, do_vecget(src, offset, size));
3230 const char *little_p;
3231 const I32 arybase = CopARYBASE_get(PL_curcop);
3234 const bool is_index = PL_op->op_type == OP_INDEX;
3237 /* arybase is in characters, like offset, so combine prior to the
3238 UTF-8 to bytes calculation. */
3239 offset = POPi - arybase;
3243 big_p = SvPV_const(big, biglen);
3244 little_p = SvPV_const(little, llen);
3246 big_utf8 = DO_UTF8(big);
3247 little_utf8 = DO_UTF8(little);
3248 if (big_utf8 ^ little_utf8) {
3249 /* One needs to be upgraded. */
3250 if (little_utf8 && !PL_encoding) {
3251 /* Well, maybe instead we might be able to downgrade the small
3253 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3256 /* If the large string is ISO-8859-1, and it's not possible to
3257 convert the small string to ISO-8859-1, then there is no
3258 way that it could be found anywhere by index. */
3263 /* At this point, pv is a malloc()ed string. So donate it to temp
3264 to ensure it will get free()d */
3265 little = temp = newSV(0);
3266 sv_usepvn(temp, pv, llen);
3267 little_p = SvPVX(little);
3270 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3273 sv_recode_to_utf8(temp, PL_encoding);
3275 sv_utf8_upgrade(temp);
3280 big_p = SvPV_const(big, biglen);
3283 little_p = SvPV_const(little, llen);
3287 if (SvGAMAGIC(big)) {
3288 /* Life just becomes a lot easier if I use a temporary here.
3289 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3290 will trigger magic and overloading again, as will fbm_instr()
3292 big = newSVpvn_flags(big_p, biglen,
3293 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3296 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3297 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3298 warn on undef, and we've already triggered a warning with the
3299 SvPV_const some lines above. We can't remove that, as we need to
3300 call some SvPV to trigger overloading early and find out if the
3302 This is all getting to messy. The API isn't quite clean enough,
3303 because data access has side effects.
3305 little = newSVpvn_flags(little_p, llen,
3306 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3307 little_p = SvPVX(little);
3311 offset = is_index ? 0 : biglen;
3313 if (big_utf8 && offset > 0)
3314 sv_pos_u2b(big, &offset, 0);
3320 else if (offset > (I32)biglen)
3322 if (!(little_p = is_index
3323 ? fbm_instr((unsigned char*)big_p + offset,
3324 (unsigned char*)big_p + biglen, little, 0)
3325 : rninstr(big_p, big_p + offset,
3326 little_p, little_p + llen)))
3329 retval = little_p - big_p;
3330 if (retval > 0 && big_utf8)
3331 sv_pos_b2u(big, &retval);
3335 PUSHi(retval + arybase);
3341 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3342 SvTAINTED_off(TARG);
3343 do_sprintf(TARG, SP-MARK, MARK+1);
3344 TAINT_IF(SvTAINTED(TARG));
3356 const U8 *s = (U8*)SvPV_const(argsv, len);
3358 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3359 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3360 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3364 XPUSHu(DO_UTF8(argsv) ?
3365 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3377 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3379 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3381 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3383 (void) POPs; /* Ignore the argument value. */
3384 value = UNICODE_REPLACEMENT;
3390 SvUPGRADE(TARG,SVt_PV);
3392 if (value > 255 && !IN_BYTES) {
3393 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3394 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3395 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3397 (void)SvPOK_only(TARG);
3406 *tmps++ = (char)value;
3408 (void)SvPOK_only(TARG);
3410 if (PL_encoding && !IN_BYTES) {
3411 sv_recode_to_utf8(TARG, PL_encoding);
3413 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3414 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3418 *tmps++ = (char)value;
3434 const char *tmps = SvPV_const(left, len);
3436 if (DO_UTF8(left)) {
3437 /* If Unicode, try to downgrade.
3438 * If not possible, croak.
3439 * Yes, we made this up. */
3440 SV* const tsv = sv_2mortal(newSVsv(left));
3443 sv_utf8_downgrade(tsv, FALSE);
3444 tmps = SvPV_const(tsv, len);
3446 # ifdef USE_ITHREADS
3448 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3449 /* This should be threadsafe because in ithreads there is only
3450 * one thread per interpreter. If this would not be true,
3451 * we would need a mutex to protect this malloc. */
3452 PL_reentrant_buffer->_crypt_struct_buffer =
3453 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3454 #if defined(__GLIBC__) || defined(__EMX__)
3455 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3456 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3457 /* work around glibc-2.2.5 bug */
3458 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3462 # endif /* HAS_CRYPT_R */
3463 # endif /* USE_ITHREADS */
3465 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3467 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3473 "The crypt() function is unimplemented due to excessive paranoia.");
3477 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3478 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3480 /* Below are several macros that generate code */
3481 /* Generates code to store a unicode codepoint c that is known to occupy
3482 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3483 #define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3485 *(p) = UTF8_TWO_BYTE_HI(c); \
3486 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3489 /* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3490 * available byte after the two bytes */
3491 #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3493 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3494 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3497 /* Generates code to store the upper case of latin1 character l which is known
3498 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3499 * are only two characters that fit this description, and this macro knows
3500 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3502 #define STORE_NON_LATIN1_UC(p, l) \
3504 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3505 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3506 } else { /* Must be the following letter */ \
3507 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3511 /* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3512 * after the character stored */
3513 #define CAT_NON_LATIN1_UC(p, l) \
3515 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3516 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3518 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3522 /* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3523 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3524 * and must require two bytes to store it. Advances p to point to the next
3525 * available position */
3526 #define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3528 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3529 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3530 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3531 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3532 } else {/* else is one of the other two special cases */ \
3533 CAT_NON_LATIN1_UC((p), (l)); \
3539 /* Actually is both lcfirst() and ucfirst(). Only the first character
3540 * changes. This means that possibly we can change in-place, ie., just
3541 * take the source and change that one character and store it back, but not
3542 * if read-only etc, or if the length changes */
3547 STRLEN slen; /* slen is the byte length of the whole SV. */
3550 bool inplace; /* ? Convert first char only, in-place */
3551 bool doing_utf8 = FALSE; /* ? using utf8 */
3552 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3553 const int op_type = PL_op->op_type;
3556 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3557 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3558 * stored as UTF-8 at s. */
3559 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3560 * lowercased) character stored in tmpbuf. May be either
3561 * UTF-8 or not, but in either case is the number of bytes */
3565 s = (const U8*)SvPV_nomg_const(source, slen);
3567 if (ckWARN(WARN_UNINITIALIZED))
3568 report_uninit(source);
3573 /* We may be able to get away with changing only the first character, in
3574 * place, but not if read-only, etc. Later we may discover more reasons to
3575 * not convert in-place. */
3576 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3578 /* First calculate what the changed first character should be. This affects
3579 * whether we can just swap it out, leaving the rest of the string unchanged,
3580 * or even if have to convert the dest to UTF-8 when the source isn't */
3582 if (! slen) { /* If empty */
3583 need = 1; /* still need a trailing NUL */
3585 else if (DO_UTF8(source)) { /* Is the source utf8? */
3588 /* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
3589 * and doesn't allow for the user to specify their own. When code is added to
3590 * detect if there is a user-defined mapping in force here, and if so to use
3591 * that, then the code below can be compiled. The detection would be a good
3592 * thing anyway, as currently the user-defined mappings only work on utf8
3593 * strings, and thus depend on the chosen internal storage method, which is a
3595 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3596 if (UTF8_IS_INVARIANT(*s)) {
3598 /* An invariant source character is either ASCII or, in EBCDIC, an
3599 * ASCII equivalent or a caseless C1 control. In both these cases,
3600 * the lower and upper cases of any character are also invariants
3601 * (and title case is the same as upper case). So it is safe to
3602 * use the simple case change macros which avoid the overhead of
3603 * the general functions. Note that if perl were to be extended to
3604 * do locale handling in UTF-8 strings, this wouldn't be true in,
3605 * for example, Lithuanian or Turkic. */
3606 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3610 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3613 /* Similarly, if the source character isn't invariant but is in the
3614 * latin1 range (or EBCDIC equivalent thereof), we have the case
3615 * changes compiled into perl, and can avoid the overhead of the
3616 * general functions. In this range, the characters are stored as
3617 * two UTF-8 bytes, and it so happens that any changed-case version
3618 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3622 /* Convert the two source bytes to a single Unicode code point
3623 * value, change case and save for below */
3624 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3625 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3626 U8 lower = toLOWER_LATIN1(chr);
3627 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3629 else { /* ucfirst */
3630 U8 upper = toUPPER_LATIN1_MOD(chr);
3632 /* Most of the latin1 range characters are well-behaved. Their
3633 * title and upper cases are the same, and are also in the
3634 * latin1 range. The macro above returns their upper (hence
3635 * title) case, and all that need be done is to save the result
3636 * for below. However, several characters are problematic, and
3637 * have to be handled specially. The MOD in the macro name
3638 * above means that these tricky characters all get mapped to
3639 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3640 * This mapping saves some tests for the majority of the
3643 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3645 /* Not tricky. Just save it. */
3646 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3648 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3650 /* This one is tricky because it is two characters long,
3651 * though the UTF-8 is still two bytes, so the stored
3652 * length doesn't change */
3653 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3654 *(tmpbuf + 1) = 's';
3658 /* The other two have their title and upper cases the same,
3659 * but are tricky because the changed-case characters
3660 * aren't in the latin1 range. They, however, do fit into
3661 * two UTF-8 bytes */
3662 STORE_NON_LATIN1_UC(tmpbuf, chr);
3667 #endif /* end of dont want to break user-defined casing */
3669 /* Here, can't short-cut the general case */
3671 utf8_to_uvchr(s, &ulen);
3672 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3673 else toLOWER_utf8(s, tmpbuf, &tculen);
3675 /* we can't do in-place if the length changes. */
3676 if (ulen != tculen) inplace = FALSE;
3677 need = slen + 1 - ulen + tculen;
3678 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3682 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3683 * latin1 is treated as caseless. Note that a locale takes
3685 tculen = 1; /* Most characters will require one byte, but this will
3686 * need to be overridden for the tricky ones */
3689 if (op_type == OP_LCFIRST) {
3691 /* lower case the first letter: no trickiness for any character */
3692 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3693 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3696 else if (IN_LOCALE_RUNTIME) {
3697 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3698 * have upper and title case different
3701 else if (! IN_UNI_8_BIT) {
3702 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3703 * on EBCDIC machines whatever the
3704 * native function does */
3706 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3707 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3709 /* tmpbuf now has the correct title case for all latin1 characters
3710 * except for the several ones that have tricky handling. All
3711 * of these are mapped by the MOD to the letter below. */
3712 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3714 /* The length is going to change, with all three of these, so
3715 * can't replace just the first character */
3718 /* We use the original to distinguish between these tricky
3720 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3721 /* Two character title case 'Ss', but can remain non-UTF-8 */
3724 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3729 /* The other two tricky ones have their title case outside
3730 * latin1. It is the same as their upper case. */
3732 STORE_NON_LATIN1_UC(tmpbuf, *s);
3734 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3735 * and their upper cases is 2. */
3738 /* The entire result will have to be in UTF-8. Assume worst
3739 * case sizing in conversion. (all latin1 characters occupy
3740 * at most two bytes in utf8) */
3741 convert_source_to_utf8 = TRUE;
3742 need = slen * 2 + 1;
3744 } /* End of is one of the three special chars */
3745 } /* End of use Unicode (Latin1) semantics */
3746 } /* End of changing the case of the first character */
3748 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3749 * generate the result */
3752 /* We can convert in place. This means we change just the first
3753 * character without disturbing the rest; no need to grow */
3755 s = d = (U8*)SvPV_force_nomg(source, slen);
3761 /* Here, we can't convert in place; we earlier calculated how much
3762 * space we will need, so grow to accommodate that */
3763 SvUPGRADE(dest, SVt_PV);
3764 d = (U8*)SvGROW(dest, need);
3765 (void)SvPOK_only(dest);
3772 if (! convert_source_to_utf8) {
3774 /* Here both source and dest are in UTF-8, but have to create
3775 * the entire output. We initialize the result to be the
3776 * title/lower cased first character, and then append the rest
3778 sv_setpvn(dest, (char*)tmpbuf, tculen);
3780 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3784 const U8 *const send = s + slen;
3786 /* Here the dest needs to be in UTF-8, but the source isn't,
3787 * except we earlier UTF-8'd the first character of the source
3788 * into tmpbuf. First put that into dest, and then append the
3789 * rest of the source, converting it to UTF-8 as we go. */
3791 /* Assert tculen is 2 here because the only two characters that
3792 * get to this part of the code have 2-byte UTF-8 equivalents */
3794 *d++ = *(tmpbuf + 1);
3795 s++; /* We have just processed the 1st char */
3797 for (; s < send; s++) {
3798 d = uvchr_to_utf8(d, *s);
3801 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3805 else { /* in-place UTF-8. Just overwrite the first character */
3806 Copy(tmpbuf, d, tculen, U8);
3807 SvCUR_set(dest, need - 1);
3810 else { /* Neither source nor dest are in or need to be UTF-8 */
3812 if (IN_LOCALE_RUNTIME) {
3816 if (inplace) { /* in-place, only need to change the 1st char */
3819 else { /* Not in-place */
3821 /* Copy the case-changed character(s) from tmpbuf */
3822 Copy(tmpbuf, d, tculen, U8);
3823 d += tculen - 1; /* Code below expects d to point to final
3824 * character stored */
3827 else { /* empty source */
3828 /* See bug #39028: Don't taint if empty */
3832 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3833 * the destination to retain that flag */
3837 if (!inplace) { /* Finish the rest of the string, unchanged */
3838 /* This will copy the trailing NUL */
3839 Copy(s + 1, d + 1, slen, U8);
3840 SvCUR_set(dest, need - 1);
3843 if (dest != source && SvTAINTED(source))
3849 /* There's so much setup/teardown code common between uc and lc, I wonder if
3850 it would be worth merging the two, and just having a switch outside each
3851 of the three tight loops. There is less and less commonality though */
3865 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3866 && SvTEMP(source) && !DO_UTF8(source)
3867 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3869 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3870 * make the loop tight, so we overwrite the source with the dest before
3871 * looking at it, and we need to look at the original source
3872 * afterwards. There would also need to be code added to handle
3873 * switching to not in-place in midstream if we run into characters
3874 * that change the length.
3877 s = d = (U8*)SvPV_force_nomg(source, len);
3884 /* The old implementation would copy source into TARG at this point.
3885 This had the side effect that if source was undef, TARG was now
3886 an undefined SV with PADTMP set, and they don't warn inside
3887 sv_2pv_flags(). However, we're now getting the PV direct from
3888 source, which doesn't have PADTMP set, so it would warn. Hence the
3892 s = (const U8*)SvPV_nomg_const(source, len);
3894 if (ckWARN(WARN_UNINITIALIZED))
3895 report_uninit(source);
3901 SvUPGRADE(dest, SVt_PV);
3902 d = (U8*)SvGROW(dest, min);
3903 (void)SvPOK_only(dest);
3908 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3909 to check DO_UTF8 again here. */
3911 if (DO_UTF8(source)) {
3912 const U8 *const send = s + len;
3913 U8 tmpbuf[UTF8_MAXBYTES+1];
3915 /* All occurrences of these are to be moved to follow any other marks.
3916 * This is context-dependent. We may not be passed enough context to
3917 * move the iota subscript beyond all of them, but we do the best we can
3918 * with what we're given. The result is always better than if we
3919 * hadn't done this. And, the problem would only arise if we are
3920 * passed a character without all its combining marks, which would be
3921 * the caller's mistake. The information this is based on comes from a
3922 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3923 * itself) and so can't be checked properly to see if it ever gets
3924 * revised. But the likelihood of it changing is remote */
3925 bool in_iota_subscript = FALSE;
3928 if (in_iota_subscript && ! is_utf8_mark(s)) {
3929 /* A non-mark. Time to output the iota subscript */
3930 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
3931 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3933 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3934 in_iota_subscript = FALSE;
3938 /* See comments at the first instance in this file of this ifdef */
3939 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3941 /* If the UTF-8 character is invariant, then it is in the range
3942 * known by the standard macro; result is only one byte long */
3943 if (UTF8_IS_INVARIANT(*s)) {
3947 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3949 /* Likewise, if it fits in a byte, its case change is in our
3951 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++);
3952 U8 upper = toUPPER_LATIN1_MOD(orig);
3953 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
3961 /* Otherwise, need the general UTF-8 case. Get the changed
3962 * case value and copy it to the output buffer */
3964 const STRLEN u = UTF8SKIP(s);
3967 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
3968 if (uv == GREEK_CAPITAL_LETTER_IOTA
3969 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
3971 in_iota_subscript = TRUE;
3974 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3975 /* If the eventually required minimum size outgrows
3976 * the available space, we need to grow. */
3977 const UV o = d - (U8*)SvPVX_const(dest);
3979 /* If someone uppercases one million U+03B0s we
3980 * SvGROW() one million times. Or we could try
3981 * guessing how much to allocate without allocating too
3982 * much. Such is life. See corresponding comment in
3983 * lc code for another option */
3985 d = (U8*)SvPVX(dest) + o;
3987 Copy(tmpbuf, d, ulen, U8);
3993 if (in_iota_subscript) {
3994 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3998 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4000 else { /* Not UTF-8 */
4002 const U8 *const send = s + len;
4004 /* Use locale casing if in locale; regular style if not treating
4005 * latin1 as having case; otherwise the latin1 casing. Do the
4006 * whole thing in a tight loop, for speed, */
4007 if (IN_LOCALE_RUNTIME) {
4010 for (; s < send; d++, s++)
4011 *d = toUPPER_LC(*s);
4013 else if (! IN_UNI_8_BIT) {
4014 for (; s < send; d++, s++) {
4019 for (; s < send; d++, s++) {
4020 *d = toUPPER_LATIN1_MOD(*s);
4021 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
4023 /* The mainstream case is the tight loop above. To avoid
4024 * extra tests in that, all three characters that require
4025 * special handling are mapped by the MOD to the one tested
4027 * Use the source to distinguish between the three cases */
4029 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4031 /* uc() of this requires 2 characters, but they are
4032 * ASCII. If not enough room, grow the string */
4033 if (SvLEN(dest) < ++min) {
4034 const UV o = d - (U8*)SvPVX_const(dest);
4036 d = (U8*)SvPVX(dest) + o;
4038 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4039 continue; /* Back to the tight loop; still in ASCII */
4042 /* The other two special handling characters have their
4043 * upper cases outside the latin1 range, hence need to be
4044 * in UTF-8, so the whole result needs to be in UTF-8. So,
4045 * here we are somewhere in the middle of processing a
4046 * non-UTF-8 string, and realize that we will have to convert
4047 * the whole thing to UTF-8. What to do? There are
4048 * several possibilities. The simplest to code is to
4049 * convert what we have so far, set a flag, and continue on
4050 * in the loop. The flag would be tested each time through
4051 * the loop, and if set, the next character would be
4052 * converted to UTF-8 and stored. But, I (khw) didn't want
4053 * to slow down the mainstream case at all for this fairly
4054 * rare case, so I didn't want to add a test that didn't
4055 * absolutely have to be there in the loop, besides the
4056 * possibility that it would get too complicated for
4057 * optimizers to deal with. Another possibility is to just
4058 * give up, convert the source to UTF-8, and restart the
4059 * function that way. Another possibility is to convert
4060 * both what has already been processed and what is yet to
4061 * come separately to UTF-8, then jump into the loop that
4062 * handles UTF-8. But the most efficient time-wise of the
4063 * ones I could think of is what follows, and turned out to
4064 * not require much extra code. */
4066 /* Convert what we have so far into UTF-8, telling the
4067 * function that we know it should be converted, and to
4068 * allow extra space for what we haven't processed yet.
4069 * Assume the worst case space requirements for converting
4070 * what we haven't processed so far: that it will require
4071 * two bytes for each remaining source character, plus the
4072 * NUL at the end. This may cause the string pointer to
4073 * move, so re-find it. */
4075 len = d - (U8*)SvPVX_const(dest);
4076 SvCUR_set(dest, len);
4077 len = sv_utf8_upgrade_flags_grow(dest,
4078 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4080 d = (U8*)SvPVX(dest) + len;
4082 /* And append the current character's upper case in UTF-8 */
4083 CAT_NON_LATIN1_UC(d, *s);
4085 /* Now process the remainder of the source, converting to
4086 * upper and UTF-8. If a resulting byte is invariant in
4087 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4088 * append it to the output. */
4091 for (; s < send; s++) {
4092 U8 upper = toUPPER_LATIN1_MOD(*s);
4093 if UTF8_IS_INVARIANT(upper) {
4097 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4101 /* Here have processed the whole source; no need to continue
4102 * with the outer loop. Each character has been converted
4103 * to upper case and converted to UTF-8 */
4106 } /* End of processing all latin1-style chars */
4107 } /* End of processing all chars */
4108 } /* End of source is not empty */
4110 if (source != dest) {
4111 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4112 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4114 } /* End of isn't utf8 */
4115 if (dest != source && SvTAINTED(source))
4134 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4135 && SvTEMP(source) && !DO_UTF8(source)) {
4137 /* We can convert in place, as lowercasing anything in the latin1 range
4138 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4140 s = d = (U8*)SvPV_force_nomg(source, len);
4147 /* The old implementation would copy source into TARG at this point.
4148 This had the side effect that if source was undef, TARG was now
4149 an undefined SV with PADTMP set, and they don't warn inside
4150 sv_2pv_flags(). However, we're now getting the PV direct from
4151 source, which doesn't have PADTMP set, so it would warn. Hence the
4155 s = (const U8*)SvPV_nomg_const(source, len);
4157 if (ckWARN(WARN_UNINITIALIZED))
4158 report_uninit(source);
4164 SvUPGRADE(dest, SVt_PV);
4165 d = (U8*)SvGROW(dest, min);
4166 (void)SvPOK_only(dest);
4171 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4172 to check DO_UTF8 again here. */
4174 if (DO_UTF8(source)) {
4175 const U8 *const send = s + len;
4176 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4179 /* See comments at the first instance in this file of this ifdef */
4180 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4181 if (UTF8_IS_INVARIANT(*s)) {
4183 /* Invariant characters use the standard mappings compiled in.
4188 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4190 /* As do the ones in the Latin1 range */
4191 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++));
4192 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4197 /* Here, is utf8 not in Latin-1 range, have to go out and get
4198 * the mappings from the tables. */
4200 const STRLEN u = UTF8SKIP(s);
4203 #ifndef CONTEXT_DEPENDENT_CASING
4204 toLOWER_utf8(s, tmpbuf, &ulen);
4206 /* This is ifdefd out because it needs more work and thought. It isn't clear
4207 * that we should do it.
4208 * A minor objection is that this is based on a hard-coded rule from the
4209 * Unicode standard, and may change, but this is not very likely at all.
4210 * mktables should check and warn if it does.
4211 * More importantly, if the sigma occurs at the end of the string, we don't
4212 * have enough context to know whether it is part of a larger string or going
4213 * to be or not. It may be that we are passed a subset of the context, via
4214 * a \U...\E, for example, and we could conceivably know the larger context if
4215 * code were changed to pass that in. But, if the string passed in is an
4216 * intermediate result, and the user concatenates two strings together
4217 * after we have made a final sigma, that would be wrong. If the final sigma
4218 * occurs in the middle of the string we are working on, then we know that it
4219 * should be a final sigma, but otherwise we can't be sure. */
4221 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4223 /* If the lower case is a small sigma, it may be that we need
4224 * to change it to a final sigma. This happens at the end of
4225 * a word that contains more than just this character, and only
4226 * when we started with a capital sigma. */
4227 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4228 s > send - len && /* Makes sure not the first letter */
4229 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4232 /* We use the algorithm in:
4233 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4234 * is a CAPITAL SIGMA): If C is preceded by a sequence
4235 * consisting of a cased letter and a case-ignorable
4236 * sequence, and C is not followed by a sequence consisting
4237 * of a case ignorable sequence and then a cased letter,
4238 * then when lowercasing C, C becomes a final sigma */
4240 /* To determine if this is the end of a word, need to peek
4241 * ahead. Look at the next character */
4242 const U8 *peek = s + u;
4244 /* Skip any case ignorable characters */
4245 while (peek < send && is_utf8_case_ignorable(peek)) {
4246 peek += UTF8SKIP(peek);
4249 /* If we reached the end of the string without finding any
4250 * non-case ignorable characters, or if the next such one
4251 * is not-cased, then we have met the conditions for it
4252 * being a final sigma with regards to peek ahead, and so
4253 * must do peek behind for the remaining conditions. (We
4254 * know there is stuff behind to look at since we tested
4255 * above that this isn't the first letter) */
4256 if (peek >= send || ! is_utf8_cased(peek)) {
4257 peek = utf8_hop(s, -1);
4259 /* Here are at the beginning of the first character
4260 * before the original upper case sigma. Keep backing
4261 * up, skipping any case ignorable characters */
4262 while (is_utf8_case_ignorable(peek)) {
4263 peek = utf8_hop(peek, -1);
4266 /* Here peek points to the first byte of the closest
4267 * non-case-ignorable character before the capital
4268 * sigma. If it is cased, then by the Unicode
4269 * algorithm, we should use a small final sigma instead
4270 * of what we have */
4271 if (is_utf8_cased(peek)) {
4272 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4273 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4277 else { /* Not a context sensitive mapping */
4278 #endif /* End of commented out context sensitive */
4279 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4281 /* If the eventually required minimum size outgrows
4282 * the available space, we need to grow. */
4283 const UV o = d - (U8*)SvPVX_const(dest);
4285 /* If someone lowercases one million U+0130s we
4286 * SvGROW() one million times. Or we could try
4287 * guessing how much to allocate without allocating too
4288 * much. Such is life. Another option would be to
4289 * grow an extra byte or two more each time we need to
4290 * grow, which would cut down the million to 500K, with
4293 d = (U8*)SvPVX(dest) + o;
4295 #ifdef CONTEXT_DEPENDENT_CASING
4298 /* Copy the newly lowercased letter to the output buffer we're
4300 Copy(tmpbuf, d, ulen, U8);
4303 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4306 } /* End of looping through the source string */
4309 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4310 } else { /* Not utf8 */
4312 const U8 *const send = s + len;
4314 /* Use locale casing if in locale; regular style if not treating
4315 * latin1 as having case; otherwise the latin1 casing. Do the
4316 * whole thing in a tight loop, for speed, */
4317 if (IN_LOCALE_RUNTIME) {
4320 for (; s < send; d++, s++)
4321 *d = toLOWER_LC(*s);
4323 else if (! IN_UNI_8_BIT) {
4324 for (; s < send; d++, s++) {
4329 for (; s < send; d++, s++) {
4330 *d = toLOWER_LATIN1(*s);
4334 if (source != dest) {
4336 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4339 if (dest != source && SvTAINTED(source))
4348 SV * const sv = TOPs;
4350 register const char *s = SvPV_const(sv,len);
4352 SvUTF8_off(TARG); /* decontaminate */
4355 SvUPGRADE(TARG, SVt_PV);
4356 SvGROW(TARG, (len * 2) + 1);
4360 if (UTF8_IS_CONTINUED(*s)) {
4361 STRLEN ulen = UTF8SKIP(s);
4385 SvCUR_set(TARG, d - SvPVX_const(TARG));
4386 (void)SvPOK_only_UTF8(TARG);
4389 sv_setpvn(TARG, s, len);
4398 dVAR; dSP; dMARK; dORIGMARK;
4399 register AV *const av = MUTABLE_AV(POPs);
4400 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4402 if (SvTYPE(av) == SVt_PVAV) {
4403 const I32 arybase = CopARYBASE_get(PL_curcop);
4404 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4405 bool can_preserve = FALSE;
4411 can_preserve = SvCANEXISTDELETE(av);
4414 if (lval && localizing) {
4417 for (svp = MARK + 1; svp <= SP; svp++) {
4418 const I32 elem = SvIV(*svp);
4422 if (max > AvMAX(av))
4426 while (++MARK <= SP) {
4428 I32 elem = SvIV(*MARK);
4429 bool preeminent = TRUE;
4433 if (localizing && can_preserve) {
4434 /* If we can determine whether the element exist,
4435 * Try to preserve the existenceness of a tied array
4436 * element by using EXISTS and DELETE if possible.
4437 * Fallback to FETCH and STORE otherwise. */
4438 preeminent = av_exists(av, elem);
4441 svp = av_fetch(av, elem, lval);
4443 if (!svp || *svp == &PL_sv_undef)
4444 DIE(aTHX_ PL_no_aelem, elem);
4447 save_aelem(av, elem, svp);
4449 SAVEADELETE(av, elem);
4452 *MARK = svp ? *svp : &PL_sv_undef;
4455 if (GIMME != G_ARRAY) {
4457 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4463 /* Smart dereferencing for keys, values and each */
4475 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4480 "Type of argument to %s must be unblessed hashref or arrayref",
4481 PL_op_desc[PL_op->op_type] );
4484 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4486 "Can't modify %s in %s",
4487 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4490 /* Delegate to correct function for op type */
4492 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4493 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4496 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4504 AV *array = MUTABLE_AV(POPs);
4505 const I32 gimme = GIMME_V;
4506 IV *iterp = Perl_av_iter_p(aTHX_ array);
4507 const IV current = (*iterp)++;
4509 if (current > av_len(array)) {
4511 if (gimme == G_SCALAR)
4518 mPUSHi(CopARYBASE_get(PL_curcop) + current);
4519 if (gimme == G_ARRAY) {
4520 SV **const element = av_fetch(array, current, 0);
4521 PUSHs(element ? *element : &PL_sv_undef);
4530 AV *array = MUTABLE_AV(POPs);
4531 const I32 gimme = GIMME_V;
4533 *Perl_av_iter_p(aTHX_ array) = 0;
4535 if (gimme == G_SCALAR) {
4537 PUSHi(av_len(array) + 1);
4539 else if (gimme == G_ARRAY) {
4540 IV n = Perl_av_len(aTHX_ array);
4541 IV i = CopARYBASE_get(PL_curcop);
4545 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
4547 for (; i <= n; i++) {
4552 for (i = 0; i <= n; i++) {
4553 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4554 PUSHs(elem ? *elem : &PL_sv_undef);
4561 /* Associative arrays. */
4567 HV * hash = MUTABLE_HV(POPs);
4569 const I32 gimme = GIMME_V;
4572 /* might clobber stack_sp */
4573 entry = hv_iternext(hash);
4578 SV* const sv = hv_iterkeysv(entry);
4579 PUSHs(sv); /* won't clobber stack_sp */
4580 if (gimme == G_ARRAY) {
4583 /* might clobber stack_sp */
4584 val = hv_iterval(hash, entry);
4589 else if (gimme == G_SCALAR)
4596 S_do_delete_local(pTHX)
4600 const I32 gimme = GIMME_V;
4604 if (PL_op->op_private & OPpSLICE) {
4606 SV * const osv = POPs;
4607 const bool tied = SvRMAGICAL(osv)
4608 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4609 const bool can_preserve = SvCANEXISTDELETE(osv)
4610 || mg_find((const SV *)osv, PERL_MAGIC_env);
4611 const U32 type = SvTYPE(osv);
4612 if (type == SVt_PVHV) { /* hash element */
4613 HV * const hv = MUTABLE_HV(osv);
4614 while (++MARK <= SP) {
4615 SV * const keysv = *MARK;
4617 bool preeminent = TRUE;
4619 preeminent = hv_exists_ent(hv, keysv, 0);
4621 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4628 sv = hv_delete_ent(hv, keysv, 0, 0);
4629 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4632 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4634 *MARK = sv_mortalcopy(sv);
4640 SAVEHDELETE(hv, keysv);
4641 *MARK = &PL_sv_undef;
4645 else if (type == SVt_PVAV) { /* array element */
4646 if (PL_op->op_flags & OPf_SPECIAL) {
4647 AV * const av = MUTABLE_AV(osv);
4648 while (++MARK <= SP) {
4649 I32 idx = SvIV(*MARK);
4651 bool preeminent = TRUE;
4653 preeminent = av_exists(av, idx);
4655 SV **svp = av_fetch(av, idx, 1);
4662 sv = av_delete(av, idx, 0);
4663 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4666 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4668 *MARK = sv_mortalcopy(sv);
4674 SAVEADELETE(av, idx);
4675 *MARK = &PL_sv_undef;
4681 DIE(aTHX_ "Not a HASH reference");
4682 if (gimme == G_VOID)
4684 else if (gimme == G_SCALAR) {
4689 *++MARK = &PL_sv_undef;
4694 SV * const keysv = POPs;
4695 SV * const osv = POPs;
4696 const bool tied = SvRMAGICAL(osv)
4697 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4698 const bool can_preserve = SvCANEXISTDELETE(osv)
4699 || mg_find((const SV *)osv, PERL_MAGIC_env);
4700 const U32 type = SvTYPE(osv);
4702 if (type == SVt_PVHV) {
4703 HV * const hv = MUTABLE_HV(osv);
4704 bool preeminent = TRUE;
4706 preeminent = hv_exists_ent(hv, keysv, 0);
4708 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4715 sv = hv_delete_ent(hv, keysv, 0, 0);
4716 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4719 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4721 SV *nsv = sv_mortalcopy(sv);
4727 SAVEHDELETE(hv, keysv);
4729 else if (type == SVt_PVAV) {
4730 if (PL_op->op_flags & OPf_SPECIAL) {
4731 AV * const av = MUTABLE_AV(osv);
4732 I32 idx = SvIV(keysv);
4733 bool preeminent = TRUE;
4735 preeminent = av_exists(av, idx);
4737 SV **svp = av_fetch(av, idx, 1);
4744 sv = av_delete(av, idx, 0);
4745 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4748 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4750 SV *nsv = sv_mortalcopy(sv);
4756 SAVEADELETE(av, idx);
4759 DIE(aTHX_ "panic: avhv_delete no longer supported");
4762 DIE(aTHX_ "Not a HASH reference");
4765 if (gimme != G_VOID)
4779 if (PL_op->op_private & OPpLVAL_INTRO)
4780 return do_delete_local();
4783 discard = (gimme == G_VOID) ? G_DISCARD : 0;
4785 if (PL_op->op_private & OPpSLICE) {
4787 HV * const hv = MUTABLE_HV(POPs);
4788 const U32 hvtype = SvTYPE(hv);
4789 if (hvtype == SVt_PVHV) { /* hash element */
4790 while (++MARK <= SP) {
4791 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
4792 *MARK = sv ? sv : &PL_sv_undef;
4795 else if (hvtype == SVt_PVAV) { /* array element */
4796 if (PL_op->op_flags & OPf_SPECIAL) {
4797 while (++MARK <= SP) {
4798 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
4799 *MARK = sv ? sv : &PL_sv_undef;
4804 DIE(aTHX_ "Not a HASH reference");
4807 else if (gimme == G_SCALAR) {
4812 *++MARK = &PL_sv_undef;
4818 HV * const hv = MUTABLE_HV(POPs);
4820 if (SvTYPE(hv) == SVt_PVHV)
4821 sv = hv_delete_ent(hv, keysv, discard, 0);
4822 else if (SvTYPE(hv) == SVt_PVAV) {
4823 if (PL_op->op_flags & OPf_SPECIAL)
4824 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
4826 DIE(aTHX_ "panic: avhv_delete no longer supported");
4829 DIE(aTHX_ "Not a HASH reference");
4845 if (PL_op->op_private & OPpEXISTS_SUB) {
4847 SV * const sv = POPs;
4848 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
4851 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
4856 hv = MUTABLE_HV(POPs);
4857 if (SvTYPE(hv) == SVt_PVHV) {
4858 if (hv_exists_ent(hv, tmpsv, 0))
4861 else if (SvTYPE(hv) == SVt_PVAV) {
4862 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
4863 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
4868 DIE(aTHX_ "Not a HASH reference");
4875 dVAR; dSP; dMARK; dORIGMARK;
4876 register HV * const hv = MUTABLE_HV(POPs);
4877 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4878 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4879 bool can_preserve = FALSE;
4885 if (SvCANEXISTDELETE(hv) || mg_find((const SV *)hv, PERL_MAGIC_env))
4886 can_preserve = TRUE;
4889 while (++MARK <= SP) {
4890 SV * const keysv = *MARK;
4893 bool preeminent = TRUE;
4895 if (localizing && can_preserve) {
4896 /* If we can determine whether the element exist,
4897 * try to preserve the existenceness of a tied hash
4898 * element by using EXISTS and DELETE if possible.
4899 * Fallback to FETCH and STORE otherwise. */
4900 preeminent = hv_exists_ent(hv, keysv, 0);
4903 he = hv_fetch_ent(hv, keysv, lval, 0);
4904 svp = he ? &HeVAL(he) : NULL;
4907 if (!svp || *svp == &PL_sv_undef) {
4908 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
4911 if (HvNAME_get(hv) && isGV(*svp))
4912 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
4913 else if (preeminent)
4914 save_helem_flags(hv, keysv, svp,
4915 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
4917 SAVEHDELETE(hv, keysv);
4920 *MARK = svp ? *svp : &PL_sv_undef;
4922 if (GIMME != G_ARRAY) {
4924 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4930 /* List operators. */
4935 if (GIMME != G_ARRAY) {
4937 *MARK = *SP; /* unwanted list, return last item */
4939 *MARK = &PL_sv_undef;
4949 SV ** const lastrelem = PL_stack_sp;
4950 SV ** const lastlelem = PL_stack_base + POPMARK;
4951 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
4952 register SV ** const firstrelem = lastlelem + 1;
4953 const I32 arybase = CopARYBASE_get(PL_curcop);
4954 I32 is_something_there = FALSE;
4956 register const I32 max = lastrelem - lastlelem;
4957 register SV **lelem;
4959 if (GIMME != G_ARRAY) {
4960 I32 ix = SvIV(*lastlelem);
4965 if (ix < 0 || ix >= max)
4966 *firstlelem = &PL_sv_undef;
4968 *firstlelem = firstrelem[ix];
4974 SP = firstlelem - 1;
4978 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
4979 I32 ix = SvIV(*lelem);
4984 if (ix < 0 || ix >= max)
4985 *lelem = &PL_sv_undef;
4987 is_something_there = TRUE;
4988 if (!(*lelem = firstrelem[ix]))
4989 *lelem = &PL_sv_undef;
4992 if (is_something_there)
4995 SP = firstlelem - 1;
5001 dVAR; dSP; dMARK; dORIGMARK;
5002 const I32 items = SP - MARK;
5003 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
5004 SP = ORIGMARK; /* av_make() might realloc stack_sp */
5005 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
5006 ? newRV_noinc(av) : av);
5012 dVAR; dSP; dMARK; dORIGMARK;
5013 HV* const hv = newHV();
5016 SV * const key = *++MARK;
5017 SV * const val = newSV(0);
5019 sv_setsv(val, *++MARK);
5021 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
5022 (void)hv_store_ent(hv,key,val,0);
5025 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
5026 ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
5031 S_deref_plain_array(pTHX_ AV *ary)
5033 if (SvTYPE(ary) == SVt_PVAV) return ary;
5034 SvGETMAGIC((SV *)ary);
5035 if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV)
5036 Perl_die(aTHX_ "Not an ARRAY reference");
5037 else if (SvOBJECT(SvRV(ary)))
5038 Perl_die(aTHX_ "Not an unblessed ARRAY reference");
5039 return (AV *)SvRV(ary);
5042 #if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
5043 # define DEREF_PLAIN_ARRAY(ary) \
5046 SvTYPE(aRrRay) == SVt_PVAV \
5048 : S_deref_plain_array(aTHX_ aRrRay); \
5051 # define DEREF_PLAIN_ARRAY(ary) \
5053 PL_Sv = (SV *)(ary), \
5054 SvTYPE(PL_Sv) == SVt_PVAV \
5056 : S_deref_plain_array(aTHX_ (AV *)PL_Sv) \
5062 dVAR; dSP; dMARK; dORIGMARK;
5063 int num_args = (SP - MARK);
5064 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5068 register I32 offset;
5069 register I32 length;
5073 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5076 return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg,
5077 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
5084 offset = i = SvIV(*MARK);
5086 offset += AvFILLp(ary) + 1;
5088 offset -= CopARYBASE_get(PL_curcop);
5090 DIE(aTHX_ PL_no_aelem, i);
5092 length = SvIVx(*MARK++);
5094 length += AvFILLp(ary) - offset + 1;
5100 length = AvMAX(ary) + 1; /* close enough to infinity */
5104 length = AvMAX(ary) + 1;
5106 if (offset > AvFILLp(ary) + 1) {
5108 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
5109 offset = AvFILLp(ary) + 1;
5111 after = AvFILLp(ary) + 1 - (offset + length);
5112 if (after < 0) { /* not that much array */
5113 length += after; /* offset+length now in array */
5119 /* At this point, MARK .. SP-1 is our new LIST */
5122 diff = newlen - length;
5123 if (newlen && !AvREAL(ary) && AvREIFY(ary))
5126 /* make new elements SVs now: avoid problems if they're from the array */
5127 for (dst = MARK, i = newlen; i; i--) {
5128 SV * const h = *dst;
5129 *dst++ = newSVsv(h);
5132 if (diff < 0) { /* shrinking the area */
5133 SV **tmparyval = NULL;
5135 Newx(tmparyval, newlen, SV*); /* so remember insertion */
5136 Copy(MARK, tmparyval, newlen, SV*);
5139 MARK = ORIGMARK + 1;
5140 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5141 MEXTEND(MARK, length);
5142 Copy(AvARRAY(ary)+offset, MARK, length, SV*);
5144 EXTEND_MORTAL(length);
5145 for (i = length, dst = MARK; i; i--) {
5146 sv_2mortal(*dst); /* free them eventually */
5153 *MARK = AvARRAY(ary)[offset+length-1];
5156 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
5157 SvREFCNT_dec(*dst++); /* free them now */
5160 AvFILLp(ary) += diff;
5162 /* pull up or down? */
5164 if (offset < after) { /* easier to pull up */
5165 if (offset) { /* esp. if nothing to pull */
5166 src = &AvARRAY(ary)[offset-1];
5167 dst = src - diff; /* diff is negative */
5168 for (i = offset; i > 0; i--) /* can't trust Copy */
5172 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
5176 if (after) { /* anything to pull down? */
5177 src = AvARRAY(ary) + offset + length;
5178 dst = src + diff; /* diff is negative */
5179 Move(src, dst, after, SV*);
5181 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
5182 /* avoid later double free */
5186 dst[--i] = &PL_sv_undef;
5189 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
5190 Safefree(tmparyval);
5193 else { /* no, expanding (or same) */
5194 SV** tmparyval = NULL;
5196 Newx(tmparyval, length, SV*); /* so remember deletion */
5197 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
5200 if (diff > 0) { /* expanding */
5201 /* push up or down? */
5202 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
5206 Move(src, dst, offset, SV*);
5208 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
5210 AvFILLp(ary) += diff;
5213 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
5214 av_extend(ary, AvFILLp(ary) + diff);
5215 AvFILLp(ary) += diff;
5218 dst = AvARRAY(ary) + AvFILLp(ary);
5220 for (i = after; i; i--) {
5228 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
5231 MARK = ORIGMARK + 1;
5232 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5234 Copy(tmparyval, MARK, length, SV*);
5236 EXTEND_MORTAL(length);
5237 for (i = length, dst = MARK; i; i--) {
5238 sv_2mortal(*dst); /* free them eventually */
5245 else if (length--) {
5246 *MARK = tmparyval[length];
5249 while (length-- > 0)
5250 SvREFCNT_dec(tmparyval[length]);
5254 *MARK = &PL_sv_undef;
5255 Safefree(tmparyval);
5259 mg_set(MUTABLE_SV(ary));
5267 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5268 register AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5269 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5272 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5275 ENTER_with_name("call_PUSH");
5276 call_method("PUSH",G_SCALAR|G_DISCARD);
5277 LEAVE_with_name("call_PUSH");
5281 PL_delaymagic = DM_DELAY;
5282 for (++MARK; MARK <= SP; MARK++) {
5283 SV * const sv = newSV(0);
5285 sv_setsv(sv, *MARK);
5286 av_store(ary, AvFILLp(ary)+1, sv);
5288 if (PL_delaymagic & DM_ARRAY_ISA)
5289 mg_set(MUTABLE_SV(ary));
5294 if (OP_GIMME(PL_op, 0) != G_VOID) {
5295 PUSHi( AvFILL(ary) + 1 );
5304 AV * const av = PL_op->op_flags & OPf_SPECIAL
5305 ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs));
5306 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
5310 (void)sv_2mortal(sv);
5317 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5318 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5319 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5322 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5325 ENTER_with_name("call_UNSHIFT");
5326 call_method("UNSHIFT",G_SCALAR|G_DISCARD);
5327 LEAVE_with_name("call_UNSHIFT");
5332 av_unshift(ary, SP - MARK);
5334 SV * const sv = newSVsv(*++MARK);
5335 (void)av_store(ary, i++, sv);
5339 if (OP_GIMME(PL_op, 0) != G_VOID) {
5340 PUSHi( AvFILL(ary) + 1 );
5349 if (GIMME == G_ARRAY) {
5350 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5354 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5355 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5356 av = MUTABLE_AV((*SP));
5357 /* In-place reversing only happens in void context for the array
5358 * assignment. We don't need to push anything on the stack. */
5361 if (SvMAGICAL(av)) {
5363 register SV *tmp = sv_newmortal();
5364 /* For SvCANEXISTDELETE */
5367 bool can_preserve = SvCANEXISTDELETE(av);
5369 for (i = 0, j = av_len(av); i < j; ++i, --j) {
5370 register SV *begin, *end;
5373 if (!av_exists(av, i)) {
5374 if (av_exists(av, j)) {
5375 register SV *sv = av_delete(av, j, 0);
5376 begin = *av_fetch(av, i, TRUE);
5377 sv_setsv_mg(begin, sv);
5381 else if (!av_exists(av, j)) {
5382 register SV *sv = av_delete(av, i, 0);
5383 end = *av_fetch(av, j, TRUE);
5384 sv_setsv_mg(end, sv);
5389 begin = *av_fetch(av, i, TRUE);
5390 end = *av_fetch(av, j, TRUE);
5391 sv_setsv(tmp, begin);
5392 sv_setsv_mg(begin, end);
5393 sv_setsv_mg(end, tmp);
5397 SV **begin = AvARRAY(av);
5400 SV **end = begin + AvFILLp(av);
5402 while (begin < end) {
5403 register SV * const tmp = *begin;
5414 register SV * const tmp = *MARK;
5418 /* safe as long as stack cannot get extended in the above */
5424 register char *down;
5429 SvUTF8_off(TARG); /* decontaminate */
5431 do_join(TARG, &PL_sv_no, MARK, SP);
5433 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv());
5434 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
5435 report_uninit(TARG);
5438 up = SvPV_force(TARG, len);
5440 if (DO_UTF8(TARG)) { /* first reverse each character */
5441 U8* s = (U8*)SvPVX(TARG);
5442 const U8* send = (U8*)(s + len);
5444 if (UTF8_IS_INVARIANT(*s)) {
5449 if (!utf8_to_uvchr(s, 0))
5453 down = (char*)(s - 1);
5454 /* reverse this character */
5458 *down-- = (char)tmp;
5464 down = SvPVX(TARG) + len - 1;
5468 *down-- = (char)tmp;
5470 (void)SvPOK_only_UTF8(TARG);
5482 register IV limit = POPi; /* note, negative is forever */
5483 SV * const sv = POPs;
5485 register const char *s = SvPV_const(sv, len);
5486 const bool do_utf8 = DO_UTF8(sv);
5487 const char *strend = s + len;
5489 register REGEXP *rx;
5491 register const char *m;
5493 const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
5494 I32 maxiters = slen + 10;
5495 I32 trailing_empty = 0;
5497 const I32 origlimit = limit;
5500 const I32 gimme = GIMME_V;
5502 const I32 oldsave = PL_savestack_ix;
5503 U32 make_mortal = SVs_TEMP;
5508 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*);
5513 DIE(aTHX_ "panic: pp_split");
5516 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
5517 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
5519 RX_MATCH_UTF8_set(rx, do_utf8);
5522 if (pm->op_pmreplrootu.op_pmtargetoff) {
5523 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
5526 if (pm->op_pmreplrootu.op_pmtargetgv) {
5527 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
5532 if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
5538 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
5540 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
5547 for (i = AvFILLp(ary); i >= 0; i--)
5548 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
5550 /* temporarily switch stacks */
5551 SAVESWITCHSTACK(PL_curstack, ary);
5555 base = SP - PL_stack_base;
5557 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
5559 while (*s == ' ' || is_utf8_space((U8*)s))
5562 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5563 while (isSPACE_LC(*s))
5571 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) {
5575 gimme_scalar = gimme == G_SCALAR && !ary;
5578 limit = maxiters + 2;
5579 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
5582 /* this one uses 'm' and is a negative test */
5584 while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
5585 const int t = UTF8SKIP(m);
5586 /* is_utf8_space returns FALSE for malform utf8 */
5593 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5594 while (m < strend && !isSPACE_LC(*m))
5597 while (m < strend && !isSPACE(*m))
5610 dstr = newSVpvn_flags(s, m-s,
5611 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5615 /* skip the whitespace found last */
5617 s = m + UTF8SKIP(m);
5621 /* this one uses 's' and is a positive test */
5623 while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
5626 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5627 while (s < strend && isSPACE_LC(*s))
5630 while (s < strend && isSPACE(*s))
5635 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
5637 for (m = s; m < strend && *m != '\n'; m++)
5650 dstr = newSVpvn_flags(s, m-s,
5651 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5657 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
5659 Pre-extend the stack, either the number of bytes or
5660 characters in the string or a limited amount, triggered by:
5662 my ($x, $y) = split //, $str;
5666 if (!gimme_scalar) {
5667 const U32 items = limit - 1;
5676 /* keep track of how many bytes we skip over */
5686 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
5699 dstr = newSVpvn(s, 1);
5715 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
5716 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
5717 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
5718 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
5719 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
5720 SV * const csv = CALLREG_INTUIT_STRING(rx);
5722 len = RX_MINLENRET(rx);
5723 if (len == 1 && !RX_UTF8(rx) && !tail) {
5724 const char c = *SvPV_nolen_const(csv);
5726 for (m = s; m < strend && *m != c; m++)
5737 dstr = newSVpvn_flags(s, m-s,
5738 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5741 /* The rx->minlen is in characters but we want to step
5742 * s ahead by bytes. */
5744 s = (char*)utf8_hop((U8*)m, len);
5746 s = m + len; /* Fake \n at the end */
5750 while (s < strend && --limit &&
5751 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
5752 csv, multiline ? FBMrf_MULTILINE : 0)) )
5761 dstr = newSVpvn_flags(s, m-s,
5762 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5765 /* The rx->minlen is in characters but we want to step
5766 * s ahead by bytes. */
5768 s = (char*)utf8_hop((U8*)m, len);
5770 s = m + len; /* Fake \n at the end */
5775 maxiters += slen * RX_NPARENS(rx);
5776 while (s < strend && --limit)
5780 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
5781 sv, NULL, SvSCREAM(sv) ? REXEC_SCREAM : 0);
5783 if (rex_return == 0)
5785 TAINT_IF(RX_MATCH_TAINTED(rx));
5786 if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
5789 orig = RX_SUBBEG(rx);
5791 strend = s + (strend - m);
5793 m = RX_OFFS(rx)[0].start + orig;
5802 dstr = newSVpvn_flags(s, m-s,
5803 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5806 if (RX_NPARENS(rx)) {
5808 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
5809 s = RX_OFFS(rx)[i].start + orig;
5810 m = RX_OFFS(rx)[i].end + orig;
5812 /* japhy (07/27/01) -- the (m && s) test doesn't catch
5813 parens that didn't match -- they should be set to
5814 undef, not the empty string */
5822 if (m >= orig && s >= orig) {
5823 dstr = newSVpvn_flags(s, m-s,
5824 (do_utf8 ? SVf_UTF8 : 0)
5828 dstr = &PL_sv_undef; /* undef, not "" */
5834 s = RX_OFFS(rx)[0].end + orig;
5838 if (!gimme_scalar) {
5839 iters = (SP - PL_stack_base) - base;
5841 if (iters > maxiters)
5842 DIE(aTHX_ "Split loop");
5844 /* keep field after final delim? */
5845 if (s < strend || (iters && origlimit)) {
5846 if (!gimme_scalar) {
5847 const STRLEN l = strend - s;
5848 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5853 else if (!origlimit) {
5855 iters -= trailing_empty;
5857 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
5858 if (TOPs && !make_mortal)
5860 *SP-- = &PL_sv_undef;
5867 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
5871 if (SvSMAGICAL(ary)) {
5873 mg_set(MUTABLE_SV(ary));
5876 if (gimme == G_ARRAY) {
5878 Copy(AvARRAY(ary), SP + 1, iters, SV*);
5885 ENTER_with_name("call_PUSH");
5886 call_method("PUSH",G_SCALAR|G_DISCARD);
5887 LEAVE_with_name("call_PUSH");
5889 if (gimme == G_ARRAY) {
5891 /* EXTEND should not be needed - we just popped them */
5893 for (i=0; i < iters; i++) {
5894 SV **svp = av_fetch(ary, i, FALSE);
5895 PUSHs((svp) ? *svp : &PL_sv_undef);
5902 if (gimme == G_ARRAY)
5914 SV *const sv = PAD_SVl(PL_op->op_targ);
5916 if (SvPADSTALE(sv)) {
5919 RETURNOP(cLOGOP->op_other);
5921 RETURNOP(cLOGOP->op_next);
5931 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV
5932 || SvTYPE(retsv) == SVt_PVCV) {
5933 retsv = refto(retsv);
5940 PP(unimplemented_op)
5943 const Optype op_type = PL_op->op_type;
5944 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope
5945 with out of range op numbers - it only "special" cases op_custom.
5946 Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
5947 if we get here for a custom op then that means that the custom op didn't
5948 have an implementation. Given that OP_NAME() looks up the custom op
5949 by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
5950 registers &PL_unimplemented_op as the address of their custom op.
5951 NULL doesn't generate a useful error message. "custom" does. */
5952 const char *const name = op_type >= OP_max
5953 ? "[out of range]" : PL_op_name[PL_op->op_type];
5954 if(OP_IS_SOCKET(op_type))
5955 DIE(aTHX_ PL_no_sock_func, name);
5956 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
5963 HV * const hv = (HV*)POPs;
5965 if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; }
5967 if (SvRMAGICAL(hv)) {
5968 MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied);
5970 XPUSHs(magic_scalarpack(hv, mg));
5975 XPUSHs(boolSV(HvUSEDKEYS(hv) != 0));
5979 /* For sorting out arguments passed to a &CORE:: subroutine */
5983 int opnum = SvIOK(cSVOP_sv) ? (int)SvUV(cSVOP_sv) : 0;
5984 AV * const at_ = GvAV(PL_defgv);
5985 I32 minargs = 0, maxargs = 0, numargs = AvFILLp(at_)+1;
5986 I32 oa = opnum ? PL_opargs[opnum] >> OASHIFT : 0;
5987 const char *err = NULL;
5989 /* Count how many args there are. */
5995 if(numargs < minargs) err = "Not enough";
5996 else if(numargs > maxargs) err = "Too many";
5998 /* diag_listed_as: Too many arguments for %s */
6000 "%s arguments for %s", err,
6001 opnum ? OP_DESC(PL_op->op_next) : SvPV_nolen_const(cSVOP_sv)
6004 /* Reset the stack pointer. Without this, we end up returning our own
6005 arguments in list context, in addition to the values we are supposed
6006 to return. nextstate usually does this on sub entry, but we need
6007 to run the next op with the caller’s hints, so we cannot have a
6009 SP = PL_stack_base + cxstack[cxstack_ix].blk_oldsp;
6016 * c-indentation-style: bsd
6018 * indent-tabs-mode: t
6021 * ex: set ts=8 sts=4 sw=4 noet: