3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
33 /* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
37 #ifdef NEED_GETPID_PROTO
38 extern Pid_t getpid (void);
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
45 #if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
49 /* variations on pp_null */
55 if (GIMME_V == G_SCALAR)
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
71 if (PL_op->op_flags & OPf_REF) {
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
84 if (gimme == G_ARRAY) {
85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
87 if (SvMAGICAL(TARG)) {
89 for (i=0; i < (U32)maxarg; i++) {
90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
99 else if (gimme == G_SCALAR) {
100 SV* const sv = sv_newmortal();
101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
102 sv_setiv(sv, maxarg);
113 assert(SvTYPE(TARG) == SVt_PVHV);
115 if (PL_op->op_private & OPpLVAL_INTRO)
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
118 if (PL_op->op_flags & OPf_REF)
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
129 if (gimme == G_ARRAY) {
130 RETURNOP(Perl_do_kv(aTHX));
132 else if (gimme == G_SCALAR) {
133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
141 static const char S_no_symref_sv[] =
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
144 /* In some cases this function inspects PL_op. If this function is called
145 for new op types, more bool parameters may need to be added in place of
148 When noinit is true, the absence of a gv will cause a retval of undef.
149 This is unrelated to the cv-to-gv assignment case.
151 Make sure to use SPAGAIN after calling this.
155 S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
159 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
162 sv = amagic_deref_call(sv, to_gv_amg);
166 if (SvTYPE(sv) == SVt_PVIO) {
167 GV * const gv = MUTABLE_GV(sv_newmortal());
168 gv_init(gv, 0, "", 0, 0);
169 GvIOp(gv) = MUTABLE_IO(sv);
170 SvREFCNT_inc_void_NN(sv);
173 else if (!isGV_with_GP(sv))
174 return (SV *)Perl_die(aTHX_ "Not a GLOB reference");
177 if (!isGV_with_GP(sv)) {
179 /* If this is a 'my' scalar and flag is set then vivify
182 if (vivify_sv && sv != &PL_sv_undef) {
185 Perl_croak_no_modify(aTHX);
186 if (cUNOP->op_targ) {
188 SV * const namesv = PAD_SV(cUNOP->op_targ);
189 const char * const name = SvPV(namesv, len);
190 gv = MUTABLE_GV(newSV(0));
191 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
194 const char * const name = CopSTASHPV(PL_curcop);
197 prepare_SV_for_RV(sv);
198 SvRV_set(sv, MUTABLE_SV(gv));
203 if (PL_op->op_flags & OPf_REF || strict)
204 return (SV *)Perl_die(aTHX_ PL_no_usym, "a symbol");
205 if (ckWARN(WARN_UNINITIALIZED))
211 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
212 sv, GV_ADDMG, SVt_PVGV
222 (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""),
225 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
226 == OPpDONT_INIT_GV) {
227 /* We are the target of a coderef assignment. Return
228 the scalar unchanged, and let pp_sasssign deal with
232 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
234 /* FAKE globs in the symbol table cause weird bugs (#77810) */
239 SV *newsv = sv_newmortal();
240 sv_setsv_flags(newsv, sv, 0);
252 sv, PL_op->op_private & OPpDEREF,
253 PL_op->op_private & HINT_STRICT_REFS,
254 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
255 || PL_op->op_type == OP_READLINE
258 if (PL_op->op_private & OPpLVAL_INTRO)
259 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
264 /* Helper function for pp_rv2sv and pp_rv2av */
266 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
267 const svtype type, SV ***spp)
272 PERL_ARGS_ASSERT_SOFTREF2XV;
274 if (PL_op->op_private & HINT_STRICT_REFS) {
276 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
278 Perl_die(aTHX_ PL_no_usym, what);
282 PL_op->op_flags & OPf_REF &&
283 PL_op->op_next->op_type != OP_BOOLKEYS
285 Perl_die(aTHX_ PL_no_usym, what);
286 if (ckWARN(WARN_UNINITIALIZED))
288 if (type != SVt_PV && GIMME_V == G_ARRAY) {
292 **spp = &PL_sv_undef;
295 if ((PL_op->op_flags & OPf_SPECIAL) &&
296 !(PL_op->op_flags & OPf_MOD))
298 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
300 **spp = &PL_sv_undef;
305 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
318 sv = amagic_deref_call(sv, to_sv_amg);
323 switch (SvTYPE(sv)) {
329 DIE(aTHX_ "Not a SCALAR reference");
336 if (!isGV_with_GP(gv)) {
337 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
343 if (PL_op->op_flags & OPf_MOD) {
344 if (PL_op->op_private & OPpLVAL_INTRO) {
345 if (cUNOP->op_first->op_type == OP_NULL)
346 sv = save_scalar(MUTABLE_GV(TOPs));
348 sv = save_scalar(gv);
350 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
352 else if (PL_op->op_private & OPpDEREF)
353 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
362 AV * const av = MUTABLE_AV(TOPs);
363 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
365 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
367 *sv = newSV_type(SVt_PVMG);
368 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
372 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
381 if (PL_op->op_flags & OPf_MOD || LVRET) {
382 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
383 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
385 LvTARG(ret) = SvREFCNT_inc_simple(sv);
386 PUSHs(ret); /* no SvSETMAGIC */
390 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
391 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
392 if (mg && mg->mg_len >= 0) {
410 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
412 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
415 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
416 /* (But not in defined().) */
418 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
421 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
422 if ((PL_op->op_private & OPpLVAL_INTRO)) {
423 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
426 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
429 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
433 cv = MUTABLE_CV(&PL_sv_undef);
434 SETs(MUTABLE_SV(cv));
444 SV *ret = &PL_sv_undef;
446 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
447 const char * s = SvPVX_const(TOPs);
448 if (strnEQ(s, "CORE::", 6)) {
449 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
450 if (!code || code == -KEY_CORE)
451 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
452 if (code < 0) { /* Overridable. */
453 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
459 cv = sv_2cv(TOPs, &stash, &gv, 0);
461 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
470 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
472 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
474 PUSHs(MUTABLE_SV(cv));
488 if (GIMME != G_ARRAY) {
492 *MARK = &PL_sv_undef;
493 *MARK = refto(*MARK);
497 EXTEND_MORTAL(SP - MARK);
499 *MARK = refto(*MARK);
504 S_refto(pTHX_ SV *sv)
509 PERL_ARGS_ASSERT_REFTO;
511 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
514 if (!(sv = LvTARG(sv)))
517 SvREFCNT_inc_void_NN(sv);
519 else if (SvTYPE(sv) == SVt_PVAV) {
520 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
521 av_reify(MUTABLE_AV(sv));
523 SvREFCNT_inc_void_NN(sv);
525 else if (SvPADTMP(sv) && !IS_PADGV(sv))
529 SvREFCNT_inc_void_NN(sv);
532 sv_upgrade(rv, SVt_IV);
542 SV * const sv = POPs;
547 if (!sv || !SvROK(sv))
550 pv = sv_reftype(SvRV(sv),TRUE);
551 PUSHp(pv, strlen(pv));
562 stash = CopSTASH(PL_curcop);
564 SV * const ssv = POPs;
568 if (!ssv) goto curstash;
569 if (!SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
570 Perl_croak(aTHX_ "Attempt to bless into a reference");
571 ptr = SvPV_const(ssv,len);
573 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
574 "Explicit blessing to '' (assuming package main)");
575 stash = gv_stashpvn(ptr, len, GV_ADD);
578 (void)sv_bless(TOPs, stash);
587 const char * const elem = SvPV_nolen_const(sv);
588 GV * const gv = MUTABLE_GV(POPs);
593 /* elem will always be NUL terminated. */
594 const char * const second_letter = elem + 1;
597 if (strEQ(second_letter, "RRAY"))
598 tmpRef = MUTABLE_SV(GvAV(gv));
601 if (strEQ(second_letter, "ODE"))
602 tmpRef = MUTABLE_SV(GvCVu(gv));
605 if (strEQ(second_letter, "ILEHANDLE")) {
606 /* finally deprecated in 5.8.0 */
607 deprecate("*glob{FILEHANDLE}");
608 tmpRef = MUTABLE_SV(GvIOp(gv));
611 if (strEQ(second_letter, "ORMAT"))
612 tmpRef = MUTABLE_SV(GvFORM(gv));
615 if (strEQ(second_letter, "LOB"))
616 tmpRef = MUTABLE_SV(gv);
619 if (strEQ(second_letter, "ASH"))
620 tmpRef = MUTABLE_SV(GvHV(gv));
623 if (*second_letter == 'O' && !elem[2])
624 tmpRef = MUTABLE_SV(GvIOp(gv));
627 if (strEQ(second_letter, "AME"))
628 sv = newSVhek(GvNAME_HEK(gv));
631 if (strEQ(second_letter, "ACKAGE")) {
632 const HV * const stash = GvSTASH(gv);
633 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
634 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
638 if (strEQ(second_letter, "CALAR"))
653 /* Pattern matching */
658 register unsigned char *s;
661 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
665 if (mg && SvSCREAM(sv))
668 s = (unsigned char*)(SvPV(sv, len));
669 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
670 /* No point in studying a zero length string, and not safe to study
671 anything that doesn't appear to be a simple scalar (and hence might
672 change between now and when the regexp engine runs without our set
673 magic ever running) such as a reference to an object with overloaded
674 stringification. Also refuse to study an FBM scalar, as this gives
675 more flexibility in SV flag usage. No real-world code would ever
676 end up studying an FBM scalar, so this isn't a real pessimisation.
677 Endemic use of I32 in Perl_screaminstr makes it hard to safely push
678 the study length limit from I32_MAX to U32_MAX - 1.
685 } else if (len < 0xFFFF) {
690 size = (256 + len) * quanta;
691 sfirst_raw = (char *)safemalloc(size);
694 DIE(aTHX_ "do_study: out of memory");
698 mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
699 mg->mg_ptr = sfirst_raw;
701 mg->mg_private = quanta;
703 memset(sfirst_raw, ~0, 256 * quanta);
705 /* The assumption here is that most studied strings are fairly short, hence
706 the pain of the extra code is worth it, given the memory savings.
707 80 character string, 336 bytes as U8, down from 1344 as U32
708 800 character string, 2112 bytes as U16, down from 4224 as U32
712 U8 *const sfirst = (U8 *)sfirst_raw;
713 U8 *const snext = sfirst + 256;
715 const U8 ch = s[len];
716 snext[len] = sfirst[ch];
719 } else if (quanta == 2) {
720 U16 *const sfirst = (U16 *)sfirst_raw;
721 U16 *const snext = sfirst + 256;
723 const U8 ch = s[len];
724 snext[len] = sfirst[ch];
728 U32 *const sfirst = (U32 *)sfirst_raw;
729 U32 *const snext = sfirst + 256;
731 const U8 ch = s[len];
732 snext[len] = sfirst[ch];
745 if (PL_op->op_flags & OPf_STACKED)
747 else if (PL_op->op_private & OPpTARGET_MY)
753 TARG = sv_newmortal();
754 if(PL_op->op_type == OP_TRANSR) {
755 SV * const newsv = newSVsv(sv);
759 else PUSHi(do_trans(sv));
763 /* Lvalue operators. */
766 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
772 PERL_ARGS_ASSERT_DO_CHOMP;
774 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
776 if (SvTYPE(sv) == SVt_PVAV) {
778 AV *const av = MUTABLE_AV(sv);
779 const I32 max = AvFILL(av);
781 for (i = 0; i <= max; i++) {
782 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
783 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
784 do_chomp(retval, sv, chomping);
788 else if (SvTYPE(sv) == SVt_PVHV) {
789 HV* const hv = MUTABLE_HV(sv);
791 (void)hv_iterinit(hv);
792 while ((entry = hv_iternext(hv)))
793 do_chomp(retval, hv_iterval(hv,entry), chomping);
796 else if (SvREADONLY(sv)) {
798 /* SV is copy-on-write */
799 sv_force_normal_flags(sv, 0);
802 Perl_croak_no_modify(aTHX);
807 /* XXX, here sv is utf8-ized as a side-effect!
808 If encoding.pm is used properly, almost string-generating
809 operations, including literal strings, chr(), input data, etc.
810 should have been utf8-ized already, right?
812 sv_recode_to_utf8(sv, PL_encoding);
818 char *temp_buffer = NULL;
827 while (len && s[-1] == '\n') {
834 STRLEN rslen, rs_charlen;
835 const char *rsptr = SvPV_const(PL_rs, rslen);
837 rs_charlen = SvUTF8(PL_rs)
841 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
842 /* Assumption is that rs is shorter than the scalar. */
844 /* RS is utf8, scalar is 8 bit. */
846 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
849 /* Cannot downgrade, therefore cannot possibly match
851 assert (temp_buffer == rsptr);
857 else if (PL_encoding) {
858 /* RS is 8 bit, encoding.pm is used.
859 * Do not recode PL_rs as a side-effect. */
860 svrecode = newSVpvn(rsptr, rslen);
861 sv_recode_to_utf8(svrecode, PL_encoding);
862 rsptr = SvPV_const(svrecode, rslen);
863 rs_charlen = sv_len_utf8(svrecode);
866 /* RS is 8 bit, scalar is utf8. */
867 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
881 if (memNE(s, rsptr, rslen))
883 SvIVX(retval) += rs_charlen;
886 s = SvPV_force_nolen(sv);
894 SvREFCNT_dec(svrecode);
896 Safefree(temp_buffer);
898 if (len && !SvPOK(sv))
899 s = SvPV_force_nomg(sv, len);
902 char * const send = s + len;
903 char * const start = s;
905 while (s > start && UTF8_IS_CONTINUATION(*s))
907 if (is_utf8_string((U8*)s, send - s)) {
908 sv_setpvn(retval, s, send - s);
910 SvCUR_set(sv, s - start);
916 sv_setpvs(retval, "");
920 sv_setpvn(retval, s, 1);
927 sv_setpvs(retval, "");
935 const bool chomping = PL_op->op_type == OP_SCHOMP;
939 do_chomp(TARG, TOPs, chomping);
946 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
947 const bool chomping = PL_op->op_type == OP_CHOMP;
952 do_chomp(TARG, *++MARK, chomping);
963 if (!PL_op->op_private) {
972 SV_CHECK_THINKFIRST_COW_DROP(sv);
974 switch (SvTYPE(sv)) {
978 av_undef(MUTABLE_AV(sv));
981 hv_undef(MUTABLE_HV(sv));
984 if (cv_const_sv((const CV *)sv))
985 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
986 CvANON((const CV *)sv) ? "(anonymous)"
987 : GvENAME(CvGV((const CV *)sv)));
991 /* let user-undef'd sub keep its identity */
992 GV* const gv = CvGV((const CV *)sv);
993 cv_undef(MUTABLE_CV(sv));
994 CvGV_set(MUTABLE_CV(sv), gv);
999 SvSetMagicSV(sv, &PL_sv_undef);
1002 else if (isGV_with_GP(sv)) {
1006 /* undef *Pkg::meth_name ... */
1008 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1009 && HvENAME_get(stash);
1011 if((stash = GvHV((const GV *)sv))) {
1012 if(HvENAME_get(stash))
1013 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1017 gp_free(MUTABLE_GV(sv));
1019 GvGP_set(sv, gp_ref(gp));
1020 GvSV(sv) = newSV(0);
1021 GvLINE(sv) = CopLINE(PL_curcop);
1022 GvEGV(sv) = MUTABLE_GV(sv);
1026 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1028 /* undef *Foo::ISA */
1029 if( strEQ(GvNAME((const GV *)sv), "ISA")
1030 && (stash = GvSTASH((const GV *)sv))
1031 && (method_changed || HvENAME(stash)) )
1032 mro_isa_changed_in(stash);
1033 else if(method_changed)
1034 mro_method_changed_in(
1035 GvSTASH((const GV *)sv)
1042 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1057 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1058 Perl_croak_no_modify(aTHX);
1059 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1060 && SvIVX(TOPs) != IV_MIN)
1062 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1063 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1074 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1075 Perl_croak_no_modify(aTHX);
1077 TARG = sv_newmortal();
1078 sv_setsv(TARG, TOPs);
1079 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1080 && SvIVX(TOPs) != IV_MAX)
1082 SvIV_set(TOPs, SvIVX(TOPs) + 1);
1083 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1088 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1098 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1099 Perl_croak_no_modify(aTHX);
1101 TARG = sv_newmortal();
1102 sv_setsv(TARG, TOPs);
1103 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1104 && SvIVX(TOPs) != IV_MIN)
1106 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1107 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1116 /* Ordinary operators. */
1120 dVAR; dSP; dATARGET; SV *svl, *svr;
1121 #ifdef PERL_PRESERVE_IVUV
1124 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1127 #ifdef PERL_PRESERVE_IVUV
1128 /* For integer to integer power, we do the calculation by hand wherever
1129 we're sure it is safe; otherwise we call pow() and try to convert to
1130 integer afterwards. */
1132 SvIV_please_nomg(svr);
1134 SvIV_please_nomg(svl);
1143 const IV iv = SvIVX(svr);
1147 goto float_it; /* Can't do negative powers this way. */
1151 baseuok = SvUOK(svl);
1153 baseuv = SvUVX(svl);
1155 const IV iv = SvIVX(svl);
1158 baseuok = TRUE; /* effectively it's a UV now */
1160 baseuv = -iv; /* abs, baseuok == false records sign */
1163 /* now we have integer ** positive integer. */
1166 /* foo & (foo - 1) is zero only for a power of 2. */
1167 if (!(baseuv & (baseuv - 1))) {
1168 /* We are raising power-of-2 to a positive integer.
1169 The logic here will work for any base (even non-integer
1170 bases) but it can be less accurate than
1171 pow (base,power) or exp (power * log (base)) when the
1172 intermediate values start to spill out of the mantissa.
1173 With powers of 2 we know this can't happen.
1174 And powers of 2 are the favourite thing for perl
1175 programmers to notice ** not doing what they mean. */
1177 NV base = baseuok ? baseuv : -(NV)baseuv;
1182 while (power >>= 1) {
1190 SvIV_please_nomg(svr);
1193 register unsigned int highbit = 8 * sizeof(UV);
1194 register unsigned int diff = 8 * sizeof(UV);
1195 while (diff >>= 1) {
1197 if (baseuv >> highbit) {
1201 /* we now have baseuv < 2 ** highbit */
1202 if (power * highbit <= 8 * sizeof(UV)) {
1203 /* result will definitely fit in UV, so use UV math
1204 on same algorithm as above */
1205 register UV result = 1;
1206 register UV base = baseuv;
1207 const bool odd_power = cBOOL(power & 1);
1211 while (power >>= 1) {
1218 if (baseuok || !odd_power)
1219 /* answer is positive */
1221 else if (result <= (UV)IV_MAX)
1222 /* answer negative, fits in IV */
1223 SETi( -(IV)result );
1224 else if (result == (UV)IV_MIN)
1225 /* 2's complement assumption: special case IV_MIN */
1228 /* answer negative, doesn't fit */
1229 SETn( -(NV)result );
1239 NV right = SvNV_nomg(svr);
1240 NV left = SvNV_nomg(svl);
1243 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1245 We are building perl with long double support and are on an AIX OS
1246 afflicted with a powl() function that wrongly returns NaNQ for any
1247 negative base. This was reported to IBM as PMR #23047-379 on
1248 03/06/2006. The problem exists in at least the following versions
1249 of AIX and the libm fileset, and no doubt others as well:
1251 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1252 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1253 AIX 5.2.0 bos.adt.libm 5.2.0.85
1255 So, until IBM fixes powl(), we provide the following workaround to
1256 handle the problem ourselves. Our logic is as follows: for
1257 negative bases (left), we use fmod(right, 2) to check if the
1258 exponent is an odd or even integer:
1260 - if odd, powl(left, right) == -powl(-left, right)
1261 - if even, powl(left, right) == powl(-left, right)
1263 If the exponent is not an integer, the result is rightly NaNQ, so
1264 we just return that (as NV_NAN).
1268 NV mod2 = Perl_fmod( right, 2.0 );
1269 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1270 SETn( -Perl_pow( -left, right) );
1271 } else if (mod2 == 0.0) { /* even integer */
1272 SETn( Perl_pow( -left, right) );
1273 } else { /* fractional power */
1277 SETn( Perl_pow( left, right) );
1280 SETn( Perl_pow( left, right) );
1281 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1283 #ifdef PERL_PRESERVE_IVUV
1285 SvIV_please_nomg(svr);
1293 dVAR; dSP; dATARGET; SV *svl, *svr;
1294 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1297 #ifdef PERL_PRESERVE_IVUV
1298 SvIV_please_nomg(svr);
1300 /* Unless the left argument is integer in range we are going to have to
1301 use NV maths. Hence only attempt to coerce the right argument if
1302 we know the left is integer. */
1303 /* Left operand is defined, so is it IV? */
1304 SvIV_please_nomg(svl);
1306 bool auvok = SvUOK(svl);
1307 bool buvok = SvUOK(svr);
1308 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1309 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1318 const IV aiv = SvIVX(svl);
1321 auvok = TRUE; /* effectively it's a UV now */
1323 alow = -aiv; /* abs, auvok == false records sign */
1329 const IV biv = SvIVX(svr);
1332 buvok = TRUE; /* effectively it's a UV now */
1334 blow = -biv; /* abs, buvok == false records sign */
1338 /* If this does sign extension on unsigned it's time for plan B */
1339 ahigh = alow >> (4 * sizeof (UV));
1341 bhigh = blow >> (4 * sizeof (UV));
1343 if (ahigh && bhigh) {
1345 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1346 which is overflow. Drop to NVs below. */
1347 } else if (!ahigh && !bhigh) {
1348 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1349 so the unsigned multiply cannot overflow. */
1350 const UV product = alow * blow;
1351 if (auvok == buvok) {
1352 /* -ve * -ve or +ve * +ve gives a +ve result. */
1356 } else if (product <= (UV)IV_MIN) {
1357 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1358 /* -ve result, which could overflow an IV */
1360 SETi( -(IV)product );
1362 } /* else drop to NVs below. */
1364 /* One operand is large, 1 small */
1367 /* swap the operands */
1369 bhigh = blow; /* bhigh now the temp var for the swap */
1373 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1374 multiplies can't overflow. shift can, add can, -ve can. */
1375 product_middle = ahigh * blow;
1376 if (!(product_middle & topmask)) {
1377 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1379 product_middle <<= (4 * sizeof (UV));
1380 product_low = alow * blow;
1382 /* as for pp_add, UV + something mustn't get smaller.
1383 IIRC ANSI mandates this wrapping *behaviour* for
1384 unsigned whatever the actual representation*/
1385 product_low += product_middle;
1386 if (product_low >= product_middle) {
1387 /* didn't overflow */
1388 if (auvok == buvok) {
1389 /* -ve * -ve or +ve * +ve gives a +ve result. */
1391 SETu( product_low );
1393 } else if (product_low <= (UV)IV_MIN) {
1394 /* 2s complement assumption again */
1395 /* -ve result, which could overflow an IV */
1397 SETi( -(IV)product_low );
1399 } /* else drop to NVs below. */
1401 } /* product_middle too large */
1402 } /* ahigh && bhigh */
1407 NV right = SvNV_nomg(svr);
1408 NV left = SvNV_nomg(svl);
1410 SETn( left * right );
1417 dVAR; dSP; dATARGET; SV *svl, *svr;
1418 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1421 /* Only try to do UV divide first
1422 if ((SLOPPYDIVIDE is true) or
1423 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1425 The assumption is that it is better to use floating point divide
1426 whenever possible, only doing integer divide first if we can't be sure.
1427 If NV_PRESERVES_UV is true then we know at compile time that no UV
1428 can be too large to preserve, so don't need to compile the code to
1429 test the size of UVs. */
1432 # define PERL_TRY_UV_DIVIDE
1433 /* ensure that 20./5. == 4. */
1435 # ifdef PERL_PRESERVE_IVUV
1436 # ifndef NV_PRESERVES_UV
1437 # define PERL_TRY_UV_DIVIDE
1442 #ifdef PERL_TRY_UV_DIVIDE
1443 SvIV_please_nomg(svr);
1445 SvIV_please_nomg(svl);
1447 bool left_non_neg = SvUOK(svl);
1448 bool right_non_neg = SvUOK(svr);
1452 if (right_non_neg) {
1456 const IV biv = SvIVX(svr);
1459 right_non_neg = TRUE; /* effectively it's a UV now */
1465 /* historically undef()/0 gives a "Use of uninitialized value"
1466 warning before dieing, hence this test goes here.
1467 If it were immediately before the second SvIV_please, then
1468 DIE() would be invoked before left was even inspected, so
1469 no inspection would give no warning. */
1471 DIE(aTHX_ "Illegal division by zero");
1477 const IV aiv = SvIVX(svl);
1480 left_non_neg = TRUE; /* effectively it's a UV now */
1489 /* For sloppy divide we always attempt integer division. */
1491 /* Otherwise we only attempt it if either or both operands
1492 would not be preserved by an NV. If both fit in NVs
1493 we fall through to the NV divide code below. However,
1494 as left >= right to ensure integer result here, we know that
1495 we can skip the test on the right operand - right big
1496 enough not to be preserved can't get here unless left is
1499 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1502 /* Integer division can't overflow, but it can be imprecise. */
1503 const UV result = left / right;
1504 if (result * right == left) {
1505 SP--; /* result is valid */
1506 if (left_non_neg == right_non_neg) {
1507 /* signs identical, result is positive. */
1511 /* 2s complement assumption */
1512 if (result <= (UV)IV_MIN)
1513 SETi( -(IV)result );
1515 /* It's exact but too negative for IV. */
1516 SETn( -(NV)result );
1519 } /* tried integer divide but it was not an integer result */
1520 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1521 } /* left wasn't SvIOK */
1522 } /* right wasn't SvIOK */
1523 #endif /* PERL_TRY_UV_DIVIDE */
1525 NV right = SvNV_nomg(svr);
1526 NV left = SvNV_nomg(svl);
1527 (void)POPs;(void)POPs;
1528 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1529 if (! Perl_isnan(right) && right == 0.0)
1533 DIE(aTHX_ "Illegal division by zero");
1534 PUSHn( left / right );
1541 dVAR; dSP; dATARGET;
1542 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1546 bool left_neg = FALSE;
1547 bool right_neg = FALSE;
1548 bool use_double = FALSE;
1549 bool dright_valid = FALSE;
1552 SV * const svr = TOPs;
1553 SV * const svl = TOPm1s;
1554 SvIV_please_nomg(svr);
1556 right_neg = !SvUOK(svr);
1560 const IV biv = SvIVX(svr);
1563 right_neg = FALSE; /* effectively it's a UV now */
1570 dright = SvNV_nomg(svr);
1571 right_neg = dright < 0;
1574 if (dright < UV_MAX_P1) {
1575 right = U_V(dright);
1576 dright_valid = TRUE; /* In case we need to use double below. */
1582 /* At this point use_double is only true if right is out of range for
1583 a UV. In range NV has been rounded down to nearest UV and
1584 use_double false. */
1585 SvIV_please_nomg(svl);
1586 if (!use_double && SvIOK(svl)) {
1588 left_neg = !SvUOK(svl);
1592 const IV aiv = SvIVX(svl);
1595 left_neg = FALSE; /* effectively it's a UV now */
1603 dleft = SvNV_nomg(svl);
1604 left_neg = dleft < 0;
1608 /* This should be exactly the 5.6 behaviour - if left and right are
1609 both in range for UV then use U_V() rather than floor. */
1611 if (dleft < UV_MAX_P1) {
1612 /* right was in range, so is dleft, so use UVs not double.
1616 /* left is out of range for UV, right was in range, so promote
1617 right (back) to double. */
1619 /* The +0.5 is used in 5.6 even though it is not strictly
1620 consistent with the implicit +0 floor in the U_V()
1621 inside the #if 1. */
1622 dleft = Perl_floor(dleft + 0.5);
1625 dright = Perl_floor(dright + 0.5);
1636 DIE(aTHX_ "Illegal modulus zero");
1638 dans = Perl_fmod(dleft, dright);
1639 if ((left_neg != right_neg) && dans)
1640 dans = dright - dans;
1643 sv_setnv(TARG, dans);
1649 DIE(aTHX_ "Illegal modulus zero");
1652 if ((left_neg != right_neg) && ans)
1655 /* XXX may warn: unary minus operator applied to unsigned type */
1656 /* could change -foo to be (~foo)+1 instead */
1657 if (ans <= ~((UV)IV_MAX)+1)
1658 sv_setiv(TARG, ~ans+1);
1660 sv_setnv(TARG, -(NV)ans);
1663 sv_setuv(TARG, ans);
1672 dVAR; dSP; dATARGET;
1676 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1677 /* TODO: think of some way of doing list-repeat overloading ??? */
1682 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1688 const UV uv = SvUV_nomg(sv);
1690 count = IV_MAX; /* The best we can do? */
1694 const IV iv = SvIV_nomg(sv);
1701 else if (SvNOKp(sv)) {
1702 const NV nv = SvNV_nomg(sv);
1709 count = SvIV_nomg(sv);
1711 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1713 static const char oom_list_extend[] = "Out of memory during list extend";
1714 const I32 items = SP - MARK;
1715 const I32 max = items * count;
1717 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1718 /* Did the max computation overflow? */
1719 if (items > 0 && max > 0 && (max < items || max < count))
1720 Perl_croak(aTHX_ oom_list_extend);
1725 /* This code was intended to fix 20010809.028:
1728 for (($x =~ /./g) x 2) {
1729 print chop; # "abcdabcd" expected as output.
1732 * but that change (#11635) broke this code:
1734 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1736 * I can't think of a better fix that doesn't introduce
1737 * an efficiency hit by copying the SVs. The stack isn't
1738 * refcounted, and mortalisation obviously doesn't
1739 * Do The Right Thing when the stack has more than
1740 * one pointer to the same mortal value.
1744 *SP = sv_2mortal(newSVsv(*SP));
1754 repeatcpy((char*)(MARK + items), (char*)MARK,
1755 items * sizeof(const SV *), count - 1);
1758 else if (count <= 0)
1761 else { /* Note: mark already snarfed by pp_list */
1762 SV * const tmpstr = POPs;
1765 static const char oom_string_extend[] =
1766 "Out of memory during string extend";
1769 sv_setsv_nomg(TARG, tmpstr);
1770 SvPV_force_nomg(TARG, len);
1771 isutf = DO_UTF8(TARG);
1776 const STRLEN max = (UV)count * len;
1777 if (len > MEM_SIZE_MAX / count)
1778 Perl_croak(aTHX_ oom_string_extend);
1779 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1780 SvGROW(TARG, max + 1);
1781 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1782 SvCUR_set(TARG, SvCUR(TARG) * count);
1784 *SvEND(TARG) = '\0';
1787 (void)SvPOK_only_UTF8(TARG);
1789 (void)SvPOK_only(TARG);
1791 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1792 /* The parser saw this as a list repeat, and there
1793 are probably several items on the stack. But we're
1794 in scalar context, and there's no pp_list to save us
1795 now. So drop the rest of the items -- robin@kitsite.com
1807 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1808 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1811 useleft = USE_LEFT(svl);
1812 #ifdef PERL_PRESERVE_IVUV
1813 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1814 "bad things" happen if you rely on signed integers wrapping. */
1815 SvIV_please_nomg(svr);
1817 /* Unless the left argument is integer in range we are going to have to
1818 use NV maths. Hence only attempt to coerce the right argument if
1819 we know the left is integer. */
1820 register UV auv = 0;
1826 a_valid = auvok = 1;
1827 /* left operand is undef, treat as zero. */
1829 /* Left operand is defined, so is it IV? */
1830 SvIV_please_nomg(svl);
1832 if ((auvok = SvUOK(svl)))
1835 register const IV aiv = SvIVX(svl);
1838 auvok = 1; /* Now acting as a sign flag. */
1839 } else { /* 2s complement assumption for IV_MIN */
1847 bool result_good = 0;
1850 bool buvok = SvUOK(svr);
1855 register const IV biv = SvIVX(svr);
1862 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1863 else "IV" now, independent of how it came in.
1864 if a, b represents positive, A, B negative, a maps to -A etc
1869 all UV maths. negate result if A negative.
1870 subtract if signs same, add if signs differ. */
1872 if (auvok ^ buvok) {
1881 /* Must get smaller */
1886 if (result <= buv) {
1887 /* result really should be -(auv-buv). as its negation
1888 of true value, need to swap our result flag */
1900 if (result <= (UV)IV_MIN)
1901 SETi( -(IV)result );
1903 /* result valid, but out of range for IV. */
1904 SETn( -(NV)result );
1908 } /* Overflow, drop through to NVs. */
1913 NV value = SvNV_nomg(svr);
1917 /* left operand is undef, treat as zero - value */
1921 SETn( SvNV_nomg(svl) - value );
1928 dVAR; dSP; dATARGET; SV *svl, *svr;
1929 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1933 const IV shift = SvIV_nomg(svr);
1934 if (PL_op->op_private & HINT_INTEGER) {
1935 const IV i = SvIV_nomg(svl);
1939 const UV u = SvUV_nomg(svl);
1948 dVAR; dSP; dATARGET; SV *svl, *svr;
1949 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
1953 const IV shift = SvIV_nomg(svr);
1954 if (PL_op->op_private & HINT_INTEGER) {
1955 const IV i = SvIV_nomg(svl);
1959 const UV u = SvUV_nomg(svl);
1971 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
1975 (SvIOK_notUV(left) && SvIOK_notUV(right))
1976 ? (SvIVX(left) < SvIVX(right))
1977 : (do_ncmp(left, right) == -1)
1987 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1991 (SvIOK_notUV(left) && SvIOK_notUV(right))
1992 ? (SvIVX(left) > SvIVX(right))
1993 : (do_ncmp(left, right) == 1)
2003 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
2007 (SvIOK_notUV(left) && SvIOK_notUV(right))
2008 ? (SvIVX(left) <= SvIVX(right))
2009 : (do_ncmp(left, right) <= 0)
2019 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2023 (SvIOK_notUV(left) && SvIOK_notUV(right))
2024 ? (SvIVX(left) >= SvIVX(right))
2025 : ( (do_ncmp(left, right) & 2) == 0)
2035 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2039 (SvIOK_notUV(left) && SvIOK_notUV(right))
2040 ? (SvIVX(left) != SvIVX(right))
2041 : (do_ncmp(left, right) != 0)
2046 /* compare left and right SVs. Returns:
2050 * 2: left or right was a NaN
2053 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2057 PERL_ARGS_ASSERT_DO_NCMP;
2058 #ifdef PERL_PRESERVE_IVUV
2059 SvIV_please_nomg(right);
2060 /* Fortunately it seems NaN isn't IOK */
2062 SvIV_please_nomg(left);
2065 const IV leftiv = SvIVX(left);
2066 if (!SvUOK(right)) {
2067 /* ## IV <=> IV ## */
2068 const IV rightiv = SvIVX(right);
2069 return (leftiv > rightiv) - (leftiv < rightiv);
2071 /* ## IV <=> UV ## */
2073 /* As (b) is a UV, it's >=0, so it must be < */
2076 const UV rightuv = SvUVX(right);
2077 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2082 /* ## UV <=> UV ## */
2083 const UV leftuv = SvUVX(left);
2084 const UV rightuv = SvUVX(right);
2085 return (leftuv > rightuv) - (leftuv < rightuv);
2087 /* ## UV <=> IV ## */
2089 const IV rightiv = SvIVX(right);
2091 /* As (a) is a UV, it's >=0, so it cannot be < */
2094 const UV leftuv = SvUVX(left);
2095 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2103 NV const rnv = SvNV_nomg(right);
2104 NV const lnv = SvNV_nomg(left);
2106 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2107 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2110 return (lnv > rnv) - (lnv < rnv);
2129 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2132 value = do_ncmp(left, right);
2147 int amg_type = sle_amg;
2151 switch (PL_op->op_type) {
2170 tryAMAGICbin_MG(amg_type, AMGf_set);
2173 const int cmp = (IN_LOCALE_RUNTIME
2174 ? sv_cmp_locale_flags(left, right, 0)
2175 : sv_cmp_flags(left, right, 0));
2176 SETs(boolSV(cmp * multiplier < rhs));
2184 tryAMAGICbin_MG(seq_amg, AMGf_set);
2187 SETs(boolSV(sv_eq_flags(left, right, 0)));
2195 tryAMAGICbin_MG(sne_amg, AMGf_set);
2198 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2206 tryAMAGICbin_MG(scmp_amg, 0);
2209 const int cmp = (IN_LOCALE_RUNTIME
2210 ? sv_cmp_locale_flags(left, right, 0)
2211 : sv_cmp_flags(left, right, 0));
2219 dVAR; dSP; dATARGET;
2220 tryAMAGICbin_MG(band_amg, AMGf_assign);
2223 if (SvNIOKp(left) || SvNIOKp(right)) {
2224 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2225 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2226 if (PL_op->op_private & HINT_INTEGER) {
2227 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2231 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2234 if (left_ro_nonnum) SvNIOK_off(left);
2235 if (right_ro_nonnum) SvNIOK_off(right);
2238 do_vop(PL_op->op_type, TARG, left, right);
2247 dVAR; dSP; dATARGET;
2248 const int op_type = PL_op->op_type;
2250 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2253 if (SvNIOKp(left) || SvNIOKp(right)) {
2254 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2255 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2256 if (PL_op->op_private & HINT_INTEGER) {
2257 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2258 const IV r = SvIV_nomg(right);
2259 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2263 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2264 const UV r = SvUV_nomg(right);
2265 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2268 if (left_ro_nonnum) SvNIOK_off(left);
2269 if (right_ro_nonnum) SvNIOK_off(right);
2272 do_vop(op_type, TARG, left, right);
2282 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2284 SV * const sv = TOPs;
2285 const int flags = SvFLAGS(sv);
2287 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
2291 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2292 /* It's publicly an integer, or privately an integer-not-float */
2295 if (SvIVX(sv) == IV_MIN) {
2296 /* 2s complement assumption. */
2297 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2300 else if (SvUVX(sv) <= IV_MAX) {
2305 else if (SvIVX(sv) != IV_MIN) {
2309 #ifdef PERL_PRESERVE_IVUV
2317 SETn(-SvNV_nomg(sv));
2318 else if (SvPOKp(sv)) {
2320 const char * const s = SvPV_nomg_const(sv, len);
2321 if (isIDFIRST(*s)) {
2322 sv_setpvs(TARG, "-");
2325 else if (*s == '+' || *s == '-') {
2326 sv_setsv_nomg(TARG, sv);
2327 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2329 else if (DO_UTF8(sv)) {
2330 SvIV_please_nomg(sv);
2332 goto oops_its_an_int;
2334 sv_setnv(TARG, -SvNV_nomg(sv));
2336 sv_setpvs(TARG, "-");
2341 SvIV_please_nomg(sv);
2343 goto oops_its_an_int;
2344 sv_setnv(TARG, -SvNV_nomg(sv));
2349 SETn(-SvNV_nomg(sv));
2357 tryAMAGICun_MG(not_amg, AMGf_set);
2358 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2365 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2369 if (PL_op->op_private & HINT_INTEGER) {
2370 const IV i = ~SvIV_nomg(sv);
2374 const UV u = ~SvUV_nomg(sv);
2383 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2384 sv_setsv_nomg(TARG, sv);
2385 tmps = (U8*)SvPV_force_nomg(TARG, len);
2388 /* Calculate exact length, let's not estimate. */
2393 U8 * const send = tmps + len;
2394 U8 * const origtmps = tmps;
2395 const UV utf8flags = UTF8_ALLOW_ANYUV;
2397 while (tmps < send) {
2398 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2400 targlen += UNISKIP(~c);
2406 /* Now rewind strings and write them. */
2413 Newx(result, targlen + 1, U8);
2415 while (tmps < send) {
2416 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2418 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2421 sv_usepvn_flags(TARG, (char*)result, targlen,
2422 SV_HAS_TRAILING_NUL);
2429 Newx(result, nchar + 1, U8);
2431 while (tmps < send) {
2432 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2437 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2445 register long *tmpl;
2446 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2449 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2454 for ( ; anum > 0; anum--, tmps++)
2462 /* integer versions of some of the above */
2466 dVAR; dSP; dATARGET;
2467 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2470 SETi( left * right );
2478 dVAR; dSP; dATARGET;
2479 tryAMAGICbin_MG(div_amg, AMGf_assign);
2482 IV value = SvIV_nomg(right);
2484 DIE(aTHX_ "Illegal division by zero");
2485 num = SvIV_nomg(left);
2487 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2491 value = num / value;
2497 #if defined(__GLIBC__) && IVSIZE == 8
2504 /* This is the vanilla old i_modulo. */
2505 dVAR; dSP; dATARGET;
2506 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2510 DIE(aTHX_ "Illegal modulus zero");
2511 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2515 SETi( left % right );
2520 #if defined(__GLIBC__) && IVSIZE == 8
2525 /* This is the i_modulo with the workaround for the _moddi3 bug
2526 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2527 * See below for pp_i_modulo. */
2528 dVAR; dSP; dATARGET;
2529 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2533 DIE(aTHX_ "Illegal modulus zero");
2534 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2538 SETi( left % PERL_ABS(right) );
2545 dVAR; dSP; dATARGET;
2546 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2550 DIE(aTHX_ "Illegal modulus zero");
2551 /* The assumption is to use hereafter the old vanilla version... */
2553 PL_ppaddr[OP_I_MODULO] =
2555 /* .. but if we have glibc, we might have a buggy _moddi3
2556 * (at least glicb 2.2.5 is known to have this bug), in other
2557 * words our integer modulus with negative quad as the second
2558 * argument might be broken. Test for this and re-patch the
2559 * opcode dispatch table if that is the case, remembering to
2560 * also apply the workaround so that this first round works
2561 * right, too. See [perl #9402] for more information. */
2565 /* Cannot do this check with inlined IV constants since
2566 * that seems to work correctly even with the buggy glibc. */
2568 /* Yikes, we have the bug.
2569 * Patch in the workaround version. */
2571 PL_ppaddr[OP_I_MODULO] =
2572 &Perl_pp_i_modulo_1;
2573 /* Make certain we work right this time, too. */
2574 right = PERL_ABS(right);
2577 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2581 SETi( left % right );
2589 dVAR; dSP; dATARGET;
2590 tryAMAGICbin_MG(add_amg, AMGf_assign);
2592 dPOPTOPiirl_ul_nomg;
2593 SETi( left + right );
2600 dVAR; dSP; dATARGET;
2601 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2603 dPOPTOPiirl_ul_nomg;
2604 SETi( left - right );
2612 tryAMAGICbin_MG(lt_amg, AMGf_set);
2615 SETs(boolSV(left < right));
2623 tryAMAGICbin_MG(gt_amg, AMGf_set);
2626 SETs(boolSV(left > right));
2634 tryAMAGICbin_MG(le_amg, AMGf_set);
2637 SETs(boolSV(left <= right));
2645 tryAMAGICbin_MG(ge_amg, AMGf_set);
2648 SETs(boolSV(left >= right));
2656 tryAMAGICbin_MG(eq_amg, AMGf_set);
2659 SETs(boolSV(left == right));
2667 tryAMAGICbin_MG(ne_amg, AMGf_set);
2670 SETs(boolSV(left != right));
2678 tryAMAGICbin_MG(ncmp_amg, 0);
2685 else if (left < right)
2697 tryAMAGICun_MG(neg_amg, 0);
2699 SV * const sv = TOPs;
2700 IV const i = SvIV_nomg(sv);
2706 /* High falutin' math. */
2711 tryAMAGICbin_MG(atan2_amg, 0);
2714 SETn(Perl_atan2(left, right));
2722 int amg_type = sin_amg;
2723 const char *neg_report = NULL;
2724 NV (*func)(NV) = Perl_sin;
2725 const int op_type = PL_op->op_type;
2742 amg_type = sqrt_amg;
2744 neg_report = "sqrt";
2749 tryAMAGICun_MG(amg_type, 0);
2751 SV * const arg = POPs;
2752 const NV value = SvNV_nomg(arg);
2754 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2755 SET_NUMERIC_STANDARD();
2756 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2759 XPUSHn(func(value));
2764 /* Support Configure command-line overrides for rand() functions.
2765 After 5.005, perhaps we should replace this by Configure support
2766 for drand48(), random(), or rand(). For 5.005, though, maintain
2767 compatibility by calling rand() but allow the user to override it.
2768 See INSTALL for details. --Andy Dougherty 15 July 1998
2770 /* Now it's after 5.005, and Configure supports drand48() and random(),
2771 in addition to rand(). So the overrides should not be needed any more.
2772 --Jarkko Hietaniemi 27 September 1998
2775 #ifndef HAS_DRAND48_PROTO
2776 extern double drand48 (void);
2786 value = 1.0; (void)POPs;
2792 if (!PL_srand_called) {
2793 (void)seedDrand01((Rand_seed_t)seed());
2794 PL_srand_called = TRUE;
2804 const UV anum = (MAXARG < 1 || (!TOPs && !POPs)) ? seed() : POPu;
2805 (void)seedDrand01((Rand_seed_t)anum);
2806 PL_srand_called = TRUE;
2810 /* Historically srand always returned true. We can avoid breaking
2812 sv_setpvs(TARG, "0 but true");
2821 tryAMAGICun_MG(int_amg, AMGf_numeric);
2823 SV * const sv = TOPs;
2824 const IV iv = SvIV_nomg(sv);
2825 /* XXX it's arguable that compiler casting to IV might be subtly
2826 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2827 else preferring IV has introduced a subtle behaviour change bug. OTOH
2828 relying on floating point to be accurate is a bug. */
2833 else if (SvIOK(sv)) {
2835 SETu(SvUV_nomg(sv));
2840 const NV value = SvNV_nomg(sv);
2842 if (value < (NV)UV_MAX + 0.5) {
2845 SETn(Perl_floor(value));
2849 if (value > (NV)IV_MIN - 0.5) {
2852 SETn(Perl_ceil(value));
2863 tryAMAGICun_MG(abs_amg, AMGf_numeric);
2865 SV * const sv = TOPs;
2866 /* This will cache the NV value if string isn't actually integer */
2867 const IV iv = SvIV_nomg(sv);
2872 else if (SvIOK(sv)) {
2873 /* IVX is precise */
2875 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
2883 /* 2s complement assumption. Also, not really needed as
2884 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2890 const NV value = SvNV_nomg(sv);
2904 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
2908 SV* const sv = POPs;
2910 tmps = (SvPV_const(sv, len));
2912 /* If Unicode, try to downgrade
2913 * If not possible, croak. */
2914 SV* const tsv = sv_2mortal(newSVsv(sv));
2917 sv_utf8_downgrade(tsv, FALSE);
2918 tmps = SvPV_const(tsv, len);
2920 if (PL_op->op_type == OP_HEX)
2923 while (*tmps && len && isSPACE(*tmps))
2927 if (*tmps == 'x' || *tmps == 'X') {
2929 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
2931 else if (*tmps == 'b' || *tmps == 'B')
2932 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
2934 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2936 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2950 SV * const sv = TOPs;
2952 if (SvGAMAGIC(sv)) {
2953 /* For an overloaded or magic scalar, we can't know in advance if
2954 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2955 it likes to cache the length. Maybe that should be a documented
2960 = sv_2pv_flags(sv, &len,
2961 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
2964 if (!SvPADTMP(TARG)) {
2965 sv_setsv(TARG, &PL_sv_undef);
2970 else if (DO_UTF8(sv)) {
2971 SETi(utf8_length((U8*)p, (U8*)p + len));
2975 } else if (SvOK(sv)) {
2976 /* Neither magic nor overloaded. */
2978 SETi(sv_len_utf8(sv));
2982 if (!SvPADTMP(TARG)) {
2983 sv_setsv_nomg(TARG, &PL_sv_undef);
3005 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3008 const char *repl = NULL;
3010 int num_args = PL_op->op_private & 7;
3011 bool repl_need_utf8_upgrade = FALSE;
3012 bool repl_is_utf8 = FALSE;
3016 if((repl_sv = POPs)) {
3017 repl = SvPV_const(repl_sv, repl_len);
3018 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
3022 if ((len_sv = POPs)) {
3023 len_iv = SvIV(len_sv);
3024 len_is_uv = SvIOK_UV(len_sv);
3029 pos1_iv = SvIV(pos_sv);
3030 pos1_is_uv = SvIOK_UV(pos_sv);
3036 sv_utf8_upgrade(sv);
3038 else if (DO_UTF8(sv))
3039 repl_need_utf8_upgrade = TRUE;
3041 tmps = SvPV_const(sv, curlen);
3043 utf8_curlen = sv_len_utf8(sv);
3044 if (utf8_curlen == curlen)
3047 curlen = utf8_curlen;
3052 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
3053 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3056 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
3060 if (!len_is_uv && len_iv < 0) {
3061 pos2_iv = curlen + len_iv;
3063 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3066 } else { /* len_iv >= 0 */
3067 if (!pos1_is_uv && pos1_iv < 0) {
3068 pos2_iv = pos1_iv + len_iv;
3069 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3071 if ((UV)len_iv > curlen-(UV)pos1_iv)
3074 pos2_iv = pos1_iv+len_iv;
3084 if (!pos2_is_uv && pos2_iv < 0) {
3085 if (!pos1_is_uv && pos1_iv < 0)
3089 else if (!pos1_is_uv && pos1_iv < 0)
3092 if ((UV)pos2_iv < (UV)pos1_iv)
3094 if ((UV)pos2_iv > curlen)
3098 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3099 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3100 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3101 STRLEN byte_len = len;
3102 STRLEN byte_pos = utf8_curlen
3103 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3105 if (lvalue && !repl) {
3108 if (!SvGMAGICAL(sv)) {
3110 SvPV_force_nolen(sv);
3111 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3112 "Attempt to use reference as lvalue in substr");
3114 if (isGV_with_GP(sv))
3115 SvPV_force_nolen(sv);
3116 else if (SvOK(sv)) /* is it defined ? */
3117 (void)SvPOK_only_UTF8(sv);
3119 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3122 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3123 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3125 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3126 LvTARGOFF(ret) = pos;
3127 LvTARGLEN(ret) = len;
3130 PUSHs(ret); /* avoid SvSETMAGIC here */
3134 SvTAINTED_off(TARG); /* decontaminate */
3135 SvUTF8_off(TARG); /* decontaminate */
3138 sv_setpvn(TARG, tmps, byte_len);
3139 #ifdef USE_LOCALE_COLLATE
3140 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3146 SV* repl_sv_copy = NULL;
3148 if (repl_need_utf8_upgrade) {
3149 repl_sv_copy = newSVsv(repl_sv);
3150 sv_utf8_upgrade(repl_sv_copy);
3151 repl = SvPV_const(repl_sv_copy, repl_len);
3152 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3156 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3159 SvREFCNT_dec(repl_sv_copy);
3169 Perl_croak(aTHX_ "substr outside of string");
3170 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3177 register const IV size = POPi;
3178 register const IV offset = POPi;
3179 register SV * const src = POPs;
3180 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3183 if (lvalue) { /* it's an lvalue! */
3184 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3185 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3187 LvTARG(ret) = SvREFCNT_inc_simple(src);
3188 LvTARGOFF(ret) = offset;
3189 LvTARGLEN(ret) = size;
3193 SvTAINTED_off(TARG); /* decontaminate */
3197 sv_setuv(ret, do_vecget(src, offset, size));
3213 const char *little_p;
3216 const bool is_index = PL_op->op_type == OP_INDEX;
3217 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
3223 big_p = SvPV_const(big, biglen);
3224 little_p = SvPV_const(little, llen);
3226 big_utf8 = DO_UTF8(big);
3227 little_utf8 = DO_UTF8(little);
3228 if (big_utf8 ^ little_utf8) {
3229 /* One needs to be upgraded. */
3230 if (little_utf8 && !PL_encoding) {
3231 /* Well, maybe instead we might be able to downgrade the small
3233 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3236 /* If the large string is ISO-8859-1, and it's not possible to
3237 convert the small string to ISO-8859-1, then there is no
3238 way that it could be found anywhere by index. */
3243 /* At this point, pv is a malloc()ed string. So donate it to temp
3244 to ensure it will get free()d */
3245 little = temp = newSV(0);
3246 sv_usepvn(temp, pv, llen);
3247 little_p = SvPVX(little);
3250 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3253 sv_recode_to_utf8(temp, PL_encoding);
3255 sv_utf8_upgrade(temp);
3260 big_p = SvPV_const(big, biglen);
3263 little_p = SvPV_const(little, llen);
3267 if (SvGAMAGIC(big)) {
3268 /* Life just becomes a lot easier if I use a temporary here.
3269 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3270 will trigger magic and overloading again, as will fbm_instr()
3272 big = newSVpvn_flags(big_p, biglen,
3273 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3276 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3277 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3278 warn on undef, and we've already triggered a warning with the
3279 SvPV_const some lines above. We can't remove that, as we need to
3280 call some SvPV to trigger overloading early and find out if the
3282 This is all getting to messy. The API isn't quite clean enough,
3283 because data access has side effects.
3285 little = newSVpvn_flags(little_p, llen,
3286 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3287 little_p = SvPVX(little);
3291 offset = is_index ? 0 : biglen;
3293 if (big_utf8 && offset > 0)
3294 sv_pos_u2b(big, &offset, 0);
3300 else if (offset > (I32)biglen)
3302 if (!(little_p = is_index
3303 ? fbm_instr((unsigned char*)big_p + offset,
3304 (unsigned char*)big_p + biglen, little, 0)
3305 : rninstr(big_p, big_p + offset,
3306 little_p, little_p + llen)))
3309 retval = little_p - big_p;
3310 if (retval > 0 && big_utf8)
3311 sv_pos_b2u(big, &retval);
3321 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3322 SvTAINTED_off(TARG);
3323 do_sprintf(TARG, SP-MARK, MARK+1);
3324 TAINT_IF(SvTAINTED(TARG));
3336 const U8 *s = (U8*)SvPV_const(argsv, len);
3338 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3339 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3340 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3344 XPUSHu(DO_UTF8(argsv) ?
3345 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3357 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3359 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3361 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3363 (void) POPs; /* Ignore the argument value. */
3364 value = UNICODE_REPLACEMENT;
3370 SvUPGRADE(TARG,SVt_PV);
3372 if (value > 255 && !IN_BYTES) {
3373 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3374 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3375 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3377 (void)SvPOK_only(TARG);
3386 *tmps++ = (char)value;
3388 (void)SvPOK_only(TARG);
3390 if (PL_encoding && !IN_BYTES) {
3391 sv_recode_to_utf8(TARG, PL_encoding);
3393 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3394 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3398 *tmps++ = (char)value;
3414 const char *tmps = SvPV_const(left, len);
3416 if (DO_UTF8(left)) {
3417 /* If Unicode, try to downgrade.
3418 * If not possible, croak.
3419 * Yes, we made this up. */
3420 SV* const tsv = sv_2mortal(newSVsv(left));
3423 sv_utf8_downgrade(tsv, FALSE);
3424 tmps = SvPV_const(tsv, len);
3426 # ifdef USE_ITHREADS
3428 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3429 /* This should be threadsafe because in ithreads there is only
3430 * one thread per interpreter. If this would not be true,
3431 * we would need a mutex to protect this malloc. */
3432 PL_reentrant_buffer->_crypt_struct_buffer =
3433 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3434 #if defined(__GLIBC__) || defined(__EMX__)
3435 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3436 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3437 /* work around glibc-2.2.5 bug */
3438 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3442 # endif /* HAS_CRYPT_R */
3443 # endif /* USE_ITHREADS */
3445 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3447 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3453 "The crypt() function is unimplemented due to excessive paranoia.");
3457 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3458 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3460 /* Below are several macros that generate code */
3461 /* Generates code to store a unicode codepoint c that is known to occupy
3462 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3463 #define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3465 *(p) = UTF8_TWO_BYTE_HI(c); \
3466 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3469 /* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3470 * available byte after the two bytes */
3471 #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3473 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3474 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3477 /* Generates code to store the upper case of latin1 character l which is known
3478 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3479 * are only two characters that fit this description, and this macro knows
3480 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3482 #define STORE_NON_LATIN1_UC(p, l) \
3484 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3485 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3486 } else { /* Must be the following letter */ \
3487 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3491 /* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3492 * after the character stored */
3493 #define CAT_NON_LATIN1_UC(p, l) \
3495 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3496 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3498 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3502 /* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3503 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3504 * and must require two bytes to store it. Advances p to point to the next
3505 * available position */
3506 #define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3508 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3509 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3510 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3511 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3512 } else {/* else is one of the other two special cases */ \
3513 CAT_NON_LATIN1_UC((p), (l)); \
3519 /* Actually is both lcfirst() and ucfirst(). Only the first character
3520 * changes. This means that possibly we can change in-place, ie., just
3521 * take the source and change that one character and store it back, but not
3522 * if read-only etc, or if the length changes */
3527 STRLEN slen; /* slen is the byte length of the whole SV. */
3530 bool inplace; /* ? Convert first char only, in-place */
3531 bool doing_utf8 = FALSE; /* ? using utf8 */
3532 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3533 const int op_type = PL_op->op_type;
3536 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3537 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3538 * stored as UTF-8 at s. */
3539 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3540 * lowercased) character stored in tmpbuf. May be either
3541 * UTF-8 or not, but in either case is the number of bytes */
3545 s = (const U8*)SvPV_nomg_const(source, slen);
3547 if (ckWARN(WARN_UNINITIALIZED))
3548 report_uninit(source);
3553 /* We may be able to get away with changing only the first character, in
3554 * place, but not if read-only, etc. Later we may discover more reasons to
3555 * not convert in-place. */
3556 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3558 /* First calculate what the changed first character should be. This affects
3559 * whether we can just swap it out, leaving the rest of the string unchanged,
3560 * or even if have to convert the dest to UTF-8 when the source isn't */
3562 if (! slen) { /* If empty */
3563 need = 1; /* still need a trailing NUL */
3565 else if (DO_UTF8(source)) { /* Is the source utf8? */
3568 if (UTF8_IS_INVARIANT(*s)) {
3570 /* An invariant source character is either ASCII or, in EBCDIC, an
3571 * ASCII equivalent or a caseless C1 control. In both these cases,
3572 * the lower and upper cases of any character are also invariants
3573 * (and title case is the same as upper case). So it is safe to
3574 * use the simple case change macros which avoid the overhead of
3575 * the general functions. Note that if perl were to be extended to
3576 * do locale handling in UTF-8 strings, this wouldn't be true in,
3577 * for example, Lithuanian or Turkic. */
3578 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3582 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3585 /* Similarly, if the source character isn't invariant but is in the
3586 * latin1 range (or EBCDIC equivalent thereof), we have the case
3587 * changes compiled into perl, and can avoid the overhead of the
3588 * general functions. In this range, the characters are stored as
3589 * two UTF-8 bytes, and it so happens that any changed-case version
3590 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3594 /* Convert the two source bytes to a single Unicode code point
3595 * value, change case and save for below */
3596 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3597 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3598 U8 lower = toLOWER_LATIN1(chr);
3599 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3601 else { /* ucfirst */
3602 U8 upper = toUPPER_LATIN1_MOD(chr);
3604 /* Most of the latin1 range characters are well-behaved. Their
3605 * title and upper cases are the same, and are also in the
3606 * latin1 range. The macro above returns their upper (hence
3607 * title) case, and all that need be done is to save the result
3608 * for below. However, several characters are problematic, and
3609 * have to be handled specially. The MOD in the macro name
3610 * above means that these tricky characters all get mapped to
3611 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3612 * This mapping saves some tests for the majority of the
3615 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3617 /* Not tricky. Just save it. */
3618 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3620 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3622 /* This one is tricky because it is two characters long,
3623 * though the UTF-8 is still two bytes, so the stored
3624 * length doesn't change */
3625 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3626 *(tmpbuf + 1) = 's';
3630 /* The other two have their title and upper cases the same,
3631 * but are tricky because the changed-case characters
3632 * aren't in the latin1 range. They, however, do fit into
3633 * two UTF-8 bytes */
3634 STORE_NON_LATIN1_UC(tmpbuf, chr);
3640 /* Here, can't short-cut the general case */
3642 utf8_to_uvchr(s, &ulen);
3643 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3644 else toLOWER_utf8(s, tmpbuf, &tculen);
3646 /* we can't do in-place if the length changes. */
3647 if (ulen != tculen) inplace = FALSE;
3648 need = slen + 1 - ulen + tculen;
3651 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3652 * latin1 is treated as caseless. Note that a locale takes
3654 tculen = 1; /* Most characters will require one byte, but this will
3655 * need to be overridden for the tricky ones */
3658 if (op_type == OP_LCFIRST) {
3660 /* lower case the first letter: no trickiness for any character */
3661 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3662 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3665 else if (IN_LOCALE_RUNTIME) {
3666 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3667 * have upper and title case different
3670 else if (! IN_UNI_8_BIT) {
3671 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3672 * on EBCDIC machines whatever the
3673 * native function does */
3675 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3676 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3678 /* tmpbuf now has the correct title case for all latin1 characters
3679 * except for the several ones that have tricky handling. All
3680 * of these are mapped by the MOD to the letter below. */
3681 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3683 /* The length is going to change, with all three of these, so
3684 * can't replace just the first character */
3687 /* We use the original to distinguish between these tricky
3689 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3690 /* Two character title case 'Ss', but can remain non-UTF-8 */
3693 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3698 /* The other two tricky ones have their title case outside
3699 * latin1. It is the same as their upper case. */
3701 STORE_NON_LATIN1_UC(tmpbuf, *s);
3703 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3704 * and their upper cases is 2. */
3707 /* The entire result will have to be in UTF-8. Assume worst
3708 * case sizing in conversion. (all latin1 characters occupy
3709 * at most two bytes in utf8) */
3710 convert_source_to_utf8 = TRUE;
3711 need = slen * 2 + 1;
3713 } /* End of is one of the three special chars */
3714 } /* End of use Unicode (Latin1) semantics */
3715 } /* End of changing the case of the first character */
3717 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3718 * generate the result */
3721 /* We can convert in place. This means we change just the first
3722 * character without disturbing the rest; no need to grow */
3724 s = d = (U8*)SvPV_force_nomg(source, slen);
3730 /* Here, we can't convert in place; we earlier calculated how much
3731 * space we will need, so grow to accommodate that */
3732 SvUPGRADE(dest, SVt_PV);
3733 d = (U8*)SvGROW(dest, need);
3734 (void)SvPOK_only(dest);
3741 if (! convert_source_to_utf8) {
3743 /* Here both source and dest are in UTF-8, but have to create
3744 * the entire output. We initialize the result to be the
3745 * title/lower cased first character, and then append the rest
3747 sv_setpvn(dest, (char*)tmpbuf, tculen);
3749 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3753 const U8 *const send = s + slen;
3755 /* Here the dest needs to be in UTF-8, but the source isn't,
3756 * except we earlier UTF-8'd the first character of the source
3757 * into tmpbuf. First put that into dest, and then append the
3758 * rest of the source, converting it to UTF-8 as we go. */
3760 /* Assert tculen is 2 here because the only two characters that
3761 * get to this part of the code have 2-byte UTF-8 equivalents */
3763 *d++ = *(tmpbuf + 1);
3764 s++; /* We have just processed the 1st char */
3766 for (; s < send; s++) {
3767 d = uvchr_to_utf8(d, *s);
3770 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3774 else { /* in-place UTF-8. Just overwrite the first character */
3775 Copy(tmpbuf, d, tculen, U8);
3776 SvCUR_set(dest, need - 1);
3779 else { /* Neither source nor dest are in or need to be UTF-8 */
3781 if (IN_LOCALE_RUNTIME) {
3785 if (inplace) { /* in-place, only need to change the 1st char */
3788 else { /* Not in-place */
3790 /* Copy the case-changed character(s) from tmpbuf */
3791 Copy(tmpbuf, d, tculen, U8);
3792 d += tculen - 1; /* Code below expects d to point to final
3793 * character stored */
3796 else { /* empty source */
3797 /* See bug #39028: Don't taint if empty */
3801 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3802 * the destination to retain that flag */
3806 if (!inplace) { /* Finish the rest of the string, unchanged */
3807 /* This will copy the trailing NUL */
3808 Copy(s + 1, d + 1, slen, U8);
3809 SvCUR_set(dest, need - 1);
3812 if (dest != source && SvTAINTED(source))
3818 /* There's so much setup/teardown code common between uc and lc, I wonder if
3819 it would be worth merging the two, and just having a switch outside each
3820 of the three tight loops. There is less and less commonality though */
3834 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3835 && SvTEMP(source) && !DO_UTF8(source)
3836 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3838 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3839 * make the loop tight, so we overwrite the source with the dest before
3840 * looking at it, and we need to look at the original source
3841 * afterwards. There would also need to be code added to handle
3842 * switching to not in-place in midstream if we run into characters
3843 * that change the length.
3846 s = d = (U8*)SvPV_force_nomg(source, len);
3853 /* The old implementation would copy source into TARG at this point.
3854 This had the side effect that if source was undef, TARG was now
3855 an undefined SV with PADTMP set, and they don't warn inside
3856 sv_2pv_flags(). However, we're now getting the PV direct from
3857 source, which doesn't have PADTMP set, so it would warn. Hence the
3861 s = (const U8*)SvPV_nomg_const(source, len);
3863 if (ckWARN(WARN_UNINITIALIZED))
3864 report_uninit(source);
3870 SvUPGRADE(dest, SVt_PV);
3871 d = (U8*)SvGROW(dest, min);
3872 (void)SvPOK_only(dest);
3877 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3878 to check DO_UTF8 again here. */
3880 if (DO_UTF8(source)) {
3881 const U8 *const send = s + len;
3882 U8 tmpbuf[UTF8_MAXBYTES+1];
3884 /* All occurrences of these are to be moved to follow any other marks.
3885 * This is context-dependent. We may not be passed enough context to
3886 * move the iota subscript beyond all of them, but we do the best we can
3887 * with what we're given. The result is always better than if we
3888 * hadn't done this. And, the problem would only arise if we are
3889 * passed a character without all its combining marks, which would be
3890 * the caller's mistake. The information this is based on comes from a
3891 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3892 * itself) and so can't be checked properly to see if it ever gets
3893 * revised. But the likelihood of it changing is remote */
3894 bool in_iota_subscript = FALSE;
3897 if (in_iota_subscript && ! is_utf8_mark(s)) {
3898 /* A non-mark. Time to output the iota subscript */
3899 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
3900 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3902 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3903 in_iota_subscript = FALSE;
3906 /* If the UTF-8 character is invariant, then it is in the range
3907 * known by the standard macro; result is only one byte long */
3908 if (UTF8_IS_INVARIANT(*s)) {
3912 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3914 /* Likewise, if it fits in a byte, its case change is in our
3916 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3917 U8 upper = toUPPER_LATIN1_MOD(orig);
3918 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
3923 /* Otherwise, need the general UTF-8 case. Get the changed
3924 * case value and copy it to the output buffer */
3926 const STRLEN u = UTF8SKIP(s);
3929 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
3930 if (uv == GREEK_CAPITAL_LETTER_IOTA
3931 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
3933 in_iota_subscript = TRUE;
3936 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3937 /* If the eventually required minimum size outgrows
3938 * the available space, we need to grow. */
3939 const UV o = d - (U8*)SvPVX_const(dest);
3941 /* If someone uppercases one million U+03B0s we
3942 * SvGROW() one million times. Or we could try
3943 * guessing how much to allocate without allocating too
3944 * much. Such is life. See corresponding comment in
3945 * lc code for another option */
3947 d = (U8*)SvPVX(dest) + o;
3949 Copy(tmpbuf, d, ulen, U8);
3955 if (in_iota_subscript) {
3956 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3960 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3962 else { /* Not UTF-8 */
3964 const U8 *const send = s + len;
3966 /* Use locale casing if in locale; regular style if not treating
3967 * latin1 as having case; otherwise the latin1 casing. Do the
3968 * whole thing in a tight loop, for speed, */
3969 if (IN_LOCALE_RUNTIME) {
3972 for (; s < send; d++, s++)
3973 *d = toUPPER_LC(*s);
3975 else if (! IN_UNI_8_BIT) {
3976 for (; s < send; d++, s++) {
3981 for (; s < send; d++, s++) {
3982 *d = toUPPER_LATIN1_MOD(*s);
3983 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
3985 /* The mainstream case is the tight loop above. To avoid
3986 * extra tests in that, all three characters that require
3987 * special handling are mapped by the MOD to the one tested
3989 * Use the source to distinguish between the three cases */
3991 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3993 /* uc() of this requires 2 characters, but they are
3994 * ASCII. If not enough room, grow the string */
3995 if (SvLEN(dest) < ++min) {
3996 const UV o = d - (U8*)SvPVX_const(dest);
3998 d = (U8*)SvPVX(dest) + o;
4000 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4001 continue; /* Back to the tight loop; still in ASCII */
4004 /* The other two special handling characters have their
4005 * upper cases outside the latin1 range, hence need to be
4006 * in UTF-8, so the whole result needs to be in UTF-8. So,
4007 * here we are somewhere in the middle of processing a
4008 * non-UTF-8 string, and realize that we will have to convert
4009 * the whole thing to UTF-8. What to do? There are
4010 * several possibilities. The simplest to code is to
4011 * convert what we have so far, set a flag, and continue on
4012 * in the loop. The flag would be tested each time through
4013 * the loop, and if set, the next character would be
4014 * converted to UTF-8 and stored. But, I (khw) didn't want
4015 * to slow down the mainstream case at all for this fairly
4016 * rare case, so I didn't want to add a test that didn't
4017 * absolutely have to be there in the loop, besides the
4018 * possibility that it would get too complicated for
4019 * optimizers to deal with. Another possibility is to just
4020 * give up, convert the source to UTF-8, and restart the
4021 * function that way. Another possibility is to convert
4022 * both what has already been processed and what is yet to
4023 * come separately to UTF-8, then jump into the loop that
4024 * handles UTF-8. But the most efficient time-wise of the
4025 * ones I could think of is what follows, and turned out to
4026 * not require much extra code. */
4028 /* Convert what we have so far into UTF-8, telling the
4029 * function that we know it should be converted, and to
4030 * allow extra space for what we haven't processed yet.
4031 * Assume the worst case space requirements for converting
4032 * what we haven't processed so far: that it will require
4033 * two bytes for each remaining source character, plus the
4034 * NUL at the end. This may cause the string pointer to
4035 * move, so re-find it. */
4037 len = d - (U8*)SvPVX_const(dest);
4038 SvCUR_set(dest, len);
4039 len = sv_utf8_upgrade_flags_grow(dest,
4040 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4042 d = (U8*)SvPVX(dest) + len;
4044 /* And append the current character's upper case in UTF-8 */
4045 CAT_NON_LATIN1_UC(d, *s);
4047 /* Now process the remainder of the source, converting to
4048 * upper and UTF-8. If a resulting byte is invariant in
4049 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4050 * append it to the output. */
4053 for (; s < send; s++) {
4054 U8 upper = toUPPER_LATIN1_MOD(*s);
4055 if UTF8_IS_INVARIANT(upper) {
4059 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4063 /* Here have processed the whole source; no need to continue
4064 * with the outer loop. Each character has been converted
4065 * to upper case and converted to UTF-8 */
4068 } /* End of processing all latin1-style chars */
4069 } /* End of processing all chars */
4070 } /* End of source is not empty */
4072 if (source != dest) {
4073 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4074 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4076 } /* End of isn't utf8 */
4077 if (dest != source && SvTAINTED(source))
4096 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4097 && SvTEMP(source) && !DO_UTF8(source)) {
4099 /* We can convert in place, as lowercasing anything in the latin1 range
4100 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4102 s = d = (U8*)SvPV_force_nomg(source, len);
4109 /* The old implementation would copy source into TARG at this point.
4110 This had the side effect that if source was undef, TARG was now
4111 an undefined SV with PADTMP set, and they don't warn inside
4112 sv_2pv_flags(). However, we're now getting the PV direct from
4113 source, which doesn't have PADTMP set, so it would warn. Hence the
4117 s = (const U8*)SvPV_nomg_const(source, len);
4119 if (ckWARN(WARN_UNINITIALIZED))
4120 report_uninit(source);
4126 SvUPGRADE(dest, SVt_PV);
4127 d = (U8*)SvGROW(dest, min);
4128 (void)SvPOK_only(dest);
4133 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4134 to check DO_UTF8 again here. */
4136 if (DO_UTF8(source)) {
4137 const U8 *const send = s + len;
4138 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4141 if (UTF8_IS_INVARIANT(*s)) {
4143 /* Invariant characters use the standard mappings compiled in.
4148 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4150 /* As do the ones in the Latin1 range */
4151 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)));
4152 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4156 /* Here, is utf8 not in Latin-1 range, have to go out and get
4157 * the mappings from the tables. */
4159 const STRLEN u = UTF8SKIP(s);
4162 #ifndef CONTEXT_DEPENDENT_CASING
4163 toLOWER_utf8(s, tmpbuf, &ulen);
4165 /* This is ifdefd out because it probably is the wrong thing to do. The right
4166 * thing is probably to have an I/O layer that converts final sigma to regular
4167 * on input and vice versa (under the correct circumstances) on output. In
4168 * effect, the final sigma is just a glyph variation when the regular one
4169 * occurs at the end of a word. And we don't really know what's going to be
4170 * the end of the word until it is finally output, as splitting and joining can
4171 * occur at any time and change what once was the word end to be in the middle,
4172 * and vice versa. */
4174 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4176 /* If the lower case is a small sigma, it may be that we need
4177 * to change it to a final sigma. This happens at the end of
4178 * a word that contains more than just this character, and only
4179 * when we started with a capital sigma. */
4180 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4181 s > send - len && /* Makes sure not the first letter */
4182 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4185 /* We use the algorithm in:
4186 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4187 * is a CAPITAL SIGMA): If C is preceded by a sequence
4188 * consisting of a cased letter and a case-ignorable
4189 * sequence, and C is not followed by a sequence consisting
4190 * of a case ignorable sequence and then a cased letter,
4191 * then when lowercasing C, C becomes a final sigma */
4193 /* To determine if this is the end of a word, need to peek
4194 * ahead. Look at the next character */
4195 const U8 *peek = s + u;
4197 /* Skip any case ignorable characters */
4198 while (peek < send && is_utf8_case_ignorable(peek)) {
4199 peek += UTF8SKIP(peek);
4202 /* If we reached the end of the string without finding any
4203 * non-case ignorable characters, or if the next such one
4204 * is not-cased, then we have met the conditions for it
4205 * being a final sigma with regards to peek ahead, and so
4206 * must do peek behind for the remaining conditions. (We
4207 * know there is stuff behind to look at since we tested
4208 * above that this isn't the first letter) */
4209 if (peek >= send || ! is_utf8_cased(peek)) {
4210 peek = utf8_hop(s, -1);
4212 /* Here are at the beginning of the first character
4213 * before the original upper case sigma. Keep backing
4214 * up, skipping any case ignorable characters */
4215 while (is_utf8_case_ignorable(peek)) {
4216 peek = utf8_hop(peek, -1);
4219 /* Here peek points to the first byte of the closest
4220 * non-case-ignorable character before the capital
4221 * sigma. If it is cased, then by the Unicode
4222 * algorithm, we should use a small final sigma instead
4223 * of what we have */
4224 if (is_utf8_cased(peek)) {
4225 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4226 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4230 else { /* Not a context sensitive mapping */
4231 #endif /* End of commented out context sensitive */
4232 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4234 /* If the eventually required minimum size outgrows
4235 * the available space, we need to grow. */
4236 const UV o = d - (U8*)SvPVX_const(dest);
4238 /* If someone lowercases one million U+0130s we
4239 * SvGROW() one million times. Or we could try
4240 * guessing how much to allocate without allocating too
4241 * much. Such is life. Another option would be to
4242 * grow an extra byte or two more each time we need to
4243 * grow, which would cut down the million to 500K, with
4246 d = (U8*)SvPVX(dest) + o;
4248 #ifdef CONTEXT_DEPENDENT_CASING
4251 /* Copy the newly lowercased letter to the output buffer we're
4253 Copy(tmpbuf, d, ulen, U8);
4257 } /* End of looping through the source string */
4260 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4261 } else { /* Not utf8 */
4263 const U8 *const send = s + len;
4265 /* Use locale casing if in locale; regular style if not treating
4266 * latin1 as having case; otherwise the latin1 casing. Do the
4267 * whole thing in a tight loop, for speed, */
4268 if (IN_LOCALE_RUNTIME) {
4271 for (; s < send; d++, s++)
4272 *d = toLOWER_LC(*s);
4274 else if (! IN_UNI_8_BIT) {
4275 for (; s < send; d++, s++) {
4280 for (; s < send; d++, s++) {
4281 *d = toLOWER_LATIN1(*s);
4285 if (source != dest) {
4287 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4290 if (dest != source && SvTAINTED(source))
4299 SV * const sv = TOPs;
4301 register const char *s = SvPV_const(sv,len);
4303 SvUTF8_off(TARG); /* decontaminate */
4306 SvUPGRADE(TARG, SVt_PV);
4307 SvGROW(TARG, (len * 2) + 1);
4311 if (UTF8_IS_CONTINUED(*s)) {
4312 STRLEN ulen = UTF8SKIP(s);
4336 SvCUR_set(TARG, d - SvPVX_const(TARG));
4337 (void)SvPOK_only_UTF8(TARG);
4340 sv_setpvn(TARG, s, len);
4349 dVAR; dSP; dMARK; dORIGMARK;
4350 register AV *const av = MUTABLE_AV(POPs);
4351 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4353 if (SvTYPE(av) == SVt_PVAV) {
4354 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4355 bool can_preserve = FALSE;
4361 can_preserve = SvCANEXISTDELETE(av);
4364 if (lval && localizing) {
4367 for (svp = MARK + 1; svp <= SP; svp++) {
4368 const I32 elem = SvIV(*svp);
4372 if (max > AvMAX(av))
4376 while (++MARK <= SP) {
4378 I32 elem = SvIV(*MARK);
4379 bool preeminent = TRUE;
4381 if (localizing && can_preserve) {
4382 /* If we can determine whether the element exist,
4383 * Try to preserve the existenceness of a tied array
4384 * element by using EXISTS and DELETE if possible.
4385 * Fallback to FETCH and STORE otherwise. */
4386 preeminent = av_exists(av, elem);
4389 svp = av_fetch(av, elem, lval);
4391 if (!svp || *svp == &PL_sv_undef)
4392 DIE(aTHX_ PL_no_aelem, elem);
4395 save_aelem(av, elem, svp);
4397 SAVEADELETE(av, elem);
4400 *MARK = svp ? *svp : &PL_sv_undef;
4403 if (GIMME != G_ARRAY) {
4405 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4411 /* Smart dereferencing for keys, values and each */
4423 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4428 "Type of argument to %s must be unblessed hashref or arrayref",
4429 PL_op_desc[PL_op->op_type] );
4432 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4434 "Can't modify %s in %s",
4435 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4438 /* Delegate to correct function for op type */
4440 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4441 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4444 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4452 AV *array = MUTABLE_AV(POPs);
4453 const I32 gimme = GIMME_V;
4454 IV *iterp = Perl_av_iter_p(aTHX_ array);
4455 const IV current = (*iterp)++;
4457 if (current > av_len(array)) {
4459 if (gimme == G_SCALAR)
4467 if (gimme == G_ARRAY) {
4468 SV **const element = av_fetch(array, current, 0);
4469 PUSHs(element ? *element : &PL_sv_undef);
4478 AV *array = MUTABLE_AV(POPs);
4479 const I32 gimme = GIMME_V;
4481 *Perl_av_iter_p(aTHX_ array) = 0;
4483 if (gimme == G_SCALAR) {
4485 PUSHi(av_len(array) + 1);
4487 else if (gimme == G_ARRAY) {
4488 IV n = Perl_av_len(aTHX_ array);
4493 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
4494 for (i = 0; i <= n; i++) {
4499 for (i = 0; i <= n; i++) {
4500 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4501 PUSHs(elem ? *elem : &PL_sv_undef);
4508 /* Associative arrays. */
4514 HV * hash = MUTABLE_HV(POPs);
4516 const I32 gimme = GIMME_V;
4519 /* might clobber stack_sp */
4520 entry = hv_iternext(hash);
4525 SV* const sv = hv_iterkeysv(entry);
4526 PUSHs(sv); /* won't clobber stack_sp */
4527 if (gimme == G_ARRAY) {
4530 /* might clobber stack_sp */
4531 val = hv_iterval(hash, entry);
4536 else if (gimme == G_SCALAR)
4543 S_do_delete_local(pTHX)
4547 const I32 gimme = GIMME_V;
4551 if (PL_op->op_private & OPpSLICE) {
4553 SV * const osv = POPs;
4554 const bool tied = SvRMAGICAL(osv)
4555 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4556 const bool can_preserve = SvCANEXISTDELETE(osv)
4557 || mg_find((const SV *)osv, PERL_MAGIC_env);
4558 const U32 type = SvTYPE(osv);
4559 if (type == SVt_PVHV) { /* hash element */
4560 HV * const hv = MUTABLE_HV(osv);
4561 while (++MARK <= SP) {
4562 SV * const keysv = *MARK;
4564 bool preeminent = TRUE;
4566 preeminent = hv_exists_ent(hv, keysv, 0);
4568 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4575 sv = hv_delete_ent(hv, keysv, 0, 0);
4576 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4579 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4581 *MARK = sv_mortalcopy(sv);
4587 SAVEHDELETE(hv, keysv);
4588 *MARK = &PL_sv_undef;
4592 else if (type == SVt_PVAV) { /* array element */
4593 if (PL_op->op_flags & OPf_SPECIAL) {
4594 AV * const av = MUTABLE_AV(osv);
4595 while (++MARK <= SP) {
4596 I32 idx = SvIV(*MARK);
4598 bool preeminent = TRUE;
4600 preeminent = av_exists(av, idx);
4602 SV **svp = av_fetch(av, idx, 1);
4609 sv = av_delete(av, idx, 0);
4610 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4613 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4615 *MARK = sv_mortalcopy(sv);
4621 SAVEADELETE(av, idx);
4622 *MARK = &PL_sv_undef;
4628 DIE(aTHX_ "Not a HASH reference");
4629 if (gimme == G_VOID)
4631 else if (gimme == G_SCALAR) {
4636 *++MARK = &PL_sv_undef;
4641 SV * const keysv = POPs;
4642 SV * const osv = POPs;
4643 const bool tied = SvRMAGICAL(osv)
4644 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4645 const bool can_preserve = SvCANEXISTDELETE(osv)
4646 || mg_find((const SV *)osv, PERL_MAGIC_env);
4647 const U32 type = SvTYPE(osv);
4649 if (type == SVt_PVHV) {
4650 HV * const hv = MUTABLE_HV(osv);
4651 bool preeminent = TRUE;
4653 preeminent = hv_exists_ent(hv, keysv, 0);
4655 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4662 sv = hv_delete_ent(hv, keysv, 0, 0);
4663 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4666 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4668 SV *nsv = sv_mortalcopy(sv);
4674 SAVEHDELETE(hv, keysv);
4676 else if (type == SVt_PVAV) {
4677 if (PL_op->op_flags & OPf_SPECIAL) {
4678 AV * const av = MUTABLE_AV(osv);
4679 I32 idx = SvIV(keysv);
4680 bool preeminent = TRUE;
4682 preeminent = av_exists(av, idx);
4684 SV **svp = av_fetch(av, idx, 1);
4691 sv = av_delete(av, idx, 0);
4692 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4695 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4697 SV *nsv = sv_mortalcopy(sv);
4703 SAVEADELETE(av, idx);
4706 DIE(aTHX_ "panic: avhv_delete no longer supported");
4709 DIE(aTHX_ "Not a HASH reference");
4712 if (gimme != G_VOID)
4726 if (PL_op->op_private & OPpLVAL_INTRO)
4727 return do_delete_local();
4730 discard = (gimme == G_VOID) ? G_DISCARD : 0;
4732 if (PL_op->op_private & OPpSLICE) {
4734 HV * const hv = MUTABLE_HV(POPs);
4735 const U32 hvtype = SvTYPE(hv);
4736 if (hvtype == SVt_PVHV) { /* hash element */
4737 while (++MARK <= SP) {
4738 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
4739 *MARK = sv ? sv : &PL_sv_undef;
4742 else if (hvtype == SVt_PVAV) { /* array element */
4743 if (PL_op->op_flags & OPf_SPECIAL) {
4744 while (++MARK <= SP) {
4745 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
4746 *MARK = sv ? sv : &PL_sv_undef;
4751 DIE(aTHX_ "Not a HASH reference");
4754 else if (gimme == G_SCALAR) {
4759 *++MARK = &PL_sv_undef;
4765 HV * const hv = MUTABLE_HV(POPs);
4767 if (SvTYPE(hv) == SVt_PVHV)
4768 sv = hv_delete_ent(hv, keysv, discard, 0);
4769 else if (SvTYPE(hv) == SVt_PVAV) {
4770 if (PL_op->op_flags & OPf_SPECIAL)
4771 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
4773 DIE(aTHX_ "panic: avhv_delete no longer supported");
4776 DIE(aTHX_ "Not a HASH reference");
4792 if (PL_op->op_private & OPpEXISTS_SUB) {
4794 SV * const sv = POPs;
4795 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
4798 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
4803 hv = MUTABLE_HV(POPs);
4804 if (SvTYPE(hv) == SVt_PVHV) {
4805 if (hv_exists_ent(hv, tmpsv, 0))
4808 else if (SvTYPE(hv) == SVt_PVAV) {
4809 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
4810 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
4815 DIE(aTHX_ "Not a HASH reference");
4822 dVAR; dSP; dMARK; dORIGMARK;
4823 register HV * const hv = MUTABLE_HV(POPs);
4824 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4825 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4826 bool can_preserve = FALSE;
4832 if (SvCANEXISTDELETE(hv) || mg_find((const SV *)hv, PERL_MAGIC_env))
4833 can_preserve = TRUE;
4836 while (++MARK <= SP) {
4837 SV * const keysv = *MARK;
4840 bool preeminent = TRUE;
4842 if (localizing && can_preserve) {
4843 /* If we can determine whether the element exist,
4844 * try to preserve the existenceness of a tied hash
4845 * element by using EXISTS and DELETE if possible.
4846 * Fallback to FETCH and STORE otherwise. */
4847 preeminent = hv_exists_ent(hv, keysv, 0);
4850 he = hv_fetch_ent(hv, keysv, lval, 0);
4851 svp = he ? &HeVAL(he) : NULL;
4854 if (!svp || *svp == &PL_sv_undef) {
4855 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
4858 if (HvNAME_get(hv) && isGV(*svp))
4859 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
4860 else if (preeminent)
4861 save_helem_flags(hv, keysv, svp,
4862 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
4864 SAVEHDELETE(hv, keysv);
4867 *MARK = svp ? *svp : &PL_sv_undef;
4869 if (GIMME != G_ARRAY) {
4871 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4877 /* List operators. */
4882 if (GIMME != G_ARRAY) {
4884 *MARK = *SP; /* unwanted list, return last item */
4886 *MARK = &PL_sv_undef;
4896 SV ** const lastrelem = PL_stack_sp;
4897 SV ** const lastlelem = PL_stack_base + POPMARK;
4898 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
4899 register SV ** const firstrelem = lastlelem + 1;
4900 I32 is_something_there = FALSE;
4902 register const I32 max = lastrelem - lastlelem;
4903 register SV **lelem;
4905 if (GIMME != G_ARRAY) {
4906 I32 ix = SvIV(*lastlelem);
4909 if (ix < 0 || ix >= max)
4910 *firstlelem = &PL_sv_undef;
4912 *firstlelem = firstrelem[ix];
4918 SP = firstlelem - 1;
4922 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
4923 I32 ix = SvIV(*lelem);
4926 if (ix < 0 || ix >= max)
4927 *lelem = &PL_sv_undef;
4929 is_something_there = TRUE;
4930 if (!(*lelem = firstrelem[ix]))
4931 *lelem = &PL_sv_undef;
4934 if (is_something_there)
4937 SP = firstlelem - 1;
4943 dVAR; dSP; dMARK; dORIGMARK;
4944 const I32 items = SP - MARK;
4945 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
4946 SP = ORIGMARK; /* av_make() might realloc stack_sp */
4947 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
4948 ? newRV_noinc(av) : av);
4954 dVAR; dSP; dMARK; dORIGMARK;
4955 HV* const hv = newHV();
4958 SV * const key = *++MARK;
4959 SV * const val = newSV(0);
4961 sv_setsv(val, *++MARK);
4963 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
4964 (void)hv_store_ent(hv,key,val,0);
4967 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
4968 ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
4973 S_deref_plain_array(pTHX_ AV *ary)
4975 if (SvTYPE(ary) == SVt_PVAV) return ary;
4976 SvGETMAGIC((SV *)ary);
4977 if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV)
4978 Perl_die(aTHX_ "Not an ARRAY reference");
4979 else if (SvOBJECT(SvRV(ary)))
4980 Perl_die(aTHX_ "Not an unblessed ARRAY reference");
4981 return (AV *)SvRV(ary);
4984 #if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
4985 # define DEREF_PLAIN_ARRAY(ary) \
4988 SvTYPE(aRrRay) == SVt_PVAV \
4990 : S_deref_plain_array(aTHX_ aRrRay); \
4993 # define DEREF_PLAIN_ARRAY(ary) \
4995 PL_Sv = (SV *)(ary), \
4996 SvTYPE(PL_Sv) == SVt_PVAV \
4998 : S_deref_plain_array(aTHX_ (AV *)PL_Sv) \
5004 dVAR; dSP; dMARK; dORIGMARK;
5005 int num_args = (SP - MARK);
5006 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5010 register I32 offset;
5011 register I32 length;
5015 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5018 return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg,
5019 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
5026 offset = i = SvIV(*MARK);
5028 offset += AvFILLp(ary) + 1;
5030 DIE(aTHX_ PL_no_aelem, i);
5032 length = SvIVx(*MARK++);
5034 length += AvFILLp(ary) - offset + 1;
5040 length = AvMAX(ary) + 1; /* close enough to infinity */
5044 length = AvMAX(ary) + 1;
5046 if (offset > AvFILLp(ary) + 1) {
5048 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
5049 offset = AvFILLp(ary) + 1;
5051 after = AvFILLp(ary) + 1 - (offset + length);
5052 if (after < 0) { /* not that much array */
5053 length += after; /* offset+length now in array */
5059 /* At this point, MARK .. SP-1 is our new LIST */
5062 diff = newlen - length;
5063 if (newlen && !AvREAL(ary) && AvREIFY(ary))
5066 /* make new elements SVs now: avoid problems if they're from the array */
5067 for (dst = MARK, i = newlen; i; i--) {
5068 SV * const h = *dst;
5069 *dst++ = newSVsv(h);
5072 if (diff < 0) { /* shrinking the area */
5073 SV **tmparyval = NULL;
5075 Newx(tmparyval, newlen, SV*); /* so remember insertion */
5076 Copy(MARK, tmparyval, newlen, SV*);
5079 MARK = ORIGMARK + 1;
5080 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5081 MEXTEND(MARK, length);
5082 Copy(AvARRAY(ary)+offset, MARK, length, SV*);
5084 EXTEND_MORTAL(length);
5085 for (i = length, dst = MARK; i; i--) {
5086 sv_2mortal(*dst); /* free them eventually */
5093 *MARK = AvARRAY(ary)[offset+length-1];
5096 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
5097 SvREFCNT_dec(*dst++); /* free them now */
5100 AvFILLp(ary) += diff;
5102 /* pull up or down? */
5104 if (offset < after) { /* easier to pull up */
5105 if (offset) { /* esp. if nothing to pull */
5106 src = &AvARRAY(ary)[offset-1];
5107 dst = src - diff; /* diff is negative */
5108 for (i = offset; i > 0; i--) /* can't trust Copy */
5112 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
5116 if (after) { /* anything to pull down? */
5117 src = AvARRAY(ary) + offset + length;
5118 dst = src + diff; /* diff is negative */
5119 Move(src, dst, after, SV*);
5121 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
5122 /* avoid later double free */
5126 dst[--i] = &PL_sv_undef;
5129 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
5130 Safefree(tmparyval);
5133 else { /* no, expanding (or same) */
5134 SV** tmparyval = NULL;
5136 Newx(tmparyval, length, SV*); /* so remember deletion */
5137 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
5140 if (diff > 0) { /* expanding */
5141 /* push up or down? */
5142 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
5146 Move(src, dst, offset, SV*);
5148 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
5150 AvFILLp(ary) += diff;
5153 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
5154 av_extend(ary, AvFILLp(ary) + diff);
5155 AvFILLp(ary) += diff;
5158 dst = AvARRAY(ary) + AvFILLp(ary);
5160 for (i = after; i; i--) {
5168 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
5171 MARK = ORIGMARK + 1;
5172 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5174 Copy(tmparyval, MARK, length, SV*);
5176 EXTEND_MORTAL(length);
5177 for (i = length, dst = MARK; i; i--) {
5178 sv_2mortal(*dst); /* free them eventually */
5185 else if (length--) {
5186 *MARK = tmparyval[length];
5189 while (length-- > 0)
5190 SvREFCNT_dec(tmparyval[length]);
5194 *MARK = &PL_sv_undef;
5195 Safefree(tmparyval);
5199 mg_set(MUTABLE_SV(ary));
5207 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5208 register AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5209 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5212 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5215 ENTER_with_name("call_PUSH");
5216 call_method("PUSH",G_SCALAR|G_DISCARD);
5217 LEAVE_with_name("call_PUSH");
5221 PL_delaymagic = DM_DELAY;
5222 for (++MARK; MARK <= SP; MARK++) {
5223 SV * const sv = newSV(0);
5225 sv_setsv(sv, *MARK);
5226 av_store(ary, AvFILLp(ary)+1, sv);
5228 if (PL_delaymagic & DM_ARRAY_ISA)
5229 mg_set(MUTABLE_SV(ary));
5234 if (OP_GIMME(PL_op, 0) != G_VOID) {
5235 PUSHi( AvFILL(ary) + 1 );
5244 AV * const av = PL_op->op_flags & OPf_SPECIAL
5245 ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs));
5246 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
5250 (void)sv_2mortal(sv);
5257 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5258 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5259 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5262 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5265 ENTER_with_name("call_UNSHIFT");
5266 call_method("UNSHIFT",G_SCALAR|G_DISCARD);
5267 LEAVE_with_name("call_UNSHIFT");
5272 av_unshift(ary, SP - MARK);
5274 SV * const sv = newSVsv(*++MARK);
5275 (void)av_store(ary, i++, sv);
5279 if (OP_GIMME(PL_op, 0) != G_VOID) {
5280 PUSHi( AvFILL(ary) + 1 );
5289 if (GIMME == G_ARRAY) {
5290 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5294 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5295 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5296 av = MUTABLE_AV((*SP));
5297 /* In-place reversing only happens in void context for the array
5298 * assignment. We don't need to push anything on the stack. */
5301 if (SvMAGICAL(av)) {
5303 register SV *tmp = sv_newmortal();
5304 /* For SvCANEXISTDELETE */
5307 bool can_preserve = SvCANEXISTDELETE(av);
5309 for (i = 0, j = av_len(av); i < j; ++i, --j) {
5310 register SV *begin, *end;
5313 if (!av_exists(av, i)) {
5314 if (av_exists(av, j)) {
5315 register SV *sv = av_delete(av, j, 0);
5316 begin = *av_fetch(av, i, TRUE);
5317 sv_setsv_mg(begin, sv);
5321 else if (!av_exists(av, j)) {
5322 register SV *sv = av_delete(av, i, 0);
5323 end = *av_fetch(av, j, TRUE);
5324 sv_setsv_mg(end, sv);
5329 begin = *av_fetch(av, i, TRUE);
5330 end = *av_fetch(av, j, TRUE);
5331 sv_setsv(tmp, begin);
5332 sv_setsv_mg(begin, end);
5333 sv_setsv_mg(end, tmp);
5337 SV **begin = AvARRAY(av);
5340 SV **end = begin + AvFILLp(av);
5342 while (begin < end) {
5343 register SV * const tmp = *begin;
5354 register SV * const tmp = *MARK;
5358 /* safe as long as stack cannot get extended in the above */
5364 register char *down;
5369 SvUTF8_off(TARG); /* decontaminate */
5371 do_join(TARG, &PL_sv_no, MARK, SP);
5373 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv());
5374 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
5375 report_uninit(TARG);
5378 up = SvPV_force(TARG, len);
5380 if (DO_UTF8(TARG)) { /* first reverse each character */
5381 U8* s = (U8*)SvPVX(TARG);
5382 const U8* send = (U8*)(s + len);
5384 if (UTF8_IS_INVARIANT(*s)) {
5389 if (!utf8_to_uvchr(s, 0))
5393 down = (char*)(s - 1);
5394 /* reverse this character */
5398 *down-- = (char)tmp;
5404 down = SvPVX(TARG) + len - 1;
5408 *down-- = (char)tmp;
5410 (void)SvPOK_only_UTF8(TARG);
5422 register IV limit = POPi; /* note, negative is forever */
5423 SV * const sv = POPs;
5425 register const char *s = SvPV_const(sv, len);
5426 const bool do_utf8 = DO_UTF8(sv);
5427 const char *strend = s + len;
5429 register REGEXP *rx;
5431 register const char *m;
5433 const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
5434 I32 maxiters = slen + 10;
5435 I32 trailing_empty = 0;
5437 const I32 origlimit = limit;
5440 const I32 gimme = GIMME_V;
5442 const I32 oldsave = PL_savestack_ix;
5443 U32 make_mortal = SVs_TEMP;
5448 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*);
5453 DIE(aTHX_ "panic: pp_split");
5456 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
5457 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
5459 RX_MATCH_UTF8_set(rx, do_utf8);
5462 if (pm->op_pmreplrootu.op_pmtargetoff) {
5463 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
5466 if (pm->op_pmreplrootu.op_pmtargetgv) {
5467 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
5472 if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
5478 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
5480 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
5487 for (i = AvFILLp(ary); i >= 0; i--)
5488 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
5490 /* temporarily switch stacks */
5491 SAVESWITCHSTACK(PL_curstack, ary);
5495 base = SP - PL_stack_base;
5497 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
5499 while (*s == ' ' || is_utf8_space((U8*)s))
5502 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5503 while (isSPACE_LC(*s))
5511 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) {
5515 gimme_scalar = gimme == G_SCALAR && !ary;
5518 limit = maxiters + 2;
5519 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
5522 /* this one uses 'm' and is a negative test */
5524 while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
5525 const int t = UTF8SKIP(m);
5526 /* is_utf8_space returns FALSE for malform utf8 */
5533 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5534 while (m < strend && !isSPACE_LC(*m))
5537 while (m < strend && !isSPACE(*m))
5550 dstr = newSVpvn_flags(s, m-s,
5551 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5555 /* skip the whitespace found last */
5557 s = m + UTF8SKIP(m);
5561 /* this one uses 's' and is a positive test */
5563 while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
5566 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5567 while (s < strend && isSPACE_LC(*s))
5570 while (s < strend && isSPACE(*s))
5575 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
5577 for (m = s; m < strend && *m != '\n'; m++)
5590 dstr = newSVpvn_flags(s, m-s,
5591 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5597 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
5599 Pre-extend the stack, either the number of bytes or
5600 characters in the string or a limited amount, triggered by:
5602 my ($x, $y) = split //, $str;
5606 if (!gimme_scalar) {
5607 const U32 items = limit - 1;
5616 /* keep track of how many bytes we skip over */
5626 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
5639 dstr = newSVpvn(s, 1);
5655 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
5656 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
5657 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
5658 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
5659 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
5660 SV * const csv = CALLREG_INTUIT_STRING(rx);
5662 len = RX_MINLENRET(rx);
5663 if (len == 1 && !RX_UTF8(rx) && !tail) {
5664 const char c = *SvPV_nolen_const(csv);
5666 for (m = s; m < strend && *m != c; m++)
5677 dstr = newSVpvn_flags(s, m-s,
5678 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5681 /* The rx->minlen is in characters but we want to step
5682 * s ahead by bytes. */
5684 s = (char*)utf8_hop((U8*)m, len);
5686 s = m + len; /* Fake \n at the end */
5690 while (s < strend && --limit &&
5691 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
5692 csv, multiline ? FBMrf_MULTILINE : 0)) )
5701 dstr = newSVpvn_flags(s, m-s,
5702 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5705 /* The rx->minlen is in characters but we want to step
5706 * s ahead by bytes. */
5708 s = (char*)utf8_hop((U8*)m, len);
5710 s = m + len; /* Fake \n at the end */
5715 maxiters += slen * RX_NPARENS(rx);
5716 while (s < strend && --limit)
5720 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
5721 sv, NULL, SvSCREAM(sv) ? REXEC_SCREAM : 0);
5723 if (rex_return == 0)
5725 TAINT_IF(RX_MATCH_TAINTED(rx));
5726 if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
5729 orig = RX_SUBBEG(rx);
5731 strend = s + (strend - m);
5733 m = RX_OFFS(rx)[0].start + orig;
5742 dstr = newSVpvn_flags(s, m-s,
5743 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5746 if (RX_NPARENS(rx)) {
5748 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
5749 s = RX_OFFS(rx)[i].start + orig;
5750 m = RX_OFFS(rx)[i].end + orig;
5752 /* japhy (07/27/01) -- the (m && s) test doesn't catch
5753 parens that didn't match -- they should be set to
5754 undef, not the empty string */
5762 if (m >= orig && s >= orig) {
5763 dstr = newSVpvn_flags(s, m-s,
5764 (do_utf8 ? SVf_UTF8 : 0)
5768 dstr = &PL_sv_undef; /* undef, not "" */
5774 s = RX_OFFS(rx)[0].end + orig;
5778 if (!gimme_scalar) {
5779 iters = (SP - PL_stack_base) - base;
5781 if (iters > maxiters)
5782 DIE(aTHX_ "Split loop");
5784 /* keep field after final delim? */
5785 if (s < strend || (iters && origlimit)) {
5786 if (!gimme_scalar) {
5787 const STRLEN l = strend - s;
5788 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5793 else if (!origlimit) {
5795 iters -= trailing_empty;
5797 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
5798 if (TOPs && !make_mortal)
5800 *SP-- = &PL_sv_undef;
5807 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
5811 if (SvSMAGICAL(ary)) {
5813 mg_set(MUTABLE_SV(ary));
5816 if (gimme == G_ARRAY) {
5818 Copy(AvARRAY(ary), SP + 1, iters, SV*);
5825 ENTER_with_name("call_PUSH");
5826 call_method("PUSH",G_SCALAR|G_DISCARD);
5827 LEAVE_with_name("call_PUSH");
5829 if (gimme == G_ARRAY) {
5831 /* EXTEND should not be needed - we just popped them */
5833 for (i=0; i < iters; i++) {
5834 SV **svp = av_fetch(ary, i, FALSE);
5835 PUSHs((svp) ? *svp : &PL_sv_undef);
5842 if (gimme == G_ARRAY)
5854 SV *const sv = PAD_SVl(PL_op->op_targ);
5856 if (SvPADSTALE(sv)) {
5859 RETURNOP(cLOGOP->op_other);
5861 RETURNOP(cLOGOP->op_next);
5871 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV
5872 || SvTYPE(retsv) == SVt_PVCV) {
5873 retsv = refto(retsv);
5880 PP(unimplemented_op)
5883 const Optype op_type = PL_op->op_type;
5884 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope
5885 with out of range op numbers - it only "special" cases op_custom.
5886 Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
5887 if we get here for a custom op then that means that the custom op didn't
5888 have an implementation. Given that OP_NAME() looks up the custom op
5889 by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
5890 registers &PL_unimplemented_op as the address of their custom op.
5891 NULL doesn't generate a useful error message. "custom" does. */
5892 const char *const name = op_type >= OP_max
5893 ? "[out of range]" : PL_op_name[PL_op->op_type];
5894 if(OP_IS_SOCKET(op_type))
5895 DIE(aTHX_ PL_no_sock_func, name);
5896 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
5903 HV * const hv = (HV*)POPs;
5905 if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; }
5907 if (SvRMAGICAL(hv)) {
5908 MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied);
5910 XPUSHs(magic_scalarpack(hv, mg));
5915 XPUSHs(boolSV(HvUSEDKEYS(hv) != 0));
5919 /* For sorting out arguments passed to a &CORE:: subroutine */
5923 int opnum = SvIOK(cSVOP_sv) ? (int)SvUV(cSVOP_sv) : 0;
5924 int defgv = PL_opargs[opnum] & OA_DEFGV, whicharg = 0;
5925 AV * const at_ = GvAV(PL_defgv);
5926 SV **svp = AvARRAY(at_);
5927 I32 minargs = 0, maxargs = 0, numargs = AvFILLp(at_)+1;
5928 I32 oa = opnum ? PL_opargs[opnum] >> OASHIFT : 0;
5929 bool seen_question = 0;
5930 const char *err = NULL;
5931 const bool pushmark = PL_op->op_private & OPpCOREARGS_PUSHMARK;
5933 /* Count how many args there are first, to get some idea how far to
5934 extend the stack. */
5936 if ((oa & 7) == OA_LIST) { maxargs = I32_MAX; break; }
5938 if (oa & OA_OPTIONAL) seen_question = 1;
5939 if (!seen_question) minargs++;
5943 if(numargs < minargs) err = "Not enough";
5944 else if(numargs > maxargs) err = "Too many";
5946 /* diag_listed_as: Too many arguments for %s */
5948 "%s arguments for %s", err,
5949 opnum ? OP_DESC(PL_op->op_next) : SvPV_nolen_const(cSVOP_sv)
5952 /* Reset the stack pointer. Without this, we end up returning our own
5953 arguments in list context, in addition to the values we are supposed
5954 to return. nextstate usually does this on sub entry, but we need
5955 to run the next op with the caller’s hints, so we cannot have a
5957 SP = PL_stack_base + cxstack[cxstack_ix].blk_oldsp;
5959 if(!maxargs) RETURN;
5961 /* We do this here, rather than with a separate pushmark op, as it has
5962 to come in between two things this function does (stack reset and
5963 arg pushing). This seems the easiest way to do it. */
5966 (void)Perl_pp_pushmark(aTHX);
5969 EXTEND(SP, maxargs == I32_MAX ? numargs : maxargs);
5970 PUTBACK; /* The code below can die in various places. */
5972 oa = PL_opargs[opnum] >> OASHIFT;
5973 for (; oa&&(numargs||!pushmark); (void)(numargs&&(++svp,--numargs))) {
5977 if (!numargs && defgv && whicharg == minargs + 1) {
5978 PERL_SI * const oldsi = PL_curstackinfo;
5979 I32 const oldcxix = oldsi->si_cxix;
5981 if (oldcxix) oldsi->si_cxix--;
5982 else PL_curstackinfo = oldsi->si_prev;
5983 caller = find_runcv(NULL);
5984 PL_curstackinfo = oldsi;
5985 oldsi->si_cxix = oldcxix;
5986 PUSHs(find_rundefsv2(
5987 caller,cxstack[cxstack_ix].blk_oldcop->cop_seq
5990 else PUSHs(numargs ? svp && *svp ? *svp : &PL_sv_undef : NULL);
5994 PUSHs(svp && *svp ? *svp : &PL_sv_undef);
5999 if (!svp || !*svp || !SvROK(*svp)
6000 || SvTYPE(SvRV(*svp)) != SVt_PVHV)
6002 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/
6003 "Type of arg %d to &CORE::%s must be hash reference",
6004 whicharg, OP_DESC(PL_op->op_next)
6009 if (!numargs) PUSHs(NULL);
6010 else if(svp && *svp && SvROK(*svp) && isGV_with_GP(SvRV(*svp)))
6011 /* no magic here, as the prototype will have added an extra
6012 refgen and we just want what was there before that */
6015 const bool constr = PL_op->op_private & whicharg;
6017 svp && *svp ? *svp : &PL_sv_undef,
6018 constr, CopHINTS_get(PL_curcop) & HINT_STRICT_REFS,
6025 const bool wantscalar =
6026 PL_op->op_private & OPpCOREARGS_SCALARMOD;
6027 if (!svp || !*svp || !SvROK(*svp)
6028 /* We have to permit globrefs even for the \$ proto, as
6029 *foo is indistinguishable from ${\*foo}, and the proto-
6030 type permits the latter. */
6031 || SvTYPE(SvRV(*svp)) > (
6032 wantscalar ? SVt_PVLV
6033 : opnum == OP_LOCK ? SVt_PVCV
6038 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/
6039 "Type of arg %d to &CORE::%s must be %s",
6040 whicharg, OP_DESC(PL_op->op_next),
6042 ? "scalar reference"
6044 ? "reference to one of [$@%&*]"
6045 : "reference to one of [$@%*]"
6051 DIE(aTHX_ "panic: unknown OA_*: %x", (unsigned)(oa&7));
6061 * c-indentation-style: bsd
6063 * indent-tabs-mode: t
6066 * ex: set ts=8 sts=4 sw=4 noet: