3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
33 /* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
37 #ifdef NEED_GETPID_PROTO
38 extern Pid_t getpid (void);
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
45 #if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
49 /* variations on pp_null */
55 if (GIMME_V == G_SCALAR)
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
71 if (PL_op->op_flags & OPf_REF) {
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
84 if (gimme == G_ARRAY) {
85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
87 if (SvMAGICAL(TARG)) {
89 for (i=0; i < (U32)maxarg; i++) {
90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
99 else if (gimme == G_SCALAR) {
100 SV* const sv = sv_newmortal();
101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
102 sv_setiv(sv, maxarg);
113 assert(SvTYPE(TARG) == SVt_PVHV);
115 if (PL_op->op_private & OPpLVAL_INTRO)
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
118 if (PL_op->op_flags & OPf_REF)
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
129 if (gimme == G_ARRAY) {
130 RETURNOP(Perl_do_kv(aTHX));
132 else if (gimme == G_SCALAR) {
133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
141 static const char S_no_symref_sv[] =
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
144 /* In some cases this function inspects PL_op. If this function is called
145 for new op types, more bool parameters may need to be added in place of
148 When noinit is true, the absence of a gv will cause a retval of undef.
149 This is unrelated to the cv-to-gv assignment case.
151 Make sure to use SPAGAIN after calling this.
155 S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
159 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
162 sv = amagic_deref_call(sv, to_gv_amg);
166 if (SvTYPE(sv) == SVt_PVIO) {
167 GV * const gv = MUTABLE_GV(sv_newmortal());
168 gv_init(gv, 0, "", 0, 0);
169 GvIOp(gv) = MUTABLE_IO(sv);
170 SvREFCNT_inc_void_NN(sv);
173 else if (!isGV_with_GP(sv))
174 return (SV *)Perl_die(aTHX_ "Not a GLOB reference");
177 if (!isGV_with_GP(sv)) {
179 /* If this is a 'my' scalar and flag is set then vivify
182 if (vivify_sv && sv != &PL_sv_undef) {
185 Perl_croak_no_modify(aTHX);
186 if (cUNOP->op_targ) {
188 SV * const namesv = PAD_SV(cUNOP->op_targ);
189 const char * const name = SvPV(namesv, len);
190 gv = MUTABLE_GV(newSV(0));
191 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
194 const char * const name = CopSTASHPV(PL_curcop);
197 prepare_SV_for_RV(sv);
198 SvRV_set(sv, MUTABLE_SV(gv));
203 if (PL_op->op_flags & OPf_REF || strict)
204 return (SV *)Perl_die(aTHX_ PL_no_usym, "a symbol");
205 if (ckWARN(WARN_UNINITIALIZED))
211 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
212 sv, GV_ADDMG, SVt_PVGV
222 (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""),
225 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
226 == OPpDONT_INIT_GV) {
227 /* We are the target of a coderef assignment. Return
228 the scalar unchanged, and let pp_sasssign deal with
232 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
234 /* FAKE globs in the symbol table cause weird bugs (#77810) */
239 SV *newsv = sv_newmortal();
240 sv_setsv_flags(newsv, sv, 0);
252 sv, PL_op->op_private & OPpDEREF,
253 PL_op->op_private & HINT_STRICT_REFS,
254 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
255 || PL_op->op_type == OP_READLINE
258 if (PL_op->op_private & OPpLVAL_INTRO)
259 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
264 /* Helper function for pp_rv2sv and pp_rv2av */
266 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
267 const svtype type, SV ***spp)
272 PERL_ARGS_ASSERT_SOFTREF2XV;
274 if (PL_op->op_private & HINT_STRICT_REFS) {
276 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
278 Perl_die(aTHX_ PL_no_usym, what);
282 PL_op->op_flags & OPf_REF &&
283 PL_op->op_next->op_type != OP_BOOLKEYS
285 Perl_die(aTHX_ PL_no_usym, what);
286 if (ckWARN(WARN_UNINITIALIZED))
288 if (type != SVt_PV && GIMME_V == G_ARRAY) {
292 **spp = &PL_sv_undef;
295 if ((PL_op->op_flags & OPf_SPECIAL) &&
296 !(PL_op->op_flags & OPf_MOD))
298 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
300 **spp = &PL_sv_undef;
305 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
318 sv = amagic_deref_call(sv, to_sv_amg);
323 switch (SvTYPE(sv)) {
329 DIE(aTHX_ "Not a SCALAR reference");
336 if (!isGV_with_GP(gv)) {
337 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
343 if (PL_op->op_flags & OPf_MOD) {
344 if (PL_op->op_private & OPpLVAL_INTRO) {
345 if (cUNOP->op_first->op_type == OP_NULL)
346 sv = save_scalar(MUTABLE_GV(TOPs));
348 sv = save_scalar(gv);
350 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
352 else if (PL_op->op_private & OPpDEREF)
353 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
362 AV * const av = MUTABLE_AV(TOPs);
363 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
365 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
367 *sv = newSV_type(SVt_PVMG);
368 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
372 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
381 if (PL_op->op_flags & OPf_MOD || LVRET) {
382 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
383 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
385 LvTARG(ret) = SvREFCNT_inc_simple(sv);
386 PUSHs(ret); /* no SvSETMAGIC */
390 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
391 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
392 if (mg && mg->mg_len >= 0) {
410 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
412 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
415 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
416 /* (But not in defined().) */
418 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
421 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
422 if ((PL_op->op_private & OPpLVAL_INTRO)) {
423 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
426 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
429 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
433 cv = MUTABLE_CV(&PL_sv_undef);
434 SETs(MUTABLE_SV(cv));
444 SV *ret = &PL_sv_undef;
446 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
447 const char * s = SvPVX_const(TOPs);
448 if (strnEQ(s, "CORE::", 6)) {
449 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
450 if (!code || code == -KEY_CORE)
451 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
452 if (code < 0) { /* Overridable. */
453 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
459 cv = sv_2cv(TOPs, &stash, &gv, 0);
461 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
470 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
472 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
474 PUSHs(MUTABLE_SV(cv));
488 if (GIMME != G_ARRAY) {
492 *MARK = &PL_sv_undef;
493 *MARK = refto(*MARK);
497 EXTEND_MORTAL(SP - MARK);
499 *MARK = refto(*MARK);
504 S_refto(pTHX_ SV *sv)
509 PERL_ARGS_ASSERT_REFTO;
511 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
514 if (!(sv = LvTARG(sv)))
517 SvREFCNT_inc_void_NN(sv);
519 else if (SvTYPE(sv) == SVt_PVAV) {
520 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
521 av_reify(MUTABLE_AV(sv));
523 SvREFCNT_inc_void_NN(sv);
525 else if (SvPADTMP(sv) && !IS_PADGV(sv))
529 SvREFCNT_inc_void_NN(sv);
532 sv_upgrade(rv, SVt_IV);
542 SV * const sv = POPs;
547 if (!sv || !SvROK(sv))
550 pv = sv_reftype(SvRV(sv),TRUE);
551 PUSHp(pv, strlen(pv));
562 stash = CopSTASH(PL_curcop);
564 SV * const ssv = POPs;
568 if (!ssv) goto curstash;
569 if (!SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
570 Perl_croak(aTHX_ "Attempt to bless into a reference");
571 ptr = SvPV_const(ssv,len);
573 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
574 "Explicit blessing to '' (assuming package main)");
575 stash = gv_stashpvn(ptr, len, GV_ADD);
578 (void)sv_bless(TOPs, stash);
587 const char * const elem = SvPV_nolen_const(sv);
588 GV * const gv = MUTABLE_GV(POPs);
593 /* elem will always be NUL terminated. */
594 const char * const second_letter = elem + 1;
597 if (strEQ(second_letter, "RRAY"))
598 tmpRef = MUTABLE_SV(GvAV(gv));
601 if (strEQ(second_letter, "ODE"))
602 tmpRef = MUTABLE_SV(GvCVu(gv));
605 if (strEQ(second_letter, "ILEHANDLE")) {
606 /* finally deprecated in 5.8.0 */
607 deprecate("*glob{FILEHANDLE}");
608 tmpRef = MUTABLE_SV(GvIOp(gv));
611 if (strEQ(second_letter, "ORMAT"))
612 tmpRef = MUTABLE_SV(GvFORM(gv));
615 if (strEQ(second_letter, "LOB"))
616 tmpRef = MUTABLE_SV(gv);
619 if (strEQ(second_letter, "ASH"))
620 tmpRef = MUTABLE_SV(GvHV(gv));
623 if (*second_letter == 'O' && !elem[2])
624 tmpRef = MUTABLE_SV(GvIOp(gv));
627 if (strEQ(second_letter, "AME"))
628 sv = newSVhek(GvNAME_HEK(gv));
631 if (strEQ(second_letter, "ACKAGE")) {
632 const HV * const stash = GvSTASH(gv);
633 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
634 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
638 if (strEQ(second_letter, "CALAR"))
653 /* Pattern matching */
658 register unsigned char *s;
661 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
665 if (mg && SvSCREAM(sv))
668 s = (unsigned char*)(SvPV(sv, len));
669 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
670 /* No point in studying a zero length string, and not safe to study
671 anything that doesn't appear to be a simple scalar (and hence might
672 change between now and when the regexp engine runs without our set
673 magic ever running) such as a reference to an object with overloaded
674 stringification. Also refuse to study an FBM scalar, as this gives
675 more flexibility in SV flag usage. No real-world code would ever
676 end up studying an FBM scalar, so this isn't a real pessimisation.
677 Endemic use of I32 in Perl_screaminstr makes it hard to safely push
678 the study length limit from I32_MAX to U32_MAX - 1.
685 } else if (len < 0xFFFF) {
690 size = (256 + len) * quanta;
691 sfirst_raw = (char *)safemalloc(size);
694 DIE(aTHX_ "do_study: out of memory");
698 mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
699 mg->mg_ptr = sfirst_raw;
701 mg->mg_private = quanta;
703 memset(sfirst_raw, ~0, 256 * quanta);
705 /* The assumption here is that most studied strings are fairly short, hence
706 the pain of the extra code is worth it, given the memory savings.
707 80 character string, 336 bytes as U8, down from 1344 as U32
708 800 character string, 2112 bytes as U16, down from 4224 as U32
712 U8 *const sfirst = (U8 *)sfirst_raw;
713 U8 *const snext = sfirst + 256;
715 const U8 ch = s[len];
716 snext[len] = sfirst[ch];
719 } else if (quanta == 2) {
720 U16 *const sfirst = (U16 *)sfirst_raw;
721 U16 *const snext = sfirst + 256;
723 const U8 ch = s[len];
724 snext[len] = sfirst[ch];
728 U32 *const sfirst = (U32 *)sfirst_raw;
729 U32 *const snext = sfirst + 256;
731 const U8 ch = s[len];
732 snext[len] = sfirst[ch];
745 if (PL_op->op_flags & OPf_STACKED)
747 else if (PL_op->op_private & OPpTARGET_MY)
753 TARG = sv_newmortal();
754 if(PL_op->op_type == OP_TRANSR) {
755 SV * const newsv = newSVsv(sv);
759 else PUSHi(do_trans(sv));
763 /* Lvalue operators. */
766 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
772 PERL_ARGS_ASSERT_DO_CHOMP;
774 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
776 if (SvTYPE(sv) == SVt_PVAV) {
778 AV *const av = MUTABLE_AV(sv);
779 const I32 max = AvFILL(av);
781 for (i = 0; i <= max; i++) {
782 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
783 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
784 do_chomp(retval, sv, chomping);
788 else if (SvTYPE(sv) == SVt_PVHV) {
789 HV* const hv = MUTABLE_HV(sv);
791 (void)hv_iterinit(hv);
792 while ((entry = hv_iternext(hv)))
793 do_chomp(retval, hv_iterval(hv,entry), chomping);
796 else if (SvREADONLY(sv)) {
798 /* SV is copy-on-write */
799 sv_force_normal_flags(sv, 0);
802 Perl_croak_no_modify(aTHX);
807 /* XXX, here sv is utf8-ized as a side-effect!
808 If encoding.pm is used properly, almost string-generating
809 operations, including literal strings, chr(), input data, etc.
810 should have been utf8-ized already, right?
812 sv_recode_to_utf8(sv, PL_encoding);
818 char *temp_buffer = NULL;
827 while (len && s[-1] == '\n') {
834 STRLEN rslen, rs_charlen;
835 const char *rsptr = SvPV_const(PL_rs, rslen);
837 rs_charlen = SvUTF8(PL_rs)
841 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
842 /* Assumption is that rs is shorter than the scalar. */
844 /* RS is utf8, scalar is 8 bit. */
846 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
849 /* Cannot downgrade, therefore cannot possibly match
851 assert (temp_buffer == rsptr);
857 else if (PL_encoding) {
858 /* RS is 8 bit, encoding.pm is used.
859 * Do not recode PL_rs as a side-effect. */
860 svrecode = newSVpvn(rsptr, rslen);
861 sv_recode_to_utf8(svrecode, PL_encoding);
862 rsptr = SvPV_const(svrecode, rslen);
863 rs_charlen = sv_len_utf8(svrecode);
866 /* RS is 8 bit, scalar is utf8. */
867 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
881 if (memNE(s, rsptr, rslen))
883 SvIVX(retval) += rs_charlen;
886 s = SvPV_force_nolen(sv);
894 SvREFCNT_dec(svrecode);
896 Safefree(temp_buffer);
898 if (len && !SvPOK(sv))
899 s = SvPV_force_nomg(sv, len);
902 char * const send = s + len;
903 char * const start = s;
905 while (s > start && UTF8_IS_CONTINUATION(*s))
907 if (is_utf8_string((U8*)s, send - s)) {
908 sv_setpvn(retval, s, send - s);
910 SvCUR_set(sv, s - start);
916 sv_setpvs(retval, "");
920 sv_setpvn(retval, s, 1);
927 sv_setpvs(retval, "");
935 const bool chomping = PL_op->op_type == OP_SCHOMP;
939 do_chomp(TARG, TOPs, chomping);
946 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
947 const bool chomping = PL_op->op_type == OP_CHOMP;
952 do_chomp(TARG, *++MARK, chomping);
963 if (!PL_op->op_private) {
972 SV_CHECK_THINKFIRST_COW_DROP(sv);
974 switch (SvTYPE(sv)) {
978 av_undef(MUTABLE_AV(sv));
981 hv_undef(MUTABLE_HV(sv));
984 if (cv_const_sv((const CV *)sv))
985 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
986 CvANON((const CV *)sv) ? "(anonymous)"
987 : GvENAME(CvGV((const CV *)sv)));
991 /* let user-undef'd sub keep its identity */
992 GV* const gv = CvGV((const CV *)sv);
993 cv_undef(MUTABLE_CV(sv));
994 CvGV_set(MUTABLE_CV(sv), gv);
999 SvSetMagicSV(sv, &PL_sv_undef);
1002 else if (isGV_with_GP(sv)) {
1006 /* undef *Pkg::meth_name ... */
1008 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1009 && HvENAME_get(stash);
1011 if((stash = GvHV((const GV *)sv))) {
1012 if(HvENAME_get(stash))
1013 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1017 gp_free(MUTABLE_GV(sv));
1019 GvGP_set(sv, gp_ref(gp));
1020 GvSV(sv) = newSV(0);
1021 GvLINE(sv) = CopLINE(PL_curcop);
1022 GvEGV(sv) = MUTABLE_GV(sv);
1026 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1028 /* undef *Foo::ISA */
1029 if( strEQ(GvNAME((const GV *)sv), "ISA")
1030 && (stash = GvSTASH((const GV *)sv))
1031 && (method_changed || HvENAME(stash)) )
1032 mro_isa_changed_in(stash);
1033 else if(method_changed)
1034 mro_method_changed_in(
1035 GvSTASH((const GV *)sv)
1042 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1057 if (SvTYPE(TOPs) >= SVt_PVAV || (isGV_with_GP(TOPs) && !SvFAKE(TOPs)))
1058 Perl_croak_no_modify(aTHX);
1060 TARG = sv_newmortal();
1061 sv_setsv(TARG, TOPs);
1062 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1063 && SvIVX(TOPs) != IV_MAX)
1065 SvIV_set(TOPs, SvIVX(TOPs) + 1);
1066 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1071 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1081 if (SvTYPE(TOPs) >= SVt_PVAV || (isGV_with_GP(TOPs) && !SvFAKE(TOPs)))
1082 Perl_croak_no_modify(aTHX);
1084 TARG = sv_newmortal();
1085 sv_setsv(TARG, TOPs);
1086 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1087 && SvIVX(TOPs) != IV_MIN)
1089 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1090 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1099 /* Ordinary operators. */
1103 dVAR; dSP; dATARGET; SV *svl, *svr;
1104 #ifdef PERL_PRESERVE_IVUV
1107 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1110 #ifdef PERL_PRESERVE_IVUV
1111 /* For integer to integer power, we do the calculation by hand wherever
1112 we're sure it is safe; otherwise we call pow() and try to convert to
1113 integer afterwards. */
1115 SvIV_please_nomg(svr);
1117 SvIV_please_nomg(svl);
1126 const IV iv = SvIVX(svr);
1130 goto float_it; /* Can't do negative powers this way. */
1134 baseuok = SvUOK(svl);
1136 baseuv = SvUVX(svl);
1138 const IV iv = SvIVX(svl);
1141 baseuok = TRUE; /* effectively it's a UV now */
1143 baseuv = -iv; /* abs, baseuok == false records sign */
1146 /* now we have integer ** positive integer. */
1149 /* foo & (foo - 1) is zero only for a power of 2. */
1150 if (!(baseuv & (baseuv - 1))) {
1151 /* We are raising power-of-2 to a positive integer.
1152 The logic here will work for any base (even non-integer
1153 bases) but it can be less accurate than
1154 pow (base,power) or exp (power * log (base)) when the
1155 intermediate values start to spill out of the mantissa.
1156 With powers of 2 we know this can't happen.
1157 And powers of 2 are the favourite thing for perl
1158 programmers to notice ** not doing what they mean. */
1160 NV base = baseuok ? baseuv : -(NV)baseuv;
1165 while (power >>= 1) {
1173 SvIV_please_nomg(svr);
1176 register unsigned int highbit = 8 * sizeof(UV);
1177 register unsigned int diff = 8 * sizeof(UV);
1178 while (diff >>= 1) {
1180 if (baseuv >> highbit) {
1184 /* we now have baseuv < 2 ** highbit */
1185 if (power * highbit <= 8 * sizeof(UV)) {
1186 /* result will definitely fit in UV, so use UV math
1187 on same algorithm as above */
1188 register UV result = 1;
1189 register UV base = baseuv;
1190 const bool odd_power = cBOOL(power & 1);
1194 while (power >>= 1) {
1201 if (baseuok || !odd_power)
1202 /* answer is positive */
1204 else if (result <= (UV)IV_MAX)
1205 /* answer negative, fits in IV */
1206 SETi( -(IV)result );
1207 else if (result == (UV)IV_MIN)
1208 /* 2's complement assumption: special case IV_MIN */
1211 /* answer negative, doesn't fit */
1212 SETn( -(NV)result );
1222 NV right = SvNV_nomg(svr);
1223 NV left = SvNV_nomg(svl);
1226 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1228 We are building perl with long double support and are on an AIX OS
1229 afflicted with a powl() function that wrongly returns NaNQ for any
1230 negative base. This was reported to IBM as PMR #23047-379 on
1231 03/06/2006. The problem exists in at least the following versions
1232 of AIX and the libm fileset, and no doubt others as well:
1234 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1235 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1236 AIX 5.2.0 bos.adt.libm 5.2.0.85
1238 So, until IBM fixes powl(), we provide the following workaround to
1239 handle the problem ourselves. Our logic is as follows: for
1240 negative bases (left), we use fmod(right, 2) to check if the
1241 exponent is an odd or even integer:
1243 - if odd, powl(left, right) == -powl(-left, right)
1244 - if even, powl(left, right) == powl(-left, right)
1246 If the exponent is not an integer, the result is rightly NaNQ, so
1247 we just return that (as NV_NAN).
1251 NV mod2 = Perl_fmod( right, 2.0 );
1252 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1253 SETn( -Perl_pow( -left, right) );
1254 } else if (mod2 == 0.0) { /* even integer */
1255 SETn( Perl_pow( -left, right) );
1256 } else { /* fractional power */
1260 SETn( Perl_pow( left, right) );
1263 SETn( Perl_pow( left, right) );
1264 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1266 #ifdef PERL_PRESERVE_IVUV
1268 SvIV_please_nomg(svr);
1276 dVAR; dSP; dATARGET; SV *svl, *svr;
1277 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1280 #ifdef PERL_PRESERVE_IVUV
1281 SvIV_please_nomg(svr);
1283 /* Unless the left argument is integer in range we are going to have to
1284 use NV maths. Hence only attempt to coerce the right argument if
1285 we know the left is integer. */
1286 /* Left operand is defined, so is it IV? */
1287 SvIV_please_nomg(svl);
1289 bool auvok = SvUOK(svl);
1290 bool buvok = SvUOK(svr);
1291 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1292 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1301 const IV aiv = SvIVX(svl);
1304 auvok = TRUE; /* effectively it's a UV now */
1306 alow = -aiv; /* abs, auvok == false records sign */
1312 const IV biv = SvIVX(svr);
1315 buvok = TRUE; /* effectively it's a UV now */
1317 blow = -biv; /* abs, buvok == false records sign */
1321 /* If this does sign extension on unsigned it's time for plan B */
1322 ahigh = alow >> (4 * sizeof (UV));
1324 bhigh = blow >> (4 * sizeof (UV));
1326 if (ahigh && bhigh) {
1328 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1329 which is overflow. Drop to NVs below. */
1330 } else if (!ahigh && !bhigh) {
1331 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1332 so the unsigned multiply cannot overflow. */
1333 const UV product = alow * blow;
1334 if (auvok == buvok) {
1335 /* -ve * -ve or +ve * +ve gives a +ve result. */
1339 } else if (product <= (UV)IV_MIN) {
1340 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1341 /* -ve result, which could overflow an IV */
1343 SETi( -(IV)product );
1345 } /* else drop to NVs below. */
1347 /* One operand is large, 1 small */
1350 /* swap the operands */
1352 bhigh = blow; /* bhigh now the temp var for the swap */
1356 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1357 multiplies can't overflow. shift can, add can, -ve can. */
1358 product_middle = ahigh * blow;
1359 if (!(product_middle & topmask)) {
1360 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1362 product_middle <<= (4 * sizeof (UV));
1363 product_low = alow * blow;
1365 /* as for pp_add, UV + something mustn't get smaller.
1366 IIRC ANSI mandates this wrapping *behaviour* for
1367 unsigned whatever the actual representation*/
1368 product_low += product_middle;
1369 if (product_low >= product_middle) {
1370 /* didn't overflow */
1371 if (auvok == buvok) {
1372 /* -ve * -ve or +ve * +ve gives a +ve result. */
1374 SETu( product_low );
1376 } else if (product_low <= (UV)IV_MIN) {
1377 /* 2s complement assumption again */
1378 /* -ve result, which could overflow an IV */
1380 SETi( -(IV)product_low );
1382 } /* else drop to NVs below. */
1384 } /* product_middle too large */
1385 } /* ahigh && bhigh */
1390 NV right = SvNV_nomg(svr);
1391 NV left = SvNV_nomg(svl);
1393 SETn( left * right );
1400 dVAR; dSP; dATARGET; SV *svl, *svr;
1401 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1404 /* Only try to do UV divide first
1405 if ((SLOPPYDIVIDE is true) or
1406 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1408 The assumption is that it is better to use floating point divide
1409 whenever possible, only doing integer divide first if we can't be sure.
1410 If NV_PRESERVES_UV is true then we know at compile time that no UV
1411 can be too large to preserve, so don't need to compile the code to
1412 test the size of UVs. */
1415 # define PERL_TRY_UV_DIVIDE
1416 /* ensure that 20./5. == 4. */
1418 # ifdef PERL_PRESERVE_IVUV
1419 # ifndef NV_PRESERVES_UV
1420 # define PERL_TRY_UV_DIVIDE
1425 #ifdef PERL_TRY_UV_DIVIDE
1426 SvIV_please_nomg(svr);
1428 SvIV_please_nomg(svl);
1430 bool left_non_neg = SvUOK(svl);
1431 bool right_non_neg = SvUOK(svr);
1435 if (right_non_neg) {
1439 const IV biv = SvIVX(svr);
1442 right_non_neg = TRUE; /* effectively it's a UV now */
1448 /* historically undef()/0 gives a "Use of uninitialized value"
1449 warning before dieing, hence this test goes here.
1450 If it were immediately before the second SvIV_please, then
1451 DIE() would be invoked before left was even inspected, so
1452 no inspection would give no warning. */
1454 DIE(aTHX_ "Illegal division by zero");
1460 const IV aiv = SvIVX(svl);
1463 left_non_neg = TRUE; /* effectively it's a UV now */
1472 /* For sloppy divide we always attempt integer division. */
1474 /* Otherwise we only attempt it if either or both operands
1475 would not be preserved by an NV. If both fit in NVs
1476 we fall through to the NV divide code below. However,
1477 as left >= right to ensure integer result here, we know that
1478 we can skip the test on the right operand - right big
1479 enough not to be preserved can't get here unless left is
1482 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1485 /* Integer division can't overflow, but it can be imprecise. */
1486 const UV result = left / right;
1487 if (result * right == left) {
1488 SP--; /* result is valid */
1489 if (left_non_neg == right_non_neg) {
1490 /* signs identical, result is positive. */
1494 /* 2s complement assumption */
1495 if (result <= (UV)IV_MIN)
1496 SETi( -(IV)result );
1498 /* It's exact but too negative for IV. */
1499 SETn( -(NV)result );
1502 } /* tried integer divide but it was not an integer result */
1503 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1504 } /* left wasn't SvIOK */
1505 } /* right wasn't SvIOK */
1506 #endif /* PERL_TRY_UV_DIVIDE */
1508 NV right = SvNV_nomg(svr);
1509 NV left = SvNV_nomg(svl);
1510 (void)POPs;(void)POPs;
1511 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1512 if (! Perl_isnan(right) && right == 0.0)
1516 DIE(aTHX_ "Illegal division by zero");
1517 PUSHn( left / right );
1524 dVAR; dSP; dATARGET;
1525 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1529 bool left_neg = FALSE;
1530 bool right_neg = FALSE;
1531 bool use_double = FALSE;
1532 bool dright_valid = FALSE;
1535 SV * const svr = TOPs;
1536 SV * const svl = TOPm1s;
1537 SvIV_please_nomg(svr);
1539 right_neg = !SvUOK(svr);
1543 const IV biv = SvIVX(svr);
1546 right_neg = FALSE; /* effectively it's a UV now */
1553 dright = SvNV_nomg(svr);
1554 right_neg = dright < 0;
1557 if (dright < UV_MAX_P1) {
1558 right = U_V(dright);
1559 dright_valid = TRUE; /* In case we need to use double below. */
1565 /* At this point use_double is only true if right is out of range for
1566 a UV. In range NV has been rounded down to nearest UV and
1567 use_double false. */
1568 SvIV_please_nomg(svl);
1569 if (!use_double && SvIOK(svl)) {
1571 left_neg = !SvUOK(svl);
1575 const IV aiv = SvIVX(svl);
1578 left_neg = FALSE; /* effectively it's a UV now */
1586 dleft = SvNV_nomg(svl);
1587 left_neg = dleft < 0;
1591 /* This should be exactly the 5.6 behaviour - if left and right are
1592 both in range for UV then use U_V() rather than floor. */
1594 if (dleft < UV_MAX_P1) {
1595 /* right was in range, so is dleft, so use UVs not double.
1599 /* left is out of range for UV, right was in range, so promote
1600 right (back) to double. */
1602 /* The +0.5 is used in 5.6 even though it is not strictly
1603 consistent with the implicit +0 floor in the U_V()
1604 inside the #if 1. */
1605 dleft = Perl_floor(dleft + 0.5);
1608 dright = Perl_floor(dright + 0.5);
1619 DIE(aTHX_ "Illegal modulus zero");
1621 dans = Perl_fmod(dleft, dright);
1622 if ((left_neg != right_neg) && dans)
1623 dans = dright - dans;
1626 sv_setnv(TARG, dans);
1632 DIE(aTHX_ "Illegal modulus zero");
1635 if ((left_neg != right_neg) && ans)
1638 /* XXX may warn: unary minus operator applied to unsigned type */
1639 /* could change -foo to be (~foo)+1 instead */
1640 if (ans <= ~((UV)IV_MAX)+1)
1641 sv_setiv(TARG, ~ans+1);
1643 sv_setnv(TARG, -(NV)ans);
1646 sv_setuv(TARG, ans);
1655 dVAR; dSP; dATARGET;
1659 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1660 /* TODO: think of some way of doing list-repeat overloading ??? */
1665 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1671 const UV uv = SvUV_nomg(sv);
1673 count = IV_MAX; /* The best we can do? */
1677 const IV iv = SvIV_nomg(sv);
1684 else if (SvNOKp(sv)) {
1685 const NV nv = SvNV_nomg(sv);
1692 count = SvIV_nomg(sv);
1694 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1696 static const char oom_list_extend[] = "Out of memory during list extend";
1697 const I32 items = SP - MARK;
1698 const I32 max = items * count;
1700 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1701 /* Did the max computation overflow? */
1702 if (items > 0 && max > 0 && (max < items || max < count))
1703 Perl_croak(aTHX_ oom_list_extend);
1708 /* This code was intended to fix 20010809.028:
1711 for (($x =~ /./g) x 2) {
1712 print chop; # "abcdabcd" expected as output.
1715 * but that change (#11635) broke this code:
1717 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1719 * I can't think of a better fix that doesn't introduce
1720 * an efficiency hit by copying the SVs. The stack isn't
1721 * refcounted, and mortalisation obviously doesn't
1722 * Do The Right Thing when the stack has more than
1723 * one pointer to the same mortal value.
1727 *SP = sv_2mortal(newSVsv(*SP));
1737 repeatcpy((char*)(MARK + items), (char*)MARK,
1738 items * sizeof(const SV *), count - 1);
1741 else if (count <= 0)
1744 else { /* Note: mark already snarfed by pp_list */
1745 SV * const tmpstr = POPs;
1748 static const char oom_string_extend[] =
1749 "Out of memory during string extend";
1752 sv_setsv_nomg(TARG, tmpstr);
1753 SvPV_force_nomg(TARG, len);
1754 isutf = DO_UTF8(TARG);
1759 const STRLEN max = (UV)count * len;
1760 if (len > MEM_SIZE_MAX / count)
1761 Perl_croak(aTHX_ oom_string_extend);
1762 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1763 SvGROW(TARG, max + 1);
1764 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1765 SvCUR_set(TARG, SvCUR(TARG) * count);
1767 *SvEND(TARG) = '\0';
1770 (void)SvPOK_only_UTF8(TARG);
1772 (void)SvPOK_only(TARG);
1774 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1775 /* The parser saw this as a list repeat, and there
1776 are probably several items on the stack. But we're
1777 in scalar context, and there's no pp_list to save us
1778 now. So drop the rest of the items -- robin@kitsite.com
1790 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1791 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1794 useleft = USE_LEFT(svl);
1795 #ifdef PERL_PRESERVE_IVUV
1796 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1797 "bad things" happen if you rely on signed integers wrapping. */
1798 SvIV_please_nomg(svr);
1800 /* Unless the left argument is integer in range we are going to have to
1801 use NV maths. Hence only attempt to coerce the right argument if
1802 we know the left is integer. */
1803 register UV auv = 0;
1809 a_valid = auvok = 1;
1810 /* left operand is undef, treat as zero. */
1812 /* Left operand is defined, so is it IV? */
1813 SvIV_please_nomg(svl);
1815 if ((auvok = SvUOK(svl)))
1818 register const IV aiv = SvIVX(svl);
1821 auvok = 1; /* Now acting as a sign flag. */
1822 } else { /* 2s complement assumption for IV_MIN */
1830 bool result_good = 0;
1833 bool buvok = SvUOK(svr);
1838 register const IV biv = SvIVX(svr);
1845 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1846 else "IV" now, independent of how it came in.
1847 if a, b represents positive, A, B negative, a maps to -A etc
1852 all UV maths. negate result if A negative.
1853 subtract if signs same, add if signs differ. */
1855 if (auvok ^ buvok) {
1864 /* Must get smaller */
1869 if (result <= buv) {
1870 /* result really should be -(auv-buv). as its negation
1871 of true value, need to swap our result flag */
1883 if (result <= (UV)IV_MIN)
1884 SETi( -(IV)result );
1886 /* result valid, but out of range for IV. */
1887 SETn( -(NV)result );
1891 } /* Overflow, drop through to NVs. */
1896 NV value = SvNV_nomg(svr);
1900 /* left operand is undef, treat as zero - value */
1904 SETn( SvNV_nomg(svl) - value );
1911 dVAR; dSP; dATARGET; SV *svl, *svr;
1912 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1916 const IV shift = SvIV_nomg(svr);
1917 if (PL_op->op_private & HINT_INTEGER) {
1918 const IV i = SvIV_nomg(svl);
1922 const UV u = SvUV_nomg(svl);
1931 dVAR; dSP; dATARGET; SV *svl, *svr;
1932 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
1936 const IV shift = SvIV_nomg(svr);
1937 if (PL_op->op_private & HINT_INTEGER) {
1938 const IV i = SvIV_nomg(svl);
1942 const UV u = SvUV_nomg(svl);
1954 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
1958 (SvIOK_notUV(left) && SvIOK_notUV(right))
1959 ? (SvIVX(left) < SvIVX(right))
1960 : (do_ncmp(left, right) == -1)
1970 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1974 (SvIOK_notUV(left) && SvIOK_notUV(right))
1975 ? (SvIVX(left) > SvIVX(right))
1976 : (do_ncmp(left, right) == 1)
1986 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
1990 (SvIOK_notUV(left) && SvIOK_notUV(right))
1991 ? (SvIVX(left) <= SvIVX(right))
1992 : (do_ncmp(left, right) <= 0)
2002 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2006 (SvIOK_notUV(left) && SvIOK_notUV(right))
2007 ? (SvIVX(left) >= SvIVX(right))
2008 : ( (do_ncmp(left, right) & 2) == 0)
2018 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2022 (SvIOK_notUV(left) && SvIOK_notUV(right))
2023 ? (SvIVX(left) != SvIVX(right))
2024 : (do_ncmp(left, right) != 0)
2029 /* compare left and right SVs. Returns:
2033 * 2: left or right was a NaN
2036 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2040 PERL_ARGS_ASSERT_DO_NCMP;
2041 #ifdef PERL_PRESERVE_IVUV
2042 SvIV_please_nomg(right);
2043 /* Fortunately it seems NaN isn't IOK */
2045 SvIV_please_nomg(left);
2048 const IV leftiv = SvIVX(left);
2049 if (!SvUOK(right)) {
2050 /* ## IV <=> IV ## */
2051 const IV rightiv = SvIVX(right);
2052 return (leftiv > rightiv) - (leftiv < rightiv);
2054 /* ## IV <=> UV ## */
2056 /* As (b) is a UV, it's >=0, so it must be < */
2059 const UV rightuv = SvUVX(right);
2060 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2065 /* ## UV <=> UV ## */
2066 const UV leftuv = SvUVX(left);
2067 const UV rightuv = SvUVX(right);
2068 return (leftuv > rightuv) - (leftuv < rightuv);
2070 /* ## UV <=> IV ## */
2072 const IV rightiv = SvIVX(right);
2074 /* As (a) is a UV, it's >=0, so it cannot be < */
2077 const UV leftuv = SvUVX(left);
2078 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2086 NV const rnv = SvNV_nomg(right);
2087 NV const lnv = SvNV_nomg(left);
2089 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2090 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2093 return (lnv > rnv) - (lnv < rnv);
2112 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2115 value = do_ncmp(left, right);
2130 int amg_type = sle_amg;
2134 switch (PL_op->op_type) {
2153 tryAMAGICbin_MG(amg_type, AMGf_set);
2156 const int cmp = (IN_LOCALE_RUNTIME
2157 ? sv_cmp_locale_flags(left, right, 0)
2158 : sv_cmp_flags(left, right, 0));
2159 SETs(boolSV(cmp * multiplier < rhs));
2167 tryAMAGICbin_MG(seq_amg, AMGf_set);
2170 SETs(boolSV(sv_eq_flags(left, right, 0)));
2178 tryAMAGICbin_MG(sne_amg, AMGf_set);
2181 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2189 tryAMAGICbin_MG(scmp_amg, 0);
2192 const int cmp = (IN_LOCALE_RUNTIME
2193 ? sv_cmp_locale_flags(left, right, 0)
2194 : sv_cmp_flags(left, right, 0));
2202 dVAR; dSP; dATARGET;
2203 tryAMAGICbin_MG(band_amg, AMGf_assign);
2206 if (SvNIOKp(left) || SvNIOKp(right)) {
2207 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2208 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2209 if (PL_op->op_private & HINT_INTEGER) {
2210 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2214 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2217 if (left_ro_nonnum) SvNIOK_off(left);
2218 if (right_ro_nonnum) SvNIOK_off(right);
2221 do_vop(PL_op->op_type, TARG, left, right);
2230 dVAR; dSP; dATARGET;
2231 const int op_type = PL_op->op_type;
2233 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2236 if (SvNIOKp(left) || SvNIOKp(right)) {
2237 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2238 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2239 if (PL_op->op_private & HINT_INTEGER) {
2240 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2241 const IV r = SvIV_nomg(right);
2242 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2246 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2247 const UV r = SvUV_nomg(right);
2248 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2251 if (left_ro_nonnum) SvNIOK_off(left);
2252 if (right_ro_nonnum) SvNIOK_off(right);
2255 do_vop(op_type, TARG, left, right);
2265 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2267 SV * const sv = TOPs;
2268 const int flags = SvFLAGS(sv);
2270 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
2274 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2275 /* It's publicly an integer, or privately an integer-not-float */
2278 if (SvIVX(sv) == IV_MIN) {
2279 /* 2s complement assumption. */
2280 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2283 else if (SvUVX(sv) <= IV_MAX) {
2288 else if (SvIVX(sv) != IV_MIN) {
2292 #ifdef PERL_PRESERVE_IVUV
2300 SETn(-SvNV_nomg(sv));
2301 else if (SvPOKp(sv)) {
2303 const char * const s = SvPV_nomg_const(sv, len);
2304 if (isIDFIRST(*s)) {
2305 sv_setpvs(TARG, "-");
2308 else if (*s == '+' || *s == '-') {
2309 sv_setsv_nomg(TARG, sv);
2310 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2312 else if (DO_UTF8(sv)) {
2313 SvIV_please_nomg(sv);
2315 goto oops_its_an_int;
2317 sv_setnv(TARG, -SvNV_nomg(sv));
2319 sv_setpvs(TARG, "-");
2324 SvIV_please_nomg(sv);
2326 goto oops_its_an_int;
2327 sv_setnv(TARG, -SvNV_nomg(sv));
2332 SETn(-SvNV_nomg(sv));
2340 tryAMAGICun_MG(not_amg, AMGf_set);
2341 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2348 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2352 if (PL_op->op_private & HINT_INTEGER) {
2353 const IV i = ~SvIV_nomg(sv);
2357 const UV u = ~SvUV_nomg(sv);
2366 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2367 sv_setsv_nomg(TARG, sv);
2368 tmps = (U8*)SvPV_force_nomg(TARG, len);
2371 /* Calculate exact length, let's not estimate. */
2376 U8 * const send = tmps + len;
2377 U8 * const origtmps = tmps;
2378 const UV utf8flags = UTF8_ALLOW_ANYUV;
2380 while (tmps < send) {
2381 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2383 targlen += UNISKIP(~c);
2389 /* Now rewind strings and write them. */
2396 Newx(result, targlen + 1, U8);
2398 while (tmps < send) {
2399 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2401 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2404 sv_usepvn_flags(TARG, (char*)result, targlen,
2405 SV_HAS_TRAILING_NUL);
2412 Newx(result, nchar + 1, U8);
2414 while (tmps < send) {
2415 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2420 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2428 register long *tmpl;
2429 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2432 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2437 for ( ; anum > 0; anum--, tmps++)
2445 /* integer versions of some of the above */
2449 dVAR; dSP; dATARGET;
2450 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2453 SETi( left * right );
2461 dVAR; dSP; dATARGET;
2462 tryAMAGICbin_MG(div_amg, AMGf_assign);
2465 IV value = SvIV_nomg(right);
2467 DIE(aTHX_ "Illegal division by zero");
2468 num = SvIV_nomg(left);
2470 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2474 value = num / value;
2480 #if defined(__GLIBC__) && IVSIZE == 8
2487 /* This is the vanilla old i_modulo. */
2488 dVAR; dSP; dATARGET;
2489 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2493 DIE(aTHX_ "Illegal modulus zero");
2494 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2498 SETi( left % right );
2503 #if defined(__GLIBC__) && IVSIZE == 8
2508 /* This is the i_modulo with the workaround for the _moddi3 bug
2509 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2510 * See below for pp_i_modulo. */
2511 dVAR; dSP; dATARGET;
2512 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2516 DIE(aTHX_ "Illegal modulus zero");
2517 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2521 SETi( left % PERL_ABS(right) );
2528 dVAR; dSP; dATARGET;
2529 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2533 DIE(aTHX_ "Illegal modulus zero");
2534 /* The assumption is to use hereafter the old vanilla version... */
2536 PL_ppaddr[OP_I_MODULO] =
2538 /* .. but if we have glibc, we might have a buggy _moddi3
2539 * (at least glicb 2.2.5 is known to have this bug), in other
2540 * words our integer modulus with negative quad as the second
2541 * argument might be broken. Test for this and re-patch the
2542 * opcode dispatch table if that is the case, remembering to
2543 * also apply the workaround so that this first round works
2544 * right, too. See [perl #9402] for more information. */
2548 /* Cannot do this check with inlined IV constants since
2549 * that seems to work correctly even with the buggy glibc. */
2551 /* Yikes, we have the bug.
2552 * Patch in the workaround version. */
2554 PL_ppaddr[OP_I_MODULO] =
2555 &Perl_pp_i_modulo_1;
2556 /* Make certain we work right this time, too. */
2557 right = PERL_ABS(right);
2560 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2564 SETi( left % right );
2572 dVAR; dSP; dATARGET;
2573 tryAMAGICbin_MG(add_amg, AMGf_assign);
2575 dPOPTOPiirl_ul_nomg;
2576 SETi( left + right );
2583 dVAR; dSP; dATARGET;
2584 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2586 dPOPTOPiirl_ul_nomg;
2587 SETi( left - right );
2595 tryAMAGICbin_MG(lt_amg, AMGf_set);
2598 SETs(boolSV(left < right));
2606 tryAMAGICbin_MG(gt_amg, AMGf_set);
2609 SETs(boolSV(left > right));
2617 tryAMAGICbin_MG(le_amg, AMGf_set);
2620 SETs(boolSV(left <= right));
2628 tryAMAGICbin_MG(ge_amg, AMGf_set);
2631 SETs(boolSV(left >= right));
2639 tryAMAGICbin_MG(eq_amg, AMGf_set);
2642 SETs(boolSV(left == right));
2650 tryAMAGICbin_MG(ne_amg, AMGf_set);
2653 SETs(boolSV(left != right));
2661 tryAMAGICbin_MG(ncmp_amg, 0);
2668 else if (left < right)
2680 tryAMAGICun_MG(neg_amg, 0);
2682 SV * const sv = TOPs;
2683 IV const i = SvIV_nomg(sv);
2689 /* High falutin' math. */
2694 tryAMAGICbin_MG(atan2_amg, 0);
2697 SETn(Perl_atan2(left, right));
2705 int amg_type = sin_amg;
2706 const char *neg_report = NULL;
2707 NV (*func)(NV) = Perl_sin;
2708 const int op_type = PL_op->op_type;
2725 amg_type = sqrt_amg;
2727 neg_report = "sqrt";
2732 tryAMAGICun_MG(amg_type, 0);
2734 SV * const arg = POPs;
2735 const NV value = SvNV_nomg(arg);
2737 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2738 SET_NUMERIC_STANDARD();
2739 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2742 XPUSHn(func(value));
2747 /* Support Configure command-line overrides for rand() functions.
2748 After 5.005, perhaps we should replace this by Configure support
2749 for drand48(), random(), or rand(). For 5.005, though, maintain
2750 compatibility by calling rand() but allow the user to override it.
2751 See INSTALL for details. --Andy Dougherty 15 July 1998
2753 /* Now it's after 5.005, and Configure supports drand48() and random(),
2754 in addition to rand(). So the overrides should not be needed any more.
2755 --Jarkko Hietaniemi 27 September 1998
2758 #ifndef HAS_DRAND48_PROTO
2759 extern double drand48 (void);
2769 value = 1.0; (void)POPs;
2775 if (!PL_srand_called) {
2776 (void)seedDrand01((Rand_seed_t)seed());
2777 PL_srand_called = TRUE;
2787 const UV anum = (MAXARG < 1 || (!TOPs && !POPs)) ? seed() : POPu;
2788 (void)seedDrand01((Rand_seed_t)anum);
2789 PL_srand_called = TRUE;
2793 /* Historically srand always returned true. We can avoid breaking
2795 sv_setpvs(TARG, "0 but true");
2804 tryAMAGICun_MG(int_amg, AMGf_numeric);
2806 SV * const sv = TOPs;
2807 const IV iv = SvIV_nomg(sv);
2808 /* XXX it's arguable that compiler casting to IV might be subtly
2809 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2810 else preferring IV has introduced a subtle behaviour change bug. OTOH
2811 relying on floating point to be accurate is a bug. */
2816 else if (SvIOK(sv)) {
2818 SETu(SvUV_nomg(sv));
2823 const NV value = SvNV_nomg(sv);
2825 if (value < (NV)UV_MAX + 0.5) {
2828 SETn(Perl_floor(value));
2832 if (value > (NV)IV_MIN - 0.5) {
2835 SETn(Perl_ceil(value));
2846 tryAMAGICun_MG(abs_amg, AMGf_numeric);
2848 SV * const sv = TOPs;
2849 /* This will cache the NV value if string isn't actually integer */
2850 const IV iv = SvIV_nomg(sv);
2855 else if (SvIOK(sv)) {
2856 /* IVX is precise */
2858 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
2866 /* 2s complement assumption. Also, not really needed as
2867 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2873 const NV value = SvNV_nomg(sv);
2887 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
2891 SV* const sv = POPs;
2893 tmps = (SvPV_const(sv, len));
2895 /* If Unicode, try to downgrade
2896 * If not possible, croak. */
2897 SV* const tsv = sv_2mortal(newSVsv(sv));
2900 sv_utf8_downgrade(tsv, FALSE);
2901 tmps = SvPV_const(tsv, len);
2903 if (PL_op->op_type == OP_HEX)
2906 while (*tmps && len && isSPACE(*tmps))
2910 if (*tmps == 'x' || *tmps == 'X') {
2912 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
2914 else if (*tmps == 'b' || *tmps == 'B')
2915 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
2917 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2919 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2933 SV * const sv = TOPs;
2935 if (SvGAMAGIC(sv)) {
2936 /* For an overloaded or magic scalar, we can't know in advance if
2937 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2938 it likes to cache the length. Maybe that should be a documented
2943 = sv_2pv_flags(sv, &len,
2944 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
2947 if (!SvPADTMP(TARG)) {
2948 sv_setsv(TARG, &PL_sv_undef);
2953 else if (DO_UTF8(sv)) {
2954 SETi(utf8_length((U8*)p, (U8*)p + len));
2958 } else if (SvOK(sv)) {
2959 /* Neither magic nor overloaded. */
2961 SETi(sv_len_utf8(sv));
2965 if (!SvPADTMP(TARG)) {
2966 sv_setsv_nomg(TARG, &PL_sv_undef);
2988 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
2991 const char *repl = NULL;
2993 int num_args = PL_op->op_private & 7;
2994 bool repl_need_utf8_upgrade = FALSE;
2995 bool repl_is_utf8 = FALSE;
2999 if((repl_sv = POPs)) {
3000 repl = SvPV_const(repl_sv, repl_len);
3001 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
3005 if ((len_sv = POPs)) {
3006 len_iv = SvIV(len_sv);
3007 len_is_uv = SvIOK_UV(len_sv);
3012 pos1_iv = SvIV(pos_sv);
3013 pos1_is_uv = SvIOK_UV(pos_sv);
3019 sv_utf8_upgrade(sv);
3021 else if (DO_UTF8(sv))
3022 repl_need_utf8_upgrade = TRUE;
3024 tmps = SvPV_const(sv, curlen);
3026 utf8_curlen = sv_len_utf8(sv);
3027 if (utf8_curlen == curlen)
3030 curlen = utf8_curlen;
3035 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
3036 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3039 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
3043 if (!len_is_uv && len_iv < 0) {
3044 pos2_iv = curlen + len_iv;
3046 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3049 } else { /* len_iv >= 0 */
3050 if (!pos1_is_uv && pos1_iv < 0) {
3051 pos2_iv = pos1_iv + len_iv;
3052 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3054 if ((UV)len_iv > curlen-(UV)pos1_iv)
3057 pos2_iv = pos1_iv+len_iv;
3067 if (!pos2_is_uv && pos2_iv < 0) {
3068 if (!pos1_is_uv && pos1_iv < 0)
3072 else if (!pos1_is_uv && pos1_iv < 0)
3075 if ((UV)pos2_iv < (UV)pos1_iv)
3077 if ((UV)pos2_iv > curlen)
3081 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3082 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3083 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3084 STRLEN byte_len = len;
3085 STRLEN byte_pos = utf8_curlen
3086 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3088 if (lvalue && !repl) {
3091 if (!SvGMAGICAL(sv)) {
3093 SvPV_force_nolen(sv);
3094 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3095 "Attempt to use reference as lvalue in substr");
3097 if (isGV_with_GP(sv))
3098 SvPV_force_nolen(sv);
3099 else if (SvOK(sv)) /* is it defined ? */
3100 (void)SvPOK_only_UTF8(sv);
3102 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3105 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3106 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3108 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3109 LvTARGOFF(ret) = pos;
3110 LvTARGLEN(ret) = len;
3113 PUSHs(ret); /* avoid SvSETMAGIC here */
3117 SvTAINTED_off(TARG); /* decontaminate */
3118 SvUTF8_off(TARG); /* decontaminate */
3121 sv_setpvn(TARG, tmps, byte_len);
3122 #ifdef USE_LOCALE_COLLATE
3123 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3129 SV* repl_sv_copy = NULL;
3131 if (repl_need_utf8_upgrade) {
3132 repl_sv_copy = newSVsv(repl_sv);
3133 sv_utf8_upgrade(repl_sv_copy);
3134 repl = SvPV_const(repl_sv_copy, repl_len);
3135 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3139 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3142 SvREFCNT_dec(repl_sv_copy);
3152 Perl_croak(aTHX_ "substr outside of string");
3153 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3160 register const IV size = POPi;
3161 register const IV offset = POPi;
3162 register SV * const src = POPs;
3163 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3166 if (lvalue) { /* it's an lvalue! */
3167 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3168 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3170 LvTARG(ret) = SvREFCNT_inc_simple(src);
3171 LvTARGOFF(ret) = offset;
3172 LvTARGLEN(ret) = size;
3176 SvTAINTED_off(TARG); /* decontaminate */
3180 sv_setuv(ret, do_vecget(src, offset, size));
3196 const char *little_p;
3199 const bool is_index = PL_op->op_type == OP_INDEX;
3200 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
3206 big_p = SvPV_const(big, biglen);
3207 little_p = SvPV_const(little, llen);
3209 big_utf8 = DO_UTF8(big);
3210 little_utf8 = DO_UTF8(little);
3211 if (big_utf8 ^ little_utf8) {
3212 /* One needs to be upgraded. */
3213 if (little_utf8 && !PL_encoding) {
3214 /* Well, maybe instead we might be able to downgrade the small
3216 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3219 /* If the large string is ISO-8859-1, and it's not possible to
3220 convert the small string to ISO-8859-1, then there is no
3221 way that it could be found anywhere by index. */
3226 /* At this point, pv is a malloc()ed string. So donate it to temp
3227 to ensure it will get free()d */
3228 little = temp = newSV(0);
3229 sv_usepvn(temp, pv, llen);
3230 little_p = SvPVX(little);
3233 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3236 sv_recode_to_utf8(temp, PL_encoding);
3238 sv_utf8_upgrade(temp);
3243 big_p = SvPV_const(big, biglen);
3246 little_p = SvPV_const(little, llen);
3250 if (SvGAMAGIC(big)) {
3251 /* Life just becomes a lot easier if I use a temporary here.
3252 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3253 will trigger magic and overloading again, as will fbm_instr()
3255 big = newSVpvn_flags(big_p, biglen,
3256 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3259 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3260 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3261 warn on undef, and we've already triggered a warning with the
3262 SvPV_const some lines above. We can't remove that, as we need to
3263 call some SvPV to trigger overloading early and find out if the
3265 This is all getting to messy. The API isn't quite clean enough,
3266 because data access has side effects.
3268 little = newSVpvn_flags(little_p, llen,
3269 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3270 little_p = SvPVX(little);
3274 offset = is_index ? 0 : biglen;
3276 if (big_utf8 && offset > 0)
3277 sv_pos_u2b(big, &offset, 0);
3283 else if (offset > (I32)biglen)
3285 if (!(little_p = is_index
3286 ? fbm_instr((unsigned char*)big_p + offset,
3287 (unsigned char*)big_p + biglen, little, 0)
3288 : rninstr(big_p, big_p + offset,
3289 little_p, little_p + llen)))
3292 retval = little_p - big_p;
3293 if (retval > 0 && big_utf8)
3294 sv_pos_b2u(big, &retval);
3304 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3305 SvTAINTED_off(TARG);
3306 do_sprintf(TARG, SP-MARK, MARK+1);
3307 TAINT_IF(SvTAINTED(TARG));
3319 const U8 *s = (U8*)SvPV_const(argsv, len);
3321 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3322 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3323 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3327 XPUSHu(DO_UTF8(argsv) ?
3328 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3340 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3342 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3344 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3346 (void) POPs; /* Ignore the argument value. */
3347 value = UNICODE_REPLACEMENT;
3353 SvUPGRADE(TARG,SVt_PV);
3355 if (value > 255 && !IN_BYTES) {
3356 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3357 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3358 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3360 (void)SvPOK_only(TARG);
3369 *tmps++ = (char)value;
3371 (void)SvPOK_only(TARG);
3373 if (PL_encoding && !IN_BYTES) {
3374 sv_recode_to_utf8(TARG, PL_encoding);
3376 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3377 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3381 *tmps++ = (char)value;
3397 const char *tmps = SvPV_const(left, len);
3399 if (DO_UTF8(left)) {
3400 /* If Unicode, try to downgrade.
3401 * If not possible, croak.
3402 * Yes, we made this up. */
3403 SV* const tsv = sv_2mortal(newSVsv(left));
3406 sv_utf8_downgrade(tsv, FALSE);
3407 tmps = SvPV_const(tsv, len);
3409 # ifdef USE_ITHREADS
3411 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3412 /* This should be threadsafe because in ithreads there is only
3413 * one thread per interpreter. If this would not be true,
3414 * we would need a mutex to protect this malloc. */
3415 PL_reentrant_buffer->_crypt_struct_buffer =
3416 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3417 #if defined(__GLIBC__) || defined(__EMX__)
3418 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3419 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3420 /* work around glibc-2.2.5 bug */
3421 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3425 # endif /* HAS_CRYPT_R */
3426 # endif /* USE_ITHREADS */
3428 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3430 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3436 "The crypt() function is unimplemented due to excessive paranoia.");
3440 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3441 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3443 /* Below are several macros that generate code */
3444 /* Generates code to store a unicode codepoint c that is known to occupy
3445 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3446 #define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3448 *(p) = UTF8_TWO_BYTE_HI(c); \
3449 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3452 /* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3453 * available byte after the two bytes */
3454 #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3456 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3457 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3460 /* Generates code to store the upper case of latin1 character l which is known
3461 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3462 * are only two characters that fit this description, and this macro knows
3463 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3465 #define STORE_NON_LATIN1_UC(p, l) \
3467 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3468 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3469 } else { /* Must be the following letter */ \
3470 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3474 /* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3475 * after the character stored */
3476 #define CAT_NON_LATIN1_UC(p, l) \
3478 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3479 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3481 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3485 /* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3486 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3487 * and must require two bytes to store it. Advances p to point to the next
3488 * available position */
3489 #define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3491 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3492 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3493 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3494 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3495 } else {/* else is one of the other two special cases */ \
3496 CAT_NON_LATIN1_UC((p), (l)); \
3502 /* Actually is both lcfirst() and ucfirst(). Only the first character
3503 * changes. This means that possibly we can change in-place, ie., just
3504 * take the source and change that one character and store it back, but not
3505 * if read-only etc, or if the length changes */
3510 STRLEN slen; /* slen is the byte length of the whole SV. */
3513 bool inplace; /* ? Convert first char only, in-place */
3514 bool doing_utf8 = FALSE; /* ? using utf8 */
3515 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3516 const int op_type = PL_op->op_type;
3519 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3520 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3521 * stored as UTF-8 at s. */
3522 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3523 * lowercased) character stored in tmpbuf. May be either
3524 * UTF-8 or not, but in either case is the number of bytes */
3528 s = (const U8*)SvPV_nomg_const(source, slen);
3530 if (ckWARN(WARN_UNINITIALIZED))
3531 report_uninit(source);
3536 /* We may be able to get away with changing only the first character, in
3537 * place, but not if read-only, etc. Later we may discover more reasons to
3538 * not convert in-place. */
3539 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3541 /* First calculate what the changed first character should be. This affects
3542 * whether we can just swap it out, leaving the rest of the string unchanged,
3543 * or even if have to convert the dest to UTF-8 when the source isn't */
3545 if (! slen) { /* If empty */
3546 need = 1; /* still need a trailing NUL */
3548 else if (DO_UTF8(source)) { /* Is the source utf8? */
3551 if (UTF8_IS_INVARIANT(*s)) {
3553 /* An invariant source character is either ASCII or, in EBCDIC, an
3554 * ASCII equivalent or a caseless C1 control. In both these cases,
3555 * the lower and upper cases of any character are also invariants
3556 * (and title case is the same as upper case). So it is safe to
3557 * use the simple case change macros which avoid the overhead of
3558 * the general functions. Note that if perl were to be extended to
3559 * do locale handling in UTF-8 strings, this wouldn't be true in,
3560 * for example, Lithuanian or Turkic. */
3561 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3565 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3568 /* Similarly, if the source character isn't invariant but is in the
3569 * latin1 range (or EBCDIC equivalent thereof), we have the case
3570 * changes compiled into perl, and can avoid the overhead of the
3571 * general functions. In this range, the characters are stored as
3572 * two UTF-8 bytes, and it so happens that any changed-case version
3573 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3577 /* Convert the two source bytes to a single Unicode code point
3578 * value, change case and save for below */
3579 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3580 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3581 U8 lower = toLOWER_LATIN1(chr);
3582 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3584 else { /* ucfirst */
3585 U8 upper = toUPPER_LATIN1_MOD(chr);
3587 /* Most of the latin1 range characters are well-behaved. Their
3588 * title and upper cases are the same, and are also in the
3589 * latin1 range. The macro above returns their upper (hence
3590 * title) case, and all that need be done is to save the result
3591 * for below. However, several characters are problematic, and
3592 * have to be handled specially. The MOD in the macro name
3593 * above means that these tricky characters all get mapped to
3594 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3595 * This mapping saves some tests for the majority of the
3598 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3600 /* Not tricky. Just save it. */
3601 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3603 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3605 /* This one is tricky because it is two characters long,
3606 * though the UTF-8 is still two bytes, so the stored
3607 * length doesn't change */
3608 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3609 *(tmpbuf + 1) = 's';
3613 /* The other two have their title and upper cases the same,
3614 * but are tricky because the changed-case characters
3615 * aren't in the latin1 range. They, however, do fit into
3616 * two UTF-8 bytes */
3617 STORE_NON_LATIN1_UC(tmpbuf, chr);
3623 /* Here, can't short-cut the general case */
3625 utf8_to_uvchr(s, &ulen);
3626 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3627 else toLOWER_utf8(s, tmpbuf, &tculen);
3629 /* we can't do in-place if the length changes. */
3630 if (ulen != tculen) inplace = FALSE;
3631 need = slen + 1 - ulen + tculen;
3634 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3635 * latin1 is treated as caseless. Note that a locale takes
3637 tculen = 1; /* Most characters will require one byte, but this will
3638 * need to be overridden for the tricky ones */
3641 if (op_type == OP_LCFIRST) {
3643 /* lower case the first letter: no trickiness for any character */
3644 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3645 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3648 else if (IN_LOCALE_RUNTIME) {
3649 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3650 * have upper and title case different
3653 else if (! IN_UNI_8_BIT) {
3654 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3655 * on EBCDIC machines whatever the
3656 * native function does */
3658 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3659 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3661 /* tmpbuf now has the correct title case for all latin1 characters
3662 * except for the several ones that have tricky handling. All
3663 * of these are mapped by the MOD to the letter below. */
3664 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3666 /* The length is going to change, with all three of these, so
3667 * can't replace just the first character */
3670 /* We use the original to distinguish between these tricky
3672 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3673 /* Two character title case 'Ss', but can remain non-UTF-8 */
3676 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3681 /* The other two tricky ones have their title case outside
3682 * latin1. It is the same as their upper case. */
3684 STORE_NON_LATIN1_UC(tmpbuf, *s);
3686 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3687 * and their upper cases is 2. */
3690 /* The entire result will have to be in UTF-8. Assume worst
3691 * case sizing in conversion. (all latin1 characters occupy
3692 * at most two bytes in utf8) */
3693 convert_source_to_utf8 = TRUE;
3694 need = slen * 2 + 1;
3696 } /* End of is one of the three special chars */
3697 } /* End of use Unicode (Latin1) semantics */
3698 } /* End of changing the case of the first character */
3700 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3701 * generate the result */
3704 /* We can convert in place. This means we change just the first
3705 * character without disturbing the rest; no need to grow */
3707 s = d = (U8*)SvPV_force_nomg(source, slen);
3713 /* Here, we can't convert in place; we earlier calculated how much
3714 * space we will need, so grow to accommodate that */
3715 SvUPGRADE(dest, SVt_PV);
3716 d = (U8*)SvGROW(dest, need);
3717 (void)SvPOK_only(dest);
3724 if (! convert_source_to_utf8) {
3726 /* Here both source and dest are in UTF-8, but have to create
3727 * the entire output. We initialize the result to be the
3728 * title/lower cased first character, and then append the rest
3730 sv_setpvn(dest, (char*)tmpbuf, tculen);
3732 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3736 const U8 *const send = s + slen;
3738 /* Here the dest needs to be in UTF-8, but the source isn't,
3739 * except we earlier UTF-8'd the first character of the source
3740 * into tmpbuf. First put that into dest, and then append the
3741 * rest of the source, converting it to UTF-8 as we go. */
3743 /* Assert tculen is 2 here because the only two characters that
3744 * get to this part of the code have 2-byte UTF-8 equivalents */
3746 *d++ = *(tmpbuf + 1);
3747 s++; /* We have just processed the 1st char */
3749 for (; s < send; s++) {
3750 d = uvchr_to_utf8(d, *s);
3753 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3757 else { /* in-place UTF-8. Just overwrite the first character */
3758 Copy(tmpbuf, d, tculen, U8);
3759 SvCUR_set(dest, need - 1);
3762 else { /* Neither source nor dest are in or need to be UTF-8 */
3764 if (IN_LOCALE_RUNTIME) {
3768 if (inplace) { /* in-place, only need to change the 1st char */
3771 else { /* Not in-place */
3773 /* Copy the case-changed character(s) from tmpbuf */
3774 Copy(tmpbuf, d, tculen, U8);
3775 d += tculen - 1; /* Code below expects d to point to final
3776 * character stored */
3779 else { /* empty source */
3780 /* See bug #39028: Don't taint if empty */
3784 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3785 * the destination to retain that flag */
3789 if (!inplace) { /* Finish the rest of the string, unchanged */
3790 /* This will copy the trailing NUL */
3791 Copy(s + 1, d + 1, slen, U8);
3792 SvCUR_set(dest, need - 1);
3795 if (dest != source && SvTAINTED(source))
3801 /* There's so much setup/teardown code common between uc and lc, I wonder if
3802 it would be worth merging the two, and just having a switch outside each
3803 of the three tight loops. There is less and less commonality though */
3817 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3818 && SvTEMP(source) && !DO_UTF8(source)
3819 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3821 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3822 * make the loop tight, so we overwrite the source with the dest before
3823 * looking at it, and we need to look at the original source
3824 * afterwards. There would also need to be code added to handle
3825 * switching to not in-place in midstream if we run into characters
3826 * that change the length.
3829 s = d = (U8*)SvPV_force_nomg(source, len);
3836 /* The old implementation would copy source into TARG at this point.
3837 This had the side effect that if source was undef, TARG was now
3838 an undefined SV with PADTMP set, and they don't warn inside
3839 sv_2pv_flags(). However, we're now getting the PV direct from
3840 source, which doesn't have PADTMP set, so it would warn. Hence the
3844 s = (const U8*)SvPV_nomg_const(source, len);
3846 if (ckWARN(WARN_UNINITIALIZED))
3847 report_uninit(source);
3853 SvUPGRADE(dest, SVt_PV);
3854 d = (U8*)SvGROW(dest, min);
3855 (void)SvPOK_only(dest);
3860 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3861 to check DO_UTF8 again here. */
3863 if (DO_UTF8(source)) {
3864 const U8 *const send = s + len;
3865 U8 tmpbuf[UTF8_MAXBYTES+1];
3867 /* All occurrences of these are to be moved to follow any other marks.
3868 * This is context-dependent. We may not be passed enough context to
3869 * move the iota subscript beyond all of them, but we do the best we can
3870 * with what we're given. The result is always better than if we
3871 * hadn't done this. And, the problem would only arise if we are
3872 * passed a character without all its combining marks, which would be
3873 * the caller's mistake. The information this is based on comes from a
3874 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3875 * itself) and so can't be checked properly to see if it ever gets
3876 * revised. But the likelihood of it changing is remote */
3877 bool in_iota_subscript = FALSE;
3880 if (in_iota_subscript && ! is_utf8_mark(s)) {
3881 /* A non-mark. Time to output the iota subscript */
3882 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
3883 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3885 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3886 in_iota_subscript = FALSE;
3889 /* If the UTF-8 character is invariant, then it is in the range
3890 * known by the standard macro; result is only one byte long */
3891 if (UTF8_IS_INVARIANT(*s)) {
3895 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3897 /* Likewise, if it fits in a byte, its case change is in our
3899 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3900 U8 upper = toUPPER_LATIN1_MOD(orig);
3901 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
3906 /* Otherwise, need the general UTF-8 case. Get the changed
3907 * case value and copy it to the output buffer */
3909 const STRLEN u = UTF8SKIP(s);
3912 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
3913 if (uv == GREEK_CAPITAL_LETTER_IOTA
3914 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
3916 in_iota_subscript = TRUE;
3919 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3920 /* If the eventually required minimum size outgrows
3921 * the available space, we need to grow. */
3922 const UV o = d - (U8*)SvPVX_const(dest);
3924 /* If someone uppercases one million U+03B0s we
3925 * SvGROW() one million times. Or we could try
3926 * guessing how much to allocate without allocating too
3927 * much. Such is life. See corresponding comment in
3928 * lc code for another option */
3930 d = (U8*)SvPVX(dest) + o;
3932 Copy(tmpbuf, d, ulen, U8);
3938 if (in_iota_subscript) {
3939 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3943 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3945 else { /* Not UTF-8 */
3947 const U8 *const send = s + len;
3949 /* Use locale casing if in locale; regular style if not treating
3950 * latin1 as having case; otherwise the latin1 casing. Do the
3951 * whole thing in a tight loop, for speed, */
3952 if (IN_LOCALE_RUNTIME) {
3955 for (; s < send; d++, s++)
3956 *d = toUPPER_LC(*s);
3958 else if (! IN_UNI_8_BIT) {
3959 for (; s < send; d++, s++) {
3964 for (; s < send; d++, s++) {
3965 *d = toUPPER_LATIN1_MOD(*s);
3966 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
3968 /* The mainstream case is the tight loop above. To avoid
3969 * extra tests in that, all three characters that require
3970 * special handling are mapped by the MOD to the one tested
3972 * Use the source to distinguish between the three cases */
3974 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3976 /* uc() of this requires 2 characters, but they are
3977 * ASCII. If not enough room, grow the string */
3978 if (SvLEN(dest) < ++min) {
3979 const UV o = d - (U8*)SvPVX_const(dest);
3981 d = (U8*)SvPVX(dest) + o;
3983 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
3984 continue; /* Back to the tight loop; still in ASCII */
3987 /* The other two special handling characters have their
3988 * upper cases outside the latin1 range, hence need to be
3989 * in UTF-8, so the whole result needs to be in UTF-8. So,
3990 * here we are somewhere in the middle of processing a
3991 * non-UTF-8 string, and realize that we will have to convert
3992 * the whole thing to UTF-8. What to do? There are
3993 * several possibilities. The simplest to code is to
3994 * convert what we have so far, set a flag, and continue on
3995 * in the loop. The flag would be tested each time through
3996 * the loop, and if set, the next character would be
3997 * converted to UTF-8 and stored. But, I (khw) didn't want
3998 * to slow down the mainstream case at all for this fairly
3999 * rare case, so I didn't want to add a test that didn't
4000 * absolutely have to be there in the loop, besides the
4001 * possibility that it would get too complicated for
4002 * optimizers to deal with. Another possibility is to just
4003 * give up, convert the source to UTF-8, and restart the
4004 * function that way. Another possibility is to convert
4005 * both what has already been processed and what is yet to
4006 * come separately to UTF-8, then jump into the loop that
4007 * handles UTF-8. But the most efficient time-wise of the
4008 * ones I could think of is what follows, and turned out to
4009 * not require much extra code. */
4011 /* Convert what we have so far into UTF-8, telling the
4012 * function that we know it should be converted, and to
4013 * allow extra space for what we haven't processed yet.
4014 * Assume the worst case space requirements for converting
4015 * what we haven't processed so far: that it will require
4016 * two bytes for each remaining source character, plus the
4017 * NUL at the end. This may cause the string pointer to
4018 * move, so re-find it. */
4020 len = d - (U8*)SvPVX_const(dest);
4021 SvCUR_set(dest, len);
4022 len = sv_utf8_upgrade_flags_grow(dest,
4023 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4025 d = (U8*)SvPVX(dest) + len;
4027 /* And append the current character's upper case in UTF-8 */
4028 CAT_NON_LATIN1_UC(d, *s);
4030 /* Now process the remainder of the source, converting to
4031 * upper and UTF-8. If a resulting byte is invariant in
4032 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4033 * append it to the output. */
4036 for (; s < send; s++) {
4037 U8 upper = toUPPER_LATIN1_MOD(*s);
4038 if UTF8_IS_INVARIANT(upper) {
4042 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4046 /* Here have processed the whole source; no need to continue
4047 * with the outer loop. Each character has been converted
4048 * to upper case and converted to UTF-8 */
4051 } /* End of processing all latin1-style chars */
4052 } /* End of processing all chars */
4053 } /* End of source is not empty */
4055 if (source != dest) {
4056 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4057 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4059 } /* End of isn't utf8 */
4060 if (dest != source && SvTAINTED(source))
4079 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4080 && SvTEMP(source) && !DO_UTF8(source)) {
4082 /* We can convert in place, as lowercasing anything in the latin1 range
4083 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4085 s = d = (U8*)SvPV_force_nomg(source, len);
4092 /* The old implementation would copy source into TARG at this point.
4093 This had the side effect that if source was undef, TARG was now
4094 an undefined SV with PADTMP set, and they don't warn inside
4095 sv_2pv_flags(). However, we're now getting the PV direct from
4096 source, which doesn't have PADTMP set, so it would warn. Hence the
4100 s = (const U8*)SvPV_nomg_const(source, len);
4102 if (ckWARN(WARN_UNINITIALIZED))
4103 report_uninit(source);
4109 SvUPGRADE(dest, SVt_PV);
4110 d = (U8*)SvGROW(dest, min);
4111 (void)SvPOK_only(dest);
4116 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4117 to check DO_UTF8 again here. */
4119 if (DO_UTF8(source)) {
4120 const U8 *const send = s + len;
4121 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4124 if (UTF8_IS_INVARIANT(*s)) {
4126 /* Invariant characters use the standard mappings compiled in.
4131 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4133 /* As do the ones in the Latin1 range */
4134 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)));
4135 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4139 /* Here, is utf8 not in Latin-1 range, have to go out and get
4140 * the mappings from the tables. */
4142 const STRLEN u = UTF8SKIP(s);
4145 #ifndef CONTEXT_DEPENDENT_CASING
4146 toLOWER_utf8(s, tmpbuf, &ulen);
4148 /* This is ifdefd out because it probably is the wrong thing to do. The right
4149 * thing is probably to have an I/O layer that converts final sigma to regular
4150 * on input and vice versa (under the correct circumstances) on output. In
4151 * effect, the final sigma is just a glyph variation when the regular one
4152 * occurs at the end of a word. And we don't really know what's going to be
4153 * the end of the word until it is finally output, as splitting and joining can
4154 * occur at any time and change what once was the word end to be in the middle,
4155 * and vice versa. */
4157 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4159 /* If the lower case is a small sigma, it may be that we need
4160 * to change it to a final sigma. This happens at the end of
4161 * a word that contains more than just this character, and only
4162 * when we started with a capital sigma. */
4163 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4164 s > send - len && /* Makes sure not the first letter */
4165 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4168 /* We use the algorithm in:
4169 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4170 * is a CAPITAL SIGMA): If C is preceded by a sequence
4171 * consisting of a cased letter and a case-ignorable
4172 * sequence, and C is not followed by a sequence consisting
4173 * of a case ignorable sequence and then a cased letter,
4174 * then when lowercasing C, C becomes a final sigma */
4176 /* To determine if this is the end of a word, need to peek
4177 * ahead. Look at the next character */
4178 const U8 *peek = s + u;
4180 /* Skip any case ignorable characters */
4181 while (peek < send && is_utf8_case_ignorable(peek)) {
4182 peek += UTF8SKIP(peek);
4185 /* If we reached the end of the string without finding any
4186 * non-case ignorable characters, or if the next such one
4187 * is not-cased, then we have met the conditions for it
4188 * being a final sigma with regards to peek ahead, and so
4189 * must do peek behind for the remaining conditions. (We
4190 * know there is stuff behind to look at since we tested
4191 * above that this isn't the first letter) */
4192 if (peek >= send || ! is_utf8_cased(peek)) {
4193 peek = utf8_hop(s, -1);
4195 /* Here are at the beginning of the first character
4196 * before the original upper case sigma. Keep backing
4197 * up, skipping any case ignorable characters */
4198 while (is_utf8_case_ignorable(peek)) {
4199 peek = utf8_hop(peek, -1);
4202 /* Here peek points to the first byte of the closest
4203 * non-case-ignorable character before the capital
4204 * sigma. If it is cased, then by the Unicode
4205 * algorithm, we should use a small final sigma instead
4206 * of what we have */
4207 if (is_utf8_cased(peek)) {
4208 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4209 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4213 else { /* Not a context sensitive mapping */
4214 #endif /* End of commented out context sensitive */
4215 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4217 /* If the eventually required minimum size outgrows
4218 * the available space, we need to grow. */
4219 const UV o = d - (U8*)SvPVX_const(dest);
4221 /* If someone lowercases one million U+0130s we
4222 * SvGROW() one million times. Or we could try
4223 * guessing how much to allocate without allocating too
4224 * much. Such is life. Another option would be to
4225 * grow an extra byte or two more each time we need to
4226 * grow, which would cut down the million to 500K, with
4229 d = (U8*)SvPVX(dest) + o;
4231 #ifdef CONTEXT_DEPENDENT_CASING
4234 /* Copy the newly lowercased letter to the output buffer we're
4236 Copy(tmpbuf, d, ulen, U8);
4240 } /* End of looping through the source string */
4243 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4244 } else { /* Not utf8 */
4246 const U8 *const send = s + len;
4248 /* Use locale casing if in locale; regular style if not treating
4249 * latin1 as having case; otherwise the latin1 casing. Do the
4250 * whole thing in a tight loop, for speed, */
4251 if (IN_LOCALE_RUNTIME) {
4254 for (; s < send; d++, s++)
4255 *d = toLOWER_LC(*s);
4257 else if (! IN_UNI_8_BIT) {
4258 for (; s < send; d++, s++) {
4263 for (; s < send; d++, s++) {
4264 *d = toLOWER_LATIN1(*s);
4268 if (source != dest) {
4270 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4273 if (dest != source && SvTAINTED(source))
4282 SV * const sv = TOPs;
4284 register const char *s = SvPV_const(sv,len);
4286 SvUTF8_off(TARG); /* decontaminate */
4289 SvUPGRADE(TARG, SVt_PV);
4290 SvGROW(TARG, (len * 2) + 1);
4294 if (UTF8_IS_CONTINUED(*s)) {
4295 STRLEN ulen = UTF8SKIP(s);
4319 SvCUR_set(TARG, d - SvPVX_const(TARG));
4320 (void)SvPOK_only_UTF8(TARG);
4323 sv_setpvn(TARG, s, len);
4332 dVAR; dSP; dMARK; dORIGMARK;
4333 register AV *const av = MUTABLE_AV(POPs);
4334 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4336 if (SvTYPE(av) == SVt_PVAV) {
4337 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4338 bool can_preserve = FALSE;
4344 can_preserve = SvCANEXISTDELETE(av);
4347 if (lval && localizing) {
4350 for (svp = MARK + 1; svp <= SP; svp++) {
4351 const I32 elem = SvIV(*svp);
4355 if (max > AvMAX(av))
4359 while (++MARK <= SP) {
4361 I32 elem = SvIV(*MARK);
4362 bool preeminent = TRUE;
4364 if (localizing && can_preserve) {
4365 /* If we can determine whether the element exist,
4366 * Try to preserve the existenceness of a tied array
4367 * element by using EXISTS and DELETE if possible.
4368 * Fallback to FETCH and STORE otherwise. */
4369 preeminent = av_exists(av, elem);
4372 svp = av_fetch(av, elem, lval);
4374 if (!svp || *svp == &PL_sv_undef)
4375 DIE(aTHX_ PL_no_aelem, elem);
4378 save_aelem(av, elem, svp);
4380 SAVEADELETE(av, elem);
4383 *MARK = svp ? *svp : &PL_sv_undef;
4386 if (GIMME != G_ARRAY) {
4388 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4394 /* Smart dereferencing for keys, values and each */
4406 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4411 "Type of argument to %s must be unblessed hashref or arrayref",
4412 PL_op_desc[PL_op->op_type] );
4415 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4417 "Can't modify %s in %s",
4418 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4421 /* Delegate to correct function for op type */
4423 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4424 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4427 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4435 AV *array = MUTABLE_AV(POPs);
4436 const I32 gimme = GIMME_V;
4437 IV *iterp = Perl_av_iter_p(aTHX_ array);
4438 const IV current = (*iterp)++;
4440 if (current > av_len(array)) {
4442 if (gimme == G_SCALAR)
4450 if (gimme == G_ARRAY) {
4451 SV **const element = av_fetch(array, current, 0);
4452 PUSHs(element ? *element : &PL_sv_undef);
4461 AV *array = MUTABLE_AV(POPs);
4462 const I32 gimme = GIMME_V;
4464 *Perl_av_iter_p(aTHX_ array) = 0;
4466 if (gimme == G_SCALAR) {
4468 PUSHi(av_len(array) + 1);
4470 else if (gimme == G_ARRAY) {
4471 IV n = Perl_av_len(aTHX_ array);
4476 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
4477 for (i = 0; i <= n; i++) {
4482 for (i = 0; i <= n; i++) {
4483 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4484 PUSHs(elem ? *elem : &PL_sv_undef);
4491 /* Associative arrays. */
4497 HV * hash = MUTABLE_HV(POPs);
4499 const I32 gimme = GIMME_V;
4502 /* might clobber stack_sp */
4503 entry = hv_iternext(hash);
4508 SV* const sv = hv_iterkeysv(entry);
4509 PUSHs(sv); /* won't clobber stack_sp */
4510 if (gimme == G_ARRAY) {
4513 /* might clobber stack_sp */
4514 val = hv_iterval(hash, entry);
4519 else if (gimme == G_SCALAR)
4526 S_do_delete_local(pTHX)
4530 const I32 gimme = GIMME_V;
4534 if (PL_op->op_private & OPpSLICE) {
4536 SV * const osv = POPs;
4537 const bool tied = SvRMAGICAL(osv)
4538 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4539 const bool can_preserve = SvCANEXISTDELETE(osv)
4540 || mg_find((const SV *)osv, PERL_MAGIC_env);
4541 const U32 type = SvTYPE(osv);
4542 if (type == SVt_PVHV) { /* hash element */
4543 HV * const hv = MUTABLE_HV(osv);
4544 while (++MARK <= SP) {
4545 SV * const keysv = *MARK;
4547 bool preeminent = TRUE;
4549 preeminent = hv_exists_ent(hv, keysv, 0);
4551 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4558 sv = hv_delete_ent(hv, keysv, 0, 0);
4559 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4562 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4564 *MARK = sv_mortalcopy(sv);
4570 SAVEHDELETE(hv, keysv);
4571 *MARK = &PL_sv_undef;
4575 else if (type == SVt_PVAV) { /* array element */
4576 if (PL_op->op_flags & OPf_SPECIAL) {
4577 AV * const av = MUTABLE_AV(osv);
4578 while (++MARK <= SP) {