3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
33 /* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
37 #ifdef NEED_GETPID_PROTO
38 extern Pid_t getpid (void);
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
45 #if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
49 /* variations on pp_null */
55 if (GIMME_V == G_SCALAR)
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
71 if (PL_op->op_flags & OPf_REF) {
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
84 if (gimme == G_ARRAY) {
85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
87 if (SvMAGICAL(TARG)) {
89 for (i=0; i < (U32)maxarg; i++) {
90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
99 else if (gimme == G_SCALAR) {
100 SV* const sv = sv_newmortal();
101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
102 sv_setiv(sv, maxarg);
113 assert(SvTYPE(TARG) == SVt_PVHV);
115 if (PL_op->op_private & OPpLVAL_INTRO)
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
118 if (PL_op->op_flags & OPf_REF)
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
129 if (gimme == G_ARRAY) {
130 RETURNOP(Perl_do_kv(aTHX));
132 else if (gimme == G_SCALAR) {
133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
141 static const char S_no_symref_sv[] =
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
144 /* In some cases this function inspects PL_op. If this function is called
145 for new op types, more bool parameters may need to be added in place of
148 When noinit is true, the absence of a gv will cause a retval of undef.
149 This is unrelated to the cv-to-gv assignment case.
151 Make sure to use SPAGAIN after calling this.
155 S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
159 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
162 sv = amagic_deref_call(sv, to_gv_amg);
166 if (SvTYPE(sv) == SVt_PVIO) {
167 GV * const gv = MUTABLE_GV(sv_newmortal());
168 gv_init(gv, 0, "", 0, 0);
169 GvIOp(gv) = MUTABLE_IO(sv);
170 SvREFCNT_inc_void_NN(sv);
173 else if (!isGV_with_GP(sv))
174 return (SV *)Perl_die(aTHX_ "Not a GLOB reference");
177 if (!isGV_with_GP(sv)) {
179 /* If this is a 'my' scalar and flag is set then vivify
182 if (vivify_sv && sv != &PL_sv_undef) {
185 Perl_croak_no_modify(aTHX);
186 if (cUNOP->op_targ) {
187 SV * const namesv = PAD_SV(cUNOP->op_targ);
188 gv = MUTABLE_GV(newSV(0));
189 gv_init_sv(gv, CopSTASH(PL_curcop), namesv, 0);
192 const char * const name = CopSTASHPV(PL_curcop);
193 gv = newGVgen_flags(name,
194 HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
196 prepare_SV_for_RV(sv);
197 SvRV_set(sv, MUTABLE_SV(gv));
202 if (PL_op->op_flags & OPf_REF || strict)
203 return (SV *)Perl_die(aTHX_ PL_no_usym, "a symbol");
204 if (ckWARN(WARN_UNINITIALIZED))
210 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
211 sv, GV_ADDMG, SVt_PVGV
221 (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""),
224 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
225 == OPpDONT_INIT_GV) {
226 /* We are the target of a coderef assignment. Return
227 the scalar unchanged, and let pp_sasssign deal with
231 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
233 /* FAKE globs in the symbol table cause weird bugs (#77810) */
238 SV *newsv = sv_newmortal();
239 sv_setsv_flags(newsv, sv, 0);
251 sv, PL_op->op_private & OPpDEREF,
252 PL_op->op_private & HINT_STRICT_REFS,
253 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
254 || PL_op->op_type == OP_READLINE
257 if (PL_op->op_private & OPpLVAL_INTRO)
258 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
263 /* Helper function for pp_rv2sv and pp_rv2av */
265 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
266 const svtype type, SV ***spp)
271 PERL_ARGS_ASSERT_SOFTREF2XV;
273 if (PL_op->op_private & HINT_STRICT_REFS) {
275 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
277 Perl_die(aTHX_ PL_no_usym, what);
281 PL_op->op_flags & OPf_REF &&
282 PL_op->op_next->op_type != OP_BOOLKEYS
284 Perl_die(aTHX_ PL_no_usym, what);
285 if (ckWARN(WARN_UNINITIALIZED))
287 if (type != SVt_PV && GIMME_V == G_ARRAY) {
291 **spp = &PL_sv_undef;
294 if ((PL_op->op_flags & OPf_SPECIAL) &&
295 !(PL_op->op_flags & OPf_MOD))
297 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
299 **spp = &PL_sv_undef;
304 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
317 sv = amagic_deref_call(sv, to_sv_amg);
322 switch (SvTYPE(sv)) {
328 DIE(aTHX_ "Not a SCALAR reference");
335 if (!isGV_with_GP(gv)) {
336 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
342 if (PL_op->op_flags & OPf_MOD) {
343 if (PL_op->op_private & OPpLVAL_INTRO) {
344 if (cUNOP->op_first->op_type == OP_NULL)
345 sv = save_scalar(MUTABLE_GV(TOPs));
347 sv = save_scalar(gv);
349 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
351 else if (PL_op->op_private & OPpDEREF)
352 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
361 AV * const av = MUTABLE_AV(TOPs);
362 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
364 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
366 *sv = newSV_type(SVt_PVMG);
367 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
371 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
380 if (PL_op->op_flags & OPf_MOD || LVRET) {
381 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
382 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
384 LvTARG(ret) = SvREFCNT_inc_simple(sv);
385 PUSHs(ret); /* no SvSETMAGIC */
389 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
390 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
391 if (mg && mg->mg_len >= 0) {
409 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
411 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
414 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
415 /* (But not in defined().) */
417 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
420 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
421 if ((PL_op->op_private & OPpLVAL_INTRO)) {
422 if (gv && GvCV(gv) == cv && (gv = gv_autoload_pvn(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), GvNAMEUTF8(gv) ? SVf_UTF8 : 0)))
425 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
428 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
432 cv = MUTABLE_CV(&PL_sv_undef);
433 SETs(MUTABLE_SV(cv));
443 SV *ret = &PL_sv_undef;
445 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
446 const char * s = SvPVX_const(TOPs);
447 if (strnEQ(s, "CORE::", 6)) {
448 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
449 if (!code || code == -KEY_CORE)
450 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
451 if (code < 0) { /* Overridable. */
452 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
458 cv = sv_2cv(TOPs, &stash, &gv, 0);
460 ret = newSVpvn_flags(
461 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv)
471 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
473 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
475 PUSHs(MUTABLE_SV(cv));
489 if (GIMME != G_ARRAY) {
493 *MARK = &PL_sv_undef;
494 *MARK = refto(*MARK);
498 EXTEND_MORTAL(SP - MARK);
500 *MARK = refto(*MARK);
505 S_refto(pTHX_ SV *sv)
510 PERL_ARGS_ASSERT_REFTO;
512 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
515 if (!(sv = LvTARG(sv)))
518 SvREFCNT_inc_void_NN(sv);
520 else if (SvTYPE(sv) == SVt_PVAV) {
521 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
522 av_reify(MUTABLE_AV(sv));
524 SvREFCNT_inc_void_NN(sv);
526 else if (SvPADTMP(sv) && !IS_PADGV(sv))
530 SvREFCNT_inc_void_NN(sv);
533 sv_upgrade(rv, SVt_IV);
542 SV * const sv = POPs;
547 if (!sv || !SvROK(sv))
550 (void)sv_ref(TARG,SvRV(sv),TRUE);
562 stash = CopSTASH(PL_curcop);
564 SV * const ssv = POPs;
568 if (!ssv) goto curstash;
569 if (!SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
570 Perl_croak(aTHX_ "Attempt to bless into a reference");
571 ptr = SvPV_const(ssv,len);
573 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
574 "Explicit blessing to '' (assuming package main)");
575 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv));
578 (void)sv_bless(TOPs, stash);
588 const char * const elem = SvPV_const(sv, len);
589 GV * const gv = MUTABLE_GV(POPs);
594 /* elem will always be NUL terminated. */
595 const char * const second_letter = elem + 1;
598 if (len == 5 && strEQ(second_letter, "RRAY"))
599 tmpRef = MUTABLE_SV(GvAV(gv));
602 if (len == 4 && strEQ(second_letter, "ODE"))
603 tmpRef = MUTABLE_SV(GvCVu(gv));
606 if (len == 10 && strEQ(second_letter, "ILEHANDLE")) {
607 /* finally deprecated in 5.8.0 */
608 deprecate("*glob{FILEHANDLE}");
609 tmpRef = MUTABLE_SV(GvIOp(gv));
612 if (len == 6 && strEQ(second_letter, "ORMAT"))
613 tmpRef = MUTABLE_SV(GvFORM(gv));
616 if (len == 4 && strEQ(second_letter, "LOB"))
617 tmpRef = MUTABLE_SV(gv);
620 if (len == 4 && strEQ(second_letter, "ASH"))
621 tmpRef = MUTABLE_SV(GvHV(gv));
624 if (*second_letter == 'O' && !elem[2] && len == 2)
625 tmpRef = MUTABLE_SV(GvIOp(gv));
628 if (len == 4 && strEQ(second_letter, "AME"))
629 sv = newSVhek(GvNAME_HEK(gv));
632 if (len == 7 && strEQ(second_letter, "ACKAGE")) {
633 const HV * const stash = GvSTASH(gv);
634 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
635 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
639 if (len == 6 && strEQ(second_letter, "CALAR"))
654 /* Pattern matching */
659 register unsigned char *s;
662 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
666 if (mg && SvSCREAM(sv))
669 s = (unsigned char*)(SvPV(sv, len));
670 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
671 /* No point in studying a zero length string, and not safe to study
672 anything that doesn't appear to be a simple scalar (and hence might
673 change between now and when the regexp engine runs without our set
674 magic ever running) such as a reference to an object with overloaded
675 stringification. Also refuse to study an FBM scalar, as this gives
676 more flexibility in SV flag usage. No real-world code would ever
677 end up studying an FBM scalar, so this isn't a real pessimisation.
678 Endemic use of I32 in Perl_screaminstr makes it hard to safely push
679 the study length limit from I32_MAX to U32_MAX - 1.
686 } else if (len < 0xFFFF) {
691 size = (256 + len) * quanta;
692 sfirst_raw = (char *)safemalloc(size);
695 DIE(aTHX_ "do_study: out of memory");
699 mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
700 mg->mg_ptr = sfirst_raw;
702 mg->mg_private = quanta;
704 memset(sfirst_raw, ~0, 256 * quanta);
706 /* The assumption here is that most studied strings are fairly short, hence
707 the pain of the extra code is worth it, given the memory savings.
708 80 character string, 336 bytes as U8, down from 1344 as U32
709 800 character string, 2112 bytes as U16, down from 4224 as U32
713 U8 *const sfirst = (U8 *)sfirst_raw;
714 U8 *const snext = sfirst + 256;
716 const U8 ch = s[len];
717 snext[len] = sfirst[ch];
720 } else if (quanta == 2) {
721 U16 *const sfirst = (U16 *)sfirst_raw;
722 U16 *const snext = sfirst + 256;
724 const U8 ch = s[len];
725 snext[len] = sfirst[ch];
729 U32 *const sfirst = (U32 *)sfirst_raw;
730 U32 *const snext = sfirst + 256;
732 const U8 ch = s[len];
733 snext[len] = sfirst[ch];
746 if (PL_op->op_flags & OPf_STACKED)
748 else if (PL_op->op_private & OPpTARGET_MY)
754 TARG = sv_newmortal();
755 if(PL_op->op_type == OP_TRANSR) {
756 SV * const newsv = newSVsv(sv);
760 else PUSHi(do_trans(sv));
764 /* Lvalue operators. */
767 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
773 PERL_ARGS_ASSERT_DO_CHOMP;
775 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
777 if (SvTYPE(sv) == SVt_PVAV) {
779 AV *const av = MUTABLE_AV(sv);
780 const I32 max = AvFILL(av);
782 for (i = 0; i <= max; i++) {
783 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
784 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
785 do_chomp(retval, sv, chomping);
789 else if (SvTYPE(sv) == SVt_PVHV) {
790 HV* const hv = MUTABLE_HV(sv);
792 (void)hv_iterinit(hv);
793 while ((entry = hv_iternext(hv)))
794 do_chomp(retval, hv_iterval(hv,entry), chomping);
797 else if (SvREADONLY(sv)) {
799 /* SV is copy-on-write */
800 sv_force_normal_flags(sv, 0);
803 Perl_croak_no_modify(aTHX);
808 /* XXX, here sv is utf8-ized as a side-effect!
809 If encoding.pm is used properly, almost string-generating
810 operations, including literal strings, chr(), input data, etc.
811 should have been utf8-ized already, right?
813 sv_recode_to_utf8(sv, PL_encoding);
819 char *temp_buffer = NULL;
828 while (len && s[-1] == '\n') {
835 STRLEN rslen, rs_charlen;
836 const char *rsptr = SvPV_const(PL_rs, rslen);
838 rs_charlen = SvUTF8(PL_rs)
842 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
843 /* Assumption is that rs is shorter than the scalar. */
845 /* RS is utf8, scalar is 8 bit. */
847 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
850 /* Cannot downgrade, therefore cannot possibly match
852 assert (temp_buffer == rsptr);
858 else if (PL_encoding) {
859 /* RS is 8 bit, encoding.pm is used.
860 * Do not recode PL_rs as a side-effect. */
861 svrecode = newSVpvn(rsptr, rslen);
862 sv_recode_to_utf8(svrecode, PL_encoding);
863 rsptr = SvPV_const(svrecode, rslen);
864 rs_charlen = sv_len_utf8(svrecode);
867 /* RS is 8 bit, scalar is utf8. */
868 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
882 if (memNE(s, rsptr, rslen))
884 SvIVX(retval) += rs_charlen;
887 s = SvPV_force_nolen(sv);
895 SvREFCNT_dec(svrecode);
897 Safefree(temp_buffer);
899 if (len && !SvPOK(sv))
900 s = SvPV_force_nomg(sv, len);
903 char * const send = s + len;
904 char * const start = s;
906 while (s > start && UTF8_IS_CONTINUATION(*s))
908 if (is_utf8_string((U8*)s, send - s)) {
909 sv_setpvn(retval, s, send - s);
911 SvCUR_set(sv, s - start);
917 sv_setpvs(retval, "");
921 sv_setpvn(retval, s, 1);
928 sv_setpvs(retval, "");
936 const bool chomping = PL_op->op_type == OP_SCHOMP;
940 do_chomp(TARG, TOPs, chomping);
947 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
948 const bool chomping = PL_op->op_type == OP_CHOMP;
953 do_chomp(TARG, *++MARK, chomping);
964 if (!PL_op->op_private) {
973 SV_CHECK_THINKFIRST_COW_DROP(sv);
975 switch (SvTYPE(sv)) {
979 av_undef(MUTABLE_AV(sv));
982 hv_undef(MUTABLE_HV(sv));
985 if (cv_const_sv((const CV *)sv))
986 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
987 "Constant subroutine %"SVf" undefined",
988 SVfARG(CvANON((const CV *)sv)
989 ? newSVpvs_flags("(anonymous)", SVs_TEMP)
990 : sv_2mortal(newSVhek(GvENAME_HEK(CvGV((const CV *)sv))))));
994 /* let user-undef'd sub keep its identity */
995 GV* const gv = CvGV((const CV *)sv);
996 cv_undef(MUTABLE_CV(sv));
997 CvGV_set(MUTABLE_CV(sv), gv);
1002 SvSetMagicSV(sv, &PL_sv_undef);
1005 else if (isGV_with_GP(sv)) {
1009 /* undef *Pkg::meth_name ... */
1011 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1012 && HvENAME_get(stash);
1014 if((stash = GvHV((const GV *)sv))) {
1015 if(HvENAME_get(stash))
1016 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1020 gp_free(MUTABLE_GV(sv));
1022 GvGP_set(sv, gp_ref(gp));
1023 GvSV(sv) = newSV(0);
1024 GvLINE(sv) = CopLINE(PL_curcop);
1025 GvEGV(sv) = MUTABLE_GV(sv);
1029 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1031 /* undef *Foo::ISA */
1032 if( strEQ(GvNAME((const GV *)sv), "ISA")
1033 && (stash = GvSTASH((const GV *)sv))
1034 && (method_changed || HvENAME(stash)) )
1035 mro_isa_changed_in(stash);
1036 else if(method_changed)
1037 mro_method_changed_in(
1038 GvSTASH((const GV *)sv)
1045 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1061 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
1062 if (SvTYPE(TOPs) >= SVt_PVAV || (isGV_with_GP(TOPs) && !SvFAKE(TOPs)))
1063 Perl_croak_no_modify(aTHX);
1065 TARG = sv_newmortal();
1066 sv_setsv(TARG, TOPs);
1067 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1068 && SvIVX(TOPs) != (inc ? IV_MAX : IV_MIN))
1070 SvIV_set(TOPs, SvIVX(TOPs) + (inc ? 1 : -1));
1071 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1075 else sv_dec_nomg(TOPs);
1077 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1078 if (inc && !SvOK(TARG))
1084 /* Ordinary operators. */
1088 dVAR; dSP; dATARGET; SV *svl, *svr;
1089 #ifdef PERL_PRESERVE_IVUV
1092 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1095 #ifdef PERL_PRESERVE_IVUV
1096 /* For integer to integer power, we do the calculation by hand wherever
1097 we're sure it is safe; otherwise we call pow() and try to convert to
1098 integer afterwards. */
1100 SvIV_please_nomg(svr);
1102 SvIV_please_nomg(svl);
1111 const IV iv = SvIVX(svr);
1115 goto float_it; /* Can't do negative powers this way. */
1119 baseuok = SvUOK(svl);
1121 baseuv = SvUVX(svl);
1123 const IV iv = SvIVX(svl);
1126 baseuok = TRUE; /* effectively it's a UV now */
1128 baseuv = -iv; /* abs, baseuok == false records sign */
1131 /* now we have integer ** positive integer. */
1134 /* foo & (foo - 1) is zero only for a power of 2. */
1135 if (!(baseuv & (baseuv - 1))) {
1136 /* We are raising power-of-2 to a positive integer.
1137 The logic here will work for any base (even non-integer
1138 bases) but it can be less accurate than
1139 pow (base,power) or exp (power * log (base)) when the
1140 intermediate values start to spill out of the mantissa.
1141 With powers of 2 we know this can't happen.
1142 And powers of 2 are the favourite thing for perl
1143 programmers to notice ** not doing what they mean. */
1145 NV base = baseuok ? baseuv : -(NV)baseuv;
1150 while (power >>= 1) {
1158 SvIV_please_nomg(svr);
1161 register unsigned int highbit = 8 * sizeof(UV);
1162 register unsigned int diff = 8 * sizeof(UV);
1163 while (diff >>= 1) {
1165 if (baseuv >> highbit) {
1169 /* we now have baseuv < 2 ** highbit */
1170 if (power * highbit <= 8 * sizeof(UV)) {
1171 /* result will definitely fit in UV, so use UV math
1172 on same algorithm as above */
1173 register UV result = 1;
1174 register UV base = baseuv;
1175 const bool odd_power = cBOOL(power & 1);
1179 while (power >>= 1) {
1186 if (baseuok || !odd_power)
1187 /* answer is positive */
1189 else if (result <= (UV)IV_MAX)
1190 /* answer negative, fits in IV */
1191 SETi( -(IV)result );
1192 else if (result == (UV)IV_MIN)
1193 /* 2's complement assumption: special case IV_MIN */
1196 /* answer negative, doesn't fit */
1197 SETn( -(NV)result );
1207 NV right = SvNV_nomg(svr);
1208 NV left = SvNV_nomg(svl);
1211 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1213 We are building perl with long double support and are on an AIX OS
1214 afflicted with a powl() function that wrongly returns NaNQ for any
1215 negative base. This was reported to IBM as PMR #23047-379 on
1216 03/06/2006. The problem exists in at least the following versions
1217 of AIX and the libm fileset, and no doubt others as well:
1219 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1220 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1221 AIX 5.2.0 bos.adt.libm 5.2.0.85
1223 So, until IBM fixes powl(), we provide the following workaround to
1224 handle the problem ourselves. Our logic is as follows: for
1225 negative bases (left), we use fmod(right, 2) to check if the
1226 exponent is an odd or even integer:
1228 - if odd, powl(left, right) == -powl(-left, right)
1229 - if even, powl(left, right) == powl(-left, right)
1231 If the exponent is not an integer, the result is rightly NaNQ, so
1232 we just return that (as NV_NAN).
1236 NV mod2 = Perl_fmod( right, 2.0 );
1237 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1238 SETn( -Perl_pow( -left, right) );
1239 } else if (mod2 == 0.0) { /* even integer */
1240 SETn( Perl_pow( -left, right) );
1241 } else { /* fractional power */
1245 SETn( Perl_pow( left, right) );
1248 SETn( Perl_pow( left, right) );
1249 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1251 #ifdef PERL_PRESERVE_IVUV
1253 SvIV_please_nomg(svr);
1261 dVAR; dSP; dATARGET; SV *svl, *svr;
1262 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1265 #ifdef PERL_PRESERVE_IVUV
1266 SvIV_please_nomg(svr);
1268 /* Unless the left argument is integer in range we are going to have to
1269 use NV maths. Hence only attempt to coerce the right argument if
1270 we know the left is integer. */
1271 /* Left operand is defined, so is it IV? */
1272 SvIV_please_nomg(svl);
1274 bool auvok = SvUOK(svl);
1275 bool buvok = SvUOK(svr);
1276 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1277 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1286 const IV aiv = SvIVX(svl);
1289 auvok = TRUE; /* effectively it's a UV now */
1291 alow = -aiv; /* abs, auvok == false records sign */
1297 const IV biv = SvIVX(svr);
1300 buvok = TRUE; /* effectively it's a UV now */
1302 blow = -biv; /* abs, buvok == false records sign */
1306 /* If this does sign extension on unsigned it's time for plan B */
1307 ahigh = alow >> (4 * sizeof (UV));
1309 bhigh = blow >> (4 * sizeof (UV));
1311 if (ahigh && bhigh) {
1313 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1314 which is overflow. Drop to NVs below. */
1315 } else if (!ahigh && !bhigh) {
1316 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1317 so the unsigned multiply cannot overflow. */
1318 const UV product = alow * blow;
1319 if (auvok == buvok) {
1320 /* -ve * -ve or +ve * +ve gives a +ve result. */
1324 } else if (product <= (UV)IV_MIN) {
1325 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1326 /* -ve result, which could overflow an IV */
1328 SETi( -(IV)product );
1330 } /* else drop to NVs below. */
1332 /* One operand is large, 1 small */
1335 /* swap the operands */
1337 bhigh = blow; /* bhigh now the temp var for the swap */
1341 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1342 multiplies can't overflow. shift can, add can, -ve can. */
1343 product_middle = ahigh * blow;
1344 if (!(product_middle & topmask)) {
1345 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1347 product_middle <<= (4 * sizeof (UV));
1348 product_low = alow * blow;
1350 /* as for pp_add, UV + something mustn't get smaller.
1351 IIRC ANSI mandates this wrapping *behaviour* for
1352 unsigned whatever the actual representation*/
1353 product_low += product_middle;
1354 if (product_low >= product_middle) {
1355 /* didn't overflow */
1356 if (auvok == buvok) {
1357 /* -ve * -ve or +ve * +ve gives a +ve result. */
1359 SETu( product_low );
1361 } else if (product_low <= (UV)IV_MIN) {
1362 /* 2s complement assumption again */
1363 /* -ve result, which could overflow an IV */
1365 SETi( -(IV)product_low );
1367 } /* else drop to NVs below. */
1369 } /* product_middle too large */
1370 } /* ahigh && bhigh */
1375 NV right = SvNV_nomg(svr);
1376 NV left = SvNV_nomg(svl);
1378 SETn( left * right );
1385 dVAR; dSP; dATARGET; SV *svl, *svr;
1386 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1389 /* Only try to do UV divide first
1390 if ((SLOPPYDIVIDE is true) or
1391 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1393 The assumption is that it is better to use floating point divide
1394 whenever possible, only doing integer divide first if we can't be sure.
1395 If NV_PRESERVES_UV is true then we know at compile time that no UV
1396 can be too large to preserve, so don't need to compile the code to
1397 test the size of UVs. */
1400 # define PERL_TRY_UV_DIVIDE
1401 /* ensure that 20./5. == 4. */
1403 # ifdef PERL_PRESERVE_IVUV
1404 # ifndef NV_PRESERVES_UV
1405 # define PERL_TRY_UV_DIVIDE
1410 #ifdef PERL_TRY_UV_DIVIDE
1411 SvIV_please_nomg(svr);
1413 SvIV_please_nomg(svl);
1415 bool left_non_neg = SvUOK(svl);
1416 bool right_non_neg = SvUOK(svr);
1420 if (right_non_neg) {
1424 const IV biv = SvIVX(svr);
1427 right_non_neg = TRUE; /* effectively it's a UV now */
1433 /* historically undef()/0 gives a "Use of uninitialized value"
1434 warning before dieing, hence this test goes here.
1435 If it were immediately before the second SvIV_please, then
1436 DIE() would be invoked before left was even inspected, so
1437 no inspection would give no warning. */
1439 DIE(aTHX_ "Illegal division by zero");
1445 const IV aiv = SvIVX(svl);
1448 left_non_neg = TRUE; /* effectively it's a UV now */
1457 /* For sloppy divide we always attempt integer division. */
1459 /* Otherwise we only attempt it if either or both operands
1460 would not be preserved by an NV. If both fit in NVs
1461 we fall through to the NV divide code below. However,
1462 as left >= right to ensure integer result here, we know that
1463 we can skip the test on the right operand - right big
1464 enough not to be preserved can't get here unless left is
1467 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1470 /* Integer division can't overflow, but it can be imprecise. */
1471 const UV result = left / right;
1472 if (result * right == left) {
1473 SP--; /* result is valid */
1474 if (left_non_neg == right_non_neg) {
1475 /* signs identical, result is positive. */
1479 /* 2s complement assumption */
1480 if (result <= (UV)IV_MIN)
1481 SETi( -(IV)result );
1483 /* It's exact but too negative for IV. */
1484 SETn( -(NV)result );
1487 } /* tried integer divide but it was not an integer result */
1488 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1489 } /* left wasn't SvIOK */
1490 } /* right wasn't SvIOK */
1491 #endif /* PERL_TRY_UV_DIVIDE */
1493 NV right = SvNV_nomg(svr);
1494 NV left = SvNV_nomg(svl);
1495 (void)POPs;(void)POPs;
1496 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1497 if (! Perl_isnan(right) && right == 0.0)
1501 DIE(aTHX_ "Illegal division by zero");
1502 PUSHn( left / right );
1509 dVAR; dSP; dATARGET;
1510 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1514 bool left_neg = FALSE;
1515 bool right_neg = FALSE;
1516 bool use_double = FALSE;
1517 bool dright_valid = FALSE;
1520 SV * const svr = TOPs;
1521 SV * const svl = TOPm1s;
1522 SvIV_please_nomg(svr);
1524 right_neg = !SvUOK(svr);
1528 const IV biv = SvIVX(svr);
1531 right_neg = FALSE; /* effectively it's a UV now */
1538 dright = SvNV_nomg(svr);
1539 right_neg = dright < 0;
1542 if (dright < UV_MAX_P1) {
1543 right = U_V(dright);
1544 dright_valid = TRUE; /* In case we need to use double below. */
1550 /* At this point use_double is only true if right is out of range for
1551 a UV. In range NV has been rounded down to nearest UV and
1552 use_double false. */
1553 SvIV_please_nomg(svl);
1554 if (!use_double && SvIOK(svl)) {
1556 left_neg = !SvUOK(svl);
1560 const IV aiv = SvIVX(svl);
1563 left_neg = FALSE; /* effectively it's a UV now */
1571 dleft = SvNV_nomg(svl);
1572 left_neg = dleft < 0;
1576 /* This should be exactly the 5.6 behaviour - if left and right are
1577 both in range for UV then use U_V() rather than floor. */
1579 if (dleft < UV_MAX_P1) {
1580 /* right was in range, so is dleft, so use UVs not double.
1584 /* left is out of range for UV, right was in range, so promote
1585 right (back) to double. */
1587 /* The +0.5 is used in 5.6 even though it is not strictly
1588 consistent with the implicit +0 floor in the U_V()
1589 inside the #if 1. */
1590 dleft = Perl_floor(dleft + 0.5);
1593 dright = Perl_floor(dright + 0.5);
1604 DIE(aTHX_ "Illegal modulus zero");
1606 dans = Perl_fmod(dleft, dright);
1607 if ((left_neg != right_neg) && dans)
1608 dans = dright - dans;
1611 sv_setnv(TARG, dans);
1617 DIE(aTHX_ "Illegal modulus zero");
1620 if ((left_neg != right_neg) && ans)
1623 /* XXX may warn: unary minus operator applied to unsigned type */
1624 /* could change -foo to be (~foo)+1 instead */
1625 if (ans <= ~((UV)IV_MAX)+1)
1626 sv_setiv(TARG, ~ans+1);
1628 sv_setnv(TARG, -(NV)ans);
1631 sv_setuv(TARG, ans);
1640 dVAR; dSP; dATARGET;
1644 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1645 /* TODO: think of some way of doing list-repeat overloading ??? */
1650 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1656 const UV uv = SvUV_nomg(sv);
1658 count = IV_MAX; /* The best we can do? */
1662 const IV iv = SvIV_nomg(sv);
1669 else if (SvNOKp(sv)) {
1670 const NV nv = SvNV_nomg(sv);
1677 count = SvIV_nomg(sv);
1679 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1681 static const char oom_list_extend[] = "Out of memory during list extend";
1682 const I32 items = SP - MARK;
1683 const I32 max = items * count;
1685 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1686 /* Did the max computation overflow? */
1687 if (items > 0 && max > 0 && (max < items || max < count))
1688 Perl_croak(aTHX_ oom_list_extend);
1693 /* This code was intended to fix 20010809.028:
1696 for (($x =~ /./g) x 2) {
1697 print chop; # "abcdabcd" expected as output.
1700 * but that change (#11635) broke this code:
1702 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1704 * I can't think of a better fix that doesn't introduce
1705 * an efficiency hit by copying the SVs. The stack isn't
1706 * refcounted, and mortalisation obviously doesn't
1707 * Do The Right Thing when the stack has more than
1708 * one pointer to the same mortal value.
1712 *SP = sv_2mortal(newSVsv(*SP));
1722 repeatcpy((char*)(MARK + items), (char*)MARK,
1723 items * sizeof(const SV *), count - 1);
1726 else if (count <= 0)
1729 else { /* Note: mark already snarfed by pp_list */
1730 SV * const tmpstr = POPs;
1733 static const char oom_string_extend[] =
1734 "Out of memory during string extend";
1737 sv_setsv_nomg(TARG, tmpstr);
1738 SvPV_force_nomg(TARG, len);
1739 isutf = DO_UTF8(TARG);
1744 const STRLEN max = (UV)count * len;
1745 if (len > MEM_SIZE_MAX / count)
1746 Perl_croak(aTHX_ oom_string_extend);
1747 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1748 SvGROW(TARG, max + 1);
1749 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1750 SvCUR_set(TARG, SvCUR(TARG) * count);
1752 *SvEND(TARG) = '\0';
1755 (void)SvPOK_only_UTF8(TARG);
1757 (void)SvPOK_only(TARG);
1759 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1760 /* The parser saw this as a list repeat, and there
1761 are probably several items on the stack. But we're
1762 in scalar context, and there's no pp_list to save us
1763 now. So drop the rest of the items -- robin@kitsite.com
1775 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1776 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1779 useleft = USE_LEFT(svl);
1780 #ifdef PERL_PRESERVE_IVUV
1781 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1782 "bad things" happen if you rely on signed integers wrapping. */
1783 SvIV_please_nomg(svr);
1785 /* Unless the left argument is integer in range we are going to have to
1786 use NV maths. Hence only attempt to coerce the right argument if
1787 we know the left is integer. */
1788 register UV auv = 0;
1794 a_valid = auvok = 1;
1795 /* left operand is undef, treat as zero. */
1797 /* Left operand is defined, so is it IV? */
1798 SvIV_please_nomg(svl);
1800 if ((auvok = SvUOK(svl)))
1803 register const IV aiv = SvIVX(svl);
1806 auvok = 1; /* Now acting as a sign flag. */
1807 } else { /* 2s complement assumption for IV_MIN */
1815 bool result_good = 0;
1818 bool buvok = SvUOK(svr);
1823 register const IV biv = SvIVX(svr);
1830 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1831 else "IV" now, independent of how it came in.
1832 if a, b represents positive, A, B negative, a maps to -A etc
1837 all UV maths. negate result if A negative.
1838 subtract if signs same, add if signs differ. */
1840 if (auvok ^ buvok) {
1849 /* Must get smaller */
1854 if (result <= buv) {
1855 /* result really should be -(auv-buv). as its negation
1856 of true value, need to swap our result flag */
1868 if (result <= (UV)IV_MIN)
1869 SETi( -(IV)result );
1871 /* result valid, but out of range for IV. */
1872 SETn( -(NV)result );
1876 } /* Overflow, drop through to NVs. */
1881 NV value = SvNV_nomg(svr);
1885 /* left operand is undef, treat as zero - value */
1889 SETn( SvNV_nomg(svl) - value );
1896 dVAR; dSP; dATARGET; SV *svl, *svr;
1897 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1901 const IV shift = SvIV_nomg(svr);
1902 if (PL_op->op_private & HINT_INTEGER) {
1903 const IV i = SvIV_nomg(svl);
1907 const UV u = SvUV_nomg(svl);
1916 dVAR; dSP; dATARGET; SV *svl, *svr;
1917 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
1921 const IV shift = SvIV_nomg(svr);
1922 if (PL_op->op_private & HINT_INTEGER) {
1923 const IV i = SvIV_nomg(svl);
1927 const UV u = SvUV_nomg(svl);
1939 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
1943 (SvIOK_notUV(left) && SvIOK_notUV(right))
1944 ? (SvIVX(left) < SvIVX(right))
1945 : (do_ncmp(left, right) == -1)
1955 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1959 (SvIOK_notUV(left) && SvIOK_notUV(right))
1960 ? (SvIVX(left) > SvIVX(right))
1961 : (do_ncmp(left, right) == 1)
1971 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
1975 (SvIOK_notUV(left) && SvIOK_notUV(right))
1976 ? (SvIVX(left) <= SvIVX(right))
1977 : (do_ncmp(left, right) <= 0)
1987 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
1991 (SvIOK_notUV(left) && SvIOK_notUV(right))
1992 ? (SvIVX(left) >= SvIVX(right))
1993 : ( (do_ncmp(left, right) & 2) == 0)
2003 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2007 (SvIOK_notUV(left) && SvIOK_notUV(right))
2008 ? (SvIVX(left) != SvIVX(right))
2009 : (do_ncmp(left, right) != 0)
2014 /* compare left and right SVs. Returns:
2018 * 2: left or right was a NaN
2021 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2025 PERL_ARGS_ASSERT_DO_NCMP;
2026 #ifdef PERL_PRESERVE_IVUV
2027 SvIV_please_nomg(right);
2028 /* Fortunately it seems NaN isn't IOK */
2030 SvIV_please_nomg(left);
2033 const IV leftiv = SvIVX(left);
2034 if (!SvUOK(right)) {
2035 /* ## IV <=> IV ## */
2036 const IV rightiv = SvIVX(right);
2037 return (leftiv > rightiv) - (leftiv < rightiv);
2039 /* ## IV <=> UV ## */
2041 /* As (b) is a UV, it's >=0, so it must be < */
2044 const UV rightuv = SvUVX(right);
2045 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2050 /* ## UV <=> UV ## */
2051 const UV leftuv = SvUVX(left);
2052 const UV rightuv = SvUVX(right);
2053 return (leftuv > rightuv) - (leftuv < rightuv);
2055 /* ## UV <=> IV ## */
2057 const IV rightiv = SvIVX(right);
2059 /* As (a) is a UV, it's >=0, so it cannot be < */
2062 const UV leftuv = SvUVX(left);
2063 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2071 NV const rnv = SvNV_nomg(right);
2072 NV const lnv = SvNV_nomg(left);
2074 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2075 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2078 return (lnv > rnv) - (lnv < rnv);
2097 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2100 value = do_ncmp(left, right);
2115 int amg_type = sle_amg;
2119 switch (PL_op->op_type) {
2138 tryAMAGICbin_MG(amg_type, AMGf_set);
2141 const int cmp = (IN_LOCALE_RUNTIME
2142 ? sv_cmp_locale_flags(left, right, 0)
2143 : sv_cmp_flags(left, right, 0));
2144 SETs(boolSV(cmp * multiplier < rhs));
2152 tryAMAGICbin_MG(seq_amg, AMGf_set);
2155 SETs(boolSV(sv_eq_flags(left, right, 0)));
2163 tryAMAGICbin_MG(sne_amg, AMGf_set);
2166 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2174 tryAMAGICbin_MG(scmp_amg, 0);
2177 const int cmp = (IN_LOCALE_RUNTIME
2178 ? sv_cmp_locale_flags(left, right, 0)
2179 : sv_cmp_flags(left, right, 0));
2187 dVAR; dSP; dATARGET;
2188 tryAMAGICbin_MG(band_amg, AMGf_assign);
2191 if (SvNIOKp(left) || SvNIOKp(right)) {
2192 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2193 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2194 if (PL_op->op_private & HINT_INTEGER) {
2195 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2199 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2202 if (left_ro_nonnum) SvNIOK_off(left);
2203 if (right_ro_nonnum) SvNIOK_off(right);
2206 do_vop(PL_op->op_type, TARG, left, right);
2215 dVAR; dSP; dATARGET;
2216 const int op_type = PL_op->op_type;
2218 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2221 if (SvNIOKp(left) || SvNIOKp(right)) {
2222 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2223 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2224 if (PL_op->op_private & HINT_INTEGER) {
2225 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2226 const IV r = SvIV_nomg(right);
2227 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2231 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2232 const UV r = SvUV_nomg(right);
2233 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2236 if (left_ro_nonnum) SvNIOK_off(left);
2237 if (right_ro_nonnum) SvNIOK_off(right);
2240 do_vop(op_type, TARG, left, right);
2250 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2252 SV * const sv = TOPs;
2253 const int flags = SvFLAGS(sv);
2255 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
2259 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2260 /* It's publicly an integer, or privately an integer-not-float */
2263 if (SvIVX(sv) == IV_MIN) {
2264 /* 2s complement assumption. */
2265 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2268 else if (SvUVX(sv) <= IV_MAX) {
2273 else if (SvIVX(sv) != IV_MIN) {
2277 #ifdef PERL_PRESERVE_IVUV
2285 SETn(-SvNV_nomg(sv));
2286 else if (SvPOKp(sv)) {
2288 const char * const s = SvPV_nomg_const(sv, len);
2289 if (isIDFIRST(*s)) {
2290 sv_setpvs(TARG, "-");
2293 else if (*s == '+' || *s == '-') {
2294 sv_setsv_nomg(TARG, sv);
2295 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2297 else if (DO_UTF8(sv)) {
2298 SvIV_please_nomg(sv);
2300 goto oops_its_an_int;
2302 sv_setnv(TARG, -SvNV_nomg(sv));
2304 sv_setpvs(TARG, "-");
2309 SvIV_please_nomg(sv);
2311 goto oops_its_an_int;
2312 sv_setnv(TARG, -SvNV_nomg(sv));
2317 SETn(-SvNV_nomg(sv));
2325 tryAMAGICun_MG(not_amg, AMGf_set);
2326 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2333 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2337 if (PL_op->op_private & HINT_INTEGER) {
2338 const IV i = ~SvIV_nomg(sv);
2342 const UV u = ~SvUV_nomg(sv);
2351 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2352 sv_setsv_nomg(TARG, sv);
2353 tmps = (U8*)SvPV_force_nomg(TARG, len);
2356 /* Calculate exact length, let's not estimate. */
2361 U8 * const send = tmps + len;
2362 U8 * const origtmps = tmps;
2363 const UV utf8flags = UTF8_ALLOW_ANYUV;
2365 while (tmps < send) {
2366 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2368 targlen += UNISKIP(~c);
2374 /* Now rewind strings and write them. */
2381 Newx(result, targlen + 1, U8);
2383 while (tmps < send) {
2384 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2386 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2389 sv_usepvn_flags(TARG, (char*)result, targlen,
2390 SV_HAS_TRAILING_NUL);
2397 Newx(result, nchar + 1, U8);
2399 while (tmps < send) {
2400 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2405 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2413 register long *tmpl;
2414 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2417 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2422 for ( ; anum > 0; anum--, tmps++)
2430 /* integer versions of some of the above */
2434 dVAR; dSP; dATARGET;
2435 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2438 SETi( left * right );
2446 dVAR; dSP; dATARGET;
2447 tryAMAGICbin_MG(div_amg, AMGf_assign);
2450 IV value = SvIV_nomg(right);
2452 DIE(aTHX_ "Illegal division by zero");
2453 num = SvIV_nomg(left);
2455 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2459 value = num / value;
2465 #if defined(__GLIBC__) && IVSIZE == 8
2472 /* This is the vanilla old i_modulo. */
2473 dVAR; dSP; dATARGET;
2474 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2478 DIE(aTHX_ "Illegal modulus zero");
2479 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2483 SETi( left % right );
2488 #if defined(__GLIBC__) && IVSIZE == 8
2493 /* This is the i_modulo with the workaround for the _moddi3 bug
2494 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2495 * See below for pp_i_modulo. */
2496 dVAR; dSP; dATARGET;
2497 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2501 DIE(aTHX_ "Illegal modulus zero");
2502 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2506 SETi( left % PERL_ABS(right) );
2513 dVAR; dSP; dATARGET;
2514 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2518 DIE(aTHX_ "Illegal modulus zero");
2519 /* The assumption is to use hereafter the old vanilla version... */
2521 PL_ppaddr[OP_I_MODULO] =
2523 /* .. but if we have glibc, we might have a buggy _moddi3
2524 * (at least glicb 2.2.5 is known to have this bug), in other
2525 * words our integer modulus with negative quad as the second
2526 * argument might be broken. Test for this and re-patch the
2527 * opcode dispatch table if that is the case, remembering to
2528 * also apply the workaround so that this first round works
2529 * right, too. See [perl #9402] for more information. */
2533 /* Cannot do this check with inlined IV constants since
2534 * that seems to work correctly even with the buggy glibc. */
2536 /* Yikes, we have the bug.
2537 * Patch in the workaround version. */
2539 PL_ppaddr[OP_I_MODULO] =
2540 &Perl_pp_i_modulo_1;
2541 /* Make certain we work right this time, too. */
2542 right = PERL_ABS(right);
2545 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2549 SETi( left % right );
2557 dVAR; dSP; dATARGET;
2558 tryAMAGICbin_MG(add_amg, AMGf_assign);
2560 dPOPTOPiirl_ul_nomg;
2561 SETi( left + right );
2568 dVAR; dSP; dATARGET;
2569 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2571 dPOPTOPiirl_ul_nomg;
2572 SETi( left - right );
2580 tryAMAGICbin_MG(lt_amg, AMGf_set);
2583 SETs(boolSV(left < right));
2591 tryAMAGICbin_MG(gt_amg, AMGf_set);
2594 SETs(boolSV(left > right));
2602 tryAMAGICbin_MG(le_amg, AMGf_set);
2605 SETs(boolSV(left <= right));
2613 tryAMAGICbin_MG(ge_amg, AMGf_set);
2616 SETs(boolSV(left >= right));
2624 tryAMAGICbin_MG(eq_amg, AMGf_set);
2627 SETs(boolSV(left == right));
2635 tryAMAGICbin_MG(ne_amg, AMGf_set);
2638 SETs(boolSV(left != right));
2646 tryAMAGICbin_MG(ncmp_amg, 0);
2653 else if (left < right)
2665 tryAMAGICun_MG(neg_amg, 0);
2667 SV * const sv = TOPs;
2668 IV const i = SvIV_nomg(sv);
2674 /* High falutin' math. */
2679 tryAMAGICbin_MG(atan2_amg, 0);
2682 SETn(Perl_atan2(left, right));
2690 int amg_type = sin_amg;
2691 const char *neg_report = NULL;
2692 NV (*func)(NV) = Perl_sin;
2693 const int op_type = PL_op->op_type;
2710 amg_type = sqrt_amg;
2712 neg_report = "sqrt";
2717 tryAMAGICun_MG(amg_type, 0);
2719 SV * const arg = POPs;
2720 const NV value = SvNV_nomg(arg);
2722 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2723 SET_NUMERIC_STANDARD();
2724 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2727 XPUSHn(func(value));
2732 /* Support Configure command-line overrides for rand() functions.
2733 After 5.005, perhaps we should replace this by Configure support
2734 for drand48(), random(), or rand(). For 5.005, though, maintain
2735 compatibility by calling rand() but allow the user to override it.
2736 See INSTALL for details. --Andy Dougherty 15 July 1998
2738 /* Now it's after 5.005, and Configure supports drand48() and random(),
2739 in addition to rand(). So the overrides should not be needed any more.
2740 --Jarkko Hietaniemi 27 September 1998
2743 #ifndef HAS_DRAND48_PROTO
2744 extern double drand48 (void);
2754 value = 1.0; (void)POPs;
2760 if (!PL_srand_called) {
2761 (void)seedDrand01((Rand_seed_t)seed());
2762 PL_srand_called = TRUE;
2772 const UV anum = (MAXARG < 1 || (!TOPs && !POPs)) ? seed() : POPu;
2773 (void)seedDrand01((Rand_seed_t)anum);
2774 PL_srand_called = TRUE;
2778 /* Historically srand always returned true. We can avoid breaking
2780 sv_setpvs(TARG, "0 but true");
2789 tryAMAGICun_MG(int_amg, AMGf_numeric);
2791 SV * const sv = TOPs;
2792 const IV iv = SvIV_nomg(sv);
2793 /* XXX it's arguable that compiler casting to IV might be subtly
2794 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2795 else preferring IV has introduced a subtle behaviour change bug. OTOH
2796 relying on floating point to be accurate is a bug. */
2801 else if (SvIOK(sv)) {
2803 SETu(SvUV_nomg(sv));
2808 const NV value = SvNV_nomg(sv);
2810 if (value < (NV)UV_MAX + 0.5) {
2813 SETn(Perl_floor(value));
2817 if (value > (NV)IV_MIN - 0.5) {
2820 SETn(Perl_ceil(value));
2831 tryAMAGICun_MG(abs_amg, AMGf_numeric);
2833 SV * const sv = TOPs;
2834 /* This will cache the NV value if string isn't actually integer */
2835 const IV iv = SvIV_nomg(sv);
2840 else if (SvIOK(sv)) {
2841 /* IVX is precise */
2843 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
2851 /* 2s complement assumption. Also, not really needed as
2852 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2858 const NV value = SvNV_nomg(sv);
2872 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
2876 SV* const sv = POPs;
2878 tmps = (SvPV_const(sv, len));
2880 /* If Unicode, try to downgrade
2881 * If not possible, croak. */
2882 SV* const tsv = sv_2mortal(newSVsv(sv));
2885 sv_utf8_downgrade(tsv, FALSE);
2886 tmps = SvPV_const(tsv, len);
2888 if (PL_op->op_type == OP_HEX)
2891 while (*tmps && len && isSPACE(*tmps))
2895 if (*tmps == 'x' || *tmps == 'X') {
2897 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
2899 else if (*tmps == 'b' || *tmps == 'B')
2900 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
2902 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2904 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2918 SV * const sv = TOPs;
2920 if (SvGAMAGIC(sv)) {
2921 /* For an overloaded or magic scalar, we can't know in advance if
2922 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2923 it likes to cache the length. Maybe that should be a documented
2928 = sv_2pv_flags(sv, &len,
2929 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
2932 if (!SvPADTMP(TARG)) {
2933 sv_setsv(TARG, &PL_sv_undef);
2938 else if (DO_UTF8(sv)) {
2939 SETi(utf8_length((U8*)p, (U8*)p + len));
2943 } else if (SvOK(sv)) {
2944 /* Neither magic nor overloaded. */
2946 SETi(sv_len_utf8(sv));
2950 if (!SvPADTMP(TARG)) {
2951 sv_setsv_nomg(TARG, &PL_sv_undef);
2973 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
2974 const bool rvalue = (GIMME_V != G_VOID);
2977 const char *repl = NULL;
2979 int num_args = PL_op->op_private & 7;
2980 bool repl_need_utf8_upgrade = FALSE;
2981 bool repl_is_utf8 = FALSE;
2985 if((repl_sv = POPs)) {
2986 repl = SvPV_const(repl_sv, repl_len);
2987 repl_is_utf8 = DO_UTF8(repl_sv) && repl_len;
2991 if ((len_sv = POPs)) {
2992 len_iv = SvIV(len_sv);
2993 len_is_uv = SvIOK_UV(len_sv);
2998 pos1_iv = SvIV(pos_sv);
2999 pos1_is_uv = SvIOK_UV(pos_sv);
3005 sv_utf8_upgrade(sv);
3007 else if (DO_UTF8(sv))
3008 repl_need_utf8_upgrade = TRUE;
3010 tmps = SvPV_const(sv, curlen);
3012 utf8_curlen = sv_len_utf8(sv);
3013 if (utf8_curlen == curlen)
3016 curlen = utf8_curlen;
3021 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
3022 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3025 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
3029 if (!len_is_uv && len_iv < 0) {
3030 pos2_iv = curlen + len_iv;
3032 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3035 } else { /* len_iv >= 0 */
3036 if (!pos1_is_uv && pos1_iv < 0) {
3037 pos2_iv = pos1_iv + len_iv;
3038 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3040 if ((UV)len_iv > curlen-(UV)pos1_iv)
3043 pos2_iv = pos1_iv+len_iv;
3053 if (!pos2_is_uv && pos2_iv < 0) {
3054 if (!pos1_is_uv && pos1_iv < 0)
3058 else if (!pos1_is_uv && pos1_iv < 0)
3061 if ((UV)pos2_iv < (UV)pos1_iv)
3063 if ((UV)pos2_iv > curlen)
3067 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3068 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3069 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3070 STRLEN byte_len = len;
3071 STRLEN byte_pos = utf8_curlen
3072 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3074 if (lvalue && !repl) {
3077 if (!SvGMAGICAL(sv)) {
3079 SvPV_force_nolen(sv);
3080 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3081 "Attempt to use reference as lvalue in substr");
3083 if (isGV_with_GP(sv))
3084 SvPV_force_nolen(sv);
3085 else if (SvOK(sv)) /* is it defined ? */
3086 (void)SvPOK_only_UTF8(sv);
3088 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3091 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3092 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3094 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3095 LvTARGOFF(ret) = pos;
3096 LvTARGLEN(ret) = len;
3099 PUSHs(ret); /* avoid SvSETMAGIC here */
3106 SvTAINTED_off(TARG); /* decontaminate */
3107 SvUTF8_off(TARG); /* decontaminate */
3108 sv_setpvn(TARG, tmps, byte_len);
3109 #ifdef USE_LOCALE_COLLATE
3110 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3117 SV* repl_sv_copy = NULL;
3119 if (repl_need_utf8_upgrade) {
3120 repl_sv_copy = newSVsv(repl_sv);
3121 sv_utf8_upgrade(repl_sv_copy);
3122 repl = SvPV_const(repl_sv_copy, repl_len);
3123 repl_is_utf8 = DO_UTF8(repl_sv_copy) && repl_len;
3127 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3130 SvREFCNT_dec(repl_sv_copy);
3142 Perl_croak(aTHX_ "substr outside of string");
3143 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3150 register const IV size = POPi;
3151 register const IV offset = POPi;
3152 register SV * const src = POPs;
3153 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3156 if (lvalue) { /* it's an lvalue! */
3157 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3158 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3160 LvTARG(ret) = SvREFCNT_inc_simple(src);
3161 LvTARGOFF(ret) = offset;
3162 LvTARGLEN(ret) = size;
3166 SvTAINTED_off(TARG); /* decontaminate */
3170 sv_setuv(ret, do_vecget(src, offset, size));
3186 const char *little_p;
3189 const bool is_index = PL_op->op_type == OP_INDEX;
3190 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
3196 big_p = SvPV_const(big, biglen);
3197 little_p = SvPV_const(little, llen);
3199 big_utf8 = DO_UTF8(big);
3200 little_utf8 = DO_UTF8(little);
3201 if (big_utf8 ^ little_utf8) {
3202 /* One needs to be upgraded. */
3203 if (little_utf8 && !PL_encoding) {
3204 /* Well, maybe instead we might be able to downgrade the small
3206 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3209 /* If the large string is ISO-8859-1, and it's not possible to
3210 convert the small string to ISO-8859-1, then there is no
3211 way that it could be found anywhere by index. */
3216 /* At this point, pv is a malloc()ed string. So donate it to temp
3217 to ensure it will get free()d */
3218 little = temp = newSV(0);
3219 sv_usepvn(temp, pv, llen);
3220 little_p = SvPVX(little);
3223 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3226 sv_recode_to_utf8(temp, PL_encoding);
3228 sv_utf8_upgrade(temp);
3233 big_p = SvPV_const(big, biglen);
3236 little_p = SvPV_const(little, llen);
3240 if (SvGAMAGIC(big)) {
3241 /* Life just becomes a lot easier if I use a temporary here.
3242 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3243 will trigger magic and overloading again, as will fbm_instr()
3245 big = newSVpvn_flags(big_p, biglen,
3246 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3249 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3250 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3251 warn on undef, and we've already triggered a warning with the
3252 SvPV_const some lines above. We can't remove that, as we need to
3253 call some SvPV to trigger overloading early and find out if the
3255 This is all getting to messy. The API isn't quite clean enough,
3256 because data access has side effects.
3258 little = newSVpvn_flags(little_p, llen,
3259 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3260 little_p = SvPVX(little);
3264 offset = is_index ? 0 : biglen;
3266 if (big_utf8 && offset > 0)
3267 sv_pos_u2b(big, &offset, 0);
3273 else if (offset > (I32)biglen)
3275 if (!(little_p = is_index
3276 ? fbm_instr((unsigned char*)big_p + offset,
3277 (unsigned char*)big_p + biglen, little, 0)
3278 : rninstr(big_p, big_p + offset,
3279 little_p, little_p + llen)))
3282 retval = little_p - big_p;
3283 if (retval > 0 && big_utf8)
3284 sv_pos_b2u(big, &retval);
3294 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3295 SvTAINTED_off(TARG);
3296 do_sprintf(TARG, SP-MARK, MARK+1);
3297 TAINT_IF(SvTAINTED(TARG));
3309 const U8 *s = (U8*)SvPV_const(argsv, len);
3311 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3312 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3313 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3317 XPUSHu(DO_UTF8(argsv) ?
3318 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3330 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3332 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3334 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3336 (void) POPs; /* Ignore the argument value. */
3337 value = UNICODE_REPLACEMENT;
3343 SvUPGRADE(TARG,SVt_PV);
3345 if (value > 255 && !IN_BYTES) {
3346 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3347 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3348 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3350 (void)SvPOK_only(TARG);
3359 *tmps++ = (char)value;
3361 (void)SvPOK_only(TARG);
3363 if (PL_encoding && !IN_BYTES) {
3364 sv_recode_to_utf8(TARG, PL_encoding);
3366 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3367 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3371 *tmps++ = (char)value;
3387 const char *tmps = SvPV_const(left, len);
3389 if (DO_UTF8(left)) {
3390 /* If Unicode, try to downgrade.
3391 * If not possible, croak.
3392 * Yes, we made this up. */
3393 SV* const tsv = sv_2mortal(newSVsv(left));
3396 sv_utf8_downgrade(tsv, FALSE);
3397 tmps = SvPV_const(tsv, len);
3399 # ifdef USE_ITHREADS
3401 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3402 /* This should be threadsafe because in ithreads there is only
3403 * one thread per interpreter. If this would not be true,
3404 * we would need a mutex to protect this malloc. */
3405 PL_reentrant_buffer->_crypt_struct_buffer =
3406 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3407 #if defined(__GLIBC__) || defined(__EMX__)
3408 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3409 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3410 /* work around glibc-2.2.5 bug */
3411 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3415 # endif /* HAS_CRYPT_R */
3416 # endif /* USE_ITHREADS */
3418 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3420 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3426 "The crypt() function is unimplemented due to excessive paranoia.");
3430 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3431 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3433 /* Below are several macros that generate code */
3434 /* Generates code to store a unicode codepoint c that is known to occupy
3435 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3436 #define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3438 *(p) = UTF8_TWO_BYTE_HI(c); \
3439 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3442 /* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3443 * available byte after the two bytes */
3444 #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3446 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3447 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3450 /* Generates code to store the upper case of latin1 character l which is known
3451 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3452 * are only two characters that fit this description, and this macro knows
3453 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3455 #define STORE_NON_LATIN1_UC(p, l) \
3457 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3458 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3459 } else { /* Must be the following letter */ \
3460 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3464 /* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3465 * after the character stored */
3466 #define CAT_NON_LATIN1_UC(p, l) \
3468 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3469 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3471 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3475 /* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3476 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3477 * and must require two bytes to store it. Advances p to point to the next
3478 * available position */
3479 #define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3481 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3482 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3483 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3484 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3485 } else {/* else is one of the other two special cases */ \
3486 CAT_NON_LATIN1_UC((p), (l)); \
3492 /* Actually is both lcfirst() and ucfirst(). Only the first character
3493 * changes. This means that possibly we can change in-place, ie., just
3494 * take the source and change that one character and store it back, but not
3495 * if read-only etc, or if the length changes */
3500 STRLEN slen; /* slen is the byte length of the whole SV. */
3503 bool inplace; /* ? Convert first char only, in-place */
3504 bool doing_utf8 = FALSE; /* ? using utf8 */
3505 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3506 const int op_type = PL_op->op_type;
3509 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3510 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3511 * stored as UTF-8 at s. */
3512 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3513 * lowercased) character stored in tmpbuf. May be either
3514 * UTF-8 or not, but in either case is the number of bytes */
3518 s = (const U8*)SvPV_nomg_const(source, slen);
3520 if (ckWARN(WARN_UNINITIALIZED))
3521 report_uninit(source);
3526 /* We may be able to get away with changing only the first character, in
3527 * place, but not if read-only, etc. Later we may discover more reasons to
3528 * not convert in-place. */
3529 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3531 /* First calculate what the changed first character should be. This affects
3532 * whether we can just swap it out, leaving the rest of the string unchanged,
3533 * or even if have to convert the dest to UTF-8 when the source isn't */
3535 if (! slen) { /* If empty */
3536 need = 1; /* still need a trailing NUL */
3538 else if (DO_UTF8(source)) { /* Is the source utf8? */
3541 if (UTF8_IS_INVARIANT(*s)) {
3543 /* An invariant source character is either ASCII or, in EBCDIC, an
3544 * ASCII equivalent or a caseless C1 control. In both these cases,
3545 * the lower and upper cases of any character are also invariants
3546 * (and title case is the same as upper case). So it is safe to
3547 * use the simple case change macros which avoid the overhead of
3548 * the general functions. Note that if perl were to be extended to
3549 * do locale handling in UTF-8 strings, this wouldn't be true in,
3550 * for example, Lithuanian or Turkic. */
3551 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3555 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3558 /* Similarly, if the source character isn't invariant but is in the
3559 * latin1 range (or EBCDIC equivalent thereof), we have the case
3560 * changes compiled into perl, and can avoid the overhead of the
3561 * general functions. In this range, the characters are stored as
3562 * two UTF-8 bytes, and it so happens that any changed-case version
3563 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3567 /* Convert the two source bytes to a single Unicode code point
3568 * value, change case and save for below */
3569 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3570 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3571 U8 lower = toLOWER_LATIN1(chr);
3572 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3574 else { /* ucfirst */
3575 U8 upper = toUPPER_LATIN1_MOD(chr);
3577 /* Most of the latin1 range characters are well-behaved. Their
3578 * title and upper cases are the same, and are also in the
3579 * latin1 range. The macro above returns their upper (hence
3580 * title) case, and all that need be done is to save the result
3581 * for below. However, several characters are problematic, and
3582 * have to be handled specially. The MOD in the macro name
3583 * above means that these tricky characters all get mapped to
3584 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3585 * This mapping saves some tests for the majority of the
3588 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3590 /* Not tricky. Just save it. */
3591 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3593 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3595 /* This one is tricky because it is two characters long,
3596 * though the UTF-8 is still two bytes, so the stored
3597 * length doesn't change */
3598 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3599 *(tmpbuf + 1) = 's';
3603 /* The other two have their title and upper cases the same,
3604 * but are tricky because the changed-case characters
3605 * aren't in the latin1 range. They, however, do fit into
3606 * two UTF-8 bytes */
3607 STORE_NON_LATIN1_UC(tmpbuf, chr);
3613 /* Here, can't short-cut the general case */
3615 utf8_to_uvchr(s, &ulen);
3616 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3617 else toLOWER_utf8(s, tmpbuf, &tculen);
3619 /* we can't do in-place if the length changes. */
3620 if (ulen != tculen) inplace = FALSE;
3621 need = slen + 1 - ulen + tculen;
3624 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3625 * latin1 is treated as caseless. Note that a locale takes
3627 tculen = 1; /* Most characters will require one byte, but this will
3628 * need to be overridden for the tricky ones */
3631 if (op_type == OP_LCFIRST) {
3633 /* lower case the first letter: no trickiness for any character */
3634 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3635 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3638 else if (IN_LOCALE_RUNTIME) {
3639 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3640 * have upper and title case different
3643 else if (! IN_UNI_8_BIT) {
3644 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3645 * on EBCDIC machines whatever the
3646 * native function does */
3648 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3649 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3651 /* tmpbuf now has the correct title case for all latin1 characters
3652 * except for the several ones that have tricky handling. All
3653 * of these are mapped by the MOD to the letter below. */
3654 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3656 /* The length is going to change, with all three of these, so
3657 * can't replace just the first character */
3660 /* We use the original to distinguish between these tricky
3662 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3663 /* Two character title case 'Ss', but can remain non-UTF-8 */
3666 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3671 /* The other two tricky ones have their title case outside
3672 * latin1. It is the same as their upper case. */
3674 STORE_NON_LATIN1_UC(tmpbuf, *s);
3676 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3677 * and their upper cases is 2. */
3680 /* The entire result will have to be in UTF-8. Assume worst
3681 * case sizing in conversion. (all latin1 characters occupy
3682 * at most two bytes in utf8) */
3683 convert_source_to_utf8 = TRUE;
3684 need = slen * 2 + 1;
3686 } /* End of is one of the three special chars */
3687 } /* End of use Unicode (Latin1) semantics */
3688 } /* End of changing the case of the first character */
3690 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3691 * generate the result */
3694 /* We can convert in place. This means we change just the first
3695 * character without disturbing the rest; no need to grow */
3697 s = d = (U8*)SvPV_force_nomg(source, slen);
3703 /* Here, we can't convert in place; we earlier calculated how much
3704 * space we will need, so grow to accommodate that */
3705 SvUPGRADE(dest, SVt_PV);
3706 d = (U8*)SvGROW(dest, need);
3707 (void)SvPOK_only(dest);
3714 if (! convert_source_to_utf8) {
3716 /* Here both source and dest are in UTF-8, but have to create
3717 * the entire output. We initialize the result to be the
3718 * title/lower cased first character, and then append the rest
3720 sv_setpvn(dest, (char*)tmpbuf, tculen);
3722 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3726 const U8 *const send = s + slen;
3728 /* Here the dest needs to be in UTF-8, but the source isn't,
3729 * except we earlier UTF-8'd the first character of the source
3730 * into tmpbuf. First put that into dest, and then append the
3731 * rest of the source, converting it to UTF-8 as we go. */
3733 /* Assert tculen is 2 here because the only two characters that
3734 * get to this part of the code have 2-byte UTF-8 equivalents */
3736 *d++ = *(tmpbuf + 1);
3737 s++; /* We have just processed the 1st char */
3739 for (; s < send; s++) {
3740 d = uvchr_to_utf8(d, *s);
3743 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3747 else { /* in-place UTF-8. Just overwrite the first character */
3748 Copy(tmpbuf, d, tculen, U8);
3749 SvCUR_set(dest, need - 1);
3752 else { /* Neither source nor dest are in or need to be UTF-8 */
3754 if (IN_LOCALE_RUNTIME) {
3758 if (inplace) { /* in-place, only need to change the 1st char */
3761 else { /* Not in-place */
3763 /* Copy the case-changed character(s) from tmpbuf */
3764 Copy(tmpbuf, d, tculen, U8);
3765 d += tculen - 1; /* Code below expects d to point to final
3766 * character stored */
3769 else { /* empty source */
3770 /* See bug #39028: Don't taint if empty */
3774 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3775 * the destination to retain that flag */
3779 if (!inplace) { /* Finish the rest of the string, unchanged */
3780 /* This will copy the trailing NUL */
3781 Copy(s + 1, d + 1, slen, U8);
3782 SvCUR_set(dest, need - 1);
3785 if (dest != source && SvTAINTED(source))
3791 /* There's so much setup/teardown code common between uc and lc, I wonder if
3792 it would be worth merging the two, and just having a switch outside each
3793 of the three tight loops. There is less and less commonality though */
3807 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3808 && SvTEMP(source) && !DO_UTF8(source)
3809 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3811 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3812 * make the loop tight, so we overwrite the source with the dest before
3813 * looking at it, and we need to look at the original source
3814 * afterwards. There would also need to be code added to handle
3815 * switching to not in-place in midstream if we run into characters
3816 * that change the length.
3819 s = d = (U8*)SvPV_force_nomg(source, len);
3826 /* The old implementation would copy source into TARG at this point.
3827 This had the side effect that if source was undef, TARG was now
3828 an undefined SV with PADTMP set, and they don't warn inside
3829 sv_2pv_flags(). However, we're now getting the PV direct from
3830 source, which doesn't have PADTMP set, so it would warn. Hence the
3834 s = (const U8*)SvPV_nomg_const(source, len);
3836 if (ckWARN(WARN_UNINITIALIZED))
3837 report_uninit(source);
3843 SvUPGRADE(dest, SVt_PV);
3844 d = (U8*)SvGROW(dest, min);
3845 (void)SvPOK_only(dest);
3850 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3851 to check DO_UTF8 again here. */
3853 if (DO_UTF8(source)) {
3854 const U8 *const send = s + len;
3855 U8 tmpbuf[UTF8_MAXBYTES+1];
3857 /* All occurrences of these are to be moved to follow any other marks.
3858 * This is context-dependent. We may not be passed enough context to
3859 * move the iota subscript beyond all of them, but we do the best we can
3860 * with what we're given. The result is always better than if we
3861 * hadn't done this. And, the problem would only arise if we are
3862 * passed a character without all its combining marks, which would be
3863 * the caller's mistake. The information this is based on comes from a
3864 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3865 * itself) and so can't be checked properly to see if it ever gets
3866 * revised. But the likelihood of it changing is remote */
3867 bool in_iota_subscript = FALSE;
3870 if (in_iota_subscript && ! is_utf8_mark(s)) {
3871 /* A non-mark. Time to output the iota subscript */
3872 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
3873 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3875 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3876 in_iota_subscript = FALSE;
3879 /* If the UTF-8 character is invariant, then it is in the range
3880 * known by the standard macro; result is only one byte long */
3881 if (UTF8_IS_INVARIANT(*s)) {
3885 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3887 /* Likewise, if it fits in a byte, its case change is in our
3889 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3890 U8 upper = toUPPER_LATIN1_MOD(orig);
3891 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
3896 /* Otherwise, need the general UTF-8 case. Get the changed
3897 * case value and copy it to the output buffer */
3899 const STRLEN u = UTF8SKIP(s);
3902 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
3903 if (uv == GREEK_CAPITAL_LETTER_IOTA
3904 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
3906 in_iota_subscript = TRUE;
3909 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3910 /* If the eventually required minimum size outgrows
3911 * the available space, we need to grow. */
3912 const UV o = d - (U8*)SvPVX_const(dest);
3914 /* If someone uppercases one million U+03B0s we
3915 * SvGROW() one million times. Or we could try
3916 * guessing how much to allocate without allocating too
3917 * much. Such is life. See corresponding comment in
3918 * lc code for another option */
3920 d = (U8*)SvPVX(dest) + o;
3922 Copy(tmpbuf, d, ulen, U8);
3928 if (in_iota_subscript) {
3929 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3933 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3935 else { /* Not UTF-8 */
3937 const U8 *const send = s + len;
3939 /* Use locale casing if in locale; regular style if not treating
3940 * latin1 as having case; otherwise the latin1 casing. Do the
3941 * whole thing in a tight loop, for speed, */
3942 if (IN_LOCALE_RUNTIME) {
3945 for (; s < send; d++, s++)
3946 *d = toUPPER_LC(*s);
3948 else if (! IN_UNI_8_BIT) {
3949 for (; s < send; d++, s++) {
3954 for (; s < send; d++, s++) {
3955 *d = toUPPER_LATIN1_MOD(*s);
3956 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
3958 /* The mainstream case is the tight loop above. To avoid
3959 * extra tests in that, all three characters that require
3960 * special handling are mapped by the MOD to the one tested
3962 * Use the source to distinguish between the three cases */
3964 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3966 /* uc() of this requires 2 characters, but they are
3967 * ASCII. If not enough room, grow the string */
3968 if (SvLEN(dest) < ++min) {
3969 const UV o = d - (U8*)SvPVX_const(dest);
3971 d = (U8*)SvPVX(dest) + o;
3973 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
3974 continue; /* Back to the tight loop; still in ASCII */
3977 /* The other two special handling characters have their
3978 * upper cases outside the latin1 range, hence need to be
3979 * in UTF-8, so the whole result needs to be in UTF-8. So,
3980 * here we are somewhere in the middle of processing a
3981 * non-UTF-8 string, and realize that we will have to convert
3982 * the whole thing to UTF-8. What to do? There are
3983 * several possibilities. The simplest to code is to
3984 * convert what we have so far, set a flag, and continue on
3985 * in the loop. The flag would be tested each time through
3986 * the loop, and if set, the next character would be
3987 * converted to UTF-8 and stored. But, I (khw) didn't want
3988 * to slow down the mainstream case at all for this fairly
3989 * rare case, so I didn't want to add a test that didn't
3990 * absolutely have to be there in the loop, besides the
3991 * possibility that it would get too complicated for
3992 * optimizers to deal with. Another possibility is to just
3993 * give up, convert the source to UTF-8, and restart the
3994 * function that way. Another possibility is to convert
3995 * both what has already been processed and what is yet to
3996 * come separately to UTF-8, then jump into the loop that
3997 * handles UTF-8. But the most efficient time-wise of the
3998 * ones I could think of is what follows, and turned out to
3999 * not require much extra code. */
4001 /* Convert what we have so far into UTF-8, telling the
4002 * function that we know it should be converted, and to
4003 * allow extra space for what we haven't processed yet.
4004 * Assume the worst case space requirements for converting
4005 * what we haven't processed so far: that it will require
4006 * two bytes for each remaining source character, plus the
4007 * NUL at the end. This may cause the string pointer to
4008 * move, so re-find it. */
4010 len = d - (U8*)SvPVX_const(dest);
4011 SvCUR_set(dest, len);
4012 len = sv_utf8_upgrade_flags_grow(dest,
4013 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4015 d = (U8*)SvPVX(dest) + len;
4017 /* And append the current character's upper case in UTF-8 */
4018 CAT_NON_LATIN1_UC(d, *s);
4020 /* Now process the remainder of the source, converting to
4021 * upper and UTF-8. If a resulting byte is invariant in
4022 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4023 * append it to the output. */
4026 for (; s < send; s++) {
4027 U8 upper = toUPPER_LATIN1_MOD(*s);
4028 if UTF8_IS_INVARIANT(upper) {
4032 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4036 /* Here have processed the whole source; no need to continue
4037 * with the outer loop. Each character has been converted
4038 * to upper case and converted to UTF-8 */
4041 } /* End of processing all latin1-style chars */
4042 } /* End of processing all chars */
4043 } /* End of source is not empty */
4045 if (source != dest) {
4046 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4047 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4049 } /* End of isn't utf8 */
4050 if (dest != source && SvTAINTED(source))
4069 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4070 && SvTEMP(source) && !DO_UTF8(source)) {
4072 /* We can convert in place, as lowercasing anything in the latin1 range
4073 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4075 s = d = (U8*)SvPV_force_nomg(source, len);
4082 /* The old implementation would copy source into TARG at this point.
4083 This had the side effect that if source was undef, TARG was now
4084 an undefined SV with PADTMP set, and they don't warn inside
4085 sv_2pv_flags(). However, we're now getting the PV direct from
4086 source, which doesn't have PADTMP set, so it would warn. Hence the
4090 s = (const U8*)SvPV_nomg_const(source, len);
4092 if (ckWARN(WARN_UNINITIALIZED))
4093 report_uninit(source);
4099 SvUPGRADE(dest, SVt_PV);
4100 d = (U8*)SvGROW(dest, min);
4101 (void)SvPOK_only(dest);
4106 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4107 to check DO_UTF8 again here. */
4109 if (DO_UTF8(source)) {
4110 const U8 *const send = s + len;
4111 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4114 if (UTF8_IS_INVARIANT(*s)) {
4116 /* Invariant characters use the standard mappings compiled in.
4121 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4123 /* As do the ones in the Latin1 range */
4124 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)));
4125 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4129 /* Here, is utf8 not in Latin-1 range, have to go out and get
4130 * the mappings from the tables. */
4132 const STRLEN u = UTF8SKIP(s);
4135 #ifndef CONTEXT_DEPENDENT_CASING
4136 toLOWER_utf8(s, tmpbuf, &ulen);
4138 /* This is ifdefd out because it probably is the wrong thing to do. The right
4139 * thing is probably to have an I/O layer that converts final sigma to regular
4140 * on input and vice versa (under the correct circumstances) on output. In
4141 * effect, the final sigma is just a glyph variation when the regular one
4142 * occurs at the end of a word. And we don't really know what's going to be
4143 * the end of the word until it is finally output, as splitting and joining can
4144 * occur at any time and change what once was the word end to be in the middle,
4145 * and vice versa. */
4147 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4149 /* If the lower case is a small sigma, it may be that we need
4150 * to change it to a final sigma. This happens at the end of
4151 * a word that contains more than just this character, and only
4152 * when we started with a capital sigma. */
4153 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4154 s > send - len && /* Makes sure not the first letter */
4155 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4158 /* We use the algorithm in:
4159 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4160 * is a CAPITAL SIGMA): If C is preceded by a sequence
4161 * consisting of a cased letter and a case-ignorable
4162 * sequence, and C is not followed by a sequence consisting
4163 * of a case ignorable sequence and then a cased letter,
4164 * then when lowercasing C, C becomes a final sigma */
4166 /* To determine if this is the end of a word, need to peek
4167 * ahead. Look at the next character */
4168 const U8 *peek = s + u;
4170 /* Skip any case ignorable characters */
4171 while (peek < send && is_utf8_case_ignorable(peek)) {
4172 peek += UTF8SKIP(peek);
4175 /* If we reached the end of the string without finding any
4176 * non-case ignorable characters, or if the next such one
4177 * is not-cased, then we have met the conditions for it
4178 * being a final sigma with regards to peek ahead, and so
4179 * must do peek behind for the remaining conditions. (We
4180 * know there is stuff behind to look at since we tested
4181 * above that this isn't the first letter) */
4182 if (peek >= send || ! is_utf8_cased(peek)) {
4183 peek = utf8_hop(s, -1);
4185 /* Here are at the beginning of the first character
4186 * before the original upper case sigma. Keep backing
4187 * up, skipping any case ignorable characters */
4188 while (is_utf8_case_ignorable(peek)) {
4189 peek = utf8_hop(peek, -1);
4192 /* Here peek points to the first byte of the closest
4193 * non-case-ignorable character before the capital
4194 * sigma. If it is cased, then by the Unicode
4195 * algorithm, we should use a small final sigma instead
4196 * of what we have */
4197 if (is_utf8_cased(peek)) {
4198 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4199 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4203 else { /* Not a context sensitive mapping */
4204 #endif /* End of commented out context sensitive */
4205 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4207 /* If the eventually required minimum size outgrows
4208 * the available space, we need to grow. */
4209 const UV o = d - (U8*)SvPVX_const(dest);
4211 /* If someone lowercases one million U+0130s we
4212 * SvGROW() one million times. Or we could try
4213 * guessing how much to allocate without allocating too
4214 * much. Such is life. Another option would be to
4215 * grow an extra byte or two more each time we need to
4216 * grow, which would cut down the million to 500K, with
4219 d = (U8*)SvPVX(dest) + o;
4221 #ifdef CONTEXT_DEPENDENT_CASING
4224 /* Copy the newly lowercased letter to the output buffer we're
4226 Copy(tmpbuf, d, ulen, U8);
4230 } /* End of looping through the source string */
4233 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4234 } else { /* Not utf8 */
4236 const U8 *const send = s + len;
4238 /* Use locale casing if in locale; regular style if not treating
4239 * latin1 as having case; otherwise the latin1 casing. Do the
4240 * whole thing in a tight loop, for speed, */
4241 if (IN_LOCALE_RUNTIME) {
4244 for (; s < send; d++, s++)
4245 *d = toLOWER_LC(*s);
4247 else if (! IN_UNI_8_BIT) {
4248 for (; s < send; d++, s++) {
4253 for (; s < send; d++, s++) {
4254 *d = toLOWER_LATIN1(*s);
4258 if (source != dest) {
4260 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4263 if (dest != source && SvTAINTED(source))
4272 SV * const sv = TOPs;
4274 register const char *s = SvPV_const(sv,len);
4276 SvUTF8_off(TARG); /* decontaminate */
4279 SvUPGRADE(TARG, SVt_PV);
4280 SvGROW(TARG, (len * 2) + 1);
4284 if (UTF8_IS_CONTINUED(*s)) {
4285 STRLEN ulen = UTF8SKIP(s);
4309 SvCUR_set(TARG, d - SvPVX_const(TARG));
4310 (void)SvPOK_only_UTF8(TARG);
4313 sv_setpvn(TARG, s, len);
4322 dVAR; dSP; dMARK; dORIGMARK;
4323 register AV *const av = MUTABLE_AV(POPs);
4324 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4326 if (SvTYPE(av) == SVt_PVAV) {
4327 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4328 bool can_preserve = FALSE;
4334 can_preserve = SvCANEXISTDELETE(av);
4337 if (lval && localizing) {
4340 for (svp = MARK + 1; svp <= SP; svp++) {
4341 const I32 elem = SvIV(*svp);
4345 if (max > AvMAX(av))
4349 while (++MARK <= SP) {
4351 I32 elem = SvIV(*MARK);
4352 bool preeminent = TRUE;
4354 if (localizing && can_preserve) {
4355 /* If we can determine whether the element exist,
4356 * Try to preserve the existenceness of a tied array
4357 * element by using EXISTS and DELETE if possible.
4358 * Fallback to FETCH and STORE otherwise. */
4359 preeminent = av_exists(av, elem);
4362 svp = av_fetch(av, elem, lval);
4364 if (!svp || *svp == &PL_sv_undef)
4365 DIE(aTHX_ PL_no_aelem, elem);
4368 save_aelem(av, elem, svp);
4370 SAVEADELETE(av, elem);
4373 *MARK = svp ? *svp : &PL_sv_undef;
4376 if (GIMME != G_ARRAY) {
4378 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4384 /* Smart dereferencing for keys, values and each */
4396 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4401 "Type of argument to %s must be unblessed hashref or arrayref",
4402 PL_op_desc[PL_op->op_type] );
4405 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4407 "Can't modify %s in %s",
4408 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4411 /* Delegate to correct function for op type */
4413 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4414 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4417 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4425 AV *array = MUTABLE_AV(POPs);
4426 const I32 gimme = GIMME_V;
4427 IV *iterp = Perl_av_iter_p(aTHX_ array);
4428 const IV current = (*iterp)++;
4430 if (current > av_len(array)) {
4432 if (gimme == G_SCALAR)
4440 if (gimme == G_ARRAY) {
4441 SV **const element = av_fetch(array, current, 0);
4442 PUSHs(element ? *element : &PL_sv_undef);
4451 AV *array = MUTABLE_AV(POPs);
4452 const I32 gimme = GIMME_V;
4454 *Perl_av_iter_p(aTHX_ array) = 0;
4456 if (gimme == G_SCALAR) {
4458 PUSHi(av_len(array) + 1);
4460 else if (gimme == G_ARRAY) {
4461 IV n = Perl_av_len(aTHX_ array);
4466 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
4467 for (i = 0; i <= n; i++) {
4472 for (i = 0; i <= n; i++) {
4473 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4474 PUSHs(elem ? *elem : &PL_sv_undef);
4481 /* Associative arrays. */
4487 HV * hash = MUTABLE_HV(POPs);
4489 const I32 gimme = GIMME_V;
4492 /* might clobber stack_sp */
4493 entry = hv_iternext(hash);
4498 SV* const sv = hv_iterkeysv(entry);
4499 PUSHs(sv); /* won't clobber stack_sp */
4500 if (gimme == G_ARRAY) {
4503 /* might clobber stack_sp */
4504 val = hv_iterval(hash, entry);
4509 else if (gimme == G_SCALAR)
4516 S_do_delete_local(pTHX)
4520 const I32 gimme = GIMME_V;
4524 if (PL_op->op_private & OPpSLICE) {
4526 SV * const osv = POPs;
4527 const bool tied = SvRMAGICAL(osv)
4528 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4529 const bool can_preserve = SvCANEXISTDELETE(osv)
4530 || mg_find((const SV *)osv, PERL_MAGIC_env);
4531 const U32 type = SvTYPE(osv);
4532 if (type == SVt_PVHV) { /* hash element */
4533 HV * const hv = MUTABLE_HV(osv);
4534 while (++MARK <= SP) {
4535 SV * const keysv = *MARK;
4537 bool preeminent = TRUE;
4539 preeminent = hv_exists_ent(hv, keysv, 0);
4541 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4548 sv = hv_delete_ent(hv, keysv, 0, 0);
4549 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4552 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4554 *MARK = sv_mortalcopy(sv);