3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
33 /* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
37 #ifdef NEED_GETPID_PROTO
38 extern Pid_t getpid (void);
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
45 #if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
49 /* variations on pp_null */
55 if (GIMME_V == G_SCALAR)
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
71 if (PL_op->op_flags & OPf_REF) {
75 if (GIMME == G_SCALAR)
76 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
81 if (gimme == G_ARRAY) {
82 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
84 if (SvMAGICAL(TARG)) {
86 for (i=0; i < (U32)maxarg; i++) {
87 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
88 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
92 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
96 else if (gimme == G_SCALAR) {
97 SV* const sv = sv_newmortal();
98 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
110 assert(SvTYPE(TARG) == SVt_PVHV);
112 if (PL_op->op_private & OPpLVAL_INTRO)
113 if (!(PL_op->op_private & OPpPAD_STATE))
114 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
115 if (PL_op->op_flags & OPf_REF)
118 if (GIMME == G_SCALAR)
119 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
123 if (gimme == G_ARRAY) {
126 else if (gimme == G_SCALAR) {
127 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
135 static const char S_no_symref_sv[] =
136 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
145 tryAMAGICunDEREF(to_gv);
148 if (SvTYPE(sv) == SVt_PVIO) {
149 GV * const gv = MUTABLE_GV(sv_newmortal());
150 gv_init(gv, 0, "", 0, 0);
151 GvIOp(gv) = MUTABLE_IO(sv);
152 SvREFCNT_inc_void_NN(sv);
155 else if (!isGV_with_GP(sv))
156 DIE(aTHX_ "Not a GLOB reference");
159 if (!isGV_with_GP(sv)) {
160 if (!SvOK(sv) && sv != &PL_sv_undef) {
161 /* If this is a 'my' scalar and flag is set then vivify
165 Perl_croak_no_modify(aTHX);
166 if (PL_op->op_private & OPpDEREF) {
168 if (cUNOP->op_targ) {
170 SV * const namesv = PAD_SV(cUNOP->op_targ);
171 const char * const name = SvPV(namesv, len);
172 gv = MUTABLE_GV(newSV(0));
173 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
176 const char * const name = CopSTASHPV(PL_curcop);
179 prepare_SV_for_RV(sv);
180 SvRV_set(sv, MUTABLE_SV(gv));
185 if (PL_op->op_flags & OPf_REF ||
186 PL_op->op_private & HINT_STRICT_REFS)
187 DIE(aTHX_ PL_no_usym, "a symbol");
188 if (ckWARN(WARN_UNINITIALIZED))
192 if ((PL_op->op_flags & OPf_SPECIAL) &&
193 !(PL_op->op_flags & OPf_MOD))
195 SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
197 && (!is_gv_magical_sv(sv,0)
198 || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
205 if (PL_op->op_private & HINT_STRICT_REFS)
206 DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
207 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
208 == OPpDONT_INIT_GV) {
209 /* We are the target of a coderef assignment. Return
210 the scalar unchanged, and let pp_sasssign deal with
214 sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD, SVt_PVGV));
218 if (PL_op->op_private & OPpLVAL_INTRO)
219 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
224 /* Helper function for pp_rv2sv and pp_rv2av */
226 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
227 const svtype type, SV ***spp)
232 PERL_ARGS_ASSERT_SOFTREF2XV;
234 if (PL_op->op_private & HINT_STRICT_REFS) {
236 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
238 Perl_die(aTHX_ PL_no_usym, what);
241 if (PL_op->op_flags & OPf_REF)
242 Perl_die(aTHX_ PL_no_usym, what);
243 if (ckWARN(WARN_UNINITIALIZED))
245 if (type != SVt_PV && GIMME_V == G_ARRAY) {
249 **spp = &PL_sv_undef;
252 if ((PL_op->op_flags & OPf_SPECIAL) &&
253 !(PL_op->op_flags & OPf_MOD))
255 gv = gv_fetchsv(sv, 0, type);
257 && (!is_gv_magical_sv(sv,0)
258 || !(gv = gv_fetchsv(sv, GV_ADD, type))))
260 **spp = &PL_sv_undef;
265 gv = gv_fetchsv(sv, GV_ADD, type);
275 if (!(PL_op->op_private & OPpDEREFed))
278 tryAMAGICunDEREF(to_sv);
281 switch (SvTYPE(sv)) {
287 DIE(aTHX_ "Not a SCALAR reference");
294 if (!isGV_with_GP(gv)) {
295 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
301 if (PL_op->op_flags & OPf_MOD) {
302 if (PL_op->op_private & OPpLVAL_INTRO) {
303 if (cUNOP->op_first->op_type == OP_NULL)
304 sv = save_scalar(MUTABLE_GV(TOPs));
306 sv = save_scalar(gv);
308 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
310 else if (PL_op->op_private & OPpDEREF)
311 vivify_ref(sv, PL_op->op_private & OPpDEREF);
320 AV * const av = MUTABLE_AV(TOPs);
321 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
323 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
325 *sv = newSV_type(SVt_PVMG);
326 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
330 SETs(sv_2mortal(newSViv(
331 AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
339 dVAR; dSP; dTARGET; dPOPss;
341 if (PL_op->op_flags & OPf_MOD || LVRET) {
342 if (SvTYPE(TARG) < SVt_PVLV) {
343 sv_upgrade(TARG, SVt_PVLV);
344 sv_magic(TARG, NULL, PERL_MAGIC_pos, NULL, 0);
348 if (LvTARG(TARG) != sv) {
349 SvREFCNT_dec(LvTARG(TARG));
350 LvTARG(TARG) = SvREFCNT_inc_simple(sv);
352 PUSHs(TARG); /* no SvSETMAGIC */
356 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
357 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
358 if (mg && mg->mg_len >= 0) {
362 PUSHi(i + CopARYBASE_get(PL_curcop));
375 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
377 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
380 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
381 /* (But not in defined().) */
383 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
386 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
387 if ((PL_op->op_private & OPpLVAL_INTRO)) {
388 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
391 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
394 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
398 cv = MUTABLE_CV(&PL_sv_undef);
399 SETs(MUTABLE_SV(cv));
409 SV *ret = &PL_sv_undef;
411 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
412 const char * s = SvPVX_const(TOPs);
413 if (strnEQ(s, "CORE::", 6)) {
414 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
415 if (code < 0) { /* Overridable. */
416 #define MAX_ARGS_OP ((sizeof(I32) - 1) * 2)
417 int i = 0, n = 0, seen_question = 0, defgv = 0;
419 char str[ MAX_ARGS_OP * 2 + 2 ]; /* One ';', one '\0' */
421 if (code == -KEY_chop || code == -KEY_chomp
422 || code == -KEY_exec || code == -KEY_system)
424 if (code == -KEY_mkdir) {
425 ret = newSVpvs_flags("_;$", SVs_TEMP);
428 if (code == -KEY_keys || code == -KEY_values || code == -KEY_each) {
429 ret = newSVpvs_flags("\\[@%]", SVs_TEMP);
432 if (code == -KEY_tied || code == -KEY_untie) {
433 ret = newSVpvs_flags("\\[$@%*]", SVs_TEMP);
436 if (code == -KEY_tie) {
437 ret = newSVpvs_flags("\\[$@%*]$@", SVs_TEMP);
440 if (code == -KEY_readpipe) {
441 s = "CORE::backtick";
443 while (i < MAXO) { /* The slow way. */
444 if (strEQ(s + 6, PL_op_name[i])
445 || strEQ(s + 6, PL_op_desc[i]))
451 goto nonesuch; /* Should not happen... */
453 defgv = PL_opargs[i] & OA_DEFGV;
454 oa = PL_opargs[i] >> OASHIFT;
456 if (oa & OA_OPTIONAL && !seen_question && !defgv) {
460 if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF
461 && (oa & (OA_OPTIONAL - 1)) <= OA_SCALARREF
462 /* But globs are already references (kinda) */
463 && (oa & (OA_OPTIONAL - 1)) != OA_FILEREF
467 str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)];
470 if (defgv && str[n - 1] == '$')
473 ret = newSVpvn_flags(str, n - 1, SVs_TEMP);
475 else if (code) /* Non-Overridable */
477 else { /* None such */
479 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
483 cv = sv_2cv(TOPs, &stash, &gv, 0);
485 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
494 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
496 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
498 PUSHs(MUTABLE_SV(cv));
512 if (GIMME != G_ARRAY) {
516 *MARK = &PL_sv_undef;
517 *MARK = refto(*MARK);
521 EXTEND_MORTAL(SP - MARK);
523 *MARK = refto(*MARK);
528 S_refto(pTHX_ SV *sv)
533 PERL_ARGS_ASSERT_REFTO;
535 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
538 if (!(sv = LvTARG(sv)))
541 SvREFCNT_inc_void_NN(sv);
543 else if (SvTYPE(sv) == SVt_PVAV) {
544 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
545 av_reify(MUTABLE_AV(sv));
547 SvREFCNT_inc_void_NN(sv);
549 else if (SvPADTMP(sv) && !IS_PADGV(sv))
553 SvREFCNT_inc_void_NN(sv);
556 sv_upgrade(rv, SVt_IV);
566 SV * const sv = POPs;
571 if (!sv || !SvROK(sv))
574 pv = sv_reftype(SvRV(sv),TRUE);
575 PUSHp(pv, strlen(pv));
585 stash = CopSTASH(PL_curcop);
587 SV * const ssv = POPs;
591 if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
592 Perl_croak(aTHX_ "Attempt to bless into a reference");
593 ptr = SvPV_const(ssv,len);
595 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
596 "Explicit blessing to '' (assuming package main)");
597 stash = gv_stashpvn(ptr, len, GV_ADD);
600 (void)sv_bless(TOPs, stash);
609 const char * const elem = SvPV_nolen_const(sv);
610 GV * const gv = MUTABLE_GV(POPs);
615 /* elem will always be NUL terminated. */
616 const char * const second_letter = elem + 1;
619 if (strEQ(second_letter, "RRAY"))
620 tmpRef = MUTABLE_SV(GvAV(gv));
623 if (strEQ(second_letter, "ODE"))
624 tmpRef = MUTABLE_SV(GvCVu(gv));
627 if (strEQ(second_letter, "ILEHANDLE")) {
628 /* finally deprecated in 5.8.0 */
629 deprecate("*glob{FILEHANDLE}");
630 tmpRef = MUTABLE_SV(GvIOp(gv));
633 if (strEQ(second_letter, "ORMAT"))
634 tmpRef = MUTABLE_SV(GvFORM(gv));
637 if (strEQ(second_letter, "LOB"))
638 tmpRef = MUTABLE_SV(gv);
641 if (strEQ(second_letter, "ASH"))
642 tmpRef = MUTABLE_SV(GvHV(gv));
645 if (*second_letter == 'O' && !elem[2])
646 tmpRef = MUTABLE_SV(GvIOp(gv));
649 if (strEQ(second_letter, "AME"))
650 sv = newSVhek(GvNAME_HEK(gv));
653 if (strEQ(second_letter, "ACKAGE")) {
654 const HV * const stash = GvSTASH(gv);
655 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
656 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
660 if (strEQ(second_letter, "CALAR"))
675 /* Pattern matching */
680 register unsigned char *s;
683 register I32 *sfirst;
687 if (sv == PL_lastscream) {
691 s = (unsigned char*)(SvPV(sv, len));
693 if (pos <= 0 || !SvPOK(sv) || SvUTF8(sv)) {
694 /* No point in studying a zero length string, and not safe to study
695 anything that doesn't appear to be a simple scalar (and hence might
696 change between now and when the regexp engine runs without our set
697 magic ever running) such as a reference to an object with overloaded
703 SvSCREAM_off(PL_lastscream);
704 SvREFCNT_dec(PL_lastscream);
706 PL_lastscream = SvREFCNT_inc_simple(sv);
708 s = (unsigned char*)(SvPV(sv, len));
712 if (pos > PL_maxscream) {
713 if (PL_maxscream < 0) {
714 PL_maxscream = pos + 80;
715 Newx(PL_screamfirst, 256, I32);
716 Newx(PL_screamnext, PL_maxscream, I32);
719 PL_maxscream = pos + pos / 4;
720 Renew(PL_screamnext, PL_maxscream, I32);
724 sfirst = PL_screamfirst;
725 snext = PL_screamnext;
727 if (!sfirst || !snext)
728 DIE(aTHX_ "do_study: out of memory");
730 for (ch = 256; ch; --ch)
735 register const I32 ch = s[pos];
737 snext[pos] = sfirst[ch] - pos;
744 /* piggyback on m//g magic */
745 sv_magic(sv, NULL, PERL_MAGIC_regex_global, NULL, 0);
754 if (PL_op->op_flags & OPf_STACKED)
756 else if (PL_op->op_private & OPpTARGET_MY)
762 TARG = sv_newmortal();
767 /* Lvalue operators. */
779 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
781 do_chop(TARG, *++MARK);
790 SETi(do_chomp(TOPs));
796 dVAR; dSP; dMARK; dTARGET;
797 register I32 count = 0;
800 count += do_chomp(POPs);
810 if (!PL_op->op_private) {
819 SV_CHECK_THINKFIRST_COW_DROP(sv);
821 switch (SvTYPE(sv)) {
825 av_undef(MUTABLE_AV(sv));
828 hv_undef(MUTABLE_HV(sv));
831 if (cv_const_sv((const CV *)sv))
832 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
833 CvANON((const CV *)sv) ? "(anonymous)"
834 : GvENAME(CvGV((const CV *)sv)));
838 /* let user-undef'd sub keep its identity */
839 GV* const gv = CvGV((const CV *)sv);
840 cv_undef(MUTABLE_CV(sv));
841 CvGV((const CV *)sv) = gv;
846 SvSetMagicSV(sv, &PL_sv_undef);
849 else if (isGV_with_GP(sv)) {
854 if((stash = GvHV((const GV *)sv)) && HvNAME_get(stash))
855 mro_isa_changed_in(stash);
856 /* undef *Pkg::meth_name ... */
857 else if(GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
858 && HvNAME_get(stash))
859 mro_method_changed_in(stash);
861 gp_free(MUTABLE_GV(sv));
863 GvGP(sv) = gp_ref(gp);
865 GvLINE(sv) = CopLINE(PL_curcop);
866 GvEGV(sv) = MUTABLE_GV(sv);
872 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
887 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
888 Perl_croak_no_modify(aTHX);
889 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
890 && SvIVX(TOPs) != IV_MIN)
892 SvIV_set(TOPs, SvIVX(TOPs) - 1);
893 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
904 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
905 Perl_croak_no_modify(aTHX);
906 sv_setsv(TARG, TOPs);
907 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
908 && SvIVX(TOPs) != IV_MAX)
910 SvIV_set(TOPs, SvIVX(TOPs) + 1);
911 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
916 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
926 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
927 Perl_croak_no_modify(aTHX);
928 sv_setsv(TARG, TOPs);
929 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
930 && SvIVX(TOPs) != IV_MIN)
932 SvIV_set(TOPs, SvIVX(TOPs) - 1);
933 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
942 /* Ordinary operators. */
946 dVAR; dSP; dATARGET; SV *svl, *svr;
947 #ifdef PERL_PRESERVE_IVUV
950 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
953 #ifdef PERL_PRESERVE_IVUV
954 /* For integer to integer power, we do the calculation by hand wherever
955 we're sure it is safe; otherwise we call pow() and try to convert to
956 integer afterwards. */
958 SvIV_please_nomg(svr);
960 SvIV_please_nomg(svl);
969 const IV iv = SvIVX(svr);
973 goto float_it; /* Can't do negative powers this way. */
977 baseuok = SvUOK(svl);
981 const IV iv = SvIVX(svl);
984 baseuok = TRUE; /* effectively it's a UV now */
986 baseuv = -iv; /* abs, baseuok == false records sign */
989 /* now we have integer ** positive integer. */
992 /* foo & (foo - 1) is zero only for a power of 2. */
993 if (!(baseuv & (baseuv - 1))) {
994 /* We are raising power-of-2 to a positive integer.
995 The logic here will work for any base (even non-integer
996 bases) but it can be less accurate than
997 pow (base,power) or exp (power * log (base)) when the
998 intermediate values start to spill out of the mantissa.
999 With powers of 2 we know this can't happen.
1000 And powers of 2 are the favourite thing for perl
1001 programmers to notice ** not doing what they mean. */
1003 NV base = baseuok ? baseuv : -(NV)baseuv;
1008 while (power >>= 1) {
1016 SvIV_please_nomg(svr);
1019 register unsigned int highbit = 8 * sizeof(UV);
1020 register unsigned int diff = 8 * sizeof(UV);
1021 while (diff >>= 1) {
1023 if (baseuv >> highbit) {
1027 /* we now have baseuv < 2 ** highbit */
1028 if (power * highbit <= 8 * sizeof(UV)) {
1029 /* result will definitely fit in UV, so use UV math
1030 on same algorithm as above */
1031 register UV result = 1;
1032 register UV base = baseuv;
1033 const bool odd_power = cBOOL(power & 1);
1037 while (power >>= 1) {
1044 if (baseuok || !odd_power)
1045 /* answer is positive */
1047 else if (result <= (UV)IV_MAX)
1048 /* answer negative, fits in IV */
1049 SETi( -(IV)result );
1050 else if (result == (UV)IV_MIN)
1051 /* 2's complement assumption: special case IV_MIN */
1054 /* answer negative, doesn't fit */
1055 SETn( -(NV)result );
1065 NV right = SvNV_nomg(svr);
1066 NV left = SvNV_nomg(svl);
1069 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1071 We are building perl with long double support and are on an AIX OS
1072 afflicted with a powl() function that wrongly returns NaNQ for any
1073 negative base. This was reported to IBM as PMR #23047-379 on
1074 03/06/2006. The problem exists in at least the following versions
1075 of AIX and the libm fileset, and no doubt others as well:
1077 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1078 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1079 AIX 5.2.0 bos.adt.libm 5.2.0.85
1081 So, until IBM fixes powl(), we provide the following workaround to
1082 handle the problem ourselves. Our logic is as follows: for
1083 negative bases (left), we use fmod(right, 2) to check if the
1084 exponent is an odd or even integer:
1086 - if odd, powl(left, right) == -powl(-left, right)
1087 - if even, powl(left, right) == powl(-left, right)
1089 If the exponent is not an integer, the result is rightly NaNQ, so
1090 we just return that (as NV_NAN).
1094 NV mod2 = Perl_fmod( right, 2.0 );
1095 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1096 SETn( -Perl_pow( -left, right) );
1097 } else if (mod2 == 0.0) { /* even integer */
1098 SETn( Perl_pow( -left, right) );
1099 } else { /* fractional power */
1103 SETn( Perl_pow( left, right) );
1106 SETn( Perl_pow( left, right) );
1107 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1109 #ifdef PERL_PRESERVE_IVUV
1111 SvIV_please_nomg(svr);
1119 dVAR; dSP; dATARGET; SV *svl, *svr;
1120 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1123 #ifdef PERL_PRESERVE_IVUV
1124 SvIV_please_nomg(svr);
1126 /* Unless the left argument is integer in range we are going to have to
1127 use NV maths. Hence only attempt to coerce the right argument if
1128 we know the left is integer. */
1129 /* Left operand is defined, so is it IV? */
1130 SvIV_please_nomg(svl);
1132 bool auvok = SvUOK(svl);
1133 bool buvok = SvUOK(svr);
1134 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1135 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1144 const IV aiv = SvIVX(svl);
1147 auvok = TRUE; /* effectively it's a UV now */
1149 alow = -aiv; /* abs, auvok == false records sign */
1155 const IV biv = SvIVX(svr);
1158 buvok = TRUE; /* effectively it's a UV now */
1160 blow = -biv; /* abs, buvok == false records sign */
1164 /* If this does sign extension on unsigned it's time for plan B */
1165 ahigh = alow >> (4 * sizeof (UV));
1167 bhigh = blow >> (4 * sizeof (UV));
1169 if (ahigh && bhigh) {
1171 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1172 which is overflow. Drop to NVs below. */
1173 } else if (!ahigh && !bhigh) {
1174 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1175 so the unsigned multiply cannot overflow. */
1176 const UV product = alow * blow;
1177 if (auvok == buvok) {
1178 /* -ve * -ve or +ve * +ve gives a +ve result. */
1182 } else if (product <= (UV)IV_MIN) {
1183 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1184 /* -ve result, which could overflow an IV */
1186 SETi( -(IV)product );
1188 } /* else drop to NVs below. */
1190 /* One operand is large, 1 small */
1193 /* swap the operands */
1195 bhigh = blow; /* bhigh now the temp var for the swap */
1199 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1200 multiplies can't overflow. shift can, add can, -ve can. */
1201 product_middle = ahigh * blow;
1202 if (!(product_middle & topmask)) {
1203 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1205 product_middle <<= (4 * sizeof (UV));
1206 product_low = alow * blow;
1208 /* as for pp_add, UV + something mustn't get smaller.
1209 IIRC ANSI mandates this wrapping *behaviour* for
1210 unsigned whatever the actual representation*/
1211 product_low += product_middle;
1212 if (product_low >= product_middle) {
1213 /* didn't overflow */
1214 if (auvok == buvok) {
1215 /* -ve * -ve or +ve * +ve gives a +ve result. */
1217 SETu( product_low );
1219 } else if (product_low <= (UV)IV_MIN) {
1220 /* 2s complement assumption again */
1221 /* -ve result, which could overflow an IV */
1223 SETi( -(IV)product_low );
1225 } /* else drop to NVs below. */
1227 } /* product_middle too large */
1228 } /* ahigh && bhigh */
1233 NV right = SvNV_nomg(svr);
1234 NV left = SvNV_nomg(svl);
1236 SETn( left * right );
1243 dVAR; dSP; dATARGET; SV *svl, *svr;
1244 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1247 /* Only try to do UV divide first
1248 if ((SLOPPYDIVIDE is true) or
1249 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1251 The assumption is that it is better to use floating point divide
1252 whenever possible, only doing integer divide first if we can't be sure.
1253 If NV_PRESERVES_UV is true then we know at compile time that no UV
1254 can be too large to preserve, so don't need to compile the code to
1255 test the size of UVs. */
1258 # define PERL_TRY_UV_DIVIDE
1259 /* ensure that 20./5. == 4. */
1261 # ifdef PERL_PRESERVE_IVUV
1262 # ifndef NV_PRESERVES_UV
1263 # define PERL_TRY_UV_DIVIDE
1268 #ifdef PERL_TRY_UV_DIVIDE
1269 SvIV_please_nomg(svr);
1271 SvIV_please_nomg(svl);
1273 bool left_non_neg = SvUOK(svl);
1274 bool right_non_neg = SvUOK(svr);
1278 if (right_non_neg) {
1282 const IV biv = SvIVX(svr);
1285 right_non_neg = TRUE; /* effectively it's a UV now */
1291 /* historically undef()/0 gives a "Use of uninitialized value"
1292 warning before dieing, hence this test goes here.
1293 If it were immediately before the second SvIV_please, then
1294 DIE() would be invoked before left was even inspected, so
1295 no inpsection would give no warning. */
1297 DIE(aTHX_ "Illegal division by zero");
1303 const IV aiv = SvIVX(svl);
1306 left_non_neg = TRUE; /* effectively it's a UV now */
1315 /* For sloppy divide we always attempt integer division. */
1317 /* Otherwise we only attempt it if either or both operands
1318 would not be preserved by an NV. If both fit in NVs
1319 we fall through to the NV divide code below. However,
1320 as left >= right to ensure integer result here, we know that
1321 we can skip the test on the right operand - right big
1322 enough not to be preserved can't get here unless left is
1325 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1328 /* Integer division can't overflow, but it can be imprecise. */
1329 const UV result = left / right;
1330 if (result * right == left) {
1331 SP--; /* result is valid */
1332 if (left_non_neg == right_non_neg) {
1333 /* signs identical, result is positive. */
1337 /* 2s complement assumption */
1338 if (result <= (UV)IV_MIN)
1339 SETi( -(IV)result );
1341 /* It's exact but too negative for IV. */
1342 SETn( -(NV)result );
1345 } /* tried integer divide but it was not an integer result */
1346 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1347 } /* left wasn't SvIOK */
1348 } /* right wasn't SvIOK */
1349 #endif /* PERL_TRY_UV_DIVIDE */
1351 NV right = SvNV_nomg(svr);
1352 NV left = SvNV_nomg(svl);
1353 (void)POPs;(void)POPs;
1354 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1355 if (! Perl_isnan(right) && right == 0.0)
1359 DIE(aTHX_ "Illegal division by zero");
1360 PUSHn( left / right );
1367 dVAR; dSP; dATARGET;
1368 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1372 bool left_neg = FALSE;
1373 bool right_neg = FALSE;
1374 bool use_double = FALSE;
1375 bool dright_valid = FALSE;
1378 SV * const svr = TOPs;
1379 SV * const svl = TOPm1s;
1380 SvIV_please_nomg(svr);
1382 right_neg = !SvUOK(svr);
1386 const IV biv = SvIVX(svr);
1389 right_neg = FALSE; /* effectively it's a UV now */
1396 dright = SvNV_nomg(svr);
1397 right_neg = dright < 0;
1400 if (dright < UV_MAX_P1) {
1401 right = U_V(dright);
1402 dright_valid = TRUE; /* In case we need to use double below. */
1408 /* At this point use_double is only true if right is out of range for
1409 a UV. In range NV has been rounded down to nearest UV and
1410 use_double false. */
1411 SvIV_please_nomg(svl);
1412 if (!use_double && SvIOK(svl)) {
1414 left_neg = !SvUOK(svl);
1418 const IV aiv = SvIVX(svl);
1421 left_neg = FALSE; /* effectively it's a UV now */
1429 dleft = SvNV_nomg(svl);
1430 left_neg = dleft < 0;
1434 /* This should be exactly the 5.6 behaviour - if left and right are
1435 both in range for UV then use U_V() rather than floor. */
1437 if (dleft < UV_MAX_P1) {
1438 /* right was in range, so is dleft, so use UVs not double.
1442 /* left is out of range for UV, right was in range, so promote
1443 right (back) to double. */
1445 /* The +0.5 is used in 5.6 even though it is not strictly
1446 consistent with the implicit +0 floor in the U_V()
1447 inside the #if 1. */
1448 dleft = Perl_floor(dleft + 0.5);
1451 dright = Perl_floor(dright + 0.5);
1462 DIE(aTHX_ "Illegal modulus zero");
1464 dans = Perl_fmod(dleft, dright);
1465 if ((left_neg != right_neg) && dans)
1466 dans = dright - dans;
1469 sv_setnv(TARG, dans);
1475 DIE(aTHX_ "Illegal modulus zero");
1478 if ((left_neg != right_neg) && ans)
1481 /* XXX may warn: unary minus operator applied to unsigned type */
1482 /* could change -foo to be (~foo)+1 instead */
1483 if (ans <= ~((UV)IV_MAX)+1)
1484 sv_setiv(TARG, ~ans+1);
1486 sv_setnv(TARG, -(NV)ans);
1489 sv_setuv(TARG, ans);
1498 dVAR; dSP; dATARGET;
1502 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1503 /* TODO: think of some way of doing list-repeat overloading ??? */
1508 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1514 const UV uv = SvUV_nomg(sv);
1516 count = IV_MAX; /* The best we can do? */
1520 const IV iv = SvIV_nomg(sv);
1527 else if (SvNOKp(sv)) {
1528 const NV nv = SvNV_nomg(sv);
1535 count = SvIV_nomg(sv);
1537 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1539 static const char oom_list_extend[] = "Out of memory during list extend";
1540 const I32 items = SP - MARK;
1541 const I32 max = items * count;
1543 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1544 /* Did the max computation overflow? */
1545 if (items > 0 && max > 0 && (max < items || max < count))
1546 Perl_croak(aTHX_ oom_list_extend);
1551 /* This code was intended to fix 20010809.028:
1554 for (($x =~ /./g) x 2) {
1555 print chop; # "abcdabcd" expected as output.
1558 * but that change (#11635) broke this code:
1560 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1562 * I can't think of a better fix that doesn't introduce
1563 * an efficiency hit by copying the SVs. The stack isn't
1564 * refcounted, and mortalisation obviously doesn't
1565 * Do The Right Thing when the stack has more than
1566 * one pointer to the same mortal value.
1570 *SP = sv_2mortal(newSVsv(*SP));
1580 repeatcpy((char*)(MARK + items), (char*)MARK,
1581 items * sizeof(const SV *), count - 1);
1584 else if (count <= 0)
1587 else { /* Note: mark already snarfed by pp_list */
1588 SV * const tmpstr = POPs;
1591 static const char oom_string_extend[] =
1592 "Out of memory during string extend";
1595 sv_setsv_nomg(TARG, tmpstr);
1596 SvPV_force_nomg(TARG, len);
1597 isutf = DO_UTF8(TARG);
1602 const STRLEN max = (UV)count * len;
1603 if (len > MEM_SIZE_MAX / count)
1604 Perl_croak(aTHX_ oom_string_extend);
1605 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1606 SvGROW(TARG, max + 1);
1607 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1608 SvCUR_set(TARG, SvCUR(TARG) * count);
1610 *SvEND(TARG) = '\0';
1613 (void)SvPOK_only_UTF8(TARG);
1615 (void)SvPOK_only(TARG);
1617 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1618 /* The parser saw this as a list repeat, and there
1619 are probably several items on the stack. But we're
1620 in scalar context, and there's no pp_list to save us
1621 now. So drop the rest of the items -- robin@kitsite.com
1633 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1634 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1637 useleft = USE_LEFT(svl);
1638 #ifdef PERL_PRESERVE_IVUV
1639 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1640 "bad things" happen if you rely on signed integers wrapping. */
1641 SvIV_please_nomg(svr);
1643 /* Unless the left argument is integer in range we are going to have to
1644 use NV maths. Hence only attempt to coerce the right argument if
1645 we know the left is integer. */
1646 register UV auv = 0;
1652 a_valid = auvok = 1;
1653 /* left operand is undef, treat as zero. */
1655 /* Left operand is defined, so is it IV? */
1656 SvIV_please_nomg(svl);
1658 if ((auvok = SvUOK(svl)))
1661 register const IV aiv = SvIVX(svl);
1664 auvok = 1; /* Now acting as a sign flag. */
1665 } else { /* 2s complement assumption for IV_MIN */
1673 bool result_good = 0;
1676 bool buvok = SvUOK(svr);
1681 register const IV biv = SvIVX(svr);
1688 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1689 else "IV" now, independent of how it came in.
1690 if a, b represents positive, A, B negative, a maps to -A etc
1695 all UV maths. negate result if A negative.
1696 subtract if signs same, add if signs differ. */
1698 if (auvok ^ buvok) {
1707 /* Must get smaller */
1712 if (result <= buv) {
1713 /* result really should be -(auv-buv). as its negation
1714 of true value, need to swap our result flag */
1726 if (result <= (UV)IV_MIN)
1727 SETi( -(IV)result );
1729 /* result valid, but out of range for IV. */
1730 SETn( -(NV)result );
1734 } /* Overflow, drop through to NVs. */
1739 NV value = SvNV_nomg(svr);
1743 /* left operand is undef, treat as zero - value */
1747 SETn( SvNV_nomg(svl) - value );
1754 dVAR; dSP; dATARGET; SV *svl, *svr;
1755 tryAMAGICbin_MG(lshift_amg, AMGf_assign);
1759 const IV shift = SvIV_nomg(svr);
1760 if (PL_op->op_private & HINT_INTEGER) {
1761 const IV i = SvIV_nomg(svl);
1765 const UV u = SvUV_nomg(svl);
1774 dVAR; dSP; dATARGET; SV *svl, *svr;
1775 tryAMAGICbin_MG(rshift_amg, AMGf_assign);
1779 const IV shift = SvIV_nomg(svr);
1780 if (PL_op->op_private & HINT_INTEGER) {
1781 const IV i = SvIV_nomg(svl);
1785 const UV u = SvUV_nomg(svl);
1795 tryAMAGICbin_MG(lt_amg, AMGf_set);
1796 #ifdef PERL_PRESERVE_IVUV
1797 SvIV_please_nomg(TOPs);
1799 SvIV_please_nomg(TOPm1s);
1800 if (SvIOK(TOPm1s)) {
1801 bool auvok = SvUOK(TOPm1s);
1802 bool buvok = SvUOK(TOPs);
1804 if (!auvok && !buvok) { /* ## IV < IV ## */
1805 const IV aiv = SvIVX(TOPm1s);
1806 const IV biv = SvIVX(TOPs);
1809 SETs(boolSV(aiv < biv));
1812 if (auvok && buvok) { /* ## UV < UV ## */
1813 const UV auv = SvUVX(TOPm1s);
1814 const UV buv = SvUVX(TOPs);
1817 SETs(boolSV(auv < buv));
1820 if (auvok) { /* ## UV < IV ## */
1822 const IV biv = SvIVX(TOPs);
1825 /* As (a) is a UV, it's >=0, so it cannot be < */
1830 SETs(boolSV(auv < (UV)biv));
1833 { /* ## IV < UV ## */
1834 const IV aiv = SvIVX(TOPm1s);
1838 /* As (b) is a UV, it's >=0, so it must be < */
1845 SETs(boolSV((UV)aiv < buv));
1851 #ifndef NV_PRESERVES_UV
1852 #ifdef PERL_PRESERVE_IVUV
1855 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
1857 SETs(boolSV(SvRV(TOPs) < SvRV(TOPp1s)));
1862 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1864 if (Perl_isnan(left) || Perl_isnan(right))
1866 SETs(boolSV(left < right));
1869 SETs(boolSV(SvNV_nomg(TOPs) < value));
1878 tryAMAGICbin_MG(gt_amg, AMGf_set);
1879 #ifdef PERL_PRESERVE_IVUV
1880 SvIV_please_nomg(TOPs);
1882 SvIV_please_nomg(TOPm1s);
1883 if (SvIOK(TOPm1s)) {
1884 bool auvok = SvUOK(TOPm1s);
1885 bool buvok = SvUOK(TOPs);
1887 if (!auvok && !buvok) { /* ## IV > IV ## */
1888 const IV aiv = SvIVX(TOPm1s);
1889 const IV biv = SvIVX(TOPs);
1892 SETs(boolSV(aiv > biv));
1895 if (auvok && buvok) { /* ## UV > UV ## */
1896 const UV auv = SvUVX(TOPm1s);
1897 const UV buv = SvUVX(TOPs);
1900 SETs(boolSV(auv > buv));
1903 if (auvok) { /* ## UV > IV ## */
1905 const IV biv = SvIVX(TOPs);
1909 /* As (a) is a UV, it's >=0, so it must be > */
1914 SETs(boolSV(auv > (UV)biv));
1917 { /* ## IV > UV ## */
1918 const IV aiv = SvIVX(TOPm1s);
1922 /* As (b) is a UV, it's >=0, so it cannot be > */
1929 SETs(boolSV((UV)aiv > buv));
1935 #ifndef NV_PRESERVES_UV
1936 #ifdef PERL_PRESERVE_IVUV
1939 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
1941 SETs(boolSV(SvRV(TOPs) > SvRV(TOPp1s)));
1946 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1948 if (Perl_isnan(left) || Perl_isnan(right))
1950 SETs(boolSV(left > right));
1953 SETs(boolSV(SvNV_nomg(TOPs) > value));
1962 tryAMAGICbin_MG(le_amg, AMGf_set);
1963 #ifdef PERL_PRESERVE_IVUV
1964 SvIV_please_nomg(TOPs);
1966 SvIV_please_nomg(TOPm1s);
1967 if (SvIOK(TOPm1s)) {
1968 bool auvok = SvUOK(TOPm1s);
1969 bool buvok = SvUOK(TOPs);
1971 if (!auvok && !buvok) { /* ## IV <= IV ## */
1972 const IV aiv = SvIVX(TOPm1s);
1973 const IV biv = SvIVX(TOPs);
1976 SETs(boolSV(aiv <= biv));
1979 if (auvok && buvok) { /* ## UV <= UV ## */
1980 UV auv = SvUVX(TOPm1s);
1981 UV buv = SvUVX(TOPs);
1984 SETs(boolSV(auv <= buv));
1987 if (auvok) { /* ## UV <= IV ## */
1989 const IV biv = SvIVX(TOPs);
1993 /* As (a) is a UV, it's >=0, so a cannot be <= */
1998 SETs(boolSV(auv <= (UV)biv));
2001 { /* ## IV <= UV ## */
2002 const IV aiv = SvIVX(TOPm1s);
2006 /* As (b) is a UV, it's >=0, so a must be <= */
2013 SETs(boolSV((UV)aiv <= buv));
2019 #ifndef NV_PRESERVES_UV
2020 #ifdef PERL_PRESERVE_IVUV
2023 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2025 SETs(boolSV(SvRV(TOPs) <= SvRV(TOPp1s)));
2030 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2032 if (Perl_isnan(left) || Perl_isnan(right))
2034 SETs(boolSV(left <= right));
2037 SETs(boolSV(SvNV_nomg(TOPs) <= value));
2046 tryAMAGICbin_MG(ge_amg,AMGf_set);
2047 #ifdef PERL_PRESERVE_IVUV
2048 SvIV_please_nomg(TOPs);
2050 SvIV_please_nomg(TOPm1s);
2051 if (SvIOK(TOPm1s)) {
2052 bool auvok = SvUOK(TOPm1s);
2053 bool buvok = SvUOK(TOPs);
2055 if (!auvok && !buvok) { /* ## IV >= IV ## */
2056 const IV aiv = SvIVX(TOPm1s);
2057 const IV biv = SvIVX(TOPs);
2060 SETs(boolSV(aiv >= biv));
2063 if (auvok && buvok) { /* ## UV >= UV ## */
2064 const UV auv = SvUVX(TOPm1s);
2065 const UV buv = SvUVX(TOPs);
2068 SETs(boolSV(auv >= buv));
2071 if (auvok) { /* ## UV >= IV ## */
2073 const IV biv = SvIVX(TOPs);
2077 /* As (a) is a UV, it's >=0, so it must be >= */
2082 SETs(boolSV(auv >= (UV)biv));
2085 { /* ## IV >= UV ## */
2086 const IV aiv = SvIVX(TOPm1s);
2090 /* As (b) is a UV, it's >=0, so a cannot be >= */
2097 SETs(boolSV((UV)aiv >= buv));
2103 #ifndef NV_PRESERVES_UV
2104 #ifdef PERL_PRESERVE_IVUV
2107 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2109 SETs(boolSV(SvRV(TOPs) >= SvRV(TOPp1s)));
2114 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2116 if (Perl_isnan(left) || Perl_isnan(right))
2118 SETs(boolSV(left >= right));
2121 SETs(boolSV(SvNV_nomg(TOPs) >= value));
2130 tryAMAGICbin_MG(ne_amg,AMGf_set);
2131 #ifndef NV_PRESERVES_UV
2132 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2134 SETs(boolSV(SvRV(TOPs) != SvRV(TOPp1s)));
2138 #ifdef PERL_PRESERVE_IVUV
2139 SvIV_please_nomg(TOPs);
2141 SvIV_please_nomg(TOPm1s);
2142 if (SvIOK(TOPm1s)) {
2143 const bool auvok = SvUOK(TOPm1s);
2144 const bool buvok = SvUOK(TOPs);
2146 if (auvok == buvok) { /* ## IV == IV or UV == UV ## */
2147 /* Casting IV to UV before comparison isn't going to matter
2148 on 2s complement. On 1s complement or sign&magnitude
2149 (if we have any of them) it could make negative zero
2150 differ from normal zero. As I understand it. (Need to
2151 check - is negative zero implementation defined behaviour
2153 const UV buv = SvUVX(POPs);
2154 const UV auv = SvUVX(TOPs);
2156 SETs(boolSV(auv != buv));
2159 { /* ## Mixed IV,UV ## */
2163 /* != is commutative so swap if needed (save code) */
2165 /* swap. top of stack (b) is the iv */
2169 /* As (a) is a UV, it's >0, so it cannot be == */
2178 /* As (b) is a UV, it's >0, so it cannot be == */
2182 uv = SvUVX(*(SP+1)); /* Do I want TOPp1s() ? */
2184 SETs(boolSV((UV)iv != uv));
2191 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2193 if (Perl_isnan(left) || Perl_isnan(right))
2195 SETs(boolSV(left != right));
2198 SETs(boolSV(SvNV_nomg(TOPs) != value));
2207 tryAMAGICbin_MG(ncmp_amg, 0);
2208 #ifndef NV_PRESERVES_UV
2209 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2210 const UV right = PTR2UV(SvRV(POPs));
2211 const UV left = PTR2UV(SvRV(TOPs));
2212 SETi((left > right) - (left < right));
2216 #ifdef PERL_PRESERVE_IVUV
2217 /* Fortunately it seems NaN isn't IOK */
2218 SvIV_please_nomg(TOPs);
2220 SvIV_please_nomg(TOPm1s);
2221 if (SvIOK(TOPm1s)) {
2222 const bool leftuvok = SvUOK(TOPm1s);
2223 const bool rightuvok = SvUOK(TOPs);
2225 if (!leftuvok && !rightuvok) { /* ## IV <=> IV ## */
2226 const IV leftiv = SvIVX(TOPm1s);
2227 const IV rightiv = SvIVX(TOPs);
2229 if (leftiv > rightiv)
2231 else if (leftiv < rightiv)
2235 } else if (leftuvok && rightuvok) { /* ## UV <=> UV ## */
2236 const UV leftuv = SvUVX(TOPm1s);
2237 const UV rightuv = SvUVX(TOPs);
2239 if (leftuv > rightuv)
2241 else if (leftuv < rightuv)
2245 } else if (leftuvok) { /* ## UV <=> IV ## */
2246 const IV rightiv = SvIVX(TOPs);
2248 /* As (a) is a UV, it's >=0, so it cannot be < */
2251 const UV leftuv = SvUVX(TOPm1s);
2252 if (leftuv > (UV)rightiv) {
2254 } else if (leftuv < (UV)rightiv) {
2260 } else { /* ## IV <=> UV ## */
2261 const IV leftiv = SvIVX(TOPm1s);
2263 /* As (b) is a UV, it's >=0, so it must be < */
2266 const UV rightuv = SvUVX(TOPs);
2267 if ((UV)leftiv > rightuv) {
2269 } else if ((UV)leftiv < rightuv) {
2287 if (Perl_isnan(left) || Perl_isnan(right)) {
2291 value = (left > right) - (left < right);
2295 else if (left < right)
2297 else if (left > right)
2313 int amg_type = sle_amg;
2317 switch (PL_op->op_type) {
2336 tryAMAGICbin_MG(amg_type, AMGf_set);
2339 const int cmp = (IN_LOCALE_RUNTIME
2340 ? sv_cmp_locale(left, right)
2341 : sv_cmp(left, right));
2342 SETs(boolSV(cmp * multiplier < rhs));
2350 tryAMAGICbin_MG(seq_amg, AMGf_set);
2353 SETs(boolSV(sv_eq(left, right)));
2361 tryAMAGICbin_MG(sne_amg, AMGf_set);
2364 SETs(boolSV(!sv_eq(left, right)));
2372 tryAMAGICbin_MG(scmp_amg, 0);
2375 const int cmp = (IN_LOCALE_RUNTIME
2376 ? sv_cmp_locale(left, right)
2377 : sv_cmp(left, right));
2385 dVAR; dSP; dATARGET;
2386 tryAMAGICbin_MG(band_amg, AMGf_assign);
2389 if (SvNIOKp(left) || SvNIOKp(right)) {
2390 if (PL_op->op_private & HINT_INTEGER) {
2391 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2395 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2400 do_vop(PL_op->op_type, TARG, left, right);
2409 dVAR; dSP; dATARGET;
2410 const int op_type = PL_op->op_type;
2412 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2415 if (SvNIOKp(left) || SvNIOKp(right)) {
2416 if (PL_op->op_private & HINT_INTEGER) {
2417 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2418 const IV r = SvIV_nomg(right);
2419 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2423 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2424 const UV r = SvUV_nomg(right);
2425 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2430 do_vop(op_type, TARG, left, right);
2440 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2442 SV * const sv = TOPs;
2443 const int flags = SvFLAGS(sv);
2444 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2445 /* It's publicly an integer, or privately an integer-not-float */
2448 if (SvIVX(sv) == IV_MIN) {
2449 /* 2s complement assumption. */
2450 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2453 else if (SvUVX(sv) <= IV_MAX) {
2458 else if (SvIVX(sv) != IV_MIN) {
2462 #ifdef PERL_PRESERVE_IVUV
2470 SETn(-SvNV_nomg(sv));
2471 else if (SvPOKp(sv)) {
2473 const char * const s = SvPV_nomg_const(sv, len);
2474 if (isIDFIRST(*s)) {
2475 sv_setpvs(TARG, "-");
2478 else if (*s == '+' || *s == '-') {
2479 sv_setsv_nomg(TARG, sv);
2480 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2482 else if (DO_UTF8(sv)) {
2483 SvIV_please_nomg(sv);
2485 goto oops_its_an_int;
2487 sv_setnv(TARG, -SvNV_nomg(sv));
2489 sv_setpvs(TARG, "-");
2494 SvIV_please_nomg(sv);
2496 goto oops_its_an_int;
2497 sv_setnv(TARG, -SvNV_nomg(sv));
2502 SETn(-SvNV_nomg(sv));
2510 tryAMAGICun_MG(not_amg, AMGf_set);
2511 *PL_stack_sp = boolSV(!SvTRUE(*PL_stack_sp));
2518 tryAMAGICun_MG(compl_amg, 0);
2522 if (PL_op->op_private & HINT_INTEGER) {
2523 const IV i = ~SvIV_nomg(sv);
2527 const UV u = ~SvUV_nomg(sv);
2536 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2537 sv_setsv_nomg(TARG, sv);
2538 tmps = (U8*)SvPV_force_nomg(TARG, len);
2541 /* Calculate exact length, let's not estimate. */
2546 U8 * const send = tmps + len;
2547 U8 * const origtmps = tmps;
2548 const UV utf8flags = UTF8_ALLOW_ANYUV;
2550 while (tmps < send) {
2551 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2553 targlen += UNISKIP(~c);
2559 /* Now rewind strings and write them. */
2566 Newx(result, targlen + 1, U8);
2568 while (tmps < send) {
2569 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2571 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2574 sv_usepvn_flags(TARG, (char*)result, targlen,
2575 SV_HAS_TRAILING_NUL);
2582 Newx(result, nchar + 1, U8);
2584 while (tmps < send) {
2585 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2590 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2598 register long *tmpl;
2599 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2602 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2607 for ( ; anum > 0; anum--, tmps++)
2615 /* integer versions of some of the above */
2619 dVAR; dSP; dATARGET;
2620 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2623 SETi( left * right );
2631 dVAR; dSP; dATARGET;
2632 tryAMAGICbin_MG(div_amg, AMGf_assign);
2635 IV value = SvIV_nomg(right);
2637 DIE(aTHX_ "Illegal division by zero");
2638 num = SvIV_nomg(left);
2640 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2644 value = num / value;
2650 #if defined(__GLIBC__) && IVSIZE == 8
2657 /* This is the vanilla old i_modulo. */
2658 dVAR; dSP; dATARGET;
2659 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2663 DIE(aTHX_ "Illegal modulus zero");
2664 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2668 SETi( left % right );
2673 #if defined(__GLIBC__) && IVSIZE == 8
2678 /* This is the i_modulo with the workaround for the _moddi3 bug
2679 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2680 * See below for pp_i_modulo. */
2681 dVAR; dSP; dATARGET;
2682 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2686 DIE(aTHX_ "Illegal modulus zero");
2687 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2691 SETi( left % PERL_ABS(right) );
2698 dVAR; dSP; dATARGET;
2699 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2703 DIE(aTHX_ "Illegal modulus zero");
2704 /* The assumption is to use hereafter the old vanilla version... */
2706 PL_ppaddr[OP_I_MODULO] =
2708 /* .. but if we have glibc, we might have a buggy _moddi3
2709 * (at least glicb 2.2.5 is known to have this bug), in other
2710 * words our integer modulus with negative quad as the second
2711 * argument might be broken. Test for this and re-patch the
2712 * opcode dispatch table if that is the case, remembering to
2713 * also apply the workaround so that this first round works
2714 * right, too. See [perl #9402] for more information. */
2718 /* Cannot do this check with inlined IV constants since
2719 * that seems to work correctly even with the buggy glibc. */
2721 /* Yikes, we have the bug.
2722 * Patch in the workaround version. */
2724 PL_ppaddr[OP_I_MODULO] =
2725 &Perl_pp_i_modulo_1;
2726 /* Make certain we work right this time, too. */
2727 right = PERL_ABS(right);
2730 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2734 SETi( left % right );
2742 dVAR; dSP; dATARGET;
2743 tryAMAGICbin_MG(add_amg, AMGf_assign);
2745 dPOPTOPiirl_ul_nomg;
2746 SETi( left + right );
2753 dVAR; dSP; dATARGET;
2754 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2756 dPOPTOPiirl_ul_nomg;
2757 SETi( left - right );
2765 tryAMAGICbin_MG(lt_amg, AMGf_set);
2768 SETs(boolSV(left < right));
2776 tryAMAGICbin_MG(gt_amg, AMGf_set);
2779 SETs(boolSV(left > right));
2787 tryAMAGICbin_MG(le_amg, AMGf_set);
2790 SETs(boolSV(left <= right));
2798 tryAMAGICbin_MG(ge_amg, AMGf_set);
2801 SETs(boolSV(left >= right));
2809 tryAMAGICbin_MG(eq_amg, AMGf_set);
2812 SETs(boolSV(left == right));
2820 tryAMAGICbin_MG(ne_amg, AMGf_set);
2823 SETs(boolSV(left != right));
2831 tryAMAGICbin_MG(ncmp_amg, 0);
2838 else if (left < right)
2850 tryAMAGICun_MG(neg_amg, 0);
2852 SV * const sv = TOPs;
2853 IV const i = SvIV_nomg(sv);
2859 /* High falutin' math. */
2864 tryAMAGICbin_MG(atan2_amg, 0);
2867 SETn(Perl_atan2(left, right));
2875 int amg_type = sin_amg;
2876 const char *neg_report = NULL;
2877 NV (*func)(NV) = Perl_sin;
2878 const int op_type = PL_op->op_type;
2895 amg_type = sqrt_amg;
2897 neg_report = "sqrt";
2902 tryAMAGICun_MG(amg_type, 0);
2904 SV * const arg = POPs;
2905 const NV value = SvNV_nomg(arg);
2907 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2908 SET_NUMERIC_STANDARD();
2909 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2912 XPUSHn(func(value));
2917 /* Support Configure command-line overrides for rand() functions.
2918 After 5.005, perhaps we should replace this by Configure support
2919 for drand48(), random(), or rand(). For 5.005, though, maintain
2920 compatibility by calling rand() but allow the user to override it.
2921 See INSTALL for details. --Andy Dougherty 15 July 1998
2923 /* Now it's after 5.005, and Configure supports drand48() and random(),
2924 in addition to rand(). So the overrides should not be needed any more.
2925 --Jarkko Hietaniemi 27 September 1998
2928 #ifndef HAS_DRAND48_PROTO
2929 extern double drand48 (void);
2942 if (!PL_srand_called) {
2943 (void)seedDrand01((Rand_seed_t)seed());
2944 PL_srand_called = TRUE;
2954 const UV anum = (MAXARG < 1) ? seed() : POPu;
2955 (void)seedDrand01((Rand_seed_t)anum);
2956 PL_srand_called = TRUE;
2964 tryAMAGICun_MG(int_amg, AMGf_numeric);
2966 SV * const sv = TOPs;
2967 const IV iv = SvIV_nomg(sv);
2968 /* XXX it's arguable that compiler casting to IV might be subtly
2969 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2970 else preferring IV has introduced a subtle behaviour change bug. OTOH
2971 relying on floating point to be accurate is a bug. */
2976 else if (SvIOK(sv)) {
2978 SETu(SvUV_nomg(sv));
2983 const NV value = SvNV_nomg(sv);
2985 if (value < (NV)UV_MAX + 0.5) {
2988 SETn(Perl_floor(value));
2992 if (value > (NV)IV_MIN - 0.5) {
2995 SETn(Perl_ceil(value));
3006 tryAMAGICun_MG(abs_amg, AMGf_numeric);
3008 SV * const sv = TOPs;
3009 /* This will cache the NV value if string isn't actually integer */
3010 const IV iv = SvIV_nomg(sv);
3015 else if (SvIOK(sv)) {
3016 /* IVX is precise */
3018 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
3026 /* 2s complement assumption. Also, not really needed as
3027 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
3033 const NV value = SvNV_nomg(sv);
3047 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
3051 SV* const sv = POPs;
3053 tmps = (SvPV_const(sv, len));
3055 /* If Unicode, try to downgrade
3056 * If not possible, croak. */
3057 SV* const tsv = sv_2mortal(newSVsv(sv));
3060 sv_utf8_downgrade(tsv, FALSE);
3061 tmps = SvPV_const(tsv, len);
3063 if (PL_op->op_type == OP_HEX)
3066 while (*tmps && len && isSPACE(*tmps))
3072 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
3074 else if (*tmps == 'b')
3075 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
3077 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
3079 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
3093 SV * const sv = TOPs;
3095 if (SvGAMAGIC(sv)) {
3096 /* For an overloaded or magic scalar, we can't know in advance if
3097 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
3098 it likes to cache the length. Maybe that should be a documented
3103 = sv_2pv_flags(sv, &len,
3104 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
3108 else if (DO_UTF8(sv)) {
3109 SETi(utf8_length((U8*)p, (U8*)p + len));
3113 } else if (SvOK(sv)) {
3114 /* Neither magic nor overloaded. */
3116 SETi(sv_len_utf8(sv));
3139 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3141 const IV arybase = CopARYBASE_get(PL_curcop);
3143 const char *repl = NULL;
3145 const int num_args = PL_op->op_private & 7;
3146 bool repl_need_utf8_upgrade = FALSE;
3147 bool repl_is_utf8 = FALSE;
3149 SvTAINTED_off(TARG); /* decontaminate */
3150 SvUTF8_off(TARG); /* decontaminate */
3154 repl = SvPV_const(repl_sv, repl_len);
3155 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
3158 len_iv = SvIV(len_sv);
3159 len_is_uv = SvIOK_UV(len_sv);
3162 pos1_iv = SvIV(pos_sv);
3163 pos1_is_uv = SvIOK_UV(pos_sv);
3169 sv_utf8_upgrade(sv);
3171 else if (DO_UTF8(sv))
3172 repl_need_utf8_upgrade = TRUE;
3174 tmps = SvPV_const(sv, curlen);
3176 utf8_curlen = sv_len_utf8(sv);
3177 if (utf8_curlen == curlen)
3180 curlen = utf8_curlen;
3185 if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
3186 UV pos1_uv = pos1_iv-arybase;
3187 /* Overflow can occur when $[ < 0 */
3188 if (arybase < 0 && pos1_uv < (UV)pos1_iv)
3193 else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
3194 goto bound_fail; /* $[=3; substr($_,2,...) */
3196 else { /* pos < $[ */
3197 if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
3202 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3207 if (pos1_is_uv || pos1_iv > 0) {
3208 if ((UV)pos1_iv > curlen)
3213 if (!len_is_uv && len_iv < 0) {
3214 pos2_iv = curlen + len_iv;
3216 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3219 } else { /* len_iv >= 0 */
3220 if (!pos1_is_uv && pos1_iv < 0) {
3221 pos2_iv = pos1_iv + len_iv;
3222 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3224 if ((UV)len_iv > curlen-(UV)pos1_iv)
3227 pos2_iv = pos1_iv+len_iv;
3237 if (!pos2_is_uv && pos2_iv < 0) {
3238 if (!pos1_is_uv && pos1_iv < 0)
3242 else if (!pos1_is_uv && pos1_iv < 0)
3245 if ((UV)pos2_iv < (UV)pos1_iv)
3247 if ((UV)pos2_iv > curlen)
3251 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3252 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3253 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3254 STRLEN byte_len = len;
3255 STRLEN byte_pos = utf8_curlen
3256 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3259 /* we either return a PV or an LV. If the TARG hasn't been used
3260 * before, or is of that type, reuse it; otherwise use a mortal
3261 * instead. Note that LVs can have an extended lifetime, so also
3262 * dont reuse if refcount > 1 (bug #20933) */
3263 if (SvTYPE(TARG) > SVt_NULL) {
3264 if ( (SvTYPE(TARG) == SVt_PVLV)
3265 ? (!lvalue || SvREFCNT(TARG) > 1)
3268 TARG = sv_newmortal();
3272 sv_setpvn(TARG, tmps, byte_len);
3273 #ifdef USE_LOCALE_COLLATE
3274 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3279 SV* repl_sv_copy = NULL;
3281 if (repl_need_utf8_upgrade) {
3282 repl_sv_copy = newSVsv(repl_sv);
3283 sv_utf8_upgrade(repl_sv_copy);
3284 repl = SvPV_const(repl_sv_copy, repl_len);
3285 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3289 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3292 SvREFCNT_dec(repl_sv_copy);
3294 else if (lvalue) { /* it's an lvalue! */
3295 if (!SvGMAGICAL(sv)) {
3297 SvPV_force_nolen(sv);
3298 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3299 "Attempt to use reference as lvalue in substr");
3301 if (isGV_with_GP(sv))
3302 SvPV_force_nolen(sv);
3303 else if (SvOK(sv)) /* is it defined ? */
3304 (void)SvPOK_only_UTF8(sv);
3306 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3309 if (SvTYPE(TARG) < SVt_PVLV) {
3310 sv_upgrade(TARG, SVt_PVLV);
3311 sv_magic(TARG, NULL, PERL_MAGIC_substr, NULL, 0);
3315 if (LvTARG(TARG) != sv) {
3316 SvREFCNT_dec(LvTARG(TARG));
3317 LvTARG(TARG) = SvREFCNT_inc_simple(sv);
3319 LvTARGOFF(TARG) = pos;
3320 LvTARGLEN(TARG) = len;
3324 PUSHs(TARG); /* avoid SvSETMAGIC here */
3329 Perl_croak(aTHX_ "substr outside of string");
3330 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3337 register const IV size = POPi;
3338 register const IV offset = POPi;
3339 register SV * const src = POPs;
3340 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3342 SvTAINTED_off(TARG); /* decontaminate */
3343 if (lvalue) { /* it's an lvalue! */
3344 if (SvREFCNT(TARG) > 1) /* don't share the TARG (#20933) */
3345 TARG = sv_newmortal();
3346 if (SvTYPE(TARG) < SVt_PVLV) {
3347 sv_upgrade(TARG, SVt_PVLV);
3348 sv_magic(TARG, NULL, PERL_MAGIC_vec, NULL, 0);
3351 if (LvTARG(TARG) != src) {
3352 SvREFCNT_dec(LvTARG(TARG));
3353 LvTARG(TARG) = SvREFCNT_inc_simple(src);
3355 LvTARGOFF(TARG) = offset;
3356 LvTARGLEN(TARG) = size;
3359 sv_setuv(TARG, do_vecget(src, offset, size));
3375 const char *little_p;
3376 const I32 arybase = CopARYBASE_get(PL_curcop);
3379 const bool is_index = PL_op->op_type == OP_INDEX;
3382 /* arybase is in characters, like offset, so combine prior to the
3383 UTF-8 to bytes calculation. */
3384 offset = POPi - arybase;
3388 big_p = SvPV_const(big, biglen);
3389 little_p = SvPV_const(little, llen);
3391 big_utf8 = DO_UTF8(big);
3392 little_utf8 = DO_UTF8(little);
3393 if (big_utf8 ^ little_utf8) {
3394 /* One needs to be upgraded. */
3395 if (little_utf8 && !PL_encoding) {
3396 /* Well, maybe instead we might be able to downgrade the small
3398 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3401 /* If the large string is ISO-8859-1, and it's not possible to
3402 convert the small string to ISO-8859-1, then there is no
3403 way that it could be found anywhere by index. */
3408 /* At this point, pv is a malloc()ed string. So donate it to temp
3409 to ensure it will get free()d */
3410 little = temp = newSV(0);
3411 sv_usepvn(temp, pv, llen);
3412 little_p = SvPVX(little);
3415 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3418 sv_recode_to_utf8(temp, PL_encoding);
3420 sv_utf8_upgrade(temp);
3425 big_p = SvPV_const(big, biglen);
3428 little_p = SvPV_const(little, llen);
3432 if (SvGAMAGIC(big)) {
3433 /* Life just becomes a lot easier if I use a temporary here.
3434 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3435 will trigger magic and overloading again, as will fbm_instr()
3437 big = newSVpvn_flags(big_p, biglen,
3438 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3441 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3442 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3443 warn on undef, and we've already triggered a warning with the
3444 SvPV_const some lines above. We can't remove that, as we need to
3445 call some SvPV to trigger overloading early and find out if the
3447 This is all getting to messy. The API isn't quite clean enough,
3448 because data access has side effects.
3450 little = newSVpvn_flags(little_p, llen,
3451 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3452 little_p = SvPVX(little);
3456 offset = is_index ? 0 : biglen;
3458 if (big_utf8 && offset > 0)
3459 sv_pos_u2b(big, &offset, 0);
3465 else if (offset > (I32)biglen)
3467 if (!(little_p = is_index
3468 ? fbm_instr((unsigned char*)big_p + offset,
3469 (unsigned char*)big_p + biglen, little, 0)
3470 : rninstr(big_p, big_p + offset,
3471 little_p, little_p + llen)))
3474 retval = little_p - big_p;
3475 if (retval > 0 && big_utf8)
3476 sv_pos_b2u(big, &retval);
3480 PUSHi(retval + arybase);
3486 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3487 if (SvTAINTED(MARK[1]))
3488 TAINT_PROPER("sprintf");
3489 SvTAINTED_off(TARG);
3490 do_sprintf(TARG, SP-MARK, MARK+1);
3491 TAINT_IF(SvTAINTED(TARG));
3503 const U8 *s = (U8*)SvPV_const(argsv, len);
3505 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3506 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3507 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3511 XPUSHu(DO_UTF8(argsv) ?
3512 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3524 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3526 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3528 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3530 (void) POPs; /* Ignore the argument value. */
3531 value = UNICODE_REPLACEMENT;
3537 SvUPGRADE(TARG,SVt_PV);
3539 if (value > 255 && !IN_BYTES) {
3540 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3541 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3542 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3544 (void)SvPOK_only(TARG);
3553 *tmps++ = (char)value;
3555 (void)SvPOK_only(TARG);
3557 if (PL_encoding && !IN_BYTES) {
3558 sv_recode_to_utf8(TARG, PL_encoding);
3560 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3561 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3565 *tmps++ = (char)value;
3581 const char *tmps = SvPV_const(left, len);
3583 if (DO_UTF8(left)) {
3584 /* If Unicode, try to downgrade.
3585 * If not possible, croak.
3586 * Yes, we made this up. */
3587 SV* const tsv = sv_2mortal(newSVsv(left));
3590 sv_utf8_downgrade(tsv, FALSE);
3591 tmps = SvPV_const(tsv, len);
3593 # ifdef USE_ITHREADS
3595 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3596 /* This should be threadsafe because in ithreads there is only
3597 * one thread per interpreter. If this would not be true,
3598 * we would need a mutex to protect this malloc. */
3599 PL_reentrant_buffer->_crypt_struct_buffer =
3600 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3601 #if defined(__GLIBC__) || defined(__EMX__)
3602 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3603 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3604 /* work around glibc-2.2.5 bug */
3605 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3609 # endif /* HAS_CRYPT_R */
3610 # endif /* USE_ITHREADS */
3612 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3614 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3620 "The crypt() function is unimplemented due to excessive paranoia.");
3624 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3625 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3627 /* Both the characters below can be stored in two UTF-8 bytes. In UTF-8 the max
3628 * character that 2 bytes can hold is U+07FF, and in UTF-EBCDIC it is U+03FF.
3629 * See http://www.unicode.org/unicode/reports/tr16 */
3630 #define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS 0x0178 /* Also is title case */
3631 #define GREEK_CAPITAL_LETTER_MU 0x039C /* Upper and title case of MICRON */
3633 /* Below are several macros that generate code */
3634 /* Generates code to store a unicode codepoint c that is known to occupy
3635 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3636 #define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3638 *(p) = UTF8_TWO_BYTE_HI(c); \
3639 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3642 /* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3643 * available byte after the two bytes */
3644 #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3646 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3647 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3650 /* Generates code to store the upper case of latin1 character l which is known
3651 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3652 * are only two characters that fit this description, and this macro knows
3653 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3655 #define STORE_NON_LATIN1_UC(p, l) \
3657 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3658 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3659 } else { /* Must be the following letter */ \
3660 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3664 /* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3665 * after the character stored */
3666 #define CAT_NON_LATIN1_UC(p, l) \
3668 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3669 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3671 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3675 /* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3676 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3677 * and must require two bytes to store it. Advances p to point to the next
3678 * available position */
3679 #define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3681 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3682 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3683 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3684 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3685 } else {/* else is one of the other two special cases */ \
3686 CAT_NON_LATIN1_UC((p), (l)); \
3692 /* Actually is both lcfirst() and ucfirst(). Only the first character
3693 * changes. This means that possibly we can change in-place, ie., just
3694 * take the source and change that one character and store it back, but not
3695 * if read-only etc, or if the length changes */
3700 STRLEN slen; /* slen is the byte length of the whole SV. */
3703 bool inplace; /* ? Convert first char only, in-place */
3704 bool doing_utf8 = FALSE; /* ? using utf8 */
3705 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3706 const int op_type = PL_op->op_type;
3709 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3710 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3711 * stored as UTF-8 at s. */
3712 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3713 * lowercased) character stored in tmpbuf. May be either
3714 * UTF-8 or not, but in either case is the number of bytes */
3718 s = (const U8*)SvPV_nomg_const(source, slen);
3720 if (ckWARN(WARN_UNINITIALIZED))
3721 report_uninit(source);
3726 /* We may be able to get away with changing only the first character, in
3727 * place, but not if read-only, etc. Later we may discover more reasons to
3728 * not convert in-place. */
3729 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3731 /* First calculate what the changed first character should be. This affects
3732 * whether we can just swap it out, leaving the rest of the string unchanged,
3733 * or even if have to convert the dest to UTF-8 when the source isn't */
3735 if (! slen) { /* If empty */
3736 need = 1; /* still need a trailing NUL */
3738 else if (DO_UTF8(source)) { /* Is the source utf8? */
3741 /* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
3742 * and doesn't allow for the user to specify their own. When code is added to
3743 * detect if there is a user-defined mapping in force here, and if so to use
3744 * that, then the code below can be compiled. The detection would be a good
3745 * thing anyway, as currently the user-defined mappings only work on utf8
3746 * strings, and thus depend on the chosen internal storage method, which is a
3748 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3749 if (UTF8_IS_INVARIANT(*s)) {
3751 /* An invariant source character is either ASCII or, in EBCDIC, an
3752 * ASCII equivalent or a caseless C1 control. In both these cases,
3753 * the lower and upper cases of any character are also invariants
3754 * (and title case is the same as upper case). So it is safe to
3755 * use the simple case change macros which avoid the overhead of
3756 * the general functions. Note that if perl were to be extended to
3757 * do locale handling in UTF-8 strings, this wouldn't be true in,
3758 * for example, Lithuanian or Turkic. */
3759 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3763 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3766 /* Similarly, if the source character isn't invariant but is in the
3767 * latin1 range (or EBCDIC equivalent thereof), we have the case
3768 * changes compiled into perl, and can avoid the overhead of the
3769 * general functions. In this range, the characters are stored as
3770 * two UTF-8 bytes, and it so happens that any changed-case version
3771 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3775 /* Convert the two source bytes to a single Unicode code point
3776 * value, change case and save for below */
3777 chr = UTF8_ACCUMULATE(*s, *(s+1));
3778 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3779 U8 lower = toLOWER_LATIN1(chr);
3780 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3782 else { /* ucfirst */
3783 U8 upper = toUPPER_LATIN1_MOD(chr);
3785 /* Most of the latin1 range characters are well-behaved. Their
3786 * title and upper cases are the same, and are also in the
3787 * latin1 range. The macro above returns their upper (hence
3788 * title) case, and all that need be done is to save the result
3789 * for below. However, several characters are problematic, and
3790 * have to be handled specially. The MOD in the macro name
3791 * above means that these tricky characters all get mapped to
3792 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3793 * This mapping saves some tests for the majority of the
3796 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3798 /* Not tricky. Just save it. */
3799 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3801 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3803 /* This one is tricky because it is two characters long,
3804 * though the UTF-8 is still two bytes, so the stored
3805 * length doesn't change */
3806 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3807 *(tmpbuf + 1) = 's';
3811 /* The other two have their title and upper cases the same,
3812 * but are tricky because the changed-case characters
3813 * aren't in the latin1 range. They, however, do fit into
3814 * two UTF-8 bytes */
3815 STORE_NON_LATIN1_UC(tmpbuf, chr);
3820 #endif /* end of dont want to break user-defined casing */
3822 /* Here, can't short-cut the general case */
3824 utf8_to_uvchr(s, &ulen);
3825 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3826 else toLOWER_utf8(s, tmpbuf, &tculen);
3828 /* we can't do in-place if the length changes. */
3829 if (ulen != tculen) inplace = FALSE;
3830 need = slen + 1 - ulen + tculen;
3831 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3835 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3836 * latin1 is treated as caseless. Note that a locale takes
3838 tculen = 1; /* Most characters will require one byte, but this will
3839 * need to be overridden for the tricky ones */
3842 if (op_type == OP_LCFIRST) {
3844 /* lower case the first letter: no trickiness for any character */
3845 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3846 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3849 else if (IN_LOCALE_RUNTIME) {
3850 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3851 * have upper and title case different
3854 else if (! IN_UNI_8_BIT) {
3855 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3856 * on EBCDIC machines whatever the
3857 * native function does */
3859 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3860 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3862 /* tmpbuf now has the correct title case for all latin1 characters
3863 * except for the several ones that have tricky handling. All
3864 * of these are mapped by the MOD to the letter below. */
3865 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3867 /* The length is going to change, with all three of these, so
3868 * can't replace just the first character */
3871 /* We use the original to distinguish between these tricky
3873 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3874 /* Two character title case 'Ss', but can remain non-UTF-8 */
3877 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3882 /* The other two tricky ones have their title case outside
3883 * latin1. It is the same as their upper case. */
3885 STORE_NON_LATIN1_UC(tmpbuf, *s);
3887 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3888 * and their upper cases is 2. */
3891 /* The entire result will have to be in UTF-8. Assume worst
3892 * case sizing in conversion. (all latin1 characters occupy
3893 * at most two bytes in utf8) */
3894 convert_source_to_utf8 = TRUE;
3895 need = slen * 2 + 1;
3897 } /* End of is one of the three special chars */
3898 } /* End of use Unicode (Latin1) semantics */
3899 } /* End of changing the case of the first character */
3901 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3902 * generate the result */
3905 /* We can convert in place. This means we change just the first
3906 * character without disturbing the rest; no need to grow */
3908 s = d = (U8*)SvPV_force_nomg(source, slen);
3914 /* Here, we can't convert in place; we earlier calculated how much
3915 * space we will need, so grow to accommodate that */
3916 SvUPGRADE(dest, SVt_PV);
3917 d = (U8*)SvGROW(dest, need);
3918 (void)SvPOK_only(dest);
3925 if (! convert_source_to_utf8) {
3927 /* Here both source and dest are in UTF-8, but have to create
3928 * the entire output. We initialize the result to be the
3929 * title/lower cased first character, and then append the rest
3931 sv_setpvn(dest, (char*)tmpbuf, tculen);
3933 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3937 const U8 *const send = s + slen;
3939 /* Here the dest needs to be in UTF-8, but the source isn't,
3940 * except we earlier UTF-8'd the first character of the source
3941 * into tmpbuf. First put that into dest, and then append the
3942 * rest of the source, converting it to UTF-8 as we go. */
3944 /* Assert tculen is 2 here because the only two characters that
3945 * get to this part of the code have 2-byte UTF-8 equivalents */
3947 *d++ = *(tmpbuf + 1);
3948 s++; /* We have just processed the 1st char */
3950 for (; s < send; s++) {
3951 d = uvchr_to_utf8(d, *s);
3954 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3958 else { /* in-place UTF-8. Just overwrite the first character */
3959 Copy(tmpbuf, d, tculen, U8);
3960 SvCUR_set(dest, need - 1);
3963 else { /* Neither source nor dest are in or need to be UTF-8 */
3965 if (IN_LOCALE_RUNTIME) {
3969 if (inplace) { /* in-place, only need to change the 1st char */
3972 else { /* Not in-place */
3974 /* Copy the case-changed character(s) from tmpbuf */
3975 Copy(tmpbuf, d, tculen, U8);
3976 d += tculen - 1; /* Code below expects d to point to final
3977 * character stored */
3980 else { /* empty source */
3981 /* See bug #39028: Don't taint if empty */
3985 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3986 * the destination to retain that flag */
3990 if (!inplace) { /* Finish the rest of the string, unchanged */
3991 /* This will copy the trailing NUL */
3992 Copy(s + 1, d + 1, slen, U8);
3993 SvCUR_set(dest, need - 1);
4000 /* There's so much setup/teardown code common between uc and lc, I wonder if
4001 it would be worth merging the two, and just having a switch outside each
4002 of the three tight loops. There is less and less commonality though */
4016 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4017 && SvTEMP(source) && !DO_UTF8(source)
4018 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
4020 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
4021 * make the loop tight, so we overwrite the source with the dest before
4022 * looking at it, and we need to look at the original source
4023 * afterwards. There would also need to be code added to handle
4024 * switching to not in-place in midstream if we run into characters
4025 * that change the length.
4028 s = d = (U8*)SvPV_force_nomg(source, len);
4035 /* The old implementation would copy source into TARG at this point.
4036 This had the side effect that if source was undef, TARG was now
4037 an undefined SV with PADTMP set, and they don't warn inside
4038 sv_2pv_flags(). However, we're now getting the PV direct from
4039 source, which doesn't have PADTMP set, so it would warn. Hence the
4043 s = (const U8*)SvPV_nomg_const(source, len);
4045 if (ckWARN(WARN_UNINITIALIZED))
4046 report_uninit(source);
4052 SvUPGRADE(dest, SVt_PV);
4053 d = (U8*)SvGROW(dest, min);
4054 (void)SvPOK_only(dest);
4059 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4060 to check DO_UTF8 again here. */
4062 if (DO_UTF8(source)) {
4063 const U8 *const send = s + len;
4064 U8 tmpbuf[UTF8_MAXBYTES+1];
4066 /* All occurrences of these are to be moved to follow any other marks.
4067 * This is context-dependent. We may not be passed enough context to
4068 * move the iota subscript beyond all of them, but we do the best we can
4069 * with what we're given. The result is always better than if we
4070 * hadn't done this. And, the problem would only arise if we are
4071 * passed a character without all its combining marks, which would be
4072 * the caller's mistake. The information this is based on comes from a
4073 * comment in Unicode SpecialCasing.txt, (and the Standard's text
4074 * itself) and so can't be checked properly to see if it ever gets
4075 * revised. But the likelihood of it changing is remote */
4076 bool in_iota_subscript = FALSE;
4079 if (in_iota_subscript && ! is_utf8_mark(s)) {
4080 /* A non-mark. Time to output the iota subscript */
4081 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
4082 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
4084 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
4085 in_iota_subscript = FALSE;
4089 /* See comments at the first instance in this file of this ifdef */
4090 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4092 /* If the UTF-8 character is invariant, then it is in the range
4093 * known by the standard macro; result is only one byte long */
4094 if (UTF8_IS_INVARIANT(*s)) {
4098 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4100 /* Likewise, if it fits in a byte, its case change is in our
4102 U8 orig = UTF8_ACCUMULATE(*s, *(s+1));
4103 U8 upper = toUPPER_LATIN1_MOD(orig);
4104 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
4112 /* Otherwise, need the general UTF-8 case. Get the changed
4113 * case value and copy it to the output buffer */
4115 const STRLEN u = UTF8SKIP(s);
4118 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
4119 if (uv == GREEK_CAPITAL_LETTER_IOTA
4120 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
4122 in_iota_subscript = TRUE;
4125 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4126 /* If the eventually required minimum size outgrows
4127 * the available space, we need to grow. */
4128 const UV o = d - (U8*)SvPVX_const(dest);
4130 /* If someone uppercases one million U+03B0s we
4131 * SvGROW() one million times. Or we could try
4132 * guessing how much to allocate without allocating too
4133 * much. Such is life. See corresponding comment in
4134 * lc code for another option */
4136 d = (U8*)SvPVX(dest) + o;
4138 Copy(tmpbuf, d, ulen, U8);
4144 if (in_iota_subscript) {
4145 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
4149 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4151 else { /* Not UTF-8 */
4153 const U8 *const send = s + len;
4155 /* Use locale casing if in locale; regular style if not treating
4156 * latin1 as having case; otherwise the latin1 casing. Do the
4157 * whole thing in a tight loop, for speed, */
4158 if (IN_LOCALE_RUNTIME) {
4161 for (; s < send; d++, s++)
4162 *d = toUPPER_LC(*s);
4164 else if (! IN_UNI_8_BIT) {
4165 for (; s < send; d++, s++) {
4170 for (; s < send; d++, s++) {
4171 *d = toUPPER_LATIN1_MOD(*s);
4172 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
4174 /* The mainstream case is the tight loop above. To avoid
4175 * extra tests in that, all three characters that require
4176 * special handling are mapped by the MOD to the one tested
4178 * Use the source to distinguish between the three cases */
4180 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4182 /* uc() of this requires 2 characters, but they are
4183 * ASCII. If not enough room, grow the string */
4184 if (SvLEN(dest) < ++min) {
4185 const UV o = d - (U8*)SvPVX_const(dest);
4187 d = (U8*)SvPVX(dest) + o;
4189 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4190 continue; /* Back to the tight loop; still in ASCII */
4193 /* The other two special handling characters have their
4194 * upper cases outside the latin1 range, hence need to be
4195 * in UTF-8, so the whole result needs to be in UTF-8. So,
4196 * here we are somewhere in the middle of processing a
4197 * non-UTF-8 string, and realize that we will have to convert
4198 * the whole thing to UTF-8. What to do? There are
4199 * several possibilities. The simplest to code is to
4200 * convert what we have so far, set a flag, and continue on
4201 * in the loop. The flag would be tested each time through
4202 * the loop, and if set, the next character would be
4203 * converted to UTF-8 and stored. But, I (khw) didn't want
4204 * to slow down the mainstream case at all for this fairly
4205 * rare case, so I didn't want to add a test that didn't
4206 * absolutely have to be there in the loop, besides the
4207 * possibility that it would get too complicated for
4208 * optimizers to deal with. Another possibility is to just
4209 * give up, convert the source to UTF-8, and restart the
4210 * function that way. Another possibility is to convert
4211 * both what has already been processed and what is yet to
4212 * come separately to UTF-8, then jump into the loop that
4213 * handles UTF-8. But the most efficient time-wise of the
4214 * ones I could think of is what follows, and turned out to
4215 * not require much extra code. */
4217 /* Convert what we have so far into UTF-8, telling the
4218 * function that we know it should be converted, and to
4219 * allow extra space for what we haven't processed yet.
4220 * Assume the worst case space requirements for converting
4221 * what we haven't processed so far: that it will require
4222 * two bytes for each remaining source character, plus the
4223 * NUL at the end. This may cause the string pointer to
4224 * move, so re-find it. */
4226 len = d - (U8*)SvPVX_const(dest);
4227 SvCUR_set(dest, len);
4228 len = sv_utf8_upgrade_flags_grow(dest,
4229 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4231 d = (U8*)SvPVX(dest) + len;
4233 /* And append the current character's upper case in UTF-8 */
4234 CAT_NON_LATIN1_UC(d, *s);
4236 /* Now process the remainder of the source, converting to
4237 * upper and UTF-8. If a resulting byte is invariant in
4238 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4239 * append it to the output. */
4242 for (; s < send; s++) {
4243 U8 upper = toUPPER_LATIN1_MOD(*s);
4244 if UTF8_IS_INVARIANT(upper) {
4248 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4252 /* Here have processed the whole source; no need to continue
4253 * with the outer loop. Each character has been converted
4254 * to upper case and converted to UTF-8 */
4257 } /* End of processing all latin1-style chars */
4258 } /* End of processing all chars */
4259 } /* End of source is not empty */
4261 if (source != dest) {
4262 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4263 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4265 } /* End of isn't utf8 */
4283 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4284 && SvTEMP(source) && !DO_UTF8(source)) {
4286 /* We can convert in place, as lowercasing anything in the latin1 range
4287 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4289 s = d = (U8*)SvPV_force_nomg(source, len);
4296 /* The old implementation would copy source into TARG at this point.
4297 This had the side effect that if source was undef, TARG was now
4298 an undefined SV with PADTMP set, and they don't warn inside
4299 sv_2pv_flags(). However, we're now getting the PV direct from
4300 source, which doesn't have PADTMP set, so it would warn. Hence the
4304 s = (const U8*)SvPV_nomg_const(source, len);
4306 if (ckWARN(WARN_UNINITIALIZED))
4307 report_uninit(source);
4313 SvUPGRADE(dest, SVt_PV);
4314 d = (U8*)SvGROW(dest, min);
4315 (void)SvPOK_only(dest);
4320 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4321 to check DO_UTF8 again here. */
4323 if (DO_UTF8(source)) {
4324 const U8 *const send = s + len;
4325 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4328 /* See comments at the first instance in this file of this ifdef */
4329 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4330 if (UTF8_IS_INVARIANT(*s)) {
4332 /* Invariant characters use the standard mappings compiled in.
4337 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4339 /* As do the ones in the Latin1 range */
4340 U8 lower = toLOWER_LATIN1(UTF8_ACCUMULATE(*s, *(s+1)));
4341 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4346 /* Here, is utf8 not in Latin-1 range, have to go out and get
4347 * the mappings from the tables. */
4349 const STRLEN u = UTF8SKIP(s);
4352 #ifndef CONTEXT_DEPENDENT_CASING
4353 toLOWER_utf8(s, tmpbuf, &ulen);
4355 /* This is ifdefd out because it needs more work and thought. It isn't clear
4356 * that we should do it.
4357 * A minor objection is that this is based on a hard-coded rule from the
4358 * Unicode standard, and may change, but this is not very likely at all.
4359 * mktables should check and warn if it does.
4360 * More importantly, if the sigma occurs at the end of the string, we don't
4361 * have enough context to know whether it is part of a larger string or going
4362 * to be or not. It may be that we are passed a subset of the context, via
4363 * a \U...\E, for example, and we could conceivably know the larger context if
4364 * code were changed to pass that in. But, if the string passed in is an
4365 * intermediate result, and the user concatenates two strings together
4366 * after we have made a final sigma, that would be wrong. If the final sigma
4367 * occurs in the middle of the string we are working on, then we know that it
4368 * should be a final sigma, but otherwise we can't be sure. */
4370 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4372 /* If the lower case is a small sigma, it may be that we need
4373 * to change it to a final sigma. This happens at the end of
4374 * a word that contains more than just this character, and only
4375 * when we started with a capital sigma. */
4376 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4377 s > send - len && /* Makes sure not the first letter */
4378 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4381 /* We use the algorithm in:
4382 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4383 * is a CAPITAL SIGMA): If C is preceded by a sequence
4384 * consisting of a cased letter and a case-ignorable
4385 * sequence, and C is not followed by a sequence consisting
4386 * of a case ignorable sequence and then a cased letter,
4387 * then when lowercasing C, C becomes a final sigma */
4389 /* To determine if this is the end of a word, need to peek
4390 * ahead. Look at the next character */
4391 const U8 *peek = s + u;
4393 /* Skip any case ignorable characters */
4394 while (peek < send && is_utf8_case_ignorable(peek)) {
4395 peek += UTF8SKIP(peek);
4398 /* If we reached the end of the string without finding any
4399 * non-case ignorable characters, or if the next such one
4400 * is not-cased, then we have met the conditions for it
4401 * being a final sigma with regards to peek ahead, and so
4402 * must do peek behind for the remaining conditions. (We
4403 * know there is stuff behind to look at since we tested
4404 * above that this isn't the first letter) */
4405 if (peek >= send || ! is_utf8_cased(peek)) {
4406 peek = utf8_hop(s, -1);
4408 /* Here are at the beginning of the first character
4409 * before the original upper case sigma. Keep backing
4410 * up, skipping any case ignorable characters */
4411 while (is_utf8_case_ignorable(peek)) {
4412 peek = utf8_hop(peek, -1);
4415 /* Here peek points to the first byte of the closest
4416 * non-case-ignorable character before the capital
4417 * sigma. If it is cased, then by the Unicode
4418 * algorithm, we should use a small final sigma instead
4419 * of what we have */
4420 if (is_utf8_cased(peek)) {
4421 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4422 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4426 else { /* Not a context sensitive mapping */
4427 #endif /* End of commented out context sensitive */
4428 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4430 /* If the eventually required minimum size outgrows
4431 * the available space, we need to grow. */
4432 const UV o = d - (U8*)SvPVX_const(dest);
4434 /* If someone lowercases one million U+0130s we
4435 * SvGROW() one million times. Or we could try
4436 * guessing how much to allocate without allocating too
4437 * much. Such is life. Another option would be to
4438 * grow an extra byte or two more each time we need to
4439 * grow, which would cut down the million to 500K, with
4442 d = (U8*)SvPVX(dest) + o;
4444 #ifdef CONTEXT_DEPENDENT_CASING
4447 /* Copy the newly lowercased letter to the output buffer we're
4449 Copy(tmpbuf, d, ulen, U8);
4452 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4455 } /* End of looping through the source string */
4458 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4459 } else { /* Not utf8 */
4461 const U8 *const send = s + len;
4463 /* Use locale casing if in locale; regular style if not treating
4464 * latin1 as having case; otherwise the latin1 casing. Do the
4465 * whole thing in a tight loop, for speed, */
4466 if (IN_LOCALE_RUNTIME) {
4469 for (; s < send; d++, s++)
4470 *d = toLOWER_LC(*s);
4472 else if (! IN_UNI_8_BIT) {
4473 for (; s < send; d++, s++) {
4478 for (; s < send; d++, s++) {
4479 *d = toLOWER_LATIN1(*s);
4483 if (source != dest) {
4485 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4495 SV * const sv = TOPs;
4497 register const char *s = SvPV_const(sv,len);
4499 SvUTF8_off(TARG); /* decontaminate */
4502 SvUPGRADE(TARG, SVt_PV);
4503 SvGROW(TARG, (len * 2) + 1);
4507 if (UTF8_IS_CONTINUED(*s)) {
4508 STRLEN ulen = UTF8SKIP(s);
4532 SvCUR_set(TARG, d - SvPVX_const(TARG));
4533 (void)SvPOK_only_UTF8(TARG);
4536 sv_setpvn(TARG, s, len);
4545 dVAR; dSP; dMARK; dORIGMARK;
4546 register AV *const av = MUTABLE_AV(POPs);
4547 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4549 if (SvTYPE(av) == SVt_PVAV) {
4550 const I32 arybase = CopARYBASE_get(PL_curcop);
4551 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4552 bool can_preserve = FALSE;
4558 can_preserve = SvCANEXISTDELETE(av);
4561 if (lval && localizing) {
4564 for (svp = MARK + 1; svp <= SP; svp++) {
4565 const I32 elem = SvIV(*svp);
4569 if (max > AvMAX(av))
4573 while (++MARK <= SP) {
4575 I32 elem = SvIV(*MARK);
4576 bool preeminent = TRUE;
4580 if (localizing && can_preserve) {
4581 /* If we can determine whether the element exist,
4582 * Try to preserve the existenceness of a tied array
4583 * element by using EXISTS and DELETE if possible.
4584 * Fallback to FETCH and STORE otherwise. */
4585 preeminent = av_exists(av, elem);
4588 svp = av_fetch(av, elem, lval);
4590 if (!svp || *svp == &PL_sv_undef)
4591 DIE(aTHX_ PL_no_aelem, elem);
4594 save_aelem(av, elem, svp);
4596 SAVEADELETE(av, elem);
4599 *MARK = svp ? *svp : &PL_sv_undef;
4602 if (GIMME != G_ARRAY) {
4604 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4614 AV *array = MUTABLE_AV(POPs);
4615 const I32 gimme = GIMME_V;
4616 IV *iterp = Perl_av_iter_p(aTHX_ array);
4617 const IV current = (*iterp)++;
4619 if (current > av_len(array)) {
4621 if (gimme == G_SCALAR)
4628 mPUSHi(CopARYBASE_get(PL_curcop) + current);
4629 if (gimme == G_ARRAY) {
4630 SV **const element = av_fetch(array, current, 0);
4631 PUSHs(element ? *element : &PL_sv_undef);
4640 AV *array = MUTABLE_AV(POPs);
4641 const I32 gimme = GIMME_V;
4643 *Perl_av_iter_p(aTHX_ array) = 0;
4645 if (gimme == G_SCALAR) {
4647 PUSHi(av_len(array) + 1);
4649 else if (gimme == G_ARRAY) {
4650 IV n = Perl_av_len(aTHX_ array);
4651 IV i = CopARYBASE_get(PL_curcop);
4655 if (PL_op->op_type == OP_AKEYS) {
4657 for (; i <= n; i++) {
4662 for (i = 0; i <= n; i++) {
4663 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4664 PUSHs(elem ? *elem : &PL_sv_undef);
4671 /* Associative arrays. */
4677 HV * hash = MUTABLE_HV(POPs);
4679 const I32 gimme = GIMME_V;
4682 /* might clobber stack_sp */
4683 entry = hv_iternext(hash);
4688 SV* const sv = hv_iterkeysv(entry);
4689 PUSHs(sv); /* won't clobber stack_sp */
4690 if (gimme == G_ARRAY) {
4693 /* might clobber stack_sp */
4694 val = hv_iterval(hash, entry);
4699 else if (gimme == G_SCALAR)
4706 S_do_delete_local(pTHX)
4710 const I32 gimme = GIMME_V;
4714 if (PL_op->op_private & OPpSLICE) {
4716 SV * const osv = POPs;
4717 const bool tied = SvRMAGICAL(osv)
4718 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4719 const bool can_preserve = SvCANEXISTDELETE(osv)
4720 || mg_find((const SV *)osv, PERL_MAGIC_env);
4721 const U32 type = SvTYPE(osv);
4722 if (type == SVt_PVHV) { /* hash element */
4723 HV * const hv = MUTABLE_HV(osv);
4724 while (++MARK <= SP) {
4725 SV * const keysv = *MARK;
4727 bool preeminent = TRUE;
4729 preeminent = hv_exists_ent(hv, keysv, 0);
4731 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4738 sv = hv_delete_ent(hv, keysv, 0, 0);
4739 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4742 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4744 *MARK = sv_mortalcopy(sv);
4750 SAVEHDELETE(hv, keysv);
4751 *MARK = &PL_sv_undef;
4755 else if (type == SVt_PVAV) { /* array element */
4756 if (PL_op->op_flags & OPf_SPECIAL) {
4757 AV * const av = MUTABLE_AV(osv);
4758 while (++MARK <= SP) {
4759 I32 idx = SvIV(*MARK);
4761 bool preeminent = TRUE;
4763 preeminent = av_exists(av, idx);
4765 SV **svp = av_fetch(av, idx, 1);
4772 sv = av_delete(av, idx, 0);
4773 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4776 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4778 *MARK = sv_mortalcopy(sv);
4784 SAVEADELETE(av, idx);
4785 *MARK = &PL_sv_undef;
4791 DIE(aTHX_ "Not a HASH reference");
4792 if (gimme == G_VOID)
4794 else if (gimme == G_SCALAR) {
4799 *++MARK = &PL_sv_undef;
4804 SV * const keysv = POPs;
4805 SV * const osv = POPs;
4806 const bool tied = SvRMAGICAL(osv)
4807 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4808 const bool can_preserve = SvCANEXISTDELETE(osv)
4809 || mg_find((const SV *)osv, PERL_MAGIC_env);
4810 const U32 type = SvTYPE(osv);
4812 if (type == SVt_PVHV) {
4813 HV * const hv = MUTABLE_HV(osv);
4814 bool preeminent = TRUE;
4816 preeminent = hv_exists_ent(hv, keysv, 0);
4818 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4825 sv = hv_delete_ent(hv, keysv, 0, 0);
4826 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4829 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4831 SV *nsv = sv_mortalcopy(sv);
4837 SAVEHDELETE(hv, keysv);
4839 else if (type == SVt_PVAV) {
4840 if (PL_op->op_flags & OPf_SPECIAL) {
4841 AV * const av = MUTABLE_AV(osv);
4842 I32 idx = SvIV(keysv);
4843 bool preeminent = TRUE;
4845 preeminent = av_exists(av, idx);
4847 SV **svp = av_fetch(av, idx, 1);
4854 sv = av_delete(av, idx, 0);
4855 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4858 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4860 SV *nsv = sv_mortalcopy(sv);
4866 SAVEADELETE(av, idx);
4869 DIE(aTHX_ "panic: avhv_delete no longer supported");
4872 DIE(aTHX_ "Not a HASH reference");
4875 if (gimme != G_VOID)
4889 if (PL_op->op_private & OPpLVAL_INTRO)
4890 return do_delete_local();
4893 discard = (gimme == G_VOID) ? G_DISCARD : 0;
4895 if (PL_op->op_private & OPpSLICE) {
4897 HV * const hv = MUTABLE_HV(POPs);
4898 const U32 hvtype = SvTYPE(hv);
4899 if (hvtype == SVt_PVHV) { /* hash element */
4900 while (++MARK <= SP) {
4901 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
4902 *MARK = sv ? sv : &PL_sv_undef;
4905 else if (hvtype == SVt_PVAV) { /* array element */
4906 if (PL_op->op_flags & OPf_SPECIAL) {
4907 while (++MARK <= SP) {
4908 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
4909 *MARK = sv ? sv : &PL_sv_undef;
4914 DIE(aTHX_ "Not a HASH reference");
4917 else if (gimme == G_SCALAR) {
4922 *++MARK = &PL_sv_undef;
4928 HV * const hv = MUTABLE_HV(POPs);
4930 if (SvTYPE(hv) == SVt_PVHV)
4931 sv = hv_delete_ent(hv, keysv, discard, 0);
4932 else if (SvTYPE(hv) == SVt_PVAV) {
4933 if (PL_op->op_flags & OPf_SPECIAL)
4934 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
4936 DIE(aTHX_ "panic: avhv_delete no longer supported");
4939 DIE(aTHX_ "Not a HASH reference");
4955 if (PL_op->op_private & OPpEXISTS_SUB) {
4957 SV * const sv = POPs;
4958 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
4961 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
4966 hv = MUTABLE_HV(POPs);
4967 if (SvTYPE(hv) == SVt_PVHV) {
4968 if (hv_exists_ent(hv, tmpsv, 0))
4971 else if (SvTYPE(hv) == SVt_PVAV) {
4972 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
4973 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
4978 DIE(aTHX_ "Not a HASH reference");
4985 dVAR; dSP; dMARK; dORIGMARK;
4986 register HV * const hv = MUTABLE_HV(POPs);
4987 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4988 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4989 bool can_preserve = FALSE;
4995 if (SvCANEXISTDELETE(hv) || mg_find((const SV *)hv, PERL_MAGIC_env))
4996 can_preserve = TRUE;
4999 while (++MARK <= SP) {
5000 SV * const keysv = *MARK;
5003 bool preeminent = TRUE;
5005 if (localizing && can_preserve) {
5006 /* If we can determine whether the element exist,
5007 * try to preserve the existenceness of a tied hash
5008 * element by using EXISTS and DELETE if possible.
5009 * Fallback to FETCH and STORE otherwise. */
5010 preeminent = hv_exists_ent(hv, keysv, 0);
5013 he = hv_fetch_ent(hv, keysv, lval, 0);
5014 svp = he ? &HeVAL(he) : NULL;
5017 if (!svp || *svp == &PL_sv_undef) {
5018 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
5021 if (HvNAME_get(hv) && isGV(*svp))
5022 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
5023 else if (preeminent)
5024 save_helem_flags(hv, keysv, svp,
5025 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
5027 SAVEHDELETE(hv, keysv);
5030 *MARK = svp ? *svp : &PL_sv_undef;
5032 if (GIMME != G_ARRAY) {
5034 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
5040 /* List operators. */
5045 if (GIMME != G_ARRAY) {
5047 *MARK = *SP; /* unwanted list, return last item */
5049 *MARK = &PL_sv_undef;
5059 SV ** const lastrelem = PL_stack_sp;
5060 SV ** const lastlelem = PL_stack_base + POPMARK;
5061 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
5062 register SV ** const firstrelem = lastlelem + 1;
5063 const I32 arybase = CopARYBASE_get(PL_curcop);
5064 I32 is_something_there = FALSE;
5066 register const I32 max = lastrelem - lastlelem;
5067 register SV **lelem;
5069 if (GIMME != G_ARRAY) {
5070 I32 ix = SvIV(*lastlelem);
5075 if (ix < 0 || ix >= max)
5076 *firstlelem = &PL_sv_undef;
5078 *firstlelem = firstrelem[ix];
5084 SP = firstlelem - 1;
5088 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
5089 I32 ix = SvIV(*lelem);
5094 if (ix < 0 || ix >= max)
5095 *lelem = &PL_sv_undef;
5097 is_something_there = TRUE;
5098 if (!(*lelem = firstrelem[ix]))
5099 *lelem = &PL_sv_undef;
5102 if (is_something_there)
5105 SP = firstlelem - 1;
5111 dVAR; dSP; dMARK; dORIGMARK;
5112 const I32 items = SP - MARK;
5113 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
5114 SP = ORIGMARK; /* av_make() might realloc stack_sp */
5115 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
5116 ? newRV_noinc(av) : av);
5122 dVAR; dSP; dMARK; dORIGMARK;
5123 HV* const hv = newHV();
5126 SV * const key = *++MARK;
5127 SV * const val = newSV(0);
5129 sv_setsv(val, *++MARK);
5131 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
5132 (void)hv_store_ent(hv,key,val,0);
5135 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
5136 ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
5142 dVAR; dSP; dMARK; dORIGMARK;
5143 register AV *ary = MUTABLE_AV(*++MARK);
5147 register I32 offset;
5148 register I32 length;
5152 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5155 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5158 ENTER_with_name("call_SPLICE");
5159 call_method("SPLICE",GIMME_V);
5160 LEAVE_with_name("call_SPLICE");
5168 offset = i = SvIV(*MARK);
5170 offset += AvFILLp(ary) + 1;
5172 offset -= CopARYBASE_get(PL_curcop);
5174 DIE(aTHX_ PL_no_aelem, i);
5176 length = SvIVx(*MARK++);
5178 length += AvFILLp(ary) - offset + 1;
5184 length = AvMAX(ary) + 1; /* close enough to infinity */
5188 length = AvMAX(ary) + 1;
5190 if (offset > AvFILLp(ary) + 1) {
5191 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
5192 offset = AvFILLp(ary) + 1;
5194 after = AvFILLp(ary) + 1 - (offset + length);
5195 if (after < 0) { /* not that much array */
5196 length += after; /* offset+length now in array */
5202 /* At this point, MARK .. SP-1 is our new LIST */
5205 diff = newlen - length;
5206 if (newlen && !AvREAL(ary) && AvREIFY(ary))
5209 /* make new elements SVs now: avoid problems if they're from the array */
5210 for (dst = MARK, i = newlen; i; i--) {
5211 SV * const h = *dst;
5212 *dst++ = newSVsv(h);
5215 if (diff < 0) { /* shrinking the area */
5216 SV **tmparyval = NULL;
5218 Newx(tmparyval, newlen, SV*); /* so remember insertion */
5219 Copy(MARK, tmparyval, newlen, SV*);
5222 MARK = ORIGMARK + 1;
5223 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5224 MEXTEND(MARK, length);
5225 Copy(AvARRAY(ary)+offset, MARK, length, SV*);
5227 EXTEND_MORTAL(length);
5228 for (i = length, dst = MARK; i; i--) {
5229 sv_2mortal(*dst); /* free them eventualy */
5236 *MARK = AvARRAY(ary)[offset+length-1];
5239 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
5240 SvREFCNT_dec(*dst++); /* free them now */
5243 AvFILLp(ary) += diff;
5245 /* pull up or down? */
5247 if (offset < after) { /* easier to pull up */
5248 if (offset) { /* esp. if nothing to pull */
5249 src = &AvARRAY(ary)[offset-1];
5250 dst = src - diff; /* diff is negative */
5251 for (i = offset; i > 0; i--) /* can't trust Copy */
5255 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
5259 if (after) { /* anything to pull down? */
5260 src = AvARRAY(ary) + offset + length;
5261 dst = src + diff; /* diff is negative */
5262 Move(src, dst, after, SV*);
5264 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
5265 /* avoid later double free */
5269 dst[--i] = &PL_sv_undef;
5272 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
5273 Safefree(tmparyval);
5276 else { /* no, expanding (or same) */
5277 SV** tmparyval = NULL;
5279 Newx(tmparyval, length, SV*); /* so remember deletion */
5280 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
5283 if (diff > 0) { /* expanding */
5284 /* push up or down? */
5285 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
5289 Move(src, dst, offset, SV*);
5291 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
5293 AvFILLp(ary) += diff;
5296 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
5297 av_extend(ary, AvFILLp(ary) + diff);
5298 AvFILLp(ary) += diff;
5301 dst = AvARRAY(ary) + AvFILLp(ary);
5303 for (i = after; i; i--) {
5311 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
5314 MARK = ORIGMARK + 1;
5315 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5317 Copy(tmparyval, MARK, length, SV*);
5319 EXTEND_MORTAL(length);
5320 for (i = length, dst = MARK; i; i--) {
5321 sv_2mortal(*dst); /* free them eventualy */
5328 else if (length--) {
5329 *MARK = tmparyval[length];
5332 while (length-- > 0)
5333 SvREFCNT_dec(tmparyval[length]);
5337 *MARK = &PL_sv_undef;
5338 Safefree(tmparyval);
5346 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5347 register AV * const ary = MUTABLE_AV(*++MARK);
5348 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5351 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5354 ENTER_with_name("call_PUSH");
5355 call_method("PUSH",G_SCALAR|G_DISCARD);
5356 LEAVE_with_name("call_PUSH");
5360 PL_delaymagic = DM_DELAY;
5361 for (++MARK; MARK <= SP; MARK++) {
5362 SV * const sv = newSV(0);
5364 sv_setsv(sv, *MARK);
5365 av_store(ary, AvFILLp(ary)+1, sv);
5367 if (PL_delaymagic & DM_ARRAY_ISA)
5368 mg_set(MUTABLE_SV(ary));
5373 if (OP_GIMME(PL_op, 0) != G_VOID) {
5374 PUSHi( AvFILL(ary) + 1 );
5383 AV * const av = PL_op->op_flags & OPf_SPECIAL
5384 ? MUTABLE_AV(GvAV(PL_defgv)) : MUTABLE_AV(POPs);
5385 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
5389 (void)sv_2mortal(sv);
5396 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5397 register AV *ary = MUTABLE_AV(*++MARK);
5398 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5401 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5404 ENTER_with_name("call_UNSHIFT");
5405 call_method("UNSHIFT",G_SCALAR|G_DISCARD);
5406 LEAVE_with_name("call_UNSHIFT");
5411 av_unshift(ary, SP - MARK);
5413 SV * const sv = newSVsv(*++MARK);
5414 (void)av_store(ary, i++, sv);
5418 if (OP_GIMME(PL_op, 0) != G_VOID) {
5419 PUSHi( AvFILL(ary) + 1 );
5428 if (GIMME == G_ARRAY) {
5429 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5433 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5434 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5435 av = MUTABLE_AV((*SP));
5436 /* In-place reversing only happens in void context for the array
5437 * assignment. We don't need to push anything on the stack. */
5440 if (SvMAGICAL(av)) {
5442 register SV *tmp = sv_newmortal();
5443 /* For SvCANEXISTDELETE */
5446 bool can_preserve = SvCANEXISTDELETE(av);
5448 for (i = 0, j = av_len(av); i < j; ++i, --j) {
5449 register SV *begin, *end;
5452 if (!av_exists(av, i)) {
5453 if (av_exists(av, j)) {
5454 register SV *sv = av_delete(av, j, 0);
5455 begin = *av_fetch(av, i, TRUE);
5456 sv_setsv_mg(begin, sv);
5460 else if (!av_exists(av, j)) {
5461 register SV *sv = av_delete(av, i, 0);
5462 end = *av_fetch(av, j, TRUE);
5463 sv_setsv_mg(end, sv);
5468 begin = *av_fetch(av, i, TRUE);
5469 end = *av_fetch(av, j, TRUE);
5470 sv_setsv(tmp, begin);
5471 sv_setsv_mg(begin, end);
5472 sv_setsv_mg(end, tmp);
5476 SV **begin = AvARRAY(av);
5479 SV **end = begin + AvFILLp(av);
5481 while (begin < end) {
5482 register SV * const tmp = *begin;
5493 register SV * const tmp = *MARK;
5497 /* safe as long as stack cannot get extended in the above */
5503 register char *down;
5508 SvUTF8_off(TARG); /* decontaminate */
5510 do_join(TARG, &PL_sv_no, MARK, SP);
5512 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv());
5513 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
5514 report_uninit(TARG);
5517 up = SvPV_force(TARG, len);
5519 if (DO_UTF8(TARG)) { /* first reverse each character */
5520 U8* s = (U8*)SvPVX(TARG);
5521 const U8* send = (U8*)(s + len);
5523 if (UTF8_IS_INVARIANT(*s)) {
5528 if (!utf8_to_uvchr(s, 0))
5532 down = (char*)(s - 1);
5533 /* reverse this character */
5537 *down-- = (char)tmp;
5543 down = SvPVX(TARG) + len - 1;
5547 *down-- = (char)tmp;
5549 (void)SvPOK_only_UTF8(TARG);
5561 register IV limit = POPi; /* note, negative is forever */
5562 SV * const sv = POPs;
5564 register const char *s = SvPV_const(sv, len);
5565 const bool do_utf8 = DO_UTF8(sv);
5566 const char *strend = s + len;
5568 register REGEXP *rx;
5570 register const char *m;
5572 const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
5573 I32 maxiters = slen + 10;
5574 I32 trailing_empty = 0;
5576 const I32 origlimit = limit;
5579 const I32 gimme = GIMME_V;
5581 const I32 oldsave = PL_savestack_ix;
5582 U32 make_mortal = SVs_TEMP;
5587 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*);
5592 DIE(aTHX_ "panic: pp_split");
5595 TAINT_IF((RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) &&
5596 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
5598 RX_MATCH_UTF8_set(rx, do_utf8);
5601 if (pm->op_pmreplrootu.op_pmtargetoff) {
5602 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
5605 if (pm->op_pmreplrootu.op_pmtargetgv) {
5606 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
5611 if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
5617 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
5619 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
5626 for (i = AvFILLp(ary); i >= 0; i--)
5627 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
5629 /* temporarily switch stacks */
5630 SAVESWITCHSTACK(PL_curstack, ary);
5634 base = SP - PL_stack_base;
5636 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
5638 while (*s == ' ' || is_utf8_space((U8*)s))
5641 else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) {
5642 while (isSPACE_LC(*s))
5650 if (RX_EXTFLAGS(rx) & PMf_MULTILINE) {
5654 gimme_scalar = gimme == G_SCALAR && !ary;
5657 limit = maxiters + 2;
5658 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
5661 /* this one uses 'm' and is a negative test */
5663 while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
5664 const int t = UTF8SKIP(m);
5665 /* is_utf8_space returns FALSE for malform utf8 */
5671 } else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) {
5672 while (m < strend && !isSPACE_LC(*m))
5675 while (m < strend && !isSPACE(*m))
5688 dstr = newSVpvn_flags(s, m-s,
5689 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5693 /* skip the whitespace found last */
5695 s = m + UTF8SKIP(m);
5699 /* this one uses 's' and is a positive test */
5701 while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
5703 } else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) {
5704 while (s < strend && isSPACE_LC(*s))
5707 while (s < strend && isSPACE(*s))
5712 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
5714 for (m = s; m < strend && *m != '\n'; m++)
5727 dstr = newSVpvn_flags(s, m-s,
5728 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5734 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
5736 Pre-extend the stack, either the number of bytes or
5737 characters in the string or a limited amount, triggered by:
5739 my ($x, $y) = split //, $str;
5743 if (!gimme_scalar) {
5744 const U32 items = limit - 1;
5753 /* keep track of how many bytes we skip over */
5763 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
5776 dstr = newSVpvn(s, 1);
5792 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
5793 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
5794 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
5795 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
5796 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
5797 SV * const csv = CALLREG_INTUIT_STRING(rx);
5799 len = RX_MINLENRET(rx);
5800 if (len == 1 && !RX_UTF8(rx) && !tail) {
5801 const char c = *SvPV_nolen_const(csv);
5803 for (m = s; m < strend && *m != c; m++)
5814 dstr = newSVpvn_flags(s, m-s,
5815 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5818 /* The rx->minlen is in characters but we want to step
5819 * s ahead by bytes. */
5821 s = (char*)utf8_hop((U8*)m, len);
5823 s = m + len; /* Fake \n at the end */
5827 while (s < strend && --limit &&
5828 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
5829 csv, multiline ? FBMrf_MULTILINE : 0)) )
5838 dstr = newSVpvn_flags(s, m-s,
5839 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5842 /* The rx->minlen is in characters but we want to step
5843 * s ahead by bytes. */
5845 s = (char*)utf8_hop((U8*)m, len);
5847 s = m + len; /* Fake \n at the end */
5852 maxiters += slen * RX_NPARENS(rx);
5853 while (s < strend && --limit)
5857 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
5860 if (rex_return == 0)
5862 TAINT_IF(RX_MATCH_TAINTED(rx));
5863 if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
5866 orig = RX_SUBBEG(rx);
5868 strend = s + (strend - m);
5870 m = RX_OFFS(rx)[0].start + orig;
5879 dstr = newSVpvn_flags(s, m-s,
5880 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5883 if (RX_NPARENS(rx)) {
5885 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
5886 s = RX_OFFS(rx)[i].start + orig;
5887 m = RX_OFFS(rx)[i].end + orig;
5889 /* japhy (07/27/01) -- the (m && s) test doesn't catch
5890 parens that didn't match -- they should be set to
5891 undef, not the empty string */
5899 if (m >= orig && s >= orig) {
5900 dstr = newSVpvn_flags(s, m-s,
5901 (do_utf8 ? SVf_UTF8 : 0)
5905 dstr = &PL_sv_undef; /* undef, not "" */
5911 s = RX_OFFS(rx)[0].end + orig;
5915 if (!gimme_scalar) {
5916 iters = (SP - PL_stack_base) - base;
5918 if (iters > maxiters)
5919 DIE(aTHX_ "Split loop");
5921 /* keep field after final delim? */
5922 if (s < strend || (iters && origlimit)) {
5923 if (!gimme_scalar) {
5924 const STRLEN l = strend - s;
5925 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5930 else if (!origlimit) {
5932 iters -= trailing_empty;
5934 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
5935 if (TOPs && !make_mortal)
5937 *SP-- = &PL_sv_undef;
5944 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
5948 if (SvSMAGICAL(ary)) {
5950 mg_set(MUTABLE_SV(ary));
5953 if (gimme == G_ARRAY) {
5955 Copy(AvARRAY(ary), SP + 1, iters, SV*);
5962 ENTER_with_name("call_PUSH");
5963 call_method("PUSH",G_SCALAR|G_DISCARD);
5964 LEAVE_with_name("call_PUSH");
5966 if (gimme == G_ARRAY) {
5968 /* EXTEND should not be needed - we just popped them */
5970 for (i=0; i < iters; i++) {
5971 SV **svp = av_fetch(ary, i, FALSE);
5972 PUSHs((svp) ? *svp : &PL_sv_undef);
5979 if (gimme == G_ARRAY)
5991 SV *const sv = PAD_SVl(PL_op->op_targ);
5993 if (SvPADSTALE(sv)) {
5996 RETURNOP(cLOGOP->op_other);
5998 RETURNOP(cLOGOP->op_next);
6007 assert(SvTYPE(retsv) != SVt_PVCV);
6009 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV) {
6010 retsv = refto(retsv);
6017 PP(unimplemented_op)
6020 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", OP_NAME(PL_op),
6028 HV * const hv = (HV*)POPs;
6030 if (SvRMAGICAL(hv)) {
6031 MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied);
6033 XPUSHs(magic_scalarpack(hv, mg));
6038 XPUSHs(boolSV(HvKEYS(hv) != 0));
6044 * c-indentation-style: bsd
6046 * indent-tabs-mode: t
6049 * ex: set ts=8 sts=4 sw=4 noet: