3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
33 /* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
37 #ifdef NEED_GETPID_PROTO
38 extern Pid_t getpid (void);
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
45 #if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
49 /* variations on pp_null */
55 if (GIMME_V == G_SCALAR)
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
71 if (PL_op->op_flags & OPf_REF) {
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
84 if (gimme == G_ARRAY) {
85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
87 if (SvMAGICAL(TARG)) {
89 for (i=0; i < (U32)maxarg; i++) {
90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
99 else if (gimme == G_SCALAR) {
100 SV* const sv = sv_newmortal();
101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
102 sv_setiv(sv, maxarg);
113 assert(SvTYPE(TARG) == SVt_PVHV);
115 if (PL_op->op_private & OPpLVAL_INTRO)
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
118 if (PL_op->op_flags & OPf_REF)
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
129 if (gimme == G_ARRAY) {
130 RETURNOP(Perl_do_kv(aTHX));
132 else if (gimme == G_SCALAR) {
133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
141 static const char S_no_symref_sv[] =
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
148 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
152 sv = amagic_deref_call(sv, to_gv_amg);
156 if (SvTYPE(sv) == SVt_PVIO) {
157 GV * const gv = MUTABLE_GV(sv_newmortal());
158 gv_init(gv, 0, "", 0, 0);
159 GvIOp(gv) = MUTABLE_IO(sv);
160 SvREFCNT_inc_void_NN(sv);
163 else if (!isGV_with_GP(sv))
164 DIE(aTHX_ "Not a GLOB reference");
167 if (!isGV_with_GP(sv)) {
168 if (!SvOK(sv) && sv != &PL_sv_undef) {
169 /* If this is a 'my' scalar and flag is set then vivify
173 Perl_croak_no_modify(aTHX);
174 if (PL_op->op_private & OPpDEREF) {
176 if (cUNOP->op_targ) {
178 SV * const namesv = PAD_SV(cUNOP->op_targ);
179 const char * const name = SvPV(namesv, len);
180 gv = MUTABLE_GV(newSV(0));
181 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
184 const char * const name = CopSTASHPV(PL_curcop);
187 prepare_SV_for_RV(sv);
188 SvRV_set(sv, MUTABLE_SV(gv));
193 if (PL_op->op_flags & OPf_REF ||
194 PL_op->op_private & HINT_STRICT_REFS)
195 DIE(aTHX_ PL_no_usym, "a symbol");
196 if (ckWARN(WARN_UNINITIALIZED))
200 if ((PL_op->op_flags & OPf_SPECIAL) &&
201 !(PL_op->op_flags & OPf_MOD))
203 SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
205 && (!is_gv_magical_sv(sv,0)
206 || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
213 if (PL_op->op_private & HINT_STRICT_REFS)
214 DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
215 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
216 == OPpDONT_INIT_GV) {
217 /* We are the target of a coderef assignment. Return
218 the scalar unchanged, and let pp_sasssign deal with
224 const char * const nambeg = SvPV_nomg_const(sv, len);
227 nambeg, len, GV_ADD | SvUTF8(sv), SVt_PVGV
232 /* FAKE globs in the symbol table cause weird bugs (#77810) */
233 if (sv) SvFAKE_off(sv);
236 if (sv && SvFAKE(sv)) {
237 SV *newsv = sv_newmortal();
238 sv_setsv_flags(newsv, sv, 0);
242 if (PL_op->op_private & OPpLVAL_INTRO)
243 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
248 /* Helper function for pp_rv2sv and pp_rv2av */
250 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
251 const svtype type, SV ***spp)
256 PERL_ARGS_ASSERT_SOFTREF2XV;
258 if (PL_op->op_private & HINT_STRICT_REFS) {
260 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
262 Perl_die(aTHX_ PL_no_usym, what);
266 PL_op->op_flags & OPf_REF &&
267 PL_op->op_next->op_type != OP_BOOLKEYS
269 Perl_die(aTHX_ PL_no_usym, what);
270 if (ckWARN(WARN_UNINITIALIZED))
272 if (type != SVt_PV && GIMME_V == G_ARRAY) {
276 **spp = &PL_sv_undef;
279 if ((PL_op->op_flags & OPf_SPECIAL) &&
280 !(PL_op->op_flags & OPf_MOD))
282 gv = gv_fetchsv(sv, 0, type);
284 && (!is_gv_magical_sv(sv,0)
285 || !(gv = gv_fetchsv(sv, GV_ADD, type))))
287 **spp = &PL_sv_undef;
293 const char * const nambeg = SvPV_nomg_const(sv, len);
294 gv = gv_fetchpvn_flags(nambeg, len, GV_ADD | SvUTF8(sv), type);
304 if (!(PL_op->op_private & OPpDEREFed))
308 sv = amagic_deref_call(sv, to_sv_amg);
313 switch (SvTYPE(sv)) {
319 DIE(aTHX_ "Not a SCALAR reference");
326 if (!isGV_with_GP(gv)) {
327 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
333 if (PL_op->op_flags & OPf_MOD) {
334 if (PL_op->op_private & OPpLVAL_INTRO) {
335 if (cUNOP->op_first->op_type == OP_NULL)
336 sv = save_scalar(MUTABLE_GV(TOPs));
338 sv = save_scalar(gv);
340 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
342 else if (PL_op->op_private & OPpDEREF)
343 vivify_ref(sv, PL_op->op_private & OPpDEREF);
352 AV * const av = MUTABLE_AV(TOPs);
353 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
355 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
357 *sv = newSV_type(SVt_PVMG);
358 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
362 SETs(sv_2mortal(newSViv(
363 AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
373 if (PL_op->op_flags & OPf_MOD || LVRET) {
374 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
375 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
377 LvTARG(ret) = SvREFCNT_inc_simple(sv);
378 PUSHs(ret); /* no SvSETMAGIC */
382 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
383 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
384 if (mg && mg->mg_len >= 0) {
389 PUSHi(i + CopARYBASE_get(PL_curcop));
402 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
404 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
407 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
408 /* (But not in defined().) */
410 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
413 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
414 if ((PL_op->op_private & OPpLVAL_INTRO)) {
415 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
418 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
421 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
425 cv = MUTABLE_CV(&PL_sv_undef);
426 SETs(MUTABLE_SV(cv));
436 SV *ret = &PL_sv_undef;
438 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
439 const char * s = SvPVX_const(TOPs);
440 if (strnEQ(s, "CORE::", 6)) {
441 SV *const sv = core_prototype(NULL, s + 6, SvCUR(TOPs) - 6, 1);
446 cv = sv_2cv(TOPs, &stash, &gv, 0);
448 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
457 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
459 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
461 PUSHs(MUTABLE_SV(cv));
475 if (GIMME != G_ARRAY) {
479 *MARK = &PL_sv_undef;
480 *MARK = refto(*MARK);
484 EXTEND_MORTAL(SP - MARK);
486 *MARK = refto(*MARK);
491 S_refto(pTHX_ SV *sv)
496 PERL_ARGS_ASSERT_REFTO;
498 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
501 if (!(sv = LvTARG(sv)))
504 SvREFCNT_inc_void_NN(sv);
506 else if (SvTYPE(sv) == SVt_PVAV) {
507 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
508 av_reify(MUTABLE_AV(sv));
510 SvREFCNT_inc_void_NN(sv);
512 else if (SvPADTMP(sv) && !IS_PADGV(sv))
516 SvREFCNT_inc_void_NN(sv);
519 sv_upgrade(rv, SVt_IV);
529 SV * const sv = POPs;
534 if (!sv || !SvROK(sv))
537 pv = sv_reftype(SvRV(sv),TRUE);
538 PUSHp(pv, strlen(pv));
548 stash = CopSTASH(PL_curcop);
550 SV * const ssv = POPs;
554 if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
555 Perl_croak(aTHX_ "Attempt to bless into a reference");
556 ptr = SvPV_const(ssv,len);
558 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
559 "Explicit blessing to '' (assuming package main)");
560 stash = gv_stashpvn(ptr, len, GV_ADD);
563 (void)sv_bless(TOPs, stash);
572 const char * const elem = SvPV_nolen_const(sv);
573 GV * const gv = MUTABLE_GV(POPs);
578 /* elem will always be NUL terminated. */
579 const char * const second_letter = elem + 1;
582 if (strEQ(second_letter, "RRAY"))
583 tmpRef = MUTABLE_SV(GvAV(gv));
586 if (strEQ(second_letter, "ODE"))
587 tmpRef = MUTABLE_SV(GvCVu(gv));
590 if (strEQ(second_letter, "ILEHANDLE")) {
591 /* finally deprecated in 5.8.0 */
592 deprecate("*glob{FILEHANDLE}");
593 tmpRef = MUTABLE_SV(GvIOp(gv));
596 if (strEQ(second_letter, "ORMAT"))
597 tmpRef = MUTABLE_SV(GvFORM(gv));
600 if (strEQ(second_letter, "LOB"))
601 tmpRef = MUTABLE_SV(gv);
604 if (strEQ(second_letter, "ASH"))
605 tmpRef = MUTABLE_SV(GvHV(gv));
608 if (*second_letter == 'O' && !elem[2])
609 tmpRef = MUTABLE_SV(GvIOp(gv));
612 if (strEQ(second_letter, "AME"))
613 sv = newSVhek(GvNAME_HEK(gv));
616 if (strEQ(second_letter, "ACKAGE")) {
617 const HV * const stash = GvSTASH(gv);
618 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
619 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
623 if (strEQ(second_letter, "CALAR"))
638 /* Pattern matching */
643 register unsigned char *s;
646 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
650 if (mg && SvSCREAM(sv))
653 s = (unsigned char*)(SvPV(sv, len));
654 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
655 /* No point in studying a zero length string, and not safe to study
656 anything that doesn't appear to be a simple scalar (and hence might
657 change between now and when the regexp engine runs without our set
658 magic ever running) such as a reference to an object with overloaded
659 stringification. Also refuse to study an FBM scalar, as this gives
660 more flexibility in SV flag usage. No real-world code would ever
661 end up studying an FBM scalar, so this isn't a real pessimisation.
662 Endemic use of I32 in Perl_screaminstr makes it hard to safely push
663 the study length limit from I32_MAX to U32_MAX - 1.
670 } else if (len < 0xFFFF) {
675 size = (256 + len) * quanta;
676 sfirst_raw = (char *)safemalloc(size);
679 DIE(aTHX_ "do_study: out of memory");
683 mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
684 mg->mg_ptr = sfirst_raw;
686 mg->mg_private = quanta;
688 memset(sfirst_raw, ~0, 256 * quanta);
690 /* The assumption here is that most studied strings are fairly short, hence
691 the pain of the extra code is worth it, given the memory savings.
692 80 character string, 336 bytes as U8, down from 1344 as U32
693 800 character string, 2112 bytes as U16, down from 4224 as U32
697 U8 *const sfirst = (U8 *)sfirst_raw;
698 U8 *const snext = sfirst + 256;
700 const U8 ch = s[len];
701 snext[len] = sfirst[ch];
704 } else if (quanta == 2) {
705 U16 *const sfirst = (U16 *)sfirst_raw;
706 U16 *const snext = sfirst + 256;
708 const U8 ch = s[len];
709 snext[len] = sfirst[ch];
713 U32 *const sfirst = (U32 *)sfirst_raw;
714 U32 *const snext = sfirst + 256;
716 const U8 ch = s[len];
717 snext[len] = sfirst[ch];
730 if (PL_op->op_flags & OPf_STACKED)
732 else if (PL_op->op_private & OPpTARGET_MY)
738 TARG = sv_newmortal();
739 if(PL_op->op_type == OP_TRANSR) {
740 SV * const newsv = newSVsv(sv);
744 else PUSHi(do_trans(sv));
748 /* Lvalue operators. */
751 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
757 PERL_ARGS_ASSERT_DO_CHOMP;
759 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
761 if (SvTYPE(sv) == SVt_PVAV) {
763 AV *const av = MUTABLE_AV(sv);
764 const I32 max = AvFILL(av);
766 for (i = 0; i <= max; i++) {
767 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
768 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
769 do_chomp(retval, sv, chomping);
773 else if (SvTYPE(sv) == SVt_PVHV) {
774 HV* const hv = MUTABLE_HV(sv);
776 (void)hv_iterinit(hv);
777 while ((entry = hv_iternext(hv)))
778 do_chomp(retval, hv_iterval(hv,entry), chomping);
781 else if (SvREADONLY(sv)) {
783 /* SV is copy-on-write */
784 sv_force_normal_flags(sv, 0);
787 Perl_croak_no_modify(aTHX);
792 /* XXX, here sv is utf8-ized as a side-effect!
793 If encoding.pm is used properly, almost string-generating
794 operations, including literal strings, chr(), input data, etc.
795 should have been utf8-ized already, right?
797 sv_recode_to_utf8(sv, PL_encoding);
803 char *temp_buffer = NULL;
812 while (len && s[-1] == '\n') {
819 STRLEN rslen, rs_charlen;
820 const char *rsptr = SvPV_const(PL_rs, rslen);
822 rs_charlen = SvUTF8(PL_rs)
826 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
827 /* Assumption is that rs is shorter than the scalar. */
829 /* RS is utf8, scalar is 8 bit. */
831 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
834 /* Cannot downgrade, therefore cannot possibly match
836 assert (temp_buffer == rsptr);
842 else if (PL_encoding) {
843 /* RS is 8 bit, encoding.pm is used.
844 * Do not recode PL_rs as a side-effect. */
845 svrecode = newSVpvn(rsptr, rslen);
846 sv_recode_to_utf8(svrecode, PL_encoding);
847 rsptr = SvPV_const(svrecode, rslen);
848 rs_charlen = sv_len_utf8(svrecode);
851 /* RS is 8 bit, scalar is utf8. */
852 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
866 if (memNE(s, rsptr, rslen))
868 SvIVX(retval) += rs_charlen;
871 s = SvPV_force_nolen(sv);
879 SvREFCNT_dec(svrecode);
881 Safefree(temp_buffer);
883 if (len && !SvPOK(sv))
884 s = SvPV_force_nomg(sv, len);
887 char * const send = s + len;
888 char * const start = s;
890 while (s > start && UTF8_IS_CONTINUATION(*s))
892 if (is_utf8_string((U8*)s, send - s)) {
893 sv_setpvn(retval, s, send - s);
895 SvCUR_set(sv, s - start);
901 sv_setpvs(retval, "");
905 sv_setpvn(retval, s, 1);
912 sv_setpvs(retval, "");
920 const bool chomping = PL_op->op_type == OP_SCHOMP;
924 do_chomp(TARG, TOPs, chomping);
931 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
932 const bool chomping = PL_op->op_type == OP_CHOMP;
937 do_chomp(TARG, *++MARK, chomping);
948 if (!PL_op->op_private) {
957 SV_CHECK_THINKFIRST_COW_DROP(sv);
959 switch (SvTYPE(sv)) {
963 av_undef(MUTABLE_AV(sv));
966 hv_undef(MUTABLE_HV(sv));
969 if (cv_const_sv((const CV *)sv))
970 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
971 CvANON((const CV *)sv) ? "(anonymous)"
972 : GvENAME(CvGV((const CV *)sv)));
976 /* let user-undef'd sub keep its identity */
977 GV* const gv = CvGV((const CV *)sv);
978 cv_undef(MUTABLE_CV(sv));
979 CvGV_set(MUTABLE_CV(sv), gv);
984 SvSetMagicSV(sv, &PL_sv_undef);
987 else if (isGV_with_GP(sv)) {
991 /* undef *Pkg::meth_name ... */
993 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
994 && HvENAME_get(stash);
996 if((stash = GvHV((const GV *)sv))) {
997 if(HvENAME_get(stash))
998 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1002 gp_free(MUTABLE_GV(sv));
1004 GvGP_set(sv, gp_ref(gp));
1005 GvSV(sv) = newSV(0);
1006 GvLINE(sv) = CopLINE(PL_curcop);
1007 GvEGV(sv) = MUTABLE_GV(sv);
1011 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1013 /* undef *Foo::ISA */
1014 if( strEQ(GvNAME((const GV *)sv), "ISA")
1015 && (stash = GvSTASH((const GV *)sv))
1016 && (method_changed || HvENAME(stash)) )
1017 mro_isa_changed_in(stash);
1018 else if(method_changed)
1019 mro_method_changed_in(
1020 GvSTASH((const GV *)sv)
1027 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1042 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1043 Perl_croak_no_modify(aTHX);
1044 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1045 && SvIVX(TOPs) != IV_MIN)
1047 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1048 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1059 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1060 Perl_croak_no_modify(aTHX);
1062 TARG = sv_newmortal();
1063 sv_setsv(TARG, TOPs);
1064 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1065 && SvIVX(TOPs) != IV_MAX)
1067 SvIV_set(TOPs, SvIVX(TOPs) + 1);
1068 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1073 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1083 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1084 Perl_croak_no_modify(aTHX);
1086 TARG = sv_newmortal();
1087 sv_setsv(TARG, TOPs);
1088 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1089 && SvIVX(TOPs) != IV_MIN)
1091 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1092 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1101 /* Ordinary operators. */
1105 dVAR; dSP; dATARGET; SV *svl, *svr;
1106 #ifdef PERL_PRESERVE_IVUV
1109 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1112 #ifdef PERL_PRESERVE_IVUV
1113 /* For integer to integer power, we do the calculation by hand wherever
1114 we're sure it is safe; otherwise we call pow() and try to convert to
1115 integer afterwards. */
1117 SvIV_please_nomg(svr);
1119 SvIV_please_nomg(svl);
1128 const IV iv = SvIVX(svr);
1132 goto float_it; /* Can't do negative powers this way. */
1136 baseuok = SvUOK(svl);
1138 baseuv = SvUVX(svl);
1140 const IV iv = SvIVX(svl);
1143 baseuok = TRUE; /* effectively it's a UV now */
1145 baseuv = -iv; /* abs, baseuok == false records sign */
1148 /* now we have integer ** positive integer. */
1151 /* foo & (foo - 1) is zero only for a power of 2. */
1152 if (!(baseuv & (baseuv - 1))) {
1153 /* We are raising power-of-2 to a positive integer.
1154 The logic here will work for any base (even non-integer
1155 bases) but it can be less accurate than
1156 pow (base,power) or exp (power * log (base)) when the
1157 intermediate values start to spill out of the mantissa.
1158 With powers of 2 we know this can't happen.
1159 And powers of 2 are the favourite thing for perl
1160 programmers to notice ** not doing what they mean. */
1162 NV base = baseuok ? baseuv : -(NV)baseuv;
1167 while (power >>= 1) {
1175 SvIV_please_nomg(svr);
1178 register unsigned int highbit = 8 * sizeof(UV);
1179 register unsigned int diff = 8 * sizeof(UV);
1180 while (diff >>= 1) {
1182 if (baseuv >> highbit) {
1186 /* we now have baseuv < 2 ** highbit */
1187 if (power * highbit <= 8 * sizeof(UV)) {
1188 /* result will definitely fit in UV, so use UV math
1189 on same algorithm as above */
1190 register UV result = 1;
1191 register UV base = baseuv;
1192 const bool odd_power = cBOOL(power & 1);
1196 while (power >>= 1) {
1203 if (baseuok || !odd_power)
1204 /* answer is positive */
1206 else if (result <= (UV)IV_MAX)
1207 /* answer negative, fits in IV */
1208 SETi( -(IV)result );
1209 else if (result == (UV)IV_MIN)
1210 /* 2's complement assumption: special case IV_MIN */
1213 /* answer negative, doesn't fit */
1214 SETn( -(NV)result );
1224 NV right = SvNV_nomg(svr);
1225 NV left = SvNV_nomg(svl);
1228 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1230 We are building perl with long double support and are on an AIX OS
1231 afflicted with a powl() function that wrongly returns NaNQ for any
1232 negative base. This was reported to IBM as PMR #23047-379 on
1233 03/06/2006. The problem exists in at least the following versions
1234 of AIX and the libm fileset, and no doubt others as well:
1236 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1237 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1238 AIX 5.2.0 bos.adt.libm 5.2.0.85
1240 So, until IBM fixes powl(), we provide the following workaround to
1241 handle the problem ourselves. Our logic is as follows: for
1242 negative bases (left), we use fmod(right, 2) to check if the
1243 exponent is an odd or even integer:
1245 - if odd, powl(left, right) == -powl(-left, right)
1246 - if even, powl(left, right) == powl(-left, right)
1248 If the exponent is not an integer, the result is rightly NaNQ, so
1249 we just return that (as NV_NAN).
1253 NV mod2 = Perl_fmod( right, 2.0 );
1254 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1255 SETn( -Perl_pow( -left, right) );
1256 } else if (mod2 == 0.0) { /* even integer */
1257 SETn( Perl_pow( -left, right) );
1258 } else { /* fractional power */
1262 SETn( Perl_pow( left, right) );
1265 SETn( Perl_pow( left, right) );
1266 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1268 #ifdef PERL_PRESERVE_IVUV
1270 SvIV_please_nomg(svr);
1278 dVAR; dSP; dATARGET; SV *svl, *svr;
1279 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1282 #ifdef PERL_PRESERVE_IVUV
1283 SvIV_please_nomg(svr);
1285 /* Unless the left argument is integer in range we are going to have to
1286 use NV maths. Hence only attempt to coerce the right argument if
1287 we know the left is integer. */
1288 /* Left operand is defined, so is it IV? */
1289 SvIV_please_nomg(svl);
1291 bool auvok = SvUOK(svl);
1292 bool buvok = SvUOK(svr);
1293 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1294 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1303 const IV aiv = SvIVX(svl);
1306 auvok = TRUE; /* effectively it's a UV now */
1308 alow = -aiv; /* abs, auvok == false records sign */
1314 const IV biv = SvIVX(svr);
1317 buvok = TRUE; /* effectively it's a UV now */
1319 blow = -biv; /* abs, buvok == false records sign */
1323 /* If this does sign extension on unsigned it's time for plan B */
1324 ahigh = alow >> (4 * sizeof (UV));
1326 bhigh = blow >> (4 * sizeof (UV));
1328 if (ahigh && bhigh) {
1330 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1331 which is overflow. Drop to NVs below. */
1332 } else if (!ahigh && !bhigh) {
1333 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1334 so the unsigned multiply cannot overflow. */
1335 const UV product = alow * blow;
1336 if (auvok == buvok) {
1337 /* -ve * -ve or +ve * +ve gives a +ve result. */
1341 } else if (product <= (UV)IV_MIN) {
1342 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1343 /* -ve result, which could overflow an IV */
1345 SETi( -(IV)product );
1347 } /* else drop to NVs below. */
1349 /* One operand is large, 1 small */
1352 /* swap the operands */
1354 bhigh = blow; /* bhigh now the temp var for the swap */
1358 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1359 multiplies can't overflow. shift can, add can, -ve can. */
1360 product_middle = ahigh * blow;
1361 if (!(product_middle & topmask)) {
1362 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1364 product_middle <<= (4 * sizeof (UV));
1365 product_low = alow * blow;
1367 /* as for pp_add, UV + something mustn't get smaller.
1368 IIRC ANSI mandates this wrapping *behaviour* for
1369 unsigned whatever the actual representation*/
1370 product_low += product_middle;
1371 if (product_low >= product_middle) {
1372 /* didn't overflow */
1373 if (auvok == buvok) {
1374 /* -ve * -ve or +ve * +ve gives a +ve result. */
1376 SETu( product_low );
1378 } else if (product_low <= (UV)IV_MIN) {
1379 /* 2s complement assumption again */
1380 /* -ve result, which could overflow an IV */
1382 SETi( -(IV)product_low );
1384 } /* else drop to NVs below. */
1386 } /* product_middle too large */
1387 } /* ahigh && bhigh */
1392 NV right = SvNV_nomg(svr);
1393 NV left = SvNV_nomg(svl);
1395 SETn( left * right );
1402 dVAR; dSP; dATARGET; SV *svl, *svr;
1403 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1406 /* Only try to do UV divide first
1407 if ((SLOPPYDIVIDE is true) or
1408 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1410 The assumption is that it is better to use floating point divide
1411 whenever possible, only doing integer divide first if we can't be sure.
1412 If NV_PRESERVES_UV is true then we know at compile time that no UV
1413 can be too large to preserve, so don't need to compile the code to
1414 test the size of UVs. */
1417 # define PERL_TRY_UV_DIVIDE
1418 /* ensure that 20./5. == 4. */
1420 # ifdef PERL_PRESERVE_IVUV
1421 # ifndef NV_PRESERVES_UV
1422 # define PERL_TRY_UV_DIVIDE
1427 #ifdef PERL_TRY_UV_DIVIDE
1428 SvIV_please_nomg(svr);
1430 SvIV_please_nomg(svl);
1432 bool left_non_neg = SvUOK(svl);
1433 bool right_non_neg = SvUOK(svr);
1437 if (right_non_neg) {
1441 const IV biv = SvIVX(svr);
1444 right_non_neg = TRUE; /* effectively it's a UV now */
1450 /* historically undef()/0 gives a "Use of uninitialized value"
1451 warning before dieing, hence this test goes here.
1452 If it were immediately before the second SvIV_please, then
1453 DIE() would be invoked before left was even inspected, so
1454 no inspection would give no warning. */
1456 DIE(aTHX_ "Illegal division by zero");
1462 const IV aiv = SvIVX(svl);
1465 left_non_neg = TRUE; /* effectively it's a UV now */
1474 /* For sloppy divide we always attempt integer division. */
1476 /* Otherwise we only attempt it if either or both operands
1477 would not be preserved by an NV. If both fit in NVs
1478 we fall through to the NV divide code below. However,
1479 as left >= right to ensure integer result here, we know that
1480 we can skip the test on the right operand - right big
1481 enough not to be preserved can't get here unless left is
1484 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1487 /* Integer division can't overflow, but it can be imprecise. */
1488 const UV result = left / right;
1489 if (result * right == left) {
1490 SP--; /* result is valid */
1491 if (left_non_neg == right_non_neg) {
1492 /* signs identical, result is positive. */
1496 /* 2s complement assumption */
1497 if (result <= (UV)IV_MIN)
1498 SETi( -(IV)result );
1500 /* It's exact but too negative for IV. */
1501 SETn( -(NV)result );
1504 } /* tried integer divide but it was not an integer result */
1505 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1506 } /* left wasn't SvIOK */
1507 } /* right wasn't SvIOK */
1508 #endif /* PERL_TRY_UV_DIVIDE */
1510 NV right = SvNV_nomg(svr);
1511 NV left = SvNV_nomg(svl);
1512 (void)POPs;(void)POPs;
1513 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1514 if (! Perl_isnan(right) && right == 0.0)
1518 DIE(aTHX_ "Illegal division by zero");
1519 PUSHn( left / right );
1526 dVAR; dSP; dATARGET;
1527 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1531 bool left_neg = FALSE;
1532 bool right_neg = FALSE;
1533 bool use_double = FALSE;
1534 bool dright_valid = FALSE;
1537 SV * const svr = TOPs;
1538 SV * const svl = TOPm1s;
1539 SvIV_please_nomg(svr);
1541 right_neg = !SvUOK(svr);
1545 const IV biv = SvIVX(svr);
1548 right_neg = FALSE; /* effectively it's a UV now */
1555 dright = SvNV_nomg(svr);
1556 right_neg = dright < 0;
1559 if (dright < UV_MAX_P1) {
1560 right = U_V(dright);
1561 dright_valid = TRUE; /* In case we need to use double below. */
1567 /* At this point use_double is only true if right is out of range for
1568 a UV. In range NV has been rounded down to nearest UV and
1569 use_double false. */
1570 SvIV_please_nomg(svl);
1571 if (!use_double && SvIOK(svl)) {
1573 left_neg = !SvUOK(svl);
1577 const IV aiv = SvIVX(svl);
1580 left_neg = FALSE; /* effectively it's a UV now */
1588 dleft = SvNV_nomg(svl);
1589 left_neg = dleft < 0;
1593 /* This should be exactly the 5.6 behaviour - if left and right are
1594 both in range for UV then use U_V() rather than floor. */
1596 if (dleft < UV_MAX_P1) {
1597 /* right was in range, so is dleft, so use UVs not double.
1601 /* left is out of range for UV, right was in range, so promote
1602 right (back) to double. */
1604 /* The +0.5 is used in 5.6 even though it is not strictly
1605 consistent with the implicit +0 floor in the U_V()
1606 inside the #if 1. */
1607 dleft = Perl_floor(dleft + 0.5);
1610 dright = Perl_floor(dright + 0.5);
1621 DIE(aTHX_ "Illegal modulus zero");
1623 dans = Perl_fmod(dleft, dright);
1624 if ((left_neg != right_neg) && dans)
1625 dans = dright - dans;
1628 sv_setnv(TARG, dans);
1634 DIE(aTHX_ "Illegal modulus zero");
1637 if ((left_neg != right_neg) && ans)
1640 /* XXX may warn: unary minus operator applied to unsigned type */
1641 /* could change -foo to be (~foo)+1 instead */
1642 if (ans <= ~((UV)IV_MAX)+1)
1643 sv_setiv(TARG, ~ans+1);
1645 sv_setnv(TARG, -(NV)ans);
1648 sv_setuv(TARG, ans);
1657 dVAR; dSP; dATARGET;
1661 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1662 /* TODO: think of some way of doing list-repeat overloading ??? */
1667 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1673 const UV uv = SvUV_nomg(sv);
1675 count = IV_MAX; /* The best we can do? */
1679 const IV iv = SvIV_nomg(sv);
1686 else if (SvNOKp(sv)) {
1687 const NV nv = SvNV_nomg(sv);
1694 count = SvIV_nomg(sv);
1696 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1698 static const char oom_list_extend[] = "Out of memory during list extend";
1699 const I32 items = SP - MARK;
1700 const I32 max = items * count;
1702 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1703 /* Did the max computation overflow? */
1704 if (items > 0 && max > 0 && (max < items || max < count))
1705 Perl_croak(aTHX_ oom_list_extend);
1710 /* This code was intended to fix 20010809.028:
1713 for (($x =~ /./g) x 2) {
1714 print chop; # "abcdabcd" expected as output.
1717 * but that change (#11635) broke this code:
1719 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1721 * I can't think of a better fix that doesn't introduce
1722 * an efficiency hit by copying the SVs. The stack isn't
1723 * refcounted, and mortalisation obviously doesn't
1724 * Do The Right Thing when the stack has more than
1725 * one pointer to the same mortal value.
1729 *SP = sv_2mortal(newSVsv(*SP));
1739 repeatcpy((char*)(MARK + items), (char*)MARK,
1740 items * sizeof(const SV *), count - 1);
1743 else if (count <= 0)
1746 else { /* Note: mark already snarfed by pp_list */
1747 SV * const tmpstr = POPs;
1750 static const char oom_string_extend[] =
1751 "Out of memory during string extend";
1754 sv_setsv_nomg(TARG, tmpstr);
1755 SvPV_force_nomg(TARG, len);
1756 isutf = DO_UTF8(TARG);
1761 const STRLEN max = (UV)count * len;
1762 if (len > MEM_SIZE_MAX / count)
1763 Perl_croak(aTHX_ oom_string_extend);
1764 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1765 SvGROW(TARG, max + 1);
1766 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1767 SvCUR_set(TARG, SvCUR(TARG) * count);
1769 *SvEND(TARG) = '\0';
1772 (void)SvPOK_only_UTF8(TARG);
1774 (void)SvPOK_only(TARG);
1776 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1777 /* The parser saw this as a list repeat, and there
1778 are probably several items on the stack. But we're
1779 in scalar context, and there's no pp_list to save us
1780 now. So drop the rest of the items -- robin@kitsite.com
1792 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1793 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1796 useleft = USE_LEFT(svl);
1797 #ifdef PERL_PRESERVE_IVUV
1798 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1799 "bad things" happen if you rely on signed integers wrapping. */
1800 SvIV_please_nomg(svr);
1802 /* Unless the left argument is integer in range we are going to have to
1803 use NV maths. Hence only attempt to coerce the right argument if
1804 we know the left is integer. */
1805 register UV auv = 0;
1811 a_valid = auvok = 1;
1812 /* left operand is undef, treat as zero. */
1814 /* Left operand is defined, so is it IV? */
1815 SvIV_please_nomg(svl);
1817 if ((auvok = SvUOK(svl)))
1820 register const IV aiv = SvIVX(svl);
1823 auvok = 1; /* Now acting as a sign flag. */
1824 } else { /* 2s complement assumption for IV_MIN */
1832 bool result_good = 0;
1835 bool buvok = SvUOK(svr);
1840 register const IV biv = SvIVX(svr);
1847 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1848 else "IV" now, independent of how it came in.
1849 if a, b represents positive, A, B negative, a maps to -A etc
1854 all UV maths. negate result if A negative.
1855 subtract if signs same, add if signs differ. */
1857 if (auvok ^ buvok) {
1866 /* Must get smaller */
1871 if (result <= buv) {
1872 /* result really should be -(auv-buv). as its negation
1873 of true value, need to swap our result flag */
1885 if (result <= (UV)IV_MIN)
1886 SETi( -(IV)result );
1888 /* result valid, but out of range for IV. */
1889 SETn( -(NV)result );
1893 } /* Overflow, drop through to NVs. */
1898 NV value = SvNV_nomg(svr);
1902 /* left operand is undef, treat as zero - value */
1906 SETn( SvNV_nomg(svl) - value );
1913 dVAR; dSP; dATARGET; SV *svl, *svr;
1914 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1918 const IV shift = SvIV_nomg(svr);
1919 if (PL_op->op_private & HINT_INTEGER) {
1920 const IV i = SvIV_nomg(svl);
1924 const UV u = SvUV_nomg(svl);
1933 dVAR; dSP; dATARGET; SV *svl, *svr;
1934 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
1938 const IV shift = SvIV_nomg(svr);
1939 if (PL_op->op_private & HINT_INTEGER) {
1940 const IV i = SvIV_nomg(svl);
1944 const UV u = SvUV_nomg(svl);
1956 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
1960 (SvIOK_notUV(left) && SvIOK_notUV(right))
1961 ? (SvIVX(left) < SvIVX(right))
1962 : (do_ncmp(left, right) == -1)
1972 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1976 (SvIOK_notUV(left) && SvIOK_notUV(right))
1977 ? (SvIVX(left) > SvIVX(right))
1978 : (do_ncmp(left, right) == 1)
1988 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
1992 (SvIOK_notUV(left) && SvIOK_notUV(right))
1993 ? (SvIVX(left) <= SvIVX(right))
1994 : (do_ncmp(left, right) <= 0)
2004 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2008 (SvIOK_notUV(left) && SvIOK_notUV(right))
2009 ? (SvIVX(left) >= SvIVX(right))
2010 : ( (do_ncmp(left, right) & 2) == 0)
2020 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2024 (SvIOK_notUV(left) && SvIOK_notUV(right))
2025 ? (SvIVX(left) != SvIVX(right))
2026 : (do_ncmp(left, right) != 0)
2031 /* compare left and right SVs. Returns:
2035 * 2: left or right was a NaN
2038 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2042 PERL_ARGS_ASSERT_DO_NCMP;
2043 #ifdef PERL_PRESERVE_IVUV
2044 SvIV_please_nomg(right);
2045 /* Fortunately it seems NaN isn't IOK */
2047 SvIV_please_nomg(left);
2050 const IV leftiv = SvIVX(left);
2051 if (!SvUOK(right)) {
2052 /* ## IV <=> IV ## */
2053 const IV rightiv = SvIVX(right);
2054 return (leftiv > rightiv) - (leftiv < rightiv);
2056 /* ## IV <=> UV ## */
2058 /* As (b) is a UV, it's >=0, so it must be < */
2061 const UV rightuv = SvUVX(right);
2062 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2067 /* ## UV <=> UV ## */
2068 const UV leftuv = SvUVX(left);
2069 const UV rightuv = SvUVX(right);
2070 return (leftuv > rightuv) - (leftuv < rightuv);
2072 /* ## UV <=> IV ## */
2074 const IV rightiv = SvIVX(right);
2076 /* As (a) is a UV, it's >=0, so it cannot be < */
2079 const UV leftuv = SvUVX(left);
2080 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2088 NV const rnv = SvNV_nomg(right);
2089 NV const lnv = SvNV_nomg(left);
2091 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2092 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2095 return (lnv > rnv) - (lnv < rnv);
2114 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2117 value = do_ncmp(left, right);
2132 int amg_type = sle_amg;
2136 switch (PL_op->op_type) {
2155 tryAMAGICbin_MG(amg_type, AMGf_set);
2158 const int cmp = (IN_LOCALE_RUNTIME
2159 ? sv_cmp_locale_flags(left, right, 0)
2160 : sv_cmp_flags(left, right, 0));
2161 SETs(boolSV(cmp * multiplier < rhs));
2169 tryAMAGICbin_MG(seq_amg, AMGf_set);
2172 SETs(boolSV(sv_eq_flags(left, right, 0)));
2180 tryAMAGICbin_MG(sne_amg, AMGf_set);
2183 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2191 tryAMAGICbin_MG(scmp_amg, 0);
2194 const int cmp = (IN_LOCALE_RUNTIME
2195 ? sv_cmp_locale_flags(left, right, 0)
2196 : sv_cmp_flags(left, right, 0));
2204 dVAR; dSP; dATARGET;
2205 tryAMAGICbin_MG(band_amg, AMGf_assign);
2208 if (SvNIOKp(left) || SvNIOKp(right)) {
2209 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2210 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2211 if (PL_op->op_private & HINT_INTEGER) {
2212 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2216 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2219 if (left_ro_nonnum) SvNIOK_off(left);
2220 if (right_ro_nonnum) SvNIOK_off(right);
2223 do_vop(PL_op->op_type, TARG, left, right);
2232 dVAR; dSP; dATARGET;
2233 const int op_type = PL_op->op_type;
2235 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2238 if (SvNIOKp(left) || SvNIOKp(right)) {
2239 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2240 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2241 if (PL_op->op_private & HINT_INTEGER) {
2242 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2243 const IV r = SvIV_nomg(right);
2244 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2248 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2249 const UV r = SvUV_nomg(right);
2250 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2253 if (left_ro_nonnum) SvNIOK_off(left);
2254 if (right_ro_nonnum) SvNIOK_off(right);
2257 do_vop(op_type, TARG, left, right);
2267 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2269 SV * const sv = TOPs;
2270 const int flags = SvFLAGS(sv);
2272 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
2276 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2277 /* It's publicly an integer, or privately an integer-not-float */
2280 if (SvIVX(sv) == IV_MIN) {
2281 /* 2s complement assumption. */
2282 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2285 else if (SvUVX(sv) <= IV_MAX) {
2290 else if (SvIVX(sv) != IV_MIN) {
2294 #ifdef PERL_PRESERVE_IVUV
2302 SETn(-SvNV_nomg(sv));
2303 else if (SvPOKp(sv)) {
2305 const char * const s = SvPV_nomg_const(sv, len);
2306 if (isIDFIRST(*s)) {
2307 sv_setpvs(TARG, "-");
2310 else if (*s == '+' || *s == '-') {
2311 sv_setsv_nomg(TARG, sv);
2312 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2314 else if (DO_UTF8(sv)) {
2315 SvIV_please_nomg(sv);
2317 goto oops_its_an_int;
2319 sv_setnv(TARG, -SvNV_nomg(sv));
2321 sv_setpvs(TARG, "-");
2326 SvIV_please_nomg(sv);
2328 goto oops_its_an_int;
2329 sv_setnv(TARG, -SvNV_nomg(sv));
2334 SETn(-SvNV_nomg(sv));
2342 tryAMAGICun_MG(not_amg, AMGf_set);
2343 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2350 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2354 if (PL_op->op_private & HINT_INTEGER) {
2355 const IV i = ~SvIV_nomg(sv);
2359 const UV u = ~SvUV_nomg(sv);
2368 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2369 sv_setsv_nomg(TARG, sv);
2370 tmps = (U8*)SvPV_force_nomg(TARG, len);
2373 /* Calculate exact length, let's not estimate. */
2378 U8 * const send = tmps + len;
2379 U8 * const origtmps = tmps;
2380 const UV utf8flags = UTF8_ALLOW_ANYUV;
2382 while (tmps < send) {
2383 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2385 targlen += UNISKIP(~c);
2391 /* Now rewind strings and write them. */
2398 Newx(result, targlen + 1, U8);
2400 while (tmps < send) {
2401 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2403 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2406 sv_usepvn_flags(TARG, (char*)result, targlen,
2407 SV_HAS_TRAILING_NUL);
2414 Newx(result, nchar + 1, U8);
2416 while (tmps < send) {
2417 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2422 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2430 register long *tmpl;
2431 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2434 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2439 for ( ; anum > 0; anum--, tmps++)
2447 /* integer versions of some of the above */
2451 dVAR; dSP; dATARGET;
2452 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2455 SETi( left * right );
2463 dVAR; dSP; dATARGET;
2464 tryAMAGICbin_MG(div_amg, AMGf_assign);
2467 IV value = SvIV_nomg(right);
2469 DIE(aTHX_ "Illegal division by zero");
2470 num = SvIV_nomg(left);
2472 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2476 value = num / value;
2482 #if defined(__GLIBC__) && IVSIZE == 8
2489 /* This is the vanilla old i_modulo. */
2490 dVAR; dSP; dATARGET;
2491 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2495 DIE(aTHX_ "Illegal modulus zero");
2496 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2500 SETi( left % right );
2505 #if defined(__GLIBC__) && IVSIZE == 8
2510 /* This is the i_modulo with the workaround for the _moddi3 bug
2511 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2512 * See below for pp_i_modulo. */
2513 dVAR; dSP; dATARGET;
2514 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2518 DIE(aTHX_ "Illegal modulus zero");
2519 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2523 SETi( left % PERL_ABS(right) );
2530 dVAR; dSP; dATARGET;
2531 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2535 DIE(aTHX_ "Illegal modulus zero");
2536 /* The assumption is to use hereafter the old vanilla version... */
2538 PL_ppaddr[OP_I_MODULO] =
2540 /* .. but if we have glibc, we might have a buggy _moddi3
2541 * (at least glicb 2.2.5 is known to have this bug), in other
2542 * words our integer modulus with negative quad as the second
2543 * argument might be broken. Test for this and re-patch the
2544 * opcode dispatch table if that is the case, remembering to
2545 * also apply the workaround so that this first round works
2546 * right, too. See [perl #9402] for more information. */
2550 /* Cannot do this check with inlined IV constants since
2551 * that seems to work correctly even with the buggy glibc. */
2553 /* Yikes, we have the bug.
2554 * Patch in the workaround version. */
2556 PL_ppaddr[OP_I_MODULO] =
2557 &Perl_pp_i_modulo_1;
2558 /* Make certain we work right this time, too. */
2559 right = PERL_ABS(right);
2562 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2566 SETi( left % right );
2574 dVAR; dSP; dATARGET;
2575 tryAMAGICbin_MG(add_amg, AMGf_assign);
2577 dPOPTOPiirl_ul_nomg;
2578 SETi( left + right );
2585 dVAR; dSP; dATARGET;
2586 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2588 dPOPTOPiirl_ul_nomg;
2589 SETi( left - right );
2597 tryAMAGICbin_MG(lt_amg, AMGf_set);
2600 SETs(boolSV(left < right));
2608 tryAMAGICbin_MG(gt_amg, AMGf_set);
2611 SETs(boolSV(left > right));
2619 tryAMAGICbin_MG(le_amg, AMGf_set);
2622 SETs(boolSV(left <= right));
2630 tryAMAGICbin_MG(ge_amg, AMGf_set);
2633 SETs(boolSV(left >= right));
2641 tryAMAGICbin_MG(eq_amg, AMGf_set);
2644 SETs(boolSV(left == right));
2652 tryAMAGICbin_MG(ne_amg, AMGf_set);
2655 SETs(boolSV(left != right));
2663 tryAMAGICbin_MG(ncmp_amg, 0);
2670 else if (left < right)
2682 tryAMAGICun_MG(neg_amg, 0);
2684 SV * const sv = TOPs;
2685 IV const i = SvIV_nomg(sv);
2691 /* High falutin' math. */
2696 tryAMAGICbin_MG(atan2_amg, 0);
2699 SETn(Perl_atan2(left, right));
2707 int amg_type = sin_amg;
2708 const char *neg_report = NULL;
2709 NV (*func)(NV) = Perl_sin;
2710 const int op_type = PL_op->op_type;
2727 amg_type = sqrt_amg;
2729 neg_report = "sqrt";
2734 tryAMAGICun_MG(amg_type, 0);
2736 SV * const arg = POPs;
2737 const NV value = SvNV_nomg(arg);
2739 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2740 SET_NUMERIC_STANDARD();
2741 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2744 XPUSHn(func(value));
2749 /* Support Configure command-line overrides for rand() functions.
2750 After 5.005, perhaps we should replace this by Configure support
2751 for drand48(), random(), or rand(). For 5.005, though, maintain
2752 compatibility by calling rand() but allow the user to override it.
2753 See INSTALL for details. --Andy Dougherty 15 July 1998
2755 /* Now it's after 5.005, and Configure supports drand48() and random(),
2756 in addition to rand(). So the overrides should not be needed any more.
2757 --Jarkko Hietaniemi 27 September 1998
2760 #ifndef HAS_DRAND48_PROTO
2761 extern double drand48 (void);
2774 if (!PL_srand_called) {
2775 (void)seedDrand01((Rand_seed_t)seed());
2776 PL_srand_called = TRUE;
2786 const UV anum = (MAXARG < 1) ? seed() : POPu;
2787 (void)seedDrand01((Rand_seed_t)anum);
2788 PL_srand_called = TRUE;
2792 /* Historically srand always returned true. We can avoid breaking
2794 sv_setpvs(TARG, "0 but true");
2803 tryAMAGICun_MG(int_amg, AMGf_numeric);
2805 SV * const sv = TOPs;
2806 const IV iv = SvIV_nomg(sv);
2807 /* XXX it's arguable that compiler casting to IV might be subtly
2808 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2809 else preferring IV has introduced a subtle behaviour change bug. OTOH
2810 relying on floating point to be accurate is a bug. */
2815 else if (SvIOK(sv)) {
2817 SETu(SvUV_nomg(sv));
2822 const NV value = SvNV_nomg(sv);
2824 if (value < (NV)UV_MAX + 0.5) {
2827 SETn(Perl_floor(value));
2831 if (value > (NV)IV_MIN - 0.5) {
2834 SETn(Perl_ceil(value));
2845 tryAMAGICun_MG(abs_amg, AMGf_numeric);
2847 SV * const sv = TOPs;
2848 /* This will cache the NV value if string isn't actually integer */
2849 const IV iv = SvIV_nomg(sv);
2854 else if (SvIOK(sv)) {
2855 /* IVX is precise */
2857 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
2865 /* 2s complement assumption. Also, not really needed as
2866 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2872 const NV value = SvNV_nomg(sv);
2886 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
2890 SV* const sv = POPs;
2892 tmps = (SvPV_const(sv, len));
2894 /* If Unicode, try to downgrade
2895 * If not possible, croak. */
2896 SV* const tsv = sv_2mortal(newSVsv(sv));
2899 sv_utf8_downgrade(tsv, FALSE);
2900 tmps = SvPV_const(tsv, len);
2902 if (PL_op->op_type == OP_HEX)
2905 while (*tmps && len && isSPACE(*tmps))
2909 if (*tmps == 'x' || *tmps == 'X') {
2911 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
2913 else if (*tmps == 'b' || *tmps == 'B')
2914 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
2916 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2918 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2932 SV * const sv = TOPs;
2934 if (SvGAMAGIC(sv)) {
2935 /* For an overloaded or magic scalar, we can't know in advance if
2936 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2937 it likes to cache the length. Maybe that should be a documented
2942 = sv_2pv_flags(sv, &len,
2943 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
2946 if (!SvPADTMP(TARG)) {
2947 sv_setsv(TARG, &PL_sv_undef);
2952 else if (DO_UTF8(sv)) {
2953 SETi(utf8_length((U8*)p, (U8*)p + len));
2957 } else if (SvOK(sv)) {
2958 /* Neither magic nor overloaded. */
2960 SETi(sv_len_utf8(sv));
2964 if (!SvPADTMP(TARG)) {
2965 sv_setsv_nomg(TARG, &PL_sv_undef);
2987 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
2989 const IV arybase = CopARYBASE_get(PL_curcop);
2991 const char *repl = NULL;
2993 const int num_args = PL_op->op_private & 7;
2994 bool repl_need_utf8_upgrade = FALSE;
2995 bool repl_is_utf8 = FALSE;
3000 repl = SvPV_const(repl_sv, repl_len);
3001 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
3004 len_iv = SvIV(len_sv);
3005 len_is_uv = SvIOK_UV(len_sv);
3008 pos1_iv = SvIV(pos_sv);
3009 pos1_is_uv = SvIOK_UV(pos_sv);
3015 sv_utf8_upgrade(sv);
3017 else if (DO_UTF8(sv))
3018 repl_need_utf8_upgrade = TRUE;
3020 tmps = SvPV_const(sv, curlen);
3022 utf8_curlen = sv_len_utf8(sv);
3023 if (utf8_curlen == curlen)
3026 curlen = utf8_curlen;
3031 if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
3032 UV pos1_uv = pos1_iv-arybase;
3033 /* Overflow can occur when $[ < 0 */
3034 if (arybase < 0 && pos1_uv < (UV)pos1_iv)
3039 else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
3040 goto bound_fail; /* $[=3; substr($_,2,...) */
3042 else { /* pos < $[ */
3043 if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
3048 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3053 if (pos1_is_uv || pos1_iv > 0) {
3054 if ((UV)pos1_iv > curlen)
3059 if (!len_is_uv && len_iv < 0) {
3060 pos2_iv = curlen + len_iv;
3062 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3065 } else { /* len_iv >= 0 */
3066 if (!pos1_is_uv && pos1_iv < 0) {
3067 pos2_iv = pos1_iv + len_iv;
3068 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3070 if ((UV)len_iv > curlen-(UV)pos1_iv)
3073 pos2_iv = pos1_iv+len_iv;
3083 if (!pos2_is_uv && pos2_iv < 0) {
3084 if (!pos1_is_uv && pos1_iv < 0)
3088 else if (!pos1_is_uv && pos1_iv < 0)
3091 if ((UV)pos2_iv < (UV)pos1_iv)
3093 if ((UV)pos2_iv > curlen)
3097 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3098 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3099 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3100 STRLEN byte_len = len;
3101 STRLEN byte_pos = utf8_curlen
3102 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3104 if (lvalue && !repl) {
3107 if (!SvGMAGICAL(sv)) {
3109 SvPV_force_nolen(sv);
3110 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3111 "Attempt to use reference as lvalue in substr");
3113 if (isGV_with_GP(sv))
3114 SvPV_force_nolen(sv);
3115 else if (SvOK(sv)) /* is it defined ? */
3116 (void)SvPOK_only_UTF8(sv);
3118 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3121 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3122 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3124 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3125 LvTARGOFF(ret) = pos;
3126 LvTARGLEN(ret) = len;
3129 PUSHs(ret); /* avoid SvSETMAGIC here */
3133 SvTAINTED_off(TARG); /* decontaminate */
3134 SvUTF8_off(TARG); /* decontaminate */
3137 sv_setpvn(TARG, tmps, byte_len);
3138 #ifdef USE_LOCALE_COLLATE
3139 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3145 SV* repl_sv_copy = NULL;
3147 if (repl_need_utf8_upgrade) {
3148 repl_sv_copy = newSVsv(repl_sv);
3149 sv_utf8_upgrade(repl_sv_copy);
3150 repl = SvPV_const(repl_sv_copy, repl_len);
3151 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3155 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3158 SvREFCNT_dec(repl_sv_copy);
3168 Perl_croak(aTHX_ "substr outside of string");
3169 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3176 register const IV size = POPi;
3177 register const IV offset = POPi;
3178 register SV * const src = POPs;
3179 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3182 if (lvalue) { /* it's an lvalue! */
3183 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3184 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3186 LvTARG(ret) = SvREFCNT_inc_simple(src);
3187 LvTARGOFF(ret) = offset;
3188 LvTARGLEN(ret) = size;
3192 SvTAINTED_off(TARG); /* decontaminate */
3196 sv_setuv(ret, do_vecget(src, offset, size));
3212 const char *little_p;
3213 const I32 arybase = CopARYBASE_get(PL_curcop);
3216 const bool is_index = PL_op->op_type == OP_INDEX;
3219 /* arybase is in characters, like offset, so combine prior to the
3220 UTF-8 to bytes calculation. */
3221 offset = POPi - arybase;
3225 big_p = SvPV_const(big, biglen);
3226 little_p = SvPV_const(little, llen);
3228 big_utf8 = DO_UTF8(big);
3229 little_utf8 = DO_UTF8(little);
3230 if (big_utf8 ^ little_utf8) {
3231 /* One needs to be upgraded. */
3232 if (little_utf8 && !PL_encoding) {
3233 /* Well, maybe instead we might be able to downgrade the small
3235 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3238 /* If the large string is ISO-8859-1, and it's not possible to
3239 convert the small string to ISO-8859-1, then there is no
3240 way that it could be found anywhere by index. */
3245 /* At this point, pv is a malloc()ed string. So donate it to temp
3246 to ensure it will get free()d */
3247 little = temp = newSV(0);
3248 sv_usepvn(temp, pv, llen);
3249 little_p = SvPVX(little);
3252 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3255 sv_recode_to_utf8(temp, PL_encoding);
3257 sv_utf8_upgrade(temp);
3262 big_p = SvPV_const(big, biglen);
3265 little_p = SvPV_const(little, llen);
3269 if (SvGAMAGIC(big)) {
3270 /* Life just becomes a lot easier if I use a temporary here.
3271 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3272 will trigger magic and overloading again, as will fbm_instr()
3274 big = newSVpvn_flags(big_p, biglen,
3275 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3278 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3279 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3280 warn on undef, and we've already triggered a warning with the
3281 SvPV_const some lines above. We can't remove that, as we need to
3282 call some SvPV to trigger overloading early and find out if the
3284 This is all getting to messy. The API isn't quite clean enough,
3285 because data access has side effects.
3287 little = newSVpvn_flags(little_p, llen,
3288 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3289 little_p = SvPVX(little);
3293 offset = is_index ? 0 : biglen;
3295 if (big_utf8 && offset > 0)
3296 sv_pos_u2b(big, &offset, 0);
3302 else if (offset > (I32)biglen)
3304 if (!(little_p = is_index
3305 ? fbm_instr((unsigned char*)big_p + offset,
3306 (unsigned char*)big_p + biglen, little, 0)
3307 : rninstr(big_p, big_p + offset,
3308 little_p, little_p + llen)))
3311 retval = little_p - big_p;
3312 if (retval > 0 && big_utf8)
3313 sv_pos_b2u(big, &retval);
3317 PUSHi(retval + arybase);
3323 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3324 SvTAINTED_off(TARG);
3325 do_sprintf(TARG, SP-MARK, MARK+1);
3326 TAINT_IF(SvTAINTED(TARG));
3338 const U8 *s = (U8*)SvPV_const(argsv, len);
3340 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3341 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3342 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3346 XPUSHu(DO_UTF8(argsv) ?
3347 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3359 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3361 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3363 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3365 (void) POPs; /* Ignore the argument value. */
3366 value = UNICODE_REPLACEMENT;
3372 SvUPGRADE(TARG,SVt_PV);
3374 if (value > 255 && !IN_BYTES) {
3375 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3376 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3377 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3379 (void)SvPOK_only(TARG);
3388 *tmps++ = (char)value;
3390 (void)SvPOK_only(TARG);
3392 if (PL_encoding && !IN_BYTES) {
3393 sv_recode_to_utf8(TARG, PL_encoding);
3395 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3396 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3400 *tmps++ = (char)value;
3416 const char *tmps = SvPV_const(left, len);
3418 if (DO_UTF8(left)) {
3419 /* If Unicode, try to downgrade.
3420 * If not possible, croak.
3421 * Yes, we made this up. */
3422 SV* const tsv = sv_2mortal(newSVsv(left));
3425 sv_utf8_downgrade(tsv, FALSE);
3426 tmps = SvPV_const(tsv, len);
3428 # ifdef USE_ITHREADS
3430 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3431 /* This should be threadsafe because in ithreads there is only
3432 * one thread per interpreter. If this would not be true,
3433 * we would need a mutex to protect this malloc. */
3434 PL_reentrant_buffer->_crypt_struct_buffer =
3435 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3436 #if defined(__GLIBC__) || defined(__EMX__)
3437 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3438 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3439 /* work around glibc-2.2.5 bug */
3440 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3444 # endif /* HAS_CRYPT_R */
3445 # endif /* USE_ITHREADS */
3447 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3449 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3455 "The crypt() function is unimplemented due to excessive paranoia.");
3459 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3460 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3462 /* Below are several macros that generate code */
3463 /* Generates code to store a unicode codepoint c that is known to occupy
3464 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3465 #define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3467 *(p) = UTF8_TWO_BYTE_HI(c); \
3468 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3471 /* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3472 * available byte after the two bytes */
3473 #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3475 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3476 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3479 /* Generates code to store the upper case of latin1 character l which is known
3480 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3481 * are only two characters that fit this description, and this macro knows
3482 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3484 #define STORE_NON_LATIN1_UC(p, l) \
3486 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3487 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3488 } else { /* Must be the following letter */ \
3489 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3493 /* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3494 * after the character stored */
3495 #define CAT_NON_LATIN1_UC(p, l) \
3497 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3498 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3500 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3504 /* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3505 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3506 * and must require two bytes to store it. Advances p to point to the next
3507 * available position */
3508 #define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3510 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3511 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3512 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3513 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3514 } else {/* else is one of the other two special cases */ \
3515 CAT_NON_LATIN1_UC((p), (l)); \
3521 /* Actually is both lcfirst() and ucfirst(). Only the first character
3522 * changes. This means that possibly we can change in-place, ie., just
3523 * take the source and change that one character and store it back, but not
3524 * if read-only etc, or if the length changes */
3529 STRLEN slen; /* slen is the byte length of the whole SV. */
3532 bool inplace; /* ? Convert first char only, in-place */
3533 bool doing_utf8 = FALSE; /* ? using utf8 */
3534 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3535 const int op_type = PL_op->op_type;
3538 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3539 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3540 * stored as UTF-8 at s. */
3541 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3542 * lowercased) character stored in tmpbuf. May be either
3543 * UTF-8 or not, but in either case is the number of bytes */
3547 s = (const U8*)SvPV_nomg_const(source, slen);
3549 if (ckWARN(WARN_UNINITIALIZED))
3550 report_uninit(source);
3555 /* We may be able to get away with changing only the first character, in
3556 * place, but not if read-only, etc. Later we may discover more reasons to
3557 * not convert in-place. */
3558 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3560 /* First calculate what the changed first character should be. This affects
3561 * whether we can just swap it out, leaving the rest of the string unchanged,
3562 * or even if have to convert the dest to UTF-8 when the source isn't */
3564 if (! slen) { /* If empty */
3565 need = 1; /* still need a trailing NUL */
3567 else if (DO_UTF8(source)) { /* Is the source utf8? */
3570 /* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
3571 * and doesn't allow for the user to specify their own. When code is added to
3572 * detect if there is a user-defined mapping in force here, and if so to use
3573 * that, then the code below can be compiled. The detection would be a good
3574 * thing anyway, as currently the user-defined mappings only work on utf8
3575 * strings, and thus depend on the chosen internal storage method, which is a
3577 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3578 if (UTF8_IS_INVARIANT(*s)) {
3580 /* An invariant source character is either ASCII or, in EBCDIC, an
3581 * ASCII equivalent or a caseless C1 control. In both these cases,
3582 * the lower and upper cases of any character are also invariants
3583 * (and title case is the same as upper case). So it is safe to
3584 * use the simple case change macros which avoid the overhead of
3585 * the general functions. Note that if perl were to be extended to
3586 * do locale handling in UTF-8 strings, this wouldn't be true in,
3587 * for example, Lithuanian or Turkic. */
3588 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3592 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3595 /* Similarly, if the source character isn't invariant but is in the
3596 * latin1 range (or EBCDIC equivalent thereof), we have the case
3597 * changes compiled into perl, and can avoid the overhead of the
3598 * general functions. In this range, the characters are stored as
3599 * two UTF-8 bytes, and it so happens that any changed-case version
3600 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3604 /* Convert the two source bytes to a single Unicode code point
3605 * value, change case and save for below */
3606 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3607 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3608 U8 lower = toLOWER_LATIN1(chr);
3609 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3611 else { /* ucfirst */
3612 U8 upper = toUPPER_LATIN1_MOD(chr);
3614 /* Most of the latin1 range characters are well-behaved. Their
3615 * title and upper cases are the same, and are also in the
3616 * latin1 range. The macro above returns their upper (hence
3617 * title) case, and all that need be done is to save the result
3618 * for below. However, several characters are problematic, and
3619 * have to be handled specially. The MOD in the macro name
3620 * above means that these tricky characters all get mapped to
3621 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3622 * This mapping saves some tests for the majority of the
3625 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3627 /* Not tricky. Just save it. */
3628 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3630 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3632 /* This one is tricky because it is two characters long,
3633 * though the UTF-8 is still two bytes, so the stored
3634 * length doesn't change */
3635 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3636 *(tmpbuf + 1) = 's';
3640 /* The other two have their title and upper cases the same,
3641 * but are tricky because the changed-case characters
3642 * aren't in the latin1 range. They, however, do fit into
3643 * two UTF-8 bytes */
3644 STORE_NON_LATIN1_UC(tmpbuf, chr);
3649 #endif /* end of dont want to break user-defined casing */
3651 /* Here, can't short-cut the general case */
3653 utf8_to_uvchr(s, &ulen);
3654 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3655 else toLOWER_utf8(s, tmpbuf, &tculen);
3657 /* we can't do in-place if the length changes. */
3658 if (ulen != tculen) inplace = FALSE;
3659 need = slen + 1 - ulen + tculen;
3660 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3664 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3665 * latin1 is treated as caseless. Note that a locale takes
3667 tculen = 1; /* Most characters will require one byte, but this will
3668 * need to be overridden for the tricky ones */
3671 if (op_type == OP_LCFIRST) {
3673 /* lower case the first letter: no trickiness for any character */
3674 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3675 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3678 else if (IN_LOCALE_RUNTIME) {
3679 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3680 * have upper and title case different
3683 else if (! IN_UNI_8_BIT) {
3684 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3685 * on EBCDIC machines whatever the
3686 * native function does */
3688 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3689 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3691 /* tmpbuf now has the correct title case for all latin1 characters
3692 * except for the several ones that have tricky handling. All
3693 * of these are mapped by the MOD to the letter below. */
3694 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3696 /* The length is going to change, with all three of these, so
3697 * can't replace just the first character */
3700 /* We use the original to distinguish between these tricky
3702 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3703 /* Two character title case 'Ss', but can remain non-UTF-8 */
3706 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3711 /* The other two tricky ones have their title case outside
3712 * latin1. It is the same as their upper case. */
3714 STORE_NON_LATIN1_UC(tmpbuf, *s);
3716 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3717 * and their upper cases is 2. */
3720 /* The entire result will have to be in UTF-8. Assume worst
3721 * case sizing in conversion. (all latin1 characters occupy
3722 * at most two bytes in utf8) */
3723 convert_source_to_utf8 = TRUE;
3724 need = slen * 2 + 1;
3726 } /* End of is one of the three special chars */
3727 } /* End of use Unicode (Latin1) semantics */
3728 } /* End of changing the case of the first character */
3730 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3731 * generate the result */
3734 /* We can convert in place. This means we change just the first
3735 * character without disturbing the rest; no need to grow */
3737 s = d = (U8*)SvPV_force_nomg(source, slen);
3743 /* Here, we can't convert in place; we earlier calculated how much
3744 * space we will need, so grow to accommodate that */
3745 SvUPGRADE(dest, SVt_PV);
3746 d = (U8*)SvGROW(dest, need);
3747 (void)SvPOK_only(dest);
3754 if (! convert_source_to_utf8) {
3756 /* Here both source and dest are in UTF-8, but have to create
3757 * the entire output. We initialize the result to be the
3758 * title/lower cased first character, and then append the rest
3760 sv_setpvn(dest, (char*)tmpbuf, tculen);
3762 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3766 const U8 *const send = s + slen;
3768 /* Here the dest needs to be in UTF-8, but the source isn't,
3769 * except we earlier UTF-8'd the first character of the source
3770 * into tmpbuf. First put that into dest, and then append the
3771 * rest of the source, converting it to UTF-8 as we go. */
3773 /* Assert tculen is 2 here because the only two characters that
3774 * get to this part of the code have 2-byte UTF-8 equivalents */
3776 *d++ = *(tmpbuf + 1);
3777 s++; /* We have just processed the 1st char */
3779 for (; s < send; s++) {
3780 d = uvchr_to_utf8(d, *s);
3783 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3787 else { /* in-place UTF-8. Just overwrite the first character */
3788 Copy(tmpbuf, d, tculen, U8);
3789 SvCUR_set(dest, need - 1);
3792 else { /* Neither source nor dest are in or need to be UTF-8 */
3794 if (IN_LOCALE_RUNTIME) {
3798 if (inplace) { /* in-place, only need to change the 1st char */
3801 else { /* Not in-place */
3803 /* Copy the case-changed character(s) from tmpbuf */
3804 Copy(tmpbuf, d, tculen, U8);
3805 d += tculen - 1; /* Code below expects d to point to final
3806 * character stored */
3809 else { /* empty source */
3810 /* See bug #39028: Don't taint if empty */
3814 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3815 * the destination to retain that flag */
3819 if (!inplace) { /* Finish the rest of the string, unchanged */
3820 /* This will copy the trailing NUL */
3821 Copy(s + 1, d + 1, slen, U8);
3822 SvCUR_set(dest, need - 1);
3825 if (dest != source && SvTAINTED(source))
3831 /* There's so much setup/teardown code common between uc and lc, I wonder if
3832 it would be worth merging the two, and just having a switch outside each
3833 of the three tight loops. There is less and less commonality though */
3847 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3848 && SvTEMP(source) && !DO_UTF8(source)
3849 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3851 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3852 * make the loop tight, so we overwrite the source with the dest before
3853 * looking at it, and we need to look at the original source
3854 * afterwards. There would also need to be code added to handle
3855 * switching to not in-place in midstream if we run into characters
3856 * that change the length.
3859 s = d = (U8*)SvPV_force_nomg(source, len);
3866 /* The old implementation would copy source into TARG at this point.
3867 This had the side effect that if source was undef, TARG was now
3868 an undefined SV with PADTMP set, and they don't warn inside
3869 sv_2pv_flags(). However, we're now getting the PV direct from
3870 source, which doesn't have PADTMP set, so it would warn. Hence the
3874 s = (const U8*)SvPV_nomg_const(source, len);
3876 if (ckWARN(WARN_UNINITIALIZED))
3877 report_uninit(source);
3883 SvUPGRADE(dest, SVt_PV);
3884 d = (U8*)SvGROW(dest, min);
3885 (void)SvPOK_only(dest);
3890 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3891 to check DO_UTF8 again here. */
3893 if (DO_UTF8(source)) {
3894 const U8 *const send = s + len;
3895 U8 tmpbuf[UTF8_MAXBYTES+1];
3897 /* All occurrences of these are to be moved to follow any other marks.
3898 * This is context-dependent. We may not be passed enough context to
3899 * move the iota subscript beyond all of them, but we do the best we can
3900 * with what we're given. The result is always better than if we
3901 * hadn't done this. And, the problem would only arise if we are
3902 * passed a character without all its combining marks, which would be
3903 * the caller's mistake. The information this is based on comes from a
3904 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3905 * itself) and so can't be checked properly to see if it ever gets
3906 * revised. But the likelihood of it changing is remote */
3907 bool in_iota_subscript = FALSE;
3910 if (in_iota_subscript && ! is_utf8_mark(s)) {
3911 /* A non-mark. Time to output the iota subscript */
3912 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
3913 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3915 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3916 in_iota_subscript = FALSE;
3920 /* See comments at the first instance in this file of this ifdef */
3921 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3923 /* If the UTF-8 character is invariant, then it is in the range
3924 * known by the standard macro; result is only one byte long */
3925 if (UTF8_IS_INVARIANT(*s)) {
3929 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3931 /* Likewise, if it fits in a byte, its case change is in our
3933 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++);
3934 U8 upper = toUPPER_LATIN1_MOD(orig);
3935 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
3943 /* Otherwise, need the general UTF-8 case. Get the changed
3944 * case value and copy it to the output buffer */
3946 const STRLEN u = UTF8SKIP(s);
3949 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
3950 if (uv == GREEK_CAPITAL_LETTER_IOTA
3951 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
3953 in_iota_subscript = TRUE;
3956 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3957 /* If the eventually required minimum size outgrows
3958 * the available space, we need to grow. */
3959 const UV o = d - (U8*)SvPVX_const(dest);
3961 /* If someone uppercases one million U+03B0s we
3962 * SvGROW() one million times. Or we could try
3963 * guessing how much to allocate without allocating too
3964 * much. Such is life. See corresponding comment in
3965 * lc code for another option */
3967 d = (U8*)SvPVX(dest) + o;
3969 Copy(tmpbuf, d, ulen, U8);
3975 if (in_iota_subscript) {
3976 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3980 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3982 else { /* Not UTF-8 */
3984 const U8 *const send = s + len;
3986 /* Use locale casing if in locale; regular style if not treating
3987 * latin1 as having case; otherwise the latin1 casing. Do the
3988 * whole thing in a tight loop, for speed, */
3989 if (IN_LOCALE_RUNTIME) {
3992 for (; s < send; d++, s++)
3993 *d = toUPPER_LC(*s);
3995 else if (! IN_UNI_8_BIT) {
3996 for (; s < send; d++, s++) {
4001 for (; s < send; d++, s++) {
4002 *d = toUPPER_LATIN1_MOD(*s);
4003 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
4005 /* The mainstream case is the tight loop above. To avoid
4006 * extra tests in that, all three characters that require
4007 * special handling are mapped by the MOD to the one tested
4009 * Use the source to distinguish between the three cases */
4011 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4013 /* uc() of this requires 2 characters, but they are
4014 * ASCII. If not enough room, grow the string */
4015 if (SvLEN(dest) < ++min) {
4016 const UV o = d - (U8*)SvPVX_const(dest);
4018 d = (U8*)SvPVX(dest) + o;
4020 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4021 continue; /* Back to the tight loop; still in ASCII */
4024 /* The other two special handling characters have their
4025 * upper cases outside the latin1 range, hence need to be
4026 * in UTF-8, so the whole result needs to be in UTF-8. So,
4027 * here we are somewhere in the middle of processing a
4028 * non-UTF-8 string, and realize that we will have to convert
4029 * the whole thing to UTF-8. What to do? There are
4030 * several possibilities. The simplest to code is to
4031 * convert what we have so far, set a flag, and continue on
4032 * in the loop. The flag would be tested each time through
4033 * the loop, and if set, the next character would be
4034 * converted to UTF-8 and stored. But, I (khw) didn't want
4035 * to slow down the mainstream case at all for this fairly
4036 * rare case, so I didn't want to add a test that didn't
4037 * absolutely have to be there in the loop, besides the
4038 * possibility that it would get too complicated for
4039 * optimizers to deal with. Another possibility is to just
4040 * give up, convert the source to UTF-8, and restart the
4041 * function that way. Another possibility is to convert
4042 * both what has already been processed and what is yet to
4043 * come separately to UTF-8, then jump into the loop that
4044 * handles UTF-8. But the most efficient time-wise of the
4045 * ones I could think of is what follows, and turned out to
4046 * not require much extra code. */
4048 /* Convert what we have so far into UTF-8, telling the
4049 * function that we know it should be converted, and to
4050 * allow extra space for what we haven't processed yet.
4051 * Assume the worst case space requirements for converting
4052 * what we haven't processed so far: that it will require
4053 * two bytes for each remaining source character, plus the
4054 * NUL at the end. This may cause the string pointer to
4055 * move, so re-find it. */
4057 len = d - (U8*)SvPVX_const(dest);
4058 SvCUR_set(dest, len);
4059 len = sv_utf8_upgrade_flags_grow(dest,
4060 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4062 d = (U8*)SvPVX(dest) + len;
4064 /* And append the current character's upper case in UTF-8 */
4065 CAT_NON_LATIN1_UC(d, *s);
4067 /* Now process the remainder of the source, converting to
4068 * upper and UTF-8. If a resulting byte is invariant in
4069 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4070 * append it to the output. */
4073 for (; s < send; s++) {
4074 U8 upper = toUPPER_LATIN1_MOD(*s);
4075 if UTF8_IS_INVARIANT(upper) {
4079 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4083 /* Here have processed the whole source; no need to continue
4084 * with the outer loop. Each character has been converted
4085 * to upper case and converted to UTF-8 */
4088 } /* End of processing all latin1-style chars */
4089 } /* End of processing all chars */
4090 } /* End of source is not empty */
4092 if (source != dest) {
4093 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4094 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4096 } /* End of isn't utf8 */
4097 if (dest != source && SvTAINTED(source))
4116 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4117 && SvTEMP(source) && !DO_UTF8(source)) {
4119 /* We can convert in place, as lowercasing anything in the latin1 range
4120 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4122 s = d = (U8*)SvPV_force_nomg(source, len);
4129 /* The old implementation would copy source into TARG at this point.
4130 This had the side effect that if source was undef, TARG was now
4131 an undefined SV with PADTMP set, and they don't warn inside
4132 sv_2pv_flags(). However, we're now getting the PV direct from
4133 source, which doesn't have PADTMP set, so it would warn. Hence the
4137 s = (const U8*)SvPV_nomg_const(source, len);
4139 if (ckWARN(WARN_UNINITIALIZED))
4140 report_uninit(source);
4146 SvUPGRADE(dest, SVt_PV);
4147 d = (U8*)SvGROW(dest, min);
4148 (void)SvPOK_only(dest);
4153 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4154 to check DO_UTF8 again here. */
4156 if (DO_UTF8(source)) {
4157 const U8 *const send = s + len;
4158 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4161 /* See comments at the first instance in this file of this ifdef */
4162 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4163 if (UTF8_IS_INVARIANT(*s)) {
4165 /* Invariant characters use the standard mappings compiled in.
4170 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4172 /* As do the ones in the Latin1 range */
4173 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++));
4174 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4179 /* Here, is utf8 not in Latin-1 range, have to go out and get
4180 * the mappings from the tables. */
4182 const STRLEN u = UTF8SKIP(s);
4185 #ifndef CONTEXT_DEPENDENT_CASING
4186 toLOWER_utf8(s, tmpbuf, &ulen);
4188 /* This is ifdefd out because it needs more work and thought. It isn't clear
4189 * that we should do it.
4190 * A minor objection is that this is based on a hard-coded rule from the
4191 * Unicode standard, and may change, but this is not very likely at all.
4192 * mktables should check and warn if it does.
4193 * More importantly, if the sigma occurs at the end of the string, we don't
4194 * have enough context to know whether it is part of a larger string or going
4195 * to be or not. It may be that we are passed a subset of the context, via
4196 * a \U...\E, for example, and we could conceivably know the larger context if
4197 * code were changed to pass that in. But, if the string passed in is an
4198 * intermediate result, and the user concatenates two strings together
4199 * after we have made a final sigma, that would be wrong. If the final sigma
4200 * occurs in the middle of the string we are working on, then we know that it
4201 * should be a final sigma, but otherwise we can't be sure. */
4203 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4205 /* If the lower case is a small sigma, it may be that we need
4206 * to change it to a final sigma. This happens at the end of
4207 * a word that contains more than just this character, and only
4208 * when we started with a capital sigma. */
4209 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4210 s > send - len && /* Makes sure not the first letter */
4211 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4214 /* We use the algorithm in:
4215 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4216 * is a CAPITAL SIGMA): If C is preceded by a sequence
4217 * consisting of a cased letter and a case-ignorable
4218 * sequence, and C is not followed by a sequence consisting
4219 * of a case ignorable sequence and then a cased letter,
4220 * then when lowercasing C, C becomes a final sigma */
4222 /* To determine if this is the end of a word, need to peek
4223 * ahead. Look at the next character */
4224 const U8 *peek = s + u;
4226 /* Skip any case ignorable characters */
4227 while (peek < send && is_utf8_case_ignorable(peek)) {
4228 peek += UTF8SKIP(peek);
4231 /* If we reached the end of the string without finding any
4232 * non-case ignorable characters, or if the next such one
4233 * is not-cased, then we have met the conditions for it
4234 * being a final sigma with regards to peek ahead, and so
4235 * must do peek behind for the remaining conditions. (We
4236 * know there is stuff behind to look at since we tested
4237 * above that this isn't the first letter) */
4238 if (peek >= send || ! is_utf8_cased(peek)) {
4239 peek = utf8_hop(s, -1);
4241 /* Here are at the beginning of the first character
4242 * before the original upper case sigma. Keep backing
4243 * up, skipping any case ignorable characters */
4244 while (is_utf8_case_ignorable(peek)) {
4245 peek = utf8_hop(peek, -1);
4248 /* Here peek points to the first byte of the closest
4249 * non-case-ignorable character before the capital
4250 * sigma. If it is cased, then by the Unicode
4251 * algorithm, we should use a small final sigma instead
4252 * of what we have */
4253 if (is_utf8_cased(peek)) {
4254 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4255 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4259 else { /* Not a context sensitive mapping */
4260 #endif /* End of commented out context sensitive */
4261 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4263 /* If the eventually required minimum size outgrows
4264 * the available space, we need to grow. */
4265 const UV o = d - (U8*)SvPVX_const(dest);
4267 /* If someone lowercases one million U+0130s we
4268 * SvGROW() one million times. Or we could try
4269 * guessing how much to allocate without allocating too
4270 * much. Such is life. Another option would be to
4271 * grow an extra byte or two more each time we need to
4272 * grow, which would cut down the million to 500K, with
4275 d = (U8*)SvPVX(dest) + o;
4277 #ifdef CONTEXT_DEPENDENT_CASING
4280 /* Copy the newly lowercased letter to the output buffer we're
4282 Copy(tmpbuf, d, ulen, U8);
4285 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4288 } /* End of looping through the source string */
4291 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4292 } else { /* Not utf8 */
4294 const U8 *const send = s + len;
4296 /* Use locale casing if in locale; regular style if not treating
4297 * latin1 as having case; otherwise the latin1 casing. Do the
4298 * whole thing in a tight loop, for speed, */
4299 if (IN_LOCALE_RUNTIME) {
4302 for (; s < send; d++, s++)
4303 *d = toLOWER_LC(*s);
4305 else if (! IN_UNI_8_BIT) {
4306 for (; s < send; d++, s++) {
4311 for (; s < send; d++, s++) {
4312 *d = toLOWER_LATIN1(*s);
4316 if (source != dest) {
4318 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4321 if (dest != source && SvTAINTED(source))
4330 SV * const sv = TOPs;
4332 register const char *s = SvPV_const(sv,len);
4334 SvUTF8_off(TARG); /* decontaminate */
4337 SvUPGRADE(TARG, SVt_PV);
4338 SvGROW(TARG, (len * 2) + 1);
4342 if (UTF8_IS_CONTINUED(*s)) {
4343 STRLEN ulen = UTF8SKIP(s);
4367 SvCUR_set(TARG, d - SvPVX_const(TARG));
4368 (void)SvPOK_only_UTF8(TARG);
4371 sv_setpvn(TARG, s, len);
4380 dVAR; dSP; dMARK; dORIGMARK;
4381 register AV *const av = MUTABLE_AV(POPs);
4382 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4384 if (SvTYPE(av) == SVt_PVAV) {
4385 const I32 arybase = CopARYBASE_get(PL_curcop);
4386 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4387 bool can_preserve = FALSE;
4393 can_preserve = SvCANEXISTDELETE(av);
4396 if (lval && localizing) {
4399 for (svp = MARK + 1; svp <= SP; svp++) {
4400 const I32 elem = SvIV(*svp);
4404 if (max > AvMAX(av))
4408 while (++MARK <= SP) {
4410 I32 elem = SvIV(*MARK);
4411 bool preeminent = TRUE;
4415 if (localizing && can_preserve) {
4416 /* If we can determine whether the element exist,
4417 * Try to preserve the existenceness of a tied array
4418 * element by using EXISTS and DELETE if possible.
4419 * Fallback to FETCH and STORE otherwise. */
4420 preeminent = av_exists(av, elem);
4423 svp = av_fetch(av, elem, lval);
4425 if (!svp || *svp == &PL_sv_undef)
4426 DIE(aTHX_ PL_no_aelem, elem);
4429 save_aelem(av, elem, svp);
4431 SAVEADELETE(av, elem);
4434 *MARK = svp ? *svp : &PL_sv_undef;
4437 if (GIMME != G_ARRAY) {
4439 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4445 /* Smart dereferencing for keys, values and each */
4457 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4462 "Type of argument to %s must be unblessed hashref or arrayref",
4463 PL_op_desc[PL_op->op_type] );
4466 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4468 "Can't modify %s in %s",
4469 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4472 /* Delegate to correct function for op type */
4474 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4475 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4478 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4486 AV *array = MUTABLE_AV(POPs);
4487 const I32 gimme = GIMME_V;
4488 IV *iterp = Perl_av_iter_p(aTHX_ array);
4489 const IV current = (*iterp)++;
4491 if (current > av_len(array)) {
4493 if (gimme == G_SCALAR)
4500 mPUSHi(CopARYBASE_get(PL_curcop) + current);
4501 if (gimme == G_ARRAY) {
4502 SV **const element = av_fetch(array, current, 0);
4503 PUSHs(element ? *element : &PL_sv_undef);
4512 AV *array = MUTABLE_AV(POPs);
4513 const I32 gimme = GIMME_V;
4515 *Perl_av_iter_p(aTHX_ array) = 0;
4517 if (gimme == G_SCALAR) {
4519 PUSHi(av_len(array) + 1);
4521 else if (gimme == G_ARRAY) {
4522 IV n = Perl_av_len(aTHX_ array);
4523 IV i = CopARYBASE_get(PL_curcop);
4527 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
4529 for (; i <= n; i++) {
4534 for (i = 0; i <= n; i++) {
4535 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4536 PUSHs(elem ? *elem : &PL_sv_undef);
4543 /* Associative arrays. */
4549 HV * hash = MUTABLE_HV(POPs);
4551 const I32 gimme = GIMME_V;
4554 /* might clobber stack_sp */
4555 entry = hv_iternext(hash);
4560 SV* const sv = hv_iterkeysv(entry);
4561 PUSHs(sv); /* won't clobber stack_sp */
4562 if (gimme == G_ARRAY) {
4565 /* might clobber stack_sp */
4566 val = hv_iterval(hash, entry);
4571 else if (gimme == G_SCALAR)
4578 S_do_delete_local(pTHX)
4582 const I32 gimme = GIMME_V;
4586 if (PL_op->op_private & OPpSLICE) {
4588 SV * const osv = POPs;
4589 const bool tied = SvRMAGICAL(osv)
4590 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4591 const bool can_preserve = SvCANEXISTDELETE(osv)
4592 || mg_find((const SV *)osv, PERL_MAGIC_env);
4593 const U32 type = SvTYPE(osv);
4594 if (type == SVt_PVHV) { /* hash element */
4595 HV * const hv = MUTABLE_HV(osv);
4596 while (++MARK <= SP) {
4597 SV * const keysv = *MARK;
4599 bool preeminent = TRUE;
4601 preeminent = hv_exists_ent(hv, keysv, 0);
4603 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4610 sv = hv_delete_ent(hv, keysv, 0, 0);
4611 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4614 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4616 *MARK = sv_mortalcopy(sv);
4622 SAVEHDELETE(hv, keysv);
4623 *MARK = &PL_sv_undef;
4627 else if (type == SVt_PVAV) { /* array element */
4628 if (PL_op->op_flags & OPf_SPECIAL) {
4629 AV * const av = MUTABLE_AV(osv);
4630 while (++MARK <= SP) {
4631 I32 idx = SvIV(*MARK);
4633 bool preeminent = TRUE;
4635 preeminent = av_exists(av, idx);
4637 SV **svp = av_fetch(av, idx, 1);
4644 sv = av_delete(av, idx, 0);
4645 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4648 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4650 *MARK = sv_mortalcopy(sv);
4656 SAVEADELETE(av, idx);
4657 *MARK = &PL_sv_undef;
4663 DIE(aTHX_ "Not a HASH reference");
4664 if (gimme == G_VOID)
4666 else if (gimme == G_SCALAR) {
4671 *++MARK = &PL_sv_undef;
4676 SV * const keysv = POPs;
4677 SV * const osv = POPs;
4678 const bool tied = SvRMAGICAL(osv)
4679 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4680 const bool can_preserve = SvCANEXISTDELETE(osv)
4681 || mg_find((const SV *)osv, PERL_MAGIC_env);
4682 const U32 type = SvTYPE(osv);
4684 if (type == SVt_PVHV) {
4685 HV * const hv = MUTABLE_HV(osv);
4686 bool preeminent = TRUE;
4688 preeminent = hv_exists_ent(hv, keysv, 0);
4690 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4697 sv = hv_delete_ent(hv, keysv, 0, 0);
4698 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4701 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4703 SV *nsv = sv_mortalcopy(sv);
4709 SAVEHDELETE(hv, keysv);
4711 else if (type == SVt_PVAV) {
4712 if (PL_op->op_flags & OPf_SPECIAL) {
4713 AV * const av = MUTABLE_AV(osv);
4714 I32 idx = SvIV(keysv);
4715 bool preeminent = TRUE;
4717 preeminent = av_exists(av, idx);
4719 SV **svp = av_fetch(av, idx, 1);
4726 sv = av_delete(av, idx, 0);
4727 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4730 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4732 SV *nsv = sv_mortalcopy(sv);
4738 SAVEADELETE(av, idx);
4741 DIE(aTHX_ "panic: avhv_delete no longer supported");
4744 DIE(aTHX_ "Not a HASH reference");
4747 if (gimme != G_VOID)
4761 if (PL_op->op_private & OPpLVAL_INTRO)
4762 return do_delete_local();
4765 discard = (gimme == G_VOID) ? G_DISCARD : 0;
4767 if (PL_op->op_private & OPpSLICE) {
4769 HV * const hv = MUTABLE_HV(POPs);
4770 const U32 hvtype = SvTYPE(hv);
4771 if (hvtype == SVt_PVHV) { /* hash element */
4772 while (++MARK <= SP) {
4773 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
4774 *MARK = sv ? sv : &PL_sv_undef;
4777 else if (hvtype == SVt_PVAV) { /* array element */
4778 if (PL_op->op_flags & OPf_SPECIAL) {
4779 while (++MARK <= SP) {
4780 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
4781 *MARK = sv ? sv : &PL_sv_undef;
4786 DIE(aTHX_ "Not a HASH reference");
4789 else if (gimme == G_SCALAR) {
4794 *++MARK = &PL_sv_undef;
4800 HV * const hv = MUTABLE_HV(POPs);
4802 if (SvTYPE(hv) == SVt_PVHV)
4803 sv = hv_delete_ent(hv, keysv, discard, 0);
4804 else if (SvTYPE(hv) == SVt_PVAV) {
4805 if (PL_op->op_flags & OPf_SPECIAL)
4806 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
4808 DIE(aTHX_ "panic: avhv_delete no longer supported");
4811 DIE(aTHX_ "Not a HASH reference");
4827 if (PL_op->op_private & OPpEXISTS_SUB) {
4829 SV * const sv = POPs;
4830 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
4833 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
4838 hv = MUTABLE_HV(POPs);
4839 if (SvTYPE(hv) == SVt_PVHV) {
4840 if (hv_exists_ent(hv, tmpsv, 0))
4843 else if (SvTYPE(hv) == SVt_PVAV) {
4844 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
4845 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
4850 DIE(aTHX_ "Not a HASH reference");
4857 dVAR; dSP; dMARK; dORIGMARK;
4858 register HV * const hv = MUTABLE_HV(POPs);
4859 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4860 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4861 bool can_preserve = FALSE;
4867 if (SvCANEXISTDELETE(hv) || mg_find((const SV *)hv, PERL_MAGIC_env))
4868 can_preserve = TRUE;
4871 while (++MARK <= SP) {
4872 SV * const keysv = *MARK;
4875 bool preeminent = TRUE;
4877 if (localizing && can_preserve) {
4878 /* If we can determine whether the element exist,
4879 * try to preserve the existenceness of a tied hash
4880 * element by using EXISTS and DELETE if possible.
4881 * Fallback to FETCH and STORE otherwise. */
4882 preeminent = hv_exists_ent(hv, keysv, 0);
4885 he = hv_fetch_ent(hv, keysv, lval, 0);
4886 svp = he ? &HeVAL(he) : NULL;
4889 if (!svp || *svp == &PL_sv_undef) {
4890 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
4893 if (HvNAME_get(hv) && isGV(*svp))
4894 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
4895 else if (preeminent)
4896 save_helem_flags(hv, keysv, svp,
4897 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
4899 SAVEHDELETE(hv, keysv);
4902 *MARK = svp ? *svp : &PL_sv_undef;
4904 if (GIMME != G_ARRAY) {
4906 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4912 /* List operators. */
4917 if (GIMME != G_ARRAY) {
4919 *MARK = *SP; /* unwanted list, return last item */
4921 *MARK = &PL_sv_undef;
4931 SV ** const lastrelem = PL_stack_sp;
4932 SV ** const lastlelem = PL_stack_base + POPMARK;
4933 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
4934 register SV ** const firstrelem = lastlelem + 1;
4935 const I32 arybase = CopARYBASE_get(PL_curcop);
4936 I32 is_something_there = FALSE;
4938 register const I32 max = lastrelem - lastlelem;
4939 register SV **lelem;
4941 if (GIMME != G_ARRAY) {
4942 I32 ix = SvIV(*lastlelem);
4947 if (ix < 0 || ix >= max)
4948 *firstlelem = &PL_sv_undef;
4950 *firstlelem = firstrelem[ix];
4956 SP = firstlelem - 1;
4960 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
4961 I32 ix = SvIV(*lelem);
4966 if (ix < 0 || ix >= max)
4967 *lelem = &PL_sv_undef;
4969 is_something_there = TRUE;
4970 if (!(*lelem = firstrelem[ix]))
4971 *lelem = &PL_sv_undef;
4974 if (is_something_there)
4977 SP = firstlelem - 1;
4983 dVAR; dSP; dMARK; dORIGMARK;
4984 const I32 items = SP - MARK;
4985 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
4986 SP = ORIGMARK; /* av_make() might realloc stack_sp */
4987 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
4988 ? newRV_noinc(av) : av);
4994 dVAR; dSP; dMARK; dORIGMARK;
4995 HV* const hv = newHV();
4998 SV * const key = *++MARK;
4999 SV * const val = newSV(0);
5001 sv_setsv(val, *++MARK);
5003 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
5004 (void)hv_store_ent(hv,key,val,0);
5007 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
5008 ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
5013 S_deref_plain_array(pTHX_ AV *ary)
5015 if (SvTYPE(ary) == SVt_PVAV) return ary;
5016 SvGETMAGIC((SV *)ary);
5017 if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV)
5018 Perl_die(aTHX_ "Not an ARRAY reference");
5019 else if (SvOBJECT(SvRV(ary)))
5020 Perl_die(aTHX_ "Not an unblessed ARRAY reference");
5021 return (AV *)SvRV(ary);
5024 #if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
5025 # define DEREF_PLAIN_ARRAY(ary) \
5028 SvTYPE(aRrRay) == SVt_PVAV \
5030 : S_deref_plain_array(aTHX_ aRrRay); \
5033 # define DEREF_PLAIN_ARRAY(ary) \
5035 PL_Sv = (SV *)(ary), \
5036 SvTYPE(PL_Sv) == SVt_PVAV \
5038 : S_deref_plain_array(aTHX_ (AV *)PL_Sv) \
5044 dVAR; dSP; dMARK; dORIGMARK;
5045 int num_args = (SP - MARK);
5046 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5050 register I32 offset;
5051 register I32 length;
5055 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5058 return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg,
5059 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
5066 offset = i = SvIV(*MARK);
5068 offset += AvFILLp(ary) + 1;
5070 offset -= CopARYBASE_get(PL_curcop);
5072 DIE(aTHX_ PL_no_aelem, i);
5074 length = SvIVx(*MARK++);
5076 length += AvFILLp(ary) - offset + 1;
5082 length = AvMAX(ary) + 1; /* close enough to infinity */
5086 length = AvMAX(ary) + 1;
5088 if (offset > AvFILLp(ary) + 1) {
5090 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
5091 offset = AvFILLp(ary) + 1;
5093 after = AvFILLp(ary) + 1 - (offset + length);
5094 if (after < 0) { /* not that much array */
5095 length += after; /* offset+length now in array */
5101 /* At this point, MARK .. SP-1 is our new LIST */
5104 diff = newlen - length;
5105 if (newlen && !AvREAL(ary) && AvREIFY(ary))
5108 /* make new elements SVs now: avoid problems if they're from the array */
5109 for (dst = MARK, i = newlen; i; i--) {
5110 SV * const h = *dst;
5111 *dst++ = newSVsv(h);
5114 if (diff < 0) { /* shrinking the area */
5115 SV **tmparyval = NULL;
5117 Newx(tmparyval, newlen, SV*); /* so remember insertion */
5118 Copy(MARK, tmparyval, newlen, SV*);
5121 MARK = ORIGMARK + 1;
5122 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5123 MEXTEND(MARK, length);
5124 Copy(AvARRAY(ary)+offset, MARK, length, SV*);
5126 EXTEND_MORTAL(length);
5127 for (i = length, dst = MARK; i; i--) {
5128 sv_2mortal(*dst); /* free them eventually */
5135 *MARK = AvARRAY(ary)[offset+length-1];
5138 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
5139 SvREFCNT_dec(*dst++); /* free them now */
5142 AvFILLp(ary) += diff;
5144 /* pull up or down? */
5146 if (offset < after) { /* easier to pull up */
5147 if (offset) { /* esp. if nothing to pull */
5148 src = &AvARRAY(ary)[offset-1];
5149 dst = src - diff; /* diff is negative */
5150 for (i = offset; i > 0; i--) /* can't trust Copy */
5154 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
5158 if (after) { /* anything to pull down? */
5159 src = AvARRAY(ary) + offset + length;
5160 dst = src + diff; /* diff is negative */
5161 Move(src, dst, after, SV*);
5163 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
5164 /* avoid later double free */
5168 dst[--i] = &PL_sv_undef;
5171 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
5172 Safefree(tmparyval);
5175 else { /* no, expanding (or same) */
5176 SV** tmparyval = NULL;
5178 Newx(tmparyval, length, SV*); /* so remember deletion */
5179 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
5182 if (diff > 0) { /* expanding */
5183 /* push up or down? */
5184 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
5188 Move(src, dst, offset, SV*);
5190 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
5192 AvFILLp(ary) += diff;
5195 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
5196 av_extend(ary, AvFILLp(ary) + diff);
5197 AvFILLp(ary) += diff;
5200 dst = AvARRAY(ary) + AvFILLp(ary);
5202 for (i = after; i; i--) {
5210 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
5213 MARK = ORIGMARK + 1;
5214 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5216 Copy(tmparyval, MARK, length, SV*);
5218 EXTEND_MORTAL(length);
5219 for (i = length, dst = MARK; i; i--) {
5220 sv_2mortal(*dst); /* free them eventually */
5227 else if (length--) {
5228 *MARK = tmparyval[length];
5231 while (length-- > 0)
5232 SvREFCNT_dec(tmparyval[length]);
5236 *MARK = &PL_sv_undef;
5237 Safefree(tmparyval);
5241 mg_set(MUTABLE_SV(ary));
5249 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5250 register AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5251 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5254 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5257 ENTER_with_name("call_PUSH");
5258 call_method("PUSH",G_SCALAR|G_DISCARD);
5259 LEAVE_with_name("call_PUSH");
5263 PL_delaymagic = DM_DELAY;
5264 for (++MARK; MARK <= SP; MARK++) {
5265 SV * const sv = newSV(0);
5267 sv_setsv(sv, *MARK);
5268 av_store(ary, AvFILLp(ary)+1, sv);
5270 if (PL_delaymagic & DM_ARRAY_ISA)
5271 mg_set(MUTABLE_SV(ary));
5276 if (OP_GIMME(PL_op, 0) != G_VOID) {
5277 PUSHi( AvFILL(ary) + 1 );
5286 AV * const av = PL_op->op_flags & OPf_SPECIAL
5287 ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs));
5288 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
5292 (void)sv_2mortal(sv);
5299 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5300 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5301 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5304 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5307 ENTER_with_name("call_UNSHIFT");
5308 call_method("UNSHIFT",G_SCALAR|G_DISCARD);
5309 LEAVE_with_name("call_UNSHIFT");
5314 av_unshift(ary, SP - MARK);
5316 SV * const sv = newSVsv(*++MARK);
5317 (void)av_store(ary, i++, sv);
5321 if (OP_GIMME(PL_op, 0) != G_VOID) {
5322 PUSHi( AvFILL(ary) + 1 );
5331 if (GIMME == G_ARRAY) {
5332 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5336 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5337 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5338 av = MUTABLE_AV((*SP));
5339 /* In-place reversing only happens in void context for the array
5340 * assignment. We don't need to push anything on the stack. */
5343 if (SvMAGICAL(av)) {
5345 register SV *tmp = sv_newmortal();
5346 /* For SvCANEXISTDELETE */
5349 bool can_preserve = SvCANEXISTDELETE(av);
5351 for (i = 0, j = av_len(av); i < j; ++i, --j) {
5352 register SV *begin, *end;
5355 if (!av_exists(av, i)) {
5356 if (av_exists(av, j)) {
5357 register SV *sv = av_delete(av, j, 0);
5358 begin = *av_fetch(av, i, TRUE);
5359 sv_setsv_mg(begin, sv);
5363 else if (!av_exists(av, j)) {
5364 register SV *sv = av_delete(av, i, 0);
5365 end = *av_fetch(av, j, TRUE);
5366 sv_setsv_mg(end, sv);
5371 begin = *av_fetch(av, i, TRUE);
5372 end = *av_fetch(av, j, TRUE);
5373 sv_setsv(tmp, begin);
5374 sv_setsv_mg(begin, end);
5375 sv_setsv_mg(end, tmp);
5379 SV **begin = AvARRAY(av);
5382 SV **end = begin + AvFILLp(av);
5384 while (begin < end) {
5385 register SV * const tmp = *begin;
5396 register SV * const tmp = *MARK;
5400 /* safe as long as stack cannot get extended in the above */
5406 register char *down;
5411 SvUTF8_off(TARG); /* decontaminate */
5413 do_join(TARG, &PL_sv_no, MARK, SP);
5415 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv());
5416 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
5417 report_uninit(TARG);
5420 up = SvPV_force(TARG, len);
5422 if (DO_UTF8(TARG)) { /* first reverse each character */
5423 U8* s = (U8*)SvPVX(TARG);
5424 const U8* send = (U8*)(s + len);
5426 if (UTF8_IS_INVARIANT(*s)) {
5431 if (!utf8_to_uvchr(s, 0))
5435 down = (char*)(s - 1);
5436 /* reverse this character */
5440 *down-- = (char)tmp;
5446 down = SvPVX(TARG) + len - 1;
5450 *down-- = (char)tmp;
5452 (void)SvPOK_only_UTF8(TARG);
5464 register IV limit = POPi; /* note, negative is forever */
5465 SV * const sv = POPs;
5467 register const char *s = SvPV_const(sv, len);
5468 const bool do_utf8 = DO_UTF8(sv);
5469 const char *strend = s + len;
5471 register REGEXP *rx;
5473 register const char *m;
5475 const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
5476 I32 maxiters = slen + 10;
5477 I32 trailing_empty = 0;
5479 const I32 origlimit = limit;
5482 const I32 gimme = GIMME_V;
5484 const I32 oldsave = PL_savestack_ix;
5485 U32 make_mortal = SVs_TEMP;
5490 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*);
5495 DIE(aTHX_ "panic: pp_split");
5498 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
5499 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
5501 RX_MATCH_UTF8_set(rx, do_utf8);
5504 if (pm->op_pmreplrootu.op_pmtargetoff) {
5505 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
5508 if (pm->op_pmreplrootu.op_pmtargetgv) {
5509 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
5514 if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
5520 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
5522 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
5529 for (i = AvFILLp(ary); i >= 0; i--)
5530 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
5532 /* temporarily switch stacks */
5533 SAVESWITCHSTACK(PL_curstack, ary);
5537 base = SP - PL_stack_base;
5539 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
5541 while (*s == ' ' || is_utf8_space((U8*)s))
5544 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5545 while (isSPACE_LC(*s))
5553 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) {
5557 gimme_scalar = gimme == G_SCALAR && !ary;
5560 limit = maxiters + 2;
5561 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
5564 /* this one uses 'm' and is a negative test */
5566 while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
5567 const int t = UTF8SKIP(m);
5568 /* is_utf8_space returns FALSE for malform utf8 */
5575 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5576 while (m < strend && !isSPACE_LC(*m))
5579 while (m < strend && !isSPACE(*m))
5592 dstr = newSVpvn_flags(s, m-s,
5593 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5597 /* skip the whitespace found last */
5599 s = m + UTF8SKIP(m);
5603 /* this one uses 's' and is a positive test */
5605 while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
5608 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5609 while (s < strend && isSPACE_LC(*s))
5612 while (s < strend && isSPACE(*s))
5617 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
5619 for (m = s; m < strend && *m != '\n'; m++)
5632 dstr = newSVpvn_flags(s, m-s,
5633 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5639 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
5641 Pre-extend the stack, either the number of bytes or
5642 characters in the string or a limited amount, triggered by:
5644 my ($x, $y) = split //, $str;
5648 if (!gimme_scalar) {
5649 const U32 items = limit - 1;
5658 /* keep track of how many bytes we skip over */
5668 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
5681 dstr = newSVpvn(s, 1);
5697 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
5698 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
5699 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
5700 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
5701 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
5702 SV * const csv = CALLREG_INTUIT_STRING(rx);
5704 len = RX_MINLENRET(rx);
5705 if (len == 1 && !RX_UTF8(rx) && !tail) {
5706 const char c = *SvPV_nolen_const(csv);
5708 for (m = s; m < strend && *m != c; m++)
5719 dstr = newSVpvn_flags(s, m-s,
5720 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5723 /* The rx->minlen is in characters but we want to step
5724 * s ahead by bytes. */
5726 s = (char*)utf8_hop((U8*)m, len);
5728 s = m + len; /* Fake \n at the end */
5732 while (s < strend && --limit &&
5733 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
5734 csv, multiline ? FBMrf_MULTILINE : 0)) )
5743 dstr = newSVpvn_flags(s, m-s,
5744 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5747 /* The rx->minlen is in characters but we want to step
5748 * s ahead by bytes. */
5750 s = (char*)utf8_hop((U8*)m, len);
5752 s = m + len; /* Fake \n at the end */
5757 maxiters += slen * RX_NPARENS(rx);
5758 while (s < strend && --limit)
5762 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
5763 sv, NULL, SvSCREAM(sv) ? REXEC_SCREAM : 0);
5765 if (rex_return == 0)
5767 TAINT_IF(RX_MATCH_TAINTED(rx));
5768 if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
5771 orig = RX_SUBBEG(rx);
5773 strend = s + (strend - m);
5775 m = RX_OFFS(rx)[0].start + orig;
5784 dstr = newSVpvn_flags(s, m-s,
5785 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5788 if (RX_NPARENS(rx)) {
5790 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
5791 s = RX_OFFS(rx)[i].start + orig;
5792 m = RX_OFFS(rx)[i].end + orig;
5794 /* japhy (07/27/01) -- the (m && s) test doesn't catch
5795 parens that didn't match -- they should be set to
5796 undef, not the empty string */
5804 if (m >= orig && s >= orig) {
5805 dstr = newSVpvn_flags(s, m-s,
5806 (do_utf8 ? SVf_UTF8 : 0)
5810 dstr = &PL_sv_undef; /* undef, not "" */
5816 s = RX_OFFS(rx)[0].end + orig;
5820 if (!gimme_scalar) {
5821 iters = (SP - PL_stack_base) - base;
5823 if (iters > maxiters)
5824 DIE(aTHX_ "Split loop");
5826 /* keep field after final delim? */
5827 if (s < strend || (iters && origlimit)) {
5828 if (!gimme_scalar) {
5829 const STRLEN l = strend - s;
5830 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5835 else if (!origlimit) {
5837 iters -= trailing_empty;
5839 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
5840 if (TOPs && !make_mortal)
5842 *SP-- = &PL_sv_undef;
5849 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
5853 if (SvSMAGICAL(ary)) {
5855 mg_set(MUTABLE_SV(ary));
5858 if (gimme == G_ARRAY) {
5860 Copy(AvARRAY(ary), SP + 1, iters, SV*);
5867 ENTER_with_name("call_PUSH");
5868 call_method("PUSH",G_SCALAR|G_DISCARD);
5869 LEAVE_with_name("call_PUSH");
5871 if (gimme == G_ARRAY) {
5873 /* EXTEND should not be needed - we just popped them */
5875 for (i=0; i < iters; i++) {
5876 SV **svp = av_fetch(ary, i, FALSE);
5877 PUSHs((svp) ? *svp : &PL_sv_undef);
5884 if (gimme == G_ARRAY)
5896 SV *const sv = PAD_SVl(PL_op->op_targ);
5898 if (SvPADSTALE(sv)) {
5901 RETURNOP(cLOGOP->op_other);
5903 RETURNOP(cLOGOP->op_next);
5912 assert(SvTYPE(retsv) != SVt_PVCV);
5914 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV) {
5915 retsv = refto(retsv);
5922 PP(unimplemented_op)
5925 const Optype op_type = PL_op->op_type;
5926 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope
5927 with out of range op numbers - it only "special" cases op_custom.
5928 Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
5929 if we get here for a custom op then that means that the custom op didn't
5930 have an implementation. Given that OP_NAME() looks up the custom op
5931 by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
5932 registers &PL_unimplemented_op as the address of their custom op.
5933 NULL doesn't generate a useful error message. "custom" does. */
5934 const char *const name = op_type >= OP_max
5935 ? "[out of range]" : PL_op_name[PL_op->op_type];
5936 if(OP_IS_SOCKET(op_type))
5937 DIE(aTHX_ PL_no_sock_func, name);
5938 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
5945 HV * const hv = (HV*)POPs;
5947 if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; }
5949 if (SvRMAGICAL(hv)) {
5950 MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied);
5952 XPUSHs(magic_scalarpack(hv, mg));
5957 XPUSHs(boolSV(HvUSEDKEYS(hv) != 0));
5963 * c-indentation-style: bsd
5965 * indent-tabs-mode: t
5968 * ex: set ts=8 sts=4 sw=4 noet: