3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
33 /* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
37 #ifdef NEED_GETPID_PROTO
38 extern Pid_t getpid (void);
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
45 #if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
49 /* variations on pp_null */
55 if (GIMME_V == G_SCALAR)
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
71 if (PL_op->op_flags & OPf_REF) {
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
84 if (gimme == G_ARRAY) {
85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
87 if (SvMAGICAL(TARG)) {
89 for (i=0; i < (U32)maxarg; i++) {
90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
99 else if (gimme == G_SCALAR) {
100 SV* const sv = sv_newmortal();
101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
102 sv_setiv(sv, maxarg);
113 assert(SvTYPE(TARG) == SVt_PVHV);
115 if (PL_op->op_private & OPpLVAL_INTRO)
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
118 if (PL_op->op_flags & OPf_REF)
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
129 if (gimme == G_ARRAY) {
130 RETURNOP(Perl_do_kv(aTHX));
132 else if (gimme == G_SCALAR) {
133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
141 static const char S_no_symref_sv[] =
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
148 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
152 sv = amagic_deref_call(sv, to_gv_amg);
156 if (SvTYPE(sv) == SVt_PVIO) {
157 GV * const gv = MUTABLE_GV(sv_newmortal());
158 gv_init(gv, 0, "", 0, 0);
159 GvIOp(gv) = MUTABLE_IO(sv);
160 SvREFCNT_inc_void_NN(sv);
163 else if (!isGV_with_GP(sv))
164 DIE(aTHX_ "Not a GLOB reference");
167 if (!isGV_with_GP(sv)) {
168 if (!SvOK(sv) && sv != &PL_sv_undef) {
169 /* If this is a 'my' scalar and flag is set then vivify
173 Perl_croak_no_modify(aTHX);
174 if (PL_op->op_private & OPpDEREF) {
176 if (cUNOP->op_targ) {
178 SV * const namesv = PAD_SV(cUNOP->op_targ);
179 const char * const name = SvPV(namesv, len);
180 gv = MUTABLE_GV(newSV(0));
181 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
184 const char * const name = CopSTASHPV(PL_curcop);
187 prepare_SV_for_RV(sv);
188 SvRV_set(sv, MUTABLE_SV(gv));
193 if (PL_op->op_flags & OPf_REF ||
194 PL_op->op_private & HINT_STRICT_REFS)
195 DIE(aTHX_ PL_no_usym, "a symbol");
196 if (ckWARN(WARN_UNINITIALIZED))
200 if ((PL_op->op_flags & OPf_SPECIAL) &&
201 !(PL_op->op_flags & OPf_MOD))
203 SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
205 && (!is_gv_magical_sv(sv,0)
206 || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
213 if (PL_op->op_private & HINT_STRICT_REFS)
214 DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
215 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
216 == OPpDONT_INIT_GV) {
217 /* We are the target of a coderef assignment. Return
218 the scalar unchanged, and let pp_sasssign deal with
222 sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD, SVt_PVGV));
224 /* FAKE globs in the symbol table cause weird bugs (#77810) */
225 if (sv) SvFAKE_off(sv);
228 if (sv && SvFAKE(sv)) {
229 SV *newsv = sv_newmortal();
230 sv_setsv_flags(newsv, sv, 0);
234 if (PL_op->op_private & OPpLVAL_INTRO)
235 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
240 /* Helper function for pp_rv2sv and pp_rv2av */
242 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
243 const svtype type, SV ***spp)
248 PERL_ARGS_ASSERT_SOFTREF2XV;
250 if (PL_op->op_private & HINT_STRICT_REFS) {
252 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
254 Perl_die(aTHX_ PL_no_usym, what);
258 PL_op->op_flags & OPf_REF &&
259 PL_op->op_next->op_type != OP_BOOLKEYS
261 Perl_die(aTHX_ PL_no_usym, what);
262 if (ckWARN(WARN_UNINITIALIZED))
264 if (type != SVt_PV && GIMME_V == G_ARRAY) {
268 **spp = &PL_sv_undef;
271 if ((PL_op->op_flags & OPf_SPECIAL) &&
272 !(PL_op->op_flags & OPf_MOD))
274 gv = gv_fetchsv(sv, 0, type);
276 && (!is_gv_magical_sv(sv,0)
277 || !(gv = gv_fetchsv(sv, GV_ADD, type))))
279 **spp = &PL_sv_undef;
284 gv = gv_fetchsv(sv, GV_ADD, type);
294 if (!(PL_op->op_private & OPpDEREFed))
298 sv = amagic_deref_call(sv, to_sv_amg);
303 switch (SvTYPE(sv)) {
309 DIE(aTHX_ "Not a SCALAR reference");
316 if (!isGV_with_GP(gv)) {
317 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
323 if (PL_op->op_flags & OPf_MOD) {
324 if (PL_op->op_private & OPpLVAL_INTRO) {
325 if (cUNOP->op_first->op_type == OP_NULL)
326 sv = save_scalar(MUTABLE_GV(TOPs));
328 sv = save_scalar(gv);
330 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
332 else if (PL_op->op_private & OPpDEREF)
333 vivify_ref(sv, PL_op->op_private & OPpDEREF);
342 AV * const av = MUTABLE_AV(TOPs);
343 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
345 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
347 *sv = newSV_type(SVt_PVMG);
348 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
352 SETs(sv_2mortal(newSViv(
353 AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
363 if (PL_op->op_flags & OPf_MOD || LVRET) {
364 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
365 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
367 LvTARG(ret) = SvREFCNT_inc_simple(sv);
368 PUSHs(ret); /* no SvSETMAGIC */
372 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
373 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
374 if (mg && mg->mg_len >= 0) {
379 PUSHi(i + CopARYBASE_get(PL_curcop));
392 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
394 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
397 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
398 /* (But not in defined().) */
400 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
403 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
404 if ((PL_op->op_private & OPpLVAL_INTRO)) {
405 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
408 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
411 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
415 cv = MUTABLE_CV(&PL_sv_undef);
416 SETs(MUTABLE_SV(cv));
426 SV *ret = &PL_sv_undef;
428 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
429 const char * s = SvPVX_const(TOPs);
430 if (strnEQ(s, "CORE::", 6)) {
431 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
432 if (code < 0) { /* Overridable. */
433 #define MAX_ARGS_OP ((sizeof(I32) - 1) * 2)
434 int i = 0, n = 0, seen_question = 0, defgv = 0;
436 char str[ MAX_ARGS_OP * 2 + 2 ]; /* One ';', one '\0' */
438 if (code == -KEY_chop || code == -KEY_chomp
439 || code == -KEY_exec || code == -KEY_system)
441 if (code == -KEY_mkdir) {
442 ret = newSVpvs_flags("_;$", SVs_TEMP);
445 if (code == -KEY_keys || code == -KEY_values || code == -KEY_each) {
446 ret = newSVpvs_flags("+", SVs_TEMP);
449 if (code == -KEY_push || code == -KEY_unshift) {
450 ret = newSVpvs_flags("+@", SVs_TEMP);
453 if (code == -KEY_pop || code == -KEY_shift) {
454 ret = newSVpvs_flags(";+", SVs_TEMP);
457 if (code == -KEY_splice) {
458 ret = newSVpvs_flags("+;$$@", SVs_TEMP);
461 if (code == -KEY_tied || code == -KEY_untie) {
462 ret = newSVpvs_flags("\\[$@%*]", SVs_TEMP);
465 if (code == -KEY_tie) {
466 ret = newSVpvs_flags("\\[$@%*]$@", SVs_TEMP);
469 if (code == -KEY___FILE__ || code == -KEY___LINE__
470 || code == -KEY___PACKAGE__) {
471 ret = newSVpvs_flags("", SVs_TEMP);
474 if (code == -KEY_readpipe) {
475 s = "CORE::backtick";
477 while (i < MAXO) { /* The slow way. */
478 if (strEQ(s + 6, PL_op_name[i])
479 || strEQ(s + 6, PL_op_desc[i]))
485 goto nonesuch; /* Should not happen... */
487 defgv = PL_opargs[i] & OA_DEFGV;
488 oa = PL_opargs[i] >> OASHIFT;
490 if (oa & OA_OPTIONAL && !seen_question && !defgv) {
494 if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF
495 && (oa & (OA_OPTIONAL - 1)) <= OA_SCALARREF
496 /* But globs are already references (kinda) */
497 && (oa & (OA_OPTIONAL - 1)) != OA_FILEREF
501 str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)];
504 if (defgv && str[n - 1] == '$')
507 ret = newSVpvn_flags(str, n - 1, SVs_TEMP);
509 else if (code) /* Non-Overridable */
511 else { /* None such */
513 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
517 cv = sv_2cv(TOPs, &stash, &gv, 0);
519 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
528 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
530 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
532 PUSHs(MUTABLE_SV(cv));
546 if (GIMME != G_ARRAY) {
550 *MARK = &PL_sv_undef;
551 *MARK = refto(*MARK);
555 EXTEND_MORTAL(SP - MARK);
557 *MARK = refto(*MARK);
562 S_refto(pTHX_ SV *sv)
567 PERL_ARGS_ASSERT_REFTO;
569 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
572 if (!(sv = LvTARG(sv)))
575 SvREFCNT_inc_void_NN(sv);
577 else if (SvTYPE(sv) == SVt_PVAV) {
578 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
579 av_reify(MUTABLE_AV(sv));
581 SvREFCNT_inc_void_NN(sv);
583 else if (SvPADTMP(sv) && !IS_PADGV(sv))
587 SvREFCNT_inc_void_NN(sv);
590 sv_upgrade(rv, SVt_IV);
600 SV * const sv = POPs;
605 if (!sv || !SvROK(sv))
608 pv = sv_reftype(SvRV(sv),TRUE);
609 PUSHp(pv, strlen(pv));
619 stash = CopSTASH(PL_curcop);
621 SV * const ssv = POPs;
625 if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
626 Perl_croak(aTHX_ "Attempt to bless into a reference");
627 ptr = SvPV_const(ssv,len);
629 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
630 "Explicit blessing to '' (assuming package main)");
631 stash = gv_stashpvn(ptr, len, GV_ADD);
634 (void)sv_bless(TOPs, stash);
643 const char * const elem = SvPV_nolen_const(sv);
644 GV * const gv = MUTABLE_GV(POPs);
649 /* elem will always be NUL terminated. */
650 const char * const second_letter = elem + 1;
653 if (strEQ(second_letter, "RRAY"))
654 tmpRef = MUTABLE_SV(GvAV(gv));
657 if (strEQ(second_letter, "ODE"))
658 tmpRef = MUTABLE_SV(GvCVu(gv));
661 if (strEQ(second_letter, "ILEHANDLE")) {
662 /* finally deprecated in 5.8.0 */
663 deprecate("*glob{FILEHANDLE}");
664 tmpRef = MUTABLE_SV(GvIOp(gv));
667 if (strEQ(second_letter, "ORMAT"))
668 tmpRef = MUTABLE_SV(GvFORM(gv));
671 if (strEQ(second_letter, "LOB"))
672 tmpRef = MUTABLE_SV(gv);
675 if (strEQ(second_letter, "ASH"))
676 tmpRef = MUTABLE_SV(GvHV(gv));
679 if (*second_letter == 'O' && !elem[2])
680 tmpRef = MUTABLE_SV(GvIOp(gv));
683 if (strEQ(second_letter, "AME"))
684 sv = newSVhek(GvNAME_HEK(gv));
687 if (strEQ(second_letter, "ACKAGE")) {
688 const HV * const stash = GvSTASH(gv);
689 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
690 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
694 if (strEQ(second_letter, "CALAR"))
709 /* Pattern matching */
714 register unsigned char *s;
717 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
721 if (mg && SvSCREAM(sv))
724 s = (unsigned char*)(SvPV(sv, len));
725 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
726 /* No point in studying a zero length string, and not safe to study
727 anything that doesn't appear to be a simple scalar (and hence might
728 change between now and when the regexp engine runs without our set
729 magic ever running) such as a reference to an object with overloaded
730 stringification. Also refuse to study an FBM scalar, as this gives
731 more flexibility in SV flag usage. No real-world code would ever
732 end up studying an FBM scalar, so this isn't a real pessimisation.
733 Endemic use of I32 in Perl_screaminstr makes it hard to safely push
734 the study length limit from I32_MAX to U32_MAX - 1.
741 } else if (len < 0xFFFF) {
746 size = (256 + len) * quanta;
747 sfirst_raw = (char *)safemalloc(size);
750 DIE(aTHX_ "do_study: out of memory");
754 mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
755 mg->mg_ptr = sfirst_raw;
757 mg->mg_private = quanta;
759 memset(sfirst_raw, ~0, 256 * quanta);
761 /* The assumption here is that most studied strings are fairly short, hence
762 the pain of the extra code is worth it, given the memory savings.
763 80 character string, 336 bytes as U8, down from 1344 as U32
764 800 character string, 2112 bytes as U16, down from 4224 as U32
768 U8 *const sfirst = (U8 *)sfirst_raw;
769 U8 *const snext = sfirst + 256;
771 const U8 ch = s[len];
772 snext[len] = sfirst[ch];
775 } else if (quanta == 2) {
776 U16 *const sfirst = (U16 *)sfirst_raw;
777 U16 *const snext = sfirst + 256;
779 const U8 ch = s[len];
780 snext[len] = sfirst[ch];
784 U32 *const sfirst = (U32 *)sfirst_raw;
785 U32 *const snext = sfirst + 256;
787 const U8 ch = s[len];
788 snext[len] = sfirst[ch];
801 if (PL_op->op_flags & OPf_STACKED)
803 else if (PL_op->op_private & OPpTARGET_MY)
809 TARG = sv_newmortal();
810 if(PL_op->op_type == OP_TRANSR) {
811 SV * const newsv = newSVsv(sv);
815 else PUSHi(do_trans(sv));
819 /* Lvalue operators. */
822 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
828 PERL_ARGS_ASSERT_DO_CHOMP;
830 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
832 if (SvTYPE(sv) == SVt_PVAV) {
834 AV *const av = MUTABLE_AV(sv);
835 const I32 max = AvFILL(av);
837 for (i = 0; i <= max; i++) {
838 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
839 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
840 do_chomp(retval, sv, chomping);
844 else if (SvTYPE(sv) == SVt_PVHV) {
845 HV* const hv = MUTABLE_HV(sv);
847 (void)hv_iterinit(hv);
848 while ((entry = hv_iternext(hv)))
849 do_chomp(retval, hv_iterval(hv,entry), chomping);
852 else if (SvREADONLY(sv)) {
854 /* SV is copy-on-write */
855 sv_force_normal_flags(sv, 0);
858 Perl_croak_no_modify(aTHX);
863 /* XXX, here sv is utf8-ized as a side-effect!
864 If encoding.pm is used properly, almost string-generating
865 operations, including literal strings, chr(), input data, etc.
866 should have been utf8-ized already, right?
868 sv_recode_to_utf8(sv, PL_encoding);
874 char *temp_buffer = NULL;
883 while (len && s[-1] == '\n') {
890 STRLEN rslen, rs_charlen;
891 const char *rsptr = SvPV_const(PL_rs, rslen);
893 rs_charlen = SvUTF8(PL_rs)
897 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
898 /* Assumption is that rs is shorter than the scalar. */
900 /* RS is utf8, scalar is 8 bit. */
902 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
905 /* Cannot downgrade, therefore cannot possibly match
907 assert (temp_buffer == rsptr);
913 else if (PL_encoding) {
914 /* RS is 8 bit, encoding.pm is used.
915 * Do not recode PL_rs as a side-effect. */
916 svrecode = newSVpvn(rsptr, rslen);
917 sv_recode_to_utf8(svrecode, PL_encoding);
918 rsptr = SvPV_const(svrecode, rslen);
919 rs_charlen = sv_len_utf8(svrecode);
922 /* RS is 8 bit, scalar is utf8. */
923 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
937 if (memNE(s, rsptr, rslen))
939 SvIVX(retval) += rs_charlen;
942 s = SvPV_force_nolen(sv);
950 SvREFCNT_dec(svrecode);
952 Safefree(temp_buffer);
954 if (len && !SvPOK(sv))
955 s = SvPV_force_nomg(sv, len);
958 char * const send = s + len;
959 char * const start = s;
961 while (s > start && UTF8_IS_CONTINUATION(*s))
963 if (is_utf8_string((U8*)s, send - s)) {
964 sv_setpvn(retval, s, send - s);
966 SvCUR_set(sv, s - start);
972 sv_setpvs(retval, "");
976 sv_setpvn(retval, s, 1);
983 sv_setpvs(retval, "");
991 const bool chomping = PL_op->op_type == OP_SCHOMP;
995 do_chomp(TARG, TOPs, chomping);
1002 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
1003 const bool chomping = PL_op->op_type == OP_CHOMP;
1008 do_chomp(TARG, *++MARK, chomping);
1019 if (!PL_op->op_private) {
1028 SV_CHECK_THINKFIRST_COW_DROP(sv);
1030 switch (SvTYPE(sv)) {
1034 av_undef(MUTABLE_AV(sv));
1037 hv_undef(MUTABLE_HV(sv));
1040 if (cv_const_sv((const CV *)sv))
1041 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
1042 CvANON((const CV *)sv) ? "(anonymous)"
1043 : GvENAME(CvGV((const CV *)sv)));
1047 /* let user-undef'd sub keep its identity */
1048 GV* const gv = CvGV((const CV *)sv);
1049 cv_undef(MUTABLE_CV(sv));
1050 CvGV_set(MUTABLE_CV(sv), gv);
1055 SvSetMagicSV(sv, &PL_sv_undef);
1058 else if (isGV_with_GP(sv)) {
1062 /* undef *Pkg::meth_name ... */
1064 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1065 && HvENAME_get(stash);
1067 if((stash = GvHV((const GV *)sv))) {
1068 if(HvENAME_get(stash))
1069 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1073 gp_free(MUTABLE_GV(sv));
1075 GvGP_set(sv, gp_ref(gp));
1076 GvSV(sv) = newSV(0);
1077 GvLINE(sv) = CopLINE(PL_curcop);
1078 GvEGV(sv) = MUTABLE_GV(sv);
1082 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1084 /* undef *Foo::ISA */
1085 if( strEQ(GvNAME((const GV *)sv), "ISA")
1086 && (stash = GvSTASH((const GV *)sv))
1087 && (method_changed || HvENAME(stash)) )
1088 mro_isa_changed_in(stash);
1089 else if(method_changed)
1090 mro_method_changed_in(
1091 GvSTASH((const GV *)sv)
1098 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1113 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1114 Perl_croak_no_modify(aTHX);
1115 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1116 && SvIVX(TOPs) != IV_MIN)
1118 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1119 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1130 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1131 Perl_croak_no_modify(aTHX);
1133 TARG = sv_newmortal();
1134 sv_setsv(TARG, TOPs);
1135 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1136 && SvIVX(TOPs) != IV_MAX)
1138 SvIV_set(TOPs, SvIVX(TOPs) + 1);
1139 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1144 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1154 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1155 Perl_croak_no_modify(aTHX);
1157 TARG = sv_newmortal();
1158 sv_setsv(TARG, TOPs);
1159 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1160 && SvIVX(TOPs) != IV_MIN)
1162 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1163 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1172 /* Ordinary operators. */
1176 dVAR; dSP; dATARGET; SV *svl, *svr;
1177 #ifdef PERL_PRESERVE_IVUV
1180 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1183 #ifdef PERL_PRESERVE_IVUV
1184 /* For integer to integer power, we do the calculation by hand wherever
1185 we're sure it is safe; otherwise we call pow() and try to convert to
1186 integer afterwards. */
1188 SvIV_please_nomg(svr);
1190 SvIV_please_nomg(svl);
1199 const IV iv = SvIVX(svr);
1203 goto float_it; /* Can't do negative powers this way. */
1207 baseuok = SvUOK(svl);
1209 baseuv = SvUVX(svl);
1211 const IV iv = SvIVX(svl);
1214 baseuok = TRUE; /* effectively it's a UV now */
1216 baseuv = -iv; /* abs, baseuok == false records sign */
1219 /* now we have integer ** positive integer. */
1222 /* foo & (foo - 1) is zero only for a power of 2. */
1223 if (!(baseuv & (baseuv - 1))) {
1224 /* We are raising power-of-2 to a positive integer.
1225 The logic here will work for any base (even non-integer
1226 bases) but it can be less accurate than
1227 pow (base,power) or exp (power * log (base)) when the
1228 intermediate values start to spill out of the mantissa.
1229 With powers of 2 we know this can't happen.
1230 And powers of 2 are the favourite thing for perl
1231 programmers to notice ** not doing what they mean. */
1233 NV base = baseuok ? baseuv : -(NV)baseuv;
1238 while (power >>= 1) {
1246 SvIV_please_nomg(svr);
1249 register unsigned int highbit = 8 * sizeof(UV);
1250 register unsigned int diff = 8 * sizeof(UV);
1251 while (diff >>= 1) {
1253 if (baseuv >> highbit) {
1257 /* we now have baseuv < 2 ** highbit */
1258 if (power * highbit <= 8 * sizeof(UV)) {
1259 /* result will definitely fit in UV, so use UV math
1260 on same algorithm as above */
1261 register UV result = 1;
1262 register UV base = baseuv;
1263 const bool odd_power = cBOOL(power & 1);
1267 while (power >>= 1) {
1274 if (baseuok || !odd_power)
1275 /* answer is positive */
1277 else if (result <= (UV)IV_MAX)
1278 /* answer negative, fits in IV */
1279 SETi( -(IV)result );
1280 else if (result == (UV)IV_MIN)
1281 /* 2's complement assumption: special case IV_MIN */
1284 /* answer negative, doesn't fit */
1285 SETn( -(NV)result );
1295 NV right = SvNV_nomg(svr);
1296 NV left = SvNV_nomg(svl);
1299 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1301 We are building perl with long double support and are on an AIX OS
1302 afflicted with a powl() function that wrongly returns NaNQ for any
1303 negative base. This was reported to IBM as PMR #23047-379 on
1304 03/06/2006. The problem exists in at least the following versions
1305 of AIX and the libm fileset, and no doubt others as well:
1307 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1308 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1309 AIX 5.2.0 bos.adt.libm 5.2.0.85
1311 So, until IBM fixes powl(), we provide the following workaround to
1312 handle the problem ourselves. Our logic is as follows: for
1313 negative bases (left), we use fmod(right, 2) to check if the
1314 exponent is an odd or even integer:
1316 - if odd, powl(left, right) == -powl(-left, right)
1317 - if even, powl(left, right) == powl(-left, right)
1319 If the exponent is not an integer, the result is rightly NaNQ, so
1320 we just return that (as NV_NAN).
1324 NV mod2 = Perl_fmod( right, 2.0 );
1325 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1326 SETn( -Perl_pow( -left, right) );
1327 } else if (mod2 == 0.0) { /* even integer */
1328 SETn( Perl_pow( -left, right) );
1329 } else { /* fractional power */
1333 SETn( Perl_pow( left, right) );
1336 SETn( Perl_pow( left, right) );
1337 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1339 #ifdef PERL_PRESERVE_IVUV
1341 SvIV_please_nomg(svr);
1349 dVAR; dSP; dATARGET; SV *svl, *svr;
1350 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1353 #ifdef PERL_PRESERVE_IVUV
1354 SvIV_please_nomg(svr);
1356 /* Unless the left argument is integer in range we are going to have to
1357 use NV maths. Hence only attempt to coerce the right argument if
1358 we know the left is integer. */
1359 /* Left operand is defined, so is it IV? */
1360 SvIV_please_nomg(svl);
1362 bool auvok = SvUOK(svl);
1363 bool buvok = SvUOK(svr);
1364 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1365 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1374 const IV aiv = SvIVX(svl);
1377 auvok = TRUE; /* effectively it's a UV now */
1379 alow = -aiv; /* abs, auvok == false records sign */
1385 const IV biv = SvIVX(svr);
1388 buvok = TRUE; /* effectively it's a UV now */
1390 blow = -biv; /* abs, buvok == false records sign */
1394 /* If this does sign extension on unsigned it's time for plan B */
1395 ahigh = alow >> (4 * sizeof (UV));
1397 bhigh = blow >> (4 * sizeof (UV));
1399 if (ahigh && bhigh) {
1401 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1402 which is overflow. Drop to NVs below. */
1403 } else if (!ahigh && !bhigh) {
1404 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1405 so the unsigned multiply cannot overflow. */
1406 const UV product = alow * blow;
1407 if (auvok == buvok) {
1408 /* -ve * -ve or +ve * +ve gives a +ve result. */
1412 } else if (product <= (UV)IV_MIN) {
1413 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1414 /* -ve result, which could overflow an IV */
1416 SETi( -(IV)product );
1418 } /* else drop to NVs below. */
1420 /* One operand is large, 1 small */
1423 /* swap the operands */
1425 bhigh = blow; /* bhigh now the temp var for the swap */
1429 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1430 multiplies can't overflow. shift can, add can, -ve can. */
1431 product_middle = ahigh * blow;
1432 if (!(product_middle & topmask)) {
1433 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1435 product_middle <<= (4 * sizeof (UV));
1436 product_low = alow * blow;
1438 /* as for pp_add, UV + something mustn't get smaller.
1439 IIRC ANSI mandates this wrapping *behaviour* for
1440 unsigned whatever the actual representation*/
1441 product_low += product_middle;
1442 if (product_low >= product_middle) {
1443 /* didn't overflow */
1444 if (auvok == buvok) {
1445 /* -ve * -ve or +ve * +ve gives a +ve result. */
1447 SETu( product_low );
1449 } else if (product_low <= (UV)IV_MIN) {
1450 /* 2s complement assumption again */
1451 /* -ve result, which could overflow an IV */
1453 SETi( -(IV)product_low );
1455 } /* else drop to NVs below. */
1457 } /* product_middle too large */
1458 } /* ahigh && bhigh */
1463 NV right = SvNV_nomg(svr);
1464 NV left = SvNV_nomg(svl);
1466 SETn( left * right );
1473 dVAR; dSP; dATARGET; SV *svl, *svr;
1474 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1477 /* Only try to do UV divide first
1478 if ((SLOPPYDIVIDE is true) or
1479 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1481 The assumption is that it is better to use floating point divide
1482 whenever possible, only doing integer divide first if we can't be sure.
1483 If NV_PRESERVES_UV is true then we know at compile time that no UV
1484 can be too large to preserve, so don't need to compile the code to
1485 test the size of UVs. */
1488 # define PERL_TRY_UV_DIVIDE
1489 /* ensure that 20./5. == 4. */
1491 # ifdef PERL_PRESERVE_IVUV
1492 # ifndef NV_PRESERVES_UV
1493 # define PERL_TRY_UV_DIVIDE
1498 #ifdef PERL_TRY_UV_DIVIDE
1499 SvIV_please_nomg(svr);
1501 SvIV_please_nomg(svl);
1503 bool left_non_neg = SvUOK(svl);
1504 bool right_non_neg = SvUOK(svr);
1508 if (right_non_neg) {
1512 const IV biv = SvIVX(svr);
1515 right_non_neg = TRUE; /* effectively it's a UV now */
1521 /* historically undef()/0 gives a "Use of uninitialized value"
1522 warning before dieing, hence this test goes here.
1523 If it were immediately before the second SvIV_please, then
1524 DIE() would be invoked before left was even inspected, so
1525 no inspection would give no warning. */
1527 DIE(aTHX_ "Illegal division by zero");
1533 const IV aiv = SvIVX(svl);
1536 left_non_neg = TRUE; /* effectively it's a UV now */
1545 /* For sloppy divide we always attempt integer division. */
1547 /* Otherwise we only attempt it if either or both operands
1548 would not be preserved by an NV. If both fit in NVs
1549 we fall through to the NV divide code below. However,
1550 as left >= right to ensure integer result here, we know that
1551 we can skip the test on the right operand - right big
1552 enough not to be preserved can't get here unless left is
1555 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1558 /* Integer division can't overflow, but it can be imprecise. */
1559 const UV result = left / right;
1560 if (result * right == left) {
1561 SP--; /* result is valid */
1562 if (left_non_neg == right_non_neg) {
1563 /* signs identical, result is positive. */
1567 /* 2s complement assumption */
1568 if (result <= (UV)IV_MIN)
1569 SETi( -(IV)result );
1571 /* It's exact but too negative for IV. */
1572 SETn( -(NV)result );
1575 } /* tried integer divide but it was not an integer result */
1576 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1577 } /* left wasn't SvIOK */
1578 } /* right wasn't SvIOK */
1579 #endif /* PERL_TRY_UV_DIVIDE */
1581 NV right = SvNV_nomg(svr);
1582 NV left = SvNV_nomg(svl);
1583 (void)POPs;(void)POPs;
1584 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1585 if (! Perl_isnan(right) && right == 0.0)
1589 DIE(aTHX_ "Illegal division by zero");
1590 PUSHn( left / right );
1597 dVAR; dSP; dATARGET;
1598 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1602 bool left_neg = FALSE;
1603 bool right_neg = FALSE;
1604 bool use_double = FALSE;
1605 bool dright_valid = FALSE;
1608 SV * const svr = TOPs;
1609 SV * const svl = TOPm1s;
1610 SvIV_please_nomg(svr);
1612 right_neg = !SvUOK(svr);
1616 const IV biv = SvIVX(svr);
1619 right_neg = FALSE; /* effectively it's a UV now */
1626 dright = SvNV_nomg(svr);
1627 right_neg = dright < 0;
1630 if (dright < UV_MAX_P1) {
1631 right = U_V(dright);
1632 dright_valid = TRUE; /* In case we need to use double below. */
1638 /* At this point use_double is only true if right is out of range for
1639 a UV. In range NV has been rounded down to nearest UV and
1640 use_double false. */
1641 SvIV_please_nomg(svl);
1642 if (!use_double && SvIOK(svl)) {
1644 left_neg = !SvUOK(svl);
1648 const IV aiv = SvIVX(svl);
1651 left_neg = FALSE; /* effectively it's a UV now */
1659 dleft = SvNV_nomg(svl);
1660 left_neg = dleft < 0;
1664 /* This should be exactly the 5.6 behaviour - if left and right are
1665 both in range for UV then use U_V() rather than floor. */
1667 if (dleft < UV_MAX_P1) {
1668 /* right was in range, so is dleft, so use UVs not double.
1672 /* left is out of range for UV, right was in range, so promote
1673 right (back) to double. */
1675 /* The +0.5 is used in 5.6 even though it is not strictly
1676 consistent with the implicit +0 floor in the U_V()
1677 inside the #if 1. */
1678 dleft = Perl_floor(dleft + 0.5);
1681 dright = Perl_floor(dright + 0.5);
1692 DIE(aTHX_ "Illegal modulus zero");
1694 dans = Perl_fmod(dleft, dright);
1695 if ((left_neg != right_neg) && dans)
1696 dans = dright - dans;
1699 sv_setnv(TARG, dans);
1705 DIE(aTHX_ "Illegal modulus zero");
1708 if ((left_neg != right_neg) && ans)
1711 /* XXX may warn: unary minus operator applied to unsigned type */
1712 /* could change -foo to be (~foo)+1 instead */
1713 if (ans <= ~((UV)IV_MAX)+1)
1714 sv_setiv(TARG, ~ans+1);
1716 sv_setnv(TARG, -(NV)ans);
1719 sv_setuv(TARG, ans);
1728 dVAR; dSP; dATARGET;
1732 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1733 /* TODO: think of some way of doing list-repeat overloading ??? */
1738 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1744 const UV uv = SvUV_nomg(sv);
1746 count = IV_MAX; /* The best we can do? */
1750 const IV iv = SvIV_nomg(sv);
1757 else if (SvNOKp(sv)) {
1758 const NV nv = SvNV_nomg(sv);
1765 count = SvIV_nomg(sv);
1767 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1769 static const char oom_list_extend[] = "Out of memory during list extend";
1770 const I32 items = SP - MARK;
1771 const I32 max = items * count;
1773 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1774 /* Did the max computation overflow? */
1775 if (items > 0 && max > 0 && (max < items || max < count))
1776 Perl_croak(aTHX_ oom_list_extend);
1781 /* This code was intended to fix 20010809.028:
1784 for (($x =~ /./g) x 2) {
1785 print chop; # "abcdabcd" expected as output.
1788 * but that change (#11635) broke this code:
1790 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1792 * I can't think of a better fix that doesn't introduce
1793 * an efficiency hit by copying the SVs. The stack isn't
1794 * refcounted, and mortalisation obviously doesn't
1795 * Do The Right Thing when the stack has more than
1796 * one pointer to the same mortal value.
1800 *SP = sv_2mortal(newSVsv(*SP));
1810 repeatcpy((char*)(MARK + items), (char*)MARK,
1811 items * sizeof(const SV *), count - 1);
1814 else if (count <= 0)
1817 else { /* Note: mark already snarfed by pp_list */
1818 SV * const tmpstr = POPs;
1821 static const char oom_string_extend[] =
1822 "Out of memory during string extend";
1825 sv_setsv_nomg(TARG, tmpstr);
1826 SvPV_force_nomg(TARG, len);
1827 isutf = DO_UTF8(TARG);
1832 const STRLEN max = (UV)count * len;
1833 if (len > MEM_SIZE_MAX / count)
1834 Perl_croak(aTHX_ oom_string_extend);
1835 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1836 SvGROW(TARG, max + 1);
1837 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1838 SvCUR_set(TARG, SvCUR(TARG) * count);
1840 *SvEND(TARG) = '\0';
1843 (void)SvPOK_only_UTF8(TARG);
1845 (void)SvPOK_only(TARG);
1847 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1848 /* The parser saw this as a list repeat, and there
1849 are probably several items on the stack. But we're
1850 in scalar context, and there's no pp_list to save us
1851 now. So drop the rest of the items -- robin@kitsite.com
1863 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1864 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1867 useleft = USE_LEFT(svl);
1868 #ifdef PERL_PRESERVE_IVUV
1869 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1870 "bad things" happen if you rely on signed integers wrapping. */
1871 SvIV_please_nomg(svr);
1873 /* Unless the left argument is integer in range we are going to have to
1874 use NV maths. Hence only attempt to coerce the right argument if
1875 we know the left is integer. */
1876 register UV auv = 0;
1882 a_valid = auvok = 1;
1883 /* left operand is undef, treat as zero. */
1885 /* Left operand is defined, so is it IV? */
1886 SvIV_please_nomg(svl);
1888 if ((auvok = SvUOK(svl)))
1891 register const IV aiv = SvIVX(svl);
1894 auvok = 1; /* Now acting as a sign flag. */
1895 } else { /* 2s complement assumption for IV_MIN */
1903 bool result_good = 0;
1906 bool buvok = SvUOK(svr);
1911 register const IV biv = SvIVX(svr);
1918 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1919 else "IV" now, independent of how it came in.
1920 if a, b represents positive, A, B negative, a maps to -A etc
1925 all UV maths. negate result if A negative.
1926 subtract if signs same, add if signs differ. */
1928 if (auvok ^ buvok) {
1937 /* Must get smaller */
1942 if (result <= buv) {
1943 /* result really should be -(auv-buv). as its negation
1944 of true value, need to swap our result flag */
1956 if (result <= (UV)IV_MIN)
1957 SETi( -(IV)result );
1959 /* result valid, but out of range for IV. */
1960 SETn( -(NV)result );
1964 } /* Overflow, drop through to NVs. */
1969 NV value = SvNV_nomg(svr);
1973 /* left operand is undef, treat as zero - value */
1977 SETn( SvNV_nomg(svl) - value );
1984 dVAR; dSP; dATARGET; SV *svl, *svr;
1985 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1989 const IV shift = SvIV_nomg(svr);
1990 if (PL_op->op_private & HINT_INTEGER) {
1991 const IV i = SvIV_nomg(svl);
1995 const UV u = SvUV_nomg(svl);
2004 dVAR; dSP; dATARGET; SV *svl, *svr;
2005 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
2009 const IV shift = SvIV_nomg(svr);
2010 if (PL_op->op_private & HINT_INTEGER) {
2011 const IV i = SvIV_nomg(svl);
2015 const UV u = SvUV_nomg(svl);
2027 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
2031 (SvIOK_notUV(left) && SvIOK_notUV(right))
2032 ? (SvIVX(left) < SvIVX(right))
2033 : (do_ncmp(left, right) == -1)
2043 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
2047 (SvIOK_notUV(left) && SvIOK_notUV(right))
2048 ? (SvIVX(left) > SvIVX(right))
2049 : (do_ncmp(left, right) == 1)
2059 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
2063 (SvIOK_notUV(left) && SvIOK_notUV(right))
2064 ? (SvIVX(left) <= SvIVX(right))
2065 : (do_ncmp(left, right) <= 0)
2075 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2079 (SvIOK_notUV(left) && SvIOK_notUV(right))
2080 ? (SvIVX(left) >= SvIVX(right))
2081 : ( (do_ncmp(left, right) & 2) == 0)
2091 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2095 (SvIOK_notUV(left) && SvIOK_notUV(right))
2096 ? (SvIVX(left) != SvIVX(right))
2097 : (do_ncmp(left, right) != 0)
2102 /* compare left and right SVs. Returns:
2106 * 2: left or right was a NaN
2109 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2113 PERL_ARGS_ASSERT_DO_NCMP;
2114 #ifdef PERL_PRESERVE_IVUV
2115 SvIV_please_nomg(right);
2116 /* Fortunately it seems NaN isn't IOK */
2118 SvIV_please_nomg(left);
2121 const IV leftiv = SvIVX(left);
2122 if (!SvUOK(right)) {
2123 /* ## IV <=> IV ## */
2124 const IV rightiv = SvIVX(right);
2125 return (leftiv > rightiv) - (leftiv < rightiv);
2127 /* ## IV <=> UV ## */
2129 /* As (b) is a UV, it's >=0, so it must be < */
2132 const UV rightuv = SvUVX(right);
2133 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2138 /* ## UV <=> UV ## */
2139 const UV leftuv = SvUVX(left);
2140 const UV rightuv = SvUVX(right);
2141 return (leftuv > rightuv) - (leftuv < rightuv);
2143 /* ## UV <=> IV ## */
2145 const IV rightiv = SvIVX(right);
2147 /* As (a) is a UV, it's >=0, so it cannot be < */
2150 const UV leftuv = SvUVX(left);
2151 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2159 NV const rnv = SvNV_nomg(right);
2160 NV const lnv = SvNV_nomg(left);
2162 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2163 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2166 return (lnv > rnv) - (lnv < rnv);
2185 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2188 value = do_ncmp(left, right);
2203 int amg_type = sle_amg;
2207 switch (PL_op->op_type) {
2226 tryAMAGICbin_MG(amg_type, AMGf_set);
2229 const int cmp = (IN_LOCALE_RUNTIME
2230 ? sv_cmp_locale_flags(left, right, 0)
2231 : sv_cmp_flags(left, right, 0));
2232 SETs(boolSV(cmp * multiplier < rhs));
2240 tryAMAGICbin_MG(seq_amg, AMGf_set);
2243 SETs(boolSV(sv_eq_flags(left, right, 0)));
2251 tryAMAGICbin_MG(sne_amg, AMGf_set);
2254 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2262 tryAMAGICbin_MG(scmp_amg, 0);
2265 const int cmp = (IN_LOCALE_RUNTIME
2266 ? sv_cmp_locale_flags(left, right, 0)
2267 : sv_cmp_flags(left, right, 0));
2275 dVAR; dSP; dATARGET;
2276 tryAMAGICbin_MG(band_amg, AMGf_assign);
2279 if (SvNIOKp(left) || SvNIOKp(right)) {
2280 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2281 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2282 if (PL_op->op_private & HINT_INTEGER) {
2283 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2287 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2290 if (left_ro_nonnum) SvNIOK_off(left);
2291 if (right_ro_nonnum) SvNIOK_off(right);
2294 do_vop(PL_op->op_type, TARG, left, right);
2303 dVAR; dSP; dATARGET;
2304 const int op_type = PL_op->op_type;
2306 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2309 if (SvNIOKp(left) || SvNIOKp(right)) {
2310 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2311 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2312 if (PL_op->op_private & HINT_INTEGER) {
2313 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2314 const IV r = SvIV_nomg(right);
2315 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2319 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2320 const UV r = SvUV_nomg(right);
2321 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2324 if (left_ro_nonnum) SvNIOK_off(left);
2325 if (right_ro_nonnum) SvNIOK_off(right);
2328 do_vop(op_type, TARG, left, right);
2338 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2340 SV * const sv = TOPs;
2341 const int flags = SvFLAGS(sv);
2343 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
2347 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2348 /* It's publicly an integer, or privately an integer-not-float */
2351 if (SvIVX(sv) == IV_MIN) {
2352 /* 2s complement assumption. */
2353 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2356 else if (SvUVX(sv) <= IV_MAX) {
2361 else if (SvIVX(sv) != IV_MIN) {
2365 #ifdef PERL_PRESERVE_IVUV
2373 SETn(-SvNV_nomg(sv));
2374 else if (SvPOKp(sv)) {
2376 const char * const s = SvPV_nomg_const(sv, len);
2377 if (isIDFIRST(*s)) {
2378 sv_setpvs(TARG, "-");
2381 else if (*s == '+' || *s == '-') {
2382 sv_setsv_nomg(TARG, sv);
2383 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2385 else if (DO_UTF8(sv)) {
2386 SvIV_please_nomg(sv);
2388 goto oops_its_an_int;
2390 sv_setnv(TARG, -SvNV_nomg(sv));
2392 sv_setpvs(TARG, "-");
2397 SvIV_please_nomg(sv);
2399 goto oops_its_an_int;
2400 sv_setnv(TARG, -SvNV_nomg(sv));
2405 SETn(-SvNV_nomg(sv));
2413 tryAMAGICun_MG(not_amg, AMGf_set);
2414 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2421 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2425 if (PL_op->op_private & HINT_INTEGER) {
2426 const IV i = ~SvIV_nomg(sv);
2430 const UV u = ~SvUV_nomg(sv);
2439 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2440 sv_setsv_nomg(TARG, sv);
2441 tmps = (U8*)SvPV_force_nomg(TARG, len);
2444 /* Calculate exact length, let's not estimate. */
2449 U8 * const send = tmps + len;
2450 U8 * const origtmps = tmps;
2451 const UV utf8flags = UTF8_ALLOW_ANYUV;
2453 while (tmps < send) {
2454 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2456 targlen += UNISKIP(~c);
2462 /* Now rewind strings and write them. */
2469 Newx(result, targlen + 1, U8);
2471 while (tmps < send) {
2472 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2474 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2477 sv_usepvn_flags(TARG, (char*)result, targlen,
2478 SV_HAS_TRAILING_NUL);
2485 Newx(result, nchar + 1, U8);
2487 while (tmps < send) {
2488 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2493 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2501 register long *tmpl;
2502 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2505 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2510 for ( ; anum > 0; anum--, tmps++)
2518 /* integer versions of some of the above */
2522 dVAR; dSP; dATARGET;
2523 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2526 SETi( left * right );
2534 dVAR; dSP; dATARGET;
2535 tryAMAGICbin_MG(div_amg, AMGf_assign);
2538 IV value = SvIV_nomg(right);
2540 DIE(aTHX_ "Illegal division by zero");
2541 num = SvIV_nomg(left);
2543 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2547 value = num / value;
2553 #if defined(__GLIBC__) && IVSIZE == 8
2560 /* This is the vanilla old i_modulo. */
2561 dVAR; dSP; dATARGET;
2562 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2566 DIE(aTHX_ "Illegal modulus zero");
2567 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2571 SETi( left % right );
2576 #if defined(__GLIBC__) && IVSIZE == 8
2581 /* This is the i_modulo with the workaround for the _moddi3 bug
2582 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2583 * See below for pp_i_modulo. */
2584 dVAR; dSP; dATARGET;
2585 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2589 DIE(aTHX_ "Illegal modulus zero");
2590 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2594 SETi( left % PERL_ABS(right) );
2601 dVAR; dSP; dATARGET;
2602 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2606 DIE(aTHX_ "Illegal modulus zero");
2607 /* The assumption is to use hereafter the old vanilla version... */
2609 PL_ppaddr[OP_I_MODULO] =
2611 /* .. but if we have glibc, we might have a buggy _moddi3
2612 * (at least glicb 2.2.5 is known to have this bug), in other
2613 * words our integer modulus with negative quad as the second
2614 * argument might be broken. Test for this and re-patch the
2615 * opcode dispatch table if that is the case, remembering to
2616 * also apply the workaround so that this first round works
2617 * right, too. See [perl #9402] for more information. */
2621 /* Cannot do this check with inlined IV constants since
2622 * that seems to work correctly even with the buggy glibc. */
2624 /* Yikes, we have the bug.
2625 * Patch in the workaround version. */
2627 PL_ppaddr[OP_I_MODULO] =
2628 &Perl_pp_i_modulo_1;
2629 /* Make certain we work right this time, too. */
2630 right = PERL_ABS(right);
2633 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2637 SETi( left % right );
2645 dVAR; dSP; dATARGET;
2646 tryAMAGICbin_MG(add_amg, AMGf_assign);
2648 dPOPTOPiirl_ul_nomg;
2649 SETi( left + right );
2656 dVAR; dSP; dATARGET;
2657 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2659 dPOPTOPiirl_ul_nomg;
2660 SETi( left - right );
2668 tryAMAGICbin_MG(lt_amg, AMGf_set);
2671 SETs(boolSV(left < right));
2679 tryAMAGICbin_MG(gt_amg, AMGf_set);
2682 SETs(boolSV(left > right));
2690 tryAMAGICbin_MG(le_amg, AMGf_set);
2693 SETs(boolSV(left <= right));
2701 tryAMAGICbin_MG(ge_amg, AMGf_set);
2704 SETs(boolSV(left >= right));
2712 tryAMAGICbin_MG(eq_amg, AMGf_set);
2715 SETs(boolSV(left == right));
2723 tryAMAGICbin_MG(ne_amg, AMGf_set);
2726 SETs(boolSV(left != right));
2734 tryAMAGICbin_MG(ncmp_amg, 0);
2741 else if (left < right)
2753 tryAMAGICun_MG(neg_amg, 0);
2755 SV * const sv = TOPs;
2756 IV const i = SvIV_nomg(sv);
2762 /* High falutin' math. */
2767 tryAMAGICbin_MG(atan2_amg, 0);
2770 SETn(Perl_atan2(left, right));
2778 int amg_type = sin_amg;
2779 const char *neg_report = NULL;
2780 NV (*func)(NV) = Perl_sin;
2781 const int op_type = PL_op->op_type;
2798 amg_type = sqrt_amg;
2800 neg_report = "sqrt";
2805 tryAMAGICun_MG(amg_type, 0);
2807 SV * const arg = POPs;
2808 const NV value = SvNV_nomg(arg);
2810 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2811 SET_NUMERIC_STANDARD();
2812 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2815 XPUSHn(func(value));
2820 /* Support Configure command-line overrides for rand() functions.
2821 After 5.005, perhaps we should replace this by Configure support
2822 for drand48(), random(), or rand(). For 5.005, though, maintain
2823 compatibility by calling rand() but allow the user to override it.
2824 See INSTALL for details. --Andy Dougherty 15 July 1998
2826 /* Now it's after 5.005, and Configure supports drand48() and random(),
2827 in addition to rand(). So the overrides should not be needed any more.
2828 --Jarkko Hietaniemi 27 September 1998
2831 #ifndef HAS_DRAND48_PROTO
2832 extern double drand48 (void);
2845 if (!PL_srand_called) {
2846 (void)seedDrand01((Rand_seed_t)seed());
2847 PL_srand_called = TRUE;
2857 const UV anum = (MAXARG < 1) ? seed() : POPu;
2858 (void)seedDrand01((Rand_seed_t)anum);
2859 PL_srand_called = TRUE;
2863 /* Historically srand always returned true. We can avoid breaking
2865 sv_setpvs(TARG, "0 but true");
2874 tryAMAGICun_MG(int_amg, AMGf_numeric);
2876 SV * const sv = TOPs;
2877 const IV iv = SvIV_nomg(sv);
2878 /* XXX it's arguable that compiler casting to IV might be subtly
2879 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2880 else preferring IV has introduced a subtle behaviour change bug. OTOH
2881 relying on floating point to be accurate is a bug. */
2886 else if (SvIOK(sv)) {
2888 SETu(SvUV_nomg(sv));
2893 const NV value = SvNV_nomg(sv);
2895 if (value < (NV)UV_MAX + 0.5) {
2898 SETn(Perl_floor(value));
2902 if (value > (NV)IV_MIN - 0.5) {
2905 SETn(Perl_ceil(value));
2916 tryAMAGICun_MG(abs_amg, AMGf_numeric);
2918 SV * const sv = TOPs;
2919 /* This will cache the NV value if string isn't actually integer */
2920 const IV iv = SvIV_nomg(sv);
2925 else if (SvIOK(sv)) {
2926 /* IVX is precise */
2928 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
2936 /* 2s complement assumption. Also, not really needed as
2937 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2943 const NV value = SvNV_nomg(sv);
2957 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
2961 SV* const sv = POPs;
2963 tmps = (SvPV_const(sv, len));
2965 /* If Unicode, try to downgrade
2966 * If not possible, croak. */
2967 SV* const tsv = sv_2mortal(newSVsv(sv));
2970 sv_utf8_downgrade(tsv, FALSE);
2971 tmps = SvPV_const(tsv, len);
2973 if (PL_op->op_type == OP_HEX)
2976 while (*tmps && len && isSPACE(*tmps))
2980 if (*tmps == 'x' || *tmps == 'X') {
2982 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
2984 else if (*tmps == 'b' || *tmps == 'B')
2985 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
2987 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2989 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
3003 SV * const sv = TOPs;
3005 if (SvGAMAGIC(sv)) {
3006 /* For an overloaded or magic scalar, we can't know in advance if
3007 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
3008 it likes to cache the length. Maybe that should be a documented
3013 = sv_2pv_flags(sv, &len,
3014 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
3017 if (!SvPADTMP(TARG)) {
3018 sv_setsv(TARG, &PL_sv_undef);
3023 else if (DO_UTF8(sv)) {
3024 SETi(utf8_length((U8*)p, (U8*)p + len));
3028 } else if (SvOK(sv)) {
3029 /* Neither magic nor overloaded. */
3031 SETi(sv_len_utf8(sv));
3035 if (!SvPADTMP(TARG)) {
3036 sv_setsv_nomg(TARG, &PL_sv_undef);
3058 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3060 const IV arybase = CopARYBASE_get(PL_curcop);
3062 const char *repl = NULL;
3064 const int num_args = PL_op->op_private & 7;
3065 bool repl_need_utf8_upgrade = FALSE;
3066 bool repl_is_utf8 = FALSE;
3071 repl = SvPV_const(repl_sv, repl_len);
3072 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
3075 len_iv = SvIV(len_sv);
3076 len_is_uv = SvIOK_UV(len_sv);
3079 pos1_iv = SvIV(pos_sv);
3080 pos1_is_uv = SvIOK_UV(pos_sv);
3086 sv_utf8_upgrade(sv);
3088 else if (DO_UTF8(sv))
3089 repl_need_utf8_upgrade = TRUE;
3091 tmps = SvPV_const(sv, curlen);
3093 utf8_curlen = sv_len_utf8(sv);
3094 if (utf8_curlen == curlen)
3097 curlen = utf8_curlen;
3102 if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
3103 UV pos1_uv = pos1_iv-arybase;
3104 /* Overflow can occur when $[ < 0 */
3105 if (arybase < 0 && pos1_uv < (UV)pos1_iv)
3110 else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
3111 goto bound_fail; /* $[=3; substr($_,2,...) */
3113 else { /* pos < $[ */
3114 if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
3119 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3124 if (pos1_is_uv || pos1_iv > 0) {
3125 if ((UV)pos1_iv > curlen)
3130 if (!len_is_uv && len_iv < 0) {
3131 pos2_iv = curlen + len_iv;
3133 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3136 } else { /* len_iv >= 0 */
3137 if (!pos1_is_uv && pos1_iv < 0) {
3138 pos2_iv = pos1_iv + len_iv;
3139 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3141 if ((UV)len_iv > curlen-(UV)pos1_iv)
3144 pos2_iv = pos1_iv+len_iv;
3154 if (!pos2_is_uv && pos2_iv < 0) {
3155 if (!pos1_is_uv && pos1_iv < 0)
3159 else if (!pos1_is_uv && pos1_iv < 0)
3162 if ((UV)pos2_iv < (UV)pos1_iv)
3164 if ((UV)pos2_iv > curlen)
3168 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3169 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3170 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3171 STRLEN byte_len = len;
3172 STRLEN byte_pos = utf8_curlen
3173 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3175 if (lvalue && !repl) {
3178 if (!SvGMAGICAL(sv)) {
3180 SvPV_force_nolen(sv);
3181 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3182 "Attempt to use reference as lvalue in substr");
3184 if (isGV_with_GP(sv))
3185 SvPV_force_nolen(sv);
3186 else if (SvOK(sv)) /* is it defined ? */
3187 (void)SvPOK_only_UTF8(sv);
3189 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3192 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3193 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3195 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3196 LvTARGOFF(ret) = pos;
3197 LvTARGLEN(ret) = len;
3200 PUSHs(ret); /* avoid SvSETMAGIC here */
3204 SvTAINTED_off(TARG); /* decontaminate */
3205 SvUTF8_off(TARG); /* decontaminate */
3208 sv_setpvn(TARG, tmps, byte_len);
3209 #ifdef USE_LOCALE_COLLATE
3210 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3216 SV* repl_sv_copy = NULL;
3218 if (repl_need_utf8_upgrade) {
3219 repl_sv_copy = newSVsv(repl_sv);
3220 sv_utf8_upgrade(repl_sv_copy);
3221 repl = SvPV_const(repl_sv_copy, repl_len);
3222 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3226 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3229 SvREFCNT_dec(repl_sv_copy);
3239 Perl_croak(aTHX_ "substr outside of string");
3240 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3247 register const IV size = POPi;
3248 register const IV offset = POPi;
3249 register SV * const src = POPs;
3250 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3253 if (lvalue) { /* it's an lvalue! */
3254 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3255 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3257 LvTARG(ret) = SvREFCNT_inc_simple(src);
3258 LvTARGOFF(ret) = offset;
3259 LvTARGLEN(ret) = size;
3263 SvTAINTED_off(TARG); /* decontaminate */
3267 sv_setuv(ret, do_vecget(src, offset, size));
3283 const char *little_p;
3284 const I32 arybase = CopARYBASE_get(PL_curcop);
3287 const bool is_index = PL_op->op_type == OP_INDEX;
3290 /* arybase is in characters, like offset, so combine prior to the
3291 UTF-8 to bytes calculation. */
3292 offset = POPi - arybase;
3296 big_p = SvPV_const(big, biglen);
3297 little_p = SvPV_const(little, llen);
3299 big_utf8 = DO_UTF8(big);
3300 little_utf8 = DO_UTF8(little);
3301 if (big_utf8 ^ little_utf8) {
3302 /* One needs to be upgraded. */
3303 if (little_utf8 && !PL_encoding) {
3304 /* Well, maybe instead we might be able to downgrade the small
3306 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3309 /* If the large string is ISO-8859-1, and it's not possible to
3310 convert the small string to ISO-8859-1, then there is no
3311 way that it could be found anywhere by index. */
3316 /* At this point, pv is a malloc()ed string. So donate it to temp
3317 to ensure it will get free()d */
3318 little = temp = newSV(0);
3319 sv_usepvn(temp, pv, llen);
3320 little_p = SvPVX(little);
3323 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3326 sv_recode_to_utf8(temp, PL_encoding);
3328 sv_utf8_upgrade(temp);
3333 big_p = SvPV_const(big, biglen);
3336 little_p = SvPV_const(little, llen);
3340 if (SvGAMAGIC(big)) {
3341 /* Life just becomes a lot easier if I use a temporary here.
3342 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3343 will trigger magic and overloading again, as will fbm_instr()
3345 big = newSVpvn_flags(big_p, biglen,
3346 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3349 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3350 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3351 warn on undef, and we've already triggered a warning with the
3352 SvPV_const some lines above. We can't remove that, as we need to
3353 call some SvPV to trigger overloading early and find out if the
3355 This is all getting to messy. The API isn't quite clean enough,
3356 because data access has side effects.
3358 little = newSVpvn_flags(little_p, llen,
3359 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3360 little_p = SvPVX(little);
3364 offset = is_index ? 0 : biglen;
3366 if (big_utf8 && offset > 0)
3367 sv_pos_u2b(big, &offset, 0);
3373 else if (offset > (I32)biglen)
3375 if (!(little_p = is_index
3376 ? fbm_instr((unsigned char*)big_p + offset,
3377 (unsigned char*)big_p + biglen, little, 0)
3378 : rninstr(big_p, big_p + offset,
3379 little_p, little_p + llen)))
3382 retval = little_p - big_p;
3383 if (retval > 0 && big_utf8)
3384 sv_pos_b2u(big, &retval);
3388 PUSHi(retval + arybase);
3394 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3395 SvTAINTED_off(TARG);
3396 do_sprintf(TARG, SP-MARK, MARK+1);
3397 TAINT_IF(SvTAINTED(TARG));
3409 const U8 *s = (U8*)SvPV_const(argsv, len);
3411 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3412 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3413 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3417 XPUSHu(DO_UTF8(argsv) ?
3418 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3430 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3432 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3434 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3436 (void) POPs; /* Ignore the argument value. */
3437 value = UNICODE_REPLACEMENT;
3443 SvUPGRADE(TARG,SVt_PV);
3445 if (value > 255 && !IN_BYTES) {
3446 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3447 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3448 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3450 (void)SvPOK_only(TARG);
3459 *tmps++ = (char)value;
3461 (void)SvPOK_only(TARG);
3463 if (PL_encoding && !IN_BYTES) {
3464 sv_recode_to_utf8(TARG, PL_encoding);
3466 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3467 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3471 *tmps++ = (char)value;
3487 const char *tmps = SvPV_const(left, len);
3489 if (DO_UTF8(left)) {
3490 /* If Unicode, try to downgrade.
3491 * If not possible, croak.
3492 * Yes, we made this up. */
3493 SV* const tsv = sv_2mortal(newSVsv(left));
3496 sv_utf8_downgrade(tsv, FALSE);
3497 tmps = SvPV_const(tsv, len);
3499 # ifdef USE_ITHREADS
3501 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3502 /* This should be threadsafe because in ithreads there is only
3503 * one thread per interpreter. If this would not be true,
3504 * we would need a mutex to protect this malloc. */
3505 PL_reentrant_buffer->_crypt_struct_buffer =
3506 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3507 #if defined(__GLIBC__) || defined(__EMX__)
3508 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3509 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3510 /* work around glibc-2.2.5 bug */
3511 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3515 # endif /* HAS_CRYPT_R */
3516 # endif /* USE_ITHREADS */
3518 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3520 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3526 "The crypt() function is unimplemented due to excessive paranoia.");
3530 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3531 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3533 /* Below are several macros that generate code */
3534 /* Generates code to store a unicode codepoint c that is known to occupy
3535 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3536 #define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3538 *(p) = UTF8_TWO_BYTE_HI(c); \
3539 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3542 /* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3543 * available byte after the two bytes */
3544 #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3546 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3547 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3550 /* Generates code to store the upper case of latin1 character l which is known
3551 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3552 * are only two characters that fit this description, and this macro knows
3553 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3555 #define STORE_NON_LATIN1_UC(p, l) \
3557 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3558 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3559 } else { /* Must be the following letter */ \
3560 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3564 /* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3565 * after the character stored */
3566 #define CAT_NON_LATIN1_UC(p, l) \
3568 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3569 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3571 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3575 /* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3576 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3577 * and must require two bytes to store it. Advances p to point to the next
3578 * available position */
3579 #define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3581 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3582 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3583 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3584 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3585 } else {/* else is one of the other two special cases */ \
3586 CAT_NON_LATIN1_UC((p), (l)); \
3592 /* Actually is both lcfirst() and ucfirst(). Only the first character
3593 * changes. This means that possibly we can change in-place, ie., just
3594 * take the source and change that one character and store it back, but not
3595 * if read-only etc, or if the length changes */
3600 STRLEN slen; /* slen is the byte length of the whole SV. */
3603 bool inplace; /* ? Convert first char only, in-place */
3604 bool doing_utf8 = FALSE; /* ? using utf8 */
3605 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3606 const int op_type = PL_op->op_type;
3609 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3610 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3611 * stored as UTF-8 at s. */
3612 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3613 * lowercased) character stored in tmpbuf. May be either
3614 * UTF-8 or not, but in either case is the number of bytes */
3618 s = (const U8*)SvPV_nomg_const(source, slen);
3620 if (ckWARN(WARN_UNINITIALIZED))
3621 report_uninit(source);
3626 /* We may be able to get away with changing only the first character, in
3627 * place, but not if read-only, etc. Later we may discover more reasons to
3628 * not convert in-place. */
3629 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3631 /* First calculate what the changed first character should be. This affects
3632 * whether we can just swap it out, leaving the rest of the string unchanged,
3633 * or even if have to convert the dest to UTF-8 when the source isn't */
3635 if (! slen) { /* If empty */
3636 need = 1; /* still need a trailing NUL */
3638 else if (DO_UTF8(source)) { /* Is the source utf8? */
3641 /* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
3642 * and doesn't allow for the user to specify their own. When code is added to
3643 * detect if there is a user-defined mapping in force here, and if so to use
3644 * that, then the code below can be compiled. The detection would be a good
3645 * thing anyway, as currently the user-defined mappings only work on utf8
3646 * strings, and thus depend on the chosen internal storage method, which is a
3648 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3649 if (UTF8_IS_INVARIANT(*s)) {
3651 /* An invariant source character is either ASCII or, in EBCDIC, an
3652 * ASCII equivalent or a caseless C1 control. In both these cases,
3653 * the lower and upper cases of any character are also invariants
3654 * (and title case is the same as upper case). So it is safe to
3655 * use the simple case change macros which avoid the overhead of
3656 * the general functions. Note that if perl were to be extended to
3657 * do locale handling in UTF-8 strings, this wouldn't be true in,
3658 * for example, Lithuanian or Turkic. */
3659 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3663 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3666 /* Similarly, if the source character isn't invariant but is in the
3667 * latin1 range (or EBCDIC equivalent thereof), we have the case
3668 * changes compiled into perl, and can avoid the overhead of the
3669 * general functions. In this range, the characters are stored as
3670 * two UTF-8 bytes, and it so happens that any changed-case version
3671 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3675 /* Convert the two source bytes to a single Unicode code point
3676 * value, change case and save for below */
3677 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3678 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3679 U8 lower = toLOWER_LATIN1(chr);
3680 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3682 else { /* ucfirst */
3683 U8 upper = toUPPER_LATIN1_MOD(chr);
3685 /* Most of the latin1 range characters are well-behaved. Their
3686 * title and upper cases are the same, and are also in the
3687 * latin1 range. The macro above returns their upper (hence
3688 * title) case, and all that need be done is to save the result
3689 * for below. However, several characters are problematic, and
3690 * have to be handled specially. The MOD in the macro name
3691 * above means that these tricky characters all get mapped to
3692 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3693 * This mapping saves some tests for the majority of the
3696 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3698 /* Not tricky. Just save it. */
3699 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3701 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3703 /* This one is tricky because it is two characters long,
3704 * though the UTF-8 is still two bytes, so the stored
3705 * length doesn't change */
3706 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3707 *(tmpbuf + 1) = 's';
3711 /* The other two have their title and upper cases the same,
3712 * but are tricky because the changed-case characters
3713 * aren't in the latin1 range. They, however, do fit into
3714 * two UTF-8 bytes */
3715 STORE_NON_LATIN1_UC(tmpbuf, chr);
3720 #endif /* end of dont want to break user-defined casing */
3722 /* Here, can't short-cut the general case */
3724 utf8_to_uvchr(s, &ulen);
3725 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3726 else toLOWER_utf8(s, tmpbuf, &tculen);
3728 /* we can't do in-place if the length changes. */
3729 if (ulen != tculen) inplace = FALSE;
3730 need = slen + 1 - ulen + tculen;
3731 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3735 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3736 * latin1 is treated as caseless. Note that a locale takes
3738 tculen = 1; /* Most characters will require one byte, but this will
3739 * need to be overridden for the tricky ones */
3742 if (op_type == OP_LCFIRST) {
3744 /* lower case the first letter: no trickiness for any character */
3745 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3746 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3749 else if (IN_LOCALE_RUNTIME) {
3750 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3751 * have upper and title case different
3754 else if (! IN_UNI_8_BIT) {
3755 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3756 * on EBCDIC machines whatever the
3757 * native function does */
3759 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3760 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3762 /* tmpbuf now has the correct title case for all latin1 characters
3763 * except for the several ones that have tricky handling. All
3764 * of these are mapped by the MOD to the letter below. */
3765 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3767 /* The length is going to change, with all three of these, so
3768 * can't replace just the first character */
3771 /* We use the original to distinguish between these tricky
3773 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3774 /* Two character title case 'Ss', but can remain non-UTF-8 */
3777 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3782 /* The other two tricky ones have their title case outside
3783 * latin1. It is the same as their upper case. */
3785 STORE_NON_LATIN1_UC(tmpbuf, *s);
3787 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3788 * and their upper cases is 2. */
3791 /* The entire result will have to be in UTF-8. Assume worst
3792 * case sizing in conversion. (all latin1 characters occupy
3793 * at most two bytes in utf8) */
3794 convert_source_to_utf8 = TRUE;
3795 need = slen * 2 + 1;
3797 } /* End of is one of the three special chars */
3798 } /* End of use Unicode (Latin1) semantics */
3799 } /* End of changing the case of the first character */
3801 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3802 * generate the result */
3805 /* We can convert in place. This means we change just the first
3806 * character without disturbing the rest; no need to grow */
3808 s = d = (U8*)SvPV_force_nomg(source, slen);
3814 /* Here, we can't convert in place; we earlier calculated how much
3815 * space we will need, so grow to accommodate that */
3816 SvUPGRADE(dest, SVt_PV);
3817 d = (U8*)SvGROW(dest, need);
3818 (void)SvPOK_only(dest);
3825 if (! convert_source_to_utf8) {
3827 /* Here both source and dest are in UTF-8, but have to create
3828 * the entire output. We initialize the result to be the
3829 * title/lower cased first character, and then append the rest
3831 sv_setpvn(dest, (char*)tmpbuf, tculen);
3833 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3837 const U8 *const send = s + slen;
3839 /* Here the dest needs to be in UTF-8, but the source isn't,
3840 * except we earlier UTF-8'd the first character of the source
3841 * into tmpbuf. First put that into dest, and then append the
3842 * rest of the source, converting it to UTF-8 as we go. */
3844 /* Assert tculen is 2 here because the only two characters that
3845 * get to this part of the code have 2-byte UTF-8 equivalents */
3847 *d++ = *(tmpbuf + 1);
3848 s++; /* We have just processed the 1st char */
3850 for (; s < send; s++) {
3851 d = uvchr_to_utf8(d, *s);
3854 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3858 else { /* in-place UTF-8. Just overwrite the first character */
3859 Copy(tmpbuf, d, tculen, U8);
3860 SvCUR_set(dest, need - 1);
3863 else { /* Neither source nor dest are in or need to be UTF-8 */
3865 if (IN_LOCALE_RUNTIME) {
3869 if (inplace) { /* in-place, only need to change the 1st char */
3872 else { /* Not in-place */
3874 /* Copy the case-changed character(s) from tmpbuf */
3875 Copy(tmpbuf, d, tculen, U8);
3876 d += tculen - 1; /* Code below expects d to point to final
3877 * character stored */
3880 else { /* empty source */
3881 /* See bug #39028: Don't taint if empty */
3885 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3886 * the destination to retain that flag */
3890 if (!inplace) { /* Finish the rest of the string, unchanged */
3891 /* This will copy the trailing NUL */
3892 Copy(s + 1, d + 1, slen, U8);
3893 SvCUR_set(dest, need - 1);
3896 if (dest != source && SvTAINTED(source))
3902 /* There's so much setup/teardown code common between uc and lc, I wonder if
3903 it would be worth merging the two, and just having a switch outside each
3904 of the three tight loops. There is less and less commonality though */
3918 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3919 && SvTEMP(source) && !DO_UTF8(source)
3920 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3922 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3923 * make the loop tight, so we overwrite the source with the dest before
3924 * looking at it, and we need to look at the original source
3925 * afterwards. There would also need to be code added to handle
3926 * switching to not in-place in midstream if we run into characters
3927 * that change the length.
3930 s = d = (U8*)SvPV_force_nomg(source, len);
3937 /* The old implementation would copy source into TARG at this point.
3938 This had the side effect that if source was undef, TARG was now
3939 an undefined SV with PADTMP set, and they don't warn inside
3940 sv_2pv_flags(). However, we're now getting the PV direct from
3941 source, which doesn't have PADTMP set, so it would warn. Hence the
3945 s = (const U8*)SvPV_nomg_const(source, len);
3947 if (ckWARN(WARN_UNINITIALIZED))
3948 report_uninit(source);
3954 SvUPGRADE(dest, SVt_PV);
3955 d = (U8*)SvGROW(dest, min);
3956 (void)SvPOK_only(dest);
3961 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3962 to check DO_UTF8 again here. */
3964 if (DO_UTF8(source)) {
3965 const U8 *const send = s + len;
3966 U8 tmpbuf[UTF8_MAXBYTES+1];
3968 /* All occurrences of these are to be moved to follow any other marks.
3969 * This is context-dependent. We may not be passed enough context to
3970 * move the iota subscript beyond all of them, but we do the best we can
3971 * with what we're given. The result is always better than if we
3972 * hadn't done this. And, the problem would only arise if we are
3973 * passed a character without all its combining marks, which would be
3974 * the caller's mistake. The information this is based on comes from a
3975 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3976 * itself) and so can't be checked properly to see if it ever gets
3977 * revised. But the likelihood of it changing is remote */
3978 bool in_iota_subscript = FALSE;
3981 if (in_iota_subscript && ! is_utf8_mark(s)) {
3982 /* A non-mark. Time to output the iota subscript */
3983 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
3984 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3986 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3987 in_iota_subscript = FALSE;
3991 /* See comments at the first instance in this file of this ifdef */
3992 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3994 /* If the UTF-8 character is invariant, then it is in the range
3995 * known by the standard macro; result is only one byte long */
3996 if (UTF8_IS_INVARIANT(*s)) {
4000 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4002 /* Likewise, if it fits in a byte, its case change is in our
4004 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++);
4005 U8 upper = toUPPER_LATIN1_MOD(orig);
4006 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
4014 /* Otherwise, need the general UTF-8 case. Get the changed
4015 * case value and copy it to the output buffer */
4017 const STRLEN u = UTF8SKIP(s);
4020 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
4021 if (uv == GREEK_CAPITAL_LETTER_IOTA
4022 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
4024 in_iota_subscript = TRUE;
4027 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4028 /* If the eventually required minimum size outgrows
4029 * the available space, we need to grow. */
4030 const UV o = d - (U8*)SvPVX_const(dest);
4032 /* If someone uppercases one million U+03B0s we
4033 * SvGROW() one million times. Or we could try
4034 * guessing how much to allocate without allocating too
4035 * much. Such is life. See corresponding comment in
4036 * lc code for another option */
4038 d = (U8*)SvPVX(dest) + o;
4040 Copy(tmpbuf, d, ulen, U8);
4046 if (in_iota_subscript) {
4047 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
4051 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4053 else { /* Not UTF-8 */
4055 const U8 *const send = s + len;
4057 /* Use locale casing if in locale; regular style if not treating
4058 * latin1 as having case; otherwise the latin1 casing. Do the
4059 * whole thing in a tight loop, for speed, */
4060 if (IN_LOCALE_RUNTIME) {
4063 for (; s < send; d++, s++)
4064 *d = toUPPER_LC(*s);
4066 else if (! IN_UNI_8_BIT) {
4067 for (; s < send; d++, s++) {
4072 for (; s < send; d++, s++) {
4073 *d = toUPPER_LATIN1_MOD(*s);
4074 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
4076 /* The mainstream case is the tight loop above. To avoid
4077 * extra tests in that, all three characters that require
4078 * special handling are mapped by the MOD to the one tested
4080 * Use the source to distinguish between the three cases */
4082 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4084 /* uc() of this requires 2 characters, but they are
4085 * ASCII. If not enough room, grow the string */
4086 if (SvLEN(dest) < ++min) {
4087 const UV o = d - (U8*)SvPVX_const(dest);
4089 d = (U8*)SvPVX(dest) + o;
4091 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4092 continue; /* Back to the tight loop; still in ASCII */
4095 /* The other two special handling characters have their
4096 * upper cases outside the latin1 range, hence need to be
4097 * in UTF-8, so the whole result needs to be in UTF-8. So,
4098 * here we are somewhere in the middle of processing a
4099 * non-UTF-8 string, and realize that we will have to convert
4100 * the whole thing to UTF-8. What to do? There are
4101 * several possibilities. The simplest to code is to
4102 * convert what we have so far, set a flag, and continue on
4103 * in the loop. The flag would be tested each time through
4104 * the loop, and if set, the next character would be
4105 * converted to UTF-8 and stored. But, I (khw) didn't want
4106 * to slow down the mainstream case at all for this fairly
4107 * rare case, so I didn't want to add a test that didn't
4108 * absolutely have to be there in the loop, besides the
4109 * possibility that it would get too complicated for
4110 * optimizers to deal with. Another possibility is to just
4111 * give up, convert the source to UTF-8, and restart the
4112 * function that way. Another possibility is to convert
4113 * both what has already been processed and what is yet to
4114 * come separately to UTF-8, then jump into the loop that
4115 * handles UTF-8. But the most efficient time-wise of the
4116 * ones I could think of is what follows, and turned out to
4117 * not require much extra code. */
4119 /* Convert what we have so far into UTF-8, telling the
4120 * function that we know it should be converted, and to
4121 * allow extra space for what we haven't processed yet.
4122 * Assume the worst case space requirements for converting
4123 * what we haven't processed so far: that it will require
4124 * two bytes for each remaining source character, plus the
4125 * NUL at the end. This may cause the string pointer to
4126 * move, so re-find it. */
4128 len = d - (U8*)SvPVX_const(dest);
4129 SvCUR_set(dest, len);
4130 len = sv_utf8_upgrade_flags_grow(dest,
4131 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4133 d = (U8*)SvPVX(dest) + len;
4135 /* And append the current character's upper case in UTF-8 */
4136 CAT_NON_LATIN1_UC(d, *s);
4138 /* Now process the remainder of the source, converting to
4139 * upper and UTF-8. If a resulting byte is invariant in
4140 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4141 * append it to the output. */
4144 for (; s < send; s++) {
4145 U8 upper = toUPPER_LATIN1_MOD(*s);
4146 if UTF8_IS_INVARIANT(upper) {
4150 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4154 /* Here have processed the whole source; no need to continue
4155 * with the outer loop. Each character has been converted
4156 * to upper case and converted to UTF-8 */
4159 } /* End of processing all latin1-style chars */
4160 } /* End of processing all chars */
4161 } /* End of source is not empty */
4163 if (source != dest) {
4164 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4165 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4167 } /* End of isn't utf8 */
4168 if (dest != source && SvTAINTED(source))
4187 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4188 && SvTEMP(source) && !DO_UTF8(source)) {
4190 /* We can convert in place, as lowercasing anything in the latin1 range
4191 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4193 s = d = (U8*)SvPV_force_nomg(source, len);
4200 /* The old implementation would copy source into TARG at this point.
4201 This had the side effect that if source was undef, TARG was now
4202 an undefined SV with PADTMP set, and they don't warn inside
4203 sv_2pv_flags(). However, we're now getting the PV direct from
4204 source, which doesn't have PADTMP set, so it would warn. Hence the
4208 s = (const U8*)SvPV_nomg_const(source, len);
4210 if (ckWARN(WARN_UNINITIALIZED))
4211 report_uninit(source);
4217 SvUPGRADE(dest, SVt_PV);
4218 d = (U8*)SvGROW(dest, min);
4219 (void)SvPOK_only(dest);
4224 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4225 to check DO_UTF8 again here. */
4227 if (DO_UTF8(source)) {
4228 const U8 *const send = s + len;
4229 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4232 /* See comments at the first instance in this file of this ifdef */
4233 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4234 if (UTF8_IS_INVARIANT(*s)) {
4236 /* Invariant characters use the standard mappings compiled in.
4241 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4243 /* As do the ones in the Latin1 range */
4244 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++));
4245 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4250 /* Here, is utf8 not in Latin-1 range, have to go out and get
4251 * the mappings from the tables. */
4253 const STRLEN u = UTF8SKIP(s);
4256 #ifndef CONTEXT_DEPENDENT_CASING
4257 toLOWER_utf8(s, tmpbuf, &ulen);
4259 /* This is ifdefd out because it needs more work and thought. It isn't clear
4260 * that we should do it.
4261 * A minor objection is that this is based on a hard-coded rule from the
4262 * Unicode standard, and may change, but this is not very likely at all.
4263 * mktables should check and warn if it does.
4264 * More importantly, if the sigma occurs at the end of the string, we don't
4265 * have enough context to know whether it is part of a larger string or going
4266 * to be or not. It may be that we are passed a subset of the context, via
4267 * a \U...\E, for example, and we could conceivably know the larger context if
4268 * code were changed to pass that in. But, if the string passed in is an
4269 * intermediate result, and the user concatenates two strings together
4270 * after we have made a final sigma, that would be wrong. If the final sigma
4271 * occurs in the middle of the string we are working on, then we know that it
4272 * should be a final sigma, but otherwise we can't be sure. */
4274 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4276 /* If the lower case is a small sigma, it may be that we need
4277 * to change it to a final sigma. This happens at the end of
4278 * a word that contains more than just this character, and only
4279 * when we started with a capital sigma. */
4280 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4281 s > send - len && /* Makes sure not the first letter */
4282 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4285 /* We use the algorithm in:
4286 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4287 * is a CAPITAL SIGMA): If C is preceded by a sequence
4288 * consisting of a cased letter and a case-ignorable
4289 * sequence, and C is not followed by a sequence consisting
4290 * of a case ignorable sequence and then a cased letter,
4291 * then when lowercasing C, C becomes a final sigma */
4293 /* To determine if this is the end of a word, need to peek
4294 * ahead. Look at the next character */
4295 const U8 *peek = s + u;
4297 /* Skip any case ignorable characters */
4298 while (peek < send && is_utf8_case_ignorable(peek)) {
4299 peek += UTF8SKIP(peek);
4302 /* If we reached the end of the string without finding any
4303 * non-case ignorable characters, or if the next such one
4304 * is not-cased, then we have met the conditions for it
4305 * being a final sigma with regards to peek ahead, and so
4306 * must do peek behind for the remaining conditions. (We
4307 * know there is stuff behind to look at since we tested
4308 * above that this isn't the first letter) */
4309 if (peek >= send || ! is_utf8_cased(peek)) {
4310 peek = utf8_hop(s, -1);
4312 /* Here are at the beginning of the first character
4313 * before the original upper case sigma. Keep backing
4314 * up, skipping any case ignorable characters */
4315 while (is_utf8_case_ignorable(peek)) {
4316 peek = utf8_hop(peek, -1);
4319 /* Here peek points to the first byte of the closest
4320 * non-case-ignorable character before the capital
4321 * sigma. If it is cased, then by the Unicode
4322 * algorithm, we should use a small final sigma instead
4323 * of what we have */
4324 if (is_utf8_cased(peek)) {
4325 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4326 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4330 else { /* Not a context sensitive mapping */
4331 #endif /* End of commented out context sensitive */
4332 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4334 /* If the eventually required minimum size outgrows
4335 * the available space, we need to grow. */
4336 const UV o = d - (U8*)SvPVX_const(dest);
4338 /* If someone lowercases one million U+0130s we
4339 * SvGROW() one million times. Or we could try
4340 * guessing how much to allocate without allocating too
4341 * much. Such is life. Another option would be to
4342 * grow an extra byte or two more each time we need to
4343 * grow, which would cut down the million to 500K, with
4346 d = (U8*)SvPVX(dest) + o;
4348 #ifdef CONTEXT_DEPENDENT_CASING
4351 /* Copy the newly lowercased letter to the output buffer we're
4353 Copy(tmpbuf, d, ulen, U8);
4356 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4359 } /* End of looping through the source string */
4362 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4363 } else { /* Not utf8 */
4365 const U8 *const send = s + len;
4367 /* Use locale casing if in locale; regular style if not treating
4368 * latin1 as having case; otherwise the latin1 casing. Do the
4369 * whole thing in a tight loop, for speed, */
4370 if (IN_LOCALE_RUNTIME) {
4373 for (; s < send; d++, s++)
4374 *d = toLOWER_LC(*s);
4376 else if (! IN_UNI_8_BIT) {
4377 for (; s < send; d++, s++) {
4382 for (; s < send; d++, s++) {
4383 *d = toLOWER_LATIN1(*s);
4387 if (source != dest) {
4389 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4392 if (dest != source && SvTAINTED(source))
4401 SV * const sv = TOPs;
4403 register const char *s = SvPV_const(sv,len);
4405 SvUTF8_off(TARG); /* decontaminate */
4408 SvUPGRADE(TARG, SVt_PV);
4409 SvGROW(TARG, (len * 2) + 1);
4413 if (UTF8_IS_CONTINUED(*s)) {
4414 STRLEN ulen = UTF8SKIP(s);
4438 SvCUR_set(TARG, d - SvPVX_const(TARG));
4439 (void)SvPOK_only_UTF8(TARG);
4442 sv_setpvn(TARG, s, len);
4451 dVAR; dSP; dMARK; dORIGMARK;
4452 register AV *const av = MUTABLE_AV(POPs);
4453 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4455 if (SvTYPE(av) == SVt_PVAV) {
4456 const I32 arybase = CopARYBASE_get(PL_curcop);
4457 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4458 bool can_preserve = FALSE;
4464 can_preserve = SvCANEXISTDELETE(av);
4467 if (lval && localizing) {
4470 for (svp = MARK + 1; svp <= SP; svp++) {
4471 const I32 elem = SvIV(*svp);
4475 if (max > AvMAX(av))
4479 while (++MARK <= SP) {
4481 I32 elem = SvIV(*MARK);
4482 bool preeminent = TRUE;
4486 if (localizing && can_preserve) {
4487 /* If we can determine whether the element exist,
4488 * Try to preserve the existenceness of a tied array
4489 * element by using EXISTS and DELETE if possible.
4490 * Fallback to FETCH and STORE otherwise. */
4491 preeminent = av_exists(av, elem);
4494 svp = av_fetch(av, elem, lval);
4496 if (!svp || *svp == &PL_sv_undef)
4497 DIE(aTHX_ PL_no_aelem, elem);
4500 save_aelem(av, elem, svp);
4502 SAVEADELETE(av, elem);
4505 *MARK = svp ? *svp : &PL_sv_undef;
4508 if (GIMME != G_ARRAY) {
4510 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4516 /* Smart dereferencing for keys, values and each */
4528 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4533 "Type of argument to %s must be unblessed hashref or arrayref",
4534 PL_op_desc[PL_op->op_type] );
4537 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4539 "Can't modify %s in %s",
4540 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4543 /* Delegate to correct function for op type */
4545 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4546 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4549 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4557 AV *array = MUTABLE_AV(POPs);
4558 const I32 gimme = GIMME_V;
4559 IV *iterp = Perl_av_iter_p(aTHX_ array);
4560 const IV current = (*iterp)++;
4562 if (current > av_len(array)) {
4564 if (gimme == G_SCALAR)
4571 mPUSHi(CopARYBASE_get(PL_curcop) + current);
4572 if (gimme == G_ARRAY) {
4573 SV **const element = av_fetch(array, current, 0);
4574 PUSHs(element ? *element : &PL_sv_undef);
4583 AV *array = MUTABLE_AV(POPs);
4584 const I32 gimme = GIMME_V;
4586 *Perl_av_iter_p(aTHX_ array) = 0;
4588 if (gimme == G_SCALAR) {
4590 PUSHi(av_len(array) + 1);
4592 else if (gimme == G_ARRAY) {
4593 IV n = Perl_av_len(aTHX_ array);
4594 IV i = CopARYBASE_get(PL_curcop);
4598 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
4600 for (; i <= n; i++) {
4605 for (i = 0; i <= n; i++) {
4606 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4607 PUSHs(elem ? *elem : &PL_sv_undef);
4614 /* Associative arrays. */
4620 HV * hash = MUTABLE_HV(POPs);
4622 const I32 gimme = GIMME_V;
4625 /* might clobber stack_sp */
4626 entry = hv_iternext(hash);
4631 SV* const sv = hv_iterkeysv(entry);
4632 PUSHs(sv); /* won't clobber stack_sp */
4633 if (gimme == G_ARRAY) {
4636 /* might clobber stack_sp */
4637 val = hv_iterval(hash, entry);
4642 else if (gimme == G_SCALAR)
4649 S_do_delete_local(pTHX)
4653 const I32 gimme = GIMME_V;
4657 if (PL_op->op_private & OPpSLICE) {
4659 SV * const osv = POPs;
4660 const bool tied = SvRMAGICAL(osv)
4661 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4662 const bool can_preserve = SvCANEXISTDELETE(osv)
4663 || mg_find((const SV *)osv, PERL_MAGIC_env);
4664 const U32 type = SvTYPE(osv);
4665 if (type == SVt_PVHV) { /* hash element */
4666 HV * const hv = MUTABLE_HV(osv);
4667 while (++MARK <= SP) {
4668 SV * const keysv = *MARK;
4670 bool preeminent = TRUE;
4672 preeminent = hv_exists_ent(hv, keysv, 0);
4674 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4681 sv = hv_delete_ent(hv, keysv, 0, 0);
4682 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4685 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4687 *MARK = sv_mortalcopy(sv);
4693 SAVEHDELETE(hv, keysv);
4694 *MARK = &PL_sv_undef;
4698 else if (type == SVt_PVAV) { /* array element */
4699 if (PL_op->op_flags & OPf_SPECIAL) {
4700 AV * const av = MUTABLE_AV(osv);
4701 while (++MARK <= SP) {
4702 I32 idx = SvIV(*MARK);
4704 bool preeminent = TRUE;
4706 preeminent = av_exists(av, idx);
4708 SV **svp = av_fetch(av, idx, 1);
4715 sv = av_delete(av, idx, 0);
4716 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4719 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4721 *MARK = sv_mortalcopy(sv);
4727 SAVEADELETE(av, idx);
4728 *MARK = &PL_sv_undef;
4734 DIE(aTHX_ "Not a HASH reference");
4735 if (gimme == G_VOID)
4737 else if (gimme == G_SCALAR) {
4742 *++MARK = &PL_sv_undef;
4747 SV * const keysv = POPs;
4748 SV * const osv = POPs;
4749 const bool tied = SvRMAGICAL(osv)
4750 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4751 const bool can_preserve = SvCANEXISTDELETE(osv)
4752 || mg_find((const SV *)osv, PERL_MAGIC_env);
4753 const U32 type = SvTYPE(osv);
4755 if (type == SVt_PVHV) {
4756 HV * const hv = MUTABLE_HV(osv);
4757 bool preeminent = TRUE;
4759 preeminent = hv_exists_ent(hv, keysv, 0);
4761 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4768 sv = hv_delete_ent(hv, keysv, 0, 0);
4769 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4772 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4774 SV *nsv = sv_mortalcopy(sv);
4780 SAVEHDELETE(hv, keysv);
4782 else if (type == SVt_PVAV) {
4783 if (PL_op->op_flags & OPf_SPECIAL) {
4784 AV * const av = MUTABLE_AV(osv);
4785 I32 idx = SvIV(keysv);
4786 bool preeminent = TRUE;
4788 preeminent = av_exists(av, idx);
4790 SV **svp = av_fetch(av, idx, 1);
4797 sv = av_delete(av, idx, 0);
4798 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4801 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4803 SV *nsv = sv_mortalcopy(sv);
4809 SAVEADELETE(av, idx);
4812 DIE(aTHX_ "panic: avhv_delete no longer supported");
4815 DIE(aTHX_ "Not a HASH reference");
4818 if (gimme != G_VOID)
4832 if (PL_op->op_private & OPpLVAL_INTRO)
4833 return do_delete_local();
4836 discard = (gimme == G_VOID) ? G_DISCARD : 0;
4838 if (PL_op->op_private & OPpSLICE) {
4840 HV * const hv = MUTABLE_HV(POPs);
4841 const U32 hvtype = SvTYPE(hv);
4842 if (hvtype == SVt_PVHV) { /* hash element */
4843 while (++MARK <= SP) {
4844 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
4845 *MARK = sv ? sv : &PL_sv_undef;
4848 else if (hvtype == SVt_PVAV) { /* array element */
4849 if (PL_op->op_flags & OPf_SPECIAL) {
4850 while (++MARK <= SP) {
4851 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
4852 *MARK = sv ? sv : &PL_sv_undef;
4857 DIE(aTHX_ "Not a HASH reference");
4860 else if (gimme == G_SCALAR) {
4865 *++MARK = &PL_sv_undef;
4871 HV * const hv = MUTABLE_HV(POPs);
4873 if (SvTYPE(hv) == SVt_PVHV)
4874 sv = hv_delete_ent(hv, keysv, discard, 0);
4875 else if (SvTYPE(hv) == SVt_PVAV) {
4876 if (PL_op->op_flags & OPf_SPECIAL)
4877 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
4879 DIE(aTHX_ "panic: avhv_delete no longer supported");
4882 DIE(aTHX_ "Not a HASH reference");
4898 if (PL_op->op_private & OPpEXISTS_SUB) {
4900 SV * const sv = POPs;
4901 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
4904 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
4909 hv = MUTABLE_HV(POPs);
4910 if (SvTYPE(hv) == SVt_PVHV) {
4911 if (hv_exists_ent(hv, tmpsv, 0))
4914 else if (SvTYPE(hv) == SVt_PVAV) {
4915 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
4916 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
4921 DIE(aTHX_ "Not a HASH reference");
4928 dVAR; dSP; dMARK; dORIGMARK;
4929 register HV * const hv = MUTABLE_HV(POPs);
4930 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4931 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4932 bool can_preserve = FALSE;
4938 if (SvCANEXISTDELETE(hv) || mg_find((const SV *)hv, PERL_MAGIC_env))
4939 can_preserve = TRUE;
4942 while (++MARK <= SP) {
4943 SV * const keysv = *MARK;
4946 bool preeminent = TRUE;
4948 if (localizing && can_preserve) {
4949 /* If we can determine whether the element exist,
4950 * try to preserve the existenceness of a tied hash
4951 * element by using EXISTS and DELETE if possible.
4952 * Fallback to FETCH and STORE otherwise. */
4953 preeminent = hv_exists_ent(hv, keysv, 0);
4956 he = hv_fetch_ent(hv, keysv, lval, 0);
4957 svp = he ? &HeVAL(he) : NULL;
4960 if (!svp || *svp == &PL_sv_undef) {
4961 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
4964 if (HvNAME_get(hv) && isGV(*svp))
4965 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
4966 else if (preeminent)
4967 save_helem_flags(hv, keysv, svp,
4968 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
4970 SAVEHDELETE(hv, keysv);
4973 *MARK = svp ? *svp : &PL_sv_undef;
4975 if (GIMME != G_ARRAY) {
4977 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4983 /* List operators. */
4988 if (GIMME != G_ARRAY) {
4990 *MARK = *SP; /* unwanted list, return last item */
4992 *MARK = &PL_sv_undef;
5002 SV ** const lastrelem = PL_stack_sp;
5003 SV ** const lastlelem = PL_stack_base + POPMARK;
5004 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
5005 register SV ** const firstrelem = lastlelem + 1;
5006 const I32 arybase = CopARYBASE_get(PL_curcop);
5007 I32 is_something_there = FALSE;
5009 register const I32 max = lastrelem - lastlelem;
5010 register SV **lelem;
5012 if (GIMME != G_ARRAY) {
5013 I32 ix = SvIV(*lastlelem);
5018 if (ix < 0 || ix >= max)
5019 *firstlelem = &PL_sv_undef;
5021 *firstlelem = firstrelem[ix];
5027 SP = firstlelem - 1;
5031 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
5032 I32 ix = SvIV(*lelem);
5037 if (ix < 0 || ix >= max)
5038 *lelem = &PL_sv_undef;
5040 is_something_there = TRUE;
5041 if (!(*lelem = firstrelem[ix]))
5042 *lelem = &PL_sv_undef;
5045 if (is_something_there)
5048 SP = firstlelem - 1;
5054 dVAR; dSP; dMARK; dORIGMARK;
5055 const I32 items = SP - MARK;
5056 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
5057 SP = ORIGMARK; /* av_make() might realloc stack_sp */
5058 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
5059 ? newRV_noinc(av) : av);
5065 dVAR; dSP; dMARK; dORIGMARK;
5066 HV* const hv = newHV();
5069 SV * const key = *++MARK;
5070 SV * const val = newSV(0);
5072 sv_setsv(val, *++MARK);
5074 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
5075 (void)hv_store_ent(hv,key,val,0);
5078 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
5079 ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
5084 S_deref_plain_array(pTHX_ AV *ary)
5086 if (SvTYPE(ary) == SVt_PVAV) return ary;
5087 SvGETMAGIC((SV *)ary);
5088 if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV)
5089 Perl_die(aTHX_ "Not an ARRAY reference");
5090 else if (SvOBJECT(SvRV(ary)))
5091 Perl_die(aTHX_ "Not an unblessed ARRAY reference");
5092 return (AV *)SvRV(ary);
5095 #if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
5096 # define DEREF_PLAIN_ARRAY(ary) \
5099 SvTYPE(aRrRay) == SVt_PVAV \
5101 : S_deref_plain_array(aTHX_ aRrRay); \
5104 # define DEREF_PLAIN_ARRAY(ary) \
5106 PL_Sv = (SV *)(ary), \
5107 SvTYPE(PL_Sv) == SVt_PVAV \
5109 : S_deref_plain_array(aTHX_ (AV *)PL_Sv) \
5115 dVAR; dSP; dMARK; dORIGMARK;
5116 int num_args = (SP - MARK);
5117 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5121 register I32 offset;
5122 register I32 length;
5126 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5129 return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg,
5130 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
5137 offset = i = SvIV(*MARK);
5139 offset += AvFILLp(ary) + 1;
5141 offset -= CopARYBASE_get(PL_curcop);
5143 DIE(aTHX_ PL_no_aelem, i);
5145 length = SvIVx(*MARK++);
5147 length += AvFILLp(ary) - offset + 1;
5153 length = AvMAX(ary) + 1; /* close enough to infinity */
5157 length = AvMAX(ary) + 1;
5159 if (offset > AvFILLp(ary) + 1) {
5161 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
5162 offset = AvFILLp(ary) + 1;
5164 after = AvFILLp(ary) + 1 - (offset + length);
5165 if (after < 0) { /* not that much array */
5166 length += after; /* offset+length now in array */
5172 /* At this point, MARK .. SP-1 is our new LIST */
5175 diff = newlen - length;
5176 if (newlen && !AvREAL(ary) && AvREIFY(ary))
5179 /* make new elements SVs now: avoid problems if they're from the array */
5180 for (dst = MARK, i = newlen; i; i--) {
5181 SV * const h = *dst;
5182 *dst++ = newSVsv(h);
5185 if (diff < 0) { /* shrinking the area */
5186 SV **tmparyval = NULL;
5188 Newx(tmparyval, newlen, SV*); /* so remember insertion */
5189 Copy(MARK, tmparyval, newlen, SV*);
5192 MARK = ORIGMARK + 1;
5193 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5194 MEXTEND(MARK, length);
5195 Copy(AvARRAY(ary)+offset, MARK, length, SV*);
5197 EXTEND_MORTAL(length);
5198 for (i = length, dst = MARK; i; i--) {
5199 sv_2mortal(*dst); /* free them eventually */
5206 *MARK = AvARRAY(ary)[offset+length-1];
5209 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
5210 SvREFCNT_dec(*dst++); /* free them now */
5213 AvFILLp(ary) += diff;
5215 /* pull up or down? */
5217 if (offset < after) { /* easier to pull up */
5218 if (offset) { /* esp. if nothing to pull */
5219 src = &AvARRAY(ary)[offset-1];
5220 dst = src - diff; /* diff is negative */
5221 for (i = offset; i > 0; i--) /* can't trust Copy */
5225 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
5229 if (after) { /* anything to pull down? */
5230 src = AvARRAY(ary) + offset + length;
5231 dst = src + diff; /* diff is negative */
5232 Move(src, dst, after, SV*);
5234 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
5235 /* avoid later double free */
5239 dst[--i] = &PL_sv_undef;
5242 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
5243 Safefree(tmparyval);
5246 else { /* no, expanding (or same) */
5247 SV** tmparyval = NULL;
5249 Newx(tmparyval, length, SV*); /* so remember deletion */
5250 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
5253 if (diff > 0) { /* expanding */
5254 /* push up or down? */
5255 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
5259 Move(src, dst, offset, SV*);
5261 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
5263 AvFILLp(ary) += diff;
5266 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
5267 av_extend(ary, AvFILLp(ary) + diff);
5268 AvFILLp(ary) += diff;
5271 dst = AvARRAY(ary) + AvFILLp(ary);
5273 for (i = after; i; i--) {
5281 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
5284 MARK = ORIGMARK + 1;
5285 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5287 Copy(tmparyval, MARK, length, SV*);
5289 EXTEND_MORTAL(length);
5290 for (i = length, dst = MARK; i; i--) {
5291 sv_2mortal(*dst); /* free them eventually */
5298 else if (length--) {
5299 *MARK = tmparyval[length];
5302 while (length-- > 0)
5303 SvREFCNT_dec(tmparyval[length]);
5307 *MARK = &PL_sv_undef;
5308 Safefree(tmparyval);
5312 mg_set(MUTABLE_SV(ary));
5320 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5321 register AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5322 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5325 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5328 ENTER_with_name("call_PUSH");
5329 call_method("PUSH",G_SCALAR|G_DISCARD);
5330 LEAVE_with_name("call_PUSH");
5334 PL_delaymagic = DM_DELAY;
5335 for (++MARK; MARK <= SP; MARK++) {
5336 SV * const sv = newSV(0);
5338 sv_setsv(sv, *MARK);
5339 av_store(ary, AvFILLp(ary)+1, sv);
5341 if (PL_delaymagic & DM_ARRAY_ISA)
5342 mg_set(MUTABLE_SV(ary));
5347 if (OP_GIMME(PL_op, 0) != G_VOID) {
5348 PUSHi( AvFILL(ary) + 1 );
5357 AV * const av = PL_op->op_flags & OPf_SPECIAL
5358 ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs));
5359 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
5363 (void)sv_2mortal(sv);
5370 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
5371 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
5372 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5375 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5378 ENTER_with_name("call_UNSHIFT");
5379 call_method("UNSHIFT",G_SCALAR|G_DISCARD);
5380 LEAVE_with_name("call_UNSHIFT");
5385 av_unshift(ary, SP - MARK);
5387 SV * const sv = newSVsv(*++MARK);
5388 (void)av_store(ary, i++, sv);
5392 if (OP_GIMME(PL_op, 0) != G_VOID) {
5393 PUSHi( AvFILL(ary) + 1 );
5402 if (GIMME == G_ARRAY) {
5403 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5407 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5408 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5409 av = MUTABLE_AV((*SP));
5410 /* In-place reversing only happens in void context for the array
5411 * assignment. We don't need to push anything on the stack. */
5414 if (SvMAGICAL(av)) {
5416 register SV *tmp = sv_newmortal();
5417 /* For SvCANEXISTDELETE */
5420 bool can_preserve = SvCANEXISTDELETE(av);
5422 for (i = 0, j = av_len(av); i < j; ++i, --j) {
5423 register SV *begin, *end;
5426 if (!av_exists(av, i)) {
5427 if (av_exists(av, j)) {
5428 register SV *sv = av_delete(av, j, 0);
5429 begin = *av_fetch(av, i, TRUE);
5430 sv_setsv_mg(begin, sv);
5434 else if (!av_exists(av, j)) {
5435 register SV *sv = av_delete(av, i, 0);
5436 end = *av_fetch(av, j, TRUE);
5437 sv_setsv_mg(end, sv);
5442 begin = *av_fetch(av, i, TRUE);
5443 end = *av_fetch(av, j, TRUE);
5444 sv_setsv(tmp, begin);
5445 sv_setsv_mg(begin, end);
5446 sv_setsv_mg(end, tmp);
5450 SV **begin = AvARRAY(av);
5453 SV **end = begin + AvFILLp(av);
5455 while (begin < end) {
5456 register SV * const tmp = *begin;
5467 register SV * const tmp = *MARK;
5471 /* safe as long as stack cannot get extended in the above */
5477 register char *down;
5482 SvUTF8_off(TARG); /* decontaminate */
5484 do_join(TARG, &PL_sv_no, MARK, SP);
5486 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv());
5487 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
5488 report_uninit(TARG);
5491 up = SvPV_force(TARG, len);
5493 if (DO_UTF8(TARG)) { /* first reverse each character */
5494 U8* s = (U8*)SvPVX(TARG);
5495 const U8* send = (U8*)(s + len);
5497 if (UTF8_IS_INVARIANT(*s)) {
5502 if (!utf8_to_uvchr(s, 0))
5506 down = (char*)(s - 1);
5507 /* reverse this character */
5511 *down-- = (char)tmp;
5517 down = SvPVX(TARG) + len - 1;
5521 *down-- = (char)tmp;
5523 (void)SvPOK_only_UTF8(TARG);
5535 register IV limit = POPi; /* note, negative is forever */
5536 SV * const sv = POPs;
5538 register const char *s = SvPV_const(sv, len);
5539 const bool do_utf8 = DO_UTF8(sv);
5540 const char *strend = s + len;
5542 register REGEXP *rx;
5544 register const char *m;
5546 const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
5547 I32 maxiters = slen + 10;
5548 I32 trailing_empty = 0;
5550 const I32 origlimit = limit;
5553 const I32 gimme = GIMME_V;
5555 const I32 oldsave = PL_savestack_ix;
5556 U32 make_mortal = SVs_TEMP;
5561 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*);
5566 DIE(aTHX_ "panic: pp_split");
5569 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
5570 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
5572 RX_MATCH_UTF8_set(rx, do_utf8);
5575 if (pm->op_pmreplrootu.op_pmtargetoff) {
5576 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
5579 if (pm->op_pmreplrootu.op_pmtargetgv) {
5580 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
5585 if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
5591 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
5593 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
5600 for (i = AvFILLp(ary); i >= 0; i--)
5601 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
5603 /* temporarily switch stacks */
5604 SAVESWITCHSTACK(PL_curstack, ary);
5608 base = SP - PL_stack_base;
5610 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
5612 while (*s == ' ' || is_utf8_space((U8*)s))
5615 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5616 while (isSPACE_LC(*s))
5624 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) {
5628 gimme_scalar = gimme == G_SCALAR && !ary;
5631 limit = maxiters + 2;
5632 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
5635 /* this one uses 'm' and is a negative test */
5637 while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
5638 const int t = UTF8SKIP(m);
5639 /* is_utf8_space returns FALSE for malform utf8 */
5646 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5647 while (m < strend && !isSPACE_LC(*m))
5650 while (m < strend && !isSPACE(*m))
5663 dstr = newSVpvn_flags(s, m-s,
5664 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5668 /* skip the whitespace found last */
5670 s = m + UTF8SKIP(m);
5674 /* this one uses 's' and is a positive test */
5676 while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
5679 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
5680 while (s < strend && isSPACE_LC(*s))
5683 while (s < strend && isSPACE(*s))
5688 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
5690 for (m = s; m < strend && *m != '\n'; m++)
5703 dstr = newSVpvn_flags(s, m-s,
5704 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5710 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
5712 Pre-extend the stack, either the number of bytes or
5713 characters in the string or a limited amount, triggered by:
5715 my ($x, $y) = split //, $str;
5719 if (!gimme_scalar) {
5720 const U32 items = limit - 1;
5729 /* keep track of how many bytes we skip over */
5739 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
5752 dstr = newSVpvn(s, 1);
5768 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
5769 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
5770 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
5771 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
5772 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
5773 SV * const csv = CALLREG_INTUIT_STRING(rx);
5775 len = RX_MINLENRET(rx);
5776 if (len == 1 && !RX_UTF8(rx) && !tail) {
5777 const char c = *SvPV_nolen_const(csv);
5779 for (m = s; m < strend && *m != c; m++)
5790 dstr = newSVpvn_flags(s, m-s,
5791 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5794 /* The rx->minlen is in characters but we want to step
5795 * s ahead by bytes. */
5797 s = (char*)utf8_hop((U8*)m, len);
5799 s = m + len; /* Fake \n at the end */
5803 while (s < strend && --limit &&
5804 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
5805 csv, multiline ? FBMrf_MULTILINE : 0)) )
5814 dstr = newSVpvn_flags(s, m-s,
5815 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5818 /* The rx->minlen is in characters but we want to step
5819 * s ahead by bytes. */
5821 s = (char*)utf8_hop((U8*)m, len);
5823 s = m + len; /* Fake \n at the end */
5828 maxiters += slen * RX_NPARENS(rx);
5829 while (s < strend && --limit)
5833 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
5834 sv, NULL, SvSCREAM(sv) ? REXEC_SCREAM : 0);
5836 if (rex_return == 0)
5838 TAINT_IF(RX_MATCH_TAINTED(rx));
5839 if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
5842 orig = RX_SUBBEG(rx);
5844 strend = s + (strend - m);
5846 m = RX_OFFS(rx)[0].start + orig;
5855 dstr = newSVpvn_flags(s, m-s,
5856 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5859 if (RX_NPARENS(rx)) {
5861 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
5862 s = RX_OFFS(rx)[i].start + orig;
5863 m = RX_OFFS(rx)[i].end + orig;
5865 /* japhy (07/27/01) -- the (m && s) test doesn't catch
5866 parens that didn't match -- they should be set to
5867 undef, not the empty string */
5875 if (m >= orig && s >= orig) {
5876 dstr = newSVpvn_flags(s, m-s,
5877 (do_utf8 ? SVf_UTF8 : 0)
5881 dstr = &PL_sv_undef; /* undef, not "" */
5887 s = RX_OFFS(rx)[0].end + orig;
5891 if (!gimme_scalar) {
5892 iters = (SP - PL_stack_base) - base;
5894 if (iters > maxiters)
5895 DIE(aTHX_ "Split loop");
5897 /* keep field after final delim? */
5898 if (s < strend || (iters && origlimit)) {
5899 if (!gimme_scalar) {
5900 const STRLEN l = strend - s;
5901 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5906 else if (!origlimit) {
5908 iters -= trailing_empty;
5910 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
5911 if (TOPs && !make_mortal)
5913 *SP-- = &PL_sv_undef;
5920 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
5924 if (SvSMAGICAL(ary)) {
5926 mg_set(MUTABLE_SV(ary));
5929 if (gimme == G_ARRAY) {
5931 Copy(AvARRAY(ary), SP + 1, iters, SV*);
5938 ENTER_with_name("call_PUSH");
5939 call_method("PUSH",G_SCALAR|G_DISCARD);
5940 LEAVE_with_name("call_PUSH");
5942 if (gimme == G_ARRAY) {
5944 /* EXTEND should not be needed - we just popped them */
5946 for (i=0; i < iters; i++) {
5947 SV **svp = av_fetch(ary, i, FALSE);
5948 PUSHs((svp) ? *svp : &PL_sv_undef);
5955 if (gimme == G_ARRAY)
5967 SV *const sv = PAD_SVl(PL_op->op_targ);
5969 if (SvPADSTALE(sv)) {
5972 RETURNOP(cLOGOP->op_other);
5974 RETURNOP(cLOGOP->op_next);
5983 assert(SvTYPE(retsv) != SVt_PVCV);
5985 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV) {
5986 retsv = refto(retsv);
5993 PP(unimplemented_op)
5996 const Optype op_type = PL_op->op_type;
5997 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope
5998 with out of range op numbers - it only "special" cases op_custom.
5999 Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
6000 if we get here for a custom op then that means that the custom op didn't
6001 have an implementation. Given that OP_NAME() looks up the custom op
6002 by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
6003 registers &PL_unimplemented_op as the address of their custom op.
6004 NULL doesn't generate a useful error message. "custom" does. */
6005 const char *const name = op_type >= OP_max
6006 ? "[out of range]" : PL_op_name[PL_op->op_type];
6007 if(OP_IS_SOCKET(op_type))
6008 DIE(aTHX_ PL_no_sock_func, name);
6009 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
6016 HV * const hv = (HV*)POPs;
6018 if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; }
6020 if (SvRMAGICAL(hv)) {
6021 MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied);
6023 XPUSHs(magic_scalarpack(hv, mg));
6028 XPUSHs(boolSV(HvUSEDKEYS(hv) != 0));
6034 * c-indentation-style: bsd
6036 * indent-tabs-mode: t
6039 * ex: set ts=8 sts=4 sw=4 noet: