3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
33 /* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
37 #ifdef NEED_GETPID_PROTO
38 extern Pid_t getpid (void);
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
45 #if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
49 /* variations on pp_null */
55 if (GIMME_V == G_SCALAR)
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
71 if (PL_op->op_flags & OPf_REF) {
75 if (GIMME == G_SCALAR)
76 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
81 if (gimme == G_ARRAY) {
82 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
84 if (SvMAGICAL(TARG)) {
86 for (i=0; i < (U32)maxarg; i++) {
87 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
88 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
92 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
96 else if (gimme == G_SCALAR) {
97 SV* const sv = sv_newmortal();
98 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
110 assert(SvTYPE(TARG) == SVt_PVHV);
112 if (PL_op->op_private & OPpLVAL_INTRO)
113 if (!(PL_op->op_private & OPpPAD_STATE))
114 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
115 if (PL_op->op_flags & OPf_REF)
118 if (GIMME == G_SCALAR)
119 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
123 if (gimme == G_ARRAY) {
126 else if (gimme == G_SCALAR) {
127 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
135 static const char S_no_symref_sv[] =
136 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
142 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
145 tryAMAGICunDEREF(to_gv);
148 if (SvTYPE(sv) == SVt_PVIO) {
149 GV * const gv = MUTABLE_GV(sv_newmortal());
150 gv_init(gv, 0, "", 0, 0);
151 GvIOp(gv) = MUTABLE_IO(sv);
152 SvREFCNT_inc_void_NN(sv);
155 else if (!isGV_with_GP(sv))
156 DIE(aTHX_ "Not a GLOB reference");
159 if (!isGV_with_GP(sv)) {
160 if (!SvOK(sv) && sv != &PL_sv_undef) {
161 /* If this is a 'my' scalar and flag is set then vivify
165 Perl_croak_no_modify(aTHX);
166 if (PL_op->op_private & OPpDEREF) {
168 if (cUNOP->op_targ) {
170 SV * const namesv = PAD_SV(cUNOP->op_targ);
171 const char * const name = SvPV(namesv, len);
172 gv = MUTABLE_GV(newSV(0));
173 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
176 const char * const name = CopSTASHPV(PL_curcop);
179 prepare_SV_for_RV(sv);
180 SvRV_set(sv, MUTABLE_SV(gv));
185 if (PL_op->op_flags & OPf_REF ||
186 PL_op->op_private & HINT_STRICT_REFS)
187 DIE(aTHX_ PL_no_usym, "a symbol");
188 if (ckWARN(WARN_UNINITIALIZED))
192 if ((PL_op->op_flags & OPf_SPECIAL) &&
193 !(PL_op->op_flags & OPf_MOD))
195 SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
197 && (!is_gv_magical_sv(sv,0)
198 || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
205 if (PL_op->op_private & HINT_STRICT_REFS)
206 DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
207 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
208 == OPpDONT_INIT_GV) {
209 /* We are the target of a coderef assignment. Return
210 the scalar unchanged, and let pp_sasssign deal with
214 sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD, SVt_PVGV));
216 /* FAKE globs in the symbol table cause weird bugs (#77810) */
217 if (sv) SvFAKE_off(sv);
220 if (PL_op->op_private & OPpLVAL_INTRO)
221 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
222 if (sv && SvFAKE(sv)) {
223 SV *newsv = sv_newmortal();
224 sv_setsv_flags(newsv, sv, 0);
232 /* Helper function for pp_rv2sv and pp_rv2av */
234 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
235 const svtype type, SV ***spp)
240 PERL_ARGS_ASSERT_SOFTREF2XV;
242 if (PL_op->op_private & HINT_STRICT_REFS) {
244 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
246 Perl_die(aTHX_ PL_no_usym, what);
249 if (PL_op->op_flags & OPf_REF)
250 Perl_die(aTHX_ PL_no_usym, what);
251 if (ckWARN(WARN_UNINITIALIZED))
253 if (type != SVt_PV && GIMME_V == G_ARRAY) {
257 **spp = &PL_sv_undef;
260 if ((PL_op->op_flags & OPf_SPECIAL) &&
261 !(PL_op->op_flags & OPf_MOD))
263 gv = gv_fetchsv(sv, 0, type);
265 && (!is_gv_magical_sv(sv,0)
266 || !(gv = gv_fetchsv(sv, GV_ADD, type))))
268 **spp = &PL_sv_undef;
273 gv = gv_fetchsv(sv, GV_ADD, type);
283 if (!(PL_op->op_private & OPpDEREFed))
286 tryAMAGICunDEREF(to_sv);
289 switch (SvTYPE(sv)) {
295 DIE(aTHX_ "Not a SCALAR reference");
302 if (!isGV_with_GP(gv)) {
303 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
309 if (PL_op->op_flags & OPf_MOD) {
310 if (PL_op->op_private & OPpLVAL_INTRO) {
311 if (cUNOP->op_first->op_type == OP_NULL)
312 sv = save_scalar(MUTABLE_GV(TOPs));
314 sv = save_scalar(gv);
316 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
318 else if (PL_op->op_private & OPpDEREF)
319 vivify_ref(sv, PL_op->op_private & OPpDEREF);
328 AV * const av = MUTABLE_AV(TOPs);
329 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
331 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
333 *sv = newSV_type(SVt_PVMG);
334 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
338 SETs(sv_2mortal(newSViv(
339 AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
349 if (PL_op->op_flags & OPf_MOD || LVRET) {
350 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
351 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
353 LvTARG(ret) = SvREFCNT_inc_simple(sv);
354 PUSHs(ret); /* no SvSETMAGIC */
358 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
359 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
360 if (mg && mg->mg_len >= 0) {
365 PUSHi(i + CopARYBASE_get(PL_curcop));
378 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
380 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
383 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
384 /* (But not in defined().) */
386 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
389 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
390 if ((PL_op->op_private & OPpLVAL_INTRO)) {
391 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
394 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
397 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
401 cv = MUTABLE_CV(&PL_sv_undef);
402 SETs(MUTABLE_SV(cv));
412 SV *ret = &PL_sv_undef;
414 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
415 const char * s = SvPVX_const(TOPs);
416 if (strnEQ(s, "CORE::", 6)) {
417 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
418 if (code < 0) { /* Overridable. */
419 #define MAX_ARGS_OP ((sizeof(I32) - 1) * 2)
420 int i = 0, n = 0, seen_question = 0, defgv = 0;
422 char str[ MAX_ARGS_OP * 2 + 2 ]; /* One ';', one '\0' */
424 if (code == -KEY_chop || code == -KEY_chomp
425 || code == -KEY_exec || code == -KEY_system)
427 if (code == -KEY_mkdir) {
428 ret = newSVpvs_flags("_;$", SVs_TEMP);
431 if (code == -KEY_keys || code == -KEY_values || code == -KEY_each) {
432 ret = newSVpvs_flags("+", SVs_TEMP);
435 if (code == -KEY_push || code == -KEY_unshift) {
436 ret = newSVpvs_flags("+@", SVs_TEMP);
439 if (code == -KEY_pop || code == -KEY_shift) {
440 ret = newSVpvs_flags(";+", SVs_TEMP);
443 if (code == -KEY_splice) {
444 ret = newSVpvs_flags("+;$$@", SVs_TEMP);
447 if (code == -KEY_tied || code == -KEY_untie) {
448 ret = newSVpvs_flags("\\[$@%*]", SVs_TEMP);
451 if (code == -KEY_tie) {
452 ret = newSVpvs_flags("\\[$@%*]$@", SVs_TEMP);
455 if (code == -KEY_readpipe) {
456 s = "CORE::backtick";
458 while (i < MAXO) { /* The slow way. */
459 if (strEQ(s + 6, PL_op_name[i])
460 || strEQ(s + 6, PL_op_desc[i]))
466 goto nonesuch; /* Should not happen... */
468 defgv = PL_opargs[i] & OA_DEFGV;
469 oa = PL_opargs[i] >> OASHIFT;
471 if (oa & OA_OPTIONAL && !seen_question && !defgv) {
475 if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF
476 && (oa & (OA_OPTIONAL - 1)) <= OA_SCALARREF
477 /* But globs are already references (kinda) */
478 && (oa & (OA_OPTIONAL - 1)) != OA_FILEREF
482 str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)];
485 if (defgv && str[n - 1] == '$')
488 ret = newSVpvn_flags(str, n - 1, SVs_TEMP);
490 else if (code) /* Non-Overridable */
492 else { /* None such */
494 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
498 cv = sv_2cv(TOPs, &stash, &gv, 0);
500 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
509 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
511 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
513 PUSHs(MUTABLE_SV(cv));
527 if (GIMME != G_ARRAY) {
531 *MARK = &PL_sv_undef;
532 *MARK = refto(*MARK);
536 EXTEND_MORTAL(SP - MARK);
538 *MARK = refto(*MARK);
543 S_refto(pTHX_ SV *sv)
548 PERL_ARGS_ASSERT_REFTO;
550 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
553 if (!(sv = LvTARG(sv)))
556 SvREFCNT_inc_void_NN(sv);
558 else if (SvTYPE(sv) == SVt_PVAV) {
559 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
560 av_reify(MUTABLE_AV(sv));
562 SvREFCNT_inc_void_NN(sv);
564 else if (SvPADTMP(sv) && !IS_PADGV(sv))
568 SvREFCNT_inc_void_NN(sv);
571 sv_upgrade(rv, SVt_IV);
581 SV * const sv = POPs;
586 if (!sv || !SvROK(sv))
589 pv = sv_reftype(SvRV(sv),TRUE);
590 PUSHp(pv, strlen(pv));
600 stash = CopSTASH(PL_curcop);
602 SV * const ssv = POPs;
606 if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
607 Perl_croak(aTHX_ "Attempt to bless into a reference");
608 ptr = SvPV_const(ssv,len);
610 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
611 "Explicit blessing to '' (assuming package main)");
612 stash = gv_stashpvn(ptr, len, GV_ADD);
615 (void)sv_bless(TOPs, stash);
624 const char * const elem = SvPV_nolen_const(sv);
625 GV * const gv = MUTABLE_GV(POPs);
630 /* elem will always be NUL terminated. */
631 const char * const second_letter = elem + 1;
634 if (strEQ(second_letter, "RRAY"))
635 tmpRef = MUTABLE_SV(GvAV(gv));
638 if (strEQ(second_letter, "ODE"))
639 tmpRef = MUTABLE_SV(GvCVu(gv));
642 if (strEQ(second_letter, "ILEHANDLE")) {
643 /* finally deprecated in 5.8.0 */
644 deprecate("*glob{FILEHANDLE}");
645 tmpRef = MUTABLE_SV(GvIOp(gv));
648 if (strEQ(second_letter, "ORMAT"))
649 tmpRef = MUTABLE_SV(GvFORM(gv));
652 if (strEQ(second_letter, "LOB"))
653 tmpRef = MUTABLE_SV(gv);
656 if (strEQ(second_letter, "ASH"))
657 tmpRef = MUTABLE_SV(GvHV(gv));
660 if (*second_letter == 'O' && !elem[2])
661 tmpRef = MUTABLE_SV(GvIOp(gv));
664 if (strEQ(second_letter, "AME"))
665 sv = newSVhek(GvNAME_HEK(gv));
668 if (strEQ(second_letter, "ACKAGE")) {
669 const HV * const stash = GvSTASH(gv);
670 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
671 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
675 if (strEQ(second_letter, "CALAR"))
690 /* Pattern matching */
695 register unsigned char *s;
698 register I32 *sfirst;
702 if (sv == PL_lastscream) {
706 s = (unsigned char*)(SvPV(sv, len));
708 if (pos <= 0 || !SvPOK(sv) || SvUTF8(sv)) {
709 /* No point in studying a zero length string, and not safe to study
710 anything that doesn't appear to be a simple scalar (and hence might
711 change between now and when the regexp engine runs without our set
712 magic ever running) such as a reference to an object with overloaded
718 SvSCREAM_off(PL_lastscream);
719 SvREFCNT_dec(PL_lastscream);
721 PL_lastscream = SvREFCNT_inc_simple(sv);
723 s = (unsigned char*)(SvPV(sv, len));
727 if (pos > PL_maxscream) {
728 if (PL_maxscream < 0) {
729 PL_maxscream = pos + 80;
730 Newx(PL_screamfirst, 256, I32);
731 Newx(PL_screamnext, PL_maxscream, I32);
734 PL_maxscream = pos + pos / 4;
735 Renew(PL_screamnext, PL_maxscream, I32);
739 sfirst = PL_screamfirst;
740 snext = PL_screamnext;
742 if (!sfirst || !snext)
743 DIE(aTHX_ "do_study: out of memory");
745 for (ch = 256; ch; --ch)
750 register const I32 ch = s[pos];
752 snext[pos] = sfirst[ch] - pos;
759 /* piggyback on m//g magic */
760 sv_magic(sv, NULL, PERL_MAGIC_regex_global, NULL, 0);
769 if (PL_op->op_flags & OPf_STACKED)
771 else if (PL_op->op_private & OPpTARGET_MY)
777 TARG = sv_newmortal();
782 /* Lvalue operators. */
794 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
796 do_chop(TARG, *++MARK);
805 SETi(do_chomp(TOPs));
811 dVAR; dSP; dMARK; dTARGET;
812 register I32 count = 0;
815 count += do_chomp(POPs);
825 if (!PL_op->op_private) {
834 SV_CHECK_THINKFIRST_COW_DROP(sv);
836 switch (SvTYPE(sv)) {
840 av_undef(MUTABLE_AV(sv));
843 hv_undef(MUTABLE_HV(sv));
846 if (cv_const_sv((const CV *)sv))
847 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
848 CvANON((const CV *)sv) ? "(anonymous)"
849 : GvENAME(CvGV((const CV *)sv)));
853 /* let user-undef'd sub keep its identity */
854 GV* const gv = CvGV((const CV *)sv);
855 cv_undef(MUTABLE_CV(sv));
856 CvGV_set(MUTABLE_CV(sv), gv);
861 SvSetMagicSV(sv, &PL_sv_undef);
864 else if (isGV_with_GP(sv)) {
869 if((stash = GvHV((const GV *)sv)) && HvENAME_get(stash))
870 mro_isa_changed_in(stash);
871 /* undef *Pkg::meth_name ... */
872 else if(GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
873 && HvENAME_get(stash))
874 mro_method_changed_in(stash);
876 gp_free(MUTABLE_GV(sv));
878 GvGP(sv) = gp_ref(gp);
880 GvLINE(sv) = CopLINE(PL_curcop);
881 GvEGV(sv) = MUTABLE_GV(sv);
887 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
902 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
903 Perl_croak_no_modify(aTHX);
904 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
905 && SvIVX(TOPs) != IV_MIN)
907 SvIV_set(TOPs, SvIVX(TOPs) - 1);
908 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
919 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
920 Perl_croak_no_modify(aTHX);
922 TARG = sv_newmortal();
923 sv_setsv(TARG, TOPs);
924 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
925 && SvIVX(TOPs) != IV_MAX)
927 SvIV_set(TOPs, SvIVX(TOPs) + 1);
928 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
933 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
943 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
944 Perl_croak_no_modify(aTHX);
946 TARG = sv_newmortal();
947 sv_setsv(TARG, TOPs);
948 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
949 && SvIVX(TOPs) != IV_MIN)
951 SvIV_set(TOPs, SvIVX(TOPs) - 1);
952 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
961 /* Ordinary operators. */
965 dVAR; dSP; dATARGET; SV *svl, *svr;
966 #ifdef PERL_PRESERVE_IVUV
969 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
972 #ifdef PERL_PRESERVE_IVUV
973 /* For integer to integer power, we do the calculation by hand wherever
974 we're sure it is safe; otherwise we call pow() and try to convert to
975 integer afterwards. */
977 SvIV_please_nomg(svr);
979 SvIV_please_nomg(svl);
988 const IV iv = SvIVX(svr);
992 goto float_it; /* Can't do negative powers this way. */
996 baseuok = SvUOK(svl);
1000 const IV iv = SvIVX(svl);
1003 baseuok = TRUE; /* effectively it's a UV now */
1005 baseuv = -iv; /* abs, baseuok == false records sign */
1008 /* now we have integer ** positive integer. */
1011 /* foo & (foo - 1) is zero only for a power of 2. */
1012 if (!(baseuv & (baseuv - 1))) {
1013 /* We are raising power-of-2 to a positive integer.
1014 The logic here will work for any base (even non-integer
1015 bases) but it can be less accurate than
1016 pow (base,power) or exp (power * log (base)) when the
1017 intermediate values start to spill out of the mantissa.
1018 With powers of 2 we know this can't happen.
1019 And powers of 2 are the favourite thing for perl
1020 programmers to notice ** not doing what they mean. */
1022 NV base = baseuok ? baseuv : -(NV)baseuv;
1027 while (power >>= 1) {
1035 SvIV_please_nomg(svr);
1038 register unsigned int highbit = 8 * sizeof(UV);
1039 register unsigned int diff = 8 * sizeof(UV);
1040 while (diff >>= 1) {
1042 if (baseuv >> highbit) {
1046 /* we now have baseuv < 2 ** highbit */
1047 if (power * highbit <= 8 * sizeof(UV)) {
1048 /* result will definitely fit in UV, so use UV math
1049 on same algorithm as above */
1050 register UV result = 1;
1051 register UV base = baseuv;
1052 const bool odd_power = cBOOL(power & 1);
1056 while (power >>= 1) {
1063 if (baseuok || !odd_power)
1064 /* answer is positive */
1066 else if (result <= (UV)IV_MAX)
1067 /* answer negative, fits in IV */
1068 SETi( -(IV)result );
1069 else if (result == (UV)IV_MIN)
1070 /* 2's complement assumption: special case IV_MIN */
1073 /* answer negative, doesn't fit */
1074 SETn( -(NV)result );
1084 NV right = SvNV_nomg(svr);
1085 NV left = SvNV_nomg(svl);
1088 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1090 We are building perl with long double support and are on an AIX OS
1091 afflicted with a powl() function that wrongly returns NaNQ for any
1092 negative base. This was reported to IBM as PMR #23047-379 on
1093 03/06/2006. The problem exists in at least the following versions
1094 of AIX and the libm fileset, and no doubt others as well:
1096 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1097 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1098 AIX 5.2.0 bos.adt.libm 5.2.0.85
1100 So, until IBM fixes powl(), we provide the following workaround to
1101 handle the problem ourselves. Our logic is as follows: for
1102 negative bases (left), we use fmod(right, 2) to check if the
1103 exponent is an odd or even integer:
1105 - if odd, powl(left, right) == -powl(-left, right)
1106 - if even, powl(left, right) == powl(-left, right)
1108 If the exponent is not an integer, the result is rightly NaNQ, so
1109 we just return that (as NV_NAN).
1113 NV mod2 = Perl_fmod( right, 2.0 );
1114 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1115 SETn( -Perl_pow( -left, right) );
1116 } else if (mod2 == 0.0) { /* even integer */
1117 SETn( Perl_pow( -left, right) );
1118 } else { /* fractional power */
1122 SETn( Perl_pow( left, right) );
1125 SETn( Perl_pow( left, right) );
1126 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1128 #ifdef PERL_PRESERVE_IVUV
1130 SvIV_please_nomg(svr);
1138 dVAR; dSP; dATARGET; SV *svl, *svr;
1139 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1142 #ifdef PERL_PRESERVE_IVUV
1143 SvIV_please_nomg(svr);
1145 /* Unless the left argument is integer in range we are going to have to
1146 use NV maths. Hence only attempt to coerce the right argument if
1147 we know the left is integer. */
1148 /* Left operand is defined, so is it IV? */
1149 SvIV_please_nomg(svl);
1151 bool auvok = SvUOK(svl);
1152 bool buvok = SvUOK(svr);
1153 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1154 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1163 const IV aiv = SvIVX(svl);
1166 auvok = TRUE; /* effectively it's a UV now */
1168 alow = -aiv; /* abs, auvok == false records sign */
1174 const IV biv = SvIVX(svr);
1177 buvok = TRUE; /* effectively it's a UV now */
1179 blow = -biv; /* abs, buvok == false records sign */
1183 /* If this does sign extension on unsigned it's time for plan B */
1184 ahigh = alow >> (4 * sizeof (UV));
1186 bhigh = blow >> (4 * sizeof (UV));
1188 if (ahigh && bhigh) {
1190 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1191 which is overflow. Drop to NVs below. */
1192 } else if (!ahigh && !bhigh) {
1193 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1194 so the unsigned multiply cannot overflow. */
1195 const UV product = alow * blow;
1196 if (auvok == buvok) {
1197 /* -ve * -ve or +ve * +ve gives a +ve result. */
1201 } else if (product <= (UV)IV_MIN) {
1202 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1203 /* -ve result, which could overflow an IV */
1205 SETi( -(IV)product );
1207 } /* else drop to NVs below. */
1209 /* One operand is large, 1 small */
1212 /* swap the operands */
1214 bhigh = blow; /* bhigh now the temp var for the swap */
1218 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1219 multiplies can't overflow. shift can, add can, -ve can. */
1220 product_middle = ahigh * blow;
1221 if (!(product_middle & topmask)) {
1222 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1224 product_middle <<= (4 * sizeof (UV));
1225 product_low = alow * blow;
1227 /* as for pp_add, UV + something mustn't get smaller.
1228 IIRC ANSI mandates this wrapping *behaviour* for
1229 unsigned whatever the actual representation*/
1230 product_low += product_middle;
1231 if (product_low >= product_middle) {
1232 /* didn't overflow */
1233 if (auvok == buvok) {
1234 /* -ve * -ve or +ve * +ve gives a +ve result. */
1236 SETu( product_low );
1238 } else if (product_low <= (UV)IV_MIN) {
1239 /* 2s complement assumption again */
1240 /* -ve result, which could overflow an IV */
1242 SETi( -(IV)product_low );
1244 } /* else drop to NVs below. */
1246 } /* product_middle too large */
1247 } /* ahigh && bhigh */
1252 NV right = SvNV_nomg(svr);
1253 NV left = SvNV_nomg(svl);
1255 SETn( left * right );
1262 dVAR; dSP; dATARGET; SV *svl, *svr;
1263 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1266 /* Only try to do UV divide first
1267 if ((SLOPPYDIVIDE is true) or
1268 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1270 The assumption is that it is better to use floating point divide
1271 whenever possible, only doing integer divide first if we can't be sure.
1272 If NV_PRESERVES_UV is true then we know at compile time that no UV
1273 can be too large to preserve, so don't need to compile the code to
1274 test the size of UVs. */
1277 # define PERL_TRY_UV_DIVIDE
1278 /* ensure that 20./5. == 4. */
1280 # ifdef PERL_PRESERVE_IVUV
1281 # ifndef NV_PRESERVES_UV
1282 # define PERL_TRY_UV_DIVIDE
1287 #ifdef PERL_TRY_UV_DIVIDE
1288 SvIV_please_nomg(svr);
1290 SvIV_please_nomg(svl);
1292 bool left_non_neg = SvUOK(svl);
1293 bool right_non_neg = SvUOK(svr);
1297 if (right_non_neg) {
1301 const IV biv = SvIVX(svr);
1304 right_non_neg = TRUE; /* effectively it's a UV now */
1310 /* historically undef()/0 gives a "Use of uninitialized value"
1311 warning before dieing, hence this test goes here.
1312 If it were immediately before the second SvIV_please, then
1313 DIE() would be invoked before left was even inspected, so
1314 no inpsection would give no warning. */
1316 DIE(aTHX_ "Illegal division by zero");
1322 const IV aiv = SvIVX(svl);
1325 left_non_neg = TRUE; /* effectively it's a UV now */
1334 /* For sloppy divide we always attempt integer division. */
1336 /* Otherwise we only attempt it if either or both operands
1337 would not be preserved by an NV. If both fit in NVs
1338 we fall through to the NV divide code below. However,
1339 as left >= right to ensure integer result here, we know that
1340 we can skip the test on the right operand - right big
1341 enough not to be preserved can't get here unless left is
1344 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1347 /* Integer division can't overflow, but it can be imprecise. */
1348 const UV result = left / right;
1349 if (result * right == left) {
1350 SP--; /* result is valid */
1351 if (left_non_neg == right_non_neg) {
1352 /* signs identical, result is positive. */
1356 /* 2s complement assumption */
1357 if (result <= (UV)IV_MIN)
1358 SETi( -(IV)result );
1360 /* It's exact but too negative for IV. */
1361 SETn( -(NV)result );
1364 } /* tried integer divide but it was not an integer result */
1365 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1366 } /* left wasn't SvIOK */
1367 } /* right wasn't SvIOK */
1368 #endif /* PERL_TRY_UV_DIVIDE */
1370 NV right = SvNV_nomg(svr);
1371 NV left = SvNV_nomg(svl);
1372 (void)POPs;(void)POPs;
1373 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1374 if (! Perl_isnan(right) && right == 0.0)
1378 DIE(aTHX_ "Illegal division by zero");
1379 PUSHn( left / right );
1386 dVAR; dSP; dATARGET;
1387 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1391 bool left_neg = FALSE;
1392 bool right_neg = FALSE;
1393 bool use_double = FALSE;
1394 bool dright_valid = FALSE;
1397 SV * const svr = TOPs;
1398 SV * const svl = TOPm1s;
1399 SvIV_please_nomg(svr);
1401 right_neg = !SvUOK(svr);
1405 const IV biv = SvIVX(svr);
1408 right_neg = FALSE; /* effectively it's a UV now */
1415 dright = SvNV_nomg(svr);
1416 right_neg = dright < 0;
1419 if (dright < UV_MAX_P1) {
1420 right = U_V(dright);
1421 dright_valid = TRUE; /* In case we need to use double below. */
1427 /* At this point use_double is only true if right is out of range for
1428 a UV. In range NV has been rounded down to nearest UV and
1429 use_double false. */
1430 SvIV_please_nomg(svl);
1431 if (!use_double && SvIOK(svl)) {
1433 left_neg = !SvUOK(svl);
1437 const IV aiv = SvIVX(svl);
1440 left_neg = FALSE; /* effectively it's a UV now */
1448 dleft = SvNV_nomg(svl);
1449 left_neg = dleft < 0;
1453 /* This should be exactly the 5.6 behaviour - if left and right are
1454 both in range for UV then use U_V() rather than floor. */
1456 if (dleft < UV_MAX_P1) {
1457 /* right was in range, so is dleft, so use UVs not double.
1461 /* left is out of range for UV, right was in range, so promote
1462 right (back) to double. */
1464 /* The +0.5 is used in 5.6 even though it is not strictly
1465 consistent with the implicit +0 floor in the U_V()
1466 inside the #if 1. */
1467 dleft = Perl_floor(dleft + 0.5);
1470 dright = Perl_floor(dright + 0.5);
1481 DIE(aTHX_ "Illegal modulus zero");
1483 dans = Perl_fmod(dleft, dright);
1484 if ((left_neg != right_neg) && dans)
1485 dans = dright - dans;
1488 sv_setnv(TARG, dans);
1494 DIE(aTHX_ "Illegal modulus zero");
1497 if ((left_neg != right_neg) && ans)
1500 /* XXX may warn: unary minus operator applied to unsigned type */
1501 /* could change -foo to be (~foo)+1 instead */
1502 if (ans <= ~((UV)IV_MAX)+1)
1503 sv_setiv(TARG, ~ans+1);
1505 sv_setnv(TARG, -(NV)ans);
1508 sv_setuv(TARG, ans);
1517 dVAR; dSP; dATARGET;
1521 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1522 /* TODO: think of some way of doing list-repeat overloading ??? */
1527 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1533 const UV uv = SvUV_nomg(sv);
1535 count = IV_MAX; /* The best we can do? */
1539 const IV iv = SvIV_nomg(sv);
1546 else if (SvNOKp(sv)) {
1547 const NV nv = SvNV_nomg(sv);
1554 count = SvIV_nomg(sv);
1556 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1558 static const char oom_list_extend[] = "Out of memory during list extend";
1559 const I32 items = SP - MARK;
1560 const I32 max = items * count;
1562 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1563 /* Did the max computation overflow? */
1564 if (items > 0 && max > 0 && (max < items || max < count))
1565 Perl_croak(aTHX_ oom_list_extend);
1570 /* This code was intended to fix 20010809.028:
1573 for (($x =~ /./g) x 2) {
1574 print chop; # "abcdabcd" expected as output.
1577 * but that change (#11635) broke this code:
1579 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1581 * I can't think of a better fix that doesn't introduce
1582 * an efficiency hit by copying the SVs. The stack isn't
1583 * refcounted, and mortalisation obviously doesn't
1584 * Do The Right Thing when the stack has more than
1585 * one pointer to the same mortal value.
1589 *SP = sv_2mortal(newSVsv(*SP));
1599 repeatcpy((char*)(MARK + items), (char*)MARK,
1600 items * sizeof(const SV *), count - 1);
1603 else if (count <= 0)
1606 else { /* Note: mark already snarfed by pp_list */
1607 SV * const tmpstr = POPs;
1610 static const char oom_string_extend[] =
1611 "Out of memory during string extend";
1614 sv_setsv_nomg(TARG, tmpstr);
1615 SvPV_force_nomg(TARG, len);
1616 isutf = DO_UTF8(TARG);
1621 const STRLEN max = (UV)count * len;
1622 if (len > MEM_SIZE_MAX / count)
1623 Perl_croak(aTHX_ oom_string_extend);
1624 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1625 SvGROW(TARG, max + 1);
1626 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1627 SvCUR_set(TARG, SvCUR(TARG) * count);
1629 *SvEND(TARG) = '\0';
1632 (void)SvPOK_only_UTF8(TARG);
1634 (void)SvPOK_only(TARG);
1636 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1637 /* The parser saw this as a list repeat, and there
1638 are probably several items on the stack. But we're
1639 in scalar context, and there's no pp_list to save us
1640 now. So drop the rest of the items -- robin@kitsite.com
1652 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1653 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1656 useleft = USE_LEFT(svl);
1657 #ifdef PERL_PRESERVE_IVUV
1658 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1659 "bad things" happen if you rely on signed integers wrapping. */
1660 SvIV_please_nomg(svr);
1662 /* Unless the left argument is integer in range we are going to have to
1663 use NV maths. Hence only attempt to coerce the right argument if
1664 we know the left is integer. */
1665 register UV auv = 0;
1671 a_valid = auvok = 1;
1672 /* left operand is undef, treat as zero. */
1674 /* Left operand is defined, so is it IV? */
1675 SvIV_please_nomg(svl);
1677 if ((auvok = SvUOK(svl)))
1680 register const IV aiv = SvIVX(svl);
1683 auvok = 1; /* Now acting as a sign flag. */
1684 } else { /* 2s complement assumption for IV_MIN */
1692 bool result_good = 0;
1695 bool buvok = SvUOK(svr);
1700 register const IV biv = SvIVX(svr);
1707 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1708 else "IV" now, independent of how it came in.
1709 if a, b represents positive, A, B negative, a maps to -A etc
1714 all UV maths. negate result if A negative.
1715 subtract if signs same, add if signs differ. */
1717 if (auvok ^ buvok) {
1726 /* Must get smaller */
1731 if (result <= buv) {
1732 /* result really should be -(auv-buv). as its negation
1733 of true value, need to swap our result flag */
1745 if (result <= (UV)IV_MIN)
1746 SETi( -(IV)result );
1748 /* result valid, but out of range for IV. */
1749 SETn( -(NV)result );
1753 } /* Overflow, drop through to NVs. */
1758 NV value = SvNV_nomg(svr);
1762 /* left operand is undef, treat as zero - value */
1766 SETn( SvNV_nomg(svl) - value );
1773 dVAR; dSP; dATARGET; SV *svl, *svr;
1774 tryAMAGICbin_MG(lshift_amg, AMGf_assign);
1778 const IV shift = SvIV_nomg(svr);
1779 if (PL_op->op_private & HINT_INTEGER) {
1780 const IV i = SvIV_nomg(svl);
1784 const UV u = SvUV_nomg(svl);
1793 dVAR; dSP; dATARGET; SV *svl, *svr;
1794 tryAMAGICbin_MG(rshift_amg, AMGf_assign);
1798 const IV shift = SvIV_nomg(svr);
1799 if (PL_op->op_private & HINT_INTEGER) {
1800 const IV i = SvIV_nomg(svl);
1804 const UV u = SvUV_nomg(svl);
1814 tryAMAGICbin_MG(lt_amg, AMGf_set);
1815 #ifdef PERL_PRESERVE_IVUV
1816 SvIV_please_nomg(TOPs);
1818 SvIV_please_nomg(TOPm1s);
1819 if (SvIOK(TOPm1s)) {
1820 bool auvok = SvUOK(TOPm1s);
1821 bool buvok = SvUOK(TOPs);
1823 if (!auvok && !buvok) { /* ## IV < IV ## */
1824 const IV aiv = SvIVX(TOPm1s);
1825 const IV biv = SvIVX(TOPs);
1828 SETs(boolSV(aiv < biv));
1831 if (auvok && buvok) { /* ## UV < UV ## */
1832 const UV auv = SvUVX(TOPm1s);
1833 const UV buv = SvUVX(TOPs);
1836 SETs(boolSV(auv < buv));
1839 if (auvok) { /* ## UV < IV ## */
1841 const IV biv = SvIVX(TOPs);
1844 /* As (a) is a UV, it's >=0, so it cannot be < */
1849 SETs(boolSV(auv < (UV)biv));
1852 { /* ## IV < UV ## */
1853 const IV aiv = SvIVX(TOPm1s);
1857 /* As (b) is a UV, it's >=0, so it must be < */
1864 SETs(boolSV((UV)aiv < buv));
1870 #ifndef NV_PRESERVES_UV
1871 #ifdef PERL_PRESERVE_IVUV
1874 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
1876 SETs(boolSV(SvRV(TOPs) < SvRV(TOPp1s)));
1881 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1883 if (Perl_isnan(left) || Perl_isnan(right))
1885 SETs(boolSV(left < right));
1888 SETs(boolSV(SvNV_nomg(TOPs) < value));
1897 tryAMAGICbin_MG(gt_amg, AMGf_set);
1898 #ifdef PERL_PRESERVE_IVUV
1899 SvIV_please_nomg(TOPs);
1901 SvIV_please_nomg(TOPm1s);
1902 if (SvIOK(TOPm1s)) {
1903 bool auvok = SvUOK(TOPm1s);
1904 bool buvok = SvUOK(TOPs);
1906 if (!auvok && !buvok) { /* ## IV > IV ## */
1907 const IV aiv = SvIVX(TOPm1s);
1908 const IV biv = SvIVX(TOPs);
1911 SETs(boolSV(aiv > biv));
1914 if (auvok && buvok) { /* ## UV > UV ## */
1915 const UV auv = SvUVX(TOPm1s);
1916 const UV buv = SvUVX(TOPs);
1919 SETs(boolSV(auv > buv));
1922 if (auvok) { /* ## UV > IV ## */
1924 const IV biv = SvIVX(TOPs);
1928 /* As (a) is a UV, it's >=0, so it must be > */
1933 SETs(boolSV(auv > (UV)biv));
1936 { /* ## IV > UV ## */
1937 const IV aiv = SvIVX(TOPm1s);
1941 /* As (b) is a UV, it's >=0, so it cannot be > */
1948 SETs(boolSV((UV)aiv > buv));
1954 #ifndef NV_PRESERVES_UV
1955 #ifdef PERL_PRESERVE_IVUV
1958 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
1960 SETs(boolSV(SvRV(TOPs) > SvRV(TOPp1s)));
1965 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1967 if (Perl_isnan(left) || Perl_isnan(right))
1969 SETs(boolSV(left > right));
1972 SETs(boolSV(SvNV_nomg(TOPs) > value));
1981 tryAMAGICbin_MG(le_amg, AMGf_set);
1982 #ifdef PERL_PRESERVE_IVUV
1983 SvIV_please_nomg(TOPs);
1985 SvIV_please_nomg(TOPm1s);
1986 if (SvIOK(TOPm1s)) {
1987 bool auvok = SvUOK(TOPm1s);
1988 bool buvok = SvUOK(TOPs);
1990 if (!auvok && !buvok) { /* ## IV <= IV ## */
1991 const IV aiv = SvIVX(TOPm1s);
1992 const IV biv = SvIVX(TOPs);
1995 SETs(boolSV(aiv <= biv));
1998 if (auvok && buvok) { /* ## UV <= UV ## */
1999 UV auv = SvUVX(TOPm1s);
2000 UV buv = SvUVX(TOPs);
2003 SETs(boolSV(auv <= buv));
2006 if (auvok) { /* ## UV <= IV ## */
2008 const IV biv = SvIVX(TOPs);
2012 /* As (a) is a UV, it's >=0, so a cannot be <= */
2017 SETs(boolSV(auv <= (UV)biv));
2020 { /* ## IV <= UV ## */
2021 const IV aiv = SvIVX(TOPm1s);
2025 /* As (b) is a UV, it's >=0, so a must be <= */
2032 SETs(boolSV((UV)aiv <= buv));
2038 #ifndef NV_PRESERVES_UV
2039 #ifdef PERL_PRESERVE_IVUV
2042 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2044 SETs(boolSV(SvRV(TOPs) <= SvRV(TOPp1s)));
2049 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2051 if (Perl_isnan(left) || Perl_isnan(right))
2053 SETs(boolSV(left <= right));
2056 SETs(boolSV(SvNV_nomg(TOPs) <= value));
2065 tryAMAGICbin_MG(ge_amg,AMGf_set);
2066 #ifdef PERL_PRESERVE_IVUV
2067 SvIV_please_nomg(TOPs);
2069 SvIV_please_nomg(TOPm1s);
2070 if (SvIOK(TOPm1s)) {
2071 bool auvok = SvUOK(TOPm1s);
2072 bool buvok = SvUOK(TOPs);
2074 if (!auvok && !buvok) { /* ## IV >= IV ## */
2075 const IV aiv = SvIVX(TOPm1s);
2076 const IV biv = SvIVX(TOPs);
2079 SETs(boolSV(aiv >= biv));
2082 if (auvok && buvok) { /* ## UV >= UV ## */
2083 const UV auv = SvUVX(TOPm1s);
2084 const UV buv = SvUVX(TOPs);
2087 SETs(boolSV(auv >= buv));
2090 if (auvok) { /* ## UV >= IV ## */
2092 const IV biv = SvIVX(TOPs);
2096 /* As (a) is a UV, it's >=0, so it must be >= */
2101 SETs(boolSV(auv >= (UV)biv));
2104 { /* ## IV >= UV ## */
2105 const IV aiv = SvIVX(TOPm1s);
2109 /* As (b) is a UV, it's >=0, so a cannot be >= */
2116 SETs(boolSV((UV)aiv >= buv));
2122 #ifndef NV_PRESERVES_UV
2123 #ifdef PERL_PRESERVE_IVUV
2126 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2128 SETs(boolSV(SvRV(TOPs) >= SvRV(TOPp1s)));
2133 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2135 if (Perl_isnan(left) || Perl_isnan(right))
2137 SETs(boolSV(left >= right));
2140 SETs(boolSV(SvNV_nomg(TOPs) >= value));
2149 tryAMAGICbin_MG(ne_amg,AMGf_set);
2150 #ifndef NV_PRESERVES_UV
2151 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2153 SETs(boolSV(SvRV(TOPs) != SvRV(TOPp1s)));
2157 #ifdef PERL_PRESERVE_IVUV
2158 SvIV_please_nomg(TOPs);
2160 SvIV_please_nomg(TOPm1s);
2161 if (SvIOK(TOPm1s)) {
2162 const bool auvok = SvUOK(TOPm1s);
2163 const bool buvok = SvUOK(TOPs);
2165 if (auvok == buvok) { /* ## IV == IV or UV == UV ## */
2166 /* Casting IV to UV before comparison isn't going to matter
2167 on 2s complement. On 1s complement or sign&magnitude
2168 (if we have any of them) it could make negative zero
2169 differ from normal zero. As I understand it. (Need to
2170 check - is negative zero implementation defined behaviour
2172 const UV buv = SvUVX(POPs);
2173 const UV auv = SvUVX(TOPs);
2175 SETs(boolSV(auv != buv));
2178 { /* ## Mixed IV,UV ## */
2182 /* != is commutative so swap if needed (save code) */
2184 /* swap. top of stack (b) is the iv */
2188 /* As (a) is a UV, it's >0, so it cannot be == */
2197 /* As (b) is a UV, it's >0, so it cannot be == */
2201 uv = SvUVX(*(SP+1)); /* Do I want TOPp1s() ? */
2203 SETs(boolSV((UV)iv != uv));
2210 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2212 if (Perl_isnan(left) || Perl_isnan(right))
2214 SETs(boolSV(left != right));
2217 SETs(boolSV(SvNV_nomg(TOPs) != value));
2226 tryAMAGICbin_MG(ncmp_amg, 0);
2227 #ifndef NV_PRESERVES_UV
2228 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2229 const UV right = PTR2UV(SvRV(POPs));
2230 const UV left = PTR2UV(SvRV(TOPs));
2231 SETi((left > right) - (left < right));
2235 #ifdef PERL_PRESERVE_IVUV
2236 /* Fortunately it seems NaN isn't IOK */
2237 SvIV_please_nomg(TOPs);
2239 SvIV_please_nomg(TOPm1s);
2240 if (SvIOK(TOPm1s)) {
2241 const bool leftuvok = SvUOK(TOPm1s);
2242 const bool rightuvok = SvUOK(TOPs);
2244 if (!leftuvok && !rightuvok) { /* ## IV <=> IV ## */
2245 const IV leftiv = SvIVX(TOPm1s);
2246 const IV rightiv = SvIVX(TOPs);
2248 if (leftiv > rightiv)
2250 else if (leftiv < rightiv)
2254 } else if (leftuvok && rightuvok) { /* ## UV <=> UV ## */
2255 const UV leftuv = SvUVX(TOPm1s);
2256 const UV rightuv = SvUVX(TOPs);
2258 if (leftuv > rightuv)
2260 else if (leftuv < rightuv)
2264 } else if (leftuvok) { /* ## UV <=> IV ## */
2265 const IV rightiv = SvIVX(TOPs);
2267 /* As (a) is a UV, it's >=0, so it cannot be < */
2270 const UV leftuv = SvUVX(TOPm1s);
2271 if (leftuv > (UV)rightiv) {
2273 } else if (leftuv < (UV)rightiv) {
2279 } else { /* ## IV <=> UV ## */
2280 const IV leftiv = SvIVX(TOPm1s);
2282 /* As (b) is a UV, it's >=0, so it must be < */
2285 const UV rightuv = SvUVX(TOPs);
2286 if ((UV)leftiv > rightuv) {
2288 } else if ((UV)leftiv < rightuv) {
2306 if (Perl_isnan(left) || Perl_isnan(right)) {
2310 value = (left > right) - (left < right);
2314 else if (left < right)
2316 else if (left > right)
2332 int amg_type = sle_amg;
2336 switch (PL_op->op_type) {
2355 tryAMAGICbin_MG(amg_type, AMGf_set);
2358 const int cmp = (IN_LOCALE_RUNTIME
2359 ? sv_cmp_locale_flags(left, right, 0)
2360 : sv_cmp_flags(left, right, 0));
2361 SETs(boolSV(cmp * multiplier < rhs));
2369 tryAMAGICbin_MG(seq_amg, AMGf_set);
2372 SETs(boolSV(sv_eq_flags(left, right, 0)));
2380 tryAMAGICbin_MG(sne_amg, AMGf_set);
2383 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2391 tryAMAGICbin_MG(scmp_amg, 0);
2394 const int cmp = (IN_LOCALE_RUNTIME
2395 ? sv_cmp_locale_flags(left, right, 0)
2396 : sv_cmp_flags(left, right, 0));
2404 dVAR; dSP; dATARGET;
2405 tryAMAGICbin_MG(band_amg, AMGf_assign);
2408 if (SvNIOKp(left) || SvNIOKp(right)) {
2409 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2410 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2411 if (PL_op->op_private & HINT_INTEGER) {
2412 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2416 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2419 if (left_ro_nonnum) SvNIOK_off(left);
2420 if (right_ro_nonnum) SvNIOK_off(right);
2423 do_vop(PL_op->op_type, TARG, left, right);
2432 dVAR; dSP; dATARGET;
2433 const int op_type = PL_op->op_type;
2435 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2438 if (SvNIOKp(left) || SvNIOKp(right)) {
2439 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2440 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2441 if (PL_op->op_private & HINT_INTEGER) {
2442 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2443 const IV r = SvIV_nomg(right);
2444 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2448 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2449 const UV r = SvUV_nomg(right);
2450 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2453 if (left_ro_nonnum) SvNIOK_off(left);
2454 if (right_ro_nonnum) SvNIOK_off(right);
2457 do_vop(op_type, TARG, left, right);
2467 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2469 SV * const sv = TOPs;
2470 const int flags = SvFLAGS(sv);
2472 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
2476 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2477 /* It's publicly an integer, or privately an integer-not-float */
2480 if (SvIVX(sv) == IV_MIN) {
2481 /* 2s complement assumption. */
2482 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2485 else if (SvUVX(sv) <= IV_MAX) {
2490 else if (SvIVX(sv) != IV_MIN) {
2494 #ifdef PERL_PRESERVE_IVUV
2502 SETn(-SvNV_nomg(sv));
2503 else if (SvPOKp(sv)) {
2505 const char * const s = SvPV_nomg_const(sv, len);
2506 if (isIDFIRST(*s)) {
2507 sv_setpvs(TARG, "-");
2510 else if (*s == '+' || *s == '-') {
2511 sv_setsv_nomg(TARG, sv);
2512 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2514 else if (DO_UTF8(sv)) {
2515 SvIV_please_nomg(sv);
2517 goto oops_its_an_int;
2519 sv_setnv(TARG, -SvNV_nomg(sv));
2521 sv_setpvs(TARG, "-");
2526 SvIV_please_nomg(sv);
2528 goto oops_its_an_int;
2529 sv_setnv(TARG, -SvNV_nomg(sv));
2534 SETn(-SvNV_nomg(sv));
2542 tryAMAGICun_MG(not_amg, AMGf_set);
2543 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2550 tryAMAGICun_MG(compl_amg, 0);
2554 if (PL_op->op_private & HINT_INTEGER) {
2555 const IV i = ~SvIV_nomg(sv);
2559 const UV u = ~SvUV_nomg(sv);
2568 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2569 sv_setsv_nomg(TARG, sv);
2570 tmps = (U8*)SvPV_force_nomg(TARG, len);
2573 /* Calculate exact length, let's not estimate. */
2578 U8 * const send = tmps + len;
2579 U8 * const origtmps = tmps;
2580 const UV utf8flags = UTF8_ALLOW_ANYUV;
2582 while (tmps < send) {
2583 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2585 targlen += UNISKIP(~c);
2591 /* Now rewind strings and write them. */
2598 Newx(result, targlen + 1, U8);
2600 while (tmps < send) {
2601 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2603 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2606 sv_usepvn_flags(TARG, (char*)result, targlen,
2607 SV_HAS_TRAILING_NUL);
2614 Newx(result, nchar + 1, U8);
2616 while (tmps < send) {
2617 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2622 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2630 register long *tmpl;
2631 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2634 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2639 for ( ; anum > 0; anum--, tmps++)
2647 /* integer versions of some of the above */
2651 dVAR; dSP; dATARGET;
2652 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2655 SETi( left * right );
2663 dVAR; dSP; dATARGET;
2664 tryAMAGICbin_MG(div_amg, AMGf_assign);
2667 IV value = SvIV_nomg(right);
2669 DIE(aTHX_ "Illegal division by zero");
2670 num = SvIV_nomg(left);
2672 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2676 value = num / value;
2682 #if defined(__GLIBC__) && IVSIZE == 8
2689 /* This is the vanilla old i_modulo. */
2690 dVAR; dSP; dATARGET;
2691 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2695 DIE(aTHX_ "Illegal modulus zero");
2696 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2700 SETi( left % right );
2705 #if defined(__GLIBC__) && IVSIZE == 8
2710 /* This is the i_modulo with the workaround for the _moddi3 bug
2711 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2712 * See below for pp_i_modulo. */
2713 dVAR; dSP; dATARGET;
2714 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2718 DIE(aTHX_ "Illegal modulus zero");
2719 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2723 SETi( left % PERL_ABS(right) );
2730 dVAR; dSP; dATARGET;
2731 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2735 DIE(aTHX_ "Illegal modulus zero");
2736 /* The assumption is to use hereafter the old vanilla version... */
2738 PL_ppaddr[OP_I_MODULO] =
2740 /* .. but if we have glibc, we might have a buggy _moddi3
2741 * (at least glicb 2.2.5 is known to have this bug), in other
2742 * words our integer modulus with negative quad as the second
2743 * argument might be broken. Test for this and re-patch the
2744 * opcode dispatch table if that is the case, remembering to
2745 * also apply the workaround so that this first round works
2746 * right, too. See [perl #9402] for more information. */
2750 /* Cannot do this check with inlined IV constants since
2751 * that seems to work correctly even with the buggy glibc. */
2753 /* Yikes, we have the bug.
2754 * Patch in the workaround version. */
2756 PL_ppaddr[OP_I_MODULO] =
2757 &Perl_pp_i_modulo_1;
2758 /* Make certain we work right this time, too. */
2759 right = PERL_ABS(right);
2762 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2766 SETi( left % right );
2774 dVAR; dSP; dATARGET;
2775 tryAMAGICbin_MG(add_amg, AMGf_assign);
2777 dPOPTOPiirl_ul_nomg;
2778 SETi( left + right );
2785 dVAR; dSP; dATARGET;
2786 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2788 dPOPTOPiirl_ul_nomg;
2789 SETi( left - right );
2797 tryAMAGICbin_MG(lt_amg, AMGf_set);
2800 SETs(boolSV(left < right));
2808 tryAMAGICbin_MG(gt_amg, AMGf_set);
2811 SETs(boolSV(left > right));
2819 tryAMAGICbin_MG(le_amg, AMGf_set);
2822 SETs(boolSV(left <= right));
2830 tryAMAGICbin_MG(ge_amg, AMGf_set);
2833 SETs(boolSV(left >= right));
2841 tryAMAGICbin_MG(eq_amg, AMGf_set);
2844 SETs(boolSV(left == right));
2852 tryAMAGICbin_MG(ne_amg, AMGf_set);
2855 SETs(boolSV(left != right));
2863 tryAMAGICbin_MG(ncmp_amg, 0);
2870 else if (left < right)
2882 tryAMAGICun_MG(neg_amg, 0);
2884 SV * const sv = TOPs;
2885 IV const i = SvIV_nomg(sv);
2891 /* High falutin' math. */
2896 tryAMAGICbin_MG(atan2_amg, 0);
2899 SETn(Perl_atan2(left, right));
2907 int amg_type = sin_amg;
2908 const char *neg_report = NULL;
2909 NV (*func)(NV) = Perl_sin;
2910 const int op_type = PL_op->op_type;
2927 amg_type = sqrt_amg;
2929 neg_report = "sqrt";
2934 tryAMAGICun_MG(amg_type, 0);
2936 SV * const arg = POPs;
2937 const NV value = SvNV_nomg(arg);
2939 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2940 SET_NUMERIC_STANDARD();
2941 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2944 XPUSHn(func(value));
2949 /* Support Configure command-line overrides for rand() functions.
2950 After 5.005, perhaps we should replace this by Configure support
2951 for drand48(), random(), or rand(). For 5.005, though, maintain
2952 compatibility by calling rand() but allow the user to override it.
2953 See INSTALL for details. --Andy Dougherty 15 July 1998
2955 /* Now it's after 5.005, and Configure supports drand48() and random(),
2956 in addition to rand(). So the overrides should not be needed any more.
2957 --Jarkko Hietaniemi 27 September 1998
2960 #ifndef HAS_DRAND48_PROTO
2961 extern double drand48 (void);
2974 if (!PL_srand_called) {
2975 (void)seedDrand01((Rand_seed_t)seed());
2976 PL_srand_called = TRUE;
2986 const UV anum = (MAXARG < 1) ? seed() : POPu;
2987 (void)seedDrand01((Rand_seed_t)anum);
2988 PL_srand_called = TRUE;
2992 /* Historically srand always returned true. We can avoid breaking
2994 sv_setpvs(TARG, "0 but true");
3003 tryAMAGICun_MG(int_amg, AMGf_numeric);
3005 SV * const sv = TOPs;
3006 const IV iv = SvIV_nomg(sv);
3007 /* XXX it's arguable that compiler casting to IV might be subtly
3008 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
3009 else preferring IV has introduced a subtle behaviour change bug. OTOH
3010 relying on floating point to be accurate is a bug. */
3015 else if (SvIOK(sv)) {
3017 SETu(SvUV_nomg(sv));
3022 const NV value = SvNV_nomg(sv);
3024 if (value < (NV)UV_MAX + 0.5) {
3027 SETn(Perl_floor(value));
3031 if (value > (NV)IV_MIN - 0.5) {
3034 SETn(Perl_ceil(value));
3045 tryAMAGICun_MG(abs_amg, AMGf_numeric);
3047 SV * const sv = TOPs;
3048 /* This will cache the NV value if string isn't actually integer */
3049 const IV iv = SvIV_nomg(sv);
3054 else if (SvIOK(sv)) {
3055 /* IVX is precise */
3057 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
3065 /* 2s complement assumption. Also, not really needed as
3066 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
3072 const NV value = SvNV_nomg(sv);
3086 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
3090 SV* const sv = POPs;
3092 tmps = (SvPV_const(sv, len));
3094 /* If Unicode, try to downgrade
3095 * If not possible, croak. */
3096 SV* const tsv = sv_2mortal(newSVsv(sv));
3099 sv_utf8_downgrade(tsv, FALSE);
3100 tmps = SvPV_const(tsv, len);
3102 if (PL_op->op_type == OP_HEX)
3105 while (*tmps && len && isSPACE(*tmps))
3109 if (*tmps == 'x' || *tmps == 'X') {
3111 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
3113 else if (*tmps == 'b' || *tmps == 'B')
3114 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
3116 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
3118 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
3132 SV * const sv = TOPs;
3134 if (SvGAMAGIC(sv)) {
3135 /* For an overloaded or magic scalar, we can't know in advance if
3136 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
3137 it likes to cache the length. Maybe that should be a documented
3142 = sv_2pv_flags(sv, &len,
3143 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
3146 sv_setsv(TARG, &PL_sv_undef);
3149 else if (DO_UTF8(sv)) {
3150 SETi(utf8_length((U8*)p, (U8*)p + len));
3154 } else if (SvOK(sv)) {
3155 /* Neither magic nor overloaded. */
3157 SETi(sv_len_utf8(sv));
3161 sv_setsv_nomg(TARG, &PL_sv_undef);
3181 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3183 const IV arybase = CopARYBASE_get(PL_curcop);
3185 const char *repl = NULL;
3187 const int num_args = PL_op->op_private & 7;
3188 bool repl_need_utf8_upgrade = FALSE;
3189 bool repl_is_utf8 = FALSE;
3194 repl = SvPV_const(repl_sv, repl_len);
3195 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
3198 len_iv = SvIV(len_sv);
3199 len_is_uv = SvIOK_UV(len_sv);
3202 pos1_iv = SvIV(pos_sv);
3203 pos1_is_uv = SvIOK_UV(pos_sv);
3209 sv_utf8_upgrade(sv);
3211 else if (DO_UTF8(sv))
3212 repl_need_utf8_upgrade = TRUE;
3214 tmps = SvPV_const(sv, curlen);
3216 utf8_curlen = sv_len_utf8(sv);
3217 if (utf8_curlen == curlen)
3220 curlen = utf8_curlen;
3225 if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
3226 UV pos1_uv = pos1_iv-arybase;
3227 /* Overflow can occur when $[ < 0 */
3228 if (arybase < 0 && pos1_uv < (UV)pos1_iv)
3233 else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
3234 goto bound_fail; /* $[=3; substr($_,2,...) */
3236 else { /* pos < $[ */
3237 if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
3242 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3247 if (pos1_is_uv || pos1_iv > 0) {
3248 if ((UV)pos1_iv > curlen)
3253 if (!len_is_uv && len_iv < 0) {
3254 pos2_iv = curlen + len_iv;
3256 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3259 } else { /* len_iv >= 0 */
3260 if (!pos1_is_uv && pos1_iv < 0) {
3261 pos2_iv = pos1_iv + len_iv;
3262 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3264 if ((UV)len_iv > curlen-(UV)pos1_iv)
3267 pos2_iv = pos1_iv+len_iv;
3277 if (!pos2_is_uv && pos2_iv < 0) {
3278 if (!pos1_is_uv && pos1_iv < 0)
3282 else if (!pos1_is_uv && pos1_iv < 0)
3285 if ((UV)pos2_iv < (UV)pos1_iv)
3287 if ((UV)pos2_iv > curlen)
3291 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3292 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3293 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3294 STRLEN byte_len = len;
3295 STRLEN byte_pos = utf8_curlen
3296 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3298 if (lvalue && !repl) {
3301 if (!SvGMAGICAL(sv)) {
3303 SvPV_force_nolen(sv);
3304 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3305 "Attempt to use reference as lvalue in substr");
3307 if (isGV_with_GP(sv))
3308 SvPV_force_nolen(sv);
3309 else if (SvOK(sv)) /* is it defined ? */
3310 (void)SvPOK_only_UTF8(sv);
3312 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3315 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3316 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3318 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3319 LvTARGOFF(ret) = pos;
3320 LvTARGLEN(ret) = len;
3323 PUSHs(ret); /* avoid SvSETMAGIC here */
3327 SvTAINTED_off(TARG); /* decontaminate */
3328 SvUTF8_off(TARG); /* decontaminate */
3331 sv_setpvn(TARG, tmps, byte_len);
3332 #ifdef USE_LOCALE_COLLATE
3333 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3339 SV* repl_sv_copy = NULL;
3341 if (repl_need_utf8_upgrade) {
3342 repl_sv_copy = newSVsv(repl_sv);
3343 sv_utf8_upgrade(repl_sv_copy);
3344 repl = SvPV_const(repl_sv_copy, repl_len);
3345 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3349 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3352 SvREFCNT_dec(repl_sv_copy);
3356 PUSHs(TARG); /* avoid SvSETMAGIC here */
3361 Perl_croak(aTHX_ "substr outside of string");
3362 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3369 register const IV size = POPi;
3370 register const IV offset = POPi;
3371 register SV * const src = POPs;
3372 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3375 if (lvalue) { /* it's an lvalue! */
3376 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3377 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3379 LvTARG(ret) = SvREFCNT_inc_simple(src);
3380 LvTARGOFF(ret) = offset;
3381 LvTARGLEN(ret) = size;
3385 SvTAINTED_off(TARG); /* decontaminate */
3389 sv_setuv(ret, do_vecget(src, offset, size));
3405 const char *little_p;
3406 const I32 arybase = CopARYBASE_get(PL_curcop);
3409 const bool is_index = PL_op->op_type == OP_INDEX;
3412 /* arybase is in characters, like offset, so combine prior to the
3413 UTF-8 to bytes calculation. */
3414 offset = POPi - arybase;
3418 big_p = SvPV_const(big, biglen);
3419 little_p = SvPV_const(little, llen);
3421 big_utf8 = DO_UTF8(big);
3422 little_utf8 = DO_UTF8(little);
3423 if (big_utf8 ^ little_utf8) {
3424 /* One needs to be upgraded. */
3425 if (little_utf8 && !PL_encoding) {
3426 /* Well, maybe instead we might be able to downgrade the small
3428 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3431 /* If the large string is ISO-8859-1, and it's not possible to
3432 convert the small string to ISO-8859-1, then there is no
3433 way that it could be found anywhere by index. */
3438 /* At this point, pv is a malloc()ed string. So donate it to temp
3439 to ensure it will get free()d */
3440 little = temp = newSV(0);
3441 sv_usepvn(temp, pv, llen);
3442 little_p = SvPVX(little);
3445 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3448 sv_recode_to_utf8(temp, PL_encoding);
3450 sv_utf8_upgrade(temp);
3455 big_p = SvPV_const(big, biglen);
3458 little_p = SvPV_const(little, llen);
3462 if (SvGAMAGIC(big)) {
3463 /* Life just becomes a lot easier if I use a temporary here.
3464 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3465 will trigger magic and overloading again, as will fbm_instr()
3467 big = newSVpvn_flags(big_p, biglen,
3468 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3471 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3472 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3473 warn on undef, and we've already triggered a warning with the
3474 SvPV_const some lines above. We can't remove that, as we need to
3475 call some SvPV to trigger overloading early and find out if the
3477 This is all getting to messy. The API isn't quite clean enough,
3478 because data access has side effects.
3480 little = newSVpvn_flags(little_p, llen,
3481 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3482 little_p = SvPVX(little);
3486 offset = is_index ? 0 : biglen;
3488 if (big_utf8 && offset > 0)
3489 sv_pos_u2b(big, &offset, 0);
3495 else if (offset > (I32)biglen)
3497 if (!(little_p = is_index
3498 ? fbm_instr((unsigned char*)big_p + offset,
3499 (unsigned char*)big_p + biglen, little, 0)
3500 : rninstr(big_p, big_p + offset,
3501 little_p, little_p + llen)))
3504 retval = little_p - big_p;
3505 if (retval > 0 && big_utf8)
3506 sv_pos_b2u(big, &retval);
3510 PUSHi(retval + arybase);
3516 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3517 if (SvTAINTED(MARK[1]))
3518 TAINT_PROPER("sprintf");
3519 SvTAINTED_off(TARG);
3520 do_sprintf(TARG, SP-MARK, MARK+1);
3521 TAINT_IF(SvTAINTED(TARG));
3533 const U8 *s = (U8*)SvPV_const(argsv, len);
3535 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3536 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3537 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3541 XPUSHu(DO_UTF8(argsv) ?
3542 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3554 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3556 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3558 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3560 (void) POPs; /* Ignore the argument value. */
3561 value = UNICODE_REPLACEMENT;
3567 SvUPGRADE(TARG,SVt_PV);
3569 if (value > 255 && !IN_BYTES) {
3570 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3571 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3572 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3574 (void)SvPOK_only(TARG);
3583 *tmps++ = (char)value;
3585 (void)SvPOK_only(TARG);
3587 if (PL_encoding && !IN_BYTES) {
3588 sv_recode_to_utf8(TARG, PL_encoding);
3590 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3591 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3595 *tmps++ = (char)value;
3611 const char *tmps = SvPV_const(left, len);
3613 if (DO_UTF8(left)) {
3614 /* If Unicode, try to downgrade.
3615 * If not possible, croak.
3616 * Yes, we made this up. */
3617 SV* const tsv = sv_2mortal(newSVsv(left));
3620 sv_utf8_downgrade(tsv, FALSE);
3621 tmps = SvPV_const(tsv, len);
3623 # ifdef USE_ITHREADS
3625 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3626 /* This should be threadsafe because in ithreads there is only
3627 * one thread per interpreter. If this would not be true,
3628 * we would need a mutex to protect this malloc. */
3629 PL_reentrant_buffer->_crypt_struct_buffer =
3630 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3631 #if defined(__GLIBC__) || defined(__EMX__)
3632 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3633 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3634 /* work around glibc-2.2.5 bug */
3635 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3639 # endif /* HAS_CRYPT_R */
3640 # endif /* USE_ITHREADS */
3642 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3644 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3650 "The crypt() function is unimplemented due to excessive paranoia.");
3654 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3655 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3657 /* Both the characters below can be stored in two UTF-8 bytes. In UTF-8 the max
3658 * character that 2 bytes can hold is U+07FF, and in UTF-EBCDIC it is U+03FF.
3659 * See http://www.unicode.org/unicode/reports/tr16 */
3660 #define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS 0x0178 /* Also is title case */
3661 #define GREEK_CAPITAL_LETTER_MU 0x039C /* Upper and title case of MICRON */
3663 /* Below are several macros that generate code */
3664 /* Generates code to store a unicode codepoint c that is known to occupy
3665 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3666 #define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3668 *(p) = UTF8_TWO_BYTE_HI(c); \
3669 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3672 /* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3673 * available byte after the two bytes */
3674 #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3676 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3677 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3680 /* Generates code to store the upper case of latin1 character l which is known
3681 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3682 * are only two characters that fit this description, and this macro knows
3683 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3685 #define STORE_NON_LATIN1_UC(p, l) \
3687 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3688 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3689 } else { /* Must be the following letter */ \
3690 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3694 /* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3695 * after the character stored */
3696 #define CAT_NON_LATIN1_UC(p, l) \
3698 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3699 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3701 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3705 /* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3706 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3707 * and must require two bytes to store it. Advances p to point to the next
3708 * available position */
3709 #define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3711 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3712 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3713 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3714 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3715 } else {/* else is one of the other two special cases */ \
3716 CAT_NON_LATIN1_UC((p), (l)); \
3722 /* Actually is both lcfirst() and ucfirst(). Only the first character
3723 * changes. This means that possibly we can change in-place, ie., just
3724 * take the source and change that one character and store it back, but not
3725 * if read-only etc, or if the length changes */
3730 STRLEN slen; /* slen is the byte length of the whole SV. */
3733 bool inplace; /* ? Convert first char only, in-place */
3734 bool doing_utf8 = FALSE; /* ? using utf8 */
3735 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3736 const int op_type = PL_op->op_type;
3739 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3740 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3741 * stored as UTF-8 at s. */
3742 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3743 * lowercased) character stored in tmpbuf. May be either
3744 * UTF-8 or not, but in either case is the number of bytes */
3748 s = (const U8*)SvPV_nomg_const(source, slen);
3750 if (ckWARN(WARN_UNINITIALIZED))
3751 report_uninit(source);
3756 /* We may be able to get away with changing only the first character, in
3757 * place, but not if read-only, etc. Later we may discover more reasons to
3758 * not convert in-place. */
3759 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3761 /* First calculate what the changed first character should be. This affects
3762 * whether we can just swap it out, leaving the rest of the string unchanged,
3763 * or even if have to convert the dest to UTF-8 when the source isn't */
3765 if (! slen) { /* If empty */
3766 need = 1; /* still need a trailing NUL */
3768 else if (DO_UTF8(source)) { /* Is the source utf8? */
3771 /* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
3772 * and doesn't allow for the user to specify their own. When code is added to
3773 * detect if there is a user-defined mapping in force here, and if so to use
3774 * that, then the code below can be compiled. The detection would be a good
3775 * thing anyway, as currently the user-defined mappings only work on utf8
3776 * strings, and thus depend on the chosen internal storage method, which is a
3778 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3779 if (UTF8_IS_INVARIANT(*s)) {
3781 /* An invariant source character is either ASCII or, in EBCDIC, an
3782 * ASCII equivalent or a caseless C1 control. In both these cases,
3783 * the lower and upper cases of any character are also invariants
3784 * (and title case is the same as upper case). So it is safe to
3785 * use the simple case change macros which avoid the overhead of
3786 * the general functions. Note that if perl were to be extended to
3787 * do locale handling in UTF-8 strings, this wouldn't be true in,
3788 * for example, Lithuanian or Turkic. */
3789 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3793 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3796 /* Similarly, if the source character isn't invariant but is in the
3797 * latin1 range (or EBCDIC equivalent thereof), we have the case
3798 * changes compiled into perl, and can avoid the overhead of the
3799 * general functions. In this range, the characters are stored as
3800 * two UTF-8 bytes, and it so happens that any changed-case version
3801 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3805 /* Convert the two source bytes to a single Unicode code point
3806 * value, change case and save for below */
3807 chr = UTF8_ACCUMULATE(*s, *(s+1));
3808 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3809 U8 lower = toLOWER_LATIN1(chr);
3810 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3812 else { /* ucfirst */
3813 U8 upper = toUPPER_LATIN1_MOD(chr);
3815 /* Most of the latin1 range characters are well-behaved. Their
3816 * title and upper cases are the same, and are also in the
3817 * latin1 range. The macro above returns their upper (hence
3818 * title) case, and all that need be done is to save the result
3819 * for below. However, several characters are problematic, and
3820 * have to be handled specially. The MOD in the macro name
3821 * above means that these tricky characters all get mapped to
3822 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3823 * This mapping saves some tests for the majority of the
3826 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3828 /* Not tricky. Just save it. */
3829 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3831 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3833 /* This one is tricky because it is two characters long,
3834 * though the UTF-8 is still two bytes, so the stored
3835 * length doesn't change */
3836 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3837 *(tmpbuf + 1) = 's';
3841 /* The other two have their title and upper cases the same,
3842 * but are tricky because the changed-case characters
3843 * aren't in the latin1 range. They, however, do fit into
3844 * two UTF-8 bytes */
3845 STORE_NON_LATIN1_UC(tmpbuf, chr);
3850 #endif /* end of dont want to break user-defined casing */
3852 /* Here, can't short-cut the general case */
3854 utf8_to_uvchr(s, &ulen);
3855 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3856 else toLOWER_utf8(s, tmpbuf, &tculen);
3858 /* we can't do in-place if the length changes. */
3859 if (ulen != tculen) inplace = FALSE;
3860 need = slen + 1 - ulen + tculen;
3861 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3865 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3866 * latin1 is treated as caseless. Note that a locale takes
3868 tculen = 1; /* Most characters will require one byte, but this will
3869 * need to be overridden for the tricky ones */
3872 if (op_type == OP_LCFIRST) {
3874 /* lower case the first letter: no trickiness for any character */
3875 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3876 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3879 else if (IN_LOCALE_RUNTIME) {
3880 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3881 * have upper and title case different
3884 else if (! IN_UNI_8_BIT) {
3885 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3886 * on EBCDIC machines whatever the
3887 * native function does */
3889 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3890 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3892 /* tmpbuf now has the correct title case for all latin1 characters
3893 * except for the several ones that have tricky handling. All
3894 * of these are mapped by the MOD to the letter below. */
3895 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3897 /* The length is going to change, with all three of these, so
3898 * can't replace just the first character */
3901 /* We use the original to distinguish between these tricky
3903 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3904 /* Two character title case 'Ss', but can remain non-UTF-8 */
3907 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3912 /* The other two tricky ones have their title case outside
3913 * latin1. It is the same as their upper case. */
3915 STORE_NON_LATIN1_UC(tmpbuf, *s);
3917 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3918 * and their upper cases is 2. */
3921 /* The entire result will have to be in UTF-8. Assume worst
3922 * case sizing in conversion. (all latin1 characters occupy
3923 * at most two bytes in utf8) */
3924 convert_source_to_utf8 = TRUE;
3925 need = slen * 2 + 1;
3927 } /* End of is one of the three special chars */
3928 } /* End of use Unicode (Latin1) semantics */
3929 } /* End of changing the case of the first character */
3931 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3932 * generate the result */
3935 /* We can convert in place. This means we change just the first
3936 * character without disturbing the rest; no need to grow */
3938 s = d = (U8*)SvPV_force_nomg(source, slen);
3944 /* Here, we can't convert in place; we earlier calculated how much
3945 * space we will need, so grow to accommodate that */
3946 SvUPGRADE(dest, SVt_PV);
3947 d = (U8*)SvGROW(dest, need);
3948 (void)SvPOK_only(dest);
3955 if (! convert_source_to_utf8) {
3957 /* Here both source and dest are in UTF-8, but have to create
3958 * the entire output. We initialize the result to be the
3959 * title/lower cased first character, and then append the rest
3961 sv_setpvn(dest, (char*)tmpbuf, tculen);
3963 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3967 const U8 *const send = s + slen;
3969 /* Here the dest needs to be in UTF-8, but the source isn't,
3970 * except we earlier UTF-8'd the first character of the source
3971 * into tmpbuf. First put that into dest, and then append the
3972 * rest of the source, converting it to UTF-8 as we go. */
3974 /* Assert tculen is 2 here because the only two characters that
3975 * get to this part of the code have 2-byte UTF-8 equivalents */
3977 *d++ = *(tmpbuf + 1);
3978 s++; /* We have just processed the 1st char */
3980 for (; s < send; s++) {
3981 d = uvchr_to_utf8(d, *s);
3984 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3988 else { /* in-place UTF-8. Just overwrite the first character */
3989 Copy(tmpbuf, d, tculen, U8);
3990 SvCUR_set(dest, need - 1);
3993 else { /* Neither source nor dest are in or need to be UTF-8 */
3995 if (IN_LOCALE_RUNTIME) {
3999 if (inplace) { /* in-place, only need to change the 1st char */
4002 else { /* Not in-place */
4004 /* Copy the case-changed character(s) from tmpbuf */
4005 Copy(tmpbuf, d, tculen, U8);
4006 d += tculen - 1; /* Code below expects d to point to final
4007 * character stored */
4010 else { /* empty source */
4011 /* See bug #39028: Don't taint if empty */
4015 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
4016 * the destination to retain that flag */
4020 if (!inplace) { /* Finish the rest of the string, unchanged */
4021 /* This will copy the trailing NUL */
4022 Copy(s + 1, d + 1, slen, U8);
4023 SvCUR_set(dest, need - 1);
4030 /* There's so much setup/teardown code common between uc and lc, I wonder if
4031 it would be worth merging the two, and just having a switch outside each
4032 of the three tight loops. There is less and less commonality though */
4046 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4047 && SvTEMP(source) && !DO_UTF8(source)
4048 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
4050 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
4051 * make the loop tight, so we overwrite the source with the dest before
4052 * looking at it, and we need to look at the original source
4053 * afterwards. There would also need to be code added to handle
4054 * switching to not in-place in midstream if we run into characters
4055 * that change the length.
4058 s = d = (U8*)SvPV_force_nomg(source, len);
4065 /* The old implementation would copy source into TARG at this point.
4066 This had the side effect that if source was undef, TARG was now
4067 an undefined SV with PADTMP set, and they don't warn inside
4068 sv_2pv_flags(). However, we're now getting the PV direct from
4069 source, which doesn't have PADTMP set, so it would warn. Hence the
4073 s = (const U8*)SvPV_nomg_const(source, len);
4075 if (ckWARN(WARN_UNINITIALIZED))
4076 report_uninit(source);
4082 SvUPGRADE(dest, SVt_PV);
4083 d = (U8*)SvGROW(dest, min);
4084 (void)SvPOK_only(dest);
4089 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4090 to check DO_UTF8 again here. */
4092 if (DO_UTF8(source)) {
4093 const U8 *const send = s + len;
4094 U8 tmpbuf[UTF8_MAXBYTES+1];
4096 /* All occurrences of these are to be moved to follow any other marks.
4097 * This is context-dependent. We may not be passed enough context to
4098 * move the iota subscript beyond all of them, but we do the best we can
4099 * with what we're given. The result is always better than if we
4100 * hadn't done this. And, the problem would only arise if we are
4101 * passed a character without all its combining marks, which would be
4102 * the caller's mistake. The information this is based on comes from a
4103 * comment in Unicode SpecialCasing.txt, (and the Standard's text
4104 * itself) and so can't be checked properly to see if it ever gets
4105 * revised. But the likelihood of it changing is remote */
4106 bool in_iota_subscript = FALSE;
4109 if (in_iota_subscript && ! is_utf8_mark(s)) {
4110 /* A non-mark. Time to output the iota subscript */
4111 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
4112 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
4114 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
4115 in_iota_subscript = FALSE;
4119 /* See comments at the first instance in this file of this ifdef */
4120 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4122 /* If the UTF-8 character is invariant, then it is in the range
4123 * known by the standard macro; result is only one byte long */
4124 if (UTF8_IS_INVARIANT(*s)) {
4128 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4130 /* Likewise, if it fits in a byte, its case change is in our
4132 U8 orig = UTF8_ACCUMULATE(*s, *(s+1));
4133 U8 upper = toUPPER_LATIN1_MOD(orig);
4134 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
4142 /* Otherwise, need the general UTF-8 case. Get the changed
4143 * case value and copy it to the output buffer */
4145 const STRLEN u = UTF8SKIP(s);
4148 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
4149 if (uv == GREEK_CAPITAL_LETTER_IOTA
4150 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
4152 in_iota_subscript = TRUE;
4155 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4156 /* If the eventually required minimum size outgrows
4157 * the available space, we need to grow. */
4158 const UV o = d - (U8*)SvPVX_const(dest);
4160 /* If someone uppercases one million U+03B0s we
4161 * SvGROW() one million times. Or we could try
4162 * guessing how much to allocate without allocating too
4163 * much. Such is life. See corresponding comment in
4164 * lc code for another option */
4166 d = (U8*)SvPVX(dest) + o;
4168 Copy(tmpbuf, d, ulen, U8);
4174 if (in_iota_subscript) {
4175 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
4179 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4181 else { /* Not UTF-8 */
4183 const U8 *const send = s + len;
4185 /* Use locale casing if in locale; regular style if not treating
4186 * latin1 as having case; otherwise the latin1 casing. Do the
4187 * whole thing in a tight loop, for speed, */
4188 if (IN_LOCALE_RUNTIME) {
4191 for (; s < send; d++, s++)
4192 *d = toUPPER_LC(*s);
4194 else if (! IN_UNI_8_BIT) {
4195 for (; s < send; d++, s++) {
4200 for (; s < send; d++, s++) {
4201 *d = toUPPER_LATIN1_MOD(*s);
4202 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
4204 /* The mainstream case is the tight loop above. To avoid
4205 * extra tests in that, all three characters that require
4206 * special handling are mapped by the MOD to the one tested
4208 * Use the source to distinguish between the three cases */
4210 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4212 /* uc() of this requires 2 characters, but they are
4213 * ASCII. If not enough room, grow the string */
4214 if (SvLEN(dest) < ++min) {
4215 const UV o = d - (U8*)SvPVX_const(dest);
4217 d = (U8*)SvPVX(dest) + o;
4219 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4220 continue; /* Back to the tight loop; still in ASCII */
4223 /* The other two special handling characters have their
4224 * upper cases outside the latin1 range, hence need to be
4225 * in UTF-8, so the whole result needs to be in UTF-8. So,
4226 * here we are somewhere in the middle of processing a
4227 * non-UTF-8 string, and realize that we will have to convert
4228 * the whole thing to UTF-8. What to do? There are
4229 * several possibilities. The simplest to code is to
4230 * convert what we have so far, set a flag, and continue on
4231 * in the loop. The flag would be tested each time through
4232 * the loop, and if set, the next character would be
4233 * converted to UTF-8 and stored. But, I (khw) didn't want
4234 * to slow down the mainstream case at all for this fairly
4235 * rare case, so I didn't want to add a test that didn't
4236 * absolutely have to be there in the loop, besides the
4237 * possibility that it would get too complicated for
4238 * optimizers to deal with. Another possibility is to just
4239 * give up, convert the source to UTF-8, and restart the
4240 * function that way. Another possibility is to convert
4241 * both what has already been processed and what is yet to
4242 * come separately to UTF-8, then jump into the loop that
4243 * handles UTF-8. But the most efficient time-wise of the
4244 * ones I could think of is what follows, and turned out to
4245 * not require much extra code. */
4247 /* Convert what we have so far into UTF-8, telling the
4248 * function that we know it should be converted, and to
4249 * allow extra space for what we haven't processed yet.
4250 * Assume the worst case space requirements for converting
4251 * what we haven't processed so far: that it will require
4252 * two bytes for each remaining source character, plus the
4253 * NUL at the end. This may cause the string pointer to
4254 * move, so re-find it. */
4256 len = d - (U8*)SvPVX_const(dest);
4257 SvCUR_set(dest, len);
4258 len = sv_utf8_upgrade_flags_grow(dest,
4259 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4261 d = (U8*)SvPVX(dest) + len;
4263 /* And append the current character's upper case in UTF-8 */
4264 CAT_NON_LATIN1_UC(d, *s);
4266 /* Now process the remainder of the source, converting to
4267 * upper and UTF-8. If a resulting byte is invariant in
4268 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4269 * append it to the output. */
4272 for (; s < send; s++) {
4273 U8 upper = toUPPER_LATIN1_MOD(*s);
4274 if UTF8_IS_INVARIANT(upper) {
4278 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4282 /* Here have processed the whole source; no need to continue
4283 * with the outer loop. Each character has been converted
4284 * to upper case and converted to UTF-8 */
4287 } /* End of processing all latin1-style chars */
4288 } /* End of processing all chars */
4289 } /* End of source is not empty */
4291 if (source != dest) {
4292 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4293 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4295 } /* End of isn't utf8 */
4313 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4314 && SvTEMP(source) && !DO_UTF8(source)) {
4316 /* We can convert in place, as lowercasing anything in the latin1 range
4317 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4319 s = d = (U8*)SvPV_force_nomg(source, len);
4326 /* The old implementation would copy source into TARG at this point.
4327 This had the side effect that if source was undef, TARG was now
4328 an undefined SV with PADTMP set, and they don't warn inside
4329 sv_2pv_flags(). However, we're now getting the PV direct from
4330 source, which doesn't have PADTMP set, so it would warn. Hence the
4334 s = (const U8*)SvPV_nomg_const(source, len);
4336 if (ckWARN(WARN_UNINITIALIZED))
4337 report_uninit(source);
4343 SvUPGRADE(dest, SVt_PV);
4344 d = (U8*)SvGROW(dest, min);
4345 (void)SvPOK_only(dest);
4350 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4351 to check DO_UTF8 again here. */
4353 if (DO_UTF8(source)) {
4354 const U8 *const send = s + len;
4355 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4358 /* See comments at the first instance in this file of this ifdef */
4359 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4360 if (UTF8_IS_INVARIANT(*s)) {
4362 /* Invariant characters use the standard mappings compiled in.
4367 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4369 /* As do the ones in the Latin1 range */
4370 U8 lower = toLOWER_LATIN1(UTF8_ACCUMULATE(*s, *(s+1)));
4371 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4376 /* Here, is utf8 not in Latin-1 range, have to go out and get
4377 * the mappings from the tables. */
4379 const STRLEN u = UTF8SKIP(s);
4382 #ifndef CONTEXT_DEPENDENT_CASING
4383 toLOWER_utf8(s, tmpbuf, &ulen);
4385 /* This is ifdefd out because it needs more work and thought. It isn't clear
4386 * that we should do it.
4387 * A minor objection is that this is based on a hard-coded rule from the
4388 * Unicode standard, and may change, but this is not very likely at all.
4389 * mktables should check and warn if it does.
4390 * More importantly, if the sigma occurs at the end of the string, we don't
4391 * have enough context to know whether it is part of a larger string or going
4392 * to be or not. It may be that we are passed a subset of the context, via
4393 * a \U...\E, for example, and we could conceivably know the larger context if
4394 * code were changed to pass that in. But, if the string passed in is an
4395 * intermediate result, and the user concatenates two strings together
4396 * after we have made a final sigma, that would be wrong. If the final sigma
4397 * occurs in the middle of the string we are working on, then we know that it
4398 * should be a final sigma, but otherwise we can't be sure. */
4400 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4402 /* If the lower case is a small sigma, it may be that we need
4403 * to change it to a final sigma. This happens at the end of
4404 * a word that contains more than just this character, and only
4405 * when we started with a capital sigma. */
4406 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4407 s > send - len && /* Makes sure not the first letter */
4408 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4411 /* We use the algorithm in:
4412 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4413 * is a CAPITAL SIGMA): If C is preceded by a sequence
4414 * consisting of a cased letter and a case-ignorable
4415 * sequence, and C is not followed by a sequence consisting
4416 * of a case ignorable sequence and then a cased letter,
4417 * then when lowercasing C, C becomes a final sigma */
4419 /* To determine if this is the end of a word, need to peek
4420 * ahead. Look at the next character */
4421 const U8 *peek = s + u;
4423 /* Skip any case ignorable characters */
4424 while (peek < send && is_utf8_case_ignorable(peek)) {
4425 peek += UTF8SKIP(peek);
4428 /* If we reached the end of the string without finding any
4429 * non-case ignorable characters, or if the next such one
4430 * is not-cased, then we have met the conditions for it
4431 * being a final sigma with regards to peek ahead, and so
4432 * must do peek behind for the remaining conditions. (We
4433 * know there is stuff behind to look at since we tested
4434 * above that this isn't the first letter) */
4435 if (peek >= send || ! is_utf8_cased(peek)) {
4436 peek = utf8_hop(s, -1);
4438 /* Here are at the beginning of the first character
4439 * before the original upper case sigma. Keep backing
4440 * up, skipping any case ignorable characters */
4441 while (is_utf8_case_ignorable(peek)) {
4442 peek = utf8_hop(peek, -1);
4445 /* Here peek points to the first byte of the closest
4446 * non-case-ignorable character before the capital
4447 * sigma. If it is cased, then by the Unicode
4448 * algorithm, we should use a small final sigma instead
4449 * of what we have */
4450 if (is_utf8_cased(peek)) {
4451 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4452 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4456 else { /* Not a context sensitive mapping */
4457 #endif /* End of commented out context sensitive */
4458 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4460 /* If the eventually required minimum size outgrows
4461 * the available space, we need to grow. */
4462 const UV o = d - (U8*)SvPVX_const(dest);
4464 /* If someone lowercases one million U+0130s we
4465 * SvGROW() one million times. Or we could try
4466 * guessing how much to allocate without allocating too
4467 * much. Such is life. Another option would be to
4468 * grow an extra byte or two more each time we need to
4469 * grow, which would cut down the million to 500K, with
4472 d = (U8*)SvPVX(dest) + o;
4474 #ifdef CONTEXT_DEPENDENT_CASING
4477 /* Copy the newly lowercased letter to the output buffer we're
4479 Copy(tmpbuf, d, ulen, U8);
4482 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4485 } /* End of looping through the source string */
4488 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4489 } else { /* Not utf8 */
4491 const U8 *const send = s + len;
4493 /* Use locale casing if in locale; regular style if not treating
4494 * latin1 as having case; otherwise the latin1 casing. Do the
4495 * whole thing in a tight loop, for speed, */
4496 if (IN_LOCALE_RUNTIME) {
4499 for (; s < send; d++, s++)
4500 *d = toLOWER_LC(*s);
4502 else if (! IN_UNI_8_BIT) {
4503 for (; s < send; d++, s++) {
4508 for (; s < send; d++, s++) {
4509 *d = toLOWER_LATIN1(*s);
4513 if (source != dest) {
4515 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4525 SV * const sv = TOPs;
4527 register const char *s = SvPV_const(sv,len);
4529 SvUTF8_off(TARG); /* decontaminate */
4532 SvUPGRADE(TARG, SVt_PV);
4533 SvGROW(TARG, (len * 2) + 1);
4537 if (UTF8_IS_CONTINUED(*s)) {
4538 STRLEN ulen = UTF8SKIP(s);