3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
33 /* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
37 #ifdef NEED_GETPID_PROTO
38 extern Pid_t getpid (void);
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
45 #if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
49 /* variations on pp_null */
55 if (GIMME_V == G_SCALAR)
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
71 if (PL_op->op_flags & OPf_REF) {
75 if (GIMME == G_SCALAR)
76 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
81 if (gimme == G_ARRAY) {
82 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
84 if (SvMAGICAL(TARG)) {
86 for (i=0; i < (U32)maxarg; i++) {
87 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
88 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
92 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
96 else if (gimme == G_SCALAR) {
97 SV* const sv = sv_newmortal();
98 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
110 assert(SvTYPE(TARG) == SVt_PVHV);
112 if (PL_op->op_private & OPpLVAL_INTRO)
113 if (!(PL_op->op_private & OPpPAD_STATE))
114 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
115 if (PL_op->op_flags & OPf_REF)
118 if (GIMME == G_SCALAR)
119 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
123 if (gimme == G_ARRAY) {
126 else if (gimme == G_SCALAR) {
127 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
135 static const char S_no_symref_sv[] =
136 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
145 tryAMAGICunDEREF(to_gv);
148 if (SvTYPE(sv) == SVt_PVIO) {
149 GV * const gv = MUTABLE_GV(sv_newmortal());
150 gv_init(gv, 0, "", 0, 0);
151 GvIOp(gv) = MUTABLE_IO(sv);
152 SvREFCNT_inc_void_NN(sv);
155 else if (!isGV_with_GP(sv))
156 DIE(aTHX_ "Not a GLOB reference");
159 if (!isGV_with_GP(sv)) {
160 if (!SvOK(sv) && sv != &PL_sv_undef) {
161 /* If this is a 'my' scalar and flag is set then vivify
165 Perl_croak_no_modify(aTHX);
166 if (PL_op->op_private & OPpDEREF) {
168 if (cUNOP->op_targ) {
170 SV * const namesv = PAD_SV(cUNOP->op_targ);
171 const char * const name = SvPV(namesv, len);
172 gv = MUTABLE_GV(newSV(0));
173 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
176 const char * const name = CopSTASHPV(PL_curcop);
179 prepare_SV_for_RV(sv);
180 SvRV_set(sv, MUTABLE_SV(gv));
185 if (PL_op->op_flags & OPf_REF ||
186 PL_op->op_private & HINT_STRICT_REFS)
187 DIE(aTHX_ PL_no_usym, "a symbol");
188 if (ckWARN(WARN_UNINITIALIZED))
192 if ((PL_op->op_flags & OPf_SPECIAL) &&
193 !(PL_op->op_flags & OPf_MOD))
195 SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
197 && (!is_gv_magical_sv(sv,0)
198 || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
205 if (PL_op->op_private & HINT_STRICT_REFS)
206 DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
207 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
208 == OPpDONT_INIT_GV) {
209 /* We are the target of a coderef assignment. Return
210 the scalar unchanged, and let pp_sasssign deal with
214 sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD, SVt_PVGV));
216 /* FAKE globs in the symbol table cause weird bugs (#77810) */
217 if (sv) SvFAKE_off(sv);
220 if (PL_op->op_private & OPpLVAL_INTRO)
221 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
222 if (sv && SvFAKE(sv)) {
223 SV *newsv = sv_newmortal();
224 sv_setsv_flags(newsv, sv, 0);
232 /* Helper function for pp_rv2sv and pp_rv2av */
234 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
235 const svtype type, SV ***spp)
240 PERL_ARGS_ASSERT_SOFTREF2XV;
242 if (PL_op->op_private & HINT_STRICT_REFS) {
244 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
246 Perl_die(aTHX_ PL_no_usym, what);
249 if (PL_op->op_flags & OPf_REF)
250 Perl_die(aTHX_ PL_no_usym, what);
251 if (ckWARN(WARN_UNINITIALIZED))
253 if (type != SVt_PV && GIMME_V == G_ARRAY) {
257 **spp = &PL_sv_undef;
260 if ((PL_op->op_flags & OPf_SPECIAL) &&
261 !(PL_op->op_flags & OPf_MOD))
263 gv = gv_fetchsv(sv, 0, type);
265 && (!is_gv_magical_sv(sv,0)
266 || !(gv = gv_fetchsv(sv, GV_ADD, type))))
268 **spp = &PL_sv_undef;
273 gv = gv_fetchsv(sv, GV_ADD, type);
283 if (!(PL_op->op_private & OPpDEREFed))
286 tryAMAGICunDEREF(to_sv);
289 switch (SvTYPE(sv)) {
295 DIE(aTHX_ "Not a SCALAR reference");
302 if (!isGV_with_GP(gv)) {
303 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
309 if (PL_op->op_flags & OPf_MOD) {
310 if (PL_op->op_private & OPpLVAL_INTRO) {
311 if (cUNOP->op_first->op_type == OP_NULL)
312 sv = save_scalar(MUTABLE_GV(TOPs));
314 sv = save_scalar(gv);
316 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
318 else if (PL_op->op_private & OPpDEREF)
319 vivify_ref(sv, PL_op->op_private & OPpDEREF);
328 AV * const av = MUTABLE_AV(TOPs);
329 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
331 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
333 *sv = newSV_type(SVt_PVMG);
334 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
338 SETs(sv_2mortal(newSViv(
339 AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
349 if (PL_op->op_flags & OPf_MOD || LVRET) {
350 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
351 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
353 LvTARG(ret) = SvREFCNT_inc_simple(sv);
354 PUSHs(ret); /* no SvSETMAGIC */
358 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
359 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
360 if (mg && mg->mg_len >= 0) {
365 PUSHi(i + CopARYBASE_get(PL_curcop));
378 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
380 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
383 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
384 /* (But not in defined().) */
386 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
389 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
390 if ((PL_op->op_private & OPpLVAL_INTRO)) {
391 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
394 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
397 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
401 cv = MUTABLE_CV(&PL_sv_undef);
402 SETs(MUTABLE_SV(cv));
412 SV *ret = &PL_sv_undef;
414 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
415 const char * s = SvPVX_const(TOPs);
416 if (strnEQ(s, "CORE::", 6)) {
417 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
418 if (code < 0) { /* Overridable. */
419 #define MAX_ARGS_OP ((sizeof(I32) - 1) * 2)
420 int i = 0, n = 0, seen_question = 0, defgv = 0;
422 char str[ MAX_ARGS_OP * 2 + 2 ]; /* One ';', one '\0' */
424 if (code == -KEY_chop || code == -KEY_chomp
425 || code == -KEY_exec || code == -KEY_system)
427 if (code == -KEY_mkdir) {
428 ret = newSVpvs_flags("_;$", SVs_TEMP);
431 if (code == -KEY_keys || code == -KEY_values || code == -KEY_each) {
432 ret = newSVpvs_flags("\\[@%]", SVs_TEMP);
435 if (code == -KEY_tied || code == -KEY_untie) {
436 ret = newSVpvs_flags("\\[$@%*]", SVs_TEMP);
439 if (code == -KEY_tie) {
440 ret = newSVpvs_flags("\\[$@%*]$@", SVs_TEMP);
443 if (code == -KEY_readpipe) {
444 s = "CORE::backtick";
446 while (i < MAXO) { /* The slow way. */
447 if (strEQ(s + 6, PL_op_name[i])
448 || strEQ(s + 6, PL_op_desc[i]))
454 goto nonesuch; /* Should not happen... */
456 defgv = PL_opargs[i] & OA_DEFGV;
457 oa = PL_opargs[i] >> OASHIFT;
459 if (oa & OA_OPTIONAL && !seen_question && !defgv) {
463 if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF
464 && (oa & (OA_OPTIONAL - 1)) <= OA_SCALARREF
465 /* But globs are already references (kinda) */
466 && (oa & (OA_OPTIONAL - 1)) != OA_FILEREF
470 str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)];
473 if (defgv && str[n - 1] == '$')
476 ret = newSVpvn_flags(str, n - 1, SVs_TEMP);
478 else if (code) /* Non-Overridable */
480 else { /* None such */
482 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
486 cv = sv_2cv(TOPs, &stash, &gv, 0);
488 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
497 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
499 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
501 PUSHs(MUTABLE_SV(cv));
515 if (GIMME != G_ARRAY) {
519 *MARK = &PL_sv_undef;
520 *MARK = refto(*MARK);
524 EXTEND_MORTAL(SP - MARK);
526 *MARK = refto(*MARK);
531 S_refto(pTHX_ SV *sv)
536 PERL_ARGS_ASSERT_REFTO;
538 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
541 if (!(sv = LvTARG(sv)))
544 SvREFCNT_inc_void_NN(sv);
546 else if (SvTYPE(sv) == SVt_PVAV) {
547 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
548 av_reify(MUTABLE_AV(sv));
550 SvREFCNT_inc_void_NN(sv);
552 else if (SvPADTMP(sv) && !IS_PADGV(sv))
556 SvREFCNT_inc_void_NN(sv);
559 sv_upgrade(rv, SVt_IV);
569 SV * const sv = POPs;
574 if (!sv || !SvROK(sv))
577 pv = sv_reftype(SvRV(sv),TRUE);
578 PUSHp(pv, strlen(pv));
588 stash = CopSTASH(PL_curcop);
590 SV * const ssv = POPs;
594 if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
595 Perl_croak(aTHX_ "Attempt to bless into a reference");
596 ptr = SvPV_const(ssv,len);
598 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
599 "Explicit blessing to '' (assuming package main)");
600 stash = gv_stashpvn(ptr, len, GV_ADD);
603 (void)sv_bless(TOPs, stash);
612 const char * const elem = SvPV_nolen_const(sv);
613 GV * const gv = MUTABLE_GV(POPs);
618 /* elem will always be NUL terminated. */
619 const char * const second_letter = elem + 1;
622 if (strEQ(second_letter, "RRAY"))
623 tmpRef = MUTABLE_SV(GvAV(gv));
626 if (strEQ(second_letter, "ODE"))
627 tmpRef = MUTABLE_SV(GvCVu(gv));
630 if (strEQ(second_letter, "ILEHANDLE")) {
631 /* finally deprecated in 5.8.0 */
632 deprecate("*glob{FILEHANDLE}");
633 tmpRef = MUTABLE_SV(GvIOp(gv));
636 if (strEQ(second_letter, "ORMAT"))
637 tmpRef = MUTABLE_SV(GvFORM(gv));
640 if (strEQ(second_letter, "LOB"))
641 tmpRef = MUTABLE_SV(gv);
644 if (strEQ(second_letter, "ASH"))
645 tmpRef = MUTABLE_SV(GvHV(gv));
648 if (*second_letter == 'O' && !elem[2])
649 tmpRef = MUTABLE_SV(GvIOp(gv));
652 if (strEQ(second_letter, "AME"))
653 sv = newSVhek(GvNAME_HEK(gv));
656 if (strEQ(second_letter, "ACKAGE")) {
657 const HV * const stash = GvSTASH(gv);
658 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
659 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
663 if (strEQ(second_letter, "CALAR"))
678 /* Pattern matching */
683 register unsigned char *s;
686 register I32 *sfirst;
690 if (sv == PL_lastscream) {
694 s = (unsigned char*)(SvPV(sv, len));
696 if (pos <= 0 || !SvPOK(sv) || SvUTF8(sv)) {
697 /* No point in studying a zero length string, and not safe to study
698 anything that doesn't appear to be a simple scalar (and hence might
699 change between now and when the regexp engine runs without our set
700 magic ever running) such as a reference to an object with overloaded
706 SvSCREAM_off(PL_lastscream);
707 SvREFCNT_dec(PL_lastscream);
709 PL_lastscream = SvREFCNT_inc_simple(sv);
711 s = (unsigned char*)(SvPV(sv, len));
715 if (pos > PL_maxscream) {
716 if (PL_maxscream < 0) {
717 PL_maxscream = pos + 80;
718 Newx(PL_screamfirst, 256, I32);
719 Newx(PL_screamnext, PL_maxscream, I32);
722 PL_maxscream = pos + pos / 4;
723 Renew(PL_screamnext, PL_maxscream, I32);
727 sfirst = PL_screamfirst;
728 snext = PL_screamnext;
730 if (!sfirst || !snext)
731 DIE(aTHX_ "do_study: out of memory");
733 for (ch = 256; ch; --ch)
738 register const I32 ch = s[pos];
740 snext[pos] = sfirst[ch] - pos;
747 /* piggyback on m//g magic */
748 sv_magic(sv, NULL, PERL_MAGIC_regex_global, NULL, 0);
757 if (PL_op->op_flags & OPf_STACKED)
759 else if (PL_op->op_private & OPpTARGET_MY)
765 TARG = sv_newmortal();
770 /* Lvalue operators. */
782 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
784 do_chop(TARG, *++MARK);
793 SETi(do_chomp(TOPs));
799 dVAR; dSP; dMARK; dTARGET;
800 register I32 count = 0;
803 count += do_chomp(POPs);
813 if (!PL_op->op_private) {
822 SV_CHECK_THINKFIRST_COW_DROP(sv);
824 switch (SvTYPE(sv)) {
828 av_undef(MUTABLE_AV(sv));
831 hv_undef(MUTABLE_HV(sv));
834 if (cv_const_sv((const CV *)sv))
835 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
836 CvANON((const CV *)sv) ? "(anonymous)"
837 : GvENAME(CvGV((const CV *)sv)));
841 /* let user-undef'd sub keep its identity */
842 GV* const gv = CvGV((const CV *)sv);
843 cv_undef(MUTABLE_CV(sv));
844 CvGV_set(MUTABLE_CV(sv), gv);
849 SvSetMagicSV(sv, &PL_sv_undef);
852 else if (isGV_with_GP(sv)) {
857 if((stash = GvHV((const GV *)sv)) && HvNAME_get(stash))
858 mro_isa_changed_in(stash);
859 /* undef *Pkg::meth_name ... */
860 else if(GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
861 && HvNAME_get(stash))
862 mro_method_changed_in(stash);
864 gp_free(MUTABLE_GV(sv));
866 GvGP(sv) = gp_ref(gp);
868 GvLINE(sv) = CopLINE(PL_curcop);
869 GvEGV(sv) = MUTABLE_GV(sv);
875 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
890 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
891 Perl_croak_no_modify(aTHX);
892 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
893 && SvIVX(TOPs) != IV_MIN)
895 SvIV_set(TOPs, SvIVX(TOPs) - 1);
896 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
907 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
908 Perl_croak_no_modify(aTHX);
910 TARG = sv_newmortal();
911 sv_setsv(TARG, TOPs);
912 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
913 && SvIVX(TOPs) != IV_MAX)
915 SvIV_set(TOPs, SvIVX(TOPs) + 1);
916 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
921 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
931 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
932 Perl_croak_no_modify(aTHX);
934 TARG = sv_newmortal();
935 sv_setsv(TARG, TOPs);
936 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
937 && SvIVX(TOPs) != IV_MIN)
939 SvIV_set(TOPs, SvIVX(TOPs) - 1);
940 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
949 /* Ordinary operators. */
953 dVAR; dSP; dATARGET; SV *svl, *svr;
954 #ifdef PERL_PRESERVE_IVUV
957 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
960 #ifdef PERL_PRESERVE_IVUV
961 /* For integer to integer power, we do the calculation by hand wherever
962 we're sure it is safe; otherwise we call pow() and try to convert to
963 integer afterwards. */
965 SvIV_please_nomg(svr);
967 SvIV_please_nomg(svl);
976 const IV iv = SvIVX(svr);
980 goto float_it; /* Can't do negative powers this way. */
984 baseuok = SvUOK(svl);
988 const IV iv = SvIVX(svl);
991 baseuok = TRUE; /* effectively it's a UV now */
993 baseuv = -iv; /* abs, baseuok == false records sign */
996 /* now we have integer ** positive integer. */
999 /* foo & (foo - 1) is zero only for a power of 2. */
1000 if (!(baseuv & (baseuv - 1))) {
1001 /* We are raising power-of-2 to a positive integer.
1002 The logic here will work for any base (even non-integer
1003 bases) but it can be less accurate than
1004 pow (base,power) or exp (power * log (base)) when the
1005 intermediate values start to spill out of the mantissa.
1006 With powers of 2 we know this can't happen.
1007 And powers of 2 are the favourite thing for perl
1008 programmers to notice ** not doing what they mean. */
1010 NV base = baseuok ? baseuv : -(NV)baseuv;
1015 while (power >>= 1) {
1023 SvIV_please_nomg(svr);
1026 register unsigned int highbit = 8 * sizeof(UV);
1027 register unsigned int diff = 8 * sizeof(UV);
1028 while (diff >>= 1) {
1030 if (baseuv >> highbit) {
1034 /* we now have baseuv < 2 ** highbit */
1035 if (power * highbit <= 8 * sizeof(UV)) {
1036 /* result will definitely fit in UV, so use UV math
1037 on same algorithm as above */
1038 register UV result = 1;
1039 register UV base = baseuv;
1040 const bool odd_power = cBOOL(power & 1);
1044 while (power >>= 1) {
1051 if (baseuok || !odd_power)
1052 /* answer is positive */
1054 else if (result <= (UV)IV_MAX)
1055 /* answer negative, fits in IV */
1056 SETi( -(IV)result );
1057 else if (result == (UV)IV_MIN)
1058 /* 2's complement assumption: special case IV_MIN */
1061 /* answer negative, doesn't fit */
1062 SETn( -(NV)result );
1072 NV right = SvNV_nomg(svr);
1073 NV left = SvNV_nomg(svl);
1076 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1078 We are building perl with long double support and are on an AIX OS
1079 afflicted with a powl() function that wrongly returns NaNQ for any
1080 negative base. This was reported to IBM as PMR #23047-379 on
1081 03/06/2006. The problem exists in at least the following versions
1082 of AIX and the libm fileset, and no doubt others as well:
1084 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1085 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1086 AIX 5.2.0 bos.adt.libm 5.2.0.85
1088 So, until IBM fixes powl(), we provide the following workaround to
1089 handle the problem ourselves. Our logic is as follows: for
1090 negative bases (left), we use fmod(right, 2) to check if the
1091 exponent is an odd or even integer:
1093 - if odd, powl(left, right) == -powl(-left, right)
1094 - if even, powl(left, right) == powl(-left, right)
1096 If the exponent is not an integer, the result is rightly NaNQ, so
1097 we just return that (as NV_NAN).
1101 NV mod2 = Perl_fmod( right, 2.0 );
1102 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1103 SETn( -Perl_pow( -left, right) );
1104 } else if (mod2 == 0.0) { /* even integer */
1105 SETn( Perl_pow( -left, right) );
1106 } else { /* fractional power */
1110 SETn( Perl_pow( left, right) );
1113 SETn( Perl_pow( left, right) );
1114 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1116 #ifdef PERL_PRESERVE_IVUV
1118 SvIV_please_nomg(svr);
1126 dVAR; dSP; dATARGET; SV *svl, *svr;
1127 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1130 #ifdef PERL_PRESERVE_IVUV
1131 SvIV_please_nomg(svr);
1133 /* Unless the left argument is integer in range we are going to have to
1134 use NV maths. Hence only attempt to coerce the right argument if
1135 we know the left is integer. */
1136 /* Left operand is defined, so is it IV? */
1137 SvIV_please_nomg(svl);
1139 bool auvok = SvUOK(svl);
1140 bool buvok = SvUOK(svr);
1141 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1142 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1151 const IV aiv = SvIVX(svl);
1154 auvok = TRUE; /* effectively it's a UV now */
1156 alow = -aiv; /* abs, auvok == false records sign */
1162 const IV biv = SvIVX(svr);
1165 buvok = TRUE; /* effectively it's a UV now */
1167 blow = -biv; /* abs, buvok == false records sign */
1171 /* If this does sign extension on unsigned it's time for plan B */
1172 ahigh = alow >> (4 * sizeof (UV));
1174 bhigh = blow >> (4 * sizeof (UV));
1176 if (ahigh && bhigh) {
1178 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1179 which is overflow. Drop to NVs below. */
1180 } else if (!ahigh && !bhigh) {
1181 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1182 so the unsigned multiply cannot overflow. */
1183 const UV product = alow * blow;
1184 if (auvok == buvok) {
1185 /* -ve * -ve or +ve * +ve gives a +ve result. */
1189 } else if (product <= (UV)IV_MIN) {
1190 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1191 /* -ve result, which could overflow an IV */
1193 SETi( -(IV)product );
1195 } /* else drop to NVs below. */
1197 /* One operand is large, 1 small */
1200 /* swap the operands */
1202 bhigh = blow; /* bhigh now the temp var for the swap */
1206 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1207 multiplies can't overflow. shift can, add can, -ve can. */
1208 product_middle = ahigh * blow;
1209 if (!(product_middle & topmask)) {
1210 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1212 product_middle <<= (4 * sizeof (UV));
1213 product_low = alow * blow;
1215 /* as for pp_add, UV + something mustn't get smaller.
1216 IIRC ANSI mandates this wrapping *behaviour* for
1217 unsigned whatever the actual representation*/
1218 product_low += product_middle;
1219 if (product_low >= product_middle) {
1220 /* didn't overflow */
1221 if (auvok == buvok) {
1222 /* -ve * -ve or +ve * +ve gives a +ve result. */
1224 SETu( product_low );
1226 } else if (product_low <= (UV)IV_MIN) {
1227 /* 2s complement assumption again */
1228 /* -ve result, which could overflow an IV */
1230 SETi( -(IV)product_low );
1232 } /* else drop to NVs below. */
1234 } /* product_middle too large */
1235 } /* ahigh && bhigh */
1240 NV right = SvNV_nomg(svr);
1241 NV left = SvNV_nomg(svl);
1243 SETn( left * right );
1250 dVAR; dSP; dATARGET; SV *svl, *svr;
1251 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1254 /* Only try to do UV divide first
1255 if ((SLOPPYDIVIDE is true) or
1256 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1258 The assumption is that it is better to use floating point divide
1259 whenever possible, only doing integer divide first if we can't be sure.
1260 If NV_PRESERVES_UV is true then we know at compile time that no UV
1261 can be too large to preserve, so don't need to compile the code to
1262 test the size of UVs. */
1265 # define PERL_TRY_UV_DIVIDE
1266 /* ensure that 20./5. == 4. */
1268 # ifdef PERL_PRESERVE_IVUV
1269 # ifndef NV_PRESERVES_UV
1270 # define PERL_TRY_UV_DIVIDE
1275 #ifdef PERL_TRY_UV_DIVIDE
1276 SvIV_please_nomg(svr);
1278 SvIV_please_nomg(svl);
1280 bool left_non_neg = SvUOK(svl);
1281 bool right_non_neg = SvUOK(svr);
1285 if (right_non_neg) {
1289 const IV biv = SvIVX(svr);
1292 right_non_neg = TRUE; /* effectively it's a UV now */
1298 /* historically undef()/0 gives a "Use of uninitialized value"
1299 warning before dieing, hence this test goes here.
1300 If it were immediately before the second SvIV_please, then
1301 DIE() would be invoked before left was even inspected, so
1302 no inpsection would give no warning. */
1304 DIE(aTHX_ "Illegal division by zero");
1310 const IV aiv = SvIVX(svl);
1313 left_non_neg = TRUE; /* effectively it's a UV now */
1322 /* For sloppy divide we always attempt integer division. */
1324 /* Otherwise we only attempt it if either or both operands
1325 would not be preserved by an NV. If both fit in NVs
1326 we fall through to the NV divide code below. However,
1327 as left >= right to ensure integer result here, we know that
1328 we can skip the test on the right operand - right big
1329 enough not to be preserved can't get here unless left is
1332 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1335 /* Integer division can't overflow, but it can be imprecise. */
1336 const UV result = left / right;
1337 if (result * right == left) {
1338 SP--; /* result is valid */
1339 if (left_non_neg == right_non_neg) {
1340 /* signs identical, result is positive. */
1344 /* 2s complement assumption */
1345 if (result <= (UV)IV_MIN)
1346 SETi( -(IV)result );
1348 /* It's exact but too negative for IV. */
1349 SETn( -(NV)result );
1352 } /* tried integer divide but it was not an integer result */
1353 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1354 } /* left wasn't SvIOK */
1355 } /* right wasn't SvIOK */
1356 #endif /* PERL_TRY_UV_DIVIDE */
1358 NV right = SvNV_nomg(svr);
1359 NV left = SvNV_nomg(svl);
1360 (void)POPs;(void)POPs;
1361 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1362 if (! Perl_isnan(right) && right == 0.0)
1366 DIE(aTHX_ "Illegal division by zero");
1367 PUSHn( left / right );
1374 dVAR; dSP; dATARGET;
1375 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1379 bool left_neg = FALSE;
1380 bool right_neg = FALSE;
1381 bool use_double = FALSE;
1382 bool dright_valid = FALSE;
1385 SV * const svr = TOPs;
1386 SV * const svl = TOPm1s;
1387 SvIV_please_nomg(svr);
1389 right_neg = !SvUOK(svr);
1393 const IV biv = SvIVX(svr);
1396 right_neg = FALSE; /* effectively it's a UV now */
1403 dright = SvNV_nomg(svr);
1404 right_neg = dright < 0;
1407 if (dright < UV_MAX_P1) {
1408 right = U_V(dright);
1409 dright_valid = TRUE; /* In case we need to use double below. */
1415 /* At this point use_double is only true if right is out of range for
1416 a UV. In range NV has been rounded down to nearest UV and
1417 use_double false. */
1418 SvIV_please_nomg(svl);
1419 if (!use_double && SvIOK(svl)) {
1421 left_neg = !SvUOK(svl);
1425 const IV aiv = SvIVX(svl);
1428 left_neg = FALSE; /* effectively it's a UV now */
1436 dleft = SvNV_nomg(svl);
1437 left_neg = dleft < 0;
1441 /* This should be exactly the 5.6 behaviour - if left and right are
1442 both in range for UV then use U_V() rather than floor. */
1444 if (dleft < UV_MAX_P1) {
1445 /* right was in range, so is dleft, so use UVs not double.
1449 /* left is out of range for UV, right was in range, so promote
1450 right (back) to double. */
1452 /* The +0.5 is used in 5.6 even though it is not strictly
1453 consistent with the implicit +0 floor in the U_V()
1454 inside the #if 1. */
1455 dleft = Perl_floor(dleft + 0.5);
1458 dright = Perl_floor(dright + 0.5);
1469 DIE(aTHX_ "Illegal modulus zero");
1471 dans = Perl_fmod(dleft, dright);
1472 if ((left_neg != right_neg) && dans)
1473 dans = dright - dans;
1476 sv_setnv(TARG, dans);
1482 DIE(aTHX_ "Illegal modulus zero");
1485 if ((left_neg != right_neg) && ans)
1488 /* XXX may warn: unary minus operator applied to unsigned type */
1489 /* could change -foo to be (~foo)+1 instead */
1490 if (ans <= ~((UV)IV_MAX)+1)
1491 sv_setiv(TARG, ~ans+1);
1493 sv_setnv(TARG, -(NV)ans);
1496 sv_setuv(TARG, ans);
1505 dVAR; dSP; dATARGET;
1509 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1510 /* TODO: think of some way of doing list-repeat overloading ??? */
1515 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1521 const UV uv = SvUV_nomg(sv);
1523 count = IV_MAX; /* The best we can do? */
1527 const IV iv = SvIV_nomg(sv);
1534 else if (SvNOKp(sv)) {
1535 const NV nv = SvNV_nomg(sv);
1542 count = SvIV_nomg(sv);
1544 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1546 static const char oom_list_extend[] = "Out of memory during list extend";
1547 const I32 items = SP - MARK;
1548 const I32 max = items * count;
1550 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1551 /* Did the max computation overflow? */
1552 if (items > 0 && max > 0 && (max < items || max < count))
1553 Perl_croak(aTHX_ oom_list_extend);
1558 /* This code was intended to fix 20010809.028:
1561 for (($x =~ /./g) x 2) {
1562 print chop; # "abcdabcd" expected as output.
1565 * but that change (#11635) broke this code:
1567 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1569 * I can't think of a better fix that doesn't introduce
1570 * an efficiency hit by copying the SVs. The stack isn't
1571 * refcounted, and mortalisation obviously doesn't
1572 * Do The Right Thing when the stack has more than
1573 * one pointer to the same mortal value.
1577 *SP = sv_2mortal(newSVsv(*SP));
1587 repeatcpy((char*)(MARK + items), (char*)MARK,
1588 items * sizeof(const SV *), count - 1);
1591 else if (count <= 0)
1594 else { /* Note: mark already snarfed by pp_list */
1595 SV * const tmpstr = POPs;
1598 static const char oom_string_extend[] =
1599 "Out of memory during string extend";
1602 sv_setsv_nomg(TARG, tmpstr);
1603 SvPV_force_nomg(TARG, len);
1604 isutf = DO_UTF8(TARG);
1609 const STRLEN max = (UV)count * len;
1610 if (len > MEM_SIZE_MAX / count)
1611 Perl_croak(aTHX_ oom_string_extend);
1612 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1613 SvGROW(TARG, max + 1);
1614 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1615 SvCUR_set(TARG, SvCUR(TARG) * count);
1617 *SvEND(TARG) = '\0';
1620 (void)SvPOK_only_UTF8(TARG);
1622 (void)SvPOK_only(TARG);
1624 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1625 /* The parser saw this as a list repeat, and there
1626 are probably several items on the stack. But we're
1627 in scalar context, and there's no pp_list to save us
1628 now. So drop the rest of the items -- robin@kitsite.com
1640 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1641 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1644 useleft = USE_LEFT(svl);
1645 #ifdef PERL_PRESERVE_IVUV
1646 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1647 "bad things" happen if you rely on signed integers wrapping. */
1648 SvIV_please_nomg(svr);
1650 /* Unless the left argument is integer in range we are going to have to
1651 use NV maths. Hence only attempt to coerce the right argument if
1652 we know the left is integer. */
1653 register UV auv = 0;
1659 a_valid = auvok = 1;
1660 /* left operand is undef, treat as zero. */
1662 /* Left operand is defined, so is it IV? */
1663 SvIV_please_nomg(svl);
1665 if ((auvok = SvUOK(svl)))
1668 register const IV aiv = SvIVX(svl);
1671 auvok = 1; /* Now acting as a sign flag. */
1672 } else { /* 2s complement assumption for IV_MIN */
1680 bool result_good = 0;
1683 bool buvok = SvUOK(svr);
1688 register const IV biv = SvIVX(svr);
1695 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1696 else "IV" now, independent of how it came in.
1697 if a, b represents positive, A, B negative, a maps to -A etc
1702 all UV maths. negate result if A negative.
1703 subtract if signs same, add if signs differ. */
1705 if (auvok ^ buvok) {
1714 /* Must get smaller */
1719 if (result <= buv) {
1720 /* result really should be -(auv-buv). as its negation
1721 of true value, need to swap our result flag */
1733 if (result <= (UV)IV_MIN)
1734 SETi( -(IV)result );
1736 /* result valid, but out of range for IV. */
1737 SETn( -(NV)result );
1741 } /* Overflow, drop through to NVs. */
1746 NV value = SvNV_nomg(svr);
1750 /* left operand is undef, treat as zero - value */
1754 SETn( SvNV_nomg(svl) - value );
1761 dVAR; dSP; dATARGET; SV *svl, *svr;
1762 tryAMAGICbin_MG(lshift_amg, AMGf_assign);
1766 const IV shift = SvIV_nomg(svr);
1767 if (PL_op->op_private & HINT_INTEGER) {
1768 const IV i = SvIV_nomg(svl);
1772 const UV u = SvUV_nomg(svl);
1781 dVAR; dSP; dATARGET; SV *svl, *svr;
1782 tryAMAGICbin_MG(rshift_amg, AMGf_assign);
1786 const IV shift = SvIV_nomg(svr);
1787 if (PL_op->op_private & HINT_INTEGER) {
1788 const IV i = SvIV_nomg(svl);
1792 const UV u = SvUV_nomg(svl);
1802 tryAMAGICbin_MG(lt_amg, AMGf_set);
1803 #ifdef PERL_PRESERVE_IVUV
1804 SvIV_please_nomg(TOPs);
1806 SvIV_please_nomg(TOPm1s);
1807 if (SvIOK(TOPm1s)) {
1808 bool auvok = SvUOK(TOPm1s);
1809 bool buvok = SvUOK(TOPs);
1811 if (!auvok && !buvok) { /* ## IV < IV ## */
1812 const IV aiv = SvIVX(TOPm1s);
1813 const IV biv = SvIVX(TOPs);
1816 SETs(boolSV(aiv < biv));
1819 if (auvok && buvok) { /* ## UV < UV ## */
1820 const UV auv = SvUVX(TOPm1s);
1821 const UV buv = SvUVX(TOPs);
1824 SETs(boolSV(auv < buv));
1827 if (auvok) { /* ## UV < IV ## */
1829 const IV biv = SvIVX(TOPs);
1832 /* As (a) is a UV, it's >=0, so it cannot be < */
1837 SETs(boolSV(auv < (UV)biv));
1840 { /* ## IV < UV ## */
1841 const IV aiv = SvIVX(TOPm1s);
1845 /* As (b) is a UV, it's >=0, so it must be < */
1852 SETs(boolSV((UV)aiv < buv));
1858 #ifndef NV_PRESERVES_UV
1859 #ifdef PERL_PRESERVE_IVUV
1862 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
1864 SETs(boolSV(SvRV(TOPs) < SvRV(TOPp1s)));
1869 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1871 if (Perl_isnan(left) || Perl_isnan(right))
1873 SETs(boolSV(left < right));
1876 SETs(boolSV(SvNV_nomg(TOPs) < value));
1885 tryAMAGICbin_MG(gt_amg, AMGf_set);
1886 #ifdef PERL_PRESERVE_IVUV
1887 SvIV_please_nomg(TOPs);
1889 SvIV_please_nomg(TOPm1s);
1890 if (SvIOK(TOPm1s)) {
1891 bool auvok = SvUOK(TOPm1s);
1892 bool buvok = SvUOK(TOPs);
1894 if (!auvok && !buvok) { /* ## IV > IV ## */
1895 const IV aiv = SvIVX(TOPm1s);
1896 const IV biv = SvIVX(TOPs);
1899 SETs(boolSV(aiv > biv));
1902 if (auvok && buvok) { /* ## UV > UV ## */
1903 const UV auv = SvUVX(TOPm1s);
1904 const UV buv = SvUVX(TOPs);
1907 SETs(boolSV(auv > buv));
1910 if (auvok) { /* ## UV > IV ## */
1912 const IV biv = SvIVX(TOPs);
1916 /* As (a) is a UV, it's >=0, so it must be > */
1921 SETs(boolSV(auv > (UV)biv));
1924 { /* ## IV > UV ## */
1925 const IV aiv = SvIVX(TOPm1s);
1929 /* As (b) is a UV, it's >=0, so it cannot be > */
1936 SETs(boolSV((UV)aiv > buv));
1942 #ifndef NV_PRESERVES_UV
1943 #ifdef PERL_PRESERVE_IVUV
1946 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
1948 SETs(boolSV(SvRV(TOPs) > SvRV(TOPp1s)));
1953 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1955 if (Perl_isnan(left) || Perl_isnan(right))
1957 SETs(boolSV(left > right));
1960 SETs(boolSV(SvNV_nomg(TOPs) > value));
1969 tryAMAGICbin_MG(le_amg, AMGf_set);
1970 #ifdef PERL_PRESERVE_IVUV
1971 SvIV_please_nomg(TOPs);
1973 SvIV_please_nomg(TOPm1s);
1974 if (SvIOK(TOPm1s)) {
1975 bool auvok = SvUOK(TOPm1s);
1976 bool buvok = SvUOK(TOPs);
1978 if (!auvok && !buvok) { /* ## IV <= IV ## */
1979 const IV aiv = SvIVX(TOPm1s);
1980 const IV biv = SvIVX(TOPs);
1983 SETs(boolSV(aiv <= biv));
1986 if (auvok && buvok) { /* ## UV <= UV ## */
1987 UV auv = SvUVX(TOPm1s);
1988 UV buv = SvUVX(TOPs);
1991 SETs(boolSV(auv <= buv));
1994 if (auvok) { /* ## UV <= IV ## */
1996 const IV biv = SvIVX(TOPs);
2000 /* As (a) is a UV, it's >=0, so a cannot be <= */
2005 SETs(boolSV(auv <= (UV)biv));
2008 { /* ## IV <= UV ## */
2009 const IV aiv = SvIVX(TOPm1s);
2013 /* As (b) is a UV, it's >=0, so a must be <= */
2020 SETs(boolSV((UV)aiv <= buv));
2026 #ifndef NV_PRESERVES_UV
2027 #ifdef PERL_PRESERVE_IVUV
2030 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2032 SETs(boolSV(SvRV(TOPs) <= SvRV(TOPp1s)));
2037 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2039 if (Perl_isnan(left) || Perl_isnan(right))
2041 SETs(boolSV(left <= right));
2044 SETs(boolSV(SvNV_nomg(TOPs) <= value));
2053 tryAMAGICbin_MG(ge_amg,AMGf_set);
2054 #ifdef PERL_PRESERVE_IVUV
2055 SvIV_please_nomg(TOPs);
2057 SvIV_please_nomg(TOPm1s);
2058 if (SvIOK(TOPm1s)) {
2059 bool auvok = SvUOK(TOPm1s);
2060 bool buvok = SvUOK(TOPs);
2062 if (!auvok && !buvok) { /* ## IV >= IV ## */
2063 const IV aiv = SvIVX(TOPm1s);
2064 const IV biv = SvIVX(TOPs);
2067 SETs(boolSV(aiv >= biv));
2070 if (auvok && buvok) { /* ## UV >= UV ## */
2071 const UV auv = SvUVX(TOPm1s);
2072 const UV buv = SvUVX(TOPs);
2075 SETs(boolSV(auv >= buv));
2078 if (auvok) { /* ## UV >= IV ## */
2080 const IV biv = SvIVX(TOPs);
2084 /* As (a) is a UV, it's >=0, so it must be >= */
2089 SETs(boolSV(auv >= (UV)biv));
2092 { /* ## IV >= UV ## */
2093 const IV aiv = SvIVX(TOPm1s);
2097 /* As (b) is a UV, it's >=0, so a cannot be >= */
2104 SETs(boolSV((UV)aiv >= buv));
2110 #ifndef NV_PRESERVES_UV
2111 #ifdef PERL_PRESERVE_IVUV
2114 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2116 SETs(boolSV(SvRV(TOPs) >= SvRV(TOPp1s)));
2121 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2123 if (Perl_isnan(left) || Perl_isnan(right))
2125 SETs(boolSV(left >= right));
2128 SETs(boolSV(SvNV_nomg(TOPs) >= value));
2137 tryAMAGICbin_MG(ne_amg,AMGf_set);
2138 #ifndef NV_PRESERVES_UV
2139 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2141 SETs(boolSV(SvRV(TOPs) != SvRV(TOPp1s)));
2145 #ifdef PERL_PRESERVE_IVUV
2146 SvIV_please_nomg(TOPs);
2148 SvIV_please_nomg(TOPm1s);
2149 if (SvIOK(TOPm1s)) {
2150 const bool auvok = SvUOK(TOPm1s);
2151 const bool buvok = SvUOK(TOPs);
2153 if (auvok == buvok) { /* ## IV == IV or UV == UV ## */
2154 /* Casting IV to UV before comparison isn't going to matter
2155 on 2s complement. On 1s complement or sign&magnitude
2156 (if we have any of them) it could make negative zero
2157 differ from normal zero. As I understand it. (Need to
2158 check - is negative zero implementation defined behaviour
2160 const UV buv = SvUVX(POPs);
2161 const UV auv = SvUVX(TOPs);
2163 SETs(boolSV(auv != buv));
2166 { /* ## Mixed IV,UV ## */
2170 /* != is commutative so swap if needed (save code) */
2172 /* swap. top of stack (b) is the iv */
2176 /* As (a) is a UV, it's >0, so it cannot be == */
2185 /* As (b) is a UV, it's >0, so it cannot be == */
2189 uv = SvUVX(*(SP+1)); /* Do I want TOPp1s() ? */
2191 SETs(boolSV((UV)iv != uv));
2198 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2200 if (Perl_isnan(left) || Perl_isnan(right))
2202 SETs(boolSV(left != right));
2205 SETs(boolSV(SvNV_nomg(TOPs) != value));
2214 tryAMAGICbin_MG(ncmp_amg, 0);
2215 #ifndef NV_PRESERVES_UV
2216 if (SvROK(TOPs) && !SvAMAGIC(TOPs) && SvROK(TOPm1s) && !SvAMAGIC(TOPm1s)) {
2217 const UV right = PTR2UV(SvRV(POPs));
2218 const UV left = PTR2UV(SvRV(TOPs));
2219 SETi((left > right) - (left < right));
2223 #ifdef PERL_PRESERVE_IVUV
2224 /* Fortunately it seems NaN isn't IOK */
2225 SvIV_please_nomg(TOPs);
2227 SvIV_please_nomg(TOPm1s);
2228 if (SvIOK(TOPm1s)) {
2229 const bool leftuvok = SvUOK(TOPm1s);
2230 const bool rightuvok = SvUOK(TOPs);
2232 if (!leftuvok && !rightuvok) { /* ## IV <=> IV ## */
2233 const IV leftiv = SvIVX(TOPm1s);
2234 const IV rightiv = SvIVX(TOPs);
2236 if (leftiv > rightiv)
2238 else if (leftiv < rightiv)
2242 } else if (leftuvok && rightuvok) { /* ## UV <=> UV ## */
2243 const UV leftuv = SvUVX(TOPm1s);
2244 const UV rightuv = SvUVX(TOPs);
2246 if (leftuv > rightuv)
2248 else if (leftuv < rightuv)
2252 } else if (leftuvok) { /* ## UV <=> IV ## */
2253 const IV rightiv = SvIVX(TOPs);
2255 /* As (a) is a UV, it's >=0, so it cannot be < */
2258 const UV leftuv = SvUVX(TOPm1s);
2259 if (leftuv > (UV)rightiv) {
2261 } else if (leftuv < (UV)rightiv) {
2267 } else { /* ## IV <=> UV ## */
2268 const IV leftiv = SvIVX(TOPm1s);
2270 /* As (b) is a UV, it's >=0, so it must be < */
2273 const UV rightuv = SvUVX(TOPs);
2274 if ((UV)leftiv > rightuv) {
2276 } else if ((UV)leftiv < rightuv) {
2294 if (Perl_isnan(left) || Perl_isnan(right)) {
2298 value = (left > right) - (left < right);
2302 else if (left < right)
2304 else if (left > right)
2320 int amg_type = sle_amg;
2324 switch (PL_op->op_type) {
2343 tryAMAGICbin_MG(amg_type, AMGf_set);
2346 const int cmp = (IN_LOCALE_RUNTIME
2347 ? sv_cmp_locale_flags(left, right, 0)
2348 : sv_cmp_flags(left, right, 0));
2349 SETs(boolSV(cmp * multiplier < rhs));
2357 tryAMAGICbin_MG(seq_amg, AMGf_set);
2360 SETs(boolSV(sv_eq_flags(left, right, 0)));
2368 tryAMAGICbin_MG(sne_amg, AMGf_set);
2371 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2379 tryAMAGICbin_MG(scmp_amg, 0);
2382 const int cmp = (IN_LOCALE_RUNTIME
2383 ? sv_cmp_locale_flags(left, right, 0)
2384 : sv_cmp_flags(left, right, 0));
2392 dVAR; dSP; dATARGET;
2393 tryAMAGICbin_MG(band_amg, AMGf_assign);
2396 if (SvNIOKp(left) || SvNIOKp(right)) {
2397 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2398 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2399 if (PL_op->op_private & HINT_INTEGER) {
2400 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2404 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2407 if (left_ro_nonnum) SvNIOK_off(left);
2408 if (right_ro_nonnum) SvNIOK_off(right);
2411 do_vop(PL_op->op_type, TARG, left, right);
2420 dVAR; dSP; dATARGET;
2421 const int op_type = PL_op->op_type;
2423 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2426 if (SvNIOKp(left) || SvNIOKp(right)) {
2427 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2428 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2429 if (PL_op->op_private & HINT_INTEGER) {
2430 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2431 const IV r = SvIV_nomg(right);
2432 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2436 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2437 const UV r = SvUV_nomg(right);
2438 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2441 if (left_ro_nonnum) SvNIOK_off(left);
2442 if (right_ro_nonnum) SvNIOK_off(right);
2445 do_vop(op_type, TARG, left, right);
2455 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2457 SV * const sv = TOPs;
2458 const int flags = SvFLAGS(sv);
2460 if( looks_like_number( sv ) ){
2464 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2465 /* It's publicly an integer, or privately an integer-not-float */
2468 if (SvIVX(sv) == IV_MIN) {
2469 /* 2s complement assumption. */
2470 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2473 else if (SvUVX(sv) <= IV_MAX) {
2478 else if (SvIVX(sv) != IV_MIN) {
2482 #ifdef PERL_PRESERVE_IVUV
2490 SETn(-SvNV_nomg(sv));
2491 else if (SvPOKp(sv)) {
2493 const char * const s = SvPV_nomg_const(sv, len);
2494 if (isIDFIRST(*s)) {
2495 sv_setpvs(TARG, "-");
2498 else if (*s == '+' || *s == '-') {
2499 sv_setsv_nomg(TARG, sv);
2500 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2502 else if (DO_UTF8(sv)) {
2503 SvIV_please_nomg(sv);
2505 goto oops_its_an_int;
2507 sv_setnv(TARG, -SvNV_nomg(sv));
2509 sv_setpvs(TARG, "-");
2514 SvIV_please_nomg(sv);
2516 goto oops_its_an_int;
2517 sv_setnv(TARG, -SvNV_nomg(sv));
2522 SETn(-SvNV_nomg(sv));
2530 tryAMAGICun_MG(not_amg, AMGf_set);
2531 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2538 tryAMAGICun_MG(compl_amg, 0);
2542 if (PL_op->op_private & HINT_INTEGER) {
2543 const IV i = ~SvIV_nomg(sv);
2547 const UV u = ~SvUV_nomg(sv);
2556 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2557 sv_setsv_nomg(TARG, sv);
2558 tmps = (U8*)SvPV_force_nomg(TARG, len);
2561 /* Calculate exact length, let's not estimate. */
2566 U8 * const send = tmps + len;
2567 U8 * const origtmps = tmps;
2568 const UV utf8flags = UTF8_ALLOW_ANYUV;
2570 while (tmps < send) {
2571 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2573 targlen += UNISKIP(~c);
2579 /* Now rewind strings and write them. */
2586 Newx(result, targlen + 1, U8);
2588 while (tmps < send) {
2589 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2591 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2594 sv_usepvn_flags(TARG, (char*)result, targlen,
2595 SV_HAS_TRAILING_NUL);
2602 Newx(result, nchar + 1, U8);
2604 while (tmps < send) {
2605 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2610 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2618 register long *tmpl;
2619 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2622 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2627 for ( ; anum > 0; anum--, tmps++)
2635 /* integer versions of some of the above */
2639 dVAR; dSP; dATARGET;
2640 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2643 SETi( left * right );
2651 dVAR; dSP; dATARGET;
2652 tryAMAGICbin_MG(div_amg, AMGf_assign);
2655 IV value = SvIV_nomg(right);
2657 DIE(aTHX_ "Illegal division by zero");
2658 num = SvIV_nomg(left);
2660 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2664 value = num / value;
2670 #if defined(__GLIBC__) && IVSIZE == 8
2677 /* This is the vanilla old i_modulo. */
2678 dVAR; dSP; dATARGET;
2679 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2683 DIE(aTHX_ "Illegal modulus zero");
2684 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2688 SETi( left % right );
2693 #if defined(__GLIBC__) && IVSIZE == 8
2698 /* This is the i_modulo with the workaround for the _moddi3 bug
2699 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2700 * See below for pp_i_modulo. */
2701 dVAR; dSP; dATARGET;
2702 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2706 DIE(aTHX_ "Illegal modulus zero");
2707 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2711 SETi( left % PERL_ABS(right) );
2718 dVAR; dSP; dATARGET;
2719 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2723 DIE(aTHX_ "Illegal modulus zero");
2724 /* The assumption is to use hereafter the old vanilla version... */
2726 PL_ppaddr[OP_I_MODULO] =
2728 /* .. but if we have glibc, we might have a buggy _moddi3
2729 * (at least glicb 2.2.5 is known to have this bug), in other
2730 * words our integer modulus with negative quad as the second
2731 * argument might be broken. Test for this and re-patch the
2732 * opcode dispatch table if that is the case, remembering to
2733 * also apply the workaround so that this first round works
2734 * right, too. See [perl #9402] for more information. */
2738 /* Cannot do this check with inlined IV constants since
2739 * that seems to work correctly even with the buggy glibc. */
2741 /* Yikes, we have the bug.
2742 * Patch in the workaround version. */
2744 PL_ppaddr[OP_I_MODULO] =
2745 &Perl_pp_i_modulo_1;
2746 /* Make certain we work right this time, too. */
2747 right = PERL_ABS(right);
2750 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2754 SETi( left % right );
2762 dVAR; dSP; dATARGET;
2763 tryAMAGICbin_MG(add_amg, AMGf_assign);
2765 dPOPTOPiirl_ul_nomg;
2766 SETi( left + right );
2773 dVAR; dSP; dATARGET;
2774 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2776 dPOPTOPiirl_ul_nomg;
2777 SETi( left - right );
2785 tryAMAGICbin_MG(lt_amg, AMGf_set);
2788 SETs(boolSV(left < right));
2796 tryAMAGICbin_MG(gt_amg, AMGf_set);
2799 SETs(boolSV(left > right));
2807 tryAMAGICbin_MG(le_amg, AMGf_set);
2810 SETs(boolSV(left <= right));
2818 tryAMAGICbin_MG(ge_amg, AMGf_set);
2821 SETs(boolSV(left >= right));
2829 tryAMAGICbin_MG(eq_amg, AMGf_set);
2832 SETs(boolSV(left == right));
2840 tryAMAGICbin_MG(ne_amg, AMGf_set);
2843 SETs(boolSV(left != right));
2851 tryAMAGICbin_MG(ncmp_amg, 0);
2858 else if (left < right)
2870 tryAMAGICun_MG(neg_amg, 0);
2872 SV * const sv = TOPs;
2873 IV const i = SvIV_nomg(sv);
2879 /* High falutin' math. */
2884 tryAMAGICbin_MG(atan2_amg, 0);
2887 SETn(Perl_atan2(left, right));
2895 int amg_type = sin_amg;
2896 const char *neg_report = NULL;
2897 NV (*func)(NV) = Perl_sin;
2898 const int op_type = PL_op->op_type;
2915 amg_type = sqrt_amg;
2917 neg_report = "sqrt";
2922 tryAMAGICun_MG(amg_type, 0);
2924 SV * const arg = POPs;
2925 const NV value = SvNV_nomg(arg);
2927 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2928 SET_NUMERIC_STANDARD();
2929 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2932 XPUSHn(func(value));
2937 /* Support Configure command-line overrides for rand() functions.
2938 After 5.005, perhaps we should replace this by Configure support
2939 for drand48(), random(), or rand(). For 5.005, though, maintain
2940 compatibility by calling rand() but allow the user to override it.
2941 See INSTALL for details. --Andy Dougherty 15 July 1998
2943 /* Now it's after 5.005, and Configure supports drand48() and random(),
2944 in addition to rand(). So the overrides should not be needed any more.
2945 --Jarkko Hietaniemi 27 September 1998
2948 #ifndef HAS_DRAND48_PROTO
2949 extern double drand48 (void);
2962 if (!PL_srand_called) {
2963 (void)seedDrand01((Rand_seed_t)seed());
2964 PL_srand_called = TRUE;
2974 const UV anum = (MAXARG < 1) ? seed() : POPu;
2975 (void)seedDrand01((Rand_seed_t)anum);
2976 PL_srand_called = TRUE;
2980 /* Historically srand always returned true. We can avoid breaking
2982 sv_setpvs(TARG, "0 but true");
2991 tryAMAGICun_MG(int_amg, AMGf_numeric);
2993 SV * const sv = TOPs;
2994 const IV iv = SvIV_nomg(sv);
2995 /* XXX it's arguable that compiler casting to IV might be subtly
2996 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2997 else preferring IV has introduced a subtle behaviour change bug. OTOH
2998 relying on floating point to be accurate is a bug. */
3003 else if (SvIOK(sv)) {
3005 SETu(SvUV_nomg(sv));
3010 const NV value = SvNV_nomg(sv);
3012 if (value < (NV)UV_MAX + 0.5) {
3015 SETn(Perl_floor(value));
3019 if (value > (NV)IV_MIN - 0.5) {
3022 SETn(Perl_ceil(value));
3033 tryAMAGICun_MG(abs_amg, AMGf_numeric);
3035 SV * const sv = TOPs;
3036 /* This will cache the NV value if string isn't actually integer */
3037 const IV iv = SvIV_nomg(sv);
3042 else if (SvIOK(sv)) {
3043 /* IVX is precise */
3045 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
3053 /* 2s complement assumption. Also, not really needed as
3054 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
3060 const NV value = SvNV_nomg(sv);
3074 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
3078 SV* const sv = POPs;
3080 tmps = (SvPV_const(sv, len));
3082 /* If Unicode, try to downgrade
3083 * If not possible, croak. */
3084 SV* const tsv = sv_2mortal(newSVsv(sv));
3087 sv_utf8_downgrade(tsv, FALSE);
3088 tmps = SvPV_const(tsv, len);
3090 if (PL_op->op_type == OP_HEX)
3093 while (*tmps && len && isSPACE(*tmps))
3097 if (*tmps == 'x' || *tmps == 'X') {
3099 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
3101 else if (*tmps == 'b' || *tmps == 'B')
3102 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
3104 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
3106 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
3120 SV * const sv = TOPs;
3122 if (SvGAMAGIC(sv)) {
3123 /* For an overloaded or magic scalar, we can't know in advance if
3124 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
3125 it likes to cache the length. Maybe that should be a documented
3130 = sv_2pv_flags(sv, &len,
3131 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
3134 sv_setsv(TARG, &PL_sv_undef);
3137 else if (DO_UTF8(sv)) {
3138 SETi(utf8_length((U8*)p, (U8*)p + len));
3142 } else if (SvOK(sv)) {
3143 /* Neither magic nor overloaded. */
3145 SETi(sv_len_utf8(sv));
3149 sv_setsv_nomg(TARG, &PL_sv_undef);
3169 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3171 const IV arybase = CopARYBASE_get(PL_curcop);
3173 const char *repl = NULL;
3175 const int num_args = PL_op->op_private & 7;
3176 bool repl_need_utf8_upgrade = FALSE;
3177 bool repl_is_utf8 = FALSE;
3182 repl = SvPV_const(repl_sv, repl_len);
3183 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
3186 len_iv = SvIV(len_sv);
3187 len_is_uv = SvIOK_UV(len_sv);
3190 pos1_iv = SvIV(pos_sv);
3191 pos1_is_uv = SvIOK_UV(pos_sv);
3197 sv_utf8_upgrade(sv);
3199 else if (DO_UTF8(sv))
3200 repl_need_utf8_upgrade = TRUE;
3202 tmps = SvPV_const(sv, curlen);
3204 utf8_curlen = sv_len_utf8(sv);
3205 if (utf8_curlen == curlen)
3208 curlen = utf8_curlen;
3213 if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
3214 UV pos1_uv = pos1_iv-arybase;
3215 /* Overflow can occur when $[ < 0 */
3216 if (arybase < 0 && pos1_uv < (UV)pos1_iv)
3221 else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
3222 goto bound_fail; /* $[=3; substr($_,2,...) */
3224 else { /* pos < $[ */
3225 if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
3230 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3235 if (pos1_is_uv || pos1_iv > 0) {
3236 if ((UV)pos1_iv > curlen)
3241 if (!len_is_uv && len_iv < 0) {
3242 pos2_iv = curlen + len_iv;
3244 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3247 } else { /* len_iv >= 0 */
3248 if (!pos1_is_uv && pos1_iv < 0) {
3249 pos2_iv = pos1_iv + len_iv;
3250 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3252 if ((UV)len_iv > curlen-(UV)pos1_iv)
3255 pos2_iv = pos1_iv+len_iv;
3265 if (!pos2_is_uv && pos2_iv < 0) {
3266 if (!pos1_is_uv && pos1_iv < 0)
3270 else if (!pos1_is_uv && pos1_iv < 0)
3273 if ((UV)pos2_iv < (UV)pos1_iv)
3275 if ((UV)pos2_iv > curlen)
3279 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3280 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3281 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3282 STRLEN byte_len = len;
3283 STRLEN byte_pos = utf8_curlen
3284 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3286 if (lvalue && !repl) {
3289 if (!SvGMAGICAL(sv)) {
3291 SvPV_force_nolen(sv);
3292 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3293 "Attempt to use reference as lvalue in substr");
3295 if (isGV_with_GP(sv))
3296 SvPV_force_nolen(sv);
3297 else if (SvOK(sv)) /* is it defined ? */
3298 (void)SvPOK_only_UTF8(sv);
3300 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3303 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3304 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3306 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3307 LvTARGOFF(ret) = pos;
3308 LvTARGLEN(ret) = len;
3311 PUSHs(ret); /* avoid SvSETMAGIC here */
3315 SvTAINTED_off(TARG); /* decontaminate */
3316 SvUTF8_off(TARG); /* decontaminate */
3319 sv_setpvn(TARG, tmps, byte_len);
3320 #ifdef USE_LOCALE_COLLATE
3321 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3327 SV* repl_sv_copy = NULL;
3329 if (repl_need_utf8_upgrade) {
3330 repl_sv_copy = newSVsv(repl_sv);
3331 sv_utf8_upgrade(repl_sv_copy);
3332 repl = SvPV_const(repl_sv_copy, repl_len);
3333 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3337 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3340 SvREFCNT_dec(repl_sv_copy);
3344 PUSHs(TARG); /* avoid SvSETMAGIC here */
3349 Perl_croak(aTHX_ "substr outside of string");
3350 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3357 register const IV size = POPi;
3358 register const IV offset = POPi;
3359 register SV * const src = POPs;
3360 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3363 if (lvalue) { /* it's an lvalue! */
3364 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3365 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3367 LvTARG(ret) = SvREFCNT_inc_simple(src);
3368 LvTARGOFF(ret) = offset;
3369 LvTARGLEN(ret) = size;
3373 SvTAINTED_off(TARG); /* decontaminate */
3377 sv_setuv(ret, do_vecget(src, offset, size));
3393 const char *little_p;
3394 const I32 arybase = CopARYBASE_get(PL_curcop);
3397 const bool is_index = PL_op->op_type == OP_INDEX;
3400 /* arybase is in characters, like offset, so combine prior to the
3401 UTF-8 to bytes calculation. */
3402 offset = POPi - arybase;
3406 big_p = SvPV_const(big, biglen);
3407 little_p = SvPV_const(little, llen);
3409 big_utf8 = DO_UTF8(big);
3410 little_utf8 = DO_UTF8(little);
3411 if (big_utf8 ^ little_utf8) {
3412 /* One needs to be upgraded. */
3413 if (little_utf8 && !PL_encoding) {
3414 /* Well, maybe instead we might be able to downgrade the small
3416 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3419 /* If the large string is ISO-8859-1, and it's not possible to
3420 convert the small string to ISO-8859-1, then there is no
3421 way that it could be found anywhere by index. */
3426 /* At this point, pv is a malloc()ed string. So donate it to temp
3427 to ensure it will get free()d */
3428 little = temp = newSV(0);
3429 sv_usepvn(temp, pv, llen);
3430 little_p = SvPVX(little);
3433 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3436 sv_recode_to_utf8(temp, PL_encoding);
3438 sv_utf8_upgrade(temp);
3443 big_p = SvPV_const(big, biglen);
3446 little_p = SvPV_const(little, llen);
3450 if (SvGAMAGIC(big)) {
3451 /* Life just becomes a lot easier if I use a temporary here.
3452 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3453 will trigger magic and overloading again, as will fbm_instr()
3455 big = newSVpvn_flags(big_p, biglen,
3456 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3459 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3460 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3461 warn on undef, and we've already triggered a warning with the
3462 SvPV_const some lines above. We can't remove that, as we need to
3463 call some SvPV to trigger overloading early and find out if the
3465 This is all getting to messy. The API isn't quite clean enough,
3466 because data access has side effects.
3468 little = newSVpvn_flags(little_p, llen,
3469 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3470 little_p = SvPVX(little);
3474 offset = is_index ? 0 : biglen;
3476 if (big_utf8 && offset > 0)
3477 sv_pos_u2b(big, &offset, 0);
3483 else if (offset > (I32)biglen)
3485 if (!(little_p = is_index
3486 ? fbm_instr((unsigned char*)big_p + offset,
3487 (unsigned char*)big_p + biglen, little, 0)
3488 : rninstr(big_p, big_p + offset,
3489 little_p, little_p + llen)))
3492 retval = little_p - big_p;
3493 if (retval > 0 && big_utf8)
3494 sv_pos_b2u(big, &retval);
3498 PUSHi(retval + arybase);
3504 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3505 if (SvTAINTED(MARK[1]))
3506 TAINT_PROPER("sprintf");
3507 SvTAINTED_off(TARG);
3508 do_sprintf(TARG, SP-MARK, MARK+1);
3509 TAINT_IF(SvTAINTED(TARG));
3521 const U8 *s = (U8*)SvPV_const(argsv, len);
3523 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3524 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3525 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3529 XPUSHu(DO_UTF8(argsv) ?
3530 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3542 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3544 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3546 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3548 (void) POPs; /* Ignore the argument value. */
3549 value = UNICODE_REPLACEMENT;
3555 SvUPGRADE(TARG,SVt_PV);
3557 if (value > 255 && !IN_BYTES) {
3558 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3559 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3560 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3562 (void)SvPOK_only(TARG);
3571 *tmps++ = (char)value;
3573 (void)SvPOK_only(TARG);
3575 if (PL_encoding && !IN_BYTES) {
3576 sv_recode_to_utf8(TARG, PL_encoding);
3578 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3579 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3583 *tmps++ = (char)value;
3599 const char *tmps = SvPV_const(left, len);
3601 if (DO_UTF8(left)) {
3602 /* If Unicode, try to downgrade.
3603 * If not possible, croak.
3604 * Yes, we made this up. */
3605 SV* const tsv = sv_2mortal(newSVsv(left));
3608 sv_utf8_downgrade(tsv, FALSE);
3609 tmps = SvPV_const(tsv, len);
3611 # ifdef USE_ITHREADS
3613 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3614 /* This should be threadsafe because in ithreads there is only
3615 * one thread per interpreter. If this would not be true,
3616 * we would need a mutex to protect this malloc. */
3617 PL_reentrant_buffer->_crypt_struct_buffer =
3618 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3619 #if defined(__GLIBC__) || defined(__EMX__)
3620 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3621 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3622 /* work around glibc-2.2.5 bug */
3623 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3627 # endif /* HAS_CRYPT_R */
3628 # endif /* USE_ITHREADS */
3630 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3632 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3638 "The crypt() function is unimplemented due to excessive paranoia.");
3642 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3643 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3645 /* Both the characters below can be stored in two UTF-8 bytes. In UTF-8 the max
3646 * character that 2 bytes can hold is U+07FF, and in UTF-EBCDIC it is U+03FF.
3647 * See http://www.unicode.org/unicode/reports/tr16 */
3648 #define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS 0x0178 /* Also is title case */
3649 #define GREEK_CAPITAL_LETTER_MU 0x039C /* Upper and title case of MICRON */
3651 /* Below are several macros that generate code */
3652 /* Generates code to store a unicode codepoint c that is known to occupy
3653 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3654 #define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3656 *(p) = UTF8_TWO_BYTE_HI(c); \
3657 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3660 /* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3661 * available byte after the two bytes */
3662 #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3664 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3665 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3668 /* Generates code to store the upper case of latin1 character l which is known
3669 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3670 * are only two characters that fit this description, and this macro knows
3671 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3673 #define STORE_NON_LATIN1_UC(p, l) \
3675 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3676 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3677 } else { /* Must be the following letter */ \
3678 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3682 /* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3683 * after the character stored */
3684 #define CAT_NON_LATIN1_UC(p, l) \
3686 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3687 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3689 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3693 /* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3694 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3695 * and must require two bytes to store it. Advances p to point to the next
3696 * available position */
3697 #define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3699 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3700 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3701 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3702 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3703 } else {/* else is one of the other two special cases */ \
3704 CAT_NON_LATIN1_UC((p), (l)); \
3710 /* Actually is both lcfirst() and ucfirst(). Only the first character
3711 * changes. This means that possibly we can change in-place, ie., just
3712 * take the source and change that one character and store it back, but not
3713 * if read-only etc, or if the length changes */
3718 STRLEN slen; /* slen is the byte length of the whole SV. */
3721 bool inplace; /* ? Convert first char only, in-place */
3722 bool doing_utf8 = FALSE; /* ? using utf8 */
3723 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3724 const int op_type = PL_op->op_type;
3727 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3728 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3729 * stored as UTF-8 at s. */
3730 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3731 * lowercased) character stored in tmpbuf. May be either
3732 * UTF-8 or not, but in either case is the number of bytes */
3736 s = (const U8*)SvPV_nomg_const(source, slen);
3738 if (ckWARN(WARN_UNINITIALIZED))
3739 report_uninit(source);
3744 /* We may be able to get away with changing only the first character, in
3745 * place, but not if read-only, etc. Later we may discover more reasons to
3746 * not convert in-place. */
3747 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3749 /* First calculate what the changed first character should be. This affects
3750 * whether we can just swap it out, leaving the rest of the string unchanged,
3751 * or even if have to convert the dest to UTF-8 when the source isn't */
3753 if (! slen) { /* If empty */
3754 need = 1; /* still need a trailing NUL */
3756 else if (DO_UTF8(source)) { /* Is the source utf8? */
3759 /* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
3760 * and doesn't allow for the user to specify their own. When code is added to
3761 * detect if there is a user-defined mapping in force here, and if so to use
3762 * that, then the code below can be compiled. The detection would be a good
3763 * thing anyway, as currently the user-defined mappings only work on utf8
3764 * strings, and thus depend on the chosen internal storage method, which is a
3766 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3767 if (UTF8_IS_INVARIANT(*s)) {
3769 /* An invariant source character is either ASCII or, in EBCDIC, an
3770 * ASCII equivalent or a caseless C1 control. In both these cases,
3771 * the lower and upper cases of any character are also invariants
3772 * (and title case is the same as upper case). So it is safe to
3773 * use the simple case change macros which avoid the overhead of
3774 * the general functions. Note that if perl were to be extended to
3775 * do locale handling in UTF-8 strings, this wouldn't be true in,
3776 * for example, Lithuanian or Turkic. */
3777 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3781 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3784 /* Similarly, if the source character isn't invariant but is in the
3785 * latin1 range (or EBCDIC equivalent thereof), we have the case
3786 * changes compiled into perl, and can avoid the overhead of the
3787 * general functions. In this range, the characters are stored as
3788 * two UTF-8 bytes, and it so happens that any changed-case version
3789 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3793 /* Convert the two source bytes to a single Unicode code point
3794 * value, change case and save for below */
3795 chr = UTF8_ACCUMULATE(*s, *(s+1));
3796 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3797 U8 lower = toLOWER_LATIN1(chr);
3798 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3800 else { /* ucfirst */
3801 U8 upper = toUPPER_LATIN1_MOD(chr);
3803 /* Most of the latin1 range characters are well-behaved. Their
3804 * title and upper cases are the same, and are also in the
3805 * latin1 range. The macro above returns their upper (hence
3806 * title) case, and all that need be done is to save the result
3807 * for below. However, several characters are problematic, and
3808 * have to be handled specially. The MOD in the macro name
3809 * above means that these tricky characters all get mapped to
3810 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3811 * This mapping saves some tests for the majority of the
3814 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3816 /* Not tricky. Just save it. */
3817 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3819 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3821 /* This one is tricky because it is two characters long,
3822 * though the UTF-8 is still two bytes, so the stored
3823 * length doesn't change */
3824 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3825 *(tmpbuf + 1) = 's';
3829 /* The other two have their title and upper cases the same,
3830 * but are tricky because the changed-case characters
3831 * aren't in the latin1 range. They, however, do fit into
3832 * two UTF-8 bytes */
3833 STORE_NON_LATIN1_UC(tmpbuf, chr);
3838 #endif /* end of dont want to break user-defined casing */
3840 /* Here, can't short-cut the general case */
3842 utf8_to_uvchr(s, &ulen);
3843 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3844 else toLOWER_utf8(s, tmpbuf, &tculen);
3846 /* we can't do in-place if the length changes. */
3847 if (ulen != tculen) inplace = FALSE;
3848 need = slen + 1 - ulen + tculen;
3849 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3853 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3854 * latin1 is treated as caseless. Note that a locale takes
3856 tculen = 1; /* Most characters will require one byte, but this will
3857 * need to be overridden for the tricky ones */
3860 if (op_type == OP_LCFIRST) {
3862 /* lower case the first letter: no trickiness for any character */
3863 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3864 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3867 else if (IN_LOCALE_RUNTIME) {
3868 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3869 * have upper and title case different
3872 else if (! IN_UNI_8_BIT) {
3873 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3874 * on EBCDIC machines whatever the
3875 * native function does */
3877 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3878 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3880 /* tmpbuf now has the correct title case for all latin1 characters
3881 * except for the several ones that have tricky handling. All
3882 * of these are mapped by the MOD to the letter below. */
3883 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3885 /* The length is going to change, with all three of these, so
3886 * can't replace just the first character */
3889 /* We use the original to distinguish between these tricky
3891 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3892 /* Two character title case 'Ss', but can remain non-UTF-8 */
3895 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3900 /* The other two tricky ones have their title case outside
3901 * latin1. It is the same as their upper case. */
3903 STORE_NON_LATIN1_UC(tmpbuf, *s);
3905 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3906 * and their upper cases is 2. */
3909 /* The entire result will have to be in UTF-8. Assume worst
3910 * case sizing in conversion. (all latin1 characters occupy
3911 * at most two bytes in utf8) */
3912 convert_source_to_utf8 = TRUE;
3913 need = slen * 2 + 1;
3915 } /* End of is one of the three special chars */
3916 } /* End of use Unicode (Latin1) semantics */
3917 } /* End of changing the case of the first character */
3919 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3920 * generate the result */
3923 /* We can convert in place. This means we change just the first
3924 * character without disturbing the rest; no need to grow */
3926 s = d = (U8*)SvPV_force_nomg(source, slen);
3932 /* Here, we can't convert in place; we earlier calculated how much
3933 * space we will need, so grow to accommodate that */
3934 SvUPGRADE(dest, SVt_PV);
3935 d = (U8*)SvGROW(dest, need);
3936 (void)SvPOK_only(dest);
3943 if (! convert_source_to_utf8) {
3945 /* Here both source and dest are in UTF-8, but have to create
3946 * the entire output. We initialize the result to be the
3947 * title/lower cased first character, and then append the rest
3949 sv_setpvn(dest, (char*)tmpbuf, tculen);
3951 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3955 const U8 *const send = s + slen;
3957 /* Here the dest needs to be in UTF-8, but the source isn't,
3958 * except we earlier UTF-8'd the first character of the source
3959 * into tmpbuf. First put that into dest, and then append the
3960 * rest of the source, converting it to UTF-8 as we go. */
3962 /* Assert tculen is 2 here because the only two characters that
3963 * get to this part of the code have 2-byte UTF-8 equivalents */
3965 *d++ = *(tmpbuf + 1);
3966 s++; /* We have just processed the 1st char */
3968 for (; s < send; s++) {
3969 d = uvchr_to_utf8(d, *s);
3972 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3976 else { /* in-place UTF-8. Just overwrite the first character */
3977 Copy(tmpbuf, d, tculen, U8);
3978 SvCUR_set(dest, need - 1);
3981 else { /* Neither source nor dest are in or need to be UTF-8 */
3983 if (IN_LOCALE_RUNTIME) {
3987 if (inplace) { /* in-place, only need to change the 1st char */
3990 else { /* Not in-place */
3992 /* Copy the case-changed character(s) from tmpbuf */
3993 Copy(tmpbuf, d, tculen, U8);
3994 d += tculen - 1; /* Code below expects d to point to final
3995 * character stored */
3998 else { /* empty source */
3999 /* See bug #39028: Don't taint if empty */
4003 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
4004 * the destination to retain that flag */
4008 if (!inplace) { /* Finish the rest of the string, unchanged */
4009 /* This will copy the trailing NUL */
4010 Copy(s + 1, d + 1, slen, U8);
4011 SvCUR_set(dest, need - 1);
4018 /* There's so much setup/teardown code common between uc and lc, I wonder if
4019 it would be worth merging the two, and just having a switch outside each
4020 of the three tight loops. There is less and less commonality though */
4034 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4035 && SvTEMP(source) && !DO_UTF8(source)
4036 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
4038 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
4039 * make the loop tight, so we overwrite the source with the dest before
4040 * looking at it, and we need to look at the original source
4041 * afterwards. There would also need to be code added to handle
4042 * switching to not in-place in midstream if we run into characters
4043 * that change the length.
4046 s = d = (U8*)SvPV_force_nomg(source, len);
4053 /* The old implementation would copy source into TARG at this point.
4054 This had the side effect that if source was undef, TARG was now
4055 an undefined SV with PADTMP set, and they don't warn inside
4056 sv_2pv_flags(). However, we're now getting the PV direct from
4057 source, which doesn't have PADTMP set, so it would warn. Hence the
4061 s = (const U8*)SvPV_nomg_const(source, len);
4063 if (ckWARN(WARN_UNINITIALIZED))
4064 report_uninit(source);
4070 SvUPGRADE(dest, SVt_PV);
4071 d = (U8*)SvGROW(dest, min);
4072 (void)SvPOK_only(dest);
4077 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4078 to check DO_UTF8 again here. */
4080 if (DO_UTF8(source)) {
4081 const U8 *const send = s + len;
4082 U8 tmpbuf[UTF8_MAXBYTES+1];
4084 /* All occurrences of these are to be moved to follow any other marks.
4085 * This is context-dependent. We may not be passed enough context to
4086 * move the iota subscript beyond all of them, but we do the best we can
4087 * with what we're given. The result is always better than if we
4088 * hadn't done this. And, the problem would only arise if we are
4089 * passed a character without all its combining marks, which would be
4090 * the caller's mistake. The information this is based on comes from a
4091 * comment in Unicode SpecialCasing.txt, (and the Standard's text
4092 * itself) and so can't be checked properly to see if it ever gets
4093 * revised. But the likelihood of it changing is remote */
4094 bool in_iota_subscript = FALSE;
4097 if (in_iota_subscript && ! is_utf8_mark(s)) {
4098 /* A non-mark. Time to output the iota subscript */
4099 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
4100 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
4102 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
4103 in_iota_subscript = FALSE;
4107 /* See comments at the first instance in this file of this ifdef */
4108 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4110 /* If the UTF-8 character is invariant, then it is in the range
4111 * known by the standard macro; result is only one byte long */
4112 if (UTF8_IS_INVARIANT(*s)) {
4116 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4118 /* Likewise, if it fits in a byte, its case change is in our
4120 U8 orig = UTF8_ACCUMULATE(*s, *(s+1));
4121 U8 upper = toUPPER_LATIN1_MOD(orig);
4122 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
4130 /* Otherwise, need the general UTF-8 case. Get the changed
4131 * case value and copy it to the output buffer */
4133 const STRLEN u = UTF8SKIP(s);
4136 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
4137 if (uv == GREEK_CAPITAL_LETTER_IOTA
4138 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
4140 in_iota_subscript = TRUE;
4143 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4144 /* If the eventually required minimum size outgrows
4145 * the available space, we need to grow. */
4146 const UV o = d - (U8*)SvPVX_const(dest);
4148 /* If someone uppercases one million U+03B0s we
4149 * SvGROW() one million times. Or we could try
4150 * guessing how much to allocate without allocating too
4151 * much. Such is life. See corresponding comment in
4152 * lc code for another option */
4154 d = (U8*)SvPVX(dest) + o;
4156 Copy(tmpbuf, d, ulen, U8);
4162 if (in_iota_subscript) {
4163 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
4167 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4169 else { /* Not UTF-8 */
4171 const U8 *const send = s + len;
4173 /* Use locale casing if in locale; regular style if not treating
4174 * latin1 as having case; otherwise the latin1 casing. Do the
4175 * whole thing in a tight loop, for speed, */
4176 if (IN_LOCALE_RUNTIME) {
4179 for (; s < send; d++, s++)
4180 *d = toUPPER_LC(*s);
4182 else if (! IN_UNI_8_BIT) {
4183 for (; s < send; d++, s++) {
4188 for (; s < send; d++, s++) {
4189 *d = toUPPER_LATIN1_MOD(*s);
4190 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
4192 /* The mainstream case is the tight loop above. To avoid
4193 * extra tests in that, all three characters that require
4194 * special handling are mapped by the MOD to the one tested
4196 * Use the source to distinguish between the three cases */
4198 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4200 /* uc() of this requires 2 characters, but they are
4201 * ASCII. If not enough room, grow the string */
4202 if (SvLEN(dest) < ++min) {
4203 const UV o = d - (U8*)SvPVX_const(dest);
4205 d = (U8*)SvPVX(dest) + o;
4207 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4208 continue; /* Back to the tight loop; still in ASCII */
4211 /* The other two special handling characters have their
4212 * upper cases outside the latin1 range, hence need to be
4213 * in UTF-8, so the whole result needs to be in UTF-8. So,
4214 * here we are somewhere in the middle of processing a
4215 * non-UTF-8 string, and realize that we will have to convert
4216 * the whole thing to UTF-8. What to do? There are
4217 * several possibilities. The simplest to code is to
4218 * convert what we have so far, set a flag, and continue on
4219 * in the loop. The flag would be tested each time through
4220 * the loop, and if set, the next character would be
4221 * converted to UTF-8 and stored. But, I (khw) didn't want
4222 * to slow down the mainstream case at all for this fairly
4223 * rare case, so I didn't want to add a test that didn't
4224 * absolutely have to be there in the loop, besides the
4225 * possibility that it would get too complicated for
4226 * optimizers to deal with. Another possibility is to just
4227 * give up, convert the source to UTF-8, and restart the
4228 * function that way. Another possibility is to convert
4229 * both what has already been processed and what is yet to
4230 * come separately to UTF-8, then jump into the loop that
4231 * handles UTF-8. But the most efficient time-wise of the
4232 * ones I could think of is what follows, and turned out to
4233 * not require much extra code. */
4235 /* Convert what we have so far into UTF-8, telling the
4236 * function that we know it should be converted, and to
4237 * allow extra space for what we haven't processed yet.
4238 * Assume the worst case space requirements for converting
4239 * what we haven't processed so far: that it will require
4240 * two bytes for each remaining source character, plus the
4241 * NUL at the end. This may cause the string pointer to
4242 * move, so re-find it. */
4244 len = d - (U8*)SvPVX_const(dest);
4245 SvCUR_set(dest, len);
4246 len = sv_utf8_upgrade_flags_grow(dest,
4247 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4249 d = (U8*)SvPVX(dest) + len;
4251 /* And append the current character's upper case in UTF-8 */
4252 CAT_NON_LATIN1_UC(d, *s);
4254 /* Now process the remainder of the source, converting to
4255 * upper and UTF-8. If a resulting byte is invariant in
4256 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4257 * append it to the output. */
4260 for (; s < send; s++) {
4261 U8 upper = toUPPER_LATIN1_MOD(*s);
4262 if UTF8_IS_INVARIANT(upper) {
4266 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4270 /* Here have processed the whole source; no need to continue
4271 * with the outer loop. Each character has been converted
4272 * to upper case and converted to UTF-8 */
4275 } /* End of processing all latin1-style chars */
4276 } /* End of processing all chars */
4277 } /* End of source is not empty */
4279 if (source != dest) {
4280 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4281 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4283 } /* End of isn't utf8 */
4301 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4302 && SvTEMP(source) && !DO_UTF8(source)) {
4304 /* We can convert in place, as lowercasing anything in the latin1 range
4305 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4307 s = d = (U8*)SvPV_force_nomg(source, len);
4314 /* The old implementation would copy source into TARG at this point.
4315 This had the side effect that if source was undef, TARG was now
4316 an undefined SV with PADTMP set, and they don't warn inside
4317 sv_2pv_flags(). However, we're now getting the PV direct from
4318 source, which doesn't have PADTMP set, so it would warn. Hence the
4322 s = (const U8*)SvPV_nomg_const(source, len);
4324 if (ckWARN(WARN_UNINITIALIZED))
4325 report_uninit(source);
4331 SvUPGRADE(dest, SVt_PV);
4332 d = (U8*)SvGROW(dest, min);
4333 (void)SvPOK_only(dest);
4338 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4339 to check DO_UTF8 again here. */
4341 if (DO_UTF8(source)) {
4342 const U8 *const send = s + len;
4343 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4346 /* See comments at the first instance in this file of this ifdef */
4347 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4348 if (UTF8_IS_INVARIANT(*s)) {
4350 /* Invariant characters use the standard mappings compiled in.
4355 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4357 /* As do the ones in the Latin1 range */
4358 U8 lower = toLOWER_LATIN1(UTF8_ACCUMULATE(*s, *(s+1)));
4359 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4364 /* Here, is utf8 not in Latin-1 range, have to go out and get
4365 * the mappings from the tables. */
4367 const STRLEN u = UTF8SKIP(s);
4370 #ifndef CONTEXT_DEPENDENT_CASING
4371 toLOWER_utf8(s, tmpbuf, &ulen);
4373 /* This is ifdefd out because it needs more work and thought. It isn't clear
4374 * that we should do it.
4375 * A minor objection is that this is based on a hard-coded rule from the
4376 * Unicode standard, and may change, but this is not very likely at all.
4377 * mktables should check and warn if it does.
4378 * More importantly, if the sigma occurs at the end of the string, we don't
4379 * have enough context to know whether it is part of a larger string or going
4380 * to be or not. It may be that we are passed a subset of the context, via
4381 * a \U...\E, for example, and we could conceivably know the larger context if
4382 * code were changed to pass that in. But, if the string passed in is an
4383 * intermediate result, and the user concatenates two strings together
4384 * after we have made a final sigma, that would be wrong. If the final sigma
4385 * occurs in the middle of the string we are working on, then we know that it
4386 * should be a final sigma, but otherwise we can't be sure. */
4388 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4390 /* If the lower case is a small sigma, it may be that we need
4391 * to change it to a final sigma. This happens at the end of
4392 * a word that contains more than just this character, and only
4393 * when we started with a capital sigma. */
4394 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4395 s > send - len && /* Makes sure not the first letter */
4396 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4399 /* We use the algorithm in:
4400 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4401 * is a CAPITAL SIGMA): If C is preceded by a sequence
4402 * consisting of a cased letter and a case-ignorable
4403 * sequence, and C is not followed by a sequence consisting
4404 * of a case ignorable sequence and then a cased letter,
4405 * then when lowercasing C, C becomes a final sigma */
4407 /* To determine if this is the end of a word, need to peek
4408 * ahead. Look at the next character */
4409 const U8 *peek = s + u;
4411 /* Skip any case ignorable characters */
4412 while (peek < send && is_utf8_case_ignorable(peek)) {
4413 peek += UTF8SKIP(peek);
4416 /* If we reached the end of the string without finding any
4417 * non-case ignorable characters, or if the next such one
4418 * is not-cased, then we have met the conditions for it
4419 * being a final sigma with regards to peek ahead, and so
4420 * must do peek behind for the remaining conditions. (We
4421 * know there is stuff behind to look at since we tested
4422 * above that this isn't the first letter) */
4423 if (peek >= send || ! is_utf8_cased(peek)) {
4424 peek = utf8_hop(s, -1);
4426 /* Here are at the beginning of the first character
4427 * before the original upper case sigma. Keep backing
4428 * up, skipping any case ignorable characters */
4429 while (is_utf8_case_ignorable(peek)) {
4430 peek = utf8_hop(peek, -1);
4433 /* Here peek points to the first byte of the closest
4434 * non-case-ignorable character before the capital
4435 * sigma. If it is cased, then by the Unicode
4436 * algorithm, we should use a small final sigma instead
4437 * of what we have */
4438 if (is_utf8_cased(peek)) {
4439 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4440 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4444 else { /* Not a context sensitive mapping */
4445 #endif /* End of commented out context sensitive */
4446 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4448 /* If the eventually required minimum size outgrows
4449 * the available space, we need to grow. */
4450 const UV o = d - (U8*)SvPVX_const(dest);
4452 /* If someone lowercases one million U+0130s we
4453 * SvGROW() one million times. Or we could try
4454 * guessing how much to allocate without allocating too
4455 * much. Such is life. Another option would be to
4456 * grow an extra byte or two more each time we need to
4457 * grow, which would cut down the million to 500K, with
4460 d = (U8*)SvPVX(dest) + o;
4462 #ifdef CONTEXT_DEPENDENT_CASING
4465 /* Copy the newly lowercased letter to the output buffer we're
4467 Copy(tmpbuf, d, ulen, U8);
4470 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4473 } /* End of looping through the source string */
4476 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4477 } else { /* Not utf8 */
4479 const U8 *const send = s + len;
4481 /* Use locale casing if in locale; regular style if not treating
4482 * latin1 as having case; otherwise the latin1 casing. Do the
4483 * whole thing in a tight loop, for speed, */
4484 if (IN_LOCALE_RUNTIME) {
4487 for (; s < send; d++, s++)
4488 *d = toLOWER_LC(*s);
4490 else if (! IN_UNI_8_BIT) {
4491 for (; s < send; d++, s++) {
4496 for (; s < send; d++, s++) {
4497 *d = toLOWER_LATIN1(*s);
4501 if (source != dest) {
4503 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4513 SV * const sv = TOPs;
4515 register const char *s = SvPV_const(sv,len);
4517 SvUTF8_off(TARG); /* decontaminate */
4520 SvUPGRADE(TARG, SVt_PV);
4521 SvGROW(TARG, (len * 2) + 1);
4525 if (UTF8_IS_CONTINUED(*s)) {
4526 STRLEN ulen = UTF8SKIP(s);