3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
32 #include "regcharclass.h"
34 /* XXX I can't imagine anyone who doesn't have this actually _needs_
35 it, since pid_t is an integral type.
38 #ifdef NEED_GETPID_PROTO
39 extern Pid_t getpid (void);
43 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
44 * This switches them over to IEEE.
46 #if defined(LIBM_LIB_VERSION)
47 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
50 static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1; /* sizeof minus one: drops the terminating NUL, assuming the macro expands to a string literal (defined outside this view) */
51 static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1; /* same computation for the capital iota sequence */
53 /* variations on pp_null */
58 if (GIMME_V == G_SCALAR)
65 /* This is also called directly by pp_lvavref. */
70 assert(SvTYPE(TARG) == SVt_PVAV);
71 if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO ))
72 if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) ))
73 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
76 if (PL_op->op_flags & OPf_REF) {
80 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
81 const I32 flags = is_lvalue_sub();
82 if (flags && !(flags & OPpENTERSUB_INARGS)) {
83 if (GIMME_V == G_SCALAR)
84 /* diag_listed_as: Can't return %s to lvalue scalar context */
85 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
92 if (gimme == G_ARRAY) {
93 /* XXX see also S_pushav in pp_hot.c */
94 const SSize_t maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
96 if (SvMAGICAL(TARG)) {
98 for (i=0; i < maxarg; i++) {
99 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
100 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
105 for (i=0; i < maxarg; i++) {
106 SV * const sv = AvARRAY((const AV *)TARG)[i];
107 SP[i+1] = sv ? sv : &PL_sv_undef;
112 else if (gimme == G_SCALAR) {
113 SV* const sv = sv_newmortal();
114 const SSize_t maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
115 sv_setiv(sv, maxarg);
126 assert(SvTYPE(TARG) == SVt_PVHV);
128 if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO ))
129 if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) ))
130 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
132 if (PL_op->op_flags & OPf_REF)
134 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
135 const I32 flags = is_lvalue_sub();
136 if (flags && !(flags & OPpENTERSUB_INARGS)) {
137 if (GIMME_V == G_SCALAR)
138 /* diag_listed_as: Can't return %s to lvalue scalar context */
139 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
145 if (gimme == G_ARRAY) {
146 RETURNOP(Perl_do_kv(aTHX));
148 else if ((PL_op->op_private & OPpTRUEBOOL
149 || ( PL_op->op_private & OPpMAYBE_TRUEBOOL
150 && block_gimme() == G_VOID ))
151 && (!SvRMAGICAL(TARG) || !mg_find(TARG, PERL_MAGIC_tied))
153 SETs(HvUSEDKEYS(TARG) ? &PL_sv_yes : sv_2mortal(newSViv(0)));
154 else if (gimme == G_SCALAR) {
155 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
164 assert(SvTYPE(TARG) == SVt_PVCV);
172 SvPADSTALE_off(TARG);
179 CV * const protocv = PadnamePROTOCV(
180 PadlistNAMESARRAY(CvPADLIST(find_runcv(NULL)))[ARGTARG]
182 assert(SvTYPE(TARG) == SVt_PVCV);
184 if (CvISXSUB(protocv)) { /* constant */
185 /* XXX Should we clone it here? */
186 /* If this changes to use SAVECLEARSV, we can move the SAVECLEARSV
187 to introcv and remove the SvPADSTALE_off. */
188 SAVEPADSVANDMORTALIZE(ARGTARG);
189 PAD_SVl(ARGTARG) = SvREFCNT_inc_simple_NN(protocv);
192 if (CvROOT(protocv)) {
193 assert(CvCLONE(protocv));
194 assert(!CvCLONED(protocv));
196 cv_clone_into(protocv,(CV *)TARG);
197 SAVECLEARSV(PAD_SVl(ARGTARG));
204 /* In some cases this function inspects PL_op. If this function is called
205 for new op types, more bool parameters may need to be added in place of
208 When noinit is true, the absence of a gv will cause a retval of undef.
209 This is unrelated to the cv-to-gv assignment case.
213 S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
216 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
219 sv = amagic_deref_call(sv, to_gv_amg);
223 if (SvTYPE(sv) == SVt_PVIO) {
224 GV * const gv = MUTABLE_GV(sv_newmortal());
225 gv_init(gv, 0, "__ANONIO__", 10, 0);
226 GvIOp(gv) = MUTABLE_IO(sv);
227 SvREFCNT_inc_void_NN(sv);
230 else if (!isGV_with_GP(sv)) {
231 Perl_die(aTHX_ "Not a GLOB reference");
235 if (!isGV_with_GP(sv)) {
237 /* If this is a 'my' scalar and flag is set then vivify
240 if (vivify_sv && sv != &PL_sv_undef) {
243 Perl_croak_no_modify();
244 if (cUNOP->op_targ) {
245 SV * const namesv = PAD_SV(cUNOP->op_targ);
246 HV *stash = CopSTASH(PL_curcop);
247 if (SvTYPE(stash) != SVt_PVHV) stash = NULL;
248 gv = MUTABLE_GV(newSV(0));
249 gv_init_sv(gv, stash, namesv, 0);
252 const char * const name = CopSTASHPV(PL_curcop);
253 gv = newGVgen_flags(name,
254 HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
255 SvREFCNT_inc_simple_void_NN(gv);
257 prepare_SV_for_RV(sv);
258 SvRV_set(sv, MUTABLE_SV(gv));
263 if (PL_op->op_flags & OPf_REF || strict) {
264 Perl_die(aTHX_ PL_no_usym, "a symbol");
266 if (ckWARN(WARN_UNINITIALIZED))
272 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
273 sv, GV_ADDMG, SVt_PVGV
282 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""),
286 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
287 == OPpDONT_INIT_GV) {
288 /* We are the target of a coderef assignment. Return
289 the scalar unchanged, and let pp_sassign deal with
293 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
295 /* FAKE globs in the symbol table cause weird bugs (#77810) */
299 if (SvFAKE(sv) && !(PL_op->op_private & OPpALLOW_FAKE)) {
300 SV *newsv = sv_newmortal();
301 sv_setsv_flags(newsv, sv, 0);
313 sv, PL_op->op_private & OPpDEREF,
314 PL_op->op_private & HINT_STRICT_REFS,
315 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
316 || PL_op->op_type == OP_READLINE
318 if (PL_op->op_private & OPpLVAL_INTRO)
319 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
324 /* Helper function for pp_rv2sv and pp_rv2av */
326 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
327 const svtype type, SV ***spp)
331 PERL_ARGS_ASSERT_SOFTREF2XV;
333 if (PL_op->op_private & HINT_STRICT_REFS) {
335 Perl_die(aTHX_ PL_no_symref_sv, sv,
336 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), what);
338 Perl_die(aTHX_ PL_no_usym, what);
342 PL_op->op_flags & OPf_REF
344 Perl_die(aTHX_ PL_no_usym, what);
345 if (ckWARN(WARN_UNINITIALIZED))
347 if (type != SVt_PV && GIMME_V == G_ARRAY) {
351 **spp = &PL_sv_undef;
354 if ((PL_op->op_flags & OPf_SPECIAL) &&
355 !(PL_op->op_flags & OPf_MOD))
357 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
359 **spp = &PL_sv_undef;
364 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
377 sv = amagic_deref_call(sv, to_sv_amg);
381 if (SvTYPE(sv) >= SVt_PVAV)
382 DIE(aTHX_ "Not a SCALAR reference");
387 if (!isGV_with_GP(gv)) {
388 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
394 if (PL_op->op_flags & OPf_MOD) {
395 if (PL_op->op_private & OPpLVAL_INTRO) {
396 if (cUNOP->op_first->op_type == OP_NULL)
397 sv = save_scalar(MUTABLE_GV(TOPs));
399 sv = save_scalar(gv);
401 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
403 else if (PL_op->op_private & OPpDEREF)
404 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
413 AV * const av = MUTABLE_AV(TOPs);
414 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
416 SV ** const svp = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
418 *svp = newSV_type(SVt_PVMG);
419 sv_magic(*svp, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
423 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
432 if (PL_op->op_flags & OPf_MOD || LVRET) {
433 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));/* Not TARG RT#67838 */
434 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
436 LvTARG(ret) = SvREFCNT_inc_simple(sv);
437 SETs(ret); /* no SvSETMAGIC */
440 const MAGIC * const mg = mg_find_mglob(sv);
441 if (mg && mg->mg_len != -1) {
443 STRLEN i = mg->mg_len;
444 if (mg->mg_flags & MGf_BYTES && DO_UTF8(sv))
445 i = sv_pos_b2u_flags(sv, i, SV_GMAGIC|SV_CONST_RETURN);
459 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
461 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT))
462 == OPpMAY_RETURN_CONSTANT)
465 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
466 /* (But not in defined().) */
468 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
470 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
471 cv = SvTYPE(SvRV(gv)) == SVt_PVCV
472 ? MUTABLE_CV(SvRV(gv))
476 cv = MUTABLE_CV(&PL_sv_undef);
477 SETs(MUTABLE_SV(cv));
487 SV *ret = &PL_sv_undef;
489 if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs));
490 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
491 const char * s = SvPVX_const(TOPs);
492 if (strnEQ(s, "CORE::", 6)) {
493 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
495 DIE(aTHX_ "Can't find an opnumber for \"%"UTF8f"\"",
496 UTF8fARG(SvFLAGS(TOPs) & SVf_UTF8, SvCUR(TOPs)-6, s+6));
498 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
504 cv = sv_2cv(TOPs, &stash, &gv, 0);
506 ret = newSVpvn_flags(
507 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv)
517 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
519 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
521 PUSHs(MUTABLE_SV(cv));
535 if (GIMME_V != G_ARRAY) {
541 *MARK = &PL_sv_undef;
543 *MARK = refto(*MARK);
547 EXTEND_MORTAL(SP - MARK);
549 *MARK = refto(*MARK);
554 S_refto(pTHX_ SV *sv)
558 PERL_ARGS_ASSERT_REFTO;
560 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
563 if (!(sv = LvTARG(sv)))
566 SvREFCNT_inc_void_NN(sv);
568 else if (SvTYPE(sv) == SVt_PVAV) {
569 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
570 av_reify(MUTABLE_AV(sv));
572 SvREFCNT_inc_void_NN(sv);
574 else if (SvPADTMP(sv)) {
579 SvREFCNT_inc_void_NN(sv);
582 sv_upgrade(rv, SVt_IV);
591 SV * const sv = TOPs;
599 /* use the return value that is in a register, it's the same as TARG */
600 TARG = sv_ref(TARG,SvRV(sv),TRUE);
615 stash = CopSTASH(PL_curcop);
616 if (SvTYPE(stash) != SVt_PVHV)
617 Perl_croak(aTHX_ "Attempt to bless into a freed package");
620 SV * const ssv = POPs;
624 if (!ssv) goto curstash;
627 if (!SvAMAGIC(ssv)) {
629 Perl_croak(aTHX_ "Attempt to bless into a reference");
631 /* SvAMAGIC is on here, but it only means potentially overloaded,
632 so after stringification: */
633 ptr = SvPV_nomg_const(ssv,len);
634 /* We need to check the flag again: */
635 if (!SvAMAGIC(ssv)) goto frog;
637 else ptr = SvPV_nomg_const(ssv,len);
639 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
640 "Explicit blessing to '' (assuming package main)");
641 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv));
644 (void)sv_bless(TOPs, stash);
654 const char * const elem = SvPV_const(sv, len);
655 GV * const gv = MUTABLE_GV(TOPs);
660 /* elem will always be NUL terminated. */
661 const char * const second_letter = elem + 1;
664 if (len == 5 && strEQ(second_letter, "RRAY"))
666 tmpRef = MUTABLE_SV(GvAV(gv));
667 if (tmpRef && !AvREAL((const AV *)tmpRef)
668 && AvREIFY((const AV *)tmpRef))
669 av_reify(MUTABLE_AV(tmpRef));
673 if (len == 4 && strEQ(second_letter, "ODE"))
674 tmpRef = MUTABLE_SV(GvCVu(gv));
677 if (len == 10 && strEQ(second_letter, "ILEHANDLE")) {
678 /* finally deprecated in 5.8.0 */
679 deprecate("*glob{FILEHANDLE}");
680 tmpRef = MUTABLE_SV(GvIOp(gv));
683 if (len == 6 && strEQ(second_letter, "ORMAT"))
684 tmpRef = MUTABLE_SV(GvFORM(gv));
687 if (len == 4 && strEQ(second_letter, "LOB"))
688 tmpRef = MUTABLE_SV(gv);
691 if (len == 4 && strEQ(second_letter, "ASH"))
692 tmpRef = MUTABLE_SV(GvHV(gv));
695 if (*second_letter == 'O' && !elem[2] && len == 2)
696 tmpRef = MUTABLE_SV(GvIOp(gv));
699 if (len == 4 && strEQ(second_letter, "AME"))
700 sv = newSVhek(GvNAME_HEK(gv));
703 if (len == 7 && strEQ(second_letter, "ACKAGE")) {
704 const HV * const stash = GvSTASH(gv);
705 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
706 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
710 if (len == 6 && strEQ(second_letter, "CALAR"))
725 /* Pattern matching */
733 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
734 /* Historically, study was skipped in these cases. */
739 /* Make study a no-op. It's no longer useful and its existence
740 complicates matters elsewhere. */
746 /* also used for: pp_transr() */
753 if (PL_op->op_flags & OPf_STACKED)
758 sv = PAD_SV(ARGTARG);
763 if(PL_op->op_type == OP_TRANSR) {
765 const char * const pv = SvPV(sv,len);
766 SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv));
771 I32 i = do_trans(sv);
777 /* Lvalue operators. */
780 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
786 PERL_ARGS_ASSERT_DO_CHOMP;
788 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
790 if (SvTYPE(sv) == SVt_PVAV) {
792 AV *const av = MUTABLE_AV(sv);
793 const I32 max = AvFILL(av);
795 for (i = 0; i <= max; i++) {
796 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
797 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
798 count += do_chomp(retval, sv, chomping);
802 else if (SvTYPE(sv) == SVt_PVHV) {
803 HV* const hv = MUTABLE_HV(sv);
805 (void)hv_iterinit(hv);
806 while ((entry = hv_iternext(hv)))
807 count += do_chomp(retval, hv_iterval(hv,entry), chomping);
810 else if (SvREADONLY(sv)) {
811 Perl_croak_no_modify();
816 /* XXX, here sv is utf8-ized as a side-effect!
817 If encoding.pm is used properly, almost all string-generating
818 operations, including literal strings, chr(), input data, etc.
819 should have been utf8-ized already, right?
821 sv_recode_to_utf8(sv, _get_encoding());
828 char *temp_buffer = NULL;
833 goto nope_free_nothing;
835 while (len && s[-1] == '\n') {
842 STRLEN rslen, rs_charlen;
843 const char *rsptr = SvPV_const(PL_rs, rslen);
845 rs_charlen = SvUTF8(PL_rs)
849 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
850 /* Assumption is that rs is shorter than the scalar. */
852 /* RS is utf8, scalar is 8 bit. */
854 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
857 /* Cannot downgrade, therefore cannot possibly match.
858 At this point, temp_buffer is not allocated, and
859 is the buffer inside PL_rs, so don't free it.
861 assert (temp_buffer == rsptr);
866 else if (IN_ENCODING) {
867 /* RS is 8 bit, encoding.pm is used.
868 * Do not recode PL_rs as a side-effect. */
869 svrecode = newSVpvn(rsptr, rslen);
870 sv_recode_to_utf8(svrecode, _get_encoding());
871 rsptr = SvPV_const(svrecode, rslen);
872 rs_charlen = sv_len_utf8(svrecode);
875 /* RS is 8 bit, scalar is utf8. */
876 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
890 if (memNE(s, rsptr, rslen))
895 SvPV_force_nomg_nolen(sv);
902 Safefree(temp_buffer);
904 SvREFCNT_dec(svrecode);
908 if (len && (!SvPOK(sv) || SvIsCOW(sv)))
909 s = SvPV_force_nomg(sv, len);
912 char * const send = s + len;
913 char * const start = s;
915 while (s > start && UTF8_IS_CONTINUATION(*s))
917 if (is_utf8_string((U8*)s, send - s)) {
918 sv_setpvn(retval, s, send - s);
920 SvCUR_set(sv, s - start);
926 sv_setpvs(retval, "");
930 sv_setpvn(retval, s, 1);
937 sv_setpvs(retval, "");
944 /* also used for: pp_schomp() */
949 const bool chomping = PL_op->op_type == OP_SCHOMP;
951 const size_t count = do_chomp(TARG, TOPs, chomping);
953 sv_setiv(TARG, count);
959 /* also used for: pp_chomp() */
963 dSP; dMARK; dTARGET; dORIGMARK;
964 const bool chomping = PL_op->op_type == OP_CHOMP;
968 count += do_chomp(TARG, *++MARK, chomping);
970 sv_setiv(TARG, count);
981 if (!PL_op->op_private) {
993 if (SvTHINKFIRST(sv))
994 sv_force_normal_flags(sv, SV_COW_DROP_PV|SV_IMMEDIATE_UNREF);
996 switch (SvTYPE(sv)) {
1000 av_undef(MUTABLE_AV(sv));
1003 hv_undef(MUTABLE_HV(sv));
1006 if (cv_const_sv((const CV *)sv))
1007 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
1008 "Constant subroutine %"SVf" undefined",
1009 SVfARG(CvANON((const CV *)sv)
1010 ? newSVpvs_flags("(anonymous)", SVs_TEMP)
1011 : sv_2mortal(newSVhek(
1013 ? CvNAME_HEK((CV *)sv)
1014 : GvENAME_HEK(CvGV((const CV *)sv))
1019 /* let user-undef'd sub keep its identity */
1020 cv_undef_flags(MUTABLE_CV(sv), CV_UNDEF_KEEP_NAME);
1023 assert(isGV_with_GP(sv));
1024 assert(!SvFAKE(sv));
1029 /* undef *Pkg::meth_name ... */
1031 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1032 && HvENAME_get(stash);
1034 if((stash = GvHV((const GV *)sv))) {
1035 if(HvENAME_get(stash))
1036 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1040 SvREFCNT_inc_simple_void_NN(sv_2mortal(sv));
1041 gp_free(MUTABLE_GV(sv));
1043 GvGP_set(sv, gp_ref(gp));
1044 #ifndef PERL_DONT_CREATE_GVSV
1045 GvSV(sv) = newSV(0);
1047 GvLINE(sv) = CopLINE(PL_curcop);
1048 GvEGV(sv) = MUTABLE_GV(sv);
1052 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1054 /* undef *Foo::ISA */
1055 if( strEQ(GvNAME((const GV *)sv), "ISA")
1056 && (stash = GvSTASH((const GV *)sv))
1057 && (method_changed || HvENAME(stash)) )
1058 mro_isa_changed_in(stash);
1059 else if(method_changed)
1060 mro_method_changed_in(
1061 GvSTASH((const GV *)sv)
1067 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1081 /* common "slow" code for pp_postinc and pp_postdec */
1084 S_postincdec_common(pTHX_ SV *sv, SV *targ)
1088 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
1091 TARG = sv_newmortal();
1098 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1099 if (inc && !SvOK(TARG))
1106 /* also used for: pp_i_postinc() */
1113 /* special-case sv being a simple integer */
1114 if (LIKELY(((sv->sv_flags &
1115 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV|
1116 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK))
1118 && SvIVX(sv) != IV_MAX)
1121 SvIV_set(sv, iv + 1);
1122 TARGi(iv, 0); /* arg not GMG, so can't be tainted */
1127 return S_postincdec_common(aTHX_ sv, TARG);
1131 /* also used for: pp_i_postdec() */
1138 /* special-case sv being a simple integer */
1139 if (LIKELY(((sv->sv_flags &
1140 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV|
1141 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK))
1143 && SvIVX(sv) != IV_MIN)
1146 SvIV_set(sv, iv - 1);
1147 TARGi(iv, 0); /* arg not GMG, so can't be tainted */
1152 return S_postincdec_common(aTHX_ sv, TARG);
1156 /* Ordinary operators. */
1160 dSP; dATARGET; SV *svl, *svr;
1161 #ifdef PERL_PRESERVE_IVUV
1164 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1167 #ifdef PERL_PRESERVE_IVUV
1168 /* For integer to integer power, we do the calculation by hand wherever
1169 we're sure it is safe; otherwise we call pow() and try to convert to
1170 integer afterwards. */
1171 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1179 const IV iv = SvIVX(svr);
1183 goto float_it; /* Can't do negative powers this way. */
1187 baseuok = SvUOK(svl);
1189 baseuv = SvUVX(svl);
1191 const IV iv = SvIVX(svl);
1194 baseuok = TRUE; /* effectively it's a UV now */
1196 baseuv = -iv; /* abs, baseuok == false records sign */
1199 /* now we have integer ** positive integer. */
1202 /* foo & (foo - 1) is zero only for a power of 2. */
1203 if (!(baseuv & (baseuv - 1))) {
1204 /* We are raising power-of-2 to a positive integer.
1205 The logic here will work for any base (even non-integer
1206 bases) but it can be less accurate than
1207 pow (base,power) or exp (power * log (base)) when the
1208 intermediate values start to spill out of the mantissa.
1209 With powers of 2 we know this can't happen.
1210 And powers of 2 are the favourite thing for perl
1211 programmers to notice ** not doing what they mean. */
1213 NV base = baseuok ? baseuv : -(NV)baseuv;
1218 while (power >>= 1) {
1226 SvIV_please_nomg(svr);
1229 unsigned int highbit = 8 * sizeof(UV);
1230 unsigned int diff = 8 * sizeof(UV);
1231 while (diff >>= 1) {
1233 if (baseuv >> highbit) {
1237 /* we now have baseuv < 2 ** highbit */
1238 if (power * highbit <= 8 * sizeof(UV)) {
1239 /* result will definitely fit in UV, so use UV math
1240 on same algorithm as above */
1243 const bool odd_power = cBOOL(power & 1);
1247 while (power >>= 1) {
1254 if (baseuok || !odd_power)
1255 /* answer is positive */
1257 else if (result <= (UV)IV_MAX)
1258 /* answer negative, fits in IV */
1259 SETi( -(IV)result );
1260 else if (result == (UV)IV_MIN)
1261 /* 2's complement assumption: special case IV_MIN */
1264 /* answer negative, doesn't fit */
1265 SETn( -(NV)result );
1273 NV right = SvNV_nomg(svr);
1274 NV left = SvNV_nomg(svl);
1277 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1279 We are building perl with long double support and are on an AIX OS
1280 afflicted with a powl() function that wrongly returns NaNQ for any
1281 negative base. This was reported to IBM as PMR #23047-379 on
1282 03/06/2006. The problem exists in at least the following versions
1283 of AIX and the libm fileset, and no doubt others as well:
1285 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1286 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1287 AIX 5.2.0 bos.adt.libm 5.2.0.85
1289 So, until IBM fixes powl(), we provide the following workaround to
1290 handle the problem ourselves. Our logic is as follows: for
1291 negative bases (left), we use fmod(right, 2) to check if the
1292 exponent is an odd or even integer:
1294 - if odd, powl(left, right) == -powl(-left, right)
1295 - if even, powl(left, right) == powl(-left, right)
1297 If the exponent is not an integer, the result is rightly NaNQ, so
1298 we just return that (as NV_NAN).
1302 NV mod2 = Perl_fmod( right, 2.0 );
1303 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1304 SETn( -Perl_pow( -left, right) );
1305 } else if (mod2 == 0.0) { /* even integer */
1306 SETn( Perl_pow( -left, right) );
1307 } else { /* fractional power */
1311 SETn( Perl_pow( left, right) );
1314 SETn( Perl_pow( left, right) );
1315 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1317 #ifdef PERL_PRESERVE_IVUV
1319 SvIV_please_nomg(svr);
1327 dSP; dATARGET; SV *svl, *svr;
1328 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1332 #ifdef PERL_PRESERVE_IVUV
1334 /* special-case some simple common cases */
1335 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) {
1337 U32 flags = (svl->sv_flags & svr->sv_flags);
1338 if (flags & SVf_IOK) {
1339 /* both args are simple IVs */
1344 topl = ((UV)il) >> (UVSIZE * 4 - 1);
1345 topr = ((UV)ir) >> (UVSIZE * 4 - 1);
1347 /* if both are in a range that can't under/overflow, do a
1348 * simple integer multiply: if the top halves(*) of both numbers
1349 * are 00...00 or 11...11, then it's safe.
1350 * (*) for 32-bits, the "top half" is the top 17 bits,
1351 * for 64-bits, it's 33 bits */
1353 ((topl+1) | (topr+1))
1354 & ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */
1357 TARGi(il * ir, 0); /* args not GMG, so can't be tainted */
1363 else if (flags & SVf_NOK) {
1364 /* both args are NVs */
1371 if (nl == (NV)il && nr == (NV)ir)
1372 /* nothing was lost by converting to IVs */
1376 # if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
1377 if (Perl_isinf(result)) {
1378 Zero((U8*)&result + 8, 8, U8);
1381 TARGn(result, 0); /* args not GMG, so can't be tainted */
1389 if (SvIV_please_nomg(svr)) {
1390 /* Unless the left argument is integer in range we are going to have to
1391 use NV maths. Hence only attempt to coerce the right argument if
1392 we know the left is integer. */
1393 /* Left operand is defined, so is it IV? */
1394 if (SvIV_please_nomg(svl)) {
1395 bool auvok = SvUOK(svl);
1396 bool buvok = SvUOK(svr);
1397 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1398 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1407 const IV aiv = SvIVX(svl);
1410 auvok = TRUE; /* effectively it's a UV now */
1412 /* abs, auvok == false records sign */
1413 alow = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
1419 const IV biv = SvIVX(svr);
1422 buvok = TRUE; /* effectively it's a UV now */
1424 /* abs, buvok == false records sign */
1425 blow = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
1429 /* If this does sign extension on unsigned it's time for plan B */
1430 ahigh = alow >> (4 * sizeof (UV));
1432 bhigh = blow >> (4 * sizeof (UV));
1434 if (ahigh && bhigh) {
1436 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1437 which is overflow. Drop to NVs below. */
1438 } else if (!ahigh && !bhigh) {
1439 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1440 so the unsigned multiply cannot overflow. */
1441 const UV product = alow * blow;
1442 if (auvok == buvok) {
1443 /* -ve * -ve or +ve * +ve gives a +ve result. */
1447 } else if (product <= (UV)IV_MIN) {
1448 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1449 /* -ve result, which could overflow an IV */
1451 /* can't negate IV_MIN, but there aren't two
1452 * integers such that !ahigh && !bhigh, where the
1453 * product equals 0x800....000 */
1454 assert(product != (UV)IV_MIN);
1455 SETi( -(IV)product );
1457 } /* else drop to NVs below. */
1459 /* One operand is large, one small */
1462 /* swap the operands */
1464 bhigh = blow; /* bhigh now the temp var for the swap */
1468 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1469 multiplies can't overflow. shift can, add can, -ve can. */
1470 product_middle = ahigh * blow;
1471 if (!(product_middle & topmask)) {
1472 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1474 product_middle <<= (4 * sizeof (UV));
1475 product_low = alow * blow;
1477 /* as for pp_add, UV + something mustn't get smaller.
1478 IIRC ANSI mandates this wrapping *behaviour* for
1479 unsigned whatever the actual representation*/
1480 product_low += product_middle;
1481 if (product_low >= product_middle) {
1482 /* didn't overflow */
1483 if (auvok == buvok) {
1484 /* -ve * -ve or +ve * +ve gives a +ve result. */
1486 SETu( product_low );
1488 } else if (product_low <= (UV)IV_MIN) {
1489 /* 2s complement assumption again */
1490 /* -ve result, which could overflow an IV */
1492 SETi(product_low == (UV)IV_MIN
1493 ? IV_MIN : -(IV)product_low);
1495 } /* else drop to NVs below. */
1497 } /* product_middle too large */
1498 } /* ahigh && bhigh */
1503 NV right = SvNV_nomg(svr);
1504 NV left = SvNV_nomg(svl);
1505 NV result = left * right;
1508 #if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
1509 if (Perl_isinf(result)) {
1510 Zero((U8*)&result + 8, 8, U8);
1520 dSP; dATARGET; SV *svl, *svr;
1521 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1524 /* Only try to do UV divide first
1525 if ((SLOPPYDIVIDE is true) or
1526 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1528 The assumption is that it is better to use floating point divide
1529 whenever possible, only doing integer divide first if we can't be sure.
1530 If NV_PRESERVES_UV is true then we know at compile time that no UV
1531 can be too large to preserve, so don't need to compile the code to
1532 test the size of UVs. */
1535 # define PERL_TRY_UV_DIVIDE
1536 /* ensure that 20./5. == 4. */
1538 # ifdef PERL_PRESERVE_IVUV
1539 # ifndef NV_PRESERVES_UV
1540 # define PERL_TRY_UV_DIVIDE
1545 #ifdef PERL_TRY_UV_DIVIDE
1546 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1547 bool left_non_neg = SvUOK(svl);
1548 bool right_non_neg = SvUOK(svr);
1552 if (right_non_neg) {
1556 const IV biv = SvIVX(svr);
1559 right_non_neg = TRUE; /* effectively it's a UV now */
1562 right = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
1565 /* historically undef()/0 gives a "Use of uninitialized value"
1566 warning before dying, hence this test goes here.
1567 If it were immediately before the second SvIV_please, then
1568 DIE() would be invoked before left was even inspected, so
1569 no inspection would give no warning. */
1571 DIE(aTHX_ "Illegal division by zero");
1577 const IV aiv = SvIVX(svl);
1580 left_non_neg = TRUE; /* effectively it's a UV now */
1583 left = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
1589 /* For sloppy divide we always attempt integer division. */
1591 /* Otherwise we only attempt it if either or both operands
1592 would not be preserved by an NV. If both fit in NVs
1593 we fall through to the NV divide code below. However,
1594 as left >= right to ensure integer result here, we know that
1595 we can skip the test on the right operand - right big
1596 enough not to be preserved can't get here unless left is
1599 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1602 /* Integer division can't overflow, but it can be imprecise. */
1603 const UV result = left / right;
1604 if (result * right == left) {
1605 SP--; /* result is valid */
1606 if (left_non_neg == right_non_neg) {
1607 /* signs identical, result is positive. */
1611 /* 2s complement assumption */
1612 if (result <= (UV)IV_MIN)
1613 SETi(result == (UV)IV_MIN ? IV_MIN : -(IV)result);
1615 /* It's exact but too negative for IV. */
1616 SETn( -(NV)result );
1619 } /* tried integer divide but it was not an integer result */
1620 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1621 } /* one operand wasn't SvIOK */
1622 #endif /* PERL_TRY_UV_DIVIDE */
1624 NV right = SvNV_nomg(svr);
1625 NV left = SvNV_nomg(svl);
1626 (void)POPs;(void)POPs;
1627 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1628 if (! Perl_isnan(right) && right == 0.0)
1632 DIE(aTHX_ "Illegal division by zero");
1633 PUSHn( left / right );
1641 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1645 bool left_neg = FALSE;
1646 bool right_neg = FALSE;
1647 bool use_double = FALSE;
1648 bool dright_valid = FALSE;
1651 SV * const svr = TOPs;
1652 SV * const svl = TOPm1s;
1653 if (SvIV_please_nomg(svr)) {
1654 right_neg = !SvUOK(svr);
1658 const IV biv = SvIVX(svr);
1661 right_neg = FALSE; /* effectively it's a UV now */
1663 right = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
1668 dright = SvNV_nomg(svr);
1669 right_neg = dright < 0;
1672 if (dright < UV_MAX_P1) {
1673 right = U_V(dright);
1674 dright_valid = TRUE; /* In case we need to use double below. */
1680 /* At this point use_double is only true if right is out of range for
1681 a UV. In range NV has been rounded down to nearest UV and
1682 use_double false. */
1683 if (!use_double && SvIV_please_nomg(svl)) {
1684 left_neg = !SvUOK(svl);
1688 const IV aiv = SvIVX(svl);
1691 left_neg = FALSE; /* effectively it's a UV now */
1693 left = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
1698 dleft = SvNV_nomg(svl);
1699 left_neg = dleft < 0;
1703 /* This should be exactly the 5.6 behaviour - if left and right are
1704 both in range for UV then use U_V() rather than floor. */
1706 if (dleft < UV_MAX_P1) {
1707 /* right was in range, so is dleft, so use UVs not double.
1711 /* left is out of range for UV, right was in range, so promote
1712 right (back) to double. */
1714 /* The +0.5 is used in 5.6 even though it is not strictly
1715 consistent with the implicit +0 floor in the U_V()
1716 inside the #if 1. */
1717 dleft = Perl_floor(dleft + 0.5);
1720 dright = Perl_floor(dright + 0.5);
1731 DIE(aTHX_ "Illegal modulus zero");
1733 dans = Perl_fmod(dleft, dright);
1734 if ((left_neg != right_neg) && dans)
1735 dans = dright - dans;
1738 sv_setnv(TARG, dans);
1744 DIE(aTHX_ "Illegal modulus zero");
1747 if ((left_neg != right_neg) && ans)
1750 /* XXX may warn: unary minus operator applied to unsigned type */
1751 /* could change -foo to be (~foo)+1 instead */
1752 if (ans <= ~((UV)IV_MAX)+1)
1753 sv_setiv(TARG, ~ans+1);
1755 sv_setnv(TARG, -(NV)ans);
1758 sv_setuv(TARG, ans);
1770 bool infnan = FALSE;
1772 if (GIMME_V == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1773 /* TODO: think of some way of doing list-repeat overloading ??? */
1778 if (UNLIKELY(PL_op->op_private & OPpREPEAT_DOLIST)) {
1779 /* The parser saw this as a list repeat, and there
1780 are probably several items on the stack. But we're
1781 in scalar/void context, and there's no pp_list to save us
1782 now. So drop the rest of the items -- robin@kitsite.com
1785 if (MARK + 1 < SP) {
1791 ASSUME(MARK + 1 == SP);
1793 MARK[1] = &PL_sv_undef;
1797 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1803 const UV uv = SvUV_nomg(sv);
1805 count = IV_MAX; /* The best we can do? */
1809 count = SvIV_nomg(sv);
1812 else if (SvNOKp(sv)) {
1813 const NV nv = SvNV_nomg(sv);
1814 infnan = Perl_isinfnan(nv);
1815 if (UNLIKELY(infnan)) {
1819 count = -1; /* An arbitrary negative integer */
1825 count = SvIV_nomg(sv);
1828 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC),
1829 "Non-finite repeat count does nothing");
1830 } else if (count < 0) {
1832 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC),
1833 "Negative repeat count does nothing");
1836 if (GIMME_V == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1838 const SSize_t items = SP - MARK;
1839 const U8 mod = PL_op->op_flags & OPf_MOD;
1844 if ( items > SSize_t_MAX / count /* max would overflow */
1845 /* repeatcpy would overflow */
1846 || items > I32_MAX / (I32)sizeof(SV *)
1848 Perl_croak(aTHX_ "%s","Out of memory during list extend");
1849 max = items * count;
1854 if (mod && SvPADTMP(*SP)) {
1855 *SP = sv_mortalcopy(*SP);
1862 repeatcpy((char*)(MARK + items), (char*)MARK,
1863 items * sizeof(const SV *), count - 1);
1866 else if (count <= 0)
1869 else { /* Note: mark already snarfed by pp_list */
1870 SV * const tmpstr = POPs;
1875 sv_setsv_nomg(TARG, tmpstr);
1876 SvPV_force_nomg(TARG, len);
1877 isutf = DO_UTF8(TARG);
1884 if ( len > (MEM_SIZE_MAX-1) / (UV)count /* max would overflow */
1885 || len > (U32)I32_MAX /* repeatcpy would overflow */
1887 Perl_croak(aTHX_ "%s",
1888 "Out of memory during string extend");
1889 max = (UV)count * len + 1;
1892 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1893 SvCUR_set(TARG, SvCUR(TARG) * count);
1895 *SvEND(TARG) = '\0';
1898 (void)SvPOK_only_UTF8(TARG);
1900 (void)SvPOK_only(TARG);
1909 dSP; dATARGET; bool useleft; SV *svl, *svr;
1910 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1914 #ifdef PERL_PRESERVE_IVUV
1916 /* special-case some simple common cases */
1917 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) {
1919 U32 flags = (svl->sv_flags & svr->sv_flags);
1920 if (flags & SVf_IOK) {
1921 /* both args are simple IVs */
1926 topl = ((UV)il) >> (UVSIZE * 8 - 2);
1927 topr = ((UV)ir) >> (UVSIZE * 8 - 2);
1929 /* if both are in a range that can't under/overflow, do a
1930 * simple integer subtract: if the top of both numbers
1931 * are 00 or 11, then it's safe */
1932 if (!( ((topl+1) | (topr+1)) & 2)) {
1934 TARGi(il - ir, 0); /* args not GMG, so can't be tainted */
1940 else if (flags & SVf_NOK) {
1941 /* both args are NVs */
1947 if (nl == (NV)il && nr == (NV)ir)
1948 /* nothing was lost by converting to IVs */
1951 TARGn(nl - nr, 0); /* args not GMG, so can't be tainted */
1959 useleft = USE_LEFT(svl);
1960 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1961 "bad things" happen if you rely on signed integers wrapping. */
1962 if (SvIV_please_nomg(svr)) {
1963 /* Unless the left argument is integer in range we are going to have to
1964 use NV maths. Hence only attempt to coerce the right argument if
1965 we know the left is integer. */
1972 a_valid = auvok = 1;
1973 /* left operand is undef, treat as zero. */
1975 /* Left operand is defined, so is it IV? */
1976 if (SvIV_please_nomg(svl)) {
1977 if ((auvok = SvUOK(svl)))
1980 const IV aiv = SvIVX(svl);
1983 auvok = 1; /* Now acting as a sign flag. */
1984 } else { /* 2s complement assumption for IV_MIN */
1985 auv = (aiv == IV_MIN) ? (UV)aiv : (UV)-aiv;
1992 bool result_good = 0;
1995 bool buvok = SvUOK(svr);
2000 const IV biv = SvIVX(svr);
2005 buv = (biv == IV_MIN) ? (UV)biv : (UV)-biv;
2007 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
2008 else "IV" now, independent of how it came in.
2009 if a, b represents positive, A, B negative, a maps to -A etc
2014 all UV maths. negate result if A negative.
2015 subtract if signs same, add if signs differ. */
2017 if (auvok ^ buvok) {
2026 /* Must get smaller */
2031 if (result <= buv) {
2032 /* result really should be -(auv-buv). as its negation
2033 of true value, need to swap our result flag */
2045 if (result <= (UV)IV_MIN)
2046 SETi(result == (UV)IV_MIN
2047 ? IV_MIN : -(IV)result);
2049 /* result valid, but out of range for IV. */
2050 SETn( -(NV)result );
2054 } /* Overflow, drop through to NVs. */
2058 useleft = USE_LEFT(svl);
2061 NV value = SvNV_nomg(svr);
2065 /* left operand is undef, treat as zero - value */
2069 SETn( SvNV_nomg(svl) - value );
2074 #define IV_BITS (IVSIZE * 8)
2076 static UV S_uv_shift(UV uv, int shift, bool left)
2082 if (shift >= IV_BITS) {
2085 return left ? uv << shift : uv >> shift;
2088 static IV S_iv_shift(IV iv, int shift, bool left)
2094 if (shift >= IV_BITS) {
2095 return iv < 0 && !left ? -1 : 0;
2097 return left ? iv << shift : iv >> shift;
/* Convenience wrappers around the two shift helpers.  The macro name selects
 * the helper (S_uv_shift for UV operands, S_iv_shift for IV operands) and the
 * direction, which is passed as the helper's boolean 'left' argument: TRUE
 * for a left shift, FALSE for a right shift. */
2100 #define UV_LEFT_SHIFT(uv, shift) S_uv_shift(uv, shift, TRUE)
2101 #define UV_RIGHT_SHIFT(uv, shift) S_uv_shift(uv, shift, FALSE)
2102 #define IV_LEFT_SHIFT(iv, shift) S_iv_shift(iv, shift, TRUE)
2103 #define IV_RIGHT_SHIFT(iv, shift) S_iv_shift(iv, shift, FALSE)
2107 dSP; dATARGET; SV *svl, *svr;
2108 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
2112 const IV shift = SvIV_nomg(svr);
2113 if (PL_op->op_private & HINT_INTEGER) {
2114 SETi(IV_LEFT_SHIFT(SvIV_nomg(svl), shift));
2117 SETu(UV_LEFT_SHIFT(SvUV_nomg(svl), shift));
2125 dSP; dATARGET; SV *svl, *svr;
2126 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
2130 const IV shift = SvIV_nomg(svr);
2131 if (PL_op->op_private & HINT_INTEGER) {
2132 SETi(IV_RIGHT_SHIFT(SvIV_nomg(svl), shift));
2135 SETu(UV_RIGHT_SHIFT(SvUV_nomg(svl), shift));
2146 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
2150 (SvIOK_notUV(left) && SvIOK_notUV(right))
2151 ? (SvIVX(left) < SvIVX(right))
2152 : (do_ncmp(left, right) == -1)
2162 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
2166 (SvIOK_notUV(left) && SvIOK_notUV(right))
2167 ? (SvIVX(left) > SvIVX(right))
2168 : (do_ncmp(left, right) == 1)
2178 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
2182 (SvIOK_notUV(left) && SvIOK_notUV(right))
2183 ? (SvIVX(left) <= SvIVX(right))
2184 : (do_ncmp(left, right) <= 0)
2194 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2198 (SvIOK_notUV(left) && SvIOK_notUV(right))
2199 ? (SvIVX(left) >= SvIVX(right))
2200 : ( (do_ncmp(left, right) & 2) == 0)
2210 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2214 (SvIOK_notUV(left) && SvIOK_notUV(right))
2215 ? (SvIVX(left) != SvIVX(right))
2216 : (do_ncmp(left, right) != 0)
2221 /* compare left and right SVs. Returns:
2225 * 2: left or right was a NaN
2228 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2230 PERL_ARGS_ASSERT_DO_NCMP;
2231 #ifdef PERL_PRESERVE_IVUV
2232 /* Fortunately it seems NaN isn't IOK */
2233 if (SvIV_please_nomg(right) && SvIV_please_nomg(left)) {
2235 const IV leftiv = SvIVX(left);
2236 if (!SvUOK(right)) {
2237 /* ## IV <=> IV ## */
2238 const IV rightiv = SvIVX(right);
2239 return (leftiv > rightiv) - (leftiv < rightiv);
2241 /* ## IV <=> UV ## */
2243 /* As (b) is a UV, it's >=0, so it must be < */
2246 const UV rightuv = SvUVX(right);
2247 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2252 /* ## UV <=> UV ## */
2253 const UV leftuv = SvUVX(left);
2254 const UV rightuv = SvUVX(right);
2255 return (leftuv > rightuv) - (leftuv < rightuv);
2257 /* ## UV <=> IV ## */
2259 const IV rightiv = SvIVX(right);
2261 /* As (a) is a UV, it's >=0, so it cannot be < */
2264 const UV leftuv = SvUVX(left);
2265 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2268 NOT_REACHED; /* NOTREACHED */
2272 NV const rnv = SvNV_nomg(right);
2273 NV const lnv = SvNV_nomg(left);
2275 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2276 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2279 return (lnv > rnv) - (lnv < rnv);
2298 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2301 value = do_ncmp(left, right);
2313 /* also used for: pp_sge() pp_sgt() pp_slt() */
2319 int amg_type = sle_amg;
2323 switch (PL_op->op_type) {
2342 tryAMAGICbin_MG(amg_type, AMGf_set);
2346 #ifdef USE_LOCALE_COLLATE
2347 (IN_LC_RUNTIME(LC_COLLATE))
2348 ? sv_cmp_locale_flags(left, right, 0)
2351 sv_cmp_flags(left, right, 0);
2352 SETs(boolSV(cmp * multiplier < rhs));
2360 tryAMAGICbin_MG(seq_amg, AMGf_set);
2363 SETs(boolSV(sv_eq_flags(left, right, 0)));
2371 tryAMAGICbin_MG(sne_amg, AMGf_set);
2374 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2382 tryAMAGICbin_MG(scmp_amg, 0);
2386 #ifdef USE_LOCALE_COLLATE
2387 (IN_LC_RUNTIME(LC_COLLATE))
2388 ? sv_cmp_locale_flags(left, right, 0)
2391 sv_cmp_flags(left, right, 0);
2400 tryAMAGICbin_MG(band_amg, AMGf_assign);
2403 if (SvNIOKp(left) || SvNIOKp(right)) {
2404 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2405 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2406 if (PL_op->op_private & HINT_INTEGER) {
2407 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2411 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2414 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2415 if (right_ro_nonnum) SvNIOK_off(right);
2418 do_vop(PL_op->op_type, TARG, left, right);
2428 tryAMAGICbin_MG(band_amg, AMGf_assign|AMGf_numarg);
2430 dATARGET; dPOPTOPssrl;
2431 if (PL_op->op_private & HINT_INTEGER) {
2432 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2436 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2446 tryAMAGICbin_MG(sband_amg, AMGf_assign);
2448 dATARGET; dPOPTOPssrl;
2449 do_vop(OP_BIT_AND, TARG, left, right);
2454 /* also used for: pp_bit_xor() */
2459 const int op_type = PL_op->op_type;
2461 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2464 if (SvNIOKp(left) || SvNIOKp(right)) {
2465 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2466 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2467 if (PL_op->op_private & HINT_INTEGER) {
2468 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2469 const IV r = SvIV_nomg(right);
2470 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2474 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2475 const UV r = SvUV_nomg(right);
2476 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2479 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2480 if (right_ro_nonnum) SvNIOK_off(right);
2483 do_vop(op_type, TARG, left, right);
2490 /* also used for: pp_nbit_xor() */
2495 const int op_type = PL_op->op_type;
2497 tryAMAGICbin_MG((op_type == OP_NBIT_OR ? bor_amg : bxor_amg),
2498 AMGf_assign|AMGf_numarg);
2500 dATARGET; dPOPTOPssrl;
2501 if (PL_op->op_private & HINT_INTEGER) {
2502 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2503 const IV r = SvIV_nomg(right);
2504 const IV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r);
2508 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2509 const UV r = SvUV_nomg(right);
2510 const UV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r);
2517 /* also used for: pp_sbit_xor() */
2522 const int op_type = PL_op->op_type;
2524 tryAMAGICbin_MG((op_type == OP_SBIT_OR ? sbor_amg : sbxor_amg),
2527 dATARGET; dPOPTOPssrl;
2528 do_vop(op_type == OP_SBIT_OR ? OP_BIT_OR : OP_BIT_XOR, TARG, left,
2534 PERL_STATIC_INLINE bool
2535 S_negate_string(pTHX)
2540 SV * const sv = TOPs;
2541 if (!SvPOKp(sv) || SvNIOK(sv) || (!SvPOK(sv) && SvNIOKp(sv)))
2543 s = SvPV_nomg_const(sv, len);
2544 if (isIDFIRST(*s)) {
2545 sv_setpvs(TARG, "-");
2548 else if (*s == '+' || (*s == '-' && !looks_like_number(sv))) {
2549 sv_setsv_nomg(TARG, sv);
2550 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2560 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2561 if (S_negate_string(aTHX)) return NORMAL;
2563 SV * const sv = TOPs;
2566 /* It's publicly an integer */
2569 if (SvIVX(sv) == IV_MIN) {
2570 /* 2s complement assumption. */
2571 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) ==
2575 else if (SvUVX(sv) <= IV_MAX) {
2580 else if (SvIVX(sv) != IV_MIN) {
2584 #ifdef PERL_PRESERVE_IVUV
2591 if (SvNIOKp(sv) && (SvNIOK(sv) || !SvPOK(sv)))
2592 SETn(-SvNV_nomg(sv));
2593 else if (SvPOKp(sv) && SvIV_please_nomg(sv))
2594 goto oops_its_an_int;
2596 SETn(-SvNV_nomg(sv));
2604 tryAMAGICun_MG(not_amg, AMGf_set);
2605 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2610 S_scomplement(pTHX_ SV *targ, SV *sv)
2616 sv_copypv_nomg(TARG, sv);
2617 tmps = (U8*)SvPV_nomg(TARG, len);
2620 /* Calculate exact length, let's not estimate. */
2625 U8 * const send = tmps + len;
2626 U8 * const origtmps = tmps;
2627 const UV utf8flags = UTF8_ALLOW_ANYUV;
2629 while (tmps < send) {
2630 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2632 targlen += UVCHR_SKIP(~c);
2638 /* Now rewind strings and write them. */
2645 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
2646 deprecated_above_ff_msg, PL_op_desc[PL_op->op_type]);
2647 Newx(result, targlen + 1, U8);
2649 while (tmps < send) {
2650 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2652 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2655 sv_usepvn_flags(TARG, (char*)result, targlen,
2656 SV_HAS_TRAILING_NUL);
2663 Newx(result, nchar + 1, U8);
2665 while (tmps < send) {
2666 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2671 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2679 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2682 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2687 for ( ; anum > 0; anum--, tmps++)
2694 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2698 if (PL_op->op_private & HINT_INTEGER) {
2699 const IV i = ~SvIV_nomg(sv);
2703 const UV u = ~SvUV_nomg(sv);
2708 S_scomplement(aTHX_ TARG, sv);
2718 tryAMAGICun_MG(compl_amg, AMGf_numeric|AMGf_numarg);
2721 if (PL_op->op_private & HINT_INTEGER) {
2722 const IV i = ~SvIV_nomg(sv);
2726 const UV u = ~SvUV_nomg(sv);
2736 tryAMAGICun_MG(scompl_amg, AMGf_numeric);
2739 S_scomplement(aTHX_ TARG, sv);
2745 /* integer versions of some of the above */
2750 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2753 SETi( left * right );
2762 tryAMAGICbin_MG(div_amg, AMGf_assign);
2765 IV value = SvIV_nomg(right);
2767 DIE(aTHX_ "Illegal division by zero");
2768 num = SvIV_nomg(left);
2770 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2774 value = num / value;
2780 #if defined(__GLIBC__) && IVSIZE == 8 && !defined(PERL_DEBUG_READONLY_OPS) \
2781 && ( __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8))
2788 /* This is the vanilla old i_modulo. */
2790 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2794 DIE(aTHX_ "Illegal modulus zero");
2795 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2799 SETi( left % right );
2804 #if defined(__GLIBC__) && IVSIZE == 8 && !defined(PERL_DEBUG_READONLY_OPS) \
2805 && ( __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8))
2810 /* This is the i_modulo with the workaround for the _moddi3 bug
2811 * in (at least) glibc 2.2.5 (the PERL_ABS() is the workaround).
2812 * See below for pp_i_modulo. */
2814 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2818 DIE(aTHX_ "Illegal modulus zero");
2819 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2823 SETi( left % PERL_ABS(right) );
2830 dVAR; dSP; dATARGET;
2831 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2835 DIE(aTHX_ "Illegal modulus zero");
2836 /* The assumption is to use hereafter the old vanilla version... */
2838 PL_ppaddr[OP_I_MODULO] =
2840 /* .. but if we have glibc, we might have a buggy _moddi3
2841 * (at least glibc 2.2.5 is known to have this bug), in other
2842 * words our integer modulus with negative quad as the second
2843 * argument might be broken. Test for this and re-patch the
2844 * opcode dispatch table if that is the case, remembering to
2845 * also apply the workaround so that this first round works
2846 * right, too. See [perl #9402] for more information. */
2850 /* Cannot do this check with inlined IV constants since
2851 * that seems to work correctly even with the buggy glibc. */
2853 /* Yikes, we have the bug.
2854 * Patch in the workaround version. */
2856 PL_ppaddr[OP_I_MODULO] =
2857 &Perl_pp_i_modulo_1;
2858 /* Make certain we work right this time, too. */
2859 right = PERL_ABS(right);
2862 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2866 SETi( left % right );
2875 tryAMAGICbin_MG(add_amg, AMGf_assign);
2877 dPOPTOPiirl_ul_nomg;
2878 SETi( left + right );
2886 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2888 dPOPTOPiirl_ul_nomg;
2889 SETi( left - right );
2897 tryAMAGICbin_MG(lt_amg, AMGf_set);
2900 SETs(boolSV(left < right));
2908 tryAMAGICbin_MG(gt_amg, AMGf_set);
2911 SETs(boolSV(left > right));
2919 tryAMAGICbin_MG(le_amg, AMGf_set);
2922 SETs(boolSV(left <= right));
2930 tryAMAGICbin_MG(ge_amg, AMGf_set);
2933 SETs(boolSV(left >= right));
2941 tryAMAGICbin_MG(eq_amg, AMGf_set);
2944 SETs(boolSV(left == right));
2952 tryAMAGICbin_MG(ne_amg, AMGf_set);
2955 SETs(boolSV(left != right));
2963 tryAMAGICbin_MG(ncmp_amg, 0);
2970 else if (left < right)
2982 tryAMAGICun_MG(neg_amg, 0);
2983 if (S_negate_string(aTHX)) return NORMAL;
2985 SV * const sv = TOPs;
2986 IV const i = SvIV_nomg(sv);
2992 /* High falutin' math. */
2997 tryAMAGICbin_MG(atan2_amg, 0);
3000 SETn(Perl_atan2(left, right));
3006 /* also used for: pp_cos() pp_exp() pp_log() pp_sqrt() */
3011 int amg_type = fallback_amg;
3012 const char *neg_report = NULL;
3013 const int op_type = PL_op->op_type;
3016 case OP_SIN: amg_type = sin_amg; break;
3017 case OP_COS: amg_type = cos_amg; break;
3018 case OP_EXP: amg_type = exp_amg; break;
3019 case OP_LOG: amg_type = log_amg; neg_report = "log"; break;
3020 case OP_SQRT: amg_type = sqrt_amg; neg_report = "sqrt"; break;
3023 assert(amg_type != fallback_amg);
3025 tryAMAGICun_MG(amg_type, 0);
3027 SV * const arg = TOPs;
3028 const NV value = SvNV_nomg(arg);
3030 if (neg_report) { /* log or sqrt */
3032 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
3033 ! Perl_isnan(value) &&
3035 (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0))) {
3036 SET_NUMERIC_STANDARD();
3037 /* diag_listed_as: Can't take log of %g */
3038 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
3043 case OP_SIN: result = Perl_sin(value); break;
3044 case OP_COS: result = Perl_cos(value); break;
3045 case OP_EXP: result = Perl_exp(value); break;
3046 case OP_LOG: result = Perl_log(value); break;
3047 case OP_SQRT: result = Perl_sqrt(value); break;
3054 /* Support Configure command-line overrides for rand() functions.
3055 After 5.005, perhaps we should replace this by Configure support
3056 for drand48(), random(), or rand(). For 5.005, though, maintain
3057 compatibility by calling rand() but allow the user to override it.
3058 See INSTALL for details. --Andy Dougherty 15 July 1998
3060 /* Now it's after 5.005, and Configure supports drand48() and random(),
3061 in addition to rand(). So the overrides should not be needed any more.
3062 --Jarkko Hietaniemi 27 September 1998
3067 if (!PL_srand_called) {
3068 (void)seedDrand01((Rand_seed_t)seed());
3069 PL_srand_called = TRUE;
3081 SV * const sv = POPs;
3087 /* 1 of 2 things can be carried through SvNV, SP or TARG, SP was carried */
3088 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
3089 if (! Perl_isnan(value) && value == 0.0)
3099 sv_setnv_mg(TARG, value);
3110 if (MAXARG >= 1 && (TOPs || POPs)) {
3117 pv = SvPV(top, len);
3118 flags = grok_number(pv, len, &anum);
3120 if (!(flags & IS_NUMBER_IN_UV)) {
3121 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
3122 "Integer overflow in srand");
3130 (void)seedDrand01((Rand_seed_t)anum);
3131 PL_srand_called = TRUE;
3135 /* Historically srand always returned true. We can avoid breaking
3137 sv_setpvs(TARG, "0 but true");
3146 tryAMAGICun_MG(int_amg, AMGf_numeric);
3148 SV * const sv = TOPs;
3149 const IV iv = SvIV_nomg(sv);
3150 /* XXX it's arguable that compiler casting to IV might be subtly
3151 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
3152 case preferring IV has introduced a subtle behaviour change bug. OTOH
3153 relying on floating point to be accurate is a bug. */
3158 else if (SvIOK(sv)) {
3160 SETu(SvUV_nomg(sv));
3165 const NV value = SvNV_nomg(sv);
3166 if (UNLIKELY(Perl_isinfnan(value)))
3168 else if (value >= 0.0) {
3169 if (value < (NV)UV_MAX + 0.5) {
3172 SETn(Perl_floor(value));
3176 if (value > (NV)IV_MIN - 0.5) {
3179 SETn(Perl_ceil(value));
3190 tryAMAGICun_MG(abs_amg, AMGf_numeric);
3192 SV * const sv = TOPs;
3193 /* This will cache the NV value if string isn't actually integer */
3194 const IV iv = SvIV_nomg(sv);
3199 else if (SvIOK(sv)) {
3200 /* IVX is precise */
3202 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
3210 /* 2s complement assumption. Also, not really needed as
3211 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
3217 const NV value = SvNV_nomg(sv);
3228 /* also used for: pp_hex() */
3234 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
3238 SV* const sv = TOPs;
3240 tmps = (SvPV_const(sv, len));
3242 /* If Unicode, try to downgrade
3243 * If not possible, croak. */
3244 SV* const tsv = sv_2mortal(newSVsv(sv));
3247 sv_utf8_downgrade(tsv, FALSE);
3248 tmps = SvPV_const(tsv, len);
3250 if (PL_op->op_type == OP_HEX)
3253 while (*tmps && len && isSPACE(*tmps))
3257 if (isALPHA_FOLD_EQ(*tmps, 'x')) {
3259 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
3261 else if (isALPHA_FOLD_EQ(*tmps, 'b'))
3262 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
3264 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
3266 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
3280 SV * const sv = TOPs;
3282 U32 in_bytes = IN_BYTES;
3283 /* simplest case shortcut */
3284 /* turn off SVf_UTF8 in tmp flags if HINT_BYTES on*/
3285 U32 svflags = (SvFLAGS(sv) ^ (in_bytes << 26)) & (SVf_POK|SVs_GMG|SVf_UTF8);
3286 STATIC_ASSERT_STMT(HINT_BYTES == 0x00000008 && SVf_UTF8 == 0x20000000 && (SVf_UTF8 == HINT_BYTES << 26));
3289 if(LIKELY(svflags == SVf_POK))
3291 if(svflags & SVs_GMG)
3294 if (!IN_BYTES) /* reread to avoid using a C auto/register */
3295 sv_setiv(TARG, (IV)sv_len_utf8_nomg(sv));
3299 /* unrolled SvPV_nomg_const(sv,len) */
3304 (void)sv_2pv_flags(sv, &len, 0|SV_CONST_RETURN);
3306 sv_setiv(TARG, (IV)(len));
3309 if (!SvPADTMP(TARG)) {
3310 sv_setsv_nomg(TARG, &PL_sv_undef);
3311 } else { /* TARG is on stack at this point and is overwritten by SETs.
3312 This branch is the odd one out, so put TARG by default on
3313 stack earlier to let local SP go out of liveness sooner */
3320 return NORMAL; /* no putback, SP didn't move in this opcode */
3323 /* Returns false if substring is completely outside original string.
3324 No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must
3325 always be true for an explicit 0.
3328 Perl_translate_substr_offsets( STRLEN curlen, IV pos1_iv,
3329 bool pos1_is_uv, IV len_iv,
3330 bool len_is_uv, STRLEN *posp,
3336 PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
3338 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
3339 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3342 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
3345 if (len_iv || len_is_uv) {
3346 if (!len_is_uv && len_iv < 0) {
3347 pos2_iv = curlen + len_iv;
3349 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3352 } else { /* len_iv >= 0 */
3353 if (!pos1_is_uv && pos1_iv < 0) {
3354 pos2_iv = pos1_iv + len_iv;
3355 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3357 if ((UV)len_iv > curlen-(UV)pos1_iv)
3360 pos2_iv = pos1_iv+len_iv;
3370 if (!pos2_is_uv && pos2_iv < 0) {
3371 if (!pos1_is_uv && pos1_iv < 0)
3375 else if (!pos1_is_uv && pos1_iv < 0)
3378 if ((UV)pos2_iv < (UV)pos1_iv)
3380 if ((UV)pos2_iv > curlen)
3383 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3384 *posp = (STRLEN)( (UV)pos1_iv );
3385 *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3402 I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3403 const bool rvalue = (GIMME_V != G_VOID);
3406 const char *repl = NULL;
3408 int num_args = PL_op->op_private & 7;
3409 bool repl_need_utf8_upgrade = FALSE;
3413 if(!(repl_sv = POPs)) num_args--;
3415 if ((len_sv = POPs)) {
3416 len_iv = SvIV(len_sv);
3417 len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1;
3422 pos1_iv = SvIV(pos_sv);
3423 pos1_is_uv = SvIOK_UV(pos_sv);
3425 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) {
3429 if (lvalue && !repl_sv) {
3431 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3432 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3434 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3436 pos1_is_uv || pos1_iv >= 0
3437 ? (STRLEN)(UV)pos1_iv
3438 : (LvFLAGS(ret) |= 1, (STRLEN)(UV)-pos1_iv);
3440 len_is_uv || len_iv > 0
3441 ? (STRLEN)(UV)len_iv
3442 : (LvFLAGS(ret) |= 2, (STRLEN)(UV)-len_iv);
3444 PUSHs(ret); /* avoid SvSETMAGIC here */
3448 repl = SvPV_const(repl_sv, repl_len);
3451 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3452 "Attempt to use reference as lvalue in substr"
3454 tmps = SvPV_force_nomg(sv, curlen);
3455 if (DO_UTF8(repl_sv) && repl_len) {
3457 sv_utf8_upgrade_nomg(sv);
3461 else if (DO_UTF8(sv))
3462 repl_need_utf8_upgrade = TRUE;
3464 else tmps = SvPV_const(sv, curlen);
3466 utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen);
3467 if (utf8_curlen == curlen)
3470 curlen = utf8_curlen;
3476 STRLEN pos, len, byte_len, byte_pos;
3478 if (!translate_substr_offsets(
3479 curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len
3483 byte_pos = utf8_curlen
3484 ? sv_or_pv_pos_u2b(sv, tmps, pos, &byte_len) : pos;
3489 SvTAINTED_off(TARG); /* decontaminate */
3490 SvUTF8_off(TARG); /* decontaminate */
3491 sv_setpvn(TARG, tmps, byte_len);
3492 #ifdef USE_LOCALE_COLLATE
3493 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3500 SV* repl_sv_copy = NULL;
3502 if (repl_need_utf8_upgrade) {
3503 repl_sv_copy = newSVsv(repl_sv);
3504 sv_utf8_upgrade(repl_sv_copy);
3505 repl = SvPV_const(repl_sv_copy, repl_len);
3509 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3510 SvREFCNT_dec(repl_sv_copy);
3513 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST)
3523 Perl_croak(aTHX_ "substr outside of string");
3524 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3531 const IV size = POPi;
3532 const IV offset = POPi;
3533 SV * const src = POPs;
3534 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3537 if (lvalue) { /* it's an lvalue! */
3538 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3539 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3541 LvTARG(ret) = SvREFCNT_inc_simple(src);
3542 LvTARGOFF(ret) = offset;
3543 LvTARGLEN(ret) = size;
3547 SvTAINTED_off(TARG); /* decontaminate */
3551 sv_setuv(ret, do_vecget(src, offset, size));
3559 /* also used for: pp_rindex() */
3572 const char *little_p;
3575 const bool is_index = PL_op->op_type == OP_INDEX;
3576 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
3582 big_p = SvPV_const(big, biglen);
3583 little_p = SvPV_const(little, llen);
3585 big_utf8 = DO_UTF8(big);
3586 little_utf8 = DO_UTF8(little);
3587 if (big_utf8 ^ little_utf8) {
3588 /* One needs to be upgraded. */
3589 if (little_utf8 && !IN_ENCODING) {
3590 /* Well, maybe instead we might be able to downgrade the small
3592 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3595 /* If the large string is ISO-8859-1, and it's not possible to
3596 convert the small string to ISO-8859-1, then there is no
3597 way that it could be found anywhere by index. */
3602 /* At this point, pv is a malloc()ed string. So donate it to temp
3603 to ensure it will get free()d */
3604 little = temp = newSV(0);
3605 sv_usepvn(temp, pv, llen);
3606 little_p = SvPVX(little);
3609 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3612 sv_recode_to_utf8(temp, _get_encoding());
3614 sv_utf8_upgrade(temp);
3619 big_p = SvPV_const(big, biglen);
3622 little_p = SvPV_const(little, llen);
3626 if (SvGAMAGIC(big)) {
3627 /* Life just becomes a lot easier if I use a temporary here.
3628 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3629 will trigger magic and overloading again, as will fbm_instr()
3631 big = newSVpvn_flags(big_p, biglen,
3632 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3635 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3636 /* is_index && !SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3637 warn on undef, and we've already triggered a warning with the
3638 SvPV_const some lines above. We can't remove that, as we need to
3639 call some SvPV to trigger overloading early and find out if the
3641 This is all getting too messy. The API isn't quite clean enough,
3642 because data access has side effects.
3644 little = newSVpvn_flags(little_p, llen,
3645 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3646 little_p = SvPVX(little);
3650 offset = is_index ? 0 : biglen;
3652 if (big_utf8 && offset > 0)
3653 offset = sv_pos_u2b_flags(big, offset, 0, SV_CONST_RETURN);
3659 else if (offset > (SSize_t)biglen)
3661 if (!(little_p = is_index
3662 ? fbm_instr((unsigned char*)big_p + offset,
3663 (unsigned char*)big_p + biglen, little, 0)
3664 : rninstr(big_p, big_p + offset,
3665 little_p, little_p + llen)))
3668 retval = little_p - big_p;
3669 if (retval > 1 && big_utf8)
3670 retval = sv_pos_b2u_flags(big, retval, SV_CONST_RETURN);
3680 dSP; dMARK; dORIGMARK; dTARGET;
3681 SvTAINTED_off(TARG);
3682 do_sprintf(TARG, SP-MARK, MARK+1);
3683 TAINT_IF(SvTAINTED(TARG));
3695 const U8 *s = (U8*)SvPV_const(argsv, len);
3697 if (IN_ENCODING && SvPOK(argsv) && !DO_UTF8(argsv)) {
3698 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3699 s = (U8*)sv_recode_to_utf8(tmpsv, _get_encoding());
3700 len = UTF8SKIP(s); /* Should be well-formed; so this is its length */
3705 ? utf8n_to_uvchr(s, len, 0, UTF8_ALLOW_ANYUV)
3719 if (UNLIKELY(SvAMAGIC(top)))
3721 if (UNLIKELY(isinfnansv(top)))
3722 Perl_croak(aTHX_ "Cannot chr %"NVgf, SvNV(top));
3724 if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */
3725 && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0)
3727 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top)))
3728 && SvNV_nomg(top) < 0.0)))
3730 if (ckWARN(WARN_UTF8)) {
3731 if (SvGMAGICAL(top)) {
3732 SV *top2 = sv_newmortal();
3733 sv_setsv_nomg(top2, top);
3736 Perl_warner(aTHX_ packWARN(WARN_UTF8),
3737 "Invalid negative number (%"SVf") in chr", SVfARG(top));
3739 value = UNICODE_REPLACEMENT;
3741 value = SvUV_nomg(top);
3745 SvUPGRADE(TARG,SVt_PV);
3747 if (value > 255 && !IN_BYTES) {
3748 SvGROW(TARG, (STRLEN)UVCHR_SKIP(value)+1);
3749 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3750 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3752 (void)SvPOK_only(TARG);
3761 *tmps++ = (char)value;
3763 (void)SvPOK_only(TARG);
3765 if (IN_ENCODING && !IN_BYTES) {
3766 sv_recode_to_utf8(TARG, _get_encoding());
3768 if (SvCUR(TARG) == 0
3769 || ! is_utf8_string((U8*)tmps, SvCUR(TARG))
3770 || UTF8_IS_REPLACEMENT((U8*) tmps, (U8*) tmps + SvCUR(TARG)))
3775 *tmps++ = (char)value;
3791 const char *tmps = SvPV_const(left, len);
3793 if (DO_UTF8(left)) {
3794 /* If Unicode, try to downgrade.
3795 * If not possible, croak.
3796 * Yes, we made this up. */
3797 SV* const tsv = newSVpvn_flags(tmps, len, SVf_UTF8|SVs_TEMP);
3799 sv_utf8_downgrade(tsv, FALSE);
3800 tmps = SvPV_const(tsv, len);
3802 # ifdef USE_ITHREADS
3804 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3805 /* This should be threadsafe because in ithreads there is only
3806 * one thread per interpreter. If this were not true,
3807 * we would need a mutex to protect this malloc. */
3808 PL_reentrant_buffer->_crypt_struct_buffer =
3809 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3810 #if defined(__GLIBC__) || defined(__EMX__)
3811 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3812 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3813 /* work around glibc-2.2.5 bug */
3814 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3818 # endif /* HAS_CRYPT_R */
3819 # endif /* USE_ITHREADS */
3821 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3823 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3830 "The crypt() function is unimplemented due to excessive paranoia.");
3834 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3835 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3838 /* also used for: pp_lcfirst() */
3842 /* Actually is both lcfirst() and ucfirst(). Only the first character
3843 * changes. This means that possibly we can change in-place, i.e., just
3844 * take the source and change that one character and store it back, but not
3845 * if read-only etc, or if the length changes */
3849 STRLEN slen; /* slen is the byte length of the whole SV. */
3852 bool inplace; /* ? Convert first char only, in-place */
3853 bool doing_utf8 = FALSE; /* ? using utf8 */
3854 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3855 const int op_type = PL_op->op_type;
3858 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3859 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3860 * stored as UTF-8 at s. */
3861 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3862 * lowercased) character stored in tmpbuf. May be either
3863 * UTF-8 or not, but in either case is the number of bytes */
3865 s = (const U8*)SvPV_const(source, slen);
3867 /* We may be able to get away with changing only the first character, in
3868 * place, but not if read-only, etc. Later we may discover more reasons to
3869 * not convert in-place. */
3870 inplace = !SvREADONLY(source)
3871 && ( SvPADTMP(source)
3872 || ( SvTEMP(source) && !SvSMAGICAL(source)
3873 && SvREFCNT(source) == 1));
3875 /* First calculate what the changed first character should be. This affects
3876 * whether we can just swap it out, leaving the rest of the string unchanged,
3877 * or even if we have to convert the dest to UTF-8 when the source isn't */
3879 if (! slen) { /* If empty */
3880 need = 1; /* still need a trailing NUL */
3883 else if (DO_UTF8(source)) { /* Is the source utf8? */
3886 if (op_type == OP_UCFIRST) {
3887 #ifdef USE_LOCALE_CTYPE
3888 _to_utf8_title_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
3890 _to_utf8_title_flags(s, tmpbuf, &tculen, 0);
3894 #ifdef USE_LOCALE_CTYPE
3895 _to_utf8_lower_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
3897 _to_utf8_lower_flags(s, tmpbuf, &tculen, 0);
3901 /* we can't do in-place if the length changes. */
3902 if (ulen != tculen) inplace = FALSE;
3903 need = slen + 1 - ulen + tculen;
3905 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3906 * latin1 is treated as caseless. Note that a locale takes
3908 ulen = 1; /* Original character is 1 byte */
3909 tculen = 1; /* Most characters will require one byte, but this will
3910 * need to be overridden for the tricky ones */
3913 if (op_type == OP_LCFIRST) {
3915 /* lower case the first letter: no trickiness for any character */
3916 #ifdef USE_LOCALE_CTYPE
3917 if (IN_LC_RUNTIME(LC_CTYPE)) {
3918 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
3919 *tmpbuf = toLOWER_LC(*s);
3924 *tmpbuf = (IN_UNI_8_BIT)
3925 ? toLOWER_LATIN1(*s)
3929 #ifdef USE_LOCALE_CTYPE
3931 else if (IN_LC_RUNTIME(LC_CTYPE)) {
3932 if (IN_UTF8_CTYPE_LOCALE) {
3936 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
3937 *tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any
3938 locales have upper and title case
3942 else if (! IN_UNI_8_BIT) {
3943 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3944 * on EBCDIC machines whatever the
3945 * native function does */
3948 /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is
3949 * UTF-8, which we treat as not in locale), and cased latin1 */
3951 #ifdef USE_LOCALE_CTYPE
3955 title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
3957 assert(tculen == 2);
3959 /* If the result is an upper Latin1-range character, it can
3960 * still be represented in one byte, which is its ordinal */
3961 if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) {
3962 *tmpbuf = (U8) title_ord;
3966 /* Otherwise it became more than one ASCII character (in
3967 * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to
3968 * beyond Latin1, so the number of bytes changed, so can't
3969 * replace just the first character in place. */
3972 /* If the result won't fit in a byte, the entire result
3973 * will have to be in UTF-8. Assume worst case sizing in
3974 * conversion. (all latin1 characters occupy at most two
3976 if (title_ord > 255) {
3978 convert_source_to_utf8 = TRUE;
3979 need = slen * 2 + 1;
3981 /* The (converted) UTF-8 and UTF-EBCDIC lengths of all
3982 * (both) characters whose title case is above 255 is
3986 else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */
3987 need = slen + 1 + 1;
3991 } /* End of use Unicode (Latin1) semantics */
3992 } /* End of changing the case of the first character */
3994 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3995 * generate the result */
3998 /* We can convert in place. This means we change just the first
3999 * character without disturbing the rest; no need to grow */
4001 s = d = (U8*)SvPV_force_nomg(source, slen);
4007 /* Here, we can't convert in place; we earlier calculated how much
4008 * space we will need, so grow to accommodate that */
4009 SvUPGRADE(dest, SVt_PV);
4010 d = (U8*)SvGROW(dest, need);
4011 (void)SvPOK_only(dest);
4018 if (! convert_source_to_utf8) {
4020 /* Here both source and dest are in UTF-8, but have to create
4021 * the entire output. We initialize the result to be the
4022 * title/lower cased first character, and then append the rest
4024 sv_setpvn(dest, (char*)tmpbuf, tculen);
4026 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
4030 const U8 *const send = s + slen;
4032 /* Here the dest needs to be in UTF-8, but the source isn't,
4033 * except we earlier UTF-8'd the first character of the source
4034 * into tmpbuf. First put that into dest, and then append the
4035 * rest of the source, converting it to UTF-8 as we go. */
4037 /* Assert tculen is 2 here because the only two characters that
4038 * get to this part of the code have 2-byte UTF-8 equivalents */
4040 *d++ = *(tmpbuf + 1);
4041 s++; /* We have just processed the 1st char */
4043 for (; s < send; s++) {
4044 d = uvchr_to_utf8(d, *s);
4047 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4051 else { /* in-place UTF-8. Just overwrite the first character */
4052 Copy(tmpbuf, d, tculen, U8);
4053 SvCUR_set(dest, need - 1);
4057 else { /* Neither source nor dest are in or need to be UTF-8 */
4059 if (inplace) { /* in-place, only need to change the 1st char */
4062 else { /* Not in-place */
4064 /* Copy the case-changed character(s) from tmpbuf */
4065 Copy(tmpbuf, d, tculen, U8);
4066 d += tculen - 1; /* Code below expects d to point to final
4067 * character stored */
4070 else { /* empty source */
4071 /* See bug #39028: Don't taint if empty */
4075 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
4076 * the destination to retain that flag */
4077 if (SvUTF8(source) && ! IN_BYTES)
4080 if (!inplace) { /* Finish the rest of the string, unchanged */
4081 /* This will copy the trailing NUL */
4082 Copy(s + 1, d + 1, slen, U8);
4083 SvCUR_set(dest, need - 1);
4086 #ifdef USE_LOCALE_CTYPE
4087 if (IN_LC_RUNTIME(LC_CTYPE)) {
4092 if (dest != source && SvTAINTED(source))
4098 /* There's so much setup/teardown code common between uc and lc, I wonder if
4099 it would be worth merging the two, and just having a switch outside each
4100 of the three tight loops. There is less and less commonality though */
4113 if ((SvPADTMP(source)
4115 (SvTEMP(source) && !SvSMAGICAL(source) && SvREFCNT(source) == 1))
4116 && !SvREADONLY(source) && SvPOK(source)
4119 #ifdef USE_LOCALE_CTYPE
4120 (IN_LC_RUNTIME(LC_CTYPE))
4121 ? ! IN_UTF8_CTYPE_LOCALE
4127 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
4128 * make the loop tight, so we overwrite the source with the dest before
4129 * looking at it, and we need to look at the original source
4130 * afterwards. There would also need to be code added to handle
4131 * switching to not in-place in midstream if we run into characters
4132 * that change the length. Since being in locale overrides UNI_8_BIT,
4133 * the latter becomes irrelevant in the above test; instead for
4134 * locale, the size can't normally change, except if the locale is a
4137 s = d = (U8*)SvPV_force_nomg(source, len);
4144 s = (const U8*)SvPV_nomg_const(source, len);
4147 SvUPGRADE(dest, SVt_PV);
4148 d = (U8*)SvGROW(dest, min);
4149 (void)SvPOK_only(dest);
4154 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4155 to check DO_UTF8 again here. */
4157 if (DO_UTF8(source)) {
4158 const U8 *const send = s + len;
4159 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4161 /* All occurrences of these are to be moved to follow any other marks.
4162 * This is context-dependent. We may not be passed enough context to
4163 * move the iota subscript beyond all of them, but we do the best we can
4164 * with what we're given. The result is always better than if we
4165 * hadn't done this. And, the problem would only arise if we are
4166 * passed a character without all its combining marks, which would be
4167 * the caller's mistake. The information this is based on comes from a
4168 * comment in Unicode SpecialCasing.txt, (and the Standard's text
4169 * itself) and so can't be checked properly to see if it ever gets
4170 * revised. But the likelihood of it changing is remote */
4171 bool in_iota_subscript = FALSE;
4177 if (in_iota_subscript && ! _is_utf8_mark(s)) {
4179 /* A non-mark. Time to output the iota subscript */
4180 Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
4181 d += capital_iota_len;
4182 in_iota_subscript = FALSE;
4185 /* Then handle the current character. Get the changed case value
4186 * and copy it to the output buffer */
4189 #ifdef USE_LOCALE_CTYPE
4190 uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
4192 uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, 0);
4194 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
4195 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
4196 if (uv == GREEK_CAPITAL_LETTER_IOTA
4197 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
4199 in_iota_subscript = TRUE;
4202 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4203 /* If the eventually required minimum size outgrows the
4204 * available space, we need to grow. */
4205 const UV o = d - (U8*)SvPVX_const(dest);
4207 /* If someone uppercases one million U+03B0s we SvGROW()
4208 * one million times. Or we could try guessing how much to
4209 * allocate without allocating too much. Such is life.
4210 * See corresponding comment in lc code for another option
4213 d = (U8*)SvPVX(dest) + o;
4215 Copy(tmpbuf, d, ulen, U8);
4220 if (in_iota_subscript) {
4221 Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
4222 d += capital_iota_len;
4227 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4229 else { /* Not UTF-8 */
4231 const U8 *const send = s + len;
4233 /* Use locale casing if in locale; regular style if not treating
4234 * latin1 as having case; otherwise the latin1 casing. Do the
4235 * whole thing in a tight loop, for speed. */
4236 #ifdef USE_LOCALE_CTYPE
4237 if (IN_LC_RUNTIME(LC_CTYPE)) {
4238 if (IN_UTF8_CTYPE_LOCALE) {
4241 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4242 for (; s < send; d++, s++)
4243 *d = (U8) toUPPER_LC(*s);
4247 if (! IN_UNI_8_BIT) {
4248 for (; s < send; d++, s++) {
4253 #ifdef USE_LOCALE_CTYPE
4256 for (; s < send; d++, s++) {
4257 *d = toUPPER_LATIN1_MOD(*s);
4258 if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
4262 /* The mainstream case is the tight loop above. To avoid
4263 * extra tests in that, all three characters that require
4264 * special handling are mapped by the MOD to the one tested
4266 * Use the source to distinguish between the three cases */
4268 #if UNICODE_MAJOR_VERSION > 2 \
4269 || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \
4270 && UNICODE_DOT_DOT_VERSION >= 8)
4271 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4273 /* uc() of this requires 2 characters, but they are
4274 * ASCII. If not enough room, grow the string */
4275 if (SvLEN(dest) < ++min) {
4276 const UV o = d - (U8*)SvPVX_const(dest);
4278 d = (U8*)SvPVX(dest) + o;
4280 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4281 continue; /* Back to the tight loop; still in ASCII */
4285 /* The other two special handling characters have their
4286 * upper cases outside the latin1 range, hence need to be
4287 * in UTF-8, so the whole result needs to be in UTF-8. So,
4288 * here we are somewhere in the middle of processing a
4289 * non-UTF-8 string, and realize that we will have to convert
4290 * the whole thing to UTF-8. What to do? There are
4291 * several possibilities. The simplest to code is to
4292 * convert what we have so far, set a flag, and continue on
4293 * in the loop. The flag would be tested each time through
4294 * the loop, and if set, the next character would be
4295 * converted to UTF-8 and stored. But, I (khw) didn't want
4296 * to slow down the mainstream case at all for this fairly
4297 * rare case, so I didn't want to add a test that didn't
4298 * absolutely have to be there in the loop, besides the
4299 * possibility that it would get too complicated for
4300 * optimizers to deal with. Another possibility is to just
4301 * give up, convert the source to UTF-8, and restart the
4302 * function that way. Another possibility is to convert
4303 * both what has already been processed and what is yet to
4304 * come separately to UTF-8, then jump into the loop that
4305 * handles UTF-8. But the most efficient time-wise of the
4306 * ones I could think of is what follows, and turned out to
4307 * not require much extra code. */
4309 /* Convert what we have so far into UTF-8, telling the
4310 * function that we know it should be converted, and to
4311 * allow extra space for what we haven't processed yet.
4312 * Assume the worst case space requirements for converting
4313 * what we haven't processed so far: that it will require
4314 * two bytes for each remaining source character, plus the
4315 * NUL at the end. This may cause the string pointer to
4316 * move, so re-find it. */
4318 len = d - (U8*)SvPVX_const(dest);
4319 SvCUR_set(dest, len);
4320 len = sv_utf8_upgrade_flags_grow(dest,
4321 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4323 d = (U8*)SvPVX(dest) + len;
4325 /* Now process the remainder of the source, converting to
4326 * upper and UTF-8. If a resulting byte is invariant in
4327 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4328 * append it to the output. */
4329 for (; s < send; s++) {
4330 (void) _to_upper_title_latin1(*s, d, &len, 'S');
4334 /* Here have processed the whole source; no need to continue
4335 * with the outer loop. Each character has been converted
4336 * to upper case and converted to UTF-8 */
4339 } /* End of processing all latin1-style chars */
4340 } /* End of processing all chars */
4341 } /* End of source is not empty */
4343 if (source != dest) {
4344 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4345 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4347 } /* End of isn't utf8 */
4348 #ifdef USE_LOCALE_CTYPE
4349 if (IN_LC_RUNTIME(LC_CTYPE)) {
4354 if (dest != source && SvTAINTED(source))
4372 if ( ( SvPADTMP(source)
4373 || ( SvTEMP(source) && !SvSMAGICAL(source)
4374 && SvREFCNT(source) == 1 )
4376 && !SvREADONLY(source) && SvPOK(source)
4377 && !DO_UTF8(source)) {
4379 /* We can convert in place, as lowercasing anything in the latin1 range
4380 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4382 s = d = (U8*)SvPV_force_nomg(source, len);
4389 s = (const U8*)SvPV_nomg_const(source, len);
4392 SvUPGRADE(dest, SVt_PV);
4393 d = (U8*)SvGROW(dest, min);
4394 (void)SvPOK_only(dest);
4399 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4400 to check DO_UTF8 again here. */
4402 if (DO_UTF8(source)) {
4403 const U8 *const send = s + len;
4404 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4407 const STRLEN u = UTF8SKIP(s);
4410 #ifdef USE_LOCALE_CTYPE
4411 _to_utf8_lower_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
4413 _to_utf8_lower_flags(s, tmpbuf, &ulen, 0);
4416 /* Here is where we would do context-sensitive actions. See the
4417 * commit message for 86510fb15 for why there isn't any */
4419 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4421 /* If the eventually required minimum size outgrows the
4422 * available space, we need to grow. */
4423 const UV o = d - (U8*)SvPVX_const(dest);
4425 /* If someone lowercases one million U+0130s we SvGROW() one
4426 * million times. Or we could try guessing how much to
4427 * allocate without allocating too much. Such is life.
4428 * Another option would be to grow an extra byte or two more
4429 * each time we need to grow, which would cut down the million
4430 * to 500K, with little waste */
4432 d = (U8*)SvPVX(dest) + o;
4435 /* Copy the newly lowercased letter to the output buffer we're
4437 Copy(tmpbuf, d, ulen, U8);
4440 } /* End of looping through the source string */
4443 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4444 } else { /* Not utf8 */
4446 const U8 *const send = s + len;
4448 /* Use locale casing if in locale; regular style if not treating
4449 * latin1 as having case; otherwise the latin1 casing. Do the
4450 * whole thing in a tight loop, for speed. */
4451 #ifdef USE_LOCALE_CTYPE
4452 if (IN_LC_RUNTIME(LC_CTYPE)) {
4453 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4454 for (; s < send; d++, s++)
4455 *d = toLOWER_LC(*s);
4459 if (! IN_UNI_8_BIT) {
4460 for (; s < send; d++, s++) {
4465 for (; s < send; d++, s++) {
4466 *d = toLOWER_LATIN1(*s);
4470 if (source != dest) {
4472 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4475 #ifdef USE_LOCALE_CTYPE
4476 if (IN_LC_RUNTIME(LC_CTYPE)) {
4481 if (dest != source && SvTAINTED(source))
4490 SV * const sv = TOPs;
4492 const char *s = SvPV_const(sv,len);
4494 SvUTF8_off(TARG); /* decontaminate */
4497 SvUPGRADE(TARG, SVt_PV);
4498 SvGROW(TARG, (len * 2) + 1);
4502 STRLEN ulen = UTF8SKIP(s);
4503 bool to_quote = FALSE;
4505 if (UTF8_IS_INVARIANT(*s)) {
4506 if (_isQUOTEMETA(*s)) {
4510 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4512 #ifdef USE_LOCALE_CTYPE
4513 /* In locale, we quote all non-ASCII Latin1 chars.
4514 * Otherwise use the quoting rules */
4516 IN_LC_RUNTIME(LC_CTYPE)
4519 _isQUOTEMETA(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s + 1))))
4524 else if (is_QUOTEMETA_high(s)) {
4539 else if (IN_UNI_8_BIT) {
4541 if (_isQUOTEMETA(*s))
4547 /* For non UNI_8_BIT (and hence in locale) just quote all \W
4548 * including everything above ASCII */
4550 if (!isWORDCHAR_A(*s))
4556 SvCUR_set(TARG, d - SvPVX_const(TARG));
4557 (void)SvPOK_only_UTF8(TARG);
4560 sv_setpvn(TARG, s, len);
4576 U8 tmpbuf[UTF8_MAXBYTES_CASE + 1];
4577 #if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \
4578 || (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \
4579 || UNICODE_DOT_DOT_VERSION > 0)
4580 const bool full_folding = TRUE; /* This variable is here so we can easily
4581 move to more generality later */
4583 const bool full_folding = FALSE;
4585 const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 )
4586 #ifdef USE_LOCALE_CTYPE
4587 | ( IN_LC_RUNTIME(LC_CTYPE) ? FOLD_FLAGS_LOCALE : 0 )
4591 /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me.
4592 * You are welcome(?) -Hugmeir
4600 s = (const U8*)SvPV_nomg_const(source, len);
4602 if (ckWARN(WARN_UNINITIALIZED))
4603 report_uninit(source);
4610 SvUPGRADE(dest, SVt_PV);
4611 d = (U8*)SvGROW(dest, min);
4612 (void)SvPOK_only(dest);
4617 if (DO_UTF8(source)) { /* UTF-8 flagged string. */
4619 const STRLEN u = UTF8SKIP(s);
4622 _to_utf8_fold_flags(s, tmpbuf, &ulen, flags);
4624 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4625 const UV o = d - (U8*)SvPVX_const(dest);
4627 d = (U8*)SvPVX(dest) + o;
4630 Copy(tmpbuf, d, ulen, U8);
4635 } /* Unflagged string */
4637 #ifdef USE_LOCALE_CTYPE
4638 if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */
4639 if (IN_UTF8_CTYPE_LOCALE) {
4640 goto do_uni_folding;
4642 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4643 for (; s < send; d++, s++)
4644 *d = (U8) toFOLD_LC(*s);
4648 if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
4649 for (; s < send; d++, s++)
4653 #ifdef USE_LOCALE_CTYPE
4656 /* For ASCII and the Latin-1 range, there are only two troublesome
4657 * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
4658 * casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which
4659 * under any fold becomes \x{3BC} (\N{GREEK SMALL LETTER MU}) --
4660 * For the rest, the casefold is their lowercase. */
4661 for (; s < send; d++, s++) {
4662 if (*s == MICRO_SIGN) {
4663 /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU},
4664 * which is outside of the latin-1 range. There are a couple
4665 * of ways to deal with this -- khw discusses them in
4666 * pp_lc/uc, so go there :) What we do here is upgrade what
4667 * we had already casefolded, then enter an inner loop that
4668 * appends the rest of the characters as UTF-8. */
4669 len = d - (U8*)SvPVX_const(dest);
4670 SvCUR_set(dest, len);
4671 len = sv_utf8_upgrade_flags_grow(dest,
4672 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,