3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
33 /* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
37 #ifdef NEED_GETPID_PROTO
38 extern Pid_t getpid (void);
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
45 #if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
49 /* variations on pp_null */
55 if (GIMME_V == G_SCALAR)
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
71 if (PL_op->op_flags & OPf_REF) {
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
84 if (gimme == G_ARRAY) {
85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
87 if (SvMAGICAL(TARG)) {
89 for (i=0; i < (U32)maxarg; i++) {
90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
99 else if (gimme == G_SCALAR) {
100 SV* const sv = sv_newmortal();
101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
102 sv_setiv(sv, maxarg);
113 assert(SvTYPE(TARG) == SVt_PVHV);
115 if (PL_op->op_private & OPpLVAL_INTRO)
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
118 if (PL_op->op_flags & OPf_REF)
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
129 if (gimme == G_ARRAY) {
130 RETURNOP(Perl_do_kv(aTHX));
132 else if (gimme == G_SCALAR) {
133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
141 static const char S_no_symref_sv[] =
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
148 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
152 sv = amagic_deref_call(sv, to_gv_amg);
156 if (SvTYPE(sv) == SVt_PVIO) {
157 GV * const gv = MUTABLE_GV(sv_newmortal());
158 gv_init(gv, 0, "", 0, 0);
159 GvIOp(gv) = MUTABLE_IO(sv);
160 SvREFCNT_inc_void_NN(sv);
163 else if (!isGV_with_GP(sv))
164 DIE(aTHX_ "Not a GLOB reference");
167 if (!isGV_with_GP(sv)) {
168 if (!SvOK(sv) && sv != &PL_sv_undef) {
169 /* If this is a 'my' scalar and flag is set then vivify
173 Perl_croak_no_modify(aTHX);
174 if (PL_op->op_private & OPpDEREF) {
176 if (cUNOP->op_targ) {
178 SV * const namesv = PAD_SV(cUNOP->op_targ);
179 const char * const name = SvPV(namesv, len);
180 gv = MUTABLE_GV(newSV(0));
181 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
184 const char * const name = CopSTASHPV(PL_curcop);
187 prepare_SV_for_RV(sv);
188 SvRV_set(sv, MUTABLE_SV(gv));
193 if (PL_op->op_flags & OPf_REF ||
194 PL_op->op_private & HINT_STRICT_REFS)
195 DIE(aTHX_ PL_no_usym, "a symbol");
196 if (ckWARN(WARN_UNINITIALIZED))
200 if ((PL_op->op_flags & OPf_SPECIAL) &&
201 !(PL_op->op_flags & OPf_MOD))
203 SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
205 && (!is_gv_magical_sv(sv,0)
206 || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
213 if (PL_op->op_private & HINT_STRICT_REFS)
214 DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
215 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
216 == OPpDONT_INIT_GV) {
217 /* We are the target of a coderef assignment. Return
218 the scalar unchanged, and let pp_sasssign deal with
222 sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD, SVt_PVGV));
224 /* FAKE globs in the symbol table cause weird bugs (#77810) */
225 if (sv) SvFAKE_off(sv);
228 if (sv && SvFAKE(sv)) {
229 SV *newsv = sv_newmortal();
230 sv_setsv_flags(newsv, sv, 0);
234 if (PL_op->op_private & OPpLVAL_INTRO)
235 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
240 /* Helper function for pp_rv2sv and pp_rv2av */
242 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
243 const svtype type, SV ***spp)
248 PERL_ARGS_ASSERT_SOFTREF2XV;
250 if (PL_op->op_private & HINT_STRICT_REFS) {
252 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
254 Perl_die(aTHX_ PL_no_usym, what);
258 PL_op->op_flags & OPf_REF &&
259 PL_op->op_next->op_type != OP_BOOLKEYS
261 Perl_die(aTHX_ PL_no_usym, what);
262 if (ckWARN(WARN_UNINITIALIZED))
264 if (type != SVt_PV && GIMME_V == G_ARRAY) {
268 **spp = &PL_sv_undef;
271 if ((PL_op->op_flags & OPf_SPECIAL) &&
272 !(PL_op->op_flags & OPf_MOD))
274 gv = gv_fetchsv(sv, 0, type);
276 && (!is_gv_magical_sv(sv,0)
277 || !(gv = gv_fetchsv(sv, GV_ADD, type))))
279 **spp = &PL_sv_undef;
284 gv = gv_fetchsv(sv, GV_ADD, type);
294 if (!(PL_op->op_private & OPpDEREFed))
298 sv = amagic_deref_call(sv, to_sv_amg);
303 switch (SvTYPE(sv)) {
309 DIE(aTHX_ "Not a SCALAR reference");
316 if (!isGV_with_GP(gv)) {
317 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
323 if (PL_op->op_flags & OPf_MOD) {
324 if (PL_op->op_private & OPpLVAL_INTRO) {
325 if (cUNOP->op_first->op_type == OP_NULL)
326 sv = save_scalar(MUTABLE_GV(TOPs));
328 sv = save_scalar(gv);
330 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
332 else if (PL_op->op_private & OPpDEREF)
333 vivify_ref(sv, PL_op->op_private & OPpDEREF);
342 AV * const av = MUTABLE_AV(TOPs);
343 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
345 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
347 *sv = newSV_type(SVt_PVMG);
348 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
352 SETs(sv_2mortal(newSViv(
353 AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
363 if (PL_op->op_flags & OPf_MOD || LVRET) {
364 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
365 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
367 LvTARG(ret) = SvREFCNT_inc_simple(sv);
368 PUSHs(ret); /* no SvSETMAGIC */
372 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
373 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
374 if (mg && mg->mg_len >= 0) {
379 PUSHi(i + CopARYBASE_get(PL_curcop));
392 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
394 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
397 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
398 /* (But not in defined().) */
400 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
403 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
404 if ((PL_op->op_private & OPpLVAL_INTRO)) {
405 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
408 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
411 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
415 cv = MUTABLE_CV(&PL_sv_undef);
416 SETs(MUTABLE_SV(cv));
426 SV *ret = &PL_sv_undef;
428 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
429 const char * s = SvPVX_const(TOPs);
430 if (strnEQ(s, "CORE::", 6)) {
431 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
432 if (code < 0) { /* Overridable. */
433 #define MAX_ARGS_OP ((sizeof(I32) - 1) * 2)
434 int i = 0, n = 0, seen_question = 0, defgv = 0;
436 char str[ MAX_ARGS_OP * 2 + 2 ]; /* One ';', one '\0' */
438 if (code == -KEY_chop || code == -KEY_chomp
439 || code == -KEY_exec || code == -KEY_system)
441 if (code == -KEY_mkdir) {
442 ret = newSVpvs_flags("_;$", SVs_TEMP);
445 if (code == -KEY_keys || code == -KEY_values || code == -KEY_each) {
446 ret = newSVpvs_flags("+", SVs_TEMP);
449 if (code == -KEY_push || code == -KEY_unshift) {
450 ret = newSVpvs_flags("+@", SVs_TEMP);
453 if (code == -KEY_pop || code == -KEY_shift) {
454 ret = newSVpvs_flags(";+", SVs_TEMP);
457 if (code == -KEY_splice) {
458 ret = newSVpvs_flags("+;$$@", SVs_TEMP);
461 if (code == -KEY_tied || code == -KEY_untie) {
462 ret = newSVpvs_flags("\\[$@%*]", SVs_TEMP);
465 if (code == -KEY_tie) {
466 ret = newSVpvs_flags("\\[$@%*]$@", SVs_TEMP);
469 if (code == -KEY_readpipe) {
470 s = "CORE::backtick";
472 while (i < MAXO) { /* The slow way. */
473 if (strEQ(s + 6, PL_op_name[i])
474 || strEQ(s + 6, PL_op_desc[i]))
480 goto nonesuch; /* Should not happen... */
482 defgv = PL_opargs[i] & OA_DEFGV;
483 oa = PL_opargs[i] >> OASHIFT;
485 if (oa & OA_OPTIONAL && !seen_question && !defgv) {
489 if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF
490 && (oa & (OA_OPTIONAL - 1)) <= OA_SCALARREF
491 /* But globs are already references (kinda) */
492 && (oa & (OA_OPTIONAL - 1)) != OA_FILEREF
496 str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)];
499 if (defgv && str[n - 1] == '$')
502 ret = newSVpvn_flags(str, n - 1, SVs_TEMP);
504 else if (code) /* Non-Overridable */
506 else { /* None such */
508 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
512 cv = sv_2cv(TOPs, &stash, &gv, 0);
514 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
523 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
525 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
527 PUSHs(MUTABLE_SV(cv));
541 if (GIMME != G_ARRAY) {
545 *MARK = &PL_sv_undef;
546 *MARK = refto(*MARK);
550 EXTEND_MORTAL(SP - MARK);
552 *MARK = refto(*MARK);
557 S_refto(pTHX_ SV *sv)
562 PERL_ARGS_ASSERT_REFTO;
564 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
567 if (!(sv = LvTARG(sv)))
570 SvREFCNT_inc_void_NN(sv);
572 else if (SvTYPE(sv) == SVt_PVAV) {
573 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
574 av_reify(MUTABLE_AV(sv));
576 SvREFCNT_inc_void_NN(sv);
578 else if (SvPADTMP(sv) && !IS_PADGV(sv))
582 SvREFCNT_inc_void_NN(sv);
585 sv_upgrade(rv, SVt_IV);
595 SV * const sv = POPs;
600 if (!sv || !SvROK(sv))
603 pv = sv_reftype(SvRV(sv),TRUE);
604 PUSHp(pv, strlen(pv));
614 stash = CopSTASH(PL_curcop);
616 SV * const ssv = POPs;
620 if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
621 Perl_croak(aTHX_ "Attempt to bless into a reference");
622 ptr = SvPV_const(ssv,len);
624 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
625 "Explicit blessing to '' (assuming package main)");
626 stash = gv_stashpvn(ptr, len, GV_ADD);
629 (void)sv_bless(TOPs, stash);
638 const char * const elem = SvPV_nolen_const(sv);
639 GV * const gv = MUTABLE_GV(POPs);
644 /* elem will always be NUL terminated. */
645 const char * const second_letter = elem + 1;
648 if (strEQ(second_letter, "RRAY"))
649 tmpRef = MUTABLE_SV(GvAV(gv));
652 if (strEQ(second_letter, "ODE"))
653 tmpRef = MUTABLE_SV(GvCVu(gv));
656 if (strEQ(second_letter, "ILEHANDLE")) {
657 /* finally deprecated in 5.8.0 */
658 deprecate("*glob{FILEHANDLE}");
659 tmpRef = MUTABLE_SV(GvIOp(gv));
662 if (strEQ(second_letter, "ORMAT"))
663 tmpRef = MUTABLE_SV(GvFORM(gv));
666 if (strEQ(second_letter, "LOB"))
667 tmpRef = MUTABLE_SV(gv);
670 if (strEQ(second_letter, "ASH"))
671 tmpRef = MUTABLE_SV(GvHV(gv));
674 if (*second_letter == 'O' && !elem[2])
675 tmpRef = MUTABLE_SV(GvIOp(gv));
678 if (strEQ(second_letter, "AME"))
679 sv = newSVhek(GvNAME_HEK(gv));
682 if (strEQ(second_letter, "ACKAGE")) {
683 const HV * const stash = GvSTASH(gv);
684 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
685 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
689 if (strEQ(second_letter, "CALAR"))
704 /* Pattern matching */
709 register unsigned char *s;
712 register I32 *sfirst;
716 if (sv == PL_lastscream) {
720 s = (unsigned char*)(SvPV(sv, len));
721 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
722 /* No point in studying a zero length string, and not safe to study
723 anything that doesn't appear to be a simple scalar (and hence might
724 change between now and when the regexp engine runs without our set
725 magic ever running) such as a reference to an object with overloaded
726 stringification. Also refuse to study an FBM scalar, as this gives
727 more flexibility in SV flag usage. No real-world code would ever
728 end up studying an FBM scalar, so this isn't a real pessimisation.
735 SvSCREAM_off(PL_lastscream);
736 SvREFCNT_dec(PL_lastscream);
738 PL_lastscream = SvREFCNT_inc_simple(sv);
740 if (pos > PL_maxscream) {
741 if (PL_maxscream < 0) {
742 PL_maxscream = pos + 80;
743 Newx(PL_screamfirst, 256, I32);
744 Newx(PL_screamnext, PL_maxscream, I32);
747 PL_maxscream = pos + pos / 4;
748 Renew(PL_screamnext, PL_maxscream, I32);
752 sfirst = PL_screamfirst;
753 snext = PL_screamnext;
755 if (!sfirst || !snext)
756 DIE(aTHX_ "do_study: out of memory");
758 for (ch = 256; ch; --ch)
763 register const I32 ch = s[pos];
765 snext[pos] = sfirst[ch] - pos;
772 /* piggyback on m//g magic */
773 sv_magic(sv, NULL, PERL_MAGIC_regex_global, NULL, 0);
782 if (PL_op->op_flags & OPf_STACKED)
784 else if (PL_op->op_private & OPpTARGET_MY)
790 TARG = sv_newmortal();
791 if(PL_op->op_type == OP_TRANSR) {
792 SV * const newsv = newSVsv(sv);
796 else PUSHi(do_trans(sv));
800 /* Lvalue operators. */
803 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
809 PERL_ARGS_ASSERT_DO_CHOMP;
811 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
813 if (SvTYPE(sv) == SVt_PVAV) {
815 AV *const av = MUTABLE_AV(sv);
816 const I32 max = AvFILL(av);
818 for (i = 0; i <= max; i++) {
819 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
820 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
821 do_chomp(retval, sv, chomping);
825 else if (SvTYPE(sv) == SVt_PVHV) {
826 HV* const hv = MUTABLE_HV(sv);
828 (void)hv_iterinit(hv);
829 while ((entry = hv_iternext(hv)))
830 do_chomp(retval, hv_iterval(hv,entry), chomping);
833 else if (SvREADONLY(sv)) {
835 /* SV is copy-on-write */
836 sv_force_normal_flags(sv, 0);
839 Perl_croak_no_modify(aTHX);
844 /* XXX, here sv is utf8-ized as a side-effect!
845 If encoding.pm is used properly, almost string-generating
846 operations, including literal strings, chr(), input data, etc.
847 should have been utf8-ized already, right?
849 sv_recode_to_utf8(sv, PL_encoding);
855 char *temp_buffer = NULL;
864 while (len && s[-1] == '\n') {
871 STRLEN rslen, rs_charlen;
872 const char *rsptr = SvPV_const(PL_rs, rslen);
874 rs_charlen = SvUTF8(PL_rs)
878 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
879 /* Assumption is that rs is shorter than the scalar. */
881 /* RS is utf8, scalar is 8 bit. */
883 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
886 /* Cannot downgrade, therefore cannot possibly match
888 assert (temp_buffer == rsptr);
894 else if (PL_encoding) {
895 /* RS is 8 bit, encoding.pm is used.
896 * Do not recode PL_rs as a side-effect. */
897 svrecode = newSVpvn(rsptr, rslen);
898 sv_recode_to_utf8(svrecode, PL_encoding);
899 rsptr = SvPV_const(svrecode, rslen);
900 rs_charlen = sv_len_utf8(svrecode);
903 /* RS is 8 bit, scalar is utf8. */
904 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
918 if (memNE(s, rsptr, rslen))
920 SvIVX(retval) += rs_charlen;
923 s = SvPV_force_nolen(sv);
931 SvREFCNT_dec(svrecode);
933 Safefree(temp_buffer);
935 if (len && !SvPOK(sv))
936 s = SvPV_force_nomg(sv, len);
939 char * const send = s + len;
940 char * const start = s;
942 while (s > start && UTF8_IS_CONTINUATION(*s))
944 if (is_utf8_string((U8*)s, send - s)) {
945 sv_setpvn(retval, s, send - s);
947 SvCUR_set(sv, s - start);
953 sv_setpvs(retval, "");
957 sv_setpvn(retval, s, 1);
964 sv_setpvs(retval, "");
972 const bool chomping = PL_op->op_type == OP_SCHOMP;
976 do_chomp(TARG, TOPs, chomping);
983 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
984 const bool chomping = PL_op->op_type == OP_CHOMP;
989 do_chomp(TARG, *++MARK, chomping);
1000 if (!PL_op->op_private) {
1009 SV_CHECK_THINKFIRST_COW_DROP(sv);
1011 switch (SvTYPE(sv)) {
1015 av_undef(MUTABLE_AV(sv));
1018 hv_undef(MUTABLE_HV(sv));
1021 if (cv_const_sv((const CV *)sv))
1022 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
1023 CvANON((const CV *)sv) ? "(anonymous)"
1024 : GvENAME(CvGV((const CV *)sv)));
1028 /* let user-undef'd sub keep its identity */
1029 GV* const gv = CvGV((const CV *)sv);
1030 cv_undef(MUTABLE_CV(sv));
1031 CvGV_set(MUTABLE_CV(sv), gv);
1036 SvSetMagicSV(sv, &PL_sv_undef);
1039 else if (isGV_with_GP(sv)) {
1043 /* undef *Pkg::meth_name ... */
1045 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1046 && HvENAME_get(stash);
1048 if((stash = GvHV((const GV *)sv))) {
1049 if(HvENAME_get(stash))
1050 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1054 gp_free(MUTABLE_GV(sv));
1056 GvGP_set(sv, gp_ref(gp));
1057 GvSV(sv) = newSV(0);
1058 GvLINE(sv) = CopLINE(PL_curcop);
1059 GvEGV(sv) = MUTABLE_GV(sv);
1063 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1065 /* undef *Foo::ISA */
1066 if( strEQ(GvNAME((const GV *)sv), "ISA")
1067 && (stash = GvSTASH((const GV *)sv))
1068 && (method_changed || HvENAME(stash)) )
1069 mro_isa_changed_in(stash);
1070 else if(method_changed)
1071 mro_method_changed_in(
1072 GvSTASH((const GV *)sv)
1079 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1094 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1095 Perl_croak_no_modify(aTHX);
1096 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1097 && SvIVX(TOPs) != IV_MIN)
1099 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1100 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1111 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1112 Perl_croak_no_modify(aTHX);
1114 TARG = sv_newmortal();
1115 sv_setsv(TARG, TOPs);
1116 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1117 && SvIVX(TOPs) != IV_MAX)
1119 SvIV_set(TOPs, SvIVX(TOPs) + 1);
1120 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1125 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1135 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1136 Perl_croak_no_modify(aTHX);
1138 TARG = sv_newmortal();
1139 sv_setsv(TARG, TOPs);
1140 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1141 && SvIVX(TOPs) != IV_MIN)
1143 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1144 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1153 /* Ordinary operators. */
1157 dVAR; dSP; dATARGET; SV *svl, *svr;
1158 #ifdef PERL_PRESERVE_IVUV
1161 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1164 #ifdef PERL_PRESERVE_IVUV
1165 /* For integer to integer power, we do the calculation by hand wherever
1166 we're sure it is safe; otherwise we call pow() and try to convert to
1167 integer afterwards. */
1169 SvIV_please_nomg(svr);
1171 SvIV_please_nomg(svl);
1180 const IV iv = SvIVX(svr);
1184 goto float_it; /* Can't do negative powers this way. */
1188 baseuok = SvUOK(svl);
1190 baseuv = SvUVX(svl);
1192 const IV iv = SvIVX(svl);
1195 baseuok = TRUE; /* effectively it's a UV now */
1197 baseuv = -iv; /* abs, baseuok == false records sign */
1200 /* now we have integer ** positive integer. */
1203 /* foo & (foo - 1) is zero only for a power of 2. */
1204 if (!(baseuv & (baseuv - 1))) {
1205 /* We are raising power-of-2 to a positive integer.
1206 The logic here will work for any base (even non-integer
1207 bases) but it can be less accurate than
1208 pow (base,power) or exp (power * log (base)) when the
1209 intermediate values start to spill out of the mantissa.
1210 With powers of 2 we know this can't happen.
1211 And powers of 2 are the favourite thing for perl
1212 programmers to notice ** not doing what they mean. */
1214 NV base = baseuok ? baseuv : -(NV)baseuv;
1219 while (power >>= 1) {
1227 SvIV_please_nomg(svr);
1230 register unsigned int highbit = 8 * sizeof(UV);
1231 register unsigned int diff = 8 * sizeof(UV);
1232 while (diff >>= 1) {
1234 if (baseuv >> highbit) {
1238 /* we now have baseuv < 2 ** highbit */
1239 if (power * highbit <= 8 * sizeof(UV)) {
1240 /* result will definitely fit in UV, so use UV math
1241 on same algorithm as above */
1242 register UV result = 1;
1243 register UV base = baseuv;
1244 const bool odd_power = cBOOL(power & 1);
1248 while (power >>= 1) {
1255 if (baseuok || !odd_power)
1256 /* answer is positive */
1258 else if (result <= (UV)IV_MAX)
1259 /* answer negative, fits in IV */
1260 SETi( -(IV)result );
1261 else if (result == (UV)IV_MIN)
1262 /* 2's complement assumption: special case IV_MIN */
1265 /* answer negative, doesn't fit */
1266 SETn( -(NV)result );
1276 NV right = SvNV_nomg(svr);
1277 NV left = SvNV_nomg(svl);
1280 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1282 We are building perl with long double support and are on an AIX OS
1283 afflicted with a powl() function that wrongly returns NaNQ for any
1284 negative base. This was reported to IBM as PMR #23047-379 on
1285 03/06/2006. The problem exists in at least the following versions
1286 of AIX and the libm fileset, and no doubt others as well:
1288 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1289 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1290 AIX 5.2.0 bos.adt.libm 5.2.0.85
1292 So, until IBM fixes powl(), we provide the following workaround to
1293 handle the problem ourselves. Our logic is as follows: for
1294 negative bases (left), we use fmod(right, 2) to check if the
1295 exponent is an odd or even integer:
1297 - if odd, powl(left, right) == -powl(-left, right)
1298 - if even, powl(left, right) == powl(-left, right)
1300 If the exponent is not an integer, the result is rightly NaNQ, so
1301 we just return that (as NV_NAN).
1305 NV mod2 = Perl_fmod( right, 2.0 );
1306 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1307 SETn( -Perl_pow( -left, right) );
1308 } else if (mod2 == 0.0) { /* even integer */
1309 SETn( Perl_pow( -left, right) );
1310 } else { /* fractional power */
1314 SETn( Perl_pow( left, right) );
1317 SETn( Perl_pow( left, right) );
1318 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1320 #ifdef PERL_PRESERVE_IVUV
1322 SvIV_please_nomg(svr);
1330 dVAR; dSP; dATARGET; SV *svl, *svr;
1331 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1334 #ifdef PERL_PRESERVE_IVUV
1335 SvIV_please_nomg(svr);
1337 /* Unless the left argument is integer in range we are going to have to
1338 use NV maths. Hence only attempt to coerce the right argument if
1339 we know the left is integer. */
1340 /* Left operand is defined, so is it IV? */
1341 SvIV_please_nomg(svl);
1343 bool auvok = SvUOK(svl);
1344 bool buvok = SvUOK(svr);
1345 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1346 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1355 const IV aiv = SvIVX(svl);
1358 auvok = TRUE; /* effectively it's a UV now */
1360 alow = -aiv; /* abs, auvok == false records sign */
1366 const IV biv = SvIVX(svr);
1369 buvok = TRUE; /* effectively it's a UV now */
1371 blow = -biv; /* abs, buvok == false records sign */
1375 /* If this does sign extension on unsigned it's time for plan B */
1376 ahigh = alow >> (4 * sizeof (UV));
1378 bhigh = blow >> (4 * sizeof (UV));
1380 if (ahigh && bhigh) {
1382 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1383 which is overflow. Drop to NVs below. */
1384 } else if (!ahigh && !bhigh) {
1385 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1386 so the unsigned multiply cannot overflow. */
1387 const UV product = alow * blow;
1388 if (auvok == buvok) {
1389 /* -ve * -ve or +ve * +ve gives a +ve result. */
1393 } else if (product <= (UV)IV_MIN) {
1394 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1395 /* -ve result, which could overflow an IV */
1397 SETi( -(IV)product );
1399 } /* else drop to NVs below. */
1401 /* One operand is large, 1 small */
1404 /* swap the operands */
1406 bhigh = blow; /* bhigh now the temp var for the swap */
1410 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1411 multiplies can't overflow. shift can, add can, -ve can. */
1412 product_middle = ahigh * blow;
1413 if (!(product_middle & topmask)) {
1414 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1416 product_middle <<= (4 * sizeof (UV));
1417 product_low = alow * blow;
1419 /* as for pp_add, UV + something mustn't get smaller.
1420 IIRC ANSI mandates this wrapping *behaviour* for
1421 unsigned whatever the actual representation*/
1422 product_low += product_middle;
1423 if (product_low >= product_middle) {
1424 /* didn't overflow */
1425 if (auvok == buvok) {
1426 /* -ve * -ve or +ve * +ve gives a +ve result. */
1428 SETu( product_low );
1430 } else if (product_low <= (UV)IV_MIN) {
1431 /* 2s complement assumption again */
1432 /* -ve result, which could overflow an IV */
1434 SETi( -(IV)product_low );
1436 } /* else drop to NVs below. */
1438 } /* product_middle too large */
1439 } /* ahigh && bhigh */
1444 NV right = SvNV_nomg(svr);
1445 NV left = SvNV_nomg(svl);
1447 SETn( left * right );
1454 dVAR; dSP; dATARGET; SV *svl, *svr;
1455 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1458 /* Only try to do UV divide first
1459 if ((SLOPPYDIVIDE is true) or
1460 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1462 The assumption is that it is better to use floating point divide
1463 whenever possible, only doing integer divide first if we can't be sure.
1464 If NV_PRESERVES_UV is true then we know at compile time that no UV
1465 can be too large to preserve, so don't need to compile the code to
1466 test the size of UVs. */
1469 # define PERL_TRY_UV_DIVIDE
1470 /* ensure that 20./5. == 4. */
1472 # ifdef PERL_PRESERVE_IVUV
1473 # ifndef NV_PRESERVES_UV
1474 # define PERL_TRY_UV_DIVIDE
1479 #ifdef PERL_TRY_UV_DIVIDE
1480 SvIV_please_nomg(svr);
1482 SvIV_please_nomg(svl);
1484 bool left_non_neg = SvUOK(svl);
1485 bool right_non_neg = SvUOK(svr);
1489 if (right_non_neg) {
1493 const IV biv = SvIVX(svr);
1496 right_non_neg = TRUE; /* effectively it's a UV now */
1502 /* historically undef()/0 gives a "Use of uninitialized value"
1503 warning before dieing, hence this test goes here.
1504 If it were immediately before the second SvIV_please, then
1505 DIE() would be invoked before left was even inspected, so
1506 no inspection would give no warning. */
1508 DIE(aTHX_ "Illegal division by zero");
1514 const IV aiv = SvIVX(svl);
1517 left_non_neg = TRUE; /* effectively it's a UV now */
1526 /* For sloppy divide we always attempt integer division. */
1528 /* Otherwise we only attempt it if either or both operands
1529 would not be preserved by an NV. If both fit in NVs
1530 we fall through to the NV divide code below. However,
1531 as left >= right to ensure integer result here, we know that
1532 we can skip the test on the right operand - right big
1533 enough not to be preserved can't get here unless left is
1536 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1539 /* Integer division can't overflow, but it can be imprecise. */
1540 const UV result = left / right;
1541 if (result * right == left) {
1542 SP--; /* result is valid */
1543 if (left_non_neg == right_non_neg) {
1544 /* signs identical, result is positive. */
1548 /* 2s complement assumption */
1549 if (result <= (UV)IV_MIN)
1550 SETi( -(IV)result );
1552 /* It's exact but too negative for IV. */
1553 SETn( -(NV)result );
1556 } /* tried integer divide but it was not an integer result */
1557 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1558 } /* left wasn't SvIOK */
1559 } /* right wasn't SvIOK */
1560 #endif /* PERL_TRY_UV_DIVIDE */
1562 NV right = SvNV_nomg(svr);
1563 NV left = SvNV_nomg(svl);
1564 (void)POPs;(void)POPs;
1565 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1566 if (! Perl_isnan(right) && right == 0.0)
1570 DIE(aTHX_ "Illegal division by zero");
1571 PUSHn( left / right );
1578 dVAR; dSP; dATARGET;
1579 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1583 bool left_neg = FALSE;
1584 bool right_neg = FALSE;
1585 bool use_double = FALSE;
1586 bool dright_valid = FALSE;
1589 SV * const svr = TOPs;
1590 SV * const svl = TOPm1s;
1591 SvIV_please_nomg(svr);
1593 right_neg = !SvUOK(svr);
1597 const IV biv = SvIVX(svr);
1600 right_neg = FALSE; /* effectively it's a UV now */
1607 dright = SvNV_nomg(svr);
1608 right_neg = dright < 0;
1611 if (dright < UV_MAX_P1) {
1612 right = U_V(dright);
1613 dright_valid = TRUE; /* In case we need to use double below. */
1619 /* At this point use_double is only true if right is out of range for
1620 a UV. In range NV has been rounded down to nearest UV and
1621 use_double false. */
1622 SvIV_please_nomg(svl);
1623 if (!use_double && SvIOK(svl)) {
1625 left_neg = !SvUOK(svl);
1629 const IV aiv = SvIVX(svl);
1632 left_neg = FALSE; /* effectively it's a UV now */
1640 dleft = SvNV_nomg(svl);
1641 left_neg = dleft < 0;
1645 /* This should be exactly the 5.6 behaviour - if left and right are
1646 both in range for UV then use U_V() rather than floor. */
1648 if (dleft < UV_MAX_P1) {
1649 /* right was in range, so is dleft, so use UVs not double.
1653 /* left is out of range for UV, right was in range, so promote
1654 right (back) to double. */
1656 /* The +0.5 is used in 5.6 even though it is not strictly
1657 consistent with the implicit +0 floor in the U_V()
1658 inside the #if 1. */
1659 dleft = Perl_floor(dleft + 0.5);
1662 dright = Perl_floor(dright + 0.5);
1673 DIE(aTHX_ "Illegal modulus zero");
1675 dans = Perl_fmod(dleft, dright);
1676 if ((left_neg != right_neg) && dans)
1677 dans = dright - dans;
1680 sv_setnv(TARG, dans);
1686 DIE(aTHX_ "Illegal modulus zero");
1689 if ((left_neg != right_neg) && ans)
1692 /* XXX may warn: unary minus operator applied to unsigned type */
1693 /* could change -foo to be (~foo)+1 instead */
1694 if (ans <= ~((UV)IV_MAX)+1)
1695 sv_setiv(TARG, ~ans+1);
1697 sv_setnv(TARG, -(NV)ans);
1700 sv_setuv(TARG, ans);
1709 dVAR; dSP; dATARGET;
1713 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1714 /* TODO: think of some way of doing list-repeat overloading ??? */
1719 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1725 const UV uv = SvUV_nomg(sv);
1727 count = IV_MAX; /* The best we can do? */
1731 const IV iv = SvIV_nomg(sv);
1738 else if (SvNOKp(sv)) {
1739 const NV nv = SvNV_nomg(sv);
1746 count = SvIV_nomg(sv);
1748 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1750 static const char oom_list_extend[] = "Out of memory during list extend";
1751 const I32 items = SP - MARK;
1752 const I32 max = items * count;
1754 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1755 /* Did the max computation overflow? */
1756 if (items > 0 && max > 0 && (max < items || max < count))
1757 Perl_croak(aTHX_ oom_list_extend);
1762 /* This code was intended to fix 20010809.028:
1765 for (($x =~ /./g) x 2) {
1766 print chop; # "abcdabcd" expected as output.
1769 * but that change (#11635) broke this code:
1771 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1773 * I can't think of a better fix that doesn't introduce
1774 * an efficiency hit by copying the SVs. The stack isn't
1775 * refcounted, and mortalisation obviously doesn't
1776 * Do The Right Thing when the stack has more than
1777 * one pointer to the same mortal value.
1781 *SP = sv_2mortal(newSVsv(*SP));
1791 repeatcpy((char*)(MARK + items), (char*)MARK,
1792 items * sizeof(const SV *), count - 1);
1795 else if (count <= 0)
1798 else { /* Note: mark already snarfed by pp_list */
1799 SV * const tmpstr = POPs;
1802 static const char oom_string_extend[] =
1803 "Out of memory during string extend";
1806 sv_setsv_nomg(TARG, tmpstr);
1807 SvPV_force_nomg(TARG, len);
1808 isutf = DO_UTF8(TARG);
1813 const STRLEN max = (UV)count * len;
1814 if (len > MEM_SIZE_MAX / count)
1815 Perl_croak(aTHX_ oom_string_extend);
1816 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1817 SvGROW(TARG, max + 1);
1818 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1819 SvCUR_set(TARG, SvCUR(TARG) * count);
1821 *SvEND(TARG) = '\0';
1824 (void)SvPOK_only_UTF8(TARG);
1826 (void)SvPOK_only(TARG);
1828 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1829 /* The parser saw this as a list repeat, and there
1830 are probably several items on the stack. But we're
1831 in scalar context, and there's no pp_list to save us
1832 now. So drop the rest of the items -- robin@kitsite.com
1844 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1845 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1848 useleft = USE_LEFT(svl);
1849 #ifdef PERL_PRESERVE_IVUV
1850 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1851 "bad things" happen if you rely on signed integers wrapping. */
1852 SvIV_please_nomg(svr);
1854 /* Unless the left argument is integer in range we are going to have to
1855 use NV maths. Hence only attempt to coerce the right argument if
1856 we know the left is integer. */
1857 register UV auv = 0;
1863 a_valid = auvok = 1;
1864 /* left operand is undef, treat as zero. */
1866 /* Left operand is defined, so is it IV? */
1867 SvIV_please_nomg(svl);
1869 if ((auvok = SvUOK(svl)))
1872 register const IV aiv = SvIVX(svl);
1875 auvok = 1; /* Now acting as a sign flag. */
1876 } else { /* 2s complement assumption for IV_MIN */
1884 bool result_good = 0;
1887 bool buvok = SvUOK(svr);
1892 register const IV biv = SvIVX(svr);
1899 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1900 else "IV" now, independent of how it came in.
1901 if a, b represents positive, A, B negative, a maps to -A etc
1906 all UV maths. negate result if A negative.
1907 subtract if signs same, add if signs differ. */
1909 if (auvok ^ buvok) {
1918 /* Must get smaller */
1923 if (result <= buv) {
1924 /* result really should be -(auv-buv). as its negation
1925 of true value, need to swap our result flag */
1937 if (result <= (UV)IV_MIN)
1938 SETi( -(IV)result );
1940 /* result valid, but out of range for IV. */
1941 SETn( -(NV)result );
1945 } /* Overflow, drop through to NVs. */
1950 NV value = SvNV_nomg(svr);
1954 /* left operand is undef, treat as zero - value */
1958 SETn( SvNV_nomg(svl) - value );
1965 dVAR; dSP; dATARGET; SV *svl, *svr;
1966 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1970 const IV shift = SvIV_nomg(svr);
1971 if (PL_op->op_private & HINT_INTEGER) {
1972 const IV i = SvIV_nomg(svl);
1976 const UV u = SvUV_nomg(svl);
1985 dVAR; dSP; dATARGET; SV *svl, *svr;
1986 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
1990 const IV shift = SvIV_nomg(svr);
1991 if (PL_op->op_private & HINT_INTEGER) {
1992 const IV i = SvIV_nomg(svl);
1996 const UV u = SvUV_nomg(svl);
2006 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
2007 #ifdef PERL_PRESERVE_IVUV
2008 SvIV_please_nomg(TOPs);
2010 SvIV_please_nomg(TOPm1s);
2011 if (SvIOK(TOPm1s)) {
2012 bool auvok = SvUOK(TOPm1s);
2013 bool buvok = SvUOK(TOPs);
2015 if (!auvok && !buvok) { /* ## IV < IV ## */
2016 const IV aiv = SvIVX(TOPm1s);
2017 const IV biv = SvIVX(TOPs);
2020 SETs(boolSV(aiv < biv));
2023 if (auvok && buvok) { /* ## UV < UV ## */
2024 const UV auv = SvUVX(TOPm1s);
2025 const UV buv = SvUVX(TOPs);
2028 SETs(boolSV(auv < buv));
2031 if (auvok) { /* ## UV < IV ## */
2033 const IV biv = SvIVX(TOPs);
2036 /* As (a) is a UV, it's >=0, so it cannot be < */
2041 SETs(boolSV(auv < (UV)biv));
2044 { /* ## IV < UV ## */
2045 const IV aiv = SvIVX(TOPm1s);
2049 /* As (b) is a UV, it's >=0, so it must be < */
2056 SETs(boolSV((UV)aiv < buv));
2063 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2065 if (Perl_isnan(left) || Perl_isnan(right))
2067 SETs(boolSV(left < right));
2070 SETs(boolSV(SvNV_nomg(TOPs) < value));
2079 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
2080 #ifdef PERL_PRESERVE_IVUV
2081 SvIV_please_nomg(TOPs);
2083 SvIV_please_nomg(TOPm1s);
2084 if (SvIOK(TOPm1s)) {
2085 bool auvok = SvUOK(TOPm1s);
2086 bool buvok = SvUOK(TOPs);
2088 if (!auvok && !buvok) { /* ## IV > IV ## */
2089 const IV aiv = SvIVX(TOPm1s);
2090 const IV biv = SvIVX(TOPs);
2093 SETs(boolSV(aiv > biv));
2096 if (auvok && buvok) { /* ## UV > UV ## */
2097 const UV auv = SvUVX(TOPm1s);
2098 const UV buv = SvUVX(TOPs);
2101 SETs(boolSV(auv > buv));
2104 if (auvok) { /* ## UV > IV ## */
2106 const IV biv = SvIVX(TOPs);
2110 /* As (a) is a UV, it's >=0, so it must be > */
2115 SETs(boolSV(auv > (UV)biv));
2118 { /* ## IV > UV ## */
2119 const IV aiv = SvIVX(TOPm1s);
2123 /* As (b) is a UV, it's >=0, so it cannot be > */
2130 SETs(boolSV((UV)aiv > buv));
2137 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2139 if (Perl_isnan(left) || Perl_isnan(right))
2141 SETs(boolSV(left > right));
2144 SETs(boolSV(SvNV_nomg(TOPs) > value));
2153 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
2154 #ifdef PERL_PRESERVE_IVUV
2155 SvIV_please_nomg(TOPs);
2157 SvIV_please_nomg(TOPm1s);
2158 if (SvIOK(TOPm1s)) {
2159 bool auvok = SvUOK(TOPm1s);
2160 bool buvok = SvUOK(TOPs);
2162 if (!auvok && !buvok) { /* ## IV <= IV ## */
2163 const IV aiv = SvIVX(TOPm1s);
2164 const IV biv = SvIVX(TOPs);
2167 SETs(boolSV(aiv <= biv));
2170 if (auvok && buvok) { /* ## UV <= UV ## */
2171 UV auv = SvUVX(TOPm1s);
2172 UV buv = SvUVX(TOPs);
2175 SETs(boolSV(auv <= buv));
2178 if (auvok) { /* ## UV <= IV ## */
2180 const IV biv = SvIVX(TOPs);
2184 /* As (a) is a UV, it's >=0, so a cannot be <= */
2189 SETs(boolSV(auv <= (UV)biv));
2192 { /* ## IV <= UV ## */
2193 const IV aiv = SvIVX(TOPm1s);
2197 /* As (b) is a UV, it's >=0, so a must be <= */
2204 SETs(boolSV((UV)aiv <= buv));
2211 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2213 if (Perl_isnan(left) || Perl_isnan(right))
2215 SETs(boolSV(left <= right));
2218 SETs(boolSV(SvNV_nomg(TOPs) <= value));
2227 tryAMAGICbin_MG(ge_amg,AMGf_set|AMGf_numeric);
2228 #ifdef PERL_PRESERVE_IVUV
2229 SvIV_please_nomg(TOPs);
2231 SvIV_please_nomg(TOPm1s);
2232 if (SvIOK(TOPm1s)) {
2233 bool auvok = SvUOK(TOPm1s);
2234 bool buvok = SvUOK(TOPs);
2236 if (!auvok && !buvok) { /* ## IV >= IV ## */
2237 const IV aiv = SvIVX(TOPm1s);
2238 const IV biv = SvIVX(TOPs);
2241 SETs(boolSV(aiv >= biv));
2244 if (auvok && buvok) { /* ## UV >= UV ## */
2245 const UV auv = SvUVX(TOPm1s);
2246 const UV buv = SvUVX(TOPs);
2249 SETs(boolSV(auv >= buv));
2252 if (auvok) { /* ## UV >= IV ## */
2254 const IV biv = SvIVX(TOPs);
2258 /* As (a) is a UV, it's >=0, so it must be >= */
2263 SETs(boolSV(auv >= (UV)biv));
2266 { /* ## IV >= UV ## */
2267 const IV aiv = SvIVX(TOPm1s);
2271 /* As (b) is a UV, it's >=0, so a cannot be >= */
2278 SETs(boolSV((UV)aiv >= buv));
2285 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2287 if (Perl_isnan(left) || Perl_isnan(right))
2289 SETs(boolSV(left >= right));
2292 SETs(boolSV(SvNV_nomg(TOPs) >= value));
2301 tryAMAGICbin_MG(ne_amg,AMGf_set|AMGf_numeric);
2302 #ifdef PERL_PRESERVE_IVUV
2303 SvIV_please_nomg(TOPs);
2305 SvIV_please_nomg(TOPm1s);
2306 if (SvIOK(TOPm1s)) {
2307 const bool auvok = SvUOK(TOPm1s);
2308 const bool buvok = SvUOK(TOPs);
2310 if (auvok == buvok) { /* ## IV == IV or UV == UV ## */
2311 /* Casting IV to UV before comparison isn't going to matter
2312 on 2s complement. On 1s complement or sign&magnitude
2313 (if we have any of them) it could make negative zero
2314 differ from normal zero. As I understand it. (Need to
2315 check - is negative zero implementation defined behaviour
2317 const UV buv = SvUVX(POPs);
2318 const UV auv = SvUVX(TOPs);
2320 SETs(boolSV(auv != buv));
2323 { /* ## Mixed IV,UV ## */
2327 /* != is commutative so swap if needed (save code) */
2329 /* swap. top of stack (b) is the iv */
2333 /* As (a) is a UV, it's >0, so it cannot be == */
2342 /* As (b) is a UV, it's >0, so it cannot be == */
2346 uv = SvUVX(*(SP+1)); /* Do I want TOPp1s() ? */
2348 SETs(boolSV((UV)iv != uv));
2355 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2357 if (Perl_isnan(left) || Perl_isnan(right))
2359 SETs(boolSV(left != right));
2362 SETs(boolSV(SvNV_nomg(TOPs) != value));
2371 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2372 #ifdef PERL_PRESERVE_IVUV
2373 /* Fortunately it seems NaN isn't IOK */
2374 SvIV_please_nomg(TOPs);
2376 SvIV_please_nomg(TOPm1s);
2377 if (SvIOK(TOPm1s)) {
2378 const bool leftuvok = SvUOK(TOPm1s);
2379 const bool rightuvok = SvUOK(TOPs);
2381 if (!leftuvok && !rightuvok) { /* ## IV <=> IV ## */
2382 const IV leftiv = SvIVX(TOPm1s);
2383 const IV rightiv = SvIVX(TOPs);
2385 if (leftiv > rightiv)
2387 else if (leftiv < rightiv)
2391 } else if (leftuvok && rightuvok) { /* ## UV <=> UV ## */
2392 const UV leftuv = SvUVX(TOPm1s);
2393 const UV rightuv = SvUVX(TOPs);
2395 if (leftuv > rightuv)
2397 else if (leftuv < rightuv)
2401 } else if (leftuvok) { /* ## UV <=> IV ## */
2402 const IV rightiv = SvIVX(TOPs);
2404 /* As (a) is a UV, it's >=0, so it cannot be < */
2407 const UV leftuv = SvUVX(TOPm1s);
2408 if (leftuv > (UV)rightiv) {
2410 } else if (leftuv < (UV)rightiv) {
2416 } else { /* ## IV <=> UV ## */
2417 const IV leftiv = SvIVX(TOPm1s);
2419 /* As (b) is a UV, it's >=0, so it must be < */
2422 const UV rightuv = SvUVX(TOPs);
2423 if ((UV)leftiv > rightuv) {
2425 } else if ((UV)leftiv < rightuv) {
2442 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2443 if (Perl_isnan(left) || Perl_isnan(right)) {
2447 value = (left > right) - (left < right);
2451 else if (left < right)
2453 else if (left > right)
2469 int amg_type = sle_amg;
2473 switch (PL_op->op_type) {
2492 tryAMAGICbin_MG(amg_type, AMGf_set);
2495 const int cmp = (IN_LOCALE_RUNTIME
2496 ? sv_cmp_locale_flags(left, right, 0)
2497 : sv_cmp_flags(left, right, 0));
2498 SETs(boolSV(cmp * multiplier < rhs));
2506 tryAMAGICbin_MG(seq_amg, AMGf_set);
2509 SETs(boolSV(sv_eq_flags(left, right, 0)));
2517 tryAMAGICbin_MG(sne_amg, AMGf_set);
2520 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2528 tryAMAGICbin_MG(scmp_amg, 0);
2531 const int cmp = (IN_LOCALE_RUNTIME
2532 ? sv_cmp_locale_flags(left, right, 0)
2533 : sv_cmp_flags(left, right, 0));
2541 dVAR; dSP; dATARGET;
2542 tryAMAGICbin_MG(band_amg, AMGf_assign);
2545 if (SvNIOKp(left) || SvNIOKp(right)) {
2546 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2547 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2548 if (PL_op->op_private & HINT_INTEGER) {
2549 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2553 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2556 if (left_ro_nonnum) SvNIOK_off(left);
2557 if (right_ro_nonnum) SvNIOK_off(right);
2560 do_vop(PL_op->op_type, TARG, left, right);
2569 dVAR; dSP; dATARGET;
2570 const int op_type = PL_op->op_type;
2572 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2575 if (SvNIOKp(left) || SvNIOKp(right)) {
2576 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2577 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2578 if (PL_op->op_private & HINT_INTEGER) {
2579 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2580 const IV r = SvIV_nomg(right);
2581 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2585 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2586 const UV r = SvUV_nomg(right);
2587 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2590 if (left_ro_nonnum) SvNIOK_off(left);
2591 if (right_ro_nonnum) SvNIOK_off(right);
2594 do_vop(op_type, TARG, left, right);
2604 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2606 SV * const sv = TOPs;
2607 const int flags = SvFLAGS(sv);
2609 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
2613 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2614 /* It's publicly an integer, or privately an integer-not-float */
2617 if (SvIVX(sv) == IV_MIN) {
2618 /* 2s complement assumption. */
2619 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2622 else if (SvUVX(sv) <= IV_MAX) {
2627 else if (SvIVX(sv) != IV_MIN) {
2631 #ifdef PERL_PRESERVE_IVUV
2639 SETn(-SvNV_nomg(sv));
2640 else if (SvPOKp(sv)) {
2642 const char * const s = SvPV_nomg_const(sv, len);
2643 if (isIDFIRST(*s)) {
2644 sv_setpvs(TARG, "-");
2647 else if (*s == '+' || *s == '-') {
2648 sv_setsv_nomg(TARG, sv);
2649 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2651 else if (DO_UTF8(sv)) {
2652 SvIV_please_nomg(sv);
2654 goto oops_its_an_int;
2656 sv_setnv(TARG, -SvNV_nomg(sv));
2658 sv_setpvs(TARG, "-");
2663 SvIV_please_nomg(sv);
2665 goto oops_its_an_int;
2666 sv_setnv(TARG, -SvNV_nomg(sv));
2671 SETn(-SvNV_nomg(sv));
2679 tryAMAGICun_MG(not_amg, AMGf_set);
2680 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2687 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2691 if (PL_op->op_private & HINT_INTEGER) {
2692 const IV i = ~SvIV_nomg(sv);
2696 const UV u = ~SvUV_nomg(sv);
2705 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2706 sv_setsv_nomg(TARG, sv);
2707 tmps = (U8*)SvPV_force_nomg(TARG, len);
2710 /* Calculate exact length, let's not estimate. */
2715 U8 * const send = tmps + len;
2716 U8 * const origtmps = tmps;
2717 const UV utf8flags = UTF8_ALLOW_ANYUV;
2719 while (tmps < send) {
2720 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2722 targlen += UNISKIP(~c);
2728 /* Now rewind strings and write them. */
2735 Newx(result, targlen + 1, U8);
2737 while (tmps < send) {
2738 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2740 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2743 sv_usepvn_flags(TARG, (char*)result, targlen,
2744 SV_HAS_TRAILING_NUL);
2751 Newx(result, nchar + 1, U8);
2753 while (tmps < send) {
2754 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2759 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2767 register long *tmpl;
2768 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2771 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2776 for ( ; anum > 0; anum--, tmps++)
2784 /* integer versions of some of the above */
2788 dVAR; dSP; dATARGET;
2789 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2792 SETi( left * right );
2800 dVAR; dSP; dATARGET;
2801 tryAMAGICbin_MG(div_amg, AMGf_assign);
2804 IV value = SvIV_nomg(right);
2806 DIE(aTHX_ "Illegal division by zero");
2807 num = SvIV_nomg(left);
2809 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2813 value = num / value;
2819 #if defined(__GLIBC__) && IVSIZE == 8
2826 /* This is the vanilla old i_modulo. */
2827 dVAR; dSP; dATARGET;
2828 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2832 DIE(aTHX_ "Illegal modulus zero");
2833 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2837 SETi( left % right );
2842 #if defined(__GLIBC__) && IVSIZE == 8
2847 /* This is the i_modulo with the workaround for the _moddi3 bug
2848 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2849 * See below for pp_i_modulo. */
2850 dVAR; dSP; dATARGET;
2851 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2855 DIE(aTHX_ "Illegal modulus zero");
2856 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2860 SETi( left % PERL_ABS(right) );
2867 dVAR; dSP; dATARGET;
2868 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2872 DIE(aTHX_ "Illegal modulus zero");
2873 /* The assumption is to use hereafter the old vanilla version... */
2875 PL_ppaddr[OP_I_MODULO] =
2877 /* .. but if we have glibc, we might have a buggy _moddi3
2878 * (at least glicb 2.2.5 is known to have this bug), in other
2879 * words our integer modulus with negative quad as the second
2880 * argument might be broken. Test for this and re-patch the
2881 * opcode dispatch table if that is the case, remembering to
2882 * also apply the workaround so that this first round works
2883 * right, too. See [perl #9402] for more information. */
2887 /* Cannot do this check with inlined IV constants since
2888 * that seems to work correctly even with the buggy glibc. */
2890 /* Yikes, we have the bug.
2891 * Patch in the workaround version. */
2893 PL_ppaddr[OP_I_MODULO] =
2894 &Perl_pp_i_modulo_1;
2895 /* Make certain we work right this time, too. */
2896 right = PERL_ABS(right);
2899 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2903 SETi( left % right );
2911 dVAR; dSP; dATARGET;
2912 tryAMAGICbin_MG(add_amg, AMGf_assign);
2914 dPOPTOPiirl_ul_nomg;
2915 SETi( left + right );
2922 dVAR; dSP; dATARGET;
2923 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2925 dPOPTOPiirl_ul_nomg;
2926 SETi( left - right );
2934 tryAMAGICbin_MG(lt_amg, AMGf_set);
2937 SETs(boolSV(left < right));
2945 tryAMAGICbin_MG(gt_amg, AMGf_set);
2948 SETs(boolSV(left > right));
2956 tryAMAGICbin_MG(le_amg, AMGf_set);
2959 SETs(boolSV(left <= right));
2967 tryAMAGICbin_MG(ge_amg, AMGf_set);
2970 SETs(boolSV(left >= right));
2978 tryAMAGICbin_MG(eq_amg, AMGf_set);
2981 SETs(boolSV(left == right));
2989 tryAMAGICbin_MG(ne_amg, AMGf_set);
2992 SETs(boolSV(left != right));
3000 tryAMAGICbin_MG(ncmp_amg, 0);
3007 else if (left < right)
3019 tryAMAGICun_MG(neg_amg, 0);
3021 SV * const sv = TOPs;
3022 IV const i = SvIV_nomg(sv);
3028 /* High falutin' math. */
3033 tryAMAGICbin_MG(atan2_amg, 0);
3036 SETn(Perl_atan2(left, right));
3044 int amg_type = sin_amg;
3045 const char *neg_report = NULL;
3046 NV (*func)(NV) = Perl_sin;
3047 const int op_type = PL_op->op_type;
3064 amg_type = sqrt_amg;
3066 neg_report = "sqrt";
3071 tryAMAGICun_MG(amg_type, 0);
3073 SV * const arg = POPs;
3074 const NV value = SvNV_nomg(arg);
3076 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
3077 SET_NUMERIC_STANDARD();
3078 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
3081 XPUSHn(func(value));
3086 /* Support Configure command-line overrides for rand() functions.
3087 After 5.005, perhaps we should replace this by Configure support
3088 for drand48(), random(), or rand(). For 5.005, though, maintain
3089 compatibility by calling rand() but allow the user to override it.
3090 See INSTALL for details. --Andy Dougherty 15 July 1998
3092 /* Now it's after 5.005, and Configure supports drand48() and random(),
3093 in addition to rand(). So the overrides should not be needed any more.
3094 --Jarkko Hietaniemi 27 September 1998
3097 #ifndef HAS_DRAND48_PROTO
3098 extern double drand48 (void);
3111 if (!PL_srand_called) {
3112 (void)seedDrand01((Rand_seed_t)seed());
3113 PL_srand_called = TRUE;
3123 const UV anum = (MAXARG < 1) ? seed() : POPu;
3124 (void)seedDrand01((Rand_seed_t)anum);
3125 PL_srand_called = TRUE;
3129 /* Historically srand always returned true. We can avoid breaking
3131 sv_setpvs(TARG, "0 but true");
3140 tryAMAGICun_MG(int_amg, AMGf_numeric);
3142 SV * const sv = TOPs;
3143 const IV iv = SvIV_nomg(sv);
3144 /* XXX it's arguable that compiler casting to IV might be subtly
3145 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
3146 else preferring IV has introduced a subtle behaviour change bug. OTOH
3147 relying on floating point to be accurate is a bug. */
3152 else if (SvIOK(sv)) {
3154 SETu(SvUV_nomg(sv));
3159 const NV value = SvNV_nomg(sv);
3161 if (value < (NV)UV_MAX + 0.5) {
3164 SETn(Perl_floor(value));
3168 if (value > (NV)IV_MIN - 0.5) {
3171 SETn(Perl_ceil(value));
3182 tryAMAGICun_MG(abs_amg, AMGf_numeric);
3184 SV * const sv = TOPs;
3185 /* This will cache the NV value if string isn't actually integer */
3186 const IV iv = SvIV_nomg(sv);
3191 else if (SvIOK(sv)) {
3192 /* IVX is precise */
3194 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
3202 /* 2s complement assumption. Also, not really needed as
3203 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
3209 const NV value = SvNV_nomg(sv);
3223 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
3227 SV* const sv = POPs;
3229 tmps = (SvPV_const(sv, len));
3231 /* If Unicode, try to downgrade
3232 * If not possible, croak. */
3233 SV* const tsv = sv_2mortal(newSVsv(sv));
3236 sv_utf8_downgrade(tsv, FALSE);
3237 tmps = SvPV_const(tsv, len);
3239 if (PL_op->op_type == OP_HEX)
3242 while (*tmps && len && isSPACE(*tmps))
3246 if (*tmps == 'x' || *tmps == 'X') {
3248 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
3250 else if (*tmps == 'b' || *tmps == 'B')
3251 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
3253 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
3255 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
3269 SV * const sv = TOPs;
3271 if (SvGAMAGIC(sv)) {
3272 /* For an overloaded or magic scalar, we can't know in advance if
3273 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
3274 it likes to cache the length. Maybe that should be a documented
3279 = sv_2pv_flags(sv, &len,
3280 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
3283 if (!SvPADTMP(TARG)) {
3284 sv_setsv(TARG, &PL_sv_undef);
3289 else if (DO_UTF8(sv)) {
3290 SETi(utf8_length((U8*)p, (U8*)p + len));
3294 } else if (SvOK(sv)) {
3295 /* Neither magic nor overloaded. */
3297 SETi(sv_len_utf8(sv));
3301 if (!SvPADTMP(TARG)) {
3302 sv_setsv_nomg(TARG, &PL_sv_undef);
3324 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3326 const IV arybase = CopARYBASE_get(PL_curcop);
3328 const char *repl = NULL;
3330 const int num_args = PL_op->op_private & 7;
3331 bool repl_need_utf8_upgrade = FALSE;
3332 bool repl_is_utf8 = FALSE;
3337 repl = SvPV_const(repl_sv, repl_len);
3338 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
3341 len_iv = SvIV(len_sv);
3342 len_is_uv = SvIOK_UV(len_sv);
3345 pos1_iv = SvIV(pos_sv);
3346 pos1_is_uv = SvIOK_UV(pos_sv);
3352 sv_utf8_upgrade(sv);
3354 else if (DO_UTF8(sv))
3355 repl_need_utf8_upgrade = TRUE;
3357 tmps = SvPV_const(sv, curlen);
3359 utf8_curlen = sv_len_utf8(sv);
3360 if (utf8_curlen == curlen)
3363 curlen = utf8_curlen;
3368 if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
3369 UV pos1_uv = pos1_iv-arybase;
3370 /* Overflow can occur when $[ < 0 */
3371 if (arybase < 0 && pos1_uv < (UV)pos1_iv)
3376 else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
3377 goto bound_fail; /* $[=3; substr($_,2,...) */
3379 else { /* pos < $[ */
3380 if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
3385 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3390 if (pos1_is_uv || pos1_iv > 0) {
3391 if ((UV)pos1_iv > curlen)
3396 if (!len_is_uv && len_iv < 0) {
3397 pos2_iv = curlen + len_iv;
3399 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3402 } else { /* len_iv >= 0 */
3403 if (!pos1_is_uv && pos1_iv < 0) {
3404 pos2_iv = pos1_iv + len_iv;
3405 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3407 if ((UV)len_iv > curlen-(UV)pos1_iv)
3410 pos2_iv = pos1_iv+len_iv;
3420 if (!pos2_is_uv && pos2_iv < 0) {
3421 if (!pos1_is_uv && pos1_iv < 0)
3425 else if (!pos1_is_uv && pos1_iv < 0)
3428 if ((UV)pos2_iv < (UV)pos1_iv)
3430 if ((UV)pos2_iv > curlen)
3434 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3435 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3436 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3437 STRLEN byte_len = len;
3438 STRLEN byte_pos = utf8_curlen
3439 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3441 if (lvalue && !repl) {
3444 if (!SvGMAGICAL(sv)) {
3446 SvPV_force_nolen(sv);
3447 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3448 "Attempt to use reference as lvalue in substr");
3450 if (isGV_with_GP(sv))
3451 SvPV_force_nolen(sv);
3452 else if (SvOK(sv)) /* is it defined ? */
3453 (void)SvPOK_only_UTF8(sv);
3455 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3458 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3459 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3461 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3462 LvTARGOFF(ret) = pos;
3463 LvTARGLEN(ret) = len;
3466 PUSHs(ret); /* avoid SvSETMAGIC here */
3470 SvTAINTED_off(TARG); /* decontaminate */
3471 SvUTF8_off(TARG); /* decontaminate */
3474 sv_setpvn(TARG, tmps, byte_len);
3475 #ifdef USE_LOCALE_COLLATE
3476 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3482 SV* repl_sv_copy = NULL;
3484 if (repl_need_utf8_upgrade) {
3485 repl_sv_copy = newSVsv(repl_sv);
3486 sv_utf8_upgrade(repl_sv_copy);
3487 repl = SvPV_const(repl_sv_copy, repl_len);
3488 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3492 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3495 SvREFCNT_dec(repl_sv_copy);
3505 Perl_croak(aTHX_ "substr outside of string");
3506 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3513 register const IV size = POPi;
3514 register const IV offset = POPi;
3515 register SV * const src = POPs;
3516 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3519 if (lvalue) { /* it's an lvalue! */
3520 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3521 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3523 LvTARG(ret) = SvREFCNT_inc_simple(src);
3524 LvTARGOFF(ret) = offset;
3525 LvTARGLEN(ret) = size;
3529 SvTAINTED_off(TARG); /* decontaminate */
3533 sv_setuv(ret, do_vecget(src, offset, size));
3549 const char *little_p;
3550 const I32 arybase = CopARYBASE_get(PL_curcop);
3553 const bool is_index = PL_op->op_type == OP_INDEX;
3556 /* arybase is in characters, like offset, so combine prior to the
3557 UTF-8 to bytes calculation. */
3558 offset = POPi - arybase;
3562 big_p = SvPV_const(big, biglen);
3563 little_p = SvPV_const(little, llen);
3565 big_utf8 = DO_UTF8(big);
3566 little_utf8 = DO_UTF8(little);
3567 if (big_utf8 ^ little_utf8) {
3568 /* One needs to be upgraded. */
3569 if (little_utf8 && !PL_encoding) {
3570 /* Well, maybe instead we might be able to downgrade the small
3572 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3575 /* If the large string is ISO-8859-1, and it's not possible to
3576 convert the small string to ISO-8859-1, then there is no
3577 way that it could be found anywhere by index. */
3582 /* At this point, pv is a malloc()ed string. So donate it to temp
3583 to ensure it will get free()d */
3584 little = temp = newSV(0);
3585 sv_usepvn(temp, pv, llen);
3586 little_p = SvPVX(little);
3589 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3592 sv_recode_to_utf8(temp, PL_encoding);
3594 sv_utf8_upgrade(temp);
3599 big_p = SvPV_const(big, biglen);
3602 little_p = SvPV_const(little, llen);
3606 if (SvGAMAGIC(big)) {
3607 /* Life just becomes a lot easier if I use a temporary here.
3608 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3609 will trigger magic and overloading again, as will fbm_instr()
3611 big = newSVpvn_flags(big_p, biglen,
3612 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3615 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3616 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3617 warn on undef, and we've already triggered a warning with the
3618 SvPV_const some lines above. We can't remove that, as we need to
3619 call some SvPV to trigger overloading early and find out if the
3621 This is all getting to messy. The API isn't quite clean enough,
3622 because data access has side effects.
3624 little = newSVpvn_flags(little_p, llen,
3625 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3626 little_p = SvPVX(little);
3630 offset = is_index ? 0 : biglen;
3632 if (big_utf8 && offset > 0)
3633 sv_pos_u2b(big, &offset, 0);
3639 else if (offset > (I32)biglen)
3641 if (!(little_p = is_index
3642 ? fbm_instr((unsigned char*)big_p + offset,
3643 (unsigned char*)big_p + biglen, little, 0)
3644 : rninstr(big_p, big_p + offset,
3645 little_p, little_p + llen)))
3648 retval = little_p - big_p;
3649 if (retval > 0 && big_utf8)
3650 sv_pos_b2u(big, &retval);
3654 PUSHi(retval + arybase);
3660 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3661 SvTAINTED_off(TARG);
3662 do_sprintf(TARG, SP-MARK, MARK+1);
3663 TAINT_IF(SvTAINTED(TARG));
3675 const U8 *s = (U8*)SvPV_const(argsv, len);
3677 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3678 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3679 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3683 XPUSHu(DO_UTF8(argsv) ?
3684 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3696 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3698 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3700 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3702 (void) POPs; /* Ignore the argument value. */
3703 value = UNICODE_REPLACEMENT;
3709 SvUPGRADE(TARG,SVt_PV);
3711 if (value > 255 && !IN_BYTES) {
3712 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3713 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3714 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3716 (void)SvPOK_only(TARG);
3725 *tmps++ = (char)value;
3727 (void)SvPOK_only(TARG);
3729 if (PL_encoding && !IN_BYTES) {
3730 sv_recode_to_utf8(TARG, PL_encoding);
3732 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3733 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3737 *tmps++ = (char)value;
3753 const char *tmps = SvPV_const(left, len);
3755 if (DO_UTF8(left)) {
3756 /* If Unicode, try to downgrade.
3757 * If not possible, croak.
3758 * Yes, we made this up. */
3759 SV* const tsv = sv_2mortal(newSVsv(left));
3762 sv_utf8_downgrade(tsv, FALSE);
3763 tmps = SvPV_const(tsv, len);
3765 # ifdef USE_ITHREADS
3767 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3768 /* This should be threadsafe because in ithreads there is only
3769 * one thread per interpreter. If this would not be true,
3770 * we would need a mutex to protect this malloc. */
3771 PL_reentrant_buffer->_crypt_struct_buffer =
3772 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3773 #if defined(__GLIBC__) || defined(__EMX__)
3774 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3775 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3776 /* work around glibc-2.2.5 bug */
3777 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3781 # endif /* HAS_CRYPT_R */
3782 # endif /* USE_ITHREADS */
3784 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3786 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3792 "The crypt() function is unimplemented due to excessive paranoia.");
3796 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3797 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3799 /* Below are several macros that generate code */
3800 /* Generates code to store a unicode codepoint c that is known to occupy
3801 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3802 #define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3804 *(p) = UTF8_TWO_BYTE_HI(c); \
3805 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3808 /* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3809 * available byte after the two bytes */
3810 #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3812 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3813 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3816 /* Generates code to store the upper case of latin1 character l which is known
3817 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3818 * are only two characters that fit this description, and this macro knows
3819 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3821 #define STORE_NON_LATIN1_UC(p, l) \
3823 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3824 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3825 } else { /* Must be the following letter */ \
3826 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3830 /* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3831 * after the character stored */
3832 #define CAT_NON_LATIN1_UC(p, l) \
3834 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3835 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3837 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3841 /* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3842 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3843 * and must require two bytes to store it. Advances p to point to the next
3844 * available position */
3845 #define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3847 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3848 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3849 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3850 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3851 } else {/* else is one of the other two special cases */ \
3852 CAT_NON_LATIN1_UC((p), (l)); \
3858 /* Actually is both lcfirst() and ucfirst(). Only the first character
3859 * changes. This means that possibly we can change in-place, ie., just
3860 * take the source and change that one character and store it back, but not
3861 * if read-only etc, or if the length changes */
3866 STRLEN slen; /* slen is the byte length of the whole SV. */
3869 bool inplace; /* ? Convert first char only, in-place */
3870 bool doing_utf8 = FALSE; /* ? using utf8 */
3871 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3872 const int op_type = PL_op->op_type;
3875 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3876 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3877 * stored as UTF-8 at s. */
3878 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3879 * lowercased) character stored in tmpbuf. May be either
3880 * UTF-8 or not, but in either case is the number of bytes */
3884 s = (const U8*)SvPV_nomg_const(source, slen);
3886 if (ckWARN(WARN_UNINITIALIZED))
3887 report_uninit(source);
3892 /* We may be able to get away with changing only the first character, in
3893 * place, but not if read-only, etc. Later we may discover more reasons to
3894 * not convert in-place. */
3895 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3897 /* First calculate what the changed first character should be. This affects
3898 * whether we can just swap it out, leaving the rest of the string unchanged,
3899 * or even if have to convert the dest to UTF-8 when the source isn't */
3901 if (! slen) { /* If empty */
3902 need = 1; /* still need a trailing NUL */
3904 else if (DO_UTF8(source)) { /* Is the source utf8? */
3907 /* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
3908 * and doesn't allow for the user to specify their own. When code is added to
3909 * detect if there is a user-defined mapping in force here, and if so to use
3910 * that, then the code below can be compiled. The detection would be a good
3911 * thing anyway, as currently the user-defined mappings only work on utf8
3912 * strings, and thus depend on the chosen internal storage method, which is a
3914 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3915 if (UTF8_IS_INVARIANT(*s)) {
3917 /* An invariant source character is either ASCII or, in EBCDIC, an
3918 * ASCII equivalent or a caseless C1 control. In both these cases,
3919 * the lower and upper cases of any character are also invariants
3920 * (and title case is the same as upper case). So it is safe to
3921 * use the simple case change macros which avoid the overhead of
3922 * the general functions. Note that if perl were to be extended to
3923 * do locale handling in UTF-8 strings, this wouldn't be true in,
3924 * for example, Lithuanian or Turkic. */
3925 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3929 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3932 /* Similarly, if the source character isn't invariant but is in the
3933 * latin1 range (or EBCDIC equivalent thereof), we have the case
3934 * changes compiled into perl, and can avoid the overhead of the
3935 * general functions. In this range, the characters are stored as
3936 * two UTF-8 bytes, and it so happens that any changed-case version
3937 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3941 /* Convert the two source bytes to a single Unicode code point
3942 * value, change case and save for below */
3943 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3944 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3945 U8 lower = toLOWER_LATIN1(chr);
3946 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3948 else { /* ucfirst */
3949 U8 upper = toUPPER_LATIN1_MOD(chr);
3951 /* Most of the latin1 range characters are well-behaved. Their
3952 * title and upper cases are the same, and are also in the
3953 * latin1 range. The macro above returns their upper (hence
3954 * title) case, and all that need be done is to save the result
3955 * for below. However, several characters are problematic, and
3956 * have to be handled specially. The MOD in the macro name
3957 * above means that these tricky characters all get mapped to
3958 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3959 * This mapping saves some tests for the majority of the
3962 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3964 /* Not tricky. Just save it. */
3965 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3967 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3969 /* This one is tricky because it is two characters long,
3970 * though the UTF-8 is still two bytes, so the stored
3971 * length doesn't change */
3972 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3973 *(tmpbuf + 1) = 's';
3977 /* The other two have their title and upper cases the same,
3978 * but are tricky because the changed-case characters
3979 * aren't in the latin1 range. They, however, do fit into
3980 * two UTF-8 bytes */
3981 STORE_NON_LATIN1_UC(tmpbuf, chr);
3986 #endif /* end of dont want to break user-defined casing */
3988 /* Here, can't short-cut the general case */
3990 utf8_to_uvchr(s, &ulen);
3991 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3992 else toLOWER_utf8(s, tmpbuf, &tculen);
3994 /* we can't do in-place if the length changes. */
3995 if (ulen != tculen) inplace = FALSE;
3996 need = slen + 1 - ulen + tculen;
3997 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4001 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
4002 * latin1 is treated as caseless. Note that a locale takes
4004 tculen = 1; /* Most characters will require one byte, but this will
4005 * need to be overridden for the tricky ones */
4008 if (op_type == OP_LCFIRST) {
4010 /* lower case the first letter: no trickiness for any character */
4011 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
4012 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
4015 else if (IN_LOCALE_RUNTIME) {
4016 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
4017 * have upper and title case different
4020 else if (! IN_UNI_8_BIT) {
4021 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
4022 * on EBCDIC machines whatever the
4023 * native function does */
4025 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
4026 *tmpbuf = toUPPER_LATIN1_MOD(*s);
4028 /* tmpbuf now has the correct title case for all latin1 characters
4029 * except for the several ones that have tricky handling. All
4030 * of these are mapped by the MOD to the letter below. */
4031 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
4033 /* The length is going to change, with all three of these, so
4034 * can't replace just the first character */
4037 /* We use the original to distinguish between these tricky
4039 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4040 /* Two character title case 'Ss', but can remain non-UTF-8 */
4043 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
4048 /* The other two tricky ones have their title case outside
4049 * latin1. It is the same as their upper case. */
4051 STORE_NON_LATIN1_UC(tmpbuf, *s);
4053 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
4054 * and their upper cases is 2. */
4057 /* The entire result will have to be in UTF-8. Assume worst
4058 * case sizing in conversion. (all latin1 characters occupy
4059 * at most two bytes in utf8) */
4060 convert_source_to_utf8 = TRUE;
4061 need = slen * 2 + 1;
4063 } /* End of is one of the three special chars */
4064 } /* End of use Unicode (Latin1) semantics */
4065 } /* End of changing the case of the first character */
4067 /* Here, have the first character's changed case stored in tmpbuf. Ready to
4068 * generate the result */
4071 /* We can convert in place. This means we change just the first
4072 * character without disturbing the rest; no need to grow */
4074 s = d = (U8*)SvPV_force_nomg(source, slen);
4080 /* Here, we can't convert in place; we earlier calculated how much
4081 * space we will need, so grow to accommodate that */
4082 SvUPGRADE(dest, SVt_PV);
4083 d = (U8*)SvGROW(dest, need);
4084 (void)SvPOK_only(dest);
4091 if (! convert_source_to_utf8) {
4093 /* Here both source and dest are in UTF-8, but have to create
4094 * the entire output. We initialize the result to be the
4095 * title/lower cased first character, and then append the rest
4097 sv_setpvn(dest, (char*)tmpbuf, tculen);
4099 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
4103 const U8 *const send = s + slen;
4105 /* Here the dest needs to be in UTF-8, but the source isn't,
4106 * except we earlier UTF-8'd the first character of the source
4107 * into tmpbuf. First put that into dest, and then append the
4108 * rest of the source, converting it to UTF-8 as we go. */
4110 /* Assert tculen is 2 here because the only two characters that
4111 * get to this part of the code have 2-byte UTF-8 equivalents */
4113 *d++ = *(tmpbuf + 1);
4114 s++; /* We have just processed the 1st char */
4116 for (; s < send; s++) {
4117 d = uvchr_to_utf8(d, *s);
4120 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4124 else { /* in-place UTF-8. Just overwrite the first character */
4125 Copy(tmpbuf, d, tculen, U8);
4126 SvCUR_set(dest, need - 1);
4129 else { /* Neither source nor dest are in or need to be UTF-8 */
4131 if (IN_LOCALE_RUNTIME) {
4135 if (inplace) { /* in-place, only need to change the 1st char */
4138 else { /* Not in-place */
4140 /* Copy the case-changed character(s) from tmpbuf */
4141 Copy(tmpbuf, d, tculen, U8);
4142 d += tculen - 1; /* Code below expects d to point to final
4143 * character stored */
4146 else { /* empty source */
4147 /* See bug #39028: Don't taint if empty */
4151 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
4152 * the destination to retain that flag */
4156 if (!inplace) { /* Finish the rest of the string, unchanged */
4157 /* This will copy the trailing NUL */
4158 Copy(s + 1, d + 1, slen, U8);
4159 SvCUR_set(dest, need - 1);
4162 if (dest != source && SvTAINTED(source))
4168 /* There's so much setup/teardown code common between uc and lc, I wonder if
4169 it would be worth merging the two, and just having a switch outside each
4170 of the three tight loops. There is less and less commonality though */
4184 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4185 && SvTEMP(source) && !DO_UTF8(source)
4186 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
4188 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
4189 * make the loop tight, so we overwrite the source with the dest before
4190 * looking at it, and we need to look at the original source
4191 * afterwards. There would also need to be code added to handle
4192 * switching to not in-place in midstream if we run into characters
4193 * that change the length.
4196 s = d = (U8*)SvPV_force_nomg(source, len);
4203 /* The old implementation would copy source into TARG at this point.
4204 This had the side effect that if source was undef, TARG was now
4205 an undefined SV with PADTMP set, and they don't warn inside
4206 sv_2pv_flags(). However, we're now getting the PV direct from
4207 source, which doesn't have PADTMP set, so it would warn. Hence the
4211 s = (const U8*)SvPV_nomg_const(source, len);
4213 if (ckWARN(WARN_UNINITIALIZED))
4214 report_uninit(source);
4220 SvUPGRADE(dest, SVt_PV);
4221 d = (U8*)SvGROW(dest, min);
4222 (void)SvPOK_only(dest);
4227 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4228 to check DO_UTF8 again here. */
4230 if (DO_UTF8(source)) {
4231 const U8 *const send = s + len;
4232 U8 tmpbuf[UTF8_MAXBYTES+1];
4234 /* All occurrences of these are to be moved to follow any other marks.
4235 * This is context-dependent. We may not be passed enough context to
4236 * move the iota subscript beyond all of them, but we do the best we can
4237 * with what we're given. The result is always better than if we
4238 * hadn't done this. And, the problem would only arise if we are
4239 * passed a character without all its combining marks, which would be
4240 * the caller's mistake. The information this is based on comes from a
4241 * comment in Unicode SpecialCasing.txt, (and the Standard's text
4242 * itself) and so can't be checked properly to see if it ever gets
4243 * revised. But the likelihood of it changing is remote */
4244 bool in_iota_subscript = FALSE;
4247 if (in_iota_subscript && ! is_utf8_mark(s)) {
4248 /* A non-mark. Time to output the iota subscript */
4249 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
4250 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
4252 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
4253 in_iota_subscript = FALSE;
4257 /* See comments at the first instance in this file of this ifdef */
4258 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4260 /* If the UTF-8 character is invariant, then it is in the range
4261 * known by the standard macro; result is only one byte long */
4262 if (UTF8_IS_INVARIANT(*s)) {
4266 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4268 /* Likewise, if it fits in a byte, its case change is in our
4270 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++);
4271 U8 upper = toUPPER_LATIN1_MOD(orig);
4272 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
4280 /* Otherwise, need the general UTF-8 case. Get the changed
4281 * case value and copy it to the output buffer */
4283 const STRLEN u = UTF8SKIP(s);
4286 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
4287 if (uv == GREEK_CAPITAL_LETTER_IOTA
4288 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
4290 in_iota_subscript = TRUE;
4293 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4294 /* If the eventually required minimum size outgrows
4295 * the available space, we need to grow. */
4296 const UV o = d - (U8*)SvPVX_const(dest);
4298 /* If someone uppercases one million U+03B0s we
4299 * SvGROW() one million times. Or we could try
4300 * guessing how much to allocate without allocating too
4301 * much. Such is life. See corresponding comment in
4302 * lc code for another option */
4304 d = (U8*)SvPVX(dest) + o;
4306 Copy(tmpbuf, d, ulen, U8);
4312 if (in_iota_subscript) {
4313 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
4317 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4319 else { /* Not UTF-8 */
4321 const U8 *const send = s + len;
4323 /* Use locale casing if in locale; regular style if not treating
4324 * latin1 as having case; otherwise the latin1 casing. Do the
4325 * whole thing in a tight loop, for speed, */
4326 if (IN_LOCALE_RUNTIME) {
4329 for (; s < send; d++, s++)
4330 *d = toUPPER_LC(*s);
4332 else if (! IN_UNI_8_BIT) {
4333 for (; s < send; d++, s++) {
4338 for (; s < send; d++, s++) {
4339 *d = toUPPER_LATIN1_MOD(*s);
4340 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
4342 /* The mainstream case is the tight loop above. To avoid
4343 * extra tests in that, all three characters that require
4344 * special handling are mapped by the MOD to the one tested
4346 * Use the source to distinguish between the three cases */
4348 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4350 /* uc() of this requires 2 characters, but they are
4351 * ASCII. If not enough room, grow the string */
4352 if (SvLEN(dest) < ++min) {
4353 const UV o = d - (U8*)SvPVX_const(dest);
4355 d = (U8*)SvPVX(dest) + o;
4357 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4358 continue; /* Back to the tight loop; still in ASCII */
4361 /* The other two special handling characters have their
4362 * upper cases outside the latin1 range, hence need to be
4363 * in UTF-8, so the whole result needs to be in UTF-8. So,
4364 * here we are somewhere in the middle of processing a
4365 * non-UTF-8 string, and realize that we will have to convert
4366 * the whole thing to UTF-8. What to do? There are
4367 * several possibilities. The simplest to code is to
4368 * convert what we have so far, set a flag, and continue on
4369 * in the loop. The flag would be tested each time through
4370 * the loop, and if set, the next character would be
4371 * converted to UTF-8 and stored. But, I (khw) didn't want
4372 * to slow down the mainstream case at all for this fairly
4373 * rare case, so I didn't want to add a test that didn't
4374 * absolutely have to be there in the loop, besides the
4375 * possibility that it would get too complicated for
4376 * optimizers to deal with. Another possibility is to just
4377 * give up, convert the source to UTF-8, and restart the
4378 * function that way. Another possibility is to convert
4379 * both what has already been processed and what is yet to
4380 * come separately to UTF-8, then jump into the loop that
4381 * handles UTF-8. But the most efficient time-wise of the
4382 * ones I could think of is what follows, and turned out to
4383 * not require much extra code. */
4385 /* Convert what we have so far into UTF-8, telling the
4386 * function that we know it should be converted, and to
4387 * allow extra space for what we haven't processed yet.
4388 * Assume the worst case space requirements for converting
4389 * what we haven't processed so far: that it will require
4390 * two bytes for each remaining source character, plus the
4391 * NUL at the end. This may cause the string pointer to
4392 * move, so re-find it. */
4394 len = d - (U8*)SvPVX_const(dest);
4395 SvCUR_set(dest, len);
4396 len = sv_utf8_upgrade_flags_grow(dest,
4397 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4399 d = (U8*)SvPVX(dest) + len;
4401 /* And append the current character's upper case in UTF-8 */
4402 CAT_NON_LATIN1_UC(d, *s);
4404 /* Now process the remainder of the source, converting to
4405 * upper and UTF-8. If a resulting byte is invariant in
4406 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4407 * append it to the output. */
4410 for (; s < send; s++) {
4411 U8 upper = toUPPER_LATIN1_MOD(*s);
4412 if UTF8_IS_INVARIANT(upper) {
4416 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4420 /* Here have processed the whole source; no need to continue
4421 * with the outer loop. Each character has been converted
4422 * to upper case and converted to UTF-8 */
4425 } /* End of processing all latin1-style chars */
4426 } /* End of processing all chars */
4427 } /* End of source is not empty */
4429 if (source != dest) {
4430 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4431 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4433 } /* End of isn't utf8 */
4434 if (dest != source && SvTAINTED(source))
4453 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4454 && SvTEMP(source) && !DO_UTF8(source)) {
4456 /* We can convert in place, as lowercasing anything in the latin1 range
4457 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4459 s = d = (U8*)SvPV_force_nomg(source, len);
4466 /* The old implementation would copy source into TARG at this point.
4467 This had the side effect that if source was undef, TARG was now
4468 an undefined SV with PADTMP set, and they don't warn inside
4469 sv_2pv_flags(). However, we're now getting the PV direct from
4470 source, which doesn't have PADTMP set, so it would warn. Hence the
4474 s = (const U8*)SvPV_nomg_const(source, len);
4476 if (ckWARN(WARN_UNINITIALIZED))
4477 report_uninit(source);
4483 SvUPGRADE(dest, SVt_PV);
4484 d = (U8*)SvGROW(dest, min);
4485 (void)SvPOK_only(dest);
4490 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4491 to check DO_UTF8 again here. */
4493 if (DO_UTF8(source)) {
4494 const U8 *const send = s + len;
4495 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4498 /* See comments at the first instance in this file of this ifdef */
4499 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4500 if (UTF8_IS_INVARIANT(*s)) {
4502 /* Invariant characters use the standard mappings compiled in.
4507 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4509 /* As do the ones in the Latin1 range */
4510 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++));
4511 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4516 /* Here, is utf8 not in Latin-1 range, have to go out and get
4517 * the mappings from the tables. */
4519 const STRLEN u = UTF8SKIP(s);
4522 #ifndef CONTEXT_DEPENDENT_CASING
4523 toLOWER_utf8(s, tmpbuf, &ulen);
4525 /* This is ifdefd out because it needs more work and thought. It isn't clear
4526 * that we should do it.
4527 * A minor objection is that this is based on a hard-coded rule from the
4528 * Unicode standard, and may change, but this is not very likely at all.
4529 * mktables should check and warn if it does.
4530 * More importantly, if the sigma occurs at the end of the string, we don't
4531 * have enough context to know whether it is part of a larger string or going
4532 * to be or not. It may be that we are passed a subset of the context, via
4533 * a \U...\E, for example, and we could conceivably know the larger context if
4534 * code were changed to pass that in. But, if the string passed in is an
4535 * intermediate result, and the user concatenates two strings together
4536 * after we have made a final sigma, that would be wrong. If the final sigma
4537 * occurs in the middle of the string we are working on, then we know that it
4538 * should be a final sigma, but otherwise we can't be sure. */
4540 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4542 /* If the lower case is a small