3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
33 /* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
37 #ifdef NEED_GETPID_PROTO
38 extern Pid_t getpid (void);
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
45 #if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
49 /* variations on pp_null */
55 if (GIMME_V == G_SCALAR)
66 assert(SvTYPE(TARG) == SVt_PVAV);
67 if (PL_op->op_private & OPpLVAL_INTRO)
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
71 if (PL_op->op_flags & OPf_REF) {
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
84 if (gimme == G_ARRAY) {
85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
87 if (SvMAGICAL(TARG)) {
89 for (i=0; i < (U32)maxarg; i++) {
90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
99 else if (gimme == G_SCALAR) {
100 SV* const sv = sv_newmortal();
101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
102 sv_setiv(sv, maxarg);
113 assert(SvTYPE(TARG) == SVt_PVHV);
115 if (PL_op->op_private & OPpLVAL_INTRO)
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
118 if (PL_op->op_flags & OPf_REF)
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
129 if (gimme == G_ARRAY) {
130 RETURNOP(Perl_do_kv(aTHX));
132 else if (gimme == G_SCALAR) {
133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
141 static const char S_no_symref_sv[] =
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
148 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
152 sv = amagic_deref_call(sv, to_gv_amg);
156 if (SvTYPE(sv) == SVt_PVIO) {
157 GV * const gv = MUTABLE_GV(sv_newmortal());
158 gv_init(gv, 0, "", 0, 0);
159 GvIOp(gv) = MUTABLE_IO(sv);
160 SvREFCNT_inc_void_NN(sv);
163 else if (!isGV_with_GP(sv))
164 DIE(aTHX_ "Not a GLOB reference");
167 if (!isGV_with_GP(sv)) {
168 if (!SvOK(sv) && sv != &PL_sv_undef) {
169 /* If this is a 'my' scalar and flag is set then vivify
173 Perl_croak_no_modify(aTHX);
174 if (PL_op->op_private & OPpDEREF) {
176 if (cUNOP->op_targ) {
178 SV * const namesv = PAD_SV(cUNOP->op_targ);
179 const char * const name = SvPV(namesv, len);
180 gv = MUTABLE_GV(newSV(0));
181 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
184 const char * const name = CopSTASHPV(PL_curcop);
187 prepare_SV_for_RV(sv);
188 SvRV_set(sv, MUTABLE_SV(gv));
193 if (PL_op->op_flags & OPf_REF ||
194 PL_op->op_private & HINT_STRICT_REFS)
195 DIE(aTHX_ PL_no_usym, "a symbol");
196 if (ckWARN(WARN_UNINITIALIZED))
200 if ((PL_op->op_flags & OPf_SPECIAL) &&
201 !(PL_op->op_flags & OPf_MOD))
203 SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
205 && (!is_gv_magical_sv(sv,0)
206 || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
213 if (PL_op->op_private & HINT_STRICT_REFS)
214 DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
215 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
216 == OPpDONT_INIT_GV) {
217 /* We are the target of a coderef assignment. Return
218 the scalar unchanged, and let pp_sasssign deal with
222 sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD, SVt_PVGV));
224 /* FAKE globs in the symbol table cause weird bugs (#77810) */
225 if (sv) SvFAKE_off(sv);
228 if (sv && SvFAKE(sv)) {
229 SV *newsv = sv_newmortal();
230 sv_setsv_flags(newsv, sv, 0);
234 if (PL_op->op_private & OPpLVAL_INTRO)
235 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
240 /* Helper function for pp_rv2sv and pp_rv2av */
242 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
243 const svtype type, SV ***spp)
248 PERL_ARGS_ASSERT_SOFTREF2XV;
250 if (PL_op->op_private & HINT_STRICT_REFS) {
252 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
254 Perl_die(aTHX_ PL_no_usym, what);
258 PL_op->op_flags & OPf_REF &&
259 PL_op->op_next->op_type != OP_BOOLKEYS
261 Perl_die(aTHX_ PL_no_usym, what);
262 if (ckWARN(WARN_UNINITIALIZED))
264 if (type != SVt_PV && GIMME_V == G_ARRAY) {
268 **spp = &PL_sv_undef;
271 if ((PL_op->op_flags & OPf_SPECIAL) &&
272 !(PL_op->op_flags & OPf_MOD))
274 gv = gv_fetchsv(sv, 0, type);
276 && (!is_gv_magical_sv(sv,0)
277 || !(gv = gv_fetchsv(sv, GV_ADD, type))))
279 **spp = &PL_sv_undef;
284 gv = gv_fetchsv(sv, GV_ADD, type);
294 if (!(PL_op->op_private & OPpDEREFed))
298 sv = amagic_deref_call(sv, to_sv_amg);
303 switch (SvTYPE(sv)) {
309 DIE(aTHX_ "Not a SCALAR reference");
316 if (!isGV_with_GP(gv)) {
317 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
323 if (PL_op->op_flags & OPf_MOD) {
324 if (PL_op->op_private & OPpLVAL_INTRO) {
325 if (cUNOP->op_first->op_type == OP_NULL)
326 sv = save_scalar(MUTABLE_GV(TOPs));
328 sv = save_scalar(gv);
330 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
332 else if (PL_op->op_private & OPpDEREF)
333 vivify_ref(sv, PL_op->op_private & OPpDEREF);
342 AV * const av = MUTABLE_AV(TOPs);
343 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
345 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
347 *sv = newSV_type(SVt_PVMG);
348 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
352 SETs(sv_2mortal(newSViv(
353 AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
363 if (PL_op->op_flags & OPf_MOD || LVRET) {
364 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
365 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
367 LvTARG(ret) = SvREFCNT_inc_simple(sv);
368 PUSHs(ret); /* no SvSETMAGIC */
372 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
373 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
374 if (mg && mg->mg_len >= 0) {
379 PUSHi(i + CopARYBASE_get(PL_curcop));
392 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
394 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
397 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
398 /* (But not in defined().) */
400 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
403 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
404 if ((PL_op->op_private & OPpLVAL_INTRO)) {
405 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
408 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
411 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
415 cv = MUTABLE_CV(&PL_sv_undef);
416 SETs(MUTABLE_SV(cv));
426 SV *ret = &PL_sv_undef;
428 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
429 const char * s = SvPVX_const(TOPs);
430 if (strnEQ(s, "CORE::", 6)) {
431 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
432 if (code < 0) { /* Overridable. */
433 #define MAX_ARGS_OP ((sizeof(I32) - 1) * 2)
434 int i = 0, n = 0, seen_question = 0, defgv = 0;
436 char str[ MAX_ARGS_OP * 2 + 2 ]; /* One ';', one '\0' */
438 if (code == -KEY_chop || code == -KEY_chomp
439 || code == -KEY_exec || code == -KEY_system)
441 if (code == -KEY_mkdir) {
442 ret = newSVpvs_flags("_;$", SVs_TEMP);
445 if (code == -KEY_keys || code == -KEY_values || code == -KEY_each) {
446 ret = newSVpvs_flags("+", SVs_TEMP);
449 if (code == -KEY_push || code == -KEY_unshift) {
450 ret = newSVpvs_flags("+@", SVs_TEMP);
453 if (code == -KEY_pop || code == -KEY_shift) {
454 ret = newSVpvs_flags(";+", SVs_TEMP);
457 if (code == -KEY_splice) {
458 ret = newSVpvs_flags("+;$$@", SVs_TEMP);
461 if (code == -KEY_tied || code == -KEY_untie) {
462 ret = newSVpvs_flags("\\[$@%*]", SVs_TEMP);
465 if (code == -KEY_tie) {
466 ret = newSVpvs_flags("\\[$@%*]$@", SVs_TEMP);
469 if (code == -KEY_readpipe) {
470 s = "CORE::backtick";
472 while (i < MAXO) { /* The slow way. */
473 if (strEQ(s + 6, PL_op_name[i])
474 || strEQ(s + 6, PL_op_desc[i]))
480 goto nonesuch; /* Should not happen... */
482 defgv = PL_opargs[i] & OA_DEFGV;
483 oa = PL_opargs[i] >> OASHIFT;
485 if (oa & OA_OPTIONAL && !seen_question && !defgv) {
489 if ((oa & (OA_OPTIONAL - 1)) >= OA_AVREF
490 && (oa & (OA_OPTIONAL - 1)) <= OA_SCALARREF
491 /* But globs are already references (kinda) */
492 && (oa & (OA_OPTIONAL - 1)) != OA_FILEREF
496 str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)];
499 if (defgv && str[n - 1] == '$')
502 ret = newSVpvn_flags(str, n - 1, SVs_TEMP);
504 else if (code) /* Non-Overridable */
506 else { /* None such */
508 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
512 cv = sv_2cv(TOPs, &stash, &gv, 0);
514 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
523 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
525 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
527 PUSHs(MUTABLE_SV(cv));
541 if (GIMME != G_ARRAY) {
545 *MARK = &PL_sv_undef;
546 *MARK = refto(*MARK);
550 EXTEND_MORTAL(SP - MARK);
552 *MARK = refto(*MARK);
557 S_refto(pTHX_ SV *sv)
562 PERL_ARGS_ASSERT_REFTO;
564 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
567 if (!(sv = LvTARG(sv)))
570 SvREFCNT_inc_void_NN(sv);
572 else if (SvTYPE(sv) == SVt_PVAV) {
573 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
574 av_reify(MUTABLE_AV(sv));
576 SvREFCNT_inc_void_NN(sv);
578 else if (SvPADTMP(sv) && !IS_PADGV(sv))
582 SvREFCNT_inc_void_NN(sv);
585 sv_upgrade(rv, SVt_IV);
595 SV * const sv = POPs;
600 if (!sv || !SvROK(sv))
603 pv = sv_reftype(SvRV(sv),TRUE);
604 PUSHp(pv, strlen(pv));
614 stash = CopSTASH(PL_curcop);
616 SV * const ssv = POPs;
620 if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
621 Perl_croak(aTHX_ "Attempt to bless into a reference");
622 ptr = SvPV_const(ssv,len);
624 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
625 "Explicit blessing to '' (assuming package main)");
626 stash = gv_stashpvn(ptr, len, GV_ADD);
629 (void)sv_bless(TOPs, stash);
638 const char * const elem = SvPV_nolen_const(sv);
639 GV * const gv = MUTABLE_GV(POPs);
644 /* elem will always be NUL terminated. */
645 const char * const second_letter = elem + 1;
648 if (strEQ(second_letter, "RRAY"))
649 tmpRef = MUTABLE_SV(GvAV(gv));
652 if (strEQ(second_letter, "ODE"))
653 tmpRef = MUTABLE_SV(GvCVu(gv));
656 if (strEQ(second_letter, "ILEHANDLE")) {
657 /* finally deprecated in 5.8.0 */
658 deprecate("*glob{FILEHANDLE}");
659 tmpRef = MUTABLE_SV(GvIOp(gv));
662 if (strEQ(second_letter, "ORMAT"))
663 tmpRef = MUTABLE_SV(GvFORM(gv));
666 if (strEQ(second_letter, "LOB"))
667 tmpRef = MUTABLE_SV(gv);
670 if (strEQ(second_letter, "ASH"))
671 tmpRef = MUTABLE_SV(GvHV(gv));
674 if (*second_letter == 'O' && !elem[2])
675 tmpRef = MUTABLE_SV(GvIOp(gv));
678 if (strEQ(second_letter, "AME"))
679 sv = newSVhek(GvNAME_HEK(gv));
682 if (strEQ(second_letter, "ACKAGE")) {
683 const HV * const stash = GvSTASH(gv);
684 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
685 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
689 if (strEQ(second_letter, "CALAR"))
704 /* Pattern matching */
709 register unsigned char *s;
712 register I32 *sfirst;
716 if (sv == PL_lastscream) {
720 s = (unsigned char*)(SvPV(sv, len));
721 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
722 /* No point in studying a zero length string, and not safe to study
723 anything that doesn't appear to be a simple scalar (and hence might
724 change between now and when the regexp engine runs without our set
725 magic ever running) such as a reference to an object with overloaded
726 stringification. Also refuse to study an FBM scalar, as this gives
727 more flexibility in SV flag usage. No real-world code would ever
728 end up studying an FBM scalar, so this isn't a real pessimisation.
735 SvSCREAM_off(PL_lastscream);
736 SvREFCNT_dec(PL_lastscream);
738 PL_lastscream = SvREFCNT_inc_simple(sv);
740 if (pos > PL_maxscream) {
741 if (PL_maxscream < 0) {
742 PL_maxscream = pos + 80;
743 Newx(PL_screamfirst, 256, I32);
744 Newx(PL_screamnext, PL_maxscream, I32);
747 PL_maxscream = pos + pos / 4;
748 Renew(PL_screamnext, PL_maxscream, I32);
752 sfirst = PL_screamfirst;
753 snext = PL_screamnext;
755 if (!sfirst || !snext)
756 DIE(aTHX_ "do_study: out of memory");
758 for (ch = 256; ch; --ch)
763 register const I32 ch = s[pos];
765 snext[pos] = sfirst[ch] - pos;
772 sv_magic(sv, NULL, PERL_MAGIC_study, NULL, 0);
781 if (PL_op->op_flags & OPf_STACKED)
783 else if (PL_op->op_private & OPpTARGET_MY)
789 TARG = sv_newmortal();
790 if(PL_op->op_type == OP_TRANSR) {
791 SV * const newsv = newSVsv(sv);
795 else PUSHi(do_trans(sv));
799 /* Lvalue operators. */
802 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
808 PERL_ARGS_ASSERT_DO_CHOMP;
810 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
812 if (SvTYPE(sv) == SVt_PVAV) {
814 AV *const av = MUTABLE_AV(sv);
815 const I32 max = AvFILL(av);
817 for (i = 0; i <= max; i++) {
818 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
819 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
820 do_chomp(retval, sv, chomping);
824 else if (SvTYPE(sv) == SVt_PVHV) {
825 HV* const hv = MUTABLE_HV(sv);
827 (void)hv_iterinit(hv);
828 while ((entry = hv_iternext(hv)))
829 do_chomp(retval, hv_iterval(hv,entry), chomping);
832 else if (SvREADONLY(sv)) {
834 /* SV is copy-on-write */
835 sv_force_normal_flags(sv, 0);
838 Perl_croak_no_modify(aTHX);
843 /* XXX, here sv is utf8-ized as a side-effect!
844 If encoding.pm is used properly, almost string-generating
845 operations, including literal strings, chr(), input data, etc.
846 should have been utf8-ized already, right?
848 sv_recode_to_utf8(sv, PL_encoding);
854 char *temp_buffer = NULL;
863 while (len && s[-1] == '\n') {
870 STRLEN rslen, rs_charlen;
871 const char *rsptr = SvPV_const(PL_rs, rslen);
873 rs_charlen = SvUTF8(PL_rs)
877 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
878 /* Assumption is that rs is shorter than the scalar. */
880 /* RS is utf8, scalar is 8 bit. */
882 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
885 /* Cannot downgrade, therefore cannot possibly match
887 assert (temp_buffer == rsptr);
893 else if (PL_encoding) {
894 /* RS is 8 bit, encoding.pm is used.
895 * Do not recode PL_rs as a side-effect. */
896 svrecode = newSVpvn(rsptr, rslen);
897 sv_recode_to_utf8(svrecode, PL_encoding);
898 rsptr = SvPV_const(svrecode, rslen);
899 rs_charlen = sv_len_utf8(svrecode);
902 /* RS is 8 bit, scalar is utf8. */
903 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
917 if (memNE(s, rsptr, rslen))
919 SvIVX(retval) += rs_charlen;
922 s = SvPV_force_nolen(sv);
930 SvREFCNT_dec(svrecode);
932 Safefree(temp_buffer);
934 if (len && !SvPOK(sv))
935 s = SvPV_force_nomg(sv, len);
938 char * const send = s + len;
939 char * const start = s;
941 while (s > start && UTF8_IS_CONTINUATION(*s))
943 if (is_utf8_string((U8*)s, send - s)) {
944 sv_setpvn(retval, s, send - s);
946 SvCUR_set(sv, s - start);
952 sv_setpvs(retval, "");
956 sv_setpvn(retval, s, 1);
963 sv_setpvs(retval, "");
971 const bool chomping = PL_op->op_type == OP_SCHOMP;
975 do_chomp(TARG, TOPs, chomping);
982 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
983 const bool chomping = PL_op->op_type == OP_CHOMP;
988 do_chomp(TARG, *++MARK, chomping);
999 if (!PL_op->op_private) {
1008 SV_CHECK_THINKFIRST_COW_DROP(sv);
1010 switch (SvTYPE(sv)) {
1014 av_undef(MUTABLE_AV(sv));
1017 hv_undef(MUTABLE_HV(sv));
1020 if (cv_const_sv((const CV *)sv))
1021 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
1022 CvANON((const CV *)sv) ? "(anonymous)"
1023 : GvENAME(CvGV((const CV *)sv)));
1027 /* let user-undef'd sub keep its identity */
1028 GV* const gv = CvGV((const CV *)sv);
1029 cv_undef(MUTABLE_CV(sv));
1030 CvGV_set(MUTABLE_CV(sv), gv);
1035 SvSetMagicSV(sv, &PL_sv_undef);
1038 else if (isGV_with_GP(sv)) {
1042 /* undef *Pkg::meth_name ... */
1044 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1045 && HvENAME_get(stash);
1047 if((stash = GvHV((const GV *)sv))) {
1048 if(HvENAME_get(stash))
1049 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1053 gp_free(MUTABLE_GV(sv));
1055 GvGP_set(sv, gp_ref(gp));
1056 GvSV(sv) = newSV(0);
1057 GvLINE(sv) = CopLINE(PL_curcop);
1058 GvEGV(sv) = MUTABLE_GV(sv);
1062 mro_package_moved(NULL, stash, (const GV *)sv, 0);
1064 /* undef *Foo::ISA */
1065 if( strEQ(GvNAME((const GV *)sv), "ISA")
1066 && (stash = GvSTASH((const GV *)sv))
1067 && (method_changed || HvENAME(stash)) )
1068 mro_isa_changed_in(stash);
1069 else if(method_changed)
1070 mro_method_changed_in(
1071 GvSTASH((const GV *)sv)
1078 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
1093 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1094 Perl_croak_no_modify(aTHX);
1095 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1096 && SvIVX(TOPs) != IV_MIN)
1098 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1099 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1110 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1111 Perl_croak_no_modify(aTHX);
1113 TARG = sv_newmortal();
1114 sv_setsv(TARG, TOPs);
1115 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1116 && SvIVX(TOPs) != IV_MAX)
1118 SvIV_set(TOPs, SvIVX(TOPs) + 1);
1119 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1124 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
1134 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
1135 Perl_croak_no_modify(aTHX);
1137 TARG = sv_newmortal();
1138 sv_setsv(TARG, TOPs);
1139 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1140 && SvIVX(TOPs) != IV_MIN)
1142 SvIV_set(TOPs, SvIVX(TOPs) - 1);
1143 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
1152 /* Ordinary operators. */
1156 dVAR; dSP; dATARGET; SV *svl, *svr;
1157 #ifdef PERL_PRESERVE_IVUV
1160 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1163 #ifdef PERL_PRESERVE_IVUV
1164 /* For integer to integer power, we do the calculation by hand wherever
1165 we're sure it is safe; otherwise we call pow() and try to convert to
1166 integer afterwards. */
1168 SvIV_please_nomg(svr);
1170 SvIV_please_nomg(svl);
1179 const IV iv = SvIVX(svr);
1183 goto float_it; /* Can't do negative powers this way. */
1187 baseuok = SvUOK(svl);
1189 baseuv = SvUVX(svl);
1191 const IV iv = SvIVX(svl);
1194 baseuok = TRUE; /* effectively it's a UV now */
1196 baseuv = -iv; /* abs, baseuok == false records sign */
1199 /* now we have integer ** positive integer. */
1202 /* foo & (foo - 1) is zero only for a power of 2. */
1203 if (!(baseuv & (baseuv - 1))) {
1204 /* We are raising power-of-2 to a positive integer.
1205 The logic here will work for any base (even non-integer
1206 bases) but it can be less accurate than
1207 pow (base,power) or exp (power * log (base)) when the
1208 intermediate values start to spill out of the mantissa.
1209 With powers of 2 we know this can't happen.
1210 And powers of 2 are the favourite thing for perl
1211 programmers to notice ** not doing what they mean. */
1213 NV base = baseuok ? baseuv : -(NV)baseuv;
1218 while (power >>= 1) {
1226 SvIV_please_nomg(svr);
1229 register unsigned int highbit = 8 * sizeof(UV);
1230 register unsigned int diff = 8 * sizeof(UV);
1231 while (diff >>= 1) {
1233 if (baseuv >> highbit) {
1237 /* we now have baseuv < 2 ** highbit */
1238 if (power * highbit <= 8 * sizeof(UV)) {
1239 /* result will definitely fit in UV, so use UV math
1240 on same algorithm as above */
1241 register UV result = 1;
1242 register UV base = baseuv;
1243 const bool odd_power = cBOOL(power & 1);
1247 while (power >>= 1) {
1254 if (baseuok || !odd_power)
1255 /* answer is positive */
1257 else if (result <= (UV)IV_MAX)
1258 /* answer negative, fits in IV */
1259 SETi( -(IV)result );
1260 else if (result == (UV)IV_MIN)
1261 /* 2's complement assumption: special case IV_MIN */
1264 /* answer negative, doesn't fit */
1265 SETn( -(NV)result );
1275 NV right = SvNV_nomg(svr);
1276 NV left = SvNV_nomg(svl);
1279 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1281 We are building perl with long double support and are on an AIX OS
1282 afflicted with a powl() function that wrongly returns NaNQ for any
1283 negative base. This was reported to IBM as PMR #23047-379 on
1284 03/06/2006. The problem exists in at least the following versions
1285 of AIX and the libm fileset, and no doubt others as well:
1287 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1288 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1289 AIX 5.2.0 bos.adt.libm 5.2.0.85
1291 So, until IBM fixes powl(), we provide the following workaround to
1292 handle the problem ourselves. Our logic is as follows: for
1293 negative bases (left), we use fmod(right, 2) to check if the
1294 exponent is an odd or even integer:
1296 - if odd, powl(left, right) == -powl(-left, right)
1297 - if even, powl(left, right) == powl(-left, right)
1299 If the exponent is not an integer, the result is rightly NaNQ, so
1300 we just return that (as NV_NAN).
1304 NV mod2 = Perl_fmod( right, 2.0 );
1305 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1306 SETn( -Perl_pow( -left, right) );
1307 } else if (mod2 == 0.0) { /* even integer */
1308 SETn( Perl_pow( -left, right) );
1309 } else { /* fractional power */
1313 SETn( Perl_pow( left, right) );
1316 SETn( Perl_pow( left, right) );
1317 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1319 #ifdef PERL_PRESERVE_IVUV
1321 SvIV_please_nomg(svr);
1329 dVAR; dSP; dATARGET; SV *svl, *svr;
1330 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1333 #ifdef PERL_PRESERVE_IVUV
1334 SvIV_please_nomg(svr);
1336 /* Unless the left argument is integer in range we are going to have to
1337 use NV maths. Hence only attempt to coerce the right argument if
1338 we know the left is integer. */
1339 /* Left operand is defined, so is it IV? */
1340 SvIV_please_nomg(svl);
1342 bool auvok = SvUOK(svl);
1343 bool buvok = SvUOK(svr);
1344 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1345 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1354 const IV aiv = SvIVX(svl);
1357 auvok = TRUE; /* effectively it's a UV now */
1359 alow = -aiv; /* abs, auvok == false records sign */
1365 const IV biv = SvIVX(svr);
1368 buvok = TRUE; /* effectively it's a UV now */
1370 blow = -biv; /* abs, buvok == false records sign */
1374 /* If this does sign extension on unsigned it's time for plan B */
1375 ahigh = alow >> (4 * sizeof (UV));
1377 bhigh = blow >> (4 * sizeof (UV));
1379 if (ahigh && bhigh) {
1381 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1382 which is overflow. Drop to NVs below. */
1383 } else if (!ahigh && !bhigh) {
1384 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1385 so the unsigned multiply cannot overflow. */
1386 const UV product = alow * blow;
1387 if (auvok == buvok) {
1388 /* -ve * -ve or +ve * +ve gives a +ve result. */
1392 } else if (product <= (UV)IV_MIN) {
1393 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1394 /* -ve result, which could overflow an IV */
1396 SETi( -(IV)product );
1398 } /* else drop to NVs below. */
1400 /* One operand is large, 1 small */
1403 /* swap the operands */
1405 bhigh = blow; /* bhigh now the temp var for the swap */
1409 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1410 multiplies can't overflow. shift can, add can, -ve can. */
1411 product_middle = ahigh * blow;
1412 if (!(product_middle & topmask)) {
1413 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1415 product_middle <<= (4 * sizeof (UV));
1416 product_low = alow * blow;
1418 /* as for pp_add, UV + something mustn't get smaller.
1419 IIRC ANSI mandates this wrapping *behaviour* for
1420 unsigned whatever the actual representation*/
1421 product_low += product_middle;
1422 if (product_low >= product_middle) {
1423 /* didn't overflow */
1424 if (auvok == buvok) {
1425 /* -ve * -ve or +ve * +ve gives a +ve result. */
1427 SETu( product_low );
1429 } else if (product_low <= (UV)IV_MIN) {
1430 /* 2s complement assumption again */
1431 /* -ve result, which could overflow an IV */
1433 SETi( -(IV)product_low );
1435 } /* else drop to NVs below. */
1437 } /* product_middle too large */
1438 } /* ahigh && bhigh */
1443 NV right = SvNV_nomg(svr);
1444 NV left = SvNV_nomg(svl);
1446 SETn( left * right );
1453 dVAR; dSP; dATARGET; SV *svl, *svr;
1454 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1457 /* Only try to do UV divide first
1458 if ((SLOPPYDIVIDE is true) or
1459 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1461 The assumption is that it is better to use floating point divide
1462 whenever possible, only doing integer divide first if we can't be sure.
1463 If NV_PRESERVES_UV is true then we know at compile time that no UV
1464 can be too large to preserve, so don't need to compile the code to
1465 test the size of UVs. */
1468 # define PERL_TRY_UV_DIVIDE
1469 /* ensure that 20./5. == 4. */
1471 # ifdef PERL_PRESERVE_IVUV
1472 # ifndef NV_PRESERVES_UV
1473 # define PERL_TRY_UV_DIVIDE
1478 #ifdef PERL_TRY_UV_DIVIDE
1479 SvIV_please_nomg(svr);
1481 SvIV_please_nomg(svl);
1483 bool left_non_neg = SvUOK(svl);
1484 bool right_non_neg = SvUOK(svr);
1488 if (right_non_neg) {
1492 const IV biv = SvIVX(svr);
1495 right_non_neg = TRUE; /* effectively it's a UV now */
1501 /* historically undef()/0 gives a "Use of uninitialized value"
1502 warning before dieing, hence this test goes here.
1503 If it were immediately before the second SvIV_please, then
1504 DIE() would be invoked before left was even inspected, so
1505 no inspection would give no warning. */
1507 DIE(aTHX_ "Illegal division by zero");
1513 const IV aiv = SvIVX(svl);
1516 left_non_neg = TRUE; /* effectively it's a UV now */
1525 /* For sloppy divide we always attempt integer division. */
1527 /* Otherwise we only attempt it if either or both operands
1528 would not be preserved by an NV. If both fit in NVs
1529 we fall through to the NV divide code below. However,
1530 as left >= right to ensure integer result here, we know that
1531 we can skip the test on the right operand - right big
1532 enough not to be preserved can't get here unless left is
1535 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1538 /* Integer division can't overflow, but it can be imprecise. */
1539 const UV result = left / right;
1540 if (result * right == left) {
1541 SP--; /* result is valid */
1542 if (left_non_neg == right_non_neg) {
1543 /* signs identical, result is positive. */
1547 /* 2s complement assumption */
1548 if (result <= (UV)IV_MIN)
1549 SETi( -(IV)result );
1551 /* It's exact but too negative for IV. */
1552 SETn( -(NV)result );
1555 } /* tried integer divide but it was not an integer result */
1556 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1557 } /* left wasn't SvIOK */
1558 } /* right wasn't SvIOK */
1559 #endif /* PERL_TRY_UV_DIVIDE */
1561 NV right = SvNV_nomg(svr);
1562 NV left = SvNV_nomg(svl);
1563 (void)POPs;(void)POPs;
1564 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1565 if (! Perl_isnan(right) && right == 0.0)
1569 DIE(aTHX_ "Illegal division by zero");
1570 PUSHn( left / right );
1577 dVAR; dSP; dATARGET;
1578 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1582 bool left_neg = FALSE;
1583 bool right_neg = FALSE;
1584 bool use_double = FALSE;
1585 bool dright_valid = FALSE;
1588 SV * const svr = TOPs;
1589 SV * const svl = TOPm1s;
1590 SvIV_please_nomg(svr);
1592 right_neg = !SvUOK(svr);
1596 const IV biv = SvIVX(svr);
1599 right_neg = FALSE; /* effectively it's a UV now */
1606 dright = SvNV_nomg(svr);
1607 right_neg = dright < 0;
1610 if (dright < UV_MAX_P1) {
1611 right = U_V(dright);
1612 dright_valid = TRUE; /* In case we need to use double below. */
1618 /* At this point use_double is only true if right is out of range for
1619 a UV. In range NV has been rounded down to nearest UV and
1620 use_double false. */
1621 SvIV_please_nomg(svl);
1622 if (!use_double && SvIOK(svl)) {
1624 left_neg = !SvUOK(svl);
1628 const IV aiv = SvIVX(svl);
1631 left_neg = FALSE; /* effectively it's a UV now */
1639 dleft = SvNV_nomg(svl);
1640 left_neg = dleft < 0;
1644 /* This should be exactly the 5.6 behaviour - if left and right are
1645 both in range for UV then use U_V() rather than floor. */
1647 if (dleft < UV_MAX_P1) {
1648 /* right was in range, so is dleft, so use UVs not double.
1652 /* left is out of range for UV, right was in range, so promote
1653 right (back) to double. */
1655 /* The +0.5 is used in 5.6 even though it is not strictly
1656 consistent with the implicit +0 floor in the U_V()
1657 inside the #if 1. */
1658 dleft = Perl_floor(dleft + 0.5);
1661 dright = Perl_floor(dright + 0.5);
1672 DIE(aTHX_ "Illegal modulus zero");
1674 dans = Perl_fmod(dleft, dright);
1675 if ((left_neg != right_neg) && dans)
1676 dans = dright - dans;
1679 sv_setnv(TARG, dans);
1685 DIE(aTHX_ "Illegal modulus zero");
1688 if ((left_neg != right_neg) && ans)
1691 /* XXX may warn: unary minus operator applied to unsigned type */
1692 /* could change -foo to be (~foo)+1 instead */
1693 if (ans <= ~((UV)IV_MAX)+1)
1694 sv_setiv(TARG, ~ans+1);
1696 sv_setnv(TARG, -(NV)ans);
1699 sv_setuv(TARG, ans);
1708 dVAR; dSP; dATARGET;
1712 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1713 /* TODO: think of some way of doing list-repeat overloading ??? */
1718 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1724 const UV uv = SvUV_nomg(sv);
1726 count = IV_MAX; /* The best we can do? */
1730 const IV iv = SvIV_nomg(sv);
1737 else if (SvNOKp(sv)) {
1738 const NV nv = SvNV_nomg(sv);
1745 count = SvIV_nomg(sv);
1747 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1749 static const char oom_list_extend[] = "Out of memory during list extend";
1750 const I32 items = SP - MARK;
1751 const I32 max = items * count;
1753 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1754 /* Did the max computation overflow? */
1755 if (items > 0 && max > 0 && (max < items || max < count))
1756 Perl_croak(aTHX_ oom_list_extend);
1761 /* This code was intended to fix 20010809.028:
1764 for (($x =~ /./g) x 2) {
1765 print chop; # "abcdabcd" expected as output.
1768 * but that change (#11635) broke this code:
1770 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1772 * I can't think of a better fix that doesn't introduce
1773 * an efficiency hit by copying the SVs. The stack isn't
1774 * refcounted, and mortalisation obviously doesn't
1775 * Do The Right Thing when the stack has more than
1776 * one pointer to the same mortal value.
1780 *SP = sv_2mortal(newSVsv(*SP));
1790 repeatcpy((char*)(MARK + items), (char*)MARK,
1791 items * sizeof(const SV *), count - 1);
1794 else if (count <= 0)
1797 else { /* Note: mark already snarfed by pp_list */
1798 SV * const tmpstr = POPs;
1801 static const char oom_string_extend[] =
1802 "Out of memory during string extend";
1805 sv_setsv_nomg(TARG, tmpstr);
1806 SvPV_force_nomg(TARG, len);
1807 isutf = DO_UTF8(TARG);
1812 const STRLEN max = (UV)count * len;
1813 if (len > MEM_SIZE_MAX / count)
1814 Perl_croak(aTHX_ oom_string_extend);
1815 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
1816 SvGROW(TARG, max + 1);
1817 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1818 SvCUR_set(TARG, SvCUR(TARG) * count);
1820 *SvEND(TARG) = '\0';
1823 (void)SvPOK_only_UTF8(TARG);
1825 (void)SvPOK_only(TARG);
1827 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1828 /* The parser saw this as a list repeat, and there
1829 are probably several items on the stack. But we're
1830 in scalar context, and there's no pp_list to save us
1831 now. So drop the rest of the items -- robin@kitsite.com
1843 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
1844 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1847 useleft = USE_LEFT(svl);
1848 #ifdef PERL_PRESERVE_IVUV
1849 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1850 "bad things" happen if you rely on signed integers wrapping. */
1851 SvIV_please_nomg(svr);
1853 /* Unless the left argument is integer in range we are going to have to
1854 use NV maths. Hence only attempt to coerce the right argument if
1855 we know the left is integer. */
1856 register UV auv = 0;
1862 a_valid = auvok = 1;
1863 /* left operand is undef, treat as zero. */
1865 /* Left operand is defined, so is it IV? */
1866 SvIV_please_nomg(svl);
1868 if ((auvok = SvUOK(svl)))
1871 register const IV aiv = SvIVX(svl);
1874 auvok = 1; /* Now acting as a sign flag. */
1875 } else { /* 2s complement assumption for IV_MIN */
1883 bool result_good = 0;
1886 bool buvok = SvUOK(svr);
1891 register const IV biv = SvIVX(svr);
1898 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1899 else "IV" now, independent of how it came in.
1900 if a, b represents positive, A, B negative, a maps to -A etc
1905 all UV maths. negate result if A negative.
1906 subtract if signs same, add if signs differ. */
1908 if (auvok ^ buvok) {
1917 /* Must get smaller */
1922 if (result <= buv) {
1923 /* result really should be -(auv-buv). as its negation
1924 of true value, need to swap our result flag */
1936 if (result <= (UV)IV_MIN)
1937 SETi( -(IV)result );
1939 /* result valid, but out of range for IV. */
1940 SETn( -(NV)result );
1944 } /* Overflow, drop through to NVs. */
1949 NV value = SvNV_nomg(svr);
1953 /* left operand is undef, treat as zero - value */
1957 SETn( SvNV_nomg(svl) - value );
1964 dVAR; dSP; dATARGET; SV *svl, *svr;
1965 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
1969 const IV shift = SvIV_nomg(svr);
1970 if (PL_op->op_private & HINT_INTEGER) {
1971 const IV i = SvIV_nomg(svl);
1975 const UV u = SvUV_nomg(svl);
1984 dVAR; dSP; dATARGET; SV *svl, *svr;
1985 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
1989 const IV shift = SvIV_nomg(svr);
1990 if (PL_op->op_private & HINT_INTEGER) {
1991 const IV i = SvIV_nomg(svl);
1995 const UV u = SvUV_nomg(svl);
2007 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
2011 (SvIOK_notUV(left) && SvIOK_notUV(right))
2012 ? (SvIVX(left) < SvIVX(right))
2013 : (do_ncmp(left, right) == -1)
2023 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
2027 (SvIOK_notUV(left) && SvIOK_notUV(right))
2028 ? (SvIVX(left) > SvIVX(right))
2029 : (do_ncmp(left, right) == 1)
2039 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
2043 (SvIOK_notUV(left) && SvIOK_notUV(right))
2044 ? (SvIVX(left) <= SvIVX(right))
2045 : (do_ncmp(left, right) <= 0)
2055 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2059 (SvIOK_notUV(left) && SvIOK_notUV(right))
2060 ? (SvIVX(left) >= SvIVX(right))
2061 : ( (do_ncmp(left, right) & 2) == 0)
2071 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2075 (SvIOK_notUV(left) && SvIOK_notUV(right))
2076 ? (SvIVX(left) != SvIVX(right))
2077 : (do_ncmp(left, right) != 0)
2082 /* compare left and right SVs. Returns:
2086 * 2: left or right was a NaN
2089 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2093 PERL_ARGS_ASSERT_DO_NCMP;
2094 #ifdef PERL_PRESERVE_IVUV
2095 SvIV_please_nomg(right);
2096 /* Fortunately it seems NaN isn't IOK */
2098 SvIV_please_nomg(left);
2101 const IV leftiv = SvIVX(left);
2102 if (!SvUOK(right)) {
2103 /* ## IV <=> IV ## */
2104 const IV rightiv = SvIVX(right);
2105 return (leftiv > rightiv) - (leftiv < rightiv);
2107 /* ## IV <=> UV ## */
2109 /* As (b) is a UV, it's >=0, so it must be < */
2112 const UV rightuv = SvUVX(right);
2113 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2118 /* ## UV <=> UV ## */
2119 const UV leftuv = SvUVX(left);
2120 const UV rightuv = SvUVX(right);
2121 return (leftuv > rightuv) - (leftuv < rightuv);
2123 /* ## UV <=> IV ## */
2125 const IV rightiv = SvIVX(right);
2127 /* As (a) is a UV, it's >=0, so it cannot be < */
2130 const UV leftuv = SvUVX(left);
2131 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2139 NV const rnv = SvNV_nomg(right);
2140 NV const lnv = SvNV_nomg(left);
2142 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2143 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2146 return (lnv > rnv) - (lnv < rnv);
2165 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2168 value = do_ncmp(left, right);
2183 int amg_type = sle_amg;
2187 switch (PL_op->op_type) {
2206 tryAMAGICbin_MG(amg_type, AMGf_set);
2209 const int cmp = (IN_LOCALE_RUNTIME
2210 ? sv_cmp_locale_flags(left, right, 0)
2211 : sv_cmp_flags(left, right, 0));
2212 SETs(boolSV(cmp * multiplier < rhs));
2220 tryAMAGICbin_MG(seq_amg, AMGf_set);
2223 SETs(boolSV(sv_eq_flags(left, right, 0)));
2231 tryAMAGICbin_MG(sne_amg, AMGf_set);
2234 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2242 tryAMAGICbin_MG(scmp_amg, 0);
2245 const int cmp = (IN_LOCALE_RUNTIME
2246 ? sv_cmp_locale_flags(left, right, 0)
2247 : sv_cmp_flags(left, right, 0));
2255 dVAR; dSP; dATARGET;
2256 tryAMAGICbin_MG(band_amg, AMGf_assign);
2259 if (SvNIOKp(left) || SvNIOKp(right)) {
2260 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2261 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2262 if (PL_op->op_private & HINT_INTEGER) {
2263 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2267 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2270 if (left_ro_nonnum) SvNIOK_off(left);
2271 if (right_ro_nonnum) SvNIOK_off(right);
2274 do_vop(PL_op->op_type, TARG, left, right);
2283 dVAR; dSP; dATARGET;
2284 const int op_type = PL_op->op_type;
2286 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2289 if (SvNIOKp(left) || SvNIOKp(right)) {
2290 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2291 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2292 if (PL_op->op_private & HINT_INTEGER) {
2293 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2294 const IV r = SvIV_nomg(right);
2295 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2299 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2300 const UV r = SvUV_nomg(right);
2301 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2304 if (left_ro_nonnum) SvNIOK_off(left);
2305 if (right_ro_nonnum) SvNIOK_off(right);
2308 do_vop(op_type, TARG, left, right);
2318 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2320 SV * const sv = TOPs;
2321 const int flags = SvFLAGS(sv);
2323 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
2327 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2328 /* It's publicly an integer, or privately an integer-not-float */
2331 if (SvIVX(sv) == IV_MIN) {
2332 /* 2s complement assumption. */
2333 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2336 else if (SvUVX(sv) <= IV_MAX) {
2341 else if (SvIVX(sv) != IV_MIN) {
2345 #ifdef PERL_PRESERVE_IVUV
2353 SETn(-SvNV_nomg(sv));
2354 else if (SvPOKp(sv)) {
2356 const char * const s = SvPV_nomg_const(sv, len);
2357 if (isIDFIRST(*s)) {
2358 sv_setpvs(TARG, "-");
2361 else if (*s == '+' || *s == '-') {
2362 sv_setsv_nomg(TARG, sv);
2363 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2365 else if (DO_UTF8(sv)) {
2366 SvIV_please_nomg(sv);
2368 goto oops_its_an_int;
2370 sv_setnv(TARG, -SvNV_nomg(sv));
2372 sv_setpvs(TARG, "-");
2377 SvIV_please_nomg(sv);
2379 goto oops_its_an_int;
2380 sv_setnv(TARG, -SvNV_nomg(sv));
2385 SETn(-SvNV_nomg(sv));
2393 tryAMAGICun_MG(not_amg, AMGf_set);
2394 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
2401 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2405 if (PL_op->op_private & HINT_INTEGER) {
2406 const IV i = ~SvIV_nomg(sv);
2410 const UV u = ~SvUV_nomg(sv);
2419 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
2420 sv_setsv_nomg(TARG, sv);
2421 tmps = (U8*)SvPV_force_nomg(TARG, len);
2424 /* Calculate exact length, let's not estimate. */
2429 U8 * const send = tmps + len;
2430 U8 * const origtmps = tmps;
2431 const UV utf8flags = UTF8_ALLOW_ANYUV;
2433 while (tmps < send) {
2434 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2436 targlen += UNISKIP(~c);
2442 /* Now rewind strings and write them. */
2449 Newx(result, targlen + 1, U8);
2451 while (tmps < send) {
2452 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2454 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
2457 sv_usepvn_flags(TARG, (char*)result, targlen,
2458 SV_HAS_TRAILING_NUL);
2465 Newx(result, nchar + 1, U8);
2467 while (tmps < send) {
2468 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2473 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
2481 register long *tmpl;
2482 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2485 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2490 for ( ; anum > 0; anum--, tmps++)
2498 /* integer versions of some of the above */
2502 dVAR; dSP; dATARGET;
2503 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2506 SETi( left * right );
2514 dVAR; dSP; dATARGET;
2515 tryAMAGICbin_MG(div_amg, AMGf_assign);
2518 IV value = SvIV_nomg(right);
2520 DIE(aTHX_ "Illegal division by zero");
2521 num = SvIV_nomg(left);
2523 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2527 value = num / value;
2533 #if defined(__GLIBC__) && IVSIZE == 8
2540 /* This is the vanilla old i_modulo. */
2541 dVAR; dSP; dATARGET;
2542 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2546 DIE(aTHX_ "Illegal modulus zero");
2547 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2551 SETi( left % right );
2556 #if defined(__GLIBC__) && IVSIZE == 8
2561 /* This is the i_modulo with the workaround for the _moddi3 bug
2562 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
2563 * See below for pp_i_modulo. */
2564 dVAR; dSP; dATARGET;
2565 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2569 DIE(aTHX_ "Illegal modulus zero");
2570 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2574 SETi( left % PERL_ABS(right) );
2581 dVAR; dSP; dATARGET;
2582 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2586 DIE(aTHX_ "Illegal modulus zero");
2587 /* The assumption is to use hereafter the old vanilla version... */
2589 PL_ppaddr[OP_I_MODULO] =
2591 /* .. but if we have glibc, we might have a buggy _moddi3
2592 * (at least glicb 2.2.5 is known to have this bug), in other
2593 * words our integer modulus with negative quad as the second
2594 * argument might be broken. Test for this and re-patch the
2595 * opcode dispatch table if that is the case, remembering to
2596 * also apply the workaround so that this first round works
2597 * right, too. See [perl #9402] for more information. */
2601 /* Cannot do this check with inlined IV constants since
2602 * that seems to work correctly even with the buggy glibc. */
2604 /* Yikes, we have the bug.
2605 * Patch in the workaround version. */
2607 PL_ppaddr[OP_I_MODULO] =
2608 &Perl_pp_i_modulo_1;
2609 /* Make certain we work right this time, too. */
2610 right = PERL_ABS(right);
2613 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2617 SETi( left % right );
2625 dVAR; dSP; dATARGET;
2626 tryAMAGICbin_MG(add_amg, AMGf_assign);
2628 dPOPTOPiirl_ul_nomg;
2629 SETi( left + right );
2636 dVAR; dSP; dATARGET;
2637 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2639 dPOPTOPiirl_ul_nomg;
2640 SETi( left - right );
2648 tryAMAGICbin_MG(lt_amg, AMGf_set);
2651 SETs(boolSV(left < right));
2659 tryAMAGICbin_MG(gt_amg, AMGf_set);
2662 SETs(boolSV(left > right));
2670 tryAMAGICbin_MG(le_amg, AMGf_set);
2673 SETs(boolSV(left <= right));
2681 tryAMAGICbin_MG(ge_amg, AMGf_set);
2684 SETs(boolSV(left >= right));
2692 tryAMAGICbin_MG(eq_amg, AMGf_set);
2695 SETs(boolSV(left == right));
2703 tryAMAGICbin_MG(ne_amg, AMGf_set);
2706 SETs(boolSV(left != right));
2714 tryAMAGICbin_MG(ncmp_amg, 0);
2721 else if (left < right)
2733 tryAMAGICun_MG(neg_amg, 0);
2735 SV * const sv = TOPs;
2736 IV const i = SvIV_nomg(sv);
2742 /* High falutin' math. */
2747 tryAMAGICbin_MG(atan2_amg, 0);
2750 SETn(Perl_atan2(left, right));
2758 int amg_type = sin_amg;
2759 const char *neg_report = NULL;
2760 NV (*func)(NV) = Perl_sin;
2761 const int op_type = PL_op->op_type;
2778 amg_type = sqrt_amg;
2780 neg_report = "sqrt";
2785 tryAMAGICun_MG(amg_type, 0);
2787 SV * const arg = POPs;
2788 const NV value = SvNV_nomg(arg);
2790 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2791 SET_NUMERIC_STANDARD();
2792 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2795 XPUSHn(func(value));
2800 /* Support Configure command-line overrides for rand() functions.
2801 After 5.005, perhaps we should replace this by Configure support
2802 for drand48(), random(), or rand(). For 5.005, though, maintain
2803 compatibility by calling rand() but allow the user to override it.
2804 See INSTALL for details. --Andy Dougherty 15 July 1998
2806 /* Now it's after 5.005, and Configure supports drand48() and random(),
2807 in addition to rand(). So the overrides should not be needed any more.
2808 --Jarkko Hietaniemi 27 September 1998
2811 #ifndef HAS_DRAND48_PROTO
2812 extern double drand48 (void);
2825 if (!PL_srand_called) {
2826 (void)seedDrand01((Rand_seed_t)seed());
2827 PL_srand_called = TRUE;
2837 const UV anum = (MAXARG < 1) ? seed() : POPu;
2838 (void)seedDrand01((Rand_seed_t)anum);
2839 PL_srand_called = TRUE;
2843 /* Historically srand always returned true. We can avoid breaking
2845 sv_setpvs(TARG, "0 but true");
2854 tryAMAGICun_MG(int_amg, AMGf_numeric);
2856 SV * const sv = TOPs;
2857 const IV iv = SvIV_nomg(sv);
2858 /* XXX it's arguable that compiler casting to IV might be subtly
2859 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2860 else preferring IV has introduced a subtle behaviour change bug. OTOH
2861 relying on floating point to be accurate is a bug. */
2866 else if (SvIOK(sv)) {
2868 SETu(SvUV_nomg(sv));
2873 const NV value = SvNV_nomg(sv);
2875 if (value < (NV)UV_MAX + 0.5) {
2878 SETn(Perl_floor(value));
2882 if (value > (NV)IV_MIN - 0.5) {
2885 SETn(Perl_ceil(value));
2896 tryAMAGICun_MG(abs_amg, AMGf_numeric);
2898 SV * const sv = TOPs;
2899 /* This will cache the NV value if string isn't actually integer */
2900 const IV iv = SvIV_nomg(sv);
2905 else if (SvIOK(sv)) {
2906 /* IVX is precise */
2908 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
2916 /* 2s complement assumption. Also, not really needed as
2917 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2923 const NV value = SvNV_nomg(sv);
2937 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
2941 SV* const sv = POPs;
2943 tmps = (SvPV_const(sv, len));
2945 /* If Unicode, try to downgrade
2946 * If not possible, croak. */
2947 SV* const tsv = sv_2mortal(newSVsv(sv));
2950 sv_utf8_downgrade(tsv, FALSE);
2951 tmps = SvPV_const(tsv, len);
2953 if (PL_op->op_type == OP_HEX)
2956 while (*tmps && len && isSPACE(*tmps))
2960 if (*tmps == 'x' || *tmps == 'X') {
2962 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
2964 else if (*tmps == 'b' || *tmps == 'B')
2965 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
2967 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2969 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2983 SV * const sv = TOPs;
2985 if (SvGAMAGIC(sv)) {
2986 /* For an overloaded or magic scalar, we can't know in advance if
2987 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2988 it likes to cache the length. Maybe that should be a documented
2993 = sv_2pv_flags(sv, &len,
2994 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
2997 if (!SvPADTMP(TARG)) {
2998 sv_setsv(TARG, &PL_sv_undef);
3003 else if (DO_UTF8(sv)) {
3004 SETi(utf8_length((U8*)p, (U8*)p + len));
3008 } else if (SvOK(sv)) {
3009 /* Neither magic nor overloaded. */
3011 SETi(sv_len_utf8(sv));
3015 if (!SvPADTMP(TARG)) {
3016 sv_setsv_nomg(TARG, &PL_sv_undef);
3038 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3040 const IV arybase = CopARYBASE_get(PL_curcop);
3042 const char *repl = NULL;
3044 const int num_args = PL_op->op_private & 7;
3045 bool repl_need_utf8_upgrade = FALSE;
3046 bool repl_is_utf8 = FALSE;
3051 repl = SvPV_const(repl_sv, repl_len);
3052 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
3055 len_iv = SvIV(len_sv);
3056 len_is_uv = SvIOK_UV(len_sv);
3059 pos1_iv = SvIV(pos_sv);
3060 pos1_is_uv = SvIOK_UV(pos_sv);
3066 sv_utf8_upgrade(sv);
3068 else if (DO_UTF8(sv))
3069 repl_need_utf8_upgrade = TRUE;
3071 tmps = SvPV_const(sv, curlen);
3073 utf8_curlen = sv_len_utf8(sv);
3074 if (utf8_curlen == curlen)
3077 curlen = utf8_curlen;
3082 if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
3083 UV pos1_uv = pos1_iv-arybase;
3084 /* Overflow can occur when $[ < 0 */
3085 if (arybase < 0 && pos1_uv < (UV)pos1_iv)
3090 else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
3091 goto bound_fail; /* $[=3; substr($_,2,...) */
3093 else { /* pos < $[ */
3094 if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
3099 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3104 if (pos1_is_uv || pos1_iv > 0) {
3105 if ((UV)pos1_iv > curlen)
3110 if (!len_is_uv && len_iv < 0) {
3111 pos2_iv = curlen + len_iv;
3113 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3116 } else { /* len_iv >= 0 */
3117 if (!pos1_is_uv && pos1_iv < 0) {
3118 pos2_iv = pos1_iv + len_iv;
3119 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3121 if ((UV)len_iv > curlen-(UV)pos1_iv)
3124 pos2_iv = pos1_iv+len_iv;
3134 if (!pos2_is_uv && pos2_iv < 0) {
3135 if (!pos1_is_uv && pos1_iv < 0)
3139 else if (!pos1_is_uv && pos1_iv < 0)
3142 if ((UV)pos2_iv < (UV)pos1_iv)
3144 if ((UV)pos2_iv > curlen)
3148 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3149 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3150 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3151 STRLEN byte_len = len;
3152 STRLEN byte_pos = utf8_curlen
3153 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3155 if (lvalue && !repl) {
3158 if (!SvGMAGICAL(sv)) {
3160 SvPV_force_nolen(sv);
3161 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3162 "Attempt to use reference as lvalue in substr");
3164 if (isGV_with_GP(sv))
3165 SvPV_force_nolen(sv);
3166 else if (SvOK(sv)) /* is it defined ? */
3167 (void)SvPOK_only_UTF8(sv);
3169 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
3172 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3173 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3175 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3176 LvTARGOFF(ret) = pos;
3177 LvTARGLEN(ret) = len;
3180 PUSHs(ret); /* avoid SvSETMAGIC here */
3184 SvTAINTED_off(TARG); /* decontaminate */
3185 SvUTF8_off(TARG); /* decontaminate */
3188 sv_setpvn(TARG, tmps, byte_len);
3189 #ifdef USE_LOCALE_COLLATE
3190 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3196 SV* repl_sv_copy = NULL;
3198 if (repl_need_utf8_upgrade) {
3199 repl_sv_copy = newSVsv(repl_sv);
3200 sv_utf8_upgrade(repl_sv_copy);
3201 repl = SvPV_const(repl_sv_copy, repl_len);
3202 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3206 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3209 SvREFCNT_dec(repl_sv_copy);
3219 Perl_croak(aTHX_ "substr outside of string");
3220 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3227 register const IV size = POPi;
3228 register const IV offset = POPi;
3229 register SV * const src = POPs;
3230 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3233 if (lvalue) { /* it's an lvalue! */
3234 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3235 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3237 LvTARG(ret) = SvREFCNT_inc_simple(src);
3238 LvTARGOFF(ret) = offset;
3239 LvTARGLEN(ret) = size;
3243 SvTAINTED_off(TARG); /* decontaminate */
3247 sv_setuv(ret, do_vecget(src, offset, size));
3263 const char *little_p;
3264 const I32 arybase = CopARYBASE_get(PL_curcop);
3267 const bool is_index = PL_op->op_type == OP_INDEX;
3270 /* arybase is in characters, like offset, so combine prior to the
3271 UTF-8 to bytes calculation. */
3272 offset = POPi - arybase;
3276 big_p = SvPV_const(big, biglen);
3277 little_p = SvPV_const(little, llen);
3279 big_utf8 = DO_UTF8(big);
3280 little_utf8 = DO_UTF8(little);
3281 if (big_utf8 ^ little_utf8) {
3282 /* One needs to be upgraded. */
3283 if (little_utf8 && !PL_encoding) {
3284 /* Well, maybe instead we might be able to downgrade the small
3286 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3289 /* If the large string is ISO-8859-1, and it's not possible to
3290 convert the small string to ISO-8859-1, then there is no
3291 way that it could be found anywhere by index. */
3296 /* At this point, pv is a malloc()ed string. So donate it to temp
3297 to ensure it will get free()d */
3298 little = temp = newSV(0);
3299 sv_usepvn(temp, pv, llen);
3300 little_p = SvPVX(little);
3303 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
3306 sv_recode_to_utf8(temp, PL_encoding);
3308 sv_utf8_upgrade(temp);
3313 big_p = SvPV_const(big, biglen);
3316 little_p = SvPV_const(little, llen);
3320 if (SvGAMAGIC(big)) {
3321 /* Life just becomes a lot easier if I use a temporary here.
3322 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3323 will trigger magic and overloading again, as will fbm_instr()
3325 big = newSVpvn_flags(big_p, biglen,
3326 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3329 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3330 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3331 warn on undef, and we've already triggered a warning with the
3332 SvPV_const some lines above. We can't remove that, as we need to
3333 call some SvPV to trigger overloading early and find out if the
3335 This is all getting to messy. The API isn't quite clean enough,
3336 because data access has side effects.
3338 little = newSVpvn_flags(little_p, llen,
3339 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3340 little_p = SvPVX(little);
3344 offset = is_index ? 0 : biglen;
3346 if (big_utf8 && offset > 0)
3347 sv_pos_u2b(big, &offset, 0);
3353 else if (offset > (I32)biglen)
3355 if (!(little_p = is_index
3356 ? fbm_instr((unsigned char*)big_p + offset,
3357 (unsigned char*)big_p + biglen, little, 0)
3358 : rninstr(big_p, big_p + offset,
3359 little_p, little_p + llen)))
3362 retval = little_p - big_p;
3363 if (retval > 0 && big_utf8)
3364 sv_pos_b2u(big, &retval);
3368 PUSHi(retval + arybase);
3374 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3375 SvTAINTED_off(TARG);
3376 do_sprintf(TARG, SP-MARK, MARK+1);
3377 TAINT_IF(SvTAINTED(TARG));
3389 const U8 *s = (U8*)SvPV_const(argsv, len);
3391 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
3392 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
3393 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
3397 XPUSHu(DO_UTF8(argsv) ?
3398 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
3410 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3412 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3414 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3416 (void) POPs; /* Ignore the argument value. */
3417 value = UNICODE_REPLACEMENT;
3423 SvUPGRADE(TARG,SVt_PV);
3425 if (value > 255 && !IN_BYTES) {
3426 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
3427 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3428 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3430 (void)SvPOK_only(TARG);
3439 *tmps++ = (char)value;
3441 (void)SvPOK_only(TARG);
3443 if (PL_encoding && !IN_BYTES) {
3444 sv_recode_to_utf8(TARG, PL_encoding);
3446 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
3447 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3451 *tmps++ = (char)value;
3467 const char *tmps = SvPV_const(left, len);
3469 if (DO_UTF8(left)) {
3470 /* If Unicode, try to downgrade.
3471 * If not possible, croak.
3472 * Yes, we made this up. */
3473 SV* const tsv = sv_2mortal(newSVsv(left));
3476 sv_utf8_downgrade(tsv, FALSE);
3477 tmps = SvPV_const(tsv, len);
3479 # ifdef USE_ITHREADS
3481 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3482 /* This should be threadsafe because in ithreads there is only
3483 * one thread per interpreter. If this would not be true,
3484 * we would need a mutex to protect this malloc. */
3485 PL_reentrant_buffer->_crypt_struct_buffer =
3486 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3487 #if defined(__GLIBC__) || defined(__EMX__)
3488 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3489 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3490 /* work around glibc-2.2.5 bug */
3491 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3495 # endif /* HAS_CRYPT_R */
3496 # endif /* USE_ITHREADS */
3498 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
3500 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3506 "The crypt() function is unimplemented due to excessive paranoia.");
3510 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3511 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3513 /* Below are several macros that generate code */
3514 /* Generates code to store a unicode codepoint c that is known to occupy
3515 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3516 #define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3518 *(p) = UTF8_TWO_BYTE_HI(c); \
3519 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3522 /* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3523 * available byte after the two bytes */
3524 #define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3526 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3527 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3530 /* Generates code to store the upper case of latin1 character l which is known
3531 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3532 * are only two characters that fit this description, and this macro knows
3533 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3535 #define STORE_NON_LATIN1_UC(p, l) \
3537 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3538 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3539 } else { /* Must be the following letter */ \
3540 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3544 /* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3545 * after the character stored */
3546 #define CAT_NON_LATIN1_UC(p, l) \
3548 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3549 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3551 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3555 /* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3556 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3557 * and must require two bytes to store it. Advances p to point to the next
3558 * available position */
3559 #define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3561 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3562 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3563 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3564 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3565 } else {/* else is one of the other two special cases */ \
3566 CAT_NON_LATIN1_UC((p), (l)); \
3572 /* Actually is both lcfirst() and ucfirst(). Only the first character
3573 * changes. This means that possibly we can change in-place, ie., just
3574 * take the source and change that one character and store it back, but not
3575 * if read-only etc, or if the length changes */
3580 STRLEN slen; /* slen is the byte length of the whole SV. */
3583 bool inplace; /* ? Convert first char only, in-place */
3584 bool doing_utf8 = FALSE; /* ? using utf8 */
3585 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3586 const int op_type = PL_op->op_type;
3589 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3590 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3591 * stored as UTF-8 at s. */
3592 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3593 * lowercased) character stored in tmpbuf. May be either
3594 * UTF-8 or not, but in either case is the number of bytes */
3598 s = (const U8*)SvPV_nomg_const(source, slen);
3600 if (ckWARN(WARN_UNINITIALIZED))
3601 report_uninit(source);
3606 /* We may be able to get away with changing only the first character, in
3607 * place, but not if read-only, etc. Later we may discover more reasons to
3608 * not convert in-place. */
3609 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3611 /* First calculate what the changed first character should be. This affects
3612 * whether we can just swap it out, leaving the rest of the string unchanged,
3613 * or even if have to convert the dest to UTF-8 when the source isn't */
3615 if (! slen) { /* If empty */
3616 need = 1; /* still need a trailing NUL */
3618 else if (DO_UTF8(source)) { /* Is the source utf8? */
3621 /* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
3622 * and doesn't allow for the user to specify their own. When code is added to
3623 * detect if there is a user-defined mapping in force here, and if so to use
3624 * that, then the code below can be compiled. The detection would be a good
3625 * thing anyway, as currently the user-defined mappings only work on utf8
3626 * strings, and thus depend on the chosen internal storage method, which is a
3628 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3629 if (UTF8_IS_INVARIANT(*s)) {
3631 /* An invariant source character is either ASCII or, in EBCDIC, an
3632 * ASCII equivalent or a caseless C1 control. In both these cases,
3633 * the lower and upper cases of any character are also invariants
3634 * (and title case is the same as upper case). So it is safe to
3635 * use the simple case change macros which avoid the overhead of
3636 * the general functions. Note that if perl were to be extended to
3637 * do locale handling in UTF-8 strings, this wouldn't be true in,
3638 * for example, Lithuanian or Turkic. */
3639 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3643 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3646 /* Similarly, if the source character isn't invariant but is in the
3647 * latin1 range (or EBCDIC equivalent thereof), we have the case
3648 * changes compiled into perl, and can avoid the overhead of the
3649 * general functions. In this range, the characters are stored as
3650 * two UTF-8 bytes, and it so happens that any changed-case version
3651 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3655 /* Convert the two source bytes to a single Unicode code point
3656 * value, change case and save for below */
3657 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
3658 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3659 U8 lower = toLOWER_LATIN1(chr);
3660 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3662 else { /* ucfirst */
3663 U8 upper = toUPPER_LATIN1_MOD(chr);
3665 /* Most of the latin1 range characters are well-behaved. Their
3666 * title and upper cases are the same, and are also in the
3667 * latin1 range. The macro above returns their upper (hence
3668 * title) case, and all that need be done is to save the result
3669 * for below. However, several characters are problematic, and
3670 * have to be handled specially. The MOD in the macro name
3671 * above means that these tricky characters all get mapped to
3672 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3673 * This mapping saves some tests for the majority of the
3676 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3678 /* Not tricky. Just save it. */
3679 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3681 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3683 /* This one is tricky because it is two characters long,
3684 * though the UTF-8 is still two bytes, so the stored
3685 * length doesn't change */
3686 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3687 *(tmpbuf + 1) = 's';
3691 /* The other two have their title and upper cases the same,
3692 * but are tricky because the changed-case characters
3693 * aren't in the latin1 range. They, however, do fit into
3694 * two UTF-8 bytes */
3695 STORE_NON_LATIN1_UC(tmpbuf, chr);
3700 #endif /* end of dont want to break user-defined casing */
3702 /* Here, can't short-cut the general case */
3704 utf8_to_uvchr(s, &ulen);
3705 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3706 else toLOWER_utf8(s, tmpbuf, &tculen);
3708 /* we can't do in-place if the length changes. */
3709 if (ulen != tculen) inplace = FALSE;
3710 need = slen + 1 - ulen + tculen;
3711 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3715 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3716 * latin1 is treated as caseless. Note that a locale takes
3718 tculen = 1; /* Most characters will require one byte, but this will
3719 * need to be overridden for the tricky ones */
3722 if (op_type == OP_LCFIRST) {
3724 /* lower case the first letter: no trickiness for any character */
3725 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3726 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3729 else if (IN_LOCALE_RUNTIME) {
3730 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3731 * have upper and title case different
3734 else if (! IN_UNI_8_BIT) {
3735 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3736 * on EBCDIC machines whatever the
3737 * native function does */
3739 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3740 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3742 /* tmpbuf now has the correct title case for all latin1 characters
3743 * except for the several ones that have tricky handling. All
3744 * of these are mapped by the MOD to the letter below. */
3745 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3747 /* The length is going to change, with all three of these, so
3748 * can't replace just the first character */
3751 /* We use the original to distinguish between these tricky
3753 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3754 /* Two character title case 'Ss', but can remain non-UTF-8 */
3757 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3762 /* The other two tricky ones have their title case outside
3763 * latin1. It is the same as their upper case. */
3765 STORE_NON_LATIN1_UC(tmpbuf, *s);
3767 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3768 * and their upper cases is 2. */
3771 /* The entire result will have to be in UTF-8. Assume worst
3772 * case sizing in conversion. (all latin1 characters occupy
3773 * at most two bytes in utf8) */
3774 convert_source_to_utf8 = TRUE;
3775 need = slen * 2 + 1;
3777 } /* End of is one of the three special chars */
3778 } /* End of use Unicode (Latin1) semantics */
3779 } /* End of changing the case of the first character */
3781 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3782 * generate the result */
3785 /* We can convert in place. This means we change just the first
3786 * character without disturbing the rest; no need to grow */
3788 s = d = (U8*)SvPV_force_nomg(source, slen);
3794 /* Here, we can't convert in place; we earlier calculated how much
3795 * space we will need, so grow to accommodate that */
3796 SvUPGRADE(dest, SVt_PV);
3797 d = (U8*)SvGROW(dest, need);
3798 (void)SvPOK_only(dest);
3805 if (! convert_source_to_utf8) {
3807 /* Here both source and dest are in UTF-8, but have to create
3808 * the entire output. We initialize the result to be the
3809 * title/lower cased first character, and then append the rest
3811 sv_setpvn(dest, (char*)tmpbuf, tculen);
3813 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3817 const U8 *const send = s + slen;
3819 /* Here the dest needs to be in UTF-8, but the source isn't,
3820 * except we earlier UTF-8'd the first character of the source
3821 * into tmpbuf. First put that into dest, and then append the
3822 * rest of the source, converting it to UTF-8 as we go. */
3824 /* Assert tculen is 2 here because the only two characters that
3825 * get to this part of the code have 2-byte UTF-8 equivalents */
3827 *d++ = *(tmpbuf + 1);
3828 s++; /* We have just processed the 1st char */
3830 for (; s < send; s++) {
3831 d = uvchr_to_utf8(d, *s);
3834 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3838 else { /* in-place UTF-8. Just overwrite the first character */
3839 Copy(tmpbuf, d, tculen, U8);
3840 SvCUR_set(dest, need - 1);
3843 else { /* Neither source nor dest are in or need to be UTF-8 */
3845 if (IN_LOCALE_RUNTIME) {
3849 if (inplace) { /* in-place, only need to change the 1st char */
3852 else { /* Not in-place */
3854 /* Copy the case-changed character(s) from tmpbuf */
3855 Copy(tmpbuf, d, tculen, U8);
3856 d += tculen - 1; /* Code below expects d to point to final
3857 * character stored */
3860 else { /* empty source */
3861 /* See bug #39028: Don't taint if empty */
3865 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3866 * the destination to retain that flag */
3870 if (!inplace) { /* Finish the rest of the string, unchanged */
3871 /* This will copy the trailing NUL */
3872 Copy(s + 1, d + 1, slen, U8);
3873 SvCUR_set(dest, need - 1);
3876 if (dest != source && SvTAINTED(source))
3882 /* There's so much setup/teardown code common between uc and lc, I wonder if
3883 it would be worth merging the two, and just having a switch outside each
3884 of the three tight loops. There is less and less commonality though */
3898 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
3899 && SvTEMP(source) && !DO_UTF8(source)
3900 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3902 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3903 * make the loop tight, so we overwrite the source with the dest before
3904 * looking at it, and we need to look at the original source
3905 * afterwards. There would also need to be code added to handle
3906 * switching to not in-place in midstream if we run into characters
3907 * that change the length.
3910 s = d = (U8*)SvPV_force_nomg(source, len);
3917 /* The old implementation would copy source into TARG at this point.
3918 This had the side effect that if source was undef, TARG was now
3919 an undefined SV with PADTMP set, and they don't warn inside
3920 sv_2pv_flags(). However, we're now getting the PV direct from
3921 source, which doesn't have PADTMP set, so it would warn. Hence the
3925 s = (const U8*)SvPV_nomg_const(source, len);
3927 if (ckWARN(WARN_UNINITIALIZED))
3928 report_uninit(source);
3934 SvUPGRADE(dest, SVt_PV);
3935 d = (U8*)SvGROW(dest, min);
3936 (void)SvPOK_only(dest);
3941 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3942 to check DO_UTF8 again here. */
3944 if (DO_UTF8(source)) {
3945 const U8 *const send = s + len;
3946 U8 tmpbuf[UTF8_MAXBYTES+1];
3948 /* All occurrences of these are to be moved to follow any other marks.
3949 * This is context-dependent. We may not be passed enough context to
3950 * move the iota subscript beyond all of them, but we do the best we can
3951 * with what we're given. The result is always better than if we
3952 * hadn't done this. And, the problem would only arise if we are
3953 * passed a character without all its combining marks, which would be
3954 * the caller's mistake. The information this is based on comes from a
3955 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3956 * itself) and so can't be checked properly to see if it ever gets
3957 * revised. But the likelihood of it changing is remote */
3958 bool in_iota_subscript = FALSE;
3961 if (in_iota_subscript && ! is_utf8_mark(s)) {
3962 /* A non-mark. Time to output the iota subscript */
3963 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
3964 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3966 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3967 in_iota_subscript = FALSE;
3971 /* See comments at the first instance in this file of this ifdef */
3972 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3974 /* If the UTF-8 character is invariant, then it is in the range
3975 * known by the standard macro; result is only one byte long */
3976 if (UTF8_IS_INVARIANT(*s)) {
3980 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3982 /* Likewise, if it fits in a byte, its case change is in our
3984 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++);
3985 U8 upper = toUPPER_LATIN1_MOD(orig);
3986 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
3994 /* Otherwise, need the general UTF-8 case. Get the changed
3995 * case value and copy it to the output buffer */
3997 const STRLEN u = UTF8SKIP(s);
4000 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
4001 if (uv == GREEK_CAPITAL_LETTER_IOTA
4002 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
4004 in_iota_subscript = TRUE;
4007 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4008 /* If the eventually required minimum size outgrows
4009 * the available space, we need to grow. */
4010 const UV o = d - (U8*)SvPVX_const(dest);
4012 /* If someone uppercases one million U+03B0s we
4013 * SvGROW() one million times. Or we could try
4014 * guessing how much to allocate without allocating too
4015 * much. Such is life. See corresponding comment in
4016 * lc code for another option */
4018 d = (U8*)SvPVX(dest) + o;
4020 Copy(tmpbuf, d, ulen, U8);
4026 if (in_iota_subscript) {
4027 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
4031 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4033 else { /* Not UTF-8 */
4035 const U8 *const send = s + len;
4037 /* Use locale casing if in locale; regular style if not treating
4038 * latin1 as having case; otherwise the latin1 casing. Do the
4039 * whole thing in a tight loop, for speed, */
4040 if (IN_LOCALE_RUNTIME) {
4043 for (; s < send; d++, s++)
4044 *d = toUPPER_LC(*s);
4046 else if (! IN_UNI_8_BIT) {
4047 for (; s < send; d++, s++) {
4052 for (; s < send; d++, s++) {
4053 *d = toUPPER_LATIN1_MOD(*s);
4054 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
4056 /* The mainstream case is the tight loop above. To avoid
4057 * extra tests in that, all three characters that require
4058 * special handling are mapped by the MOD to the one tested
4060 * Use the source to distinguish between the three cases */
4062 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4064 /* uc() of this requires 2 characters, but they are
4065 * ASCII. If not enough room, grow the string */
4066 if (SvLEN(dest) < ++min) {
4067 const UV o = d - (U8*)SvPVX_const(dest);
4069 d = (U8*)SvPVX(dest) + o;
4071 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4072 continue; /* Back to the tight loop; still in ASCII */
4075 /* The other two special handling characters have their
4076 * upper cases outside the latin1 range, hence need to be
4077 * in UTF-8, so the whole result needs to be in UTF-8. So,
4078 * here we are somewhere in the middle of processing a
4079 * non-UTF-8 string, and realize that we will have to convert
4080 * the whole thing to UTF-8. What to do? There are
4081 * several possibilities. The simplest to code is to
4082 * convert what we have so far, set a flag, and continue on
4083 * in the loop. The flag would be tested each time through
4084 * the loop, and if set, the next character would be
4085 * converted to UTF-8 and stored. But, I (khw) didn't want
4086 * to slow down the mainstream case at all for this fairly
4087 * rare case, so I didn't want to add a test that didn't
4088 * absolutely have to be there in the loop, besides the
4089 * possibility that it would get too complicated for
4090 * optimizers to deal with. Another possibility is to just
4091 * give up, convert the source to UTF-8, and restart the
4092 * function that way. Another possibility is to convert
4093 * both what has already been processed and what is yet to
4094 * come separately to UTF-8, then jump into the loop that
4095 * handles UTF-8. But the most efficient time-wise of the
4096 * ones I could think of is what follows, and turned out to
4097 * not require much extra code. */
4099 /* Convert what we have so far into UTF-8, telling the
4100 * function that we know it should be converted, and to
4101 * allow extra space for what we haven't processed yet.
4102 * Assume the worst case space requirements for converting
4103 * what we haven't processed so far: that it will require
4104 * two bytes for each remaining source character, plus the
4105 * NUL at the end. This may cause the string pointer to
4106 * move, so re-find it. */
4108 len = d - (U8*)SvPVX_const(dest);
4109 SvCUR_set(dest, len);
4110 len = sv_utf8_upgrade_flags_grow(dest,
4111 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4113 d = (U8*)SvPVX(dest) + len;
4115 /* And append the current character's upper case in UTF-8 */
4116 CAT_NON_LATIN1_UC(d, *s);
4118 /* Now process the remainder of the source, converting to
4119 * upper and UTF-8. If a resulting byte is invariant in
4120 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4121 * append it to the output. */
4124 for (; s < send; s++) {
4125 U8 upper = toUPPER_LATIN1_MOD(*s);
4126 if UTF8_IS_INVARIANT(upper) {
4130 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4134 /* Here have processed the whole source; no need to continue
4135 * with the outer loop. Each character has been converted
4136 * to upper case and converted to UTF-8 */
4139 } /* End of processing all latin1-style chars */
4140 } /* End of processing all chars */
4141 } /* End of source is not empty */
4143 if (source != dest) {
4144 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4145 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4147 } /* End of isn't utf8 */
4148 if (dest != source && SvTAINTED(source))
4167 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
4168 && SvTEMP(source) && !DO_UTF8(source)) {
4170 /* We can convert in place, as lowercasing anything in the latin1 range
4171 * (or else DO_UTF8 would have been on) doesn't lengthen it */
4173 s = d = (U8*)SvPV_force_nomg(source, len);
4180 /* The old implementation would copy source into TARG at this point.
4181 This had the side effect that if source was undef, TARG was now
4182 an undefined SV with PADTMP set, and they don't warn inside
4183 sv_2pv_flags(). However, we're now getting the PV direct from
4184 source, which doesn't have PADTMP set, so it would warn. Hence the
4188 s = (const U8*)SvPV_nomg_const(source, len);
4190 if (ckWARN(WARN_UNINITIALIZED))
4191 report_uninit(source);
4197 SvUPGRADE(dest, SVt_PV);
4198 d = (U8*)SvGROW(dest, min);
4199 (void)SvPOK_only(dest);
4204 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4205 to check DO_UTF8 again here. */
4207 if (DO_UTF8(source)) {
4208 const U8 *const send = s + len;
4209 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4212 /* See comments at the first instance in this file of this ifdef */
4213 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4214 if (UTF8_IS_INVARIANT(*s)) {
4216 /* Invariant characters use the standard mappings compiled in.
4221 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
4223 /* As do the ones in the Latin1 range */
4224 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++));
4225 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
4230 /* Here, is utf8 not in Latin-1 range, have to go out and get
4231 * the mappings from the tables. */
4233 const STRLEN u = UTF8SKIP(s);
4236 #ifndef CONTEXT_DEPENDENT_CASING
4237 toLOWER_utf8(s, tmpbuf, &ulen);
4239 /* This is ifdefd out because it needs more work and thought. It isn't clear
4240 * that we should do it.
4241 * A minor objection is that this is based on a hard-coded rule from the
4242 * Unicode standard, and may change, but this is not very likely at all.
4243 * mktables should check and warn if it does.
4244 * More importantly, if the sigma occurs at the end of the string, we don't
4245 * have enough context to know whether it is part of a larger string or going
4246 * to be or not. It may be that we are passed a subset of the context, via
4247 * a \U...\E, for example, and we could conceivably know the larger context if
4248 * code were changed to pass that in. But, if the string passed in is an
4249 * intermediate result, and the user concatenates two strings together
4250 * after we have made a final sigma, that would be wrong. If the final sigma
4251 * occurs in the middle of the string we are working on, then we know that it
4252 * should be a final sigma, but otherwise we can't be sure. */
4254 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4256 /* If the lower case is a small sigma, it may be that we need
4257 * to change it to a final sigma. This happens at the end of
4258 * a word that contains more than just this character, and only
4259 * when we started with a capital sigma. */
4260 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4261 s > send - len && /* Makes sure not the first letter */
4262 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4265 /* We use the algorithm in:
4266 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4267 * is a CAPITAL SIGMA): If C is preceded by a sequence
4268 * consisting of a cased letter and a case-ignorable
4269 * sequence, and C is not followed by a sequence consisting
4270 * of a case ignorable sequence and then a cased letter,
4271 * then when lowercasing C, C becomes a final sigma */
4273 /* To determine if this is the end of a word, need to peek
4274 * ahead. Look at the next character */
4275 const U8 *peek = s + u;
4277 /* Skip any case ignorable characters */
4278 while (peek < send && is_utf8_case_ignorable(peek)) {
4279 peek += UTF8SKIP(peek);
4282 /* If we reached the end of the string without finding any
4283 * non-case ignorable characters, or if the next such one
4284 * is not-cased, then we have met the conditions for it
4285 * being a final sigma with regards to peek ahead, and so
4286 * must do peek behind for the remaining conditions. (We
4287 * know there is stuff behind to look at since we tested
4288 * above that this isn't the first letter) */
4289 if (peek >= send || ! is_utf8_cased(peek)) {
4290 peek = utf8_hop(s, -1);
4292 /* Here are at the beginning of the first character
4293 * before the original upper case sigma. Keep backing
4294 * up, skipping any case ignorable characters */
4295 while (is_utf8_case_ignorable(peek)) {
4296 peek = utf8_hop(peek, -1);
4299 /* Here peek points to the first byte of the closest
4300 * non-case-ignorable character before the capital
4301 * sigma. If it is cased, then by the Unicode
4302 * algorithm, we should use a small final sigma instead
4303 * of what we have */
4304 if (is_utf8_cased(peek)) {
4305 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4306 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4310 else { /* Not a context sensitive mapping */
4311 #endif /* End of commented out context sensitive */
4312 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4314 /* If the eventually required minimum size outgrows
4315 * the available space, we need to grow. */
4316 const UV o = d - (U8*)SvPVX_const(dest);
4318 /* If someone lowercases one million U+0130s we
4319 * SvGROW() one million times. Or we could try
4320 * guessing how much to allocate without allocating too
4321 * much. Such is life. Another option would be to
4322 * grow an extra byte or two more each time we need to
4323 * grow, which would cut down the million to 500K, with
4326 d = (U8*)SvPVX(dest) + o;
4328 #ifdef CONTEXT_DEPENDENT_CASING
4331 /* Copy the newly lowercased letter to the output buffer we're
4333 Copy(tmpbuf, d, ulen, U8);
4336 #ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4339 } /* End of looping through the source string */
4342 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4343 } else { /* Not utf8 */
4345 const U8 *const send = s + len;
4347 /* Use locale casing if in locale; regular style if not treating
4348 * latin1 as having case; otherwise the latin1 casing. Do the
4349 * whole thing in a tight loop, for speed, */
4350 if (IN_LOCALE_RUNTIME) {
4353 for (; s < send; d++, s++)
4354 *d = toLOWER_LC(*s);
4356 else if (! IN_UNI_8_BIT) {
4357 for (; s < send; d++, s++) {
4362 for (; s < send; d++, s++) {
4363 *d = toLOWER_LATIN1(*s);
4367 if (source != dest) {
4369 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4372 if (dest != source && SvTAINTED(source))
4381 SV * const sv = TOPs;
4383 register const char *s = SvPV_const(sv,len);
4385 SvUTF8_off(TARG); /* decontaminate */
4388 SvUPGRADE(TARG, SVt_PV);
4389 SvGROW(TARG, (len * 2) + 1);
4393 if (UTF8_IS_CONTINUED(*s)) {
4394 STRLEN ulen = UTF8SKIP(s);
4418 SvCUR_set(TARG, d - SvPVX_const(TARG));
4419 (void)SvPOK_only_UTF8(TARG);
4422 sv_setpvn(TARG, s, len);
4431 dVAR; dSP; dMARK; dORIGMARK;
4432 register AV *const av = MUTABLE_AV(POPs);
4433 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4435 if (SvTYPE(av) == SVt_PVAV) {
4436 const I32 arybase = CopARYBASE_get(PL_curcop);
4437 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4438 bool can_preserve = FALSE;
4444 can_preserve = SvCANEXISTDELETE(av);
4447 if (lval && localizing) {
4450 for (svp = MARK + 1; svp <= SP; svp++) {
4451 const I32 elem = SvIV(*svp);
4455 if (max > AvMAX(av))
4459 while (++MARK <= SP) {
4461 I32 elem = SvIV(*MARK);
4462 bool preeminent = TRUE;
4466 if (localizing && can_preserve) {
4467 /* If we can determine whether the element exist,
4468 * Try to preserve the existenceness of a tied array
4469 * element by using EXISTS and DELETE if possible.
4470 * Fallback to FETCH and STORE otherwise. */
4471 preeminent = av_exists(av, elem);
4474 svp = av_fetch(av, elem, lval);
4476 if (!svp || *svp == &PL_sv_undef)
4477 DIE(aTHX_ PL_no_aelem, elem);
4480 save_aelem(av, elem, svp);
4482 SAVEADELETE(av, elem);
4485 *MARK = svp ? *svp : &PL_sv_undef;
4488 if (GIMME != G_ARRAY) {
4490 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4496 /* Smart dereferencing for keys, values and each */
4508 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4513 "Type of argument to %s must be unblessed hashref or arrayref",
4514 PL_op_desc[PL_op->op_type] );
4517 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4519 "Can't modify %s in %s",
4520 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4523 /* Delegate to correct function for op type */
4525 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4526 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4529 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4537 AV *array = MUTABLE_AV(POPs);
4538 const I32 gimme = GIMME_V;
4539 IV *iterp = Perl_av_iter_p(aTHX_ array);
4540 const IV current = (*iterp)++;
4542 if (current > av_len(array)) {
4544 if (gimme == G_SCALAR)
4551 mPUSHi(CopARYBASE_get(PL_curcop) + current);
4552 if (gimme == G_ARRAY) {
4553 SV **const element = av_fetch(array, current, 0);
4554 PUSHs(element ? *element : &PL_sv_undef);