This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add tests for precedence of CORE:: subs
[perl5.git] / pp.c
CommitLineData
a0d0e21e 1/* pp.c
79072805 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
79072805 5 *
a0d0e21e
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
79072805 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
15 *
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
a0d0e21e 17 */
79072805 18
166f8a29
DM
19/* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
24 */
25
79072805 26#include "EXTERN.h"
864dbfa3 27#define PERL_IN_PP_C
79072805 28#include "perl.h"
77bc9082 29#include "keywords.h"
79072805 30
a4af207c
JH
31#include "reentr.h"
32
dfe9444c
AD
33/* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
35 --AD 2/20/1998
36*/
37#ifdef NEED_GETPID_PROTO
38extern Pid_t getpid (void);
8ac85365
NIS
39#endif
40
0630166f
SP
41/*
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
44 */
45#if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
47#endif
48
13017935
SM
49/* variations on pp_null */
50
93a17b20
LW
51PP(pp_stub)
52{
97aff369 53 dVAR;
39644a26 54 dSP;
54310121 55 if (GIMME_V == G_SCALAR)
3280af22 56 XPUSHs(&PL_sv_undef);
93a17b20
LW
57 RETURN;
58}
59
79072805
LW
60/* Pushy stuff. */
61
93a17b20
LW
62PP(pp_padav)
63{
97aff369 64 dVAR; dSP; dTARGET;
13017935 65 I32 gimme;
e190e9b4 66 assert(SvTYPE(TARG) == SVt_PVAV);
533c011a 67 if (PL_op->op_private & OPpLVAL_INTRO)
a5911867
RGS
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
85e6fe83 70 EXTEND(SP, 1);
533c011a 71 if (PL_op->op_flags & OPf_REF) {
85e6fe83 72 PUSHs(TARG);
93a17b20 73 RETURN;
40c94d11
FC
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
78f9721b
SM
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
79 PUSHs(TARG);
80 RETURN;
40c94d11 81 }
85e6fe83 82 }
13017935
SM
83 gimme = GIMME_V;
84 if (gimme == G_ARRAY) {
502c6561 85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
85e6fe83 86 EXTEND(SP, maxarg);
93965878
NIS
87 if (SvMAGICAL(TARG)) {
88 U32 i;
eb160463 89 for (i=0; i < (U32)maxarg; i++) {
502c6561 90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
3280af22 91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
93965878
NIS
92 }
93 }
94 else {
502c6561 95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
93965878 96 }
85e6fe83
LW
97 SP += maxarg;
98 }
13017935 99 else if (gimme == G_SCALAR) {
1b6737cc 100 SV* const sv = sv_newmortal();
502c6561 101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
85e6fe83
LW
102 sv_setiv(sv, maxarg);
103 PUSHs(sv);
104 }
105 RETURN;
93a17b20
LW
106}
107
108PP(pp_padhv)
109{
97aff369 110 dVAR; dSP; dTARGET;
54310121 111 I32 gimme;
112
e190e9b4 113 assert(SvTYPE(TARG) == SVt_PVHV);
93a17b20 114 XPUSHs(TARG);
533c011a 115 if (PL_op->op_private & OPpLVAL_INTRO)
a5911867
RGS
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
533c011a 118 if (PL_op->op_flags & OPf_REF)
93a17b20 119 RETURN;
40c94d11
FC
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
78f9721b
SM
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
125 RETURN;
40c94d11 126 }
78f9721b 127 }
54310121 128 gimme = GIMME_V;
129 if (gimme == G_ARRAY) {
981b7185 130 RETURNOP(Perl_do_kv(aTHX));
85e6fe83 131 }
54310121 132 else if (gimme == G_SCALAR) {
85fbaab2 133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
85e6fe83 134 SETs(sv);
85e6fe83 135 }
54310121 136 RETURN;
93a17b20
LW
137}
138
79072805
LW
139/* Translations. */
140
4bdf8368 141static const char S_no_symref_sv[] =
def89bff
NC
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
143
79072805
LW
144PP(pp_rv2gv)
145{
97aff369 146 dVAR; dSP; dTOPss;
8ec5e241 147
f64c9ac5 148 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
ed6116ce 149 if (SvROK(sv)) {
a0d0e21e 150 wasref:
93d7320b
DM
151 if (SvAMAGIC(sv)) {
152 sv = amagic_deref_call(sv, to_gv_amg);
153 SPAGAIN;
154 }
ed6116ce 155 sv = SvRV(sv);
b1dadf13 156 if (SvTYPE(sv) == SVt_PVIO) {
159b6efe 157 GV * const gv = MUTABLE_GV(sv_newmortal());
b1dadf13 158 gv_init(gv, 0, "", 0, 0);
a45c7426 159 GvIOp(gv) = MUTABLE_IO(sv);
b37c2d43 160 SvREFCNT_inc_void_NN(sv);
ad64d0ec 161 sv = MUTABLE_SV(gv);
ef54e1a4 162 }
6e592b3a 163 else if (!isGV_with_GP(sv))
cea2e8a9 164 DIE(aTHX_ "Not a GLOB reference");
79072805
LW
165 }
166 else {
6e592b3a 167 if (!isGV_with_GP(sv)) {
afd1915d 168 if (!SvOK(sv) && sv != &PL_sv_undef) {
b13b2135 169 /* If this is a 'my' scalar and flag is set then vivify
853846ea 170 * NI-S 1999/05/07
b13b2135 171 */
ac53db4c 172 if (SvREADONLY(sv))
6ad8f254 173 Perl_croak_no_modify(aTHX);
1d8d4d2a 174 if (PL_op->op_private & OPpDEREF) {
2c8ac474
GS
175 GV *gv;
176 if (cUNOP->op_targ) {
177 STRLEN len;
0bd48802
AL
178 SV * const namesv = PAD_SV(cUNOP->op_targ);
179 const char * const name = SvPV(namesv, len);
159b6efe 180 gv = MUTABLE_GV(newSV(0));
2c8ac474
GS
181 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
182 }
183 else {
0bd48802 184 const char * const name = CopSTASHPV(PL_curcop);
2c8ac474 185 gv = newGVgen(name);
1d8d4d2a 186 }
43230e26 187 prepare_SV_for_RV(sv);
ad64d0ec 188 SvRV_set(sv, MUTABLE_SV(gv));
853846ea 189 SvROK_on(sv);
1d8d4d2a 190 SvSETMAGIC(sv);
853846ea 191 goto wasref;
2c8ac474 192 }
533c011a
NIS
193 if (PL_op->op_flags & OPf_REF ||
194 PL_op->op_private & HINT_STRICT_REFS)
cea2e8a9 195 DIE(aTHX_ PL_no_usym, "a symbol");
599cee73 196 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 197 report_uninit(sv);
a0d0e21e
LW
198 RETSETUNDEF;
199 }
35cd451c
GS
200 if ((PL_op->op_flags & OPf_SPECIAL) &&
201 !(PL_op->op_flags & OPf_MOD))
202 {
ad64d0ec 203 SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
7a5fd60d
NC
204 if (!temp
205 && (!is_gv_magical_sv(sv,0)
ad64d0ec
NC
206 || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
207 SVt_PVGV))))) {
35cd451c 208 RETSETUNDEF;
c9d5ac95 209 }
7a5fd60d 210 sv = temp;
35cd451c
GS
211 }
212 else {
213 if (PL_op->op_private & HINT_STRICT_REFS)
10b53e54 214 DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
e26df76a
NC
215 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
216 == OPpDONT_INIT_GV) {
217 /* We are the target of a coderef assignment. Return
218 the scalar unchanged, and let pp_sasssign deal with
219 things. */
220 RETURN;
221 }
7ffa7e75
FC
222 {
223 STRLEN len;
224 const char * const nambeg = SvPV_nomg_const(sv, len);
225 sv = MUTABLE_SV(
226 gv_fetchpvn_flags(
227 nambeg, len, GV_ADD | SvUTF8(sv), SVt_PVGV
228 )
229 );
230 }
35cd451c 231 }
2acc3314
FC
232 /* FAKE globs in the symbol table cause weird bugs (#77810) */
233 if (sv) SvFAKE_off(sv);
93a17b20 234 }
79072805 235 }
2acc3314
FC
236 if (sv && SvFAKE(sv)) {
237 SV *newsv = sv_newmortal();
5cf4b255 238 sv_setsv_flags(newsv, sv, 0);
2acc3314 239 SvFAKE_off(newsv);
d8906c05 240 sv = newsv;
2acc3314 241 }
d8906c05
FC
242 if (PL_op->op_private & OPpLVAL_INTRO)
243 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
244 SETs(sv);
79072805
LW
245 RETURN;
246}
247
dc3c76f8
NC
248/* Helper function for pp_rv2sv and pp_rv2av */
249GV *
fe9845cc
RB
250Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
251 const svtype type, SV ***spp)
dc3c76f8
NC
252{
253 dVAR;
254 GV *gv;
255
7918f24d
NC
256 PERL_ARGS_ASSERT_SOFTREF2XV;
257
dc3c76f8
NC
258 if (PL_op->op_private & HINT_STRICT_REFS) {
259 if (SvOK(sv))
10b53e54 260 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
dc3c76f8
NC
261 else
262 Perl_die(aTHX_ PL_no_usym, what);
263 }
264 if (!SvOK(sv)) {
fd1d9b5c
FC
265 if (
266 PL_op->op_flags & OPf_REF &&
267 PL_op->op_next->op_type != OP_BOOLKEYS
268 )
dc3c76f8
NC
269 Perl_die(aTHX_ PL_no_usym, what);
270 if (ckWARN(WARN_UNINITIALIZED))
271 report_uninit(sv);
272 if (type != SVt_PV && GIMME_V == G_ARRAY) {
273 (*spp)--;
274 return NULL;
275 }
276 **spp = &PL_sv_undef;
277 return NULL;
278 }
279 if ((PL_op->op_flags & OPf_SPECIAL) &&
280 !(PL_op->op_flags & OPf_MOD))
281 {
81e3fc25 282 gv = gv_fetchsv(sv, 0, type);
dc3c76f8
NC
283 if (!gv
284 && (!is_gv_magical_sv(sv,0)
81e3fc25 285 || !(gv = gv_fetchsv(sv, GV_ADD, type))))
dc3c76f8
NC
286 {
287 **spp = &PL_sv_undef;
288 return NULL;
289 }
290 }
291 else {
7ffa7e75
FC
292 STRLEN len;
293 const char * const nambeg = SvPV_nomg_const(sv, len);
294 gv = gv_fetchpvn_flags(nambeg, len, GV_ADD | SvUTF8(sv), type);
dc3c76f8
NC
295 }
296 return gv;
297}
298
79072805
LW
299PP(pp_rv2sv)
300{
97aff369 301 dVAR; dSP; dTOPss;
c445ea15 302 GV *gv = NULL;
79072805 303
0824d667
DM
304 if (!(PL_op->op_private & OPpDEREFed))
305 SvGETMAGIC(sv);
ed6116ce 306 if (SvROK(sv)) {
93d7320b
DM
307 if (SvAMAGIC(sv)) {
308 sv = amagic_deref_call(sv, to_sv_amg);
309 SPAGAIN;
310 }
f5284f61 311
ed6116ce 312 sv = SvRV(sv);
79072805
LW
313 switch (SvTYPE(sv)) {
314 case SVt_PVAV:
315 case SVt_PVHV:
316 case SVt_PVCV:
cbae9b9f
YST
317 case SVt_PVFM:
318 case SVt_PVIO:
cea2e8a9 319 DIE(aTHX_ "Not a SCALAR reference");
42d0e0b7 320 default: NOOP;
79072805
LW
321 }
322 }
323 else {
159b6efe 324 gv = MUTABLE_GV(sv);
748a9306 325
6e592b3a 326 if (!isGV_with_GP(gv)) {
dc3c76f8
NC
327 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
328 if (!gv)
329 RETURN;
463ee0b2 330 }
29c711a3 331 sv = GvSVn(gv);
a0d0e21e 332 }
533c011a 333 if (PL_op->op_flags & OPf_MOD) {
82d03984
RGS
334 if (PL_op->op_private & OPpLVAL_INTRO) {
335 if (cUNOP->op_first->op_type == OP_NULL)
159b6efe 336 sv = save_scalar(MUTABLE_GV(TOPs));
82d03984
RGS
337 else if (gv)
338 sv = save_scalar(gv);
339 else
f1f66076 340 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
82d03984 341 }
533c011a
NIS
342 else if (PL_op->op_private & OPpDEREF)
343 vivify_ref(sv, PL_op->op_private & OPpDEREF);
79072805 344 }
a0d0e21e 345 SETs(sv);
79072805
LW
346 RETURN;
347}
348
349PP(pp_av2arylen)
350{
97aff369 351 dVAR; dSP;
502c6561 352 AV * const av = MUTABLE_AV(TOPs);
02d85cc3
EB
353 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
354 if (lvalue) {
355 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
356 if (!*sv) {
357 *sv = newSV_type(SVt_PVMG);
358 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
359 }
360 SETs(*sv);
361 } else {
362 SETs(sv_2mortal(newSViv(
363 AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
364 )));
79072805 365 }
79072805
LW
366 RETURN;
367}
368
a0d0e21e
LW
369PP(pp_pos)
370{
2154eca7 371 dVAR; dSP; dPOPss;
8ec5e241 372
78f9721b 373 if (PL_op->op_flags & OPf_MOD || LVRET) {
16eb5365
FC
374 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
375 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
376 LvTYPE(ret) = '.';
377 LvTARG(ret) = SvREFCNT_inc_simple(sv);
2154eca7 378 PUSHs(ret); /* no SvSETMAGIC */
a0d0e21e
LW
379 RETURN;
380 }
381 else {
a0d0e21e 382 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
1b6737cc 383 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
565764a8 384 if (mg && mg->mg_len >= 0) {
2154eca7 385 dTARGET;
a0ed51b3 386 I32 i = mg->mg_len;
7e2040f0 387 if (DO_UTF8(sv))
a0ed51b3 388 sv_pos_b2u(sv, &i);
fc15ae8f 389 PUSHi(i + CopARYBASE_get(PL_curcop));
a0d0e21e
LW
390 RETURN;
391 }
392 }
393 RETPUSHUNDEF;
394 }
395}
396
79072805
LW
397PP(pp_rv2cv)
398{
97aff369 399 dVAR; dSP;
79072805 400 GV *gv;
1eced8f8 401 HV *stash_unused;
c445ea15
AL
402 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
403 ? 0
404 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
405 ? GV_ADD|GV_NOEXPAND
406 : GV_ADD;
4633a7c4
LW
407 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
408 /* (But not in defined().) */
e26df76a 409
1eced8f8 410 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
07055b4c
CS
411 if (cv) {
412 if (CvCLONE(cv))
ad64d0ec 413 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
d32f2495
SC
414 if ((PL_op->op_private & OPpLVAL_INTRO)) {
415 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
416 cv = GvCV(gv);
417 if (!CvLVALUE(cv))
418 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
419 }
07055b4c 420 }
e26df76a 421 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
ea726b52 422 cv = MUTABLE_CV(gv);
e26df76a 423 }
07055b4c 424 else
ea726b52 425 cv = MUTABLE_CV(&PL_sv_undef);
ad64d0ec 426 SETs(MUTABLE_SV(cv));
79072805
LW
427 RETURN;
428}
429
c07a80fd 430PP(pp_prototype)
431{
97aff369 432 dVAR; dSP;
c07a80fd 433 CV *cv;
434 HV *stash;
435 GV *gv;
fabdb6c0 436 SV *ret = &PL_sv_undef;
c07a80fd 437
b6c543e3 438 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
e3f73d4e 439 const char * s = SvPVX_const(TOPs);
b6c543e3 440 if (strnEQ(s, "CORE::", 6)) {
9927957a
FC
441 SV *const sv =
442 core_prototype(NULL, s + 6, SvCUR(TOPs) - 6, NULL, 1);
b8c38f0a
FC
443 if (sv) ret = sv;
444 goto set;
b6c543e3
IZ
445 }
446 }
f2c0649b 447 cv = sv_2cv(TOPs, &stash, &gv, 0);
5f05dabc 448 if (cv && SvPOK(cv))
59cd0e26 449 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
b6c543e3 450 set:
c07a80fd 451 SETs(ret);
452 RETURN;
453}
454
a0d0e21e
LW
455PP(pp_anoncode)
456{
97aff369 457 dVAR; dSP;
ea726b52 458 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
a5f75d66 459 if (CvCLONE(cv))
ad64d0ec 460 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
5f05dabc 461 EXTEND(SP,1);
ad64d0ec 462 PUSHs(MUTABLE_SV(cv));
a0d0e21e
LW
463 RETURN;
464}
465
466PP(pp_srefgen)
79072805 467{
97aff369 468 dVAR; dSP;
71be2cbc 469 *SP = refto(*SP);
79072805 470 RETURN;
8ec5e241 471}
a0d0e21e
LW
472
473PP(pp_refgen)
474{
97aff369 475 dVAR; dSP; dMARK;
a0d0e21e 476 if (GIMME != G_ARRAY) {
5f0b1d4e
GS
477 if (++MARK <= SP)
478 *MARK = *SP;
479 else
3280af22 480 *MARK = &PL_sv_undef;
5f0b1d4e
GS
481 *MARK = refto(*MARK);
482 SP = MARK;
483 RETURN;
a0d0e21e 484 }
bbce6d69 485 EXTEND_MORTAL(SP - MARK);
71be2cbc 486 while (++MARK <= SP)
487 *MARK = refto(*MARK);
a0d0e21e 488 RETURN;
79072805
LW
489}
490
76e3520e 491STATIC SV*
cea2e8a9 492S_refto(pTHX_ SV *sv)
71be2cbc 493{
97aff369 494 dVAR;
71be2cbc 495 SV* rv;
496
7918f24d
NC
497 PERL_ARGS_ASSERT_REFTO;
498
71be2cbc 499 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
500 if (LvTARGLEN(sv))
68dc0745 501 vivify_defelem(sv);
502 if (!(sv = LvTARG(sv)))
3280af22 503 sv = &PL_sv_undef;
0dd88869 504 else
b37c2d43 505 SvREFCNT_inc_void_NN(sv);
71be2cbc 506 }
d8b46c1b 507 else if (SvTYPE(sv) == SVt_PVAV) {
502c6561
NC
508 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
509 av_reify(MUTABLE_AV(sv));
d8b46c1b 510 SvTEMP_off(sv);
b37c2d43 511 SvREFCNT_inc_void_NN(sv);
d8b46c1b 512 }
f2933f5f
DM
513 else if (SvPADTMP(sv) && !IS_PADGV(sv))
514 sv = newSVsv(sv);
71be2cbc 515 else {
516 SvTEMP_off(sv);
b37c2d43 517 SvREFCNT_inc_void_NN(sv);
71be2cbc 518 }
519 rv = sv_newmortal();
4df7f6af 520 sv_upgrade(rv, SVt_IV);
b162af07 521 SvRV_set(rv, sv);
71be2cbc 522 SvROK_on(rv);
523 return rv;
524}
525
79072805
LW
526PP(pp_ref)
527{
97aff369 528 dVAR; dSP; dTARGET;
e1ec3a88 529 const char *pv;
1b6737cc 530 SV * const sv = POPs;
f12c7020 531
5b295bef
RD
532 if (sv)
533 SvGETMAGIC(sv);
f12c7020 534
a0d0e21e 535 if (!sv || !SvROK(sv))
4633a7c4 536 RETPUSHNO;
79072805 537
cba0b539
FR
538 pv = sv_reftype(SvRV(sv),TRUE);
539 PUSHp(pv, strlen(pv));
79072805
LW
540 RETURN;
541}
542
543PP(pp_bless)
544{
97aff369 545 dVAR; dSP;
463ee0b2 546 HV *stash;
79072805 547
463ee0b2 548 if (MAXARG == 1)
11faa288 549 stash = CopSTASH(PL_curcop);
7b8d334a 550 else {
1b6737cc 551 SV * const ssv = POPs;
7b8d334a 552 STRLEN len;
e1ec3a88 553 const char *ptr;
81689caa 554
016a42f3 555 if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
81689caa 556 Perl_croak(aTHX_ "Attempt to bless into a reference");
5c144d81 557 ptr = SvPV_const(ssv,len);
a2a5de95
NC
558 if (len == 0)
559 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
560 "Explicit blessing to '' (assuming package main)");
da51bb9b 561 stash = gv_stashpvn(ptr, len, GV_ADD);
7b8d334a 562 }
a0d0e21e 563
5d3fdfeb 564 (void)sv_bless(TOPs, stash);
79072805
LW
565 RETURN;
566}
567
fb73857a 568PP(pp_gelem)
569{
97aff369 570 dVAR; dSP;
b13b2135 571
1b6737cc
AL
572 SV *sv = POPs;
573 const char * const elem = SvPV_nolen_const(sv);
159b6efe 574 GV * const gv = MUTABLE_GV(POPs);
c445ea15 575 SV * tmpRef = NULL;
1b6737cc 576
c445ea15 577 sv = NULL;
c4ba80c3
NC
578 if (elem) {
579 /* elem will always be NUL terminated. */
1b6737cc 580 const char * const second_letter = elem + 1;
c4ba80c3
NC
581 switch (*elem) {
582 case 'A':
1b6737cc 583 if (strEQ(second_letter, "RRAY"))
ad64d0ec 584 tmpRef = MUTABLE_SV(GvAV(gv));
c4ba80c3
NC
585 break;
586 case 'C':
1b6737cc 587 if (strEQ(second_letter, "ODE"))
ad64d0ec 588 tmpRef = MUTABLE_SV(GvCVu(gv));
c4ba80c3
NC
589 break;
590 case 'F':
1b6737cc 591 if (strEQ(second_letter, "ILEHANDLE")) {
c4ba80c3
NC
592 /* finally deprecated in 5.8.0 */
593 deprecate("*glob{FILEHANDLE}");
ad64d0ec 594 tmpRef = MUTABLE_SV(GvIOp(gv));
c4ba80c3
NC
595 }
596 else
1b6737cc 597 if (strEQ(second_letter, "ORMAT"))
ad64d0ec 598 tmpRef = MUTABLE_SV(GvFORM(gv));
c4ba80c3
NC
599 break;
600 case 'G':
1b6737cc 601 if (strEQ(second_letter, "LOB"))
ad64d0ec 602 tmpRef = MUTABLE_SV(gv);
c4ba80c3
NC
603 break;
604 case 'H':
1b6737cc 605 if (strEQ(second_letter, "ASH"))
ad64d0ec 606 tmpRef = MUTABLE_SV(GvHV(gv));
c4ba80c3
NC
607 break;
608 case 'I':
1b6737cc 609 if (*second_letter == 'O' && !elem[2])
ad64d0ec 610 tmpRef = MUTABLE_SV(GvIOp(gv));
c4ba80c3
NC
611 break;
612 case 'N':
1b6737cc 613 if (strEQ(second_letter, "AME"))
a663657d 614 sv = newSVhek(GvNAME_HEK(gv));
c4ba80c3
NC
615 break;
616 case 'P':
1b6737cc 617 if (strEQ(second_letter, "ACKAGE")) {
7fa3a4ab
NC
618 const HV * const stash = GvSTASH(gv);
619 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
396482e1 620 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
c4ba80c3
NC
621 }
622 break;
623 case 'S':
1b6737cc 624 if (strEQ(second_letter, "CALAR"))
f9d52e31 625 tmpRef = GvSVn(gv);
c4ba80c3 626 break;
39b99f21 627 }
fb73857a 628 }
76e3520e
GS
629 if (tmpRef)
630 sv = newRV(tmpRef);
fb73857a 631 if (sv)
632 sv_2mortal(sv);
633 else
3280af22 634 sv = &PL_sv_undef;
fb73857a 635 XPUSHs(sv);
636 RETURN;
637}
638
a0d0e21e 639/* Pattern matching */
79072805 640
a0d0e21e 641PP(pp_study)
79072805 642{
97aff369 643 dVAR; dSP; dPOPss;
a0d0e21e 644 register unsigned char *s;
72de20cd 645 char *sfirst_raw;
a0d0e21e 646 STRLEN len;
4185c919 647 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
72de20cd
NC
648 U8 quanta;
649 STRLEN size;
4185c919
NC
650
651 if (mg && SvSCREAM(sv))
652 RETPUSHYES;
a0d0e21e 653
a4f4e906 654 s = (unsigned char*)(SvPV(sv, len));
bc9a5256 655 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
a4f4e906
NC
656 /* No point in studying a zero length string, and not safe to study
657 anything that doesn't appear to be a simple scalar (and hence might
658 change between now and when the regexp engine runs without our set
bd473224 659 magic ever running) such as a reference to an object with overloaded
bc9a5256
NC
660 stringification. Also refuse to study an FBM scalar, as this gives
661 more flexibility in SV flag usage. No real-world code would ever
662 end up studying an FBM scalar, so this isn't a real pessimisation.
72de20cd
NC
663 Endemic use of I32 in Perl_screaminstr makes it hard to safely push
664 the study length limit from I32_MAX to U32_MAX - 1.
bc9a5256 665 */
a4f4e906
NC
666 RETPUSHNO;
667 }
668
72de20cd
NC
669 if (len < 0xFF) {
670 quanta = 1;
671 } else if (len < 0xFFFF) {
672 quanta = 2;
673 } else
674 quanta = 4;
a0d0e21e 675
72de20cd
NC
676 size = (256 + len) * quanta;
677 sfirst_raw = (char *)safemalloc(size);
678
679 if (!sfirst_raw)
cea2e8a9 680 DIE(aTHX_ "do_study: out of memory");
a0d0e21e 681
4185c919
NC
682 SvSCREAM_on(sv);
683 if (!mg)
684 mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
72de20cd
NC
685 mg->mg_ptr = sfirst_raw;
686 mg->mg_len = size;
687 mg->mg_private = quanta;
688
689 memset(sfirst_raw, ~0, 256 * quanta);
690
691 /* The assumption here is that most studied strings are fairly short, hence
692 the pain of the extra code is worth it, given the memory savings.
693 80 character string, 336 bytes as U8, down from 1344 as U32
694 800 character string, 2112 bytes as U16, down from 4224 as U32
695 */
696
697 if (quanta == 1) {
698 U8 *const sfirst = (U8 *)sfirst_raw;
699 U8 *const snext = sfirst + 256;
700 while (len-- > 0) {
701 const U8 ch = s[len];
702 snext[len] = sfirst[ch];
703 sfirst[ch] = len;
704 }
705 } else if (quanta == 2) {
706 U16 *const sfirst = (U16 *)sfirst_raw;
707 U16 *const snext = sfirst + 256;
708 while (len-- > 0) {
709 const U8 ch = s[len];
710 snext[len] = sfirst[ch];
711 sfirst[ch] = len;
712 }
713 } else {
714 U32 *const sfirst = (U32 *)sfirst_raw;
715 U32 *const snext = sfirst + 256;
716 while (len-- > 0) {
717 const U8 ch = s[len];
718 snext[len] = sfirst[ch];
719 sfirst[ch] = len;
720 }
79072805
LW
721 }
722
1e422769 723 RETPUSHYES;
79072805
LW
724}
725
a0d0e21e 726PP(pp_trans)
79072805 727{
97aff369 728 dVAR; dSP; dTARG;
a0d0e21e
LW
729 SV *sv;
730
533c011a 731 if (PL_op->op_flags & OPf_STACKED)
a0d0e21e 732 sv = POPs;
59f00321
RGS
733 else if (PL_op->op_private & OPpTARGET_MY)
734 sv = GETTARGET;
79072805 735 else {
54b9620d 736 sv = DEFSV;
a0d0e21e 737 EXTEND(SP,1);
79072805 738 }
adbc6bb1 739 TARG = sv_newmortal();
bb16bae8
FC
740 if(PL_op->op_type == OP_TRANSR) {
741 SV * const newsv = newSVsv(sv);
742 do_trans(newsv);
743 mPUSHs(newsv);
744 }
745 else PUSHi(do_trans(sv));
a0d0e21e 746 RETURN;
79072805
LW
747}
748
a0d0e21e 749/* Lvalue operators. */
79072805 750
81745e4e
NC
751static void
752S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
753{
754 dVAR;
755 STRLEN len;
756 char *s;
757
758 PERL_ARGS_ASSERT_DO_CHOMP;
759
760 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
761 return;
762 if (SvTYPE(sv) == SVt_PVAV) {
763 I32 i;
764 AV *const av = MUTABLE_AV(sv);
765 const I32 max = AvFILL(av);
766
767 for (i = 0; i <= max; i++) {
768 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
769 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
770 do_chomp(retval, sv, chomping);
771 }
772 return;
773 }
774 else if (SvTYPE(sv) == SVt_PVHV) {
775 HV* const hv = MUTABLE_HV(sv);
776 HE* entry;
777 (void)hv_iterinit(hv);
778 while ((entry = hv_iternext(hv)))
779 do_chomp(retval, hv_iterval(hv,entry), chomping);
780 return;
781 }
782 else if (SvREADONLY(sv)) {
783 if (SvFAKE(sv)) {
784 /* SV is copy-on-write */
785 sv_force_normal_flags(sv, 0);
786 }
787 if (SvREADONLY(sv))
788 Perl_croak_no_modify(aTHX);
789 }
790
791 if (PL_encoding) {
792 if (!SvUTF8(sv)) {
793 /* XXX, here sv is utf8-ized as a side-effect!
794 If encoding.pm is used properly, almost string-generating
795 operations, including literal strings, chr(), input data, etc.
796 should have been utf8-ized already, right?
797 */
798 sv_recode_to_utf8(sv, PL_encoding);
799 }
800 }
801
802 s = SvPV(sv, len);
803 if (chomping) {
804 char *temp_buffer = NULL;
805 SV *svrecode = NULL;
806
807 if (s && len) {
808 s += --len;
809 if (RsPARA(PL_rs)) {
810 if (*s != '\n')
811 goto nope;
812 ++SvIVX(retval);
813 while (len && s[-1] == '\n') {
814 --len;
815 --s;
816 ++SvIVX(retval);
817 }
818 }
819 else {
820 STRLEN rslen, rs_charlen;
821 const char *rsptr = SvPV_const(PL_rs, rslen);
822
823 rs_charlen = SvUTF8(PL_rs)
824 ? sv_len_utf8(PL_rs)
825 : rslen;
826
827 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
828 /* Assumption is that rs is shorter than the scalar. */
829 if (SvUTF8(PL_rs)) {
830 /* RS is utf8, scalar is 8 bit. */
831 bool is_utf8 = TRUE;
832 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
833 &rslen, &is_utf8);
834 if (is_utf8) {
835 /* Cannot downgrade, therefore cannot possibly match
836 */
837 assert (temp_buffer == rsptr);
838 temp_buffer = NULL;
839 goto nope;
840 }
841 rsptr = temp_buffer;
842 }
843 else if (PL_encoding) {
844 /* RS is 8 bit, encoding.pm is used.
845 * Do not recode PL_rs as a side-effect. */
846 svrecode = newSVpvn(rsptr, rslen);
847 sv_recode_to_utf8(svrecode, PL_encoding);
848 rsptr = SvPV_const(svrecode, rslen);
849 rs_charlen = sv_len_utf8(svrecode);
850 }
851 else {
852 /* RS is 8 bit, scalar is utf8. */
853 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
854 rsptr = temp_buffer;
855 }
856 }
857 if (rslen == 1) {
858 if (*s != *rsptr)
859 goto nope;
860 ++SvIVX(retval);
861 }
862 else {
863 if (len < rslen - 1)
864 goto nope;
865 len -= rslen - 1;
866 s -= rslen - 1;
867 if (memNE(s, rsptr, rslen))
868 goto nope;
869 SvIVX(retval) += rs_charlen;
870 }
871 }
872 s = SvPV_force_nolen(sv);
873 SvCUR_set(sv, len);
874 *SvEND(sv) = '\0';
875 SvNIOK_off(sv);
876 SvSETMAGIC(sv);
877 }
878 nope:
879
880 SvREFCNT_dec(svrecode);
881
882 Safefree(temp_buffer);
883 } else {
884 if (len && !SvPOK(sv))
885 s = SvPV_force_nomg(sv, len);
886 if (DO_UTF8(sv)) {
887 if (s && len) {
888 char * const send = s + len;
889 char * const start = s;
890 s = send - 1;
891 while (s > start && UTF8_IS_CONTINUATION(*s))
892 s--;
893 if (is_utf8_string((U8*)s, send - s)) {
894 sv_setpvn(retval, s, send - s);
895 *s = '\0';
896 SvCUR_set(sv, s - start);
897 SvNIOK_off(sv);
898 SvUTF8_on(retval);
899 }
900 }
901 else
902 sv_setpvs(retval, "");
903 }
904 else if (s && len) {
905 s += --len;
906 sv_setpvn(retval, s, 1);
907 *s = '\0';
908 SvCUR_set(sv, len);
909 SvUTF8_off(sv);
910 SvNIOK_off(sv);
911 }
912 else
913 sv_setpvs(retval, "");
914 SvSETMAGIC(sv);
915 }
916}
917
a0d0e21e
LW
918PP(pp_schop)
919{
97aff369 920 dVAR; dSP; dTARGET;
fa54efae
NC
921 const bool chomping = PL_op->op_type == OP_SCHOMP;
922
923 if (chomping)
924 sv_setiv(TARG, 0);
925 do_chomp(TARG, TOPs, chomping);
a0d0e21e
LW
926 SETTARG;
927 RETURN;
79072805
LW
928}
929
a0d0e21e 930PP(pp_chop)
79072805 931{
97aff369 932 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
fa54efae 933 const bool chomping = PL_op->op_type == OP_CHOMP;
8ec5e241 934
fa54efae
NC
935 if (chomping)
936 sv_setiv(TARG, 0);
20cf1f79 937 while (MARK < SP)
fa54efae 938 do_chomp(TARG, *++MARK, chomping);
20cf1f79
NC
939 SP = ORIGMARK;
940 XPUSHTARG;
a0d0e21e 941 RETURN;
79072805
LW
942}
943
a0d0e21e
LW
944PP(pp_undef)
945{
97aff369 946 dVAR; dSP;
a0d0e21e
LW
947 SV *sv;
948
533c011a 949 if (!PL_op->op_private) {
774d564b 950 EXTEND(SP, 1);
a0d0e21e 951 RETPUSHUNDEF;
774d564b 952 }
79072805 953
a0d0e21e
LW
954 sv = POPs;
955 if (!sv)
956 RETPUSHUNDEF;
85e6fe83 957
765f542d 958 SV_CHECK_THINKFIRST_COW_DROP(sv);
85e6fe83 959
a0d0e21e
LW
960 switch (SvTYPE(sv)) {
961 case SVt_NULL:
962 break;
963 case SVt_PVAV:
502c6561 964 av_undef(MUTABLE_AV(sv));
a0d0e21e
LW
965 break;
966 case SVt_PVHV:
85fbaab2 967 hv_undef(MUTABLE_HV(sv));
a0d0e21e
LW
968 break;
969 case SVt_PVCV:
a2a5de95
NC
970 if (cv_const_sv((const CV *)sv))
971 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
972 CvANON((const CV *)sv) ? "(anonymous)"
973 : GvENAME(CvGV((const CV *)sv)));
5f66b61c 974 /* FALLTHROUGH */
9607fc9c 975 case SVt_PVFM:
6fc92669
GS
976 {
977 /* let user-undef'd sub keep its identity */
ea726b52
NC
978 GV* const gv = CvGV((const CV *)sv);
979 cv_undef(MUTABLE_CV(sv));
b3f91e91 980 CvGV_set(MUTABLE_CV(sv), gv);
6fc92669 981 }
a0d0e21e 982 break;
8e07c86e 983 case SVt_PVGV:
6e592b3a 984 if (SvFAKE(sv)) {
3280af22 985 SvSetMagicSV(sv, &PL_sv_undef);
6e592b3a
BM
986 break;
987 }
988 else if (isGV_with_GP(sv)) {
20408e3c 989 GP *gp;
dd69841b
BB
990 HV *stash;
991
dd69841b 992 /* undef *Pkg::meth_name ... */
e530fb81
FC
993 bool method_changed
994 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
995 && HvENAME_get(stash);
996 /* undef *Foo:: */
997 if((stash = GvHV((const GV *)sv))) {
998 if(HvENAME_get(stash))
999 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1000 else stash = NULL;
1001 }
dd69841b 1002
159b6efe 1003 gp_free(MUTABLE_GV(sv));
a02a5408 1004 Newxz(gp, 1, GP);
c43ae56f 1005 GvGP_set(sv, gp_ref(gp));
561b68a9 1006 GvSV(sv) = newSV(0);
57843af0 1007 GvLINE(sv) = CopLINE(PL_curcop);
159b6efe 1008 GvEGV(sv) = MUTABLE_GV(sv);
20408e3c 1009 GvMULTI_on(sv);
e530fb81
FC
1010
1011 if(stash)
afdbe55d 1012 mro_package_moved(NULL, stash, (const GV *)sv, 0);
e530fb81
FC
1013 stash = NULL;
1014 /* undef *Foo::ISA */
1015 if( strEQ(GvNAME((const GV *)sv), "ISA")
1016 && (stash = GvSTASH((const GV *)sv))
1017 && (method_changed || HvENAME(stash)) )
1018 mro_isa_changed_in(stash);
1019 else if(method_changed)
1020 mro_method_changed_in(
da9043f5 1021 GvSTASH((const GV *)sv)
e530fb81
FC
1022 );
1023
6e592b3a 1024 break;
20408e3c 1025 }
6e592b3a 1026 /* FALL THROUGH */
a0d0e21e 1027 default:
b15aece3 1028 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
8bd4d4c5 1029 SvPV_free(sv);
c445ea15 1030 SvPV_set(sv, NULL);
4633a7c4 1031 SvLEN_set(sv, 0);
a0d0e21e 1032 }
0c34ef67 1033 SvOK_off(sv);
4633a7c4 1034 SvSETMAGIC(sv);
79072805 1035 }
a0d0e21e
LW
1036
1037 RETPUSHUNDEF;
79072805
LW
1038}
1039
a0d0e21e 1040PP(pp_predec)
79072805 1041{
97aff369 1042 dVAR; dSP;
6e592b3a 1043 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
6ad8f254 1044 Perl_croak_no_modify(aTHX);
3510b4a1
NC
1045 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1046 && SvIVX(TOPs) != IV_MIN)
55497cff 1047 {
45977657 1048 SvIV_set(TOPs, SvIVX(TOPs) - 1);
55497cff 1049 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
748a9306
LW
1050 }
1051 else
1052 sv_dec(TOPs);
a0d0e21e
LW
1053 SvSETMAGIC(TOPs);
1054 return NORMAL;
1055}
79072805 1056
a0d0e21e
LW
1057PP(pp_postinc)
1058{
97aff369 1059 dVAR; dSP; dTARGET;
6e592b3a 1060 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
6ad8f254 1061 Perl_croak_no_modify(aTHX);
7dcb9b98
DM
1062 if (SvROK(TOPs))
1063 TARG = sv_newmortal();
a0d0e21e 1064 sv_setsv(TARG, TOPs);
3510b4a1
NC
1065 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1066 && SvIVX(TOPs) != IV_MAX)
55497cff 1067 {
45977657 1068 SvIV_set(TOPs, SvIVX(TOPs) + 1);
55497cff 1069 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
748a9306
LW
1070 }
1071 else
6f1401dc 1072 sv_inc_nomg(TOPs);
a0d0e21e 1073 SvSETMAGIC(TOPs);
1e54a23f 1074 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
a0d0e21e
LW
1075 if (!SvOK(TARG))
1076 sv_setiv(TARG, 0);
1077 SETs(TARG);
1078 return NORMAL;
1079}
79072805 1080
a0d0e21e
LW
1081PP(pp_postdec)
1082{
97aff369 1083 dVAR; dSP; dTARGET;
6e592b3a 1084 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
6ad8f254 1085 Perl_croak_no_modify(aTHX);
7dcb9b98
DM
1086 if (SvROK(TOPs))
1087 TARG = sv_newmortal();
a0d0e21e 1088 sv_setsv(TARG, TOPs);
3510b4a1
NC
1089 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1090 && SvIVX(TOPs) != IV_MIN)
55497cff 1091 {
45977657 1092 SvIV_set(TOPs, SvIVX(TOPs) - 1);
55497cff 1093 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
748a9306
LW
1094 }
1095 else
6f1401dc 1096 sv_dec_nomg(TOPs);
a0d0e21e
LW
1097 SvSETMAGIC(TOPs);
1098 SETs(TARG);
1099 return NORMAL;
1100}
79072805 1101
a0d0e21e
LW
1102/* Ordinary operators. */
1103
1104PP(pp_pow)
1105{
800401ee 1106 dVAR; dSP; dATARGET; SV *svl, *svr;
58d76dfd 1107#ifdef PERL_PRESERVE_IVUV
52a96ae6
HS
1108 bool is_int = 0;
1109#endif
6f1401dc
DM
1110 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1111 svr = TOPs;
1112 svl = TOPm1s;
52a96ae6
HS
1113#ifdef PERL_PRESERVE_IVUV
1114 /* For integer to integer power, we do the calculation by hand wherever
1115 we're sure it is safe; otherwise we call pow() and try to convert to
1116 integer afterwards. */
58d76dfd 1117 {
6f1401dc 1118 SvIV_please_nomg(svr);
800401ee 1119 if (SvIOK(svr)) {
6f1401dc 1120 SvIV_please_nomg(svl);
800401ee 1121 if (SvIOK(svl)) {
900658e3
PF
1122 UV power;
1123 bool baseuok;
1124 UV baseuv;
1125
800401ee
JH
1126 if (SvUOK(svr)) {
1127 power = SvUVX(svr);
900658e3 1128 } else {
800401ee 1129 const IV iv = SvIVX(svr);
900658e3
PF
1130 if (iv >= 0) {
1131 power = iv;
1132 } else {
1133 goto float_it; /* Can't do negative powers this way. */
1134 }
1135 }
1136
800401ee 1137 baseuok = SvUOK(svl);
900658e3 1138 if (baseuok) {
800401ee 1139 baseuv = SvUVX(svl);
900658e3 1140 } else {
800401ee 1141 const IV iv = SvIVX(svl);
900658e3
PF
1142 if (iv >= 0) {
1143 baseuv = iv;
1144 baseuok = TRUE; /* effectively it's a UV now */
1145 } else {
1146 baseuv = -iv; /* abs, baseuok == false records sign */
1147 }
1148 }
52a96ae6
HS
1149 /* now we have integer ** positive integer. */
1150 is_int = 1;
1151
1152 /* foo & (foo - 1) is zero only for a power of 2. */
58d76dfd 1153 if (!(baseuv & (baseuv - 1))) {
52a96ae6 1154 /* We are raising power-of-2 to a positive integer.
58d76dfd
JH
1155 The logic here will work for any base (even non-integer
1156 bases) but it can be less accurate than
1157 pow (base,power) or exp (power * log (base)) when the
1158 intermediate values start to spill out of the mantissa.
1159 With powers of 2 we know this can't happen.
1160 And powers of 2 are the favourite thing for perl
1161 programmers to notice ** not doing what they mean. */
1162 NV result = 1.0;
1163 NV base = baseuok ? baseuv : -(NV)baseuv;
900658e3
PF
1164
1165 if (power & 1) {
1166 result *= base;
1167 }
1168 while (power >>= 1) {
1169 base *= base;
1170 if (power & 1) {
1171 result *= base;
1172 }
1173 }
58d76dfd
JH
1174 SP--;
1175 SETn( result );
6f1401dc 1176 SvIV_please_nomg(svr);
58d76dfd 1177 RETURN;
52a96ae6
HS
1178 } else {
1179 register unsigned int highbit = 8 * sizeof(UV);
900658e3
PF
1180 register unsigned int diff = 8 * sizeof(UV);
1181 while (diff >>= 1) {
1182 highbit -= diff;
1183 if (baseuv >> highbit) {
1184 highbit += diff;
1185 }
52a96ae6
HS
1186 }
1187 /* we now have baseuv < 2 ** highbit */
1188 if (power * highbit <= 8 * sizeof(UV)) {
1189 /* result will definitely fit in UV, so use UV math
1190 on same algorithm as above */
1191 register UV result = 1;
1192 register UV base = baseuv;
f2338a2e 1193 const bool odd_power = cBOOL(power & 1);
900658e3
PF
1194 if (odd_power) {
1195 result *= base;
1196 }
1197 while (power >>= 1) {
1198 base *= base;
1199 if (power & 1) {
52a96ae6 1200 result *= base;
52a96ae6
HS
1201 }
1202 }
1203 SP--;
0615a994 1204 if (baseuok || !odd_power)
52a96ae6
HS
1205 /* answer is positive */
1206 SETu( result );
1207 else if (result <= (UV)IV_MAX)
1208 /* answer negative, fits in IV */
1209 SETi( -(IV)result );
1210 else if (result == (UV)IV_MIN)
1211 /* 2's complement assumption: special case IV_MIN */
1212 SETi( IV_MIN );
1213 else
1214 /* answer negative, doesn't fit */
1215 SETn( -(NV)result );
1216 RETURN;
1217 }
1218 }
1219 }
1220 }
58d76dfd 1221 }
52a96ae6 1222 float_it:
58d76dfd 1223#endif
a0d0e21e 1224 {
6f1401dc
DM
1225 NV right = SvNV_nomg(svr);
1226 NV left = SvNV_nomg(svl);
4efa5a16 1227 (void)POPs;
3aaeb624
JA
1228
1229#if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1230 /*
1231 We are building perl with long double support and are on an AIX OS
1232 afflicted with a powl() function that wrongly returns NaNQ for any
1233 negative base. This was reported to IBM as PMR #23047-379 on
1234 03/06/2006. The problem exists in at least the following versions
1235 of AIX and the libm fileset, and no doubt others as well:
1236
1237 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1238 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1239 AIX 5.2.0 bos.adt.libm 5.2.0.85
1240
1241 So, until IBM fixes powl(), we provide the following workaround to
1242 handle the problem ourselves. Our logic is as follows: for
1243 negative bases (left), we use fmod(right, 2) to check if the
1244 exponent is an odd or even integer:
1245
1246 - if odd, powl(left, right) == -powl(-left, right)
1247 - if even, powl(left, right) == powl(-left, right)
1248
1249 If the exponent is not an integer, the result is rightly NaNQ, so
1250 we just return that (as NV_NAN).
1251 */
1252
1253 if (left < 0.0) {
1254 NV mod2 = Perl_fmod( right, 2.0 );
1255 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1256 SETn( -Perl_pow( -left, right) );
1257 } else if (mod2 == 0.0) { /* even integer */
1258 SETn( Perl_pow( -left, right) );
1259 } else { /* fractional power */
1260 SETn( NV_NAN );
1261 }
1262 } else {
1263 SETn( Perl_pow( left, right) );
1264 }
1265#else
52a96ae6 1266 SETn( Perl_pow( left, right) );
3aaeb624
JA
1267#endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1268
52a96ae6
HS
1269#ifdef PERL_PRESERVE_IVUV
1270 if (is_int)
6f1401dc 1271 SvIV_please_nomg(svr);
52a96ae6
HS
1272#endif
1273 RETURN;
93a17b20 1274 }
a0d0e21e
LW
1275}
1276
1277PP(pp_multiply)
1278{
800401ee 1279 dVAR; dSP; dATARGET; SV *svl, *svr;
6f1401dc
DM
1280 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1281 svr = TOPs;
1282 svl = TOPm1s;
28e5dec8 1283#ifdef PERL_PRESERVE_IVUV
6f1401dc 1284 SvIV_please_nomg(svr);
800401ee 1285 if (SvIOK(svr)) {
28e5dec8
JH
1286 /* Unless the left argument is integer in range we are going to have to
1287 use NV maths. Hence only attempt to coerce the right argument if
1288 we know the left is integer. */
1289 /* Left operand is defined, so is it IV? */
6f1401dc 1290 SvIV_please_nomg(svl);
800401ee
JH
1291 if (SvIOK(svl)) {
1292 bool auvok = SvUOK(svl);
1293 bool buvok = SvUOK(svr);
28e5dec8
JH
1294 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1295 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1296 UV alow;
1297 UV ahigh;
1298 UV blow;
1299 UV bhigh;
1300
1301 if (auvok) {
800401ee 1302 alow = SvUVX(svl);
28e5dec8 1303 } else {
800401ee 1304 const IV aiv = SvIVX(svl);
28e5dec8
JH
1305 if (aiv >= 0) {
1306 alow = aiv;
1307 auvok = TRUE; /* effectively it's a UV now */
1308 } else {
1309 alow = -aiv; /* abs, auvok == false records sign */
1310 }
1311 }
1312 if (buvok) {
800401ee 1313 blow = SvUVX(svr);
28e5dec8 1314 } else {
800401ee 1315 const IV biv = SvIVX(svr);
28e5dec8
JH
1316 if (biv >= 0) {
1317 blow = biv;
1318 buvok = TRUE; /* effectively it's a UV now */
1319 } else {
1320 blow = -biv; /* abs, buvok == false records sign */
1321 }
1322 }
1323
1324 /* If this does sign extension on unsigned it's time for plan B */
1325 ahigh = alow >> (4 * sizeof (UV));
1326 alow &= botmask;
1327 bhigh = blow >> (4 * sizeof (UV));
1328 blow &= botmask;
1329 if (ahigh && bhigh) {
6f207bd3 1330 NOOP;
28e5dec8
JH
1331 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1332 which is overflow. Drop to NVs below. */
1333 } else if (!ahigh && !bhigh) {
1334 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1335 so the unsigned multiply cannot overflow. */
c445ea15 1336 const UV product = alow * blow;
28e5dec8
JH
1337 if (auvok == buvok) {
1338 /* -ve * -ve or +ve * +ve gives a +ve result. */
1339 SP--;
1340 SETu( product );
1341 RETURN;
1342 } else if (product <= (UV)IV_MIN) {
1343 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1344 /* -ve result, which could overflow an IV */
1345 SP--;
25716404 1346 SETi( -(IV)product );
28e5dec8
JH
1347 RETURN;
1348 } /* else drop to NVs below. */
1349 } else {
1350 /* One operand is large, 1 small */
1351 UV product_middle;
1352 if (bhigh) {
1353 /* swap the operands */
1354 ahigh = bhigh;
1355 bhigh = blow; /* bhigh now the temp var for the swap */
1356 blow = alow;
1357 alow = bhigh;
1358 }
1359 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1360 multiplies can't overflow. shift can, add can, -ve can. */
1361 product_middle = ahigh * blow;
1362 if (!(product_middle & topmask)) {
1363 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1364 UV product_low;
1365 product_middle <<= (4 * sizeof (UV));
1366 product_low = alow * blow;
1367
1368 /* as for pp_add, UV + something mustn't get smaller.
1369 IIRC ANSI mandates this wrapping *behaviour* for
1370 unsigned whatever the actual representation*/
1371 product_low += product_middle;
1372 if (product_low >= product_middle) {
1373 /* didn't overflow */
1374 if (auvok == buvok) {
1375 /* -ve * -ve or +ve * +ve gives a +ve result. */
1376 SP--;
1377 SETu( product_low );
1378 RETURN;
1379 } else if (product_low <= (UV)IV_MIN) {
1380 /* 2s complement assumption again */
1381 /* -ve result, which could overflow an IV */
1382 SP--;
25716404 1383 SETi( -(IV)product_low );
28e5dec8
JH
1384 RETURN;
1385 } /* else drop to NVs below. */
1386 }
1387 } /* product_middle too large */
1388 } /* ahigh && bhigh */
800401ee
JH
1389 } /* SvIOK(svl) */
1390 } /* SvIOK(svr) */
28e5dec8 1391#endif
a0d0e21e 1392 {
6f1401dc
DM
1393 NV right = SvNV_nomg(svr);
1394 NV left = SvNV_nomg(svl);
4efa5a16 1395 (void)POPs;
a0d0e21e
LW
1396 SETn( left * right );
1397 RETURN;
79072805 1398 }
a0d0e21e
LW
1399}
1400
1401PP(pp_divide)
1402{
800401ee 1403 dVAR; dSP; dATARGET; SV *svl, *svr;
6f1401dc
DM
1404 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1405 svr = TOPs;
1406 svl = TOPm1s;
5479d192 1407 /* Only try to do UV divide first
68795e93 1408 if ((SLOPPYDIVIDE is true) or
5479d192
NC
1409 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1410 to preserve))
1411 The assumption is that it is better to use floating point divide
1412 whenever possible, only doing integer divide first if we can't be sure.
1413 If NV_PRESERVES_UV is true then we know at compile time that no UV
1414 can be too large to preserve, so don't need to compile the code to
1415 test the size of UVs. */
1416
a0d0e21e 1417#ifdef SLOPPYDIVIDE
5479d192
NC
1418# define PERL_TRY_UV_DIVIDE
1419 /* ensure that 20./5. == 4. */
a0d0e21e 1420#else
5479d192
NC
1421# ifdef PERL_PRESERVE_IVUV
1422# ifndef NV_PRESERVES_UV
1423# define PERL_TRY_UV_DIVIDE
1424# endif
1425# endif
a0d0e21e 1426#endif
5479d192
NC
1427
1428#ifdef PERL_TRY_UV_DIVIDE
6f1401dc 1429 SvIV_please_nomg(svr);
800401ee 1430 if (SvIOK(svr)) {
6f1401dc 1431 SvIV_please_nomg(svl);
800401ee
JH
1432 if (SvIOK(svl)) {
1433 bool left_non_neg = SvUOK(svl);
1434 bool right_non_neg = SvUOK(svr);
5479d192
NC
1435 UV left;
1436 UV right;
1437
1438 if (right_non_neg) {
800401ee 1439 right = SvUVX(svr);
5479d192
NC
1440 }
1441 else {
800401ee 1442 const IV biv = SvIVX(svr);
5479d192
NC
1443 if (biv >= 0) {
1444 right = biv;
1445 right_non_neg = TRUE; /* effectively it's a UV now */
1446 }
1447 else {
1448 right = -biv;
1449 }
1450 }
1451 /* historically undef()/0 gives a "Use of uninitialized value"
1452 warning before dieing, hence this test goes here.
1453 If it were immediately before the second SvIV_please, then
1454 DIE() would be invoked before left was even inspected, so
486ec47a 1455 no inspection would give no warning. */
5479d192
NC
1456 if (right == 0)
1457 DIE(aTHX_ "Illegal division by zero");
1458
1459 if (left_non_neg) {
800401ee 1460 left = SvUVX(svl);
5479d192
NC
1461 }
1462 else {
800401ee 1463 const IV aiv = SvIVX(svl);
5479d192
NC
1464 if (aiv >= 0) {
1465 left = aiv;
1466 left_non_neg = TRUE; /* effectively it's a UV now */
1467 }
1468 else {
1469 left = -aiv;
1470 }
1471 }
1472
1473 if (left >= right
1474#ifdef SLOPPYDIVIDE
1475 /* For sloppy divide we always attempt integer division. */
1476#else
1477 /* Otherwise we only attempt it if either or both operands
1478 would not be preserved by an NV. If both fit in NVs
0c2ee62a
NC
1479 we fall through to the NV divide code below. However,
1480 as left >= right to ensure integer result here, we know that
1481 we can skip the test on the right operand - right big
1482 enough not to be preserved can't get here unless left is
1483 also too big. */
1484
1485 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
5479d192
NC
1486#endif
1487 ) {
1488 /* Integer division can't overflow, but it can be imprecise. */
1b6737cc 1489 const UV result = left / right;
5479d192
NC
1490 if (result * right == left) {
1491 SP--; /* result is valid */
1492 if (left_non_neg == right_non_neg) {
1493 /* signs identical, result is positive. */
1494 SETu( result );
1495 RETURN;
1496 }
1497 /* 2s complement assumption */
1498 if (result <= (UV)IV_MIN)
91f3b821 1499 SETi( -(IV)result );
5479d192
NC
1500 else {
1501 /* It's exact but too negative for IV. */
1502 SETn( -(NV)result );
1503 }
1504 RETURN;
1505 } /* tried integer divide but it was not an integer result */
32fdb065 1506 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
5479d192
NC
1507 } /* left wasn't SvIOK */
1508 } /* right wasn't SvIOK */
1509#endif /* PERL_TRY_UV_DIVIDE */
1510 {
6f1401dc
DM
1511 NV right = SvNV_nomg(svr);
1512 NV left = SvNV_nomg(svl);
4efa5a16 1513 (void)POPs;(void)POPs;
ebc6a117
PD
1514#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1515 if (! Perl_isnan(right) && right == 0.0)
1516#else
5479d192 1517 if (right == 0.0)
ebc6a117 1518#endif
5479d192
NC
1519 DIE(aTHX_ "Illegal division by zero");
1520 PUSHn( left / right );
1521 RETURN;
79072805 1522 }
a0d0e21e
LW
1523}
1524
1525PP(pp_modulo)
1526{
6f1401dc
DM
1527 dVAR; dSP; dATARGET;
1528 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
a0d0e21e 1529 {
9c5ffd7c
JH
1530 UV left = 0;
1531 UV right = 0;
dc656993
JH
1532 bool left_neg = FALSE;
1533 bool right_neg = FALSE;
e2c88acc
NC
1534 bool use_double = FALSE;
1535 bool dright_valid = FALSE;
9c5ffd7c
JH
1536 NV dright = 0.0;
1537 NV dleft = 0.0;
6f1401dc
DM
1538 SV * const svr = TOPs;
1539 SV * const svl = TOPm1s;
1540 SvIV_please_nomg(svr);
800401ee
JH
1541 if (SvIOK(svr)) {
1542 right_neg = !SvUOK(svr);
e2c88acc 1543 if (!right_neg) {
800401ee 1544 right = SvUVX(svr);
e2c88acc 1545 } else {
800401ee 1546 const IV biv = SvIVX(svr);
e2c88acc
NC
1547 if (biv >= 0) {
1548 right = biv;
1549 right_neg = FALSE; /* effectively it's a UV now */
1550 } else {
1551 right = -biv;
1552 }
1553 }
1554 }
1555 else {
6f1401dc 1556 dright = SvNV_nomg(svr);
787eafbd
IZ
1557 right_neg = dright < 0;
1558 if (right_neg)
1559 dright = -dright;
e2c88acc
NC
1560 if (dright < UV_MAX_P1) {
1561 right = U_V(dright);
1562 dright_valid = TRUE; /* In case we need to use double below. */
1563 } else {
1564 use_double = TRUE;
1565 }
787eafbd 1566 }
a0d0e21e 1567
e2c88acc
NC
1568 /* At this point use_double is only true if right is out of range for
1569 a UV. In range NV has been rounded down to nearest UV and
1570 use_double false. */
6f1401dc 1571 SvIV_please_nomg(svl);
800401ee
JH
1572 if (!use_double && SvIOK(svl)) {
1573 if (SvIOK(svl)) {
1574 left_neg = !SvUOK(svl);
e2c88acc 1575 if (!left_neg) {
800401ee 1576 left = SvUVX(svl);
e2c88acc 1577 } else {
800401ee 1578 const IV aiv = SvIVX(svl);
e2c88acc
NC
1579 if (aiv >= 0) {
1580 left = aiv;
1581 left_neg = FALSE; /* effectively it's a UV now */
1582 } else {
1583 left = -aiv;
1584 }
1585 }
1586 }
1587 }
787eafbd 1588 else {
6f1401dc 1589 dleft = SvNV_nomg(svl);
787eafbd
IZ
1590 left_neg = dleft < 0;
1591 if (left_neg)
1592 dleft = -dleft;
68dc0745 1593
e2c88acc
NC
1594 /* This should be exactly the 5.6 behaviour - if left and right are
1595 both in range for UV then use U_V() rather than floor. */
1596 if (!use_double) {
1597 if (dleft < UV_MAX_P1) {
1598 /* right was in range, so is dleft, so use UVs not double.
1599 */
1600 left = U_V(dleft);
1601 }
1602 /* left is out of range for UV, right was in range, so promote
1603 right (back) to double. */
1604 else {
1605 /* The +0.5 is used in 5.6 even though it is not strictly
1606 consistent with the implicit +0 floor in the U_V()
1607 inside the #if 1. */
1608 dleft = Perl_floor(dleft + 0.5);
1609 use_double = TRUE;
1610 if (dright_valid)
1611 dright = Perl_floor(dright + 0.5);
1612 else
1613 dright = right;
1614 }
1615 }
1616 }
6f1401dc 1617 sp -= 2;
787eafbd 1618 if (use_double) {
65202027 1619 NV dans;
787eafbd 1620
787eafbd 1621 if (!dright)
cea2e8a9 1622 DIE(aTHX_ "Illegal modulus zero");
787eafbd 1623
65202027 1624 dans = Perl_fmod(dleft, dright);
787eafbd
IZ
1625 if ((left_neg != right_neg) && dans)
1626 dans = dright - dans;
1627 if (right_neg)
1628 dans = -dans;
1629 sv_setnv(TARG, dans);
1630 }
1631 else {
1632 UV ans;
1633
787eafbd 1634 if (!right)
cea2e8a9 1635 DIE(aTHX_ "Illegal modulus zero");
787eafbd
IZ
1636
1637 ans = left % right;
1638 if ((left_neg != right_neg) && ans)
1639 ans = right - ans;
1640 if (right_neg) {
1641 /* XXX may warn: unary minus operator applied to unsigned type */
1642 /* could change -foo to be (~foo)+1 instead */
1643 if (ans <= ~((UV)IV_MAX)+1)
1644 sv_setiv(TARG, ~ans+1);
1645 else
65202027 1646 sv_setnv(TARG, -(NV)ans);
787eafbd
IZ
1647 }
1648 else
1649 sv_setuv(TARG, ans);
1650 }
1651 PUSHTARG;
1652 RETURN;
79072805 1653 }
a0d0e21e 1654}
79072805 1655
a0d0e21e
LW
1656PP(pp_repeat)
1657{
6f1401dc 1658 dVAR; dSP; dATARGET;
2b573ace 1659 register IV count;
6f1401dc
DM
1660 SV *sv;
1661
1662 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1663 /* TODO: think of some way of doing list-repeat overloading ??? */
1664 sv = POPs;
1665 SvGETMAGIC(sv);
1666 }
1667 else {
1668 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1669 sv = POPs;
1670 }
1671
2b573ace
JH
1672 if (SvIOKp(sv)) {
1673 if (SvUOK(sv)) {
6f1401dc 1674 const UV uv = SvUV_nomg(sv);
2b573ace
JH
1675 if (uv > IV_MAX)
1676 count = IV_MAX; /* The best we can do? */
1677 else
1678 count = uv;
1679 } else {
6f1401dc 1680 const IV iv = SvIV_nomg(sv);
2b573ace
JH
1681 if (iv < 0)
1682 count = 0;
1683 else
1684 count = iv;
1685 }
1686 }
1687 else if (SvNOKp(sv)) {
6f1401dc 1688 const NV nv = SvNV_nomg(sv);
2b573ace
JH
1689 if (nv < 0.0)
1690 count = 0;
1691 else
1692 count = (IV)nv;
1693 }
1694 else
6f1401dc
DM
1695 count = SvIV_nomg(sv);
1696
533c011a 1697 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
a0d0e21e 1698 dMARK;
0bd48802
AL
1699 static const char oom_list_extend[] = "Out of memory during list extend";
1700 const I32 items = SP - MARK;
1701 const I32 max = items * count;
79072805 1702
2b573ace
JH
1703 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1704 /* Did the max computation overflow? */
27d5b266 1705 if (items > 0 && max > 0 && (max < items || max < count))
2b573ace 1706 Perl_croak(aTHX_ oom_list_extend);
a0d0e21e
LW
1707 MEXTEND(MARK, max);
1708 if (count > 1) {
1709 while (SP > MARK) {
976c8a39
JH
1710#if 0
1711 /* This code was intended to fix 20010809.028:
1712
1713 $x = 'abcd';
1714 for (($x =~ /./g) x 2) {
1715 print chop; # "abcdabcd" expected as output.
1716 }
1717
1718 * but that change (#11635) broke this code:
1719
1720 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1721
1722 * I can't think of a better fix that doesn't introduce
1723 * an efficiency hit by copying the SVs. The stack isn't
1724 * refcounted, and mortalisation obviously doesn't
1725 * Do The Right Thing when the stack has more than
1726 * one pointer to the same mortal value.
1727 * .robin.
1728 */
e30acc16
RH
1729 if (*SP) {
1730 *SP = sv_2mortal(newSVsv(*SP));
1731 SvREADONLY_on(*SP);
1732 }
976c8a39
JH
1733#else
1734 if (*SP)
1735 SvTEMP_off((*SP));
1736#endif
a0d0e21e 1737 SP--;
79072805 1738 }
a0d0e21e
LW
1739 MARK++;
1740 repeatcpy((char*)(MARK + items), (char*)MARK,
ad64d0ec 1741 items * sizeof(const SV *), count - 1);
a0d0e21e 1742 SP += max;
79072805 1743 }
a0d0e21e
LW
1744 else if (count <= 0)
1745 SP -= items;
79072805 1746 }
a0d0e21e 1747 else { /* Note: mark already snarfed by pp_list */
0bd48802 1748 SV * const tmpstr = POPs;
a0d0e21e 1749 STRLEN len;
9b877dbb 1750 bool isutf;
2b573ace
JH
1751 static const char oom_string_extend[] =
1752 "Out of memory during string extend";
a0d0e21e 1753
6f1401dc
DM
1754 if (TARG != tmpstr)
1755 sv_setsv_nomg(TARG, tmpstr);
1756 SvPV_force_nomg(TARG, len);
9b877dbb 1757 isutf = DO_UTF8(TARG);
8ebc5c01 1758 if (count != 1) {
1759 if (count < 1)
1760 SvCUR_set(TARG, 0);
1761 else {
c445ea15 1762 const STRLEN max = (UV)count * len;
19a94d75 1763 if (len > MEM_SIZE_MAX / count)
2b573ace
JH
1764 Perl_croak(aTHX_ oom_string_extend);
1765 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
8569b9dc 1766 SvGROW(TARG, max + 1);
a0d0e21e 1767 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
b162af07 1768 SvCUR_set(TARG, SvCUR(TARG) * count);
7a4c00b4 1769 }
a0d0e21e 1770 *SvEND(TARG) = '\0';
a0d0e21e 1771 }
dfcb284a
GS
1772 if (isutf)
1773 (void)SvPOK_only_UTF8(TARG);
1774 else
1775 (void)SvPOK_only(TARG);
b80b6069
RH
1776
1777 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1778 /* The parser saw this as a list repeat, and there
1779 are probably several items on the stack. But we're
1780 in scalar context, and there's no pp_list to save us
1781 now. So drop the rest of the items -- robin@kitsite.com
1782 */
1783 dMARK;
1784 SP = MARK;
1785 }
a0d0e21e 1786 PUSHTARG;
79072805 1787 }
a0d0e21e
LW
1788 RETURN;
1789}
79072805 1790
a0d0e21e
LW
1791PP(pp_subtract)
1792{
800401ee 1793 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
6f1401dc
DM
1794 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1795 svr = TOPs;
1796 svl = TOPm1s;
800401ee 1797 useleft = USE_LEFT(svl);
28e5dec8 1798#ifdef PERL_PRESERVE_IVUV
7dca457a
NC
1799 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1800 "bad things" happen if you rely on signed integers wrapping. */
6f1401dc 1801 SvIV_please_nomg(svr);
800401ee 1802 if (SvIOK(svr)) {
28e5dec8
JH
1803 /* Unless the left argument is integer in range we are going to have to
1804 use NV maths. Hence only attempt to coerce the right argument if
1805 we know the left is integer. */
9c5ffd7c
JH
1806 register UV auv = 0;
1807 bool auvok = FALSE;
7dca457a
NC
1808 bool a_valid = 0;
1809
28e5dec8 1810 if (!useleft) {
7dca457a
NC
1811 auv = 0;
1812 a_valid = auvok = 1;
1813 /* left operand is undef, treat as zero. */
28e5dec8
JH
1814 } else {
1815 /* Left operand is defined, so is it IV? */
6f1401dc 1816 SvIV_please_nomg(svl);
800401ee
JH
1817 if (SvIOK(svl)) {
1818 if ((auvok = SvUOK(svl)))
1819 auv = SvUVX(svl);
7dca457a 1820 else {
800401ee 1821 register const IV aiv = SvIVX(svl);
7dca457a
NC
1822 if (aiv >= 0) {
1823 auv = aiv;
1824 auvok = 1; /* Now acting as a sign flag. */
1825 } else { /* 2s complement assumption for IV_MIN */
1826 auv = (UV)-aiv;
28e5dec8 1827 }
7dca457a
NC
1828 }
1829 a_valid = 1;
1830 }
1831 }
1832 if (a_valid) {
1833 bool result_good = 0;
1834 UV result;
1835 register UV buv;
800401ee 1836 bool buvok = SvUOK(svr);
9041c2e3 1837
7dca457a 1838 if (buvok)
800401ee 1839 buv = SvUVX(svr);
7dca457a 1840 else {
800401ee 1841 register const IV biv = SvIVX(svr);
7dca457a
NC
1842 if (biv >= 0) {
1843 buv = biv;
1844 buvok = 1;
1845 } else
1846 buv = (UV)-biv;
1847 }
1848 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
602f51c4 1849 else "IV" now, independent of how it came in.
7dca457a
NC
1850 if a, b represents positive, A, B negative, a maps to -A etc
1851 a - b => (a - b)
1852 A - b => -(a + b)
1853 a - B => (a + b)
1854 A - B => -(a - b)
1855 all UV maths. negate result if A negative.
1856 subtract if signs same, add if signs differ. */
1857
1858 if (auvok ^ buvok) {
1859 /* Signs differ. */
1860 result = auv + buv;
1861 if (result >= auv)
1862 result_good = 1;
1863 } else {
1864 /* Signs same */
1865 if (auv >= buv) {
1866 result = auv - buv;
1867 /* Must get smaller */
1868 if (result <= auv)
1869 result_good = 1;
1870 } else {
1871 result = buv - auv;
1872 if (result <= buv) {
1873 /* result really should be -(auv-buv). as its negation
1874 of true value, need to swap our result flag */
1875 auvok = !auvok;
1876 result_good = 1;
28e5dec8 1877 }
28e5dec8
JH
1878 }
1879 }
7dca457a
NC
1880 if (result_good) {
1881 SP--;
1882 if (auvok)
1883 SETu( result );
1884 else {
1885 /* Negate result */
1886 if (result <= (UV)IV_MIN)
1887 SETi( -(IV)result );
1888 else {
1889 /* result valid, but out of range for IV. */
1890 SETn( -(NV)result );
1891 }
1892 }
1893 RETURN;
1894 } /* Overflow, drop through to NVs. */
28e5dec8
JH
1895 }
1896 }
1897#endif
a0d0e21e 1898 {
6f1401dc 1899 NV value = SvNV_nomg(svr);
4efa5a16
RD
1900 (void)POPs;
1901
28e5dec8
JH
1902 if (!useleft) {
1903 /* left operand is undef, treat as zero - value */
1904 SETn(-value);
1905 RETURN;
1906 }
6f1401dc 1907 SETn( SvNV_nomg(svl) - value );
28e5dec8 1908 RETURN;
79072805 1909 }
a0d0e21e 1910}
79072805 1911
a0d0e21e
LW
1912PP(pp_left_shift)
1913{
6f1401dc 1914 dVAR; dSP; dATARGET; SV *svl, *svr;
a42d0242 1915 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
6f1401dc
DM
1916 svr = POPs;
1917 svl = TOPs;
a0d0e21e 1918 {
6f1401dc 1919 const IV shift = SvIV_nomg(svr);
d0ba1bd2 1920 if (PL_op->op_private & HINT_INTEGER) {
6f1401dc 1921 const IV i = SvIV_nomg(svl);
972b05a9 1922 SETi(i << shift);
d0ba1bd2
JH
1923 }
1924 else {
6f1401dc 1925 const UV u = SvUV_nomg(svl);
972b05a9 1926 SETu(u << shift);
d0ba1bd2 1927 }
55497cff 1928 RETURN;
79072805 1929 }
a0d0e21e 1930}
79072805 1931
a0d0e21e
LW
1932PP(pp_right_shift)
1933{
6f1401dc 1934 dVAR; dSP; dATARGET; SV *svl, *svr;
a42d0242 1935 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
6f1401dc
DM
1936 svr = POPs;
1937 svl = TOPs;
a0d0e21e 1938 {
6f1401dc 1939 const IV shift = SvIV_nomg(svr);
d0ba1bd2 1940 if (PL_op->op_private & HINT_INTEGER) {
6f1401dc 1941 const IV i = SvIV_nomg(svl);
972b05a9 1942 SETi(i >> shift);
d0ba1bd2
JH
1943 }
1944 else {
6f1401dc 1945 const UV u = SvUV_nomg(svl);
972b05a9 1946 SETu(u >> shift);
d0ba1bd2 1947 }
a0d0e21e 1948 RETURN;
93a17b20 1949 }
79072805
LW
1950}
1951
a0d0e21e 1952PP(pp_lt)
79072805 1953{
6f1401dc 1954 dVAR; dSP;
33efebe6
DM
1955 SV *left, *right;
1956
a42d0242 1957 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
33efebe6
DM
1958 right = POPs;
1959 left = TOPs;
1960 SETs(boolSV(
1961 (SvIOK_notUV(left) && SvIOK_notUV(right))
1962 ? (SvIVX(left) < SvIVX(right))
1963 : (do_ncmp(left, right) == -1)
1964 ));
1965 RETURN;
a0d0e21e 1966}
79072805 1967
a0d0e21e
LW
1968PP(pp_gt)
1969{
6f1401dc 1970 dVAR; dSP;
33efebe6 1971 SV *left, *right;
1b6737cc 1972
33efebe6
DM
1973 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1974 right = POPs;
1975 left = TOPs;
1976 SETs(boolSV(
1977 (SvIOK_notUV(left) && SvIOK_notUV(right))
1978 ? (SvIVX(left) > SvIVX(right))
1979 : (do_ncmp(left, right) == 1)
1980 ));
1981 RETURN;
a0d0e21e
LW
1982}
1983
1984PP(pp_le)
1985{
6f1401dc 1986 dVAR; dSP;
33efebe6 1987 SV *left, *right;
1b6737cc 1988
33efebe6
DM
1989 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
1990 right = POPs;
1991 left = TOPs;
1992 SETs(boolSV(
1993 (SvIOK_notUV(left) && SvIOK_notUV(right))
1994 ? (SvIVX(left) <= SvIVX(right))
1995 : (do_ncmp(left, right) <= 0)
1996 ));
1997 RETURN;
a0d0e21e
LW
1998}
1999
2000PP(pp_ge)
2001{
6f1401dc 2002 dVAR; dSP;
33efebe6
DM
2003 SV *left, *right;
2004
2005 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2006 right = POPs;
2007 left = TOPs;
2008 SETs(boolSV(
2009 (SvIOK_notUV(left) && SvIOK_notUV(right))
2010 ? (SvIVX(left) >= SvIVX(right))
2011 : ( (do_ncmp(left, right) & 2) == 0)
2012 ));
2013 RETURN;
2014}
1b6737cc 2015
33efebe6
DM
2016PP(pp_ne)
2017{
2018 dVAR; dSP;
2019 SV *left, *right;
2020
2021 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2022 right = POPs;
2023 left = TOPs;
2024 SETs(boolSV(
2025 (SvIOK_notUV(left) && SvIOK_notUV(right))
2026 ? (SvIVX(left) != SvIVX(right))
2027 : (do_ncmp(left, right) != 0)
2028 ));
2029 RETURN;
2030}
1b6737cc 2031
33efebe6
DM
2032/* compare left and right SVs. Returns:
2033 * -1: <
2034 * 0: ==
2035 * 1: >
2036 * 2: left or right was a NaN
2037 */
2038I32
2039Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2040{
2041 dVAR;
1b6737cc 2042
33efebe6
DM
2043 PERL_ARGS_ASSERT_DO_NCMP;
2044#ifdef PERL_PRESERVE_IVUV
2045 SvIV_please_nomg(right);
2046 /* Fortunately it seems NaN isn't IOK */
2047 if (SvIOK(right)) {
2048 SvIV_please_nomg(left);
2049 if (SvIOK(left)) {
2050 if (!SvUOK(left)) {
2051 const IV leftiv = SvIVX(left);
2052 if (!SvUOK(right)) {
2053 /* ## IV <=> IV ## */
2054 const IV rightiv = SvIVX(right);
2055 return (leftiv > rightiv) - (leftiv < rightiv);
28e5dec8 2056 }
33efebe6
DM
2057 /* ## IV <=> UV ## */
2058 if (leftiv < 0)
2059 /* As (b) is a UV, it's >=0, so it must be < */
2060 return -1;
2061 {
2062 const UV rightuv = SvUVX(right);
2063 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
28e5dec8 2064 }
28e5dec8 2065 }
79072805 2066
33efebe6
DM
2067 if (SvUOK(right)) {
2068 /* ## UV <=> UV ## */
2069 const UV leftuv = SvUVX(left);
2070 const UV rightuv = SvUVX(right);
2071 return (leftuv > rightuv) - (leftuv < rightuv);
28e5dec8 2072 }
33efebe6
DM
2073 /* ## UV <=> IV ## */
2074 {
2075 const IV rightiv = SvIVX(right);
2076 if (rightiv < 0)
2077 /* As (a) is a UV, it's >=0, so it cannot be < */
2078 return 1;
2079 {
2080 const UV leftuv = SvUVX(left);
2081 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
28e5dec8 2082 }
28e5dec8 2083 }
33efebe6 2084 /* NOTREACHED */
28e5dec8
JH
2085 }
2086 }
2087#endif
a0d0e21e 2088 {
33efebe6
DM
2089 NV const rnv = SvNV_nomg(right);
2090 NV const lnv = SvNV_nomg(left);
2091
cab190d4 2092#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
33efebe6
DM
2093 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2094 return 2;
2095 }
2096 return (lnv > rnv) - (lnv < rnv);
cab190d4 2097#else
33efebe6
DM
2098 if (lnv < rnv)
2099 return -1;
2100 if (lnv > rnv)
2101 return 1;
2102 if (lnv == rnv)
2103 return 0;
2104 return 2;
cab190d4 2105#endif
a0d0e21e 2106 }
79072805
LW
2107}
2108
33efebe6 2109
a0d0e21e 2110PP(pp_ncmp)
79072805 2111{
33efebe6
DM
2112 dVAR; dSP;
2113 SV *left, *right;
2114 I32 value;
a42d0242 2115 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
33efebe6
DM
2116 right = POPs;
2117 left = TOPs;
2118 value = do_ncmp(left, right);
2119 if (value == 2) {
3280af22 2120 SETs(&PL_sv_undef);
79072805 2121 }
33efebe6
DM
2122 else {
2123 dTARGET;
2124 SETi(value);
2125 }
2126 RETURN;
a0d0e21e 2127}
79072805 2128
afd9910b 2129PP(pp_sle)
a0d0e21e 2130{
97aff369 2131 dVAR; dSP;
79072805 2132
afd9910b
NC
2133 int amg_type = sle_amg;
2134 int multiplier = 1;
2135 int rhs = 1;
79072805 2136
afd9910b
NC
2137 switch (PL_op->op_type) {
2138 case OP_SLT:
2139 amg_type = slt_amg;
2140 /* cmp < 0 */
2141 rhs = 0;
2142 break;
2143 case OP_SGT:
2144 amg_type = sgt_amg;
2145 /* cmp > 0 */
2146 multiplier = -1;
2147 rhs = 0;
2148 break;
2149 case OP_SGE:
2150 amg_type = sge_amg;
2151 /* cmp >= 0 */
2152 multiplier = -1;
2153 break;
79072805 2154 }
79072805 2155
6f1401dc 2156 tryAMAGICbin_MG(amg_type, AMGf_set);
a0d0e21e
LW
2157 {
2158 dPOPTOPssrl;
1b6737cc 2159 const int cmp = (IN_LOCALE_RUNTIME
078504b2
FC
2160 ? sv_cmp_locale_flags(left, right, 0)
2161 : sv_cmp_flags(left, right, 0));
afd9910b 2162 SETs(boolSV(cmp * multiplier < rhs));
a0d0e21e
LW
2163 RETURN;
2164 }
2165}
79072805 2166
36477c24 2167PP(pp_seq)
2168{
6f1401dc
DM
2169 dVAR; dSP;
2170 tryAMAGICbin_MG(seq_amg, AMGf_set);
36477c24 2171 {
2172 dPOPTOPssrl;
078504b2 2173 SETs(boolSV(sv_eq_flags(left, right, 0)));
a0d0e21e
LW
2174 RETURN;
2175 }
2176}
79072805 2177
a0d0e21e 2178PP(pp_sne)
79072805 2179{
6f1401dc
DM
2180 dVAR; dSP;
2181 tryAMAGICbin_MG(sne_amg, AMGf_set);
a0d0e21e
LW
2182 {
2183 dPOPTOPssrl;
078504b2 2184 SETs(boolSV(!sv_eq_flags(left, right, 0)));
a0d0e21e 2185 RETURN;
463ee0b2 2186 }
79072805
LW
2187}
2188
a0d0e21e 2189PP(pp_scmp)
79072805 2190{
6f1401dc
DM
2191 dVAR; dSP; dTARGET;
2192 tryAMAGICbin_MG(scmp_amg, 0);
a0d0e21e
LW
2193 {
2194 dPOPTOPssrl;
1b6737cc 2195 const int cmp = (IN_LOCALE_RUNTIME
078504b2
FC
2196 ? sv_cmp_locale_flags(left, right, 0)
2197 : sv_cmp_flags(left, right, 0));
bbce6d69 2198 SETi( cmp );
a0d0e21e
LW
2199 RETURN;
2200 }
2201}
79072805 2202
55497cff 2203PP(pp_bit_and)
2204{
6f1401dc
DM
2205 dVAR; dSP; dATARGET;
2206 tryAMAGICbin_MG(band_amg, AMGf_assign);
a0d0e21e
LW
2207 {
2208 dPOPTOPssrl;
4633a7c4 2209 if (SvNIOKp(left) || SvNIOKp(right)) {
b20c4ee1
FC
2210 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2211 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
d0ba1bd2 2212 if (PL_op->op_private & HINT_INTEGER) {
1b6737cc 2213 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
972b05a9 2214 SETi(i);
d0ba1bd2
JH
2215 }
2216 else {
1b6737cc 2217 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
972b05a9 2218 SETu(u);
d0ba1bd2 2219 }
b20c4ee1
FC
2220 if (left_ro_nonnum) SvNIOK_off(left);
2221 if (right_ro_nonnum) SvNIOK_off(right);
a0d0e21e
LW
2222 }
2223 else {
533c011a 2224 do_vop(PL_op->op_type, TARG, left, right);
a0d0e21e
LW
2225 SETTARG;
2226 }
2227 RETURN;
2228 }
2229}
79072805 2230
a0d0e21e
LW
2231PP(pp_bit_or)
2232{
3658c1f1
NC
2233 dVAR; dSP; dATARGET;
2234 const int op_type = PL_op->op_type;
2235
6f1401dc 2236 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
a0d0e21e
LW
2237 {
2238 dPOPTOPssrl;
4633a7c4 2239 if (SvNIOKp(left) || SvNIOKp(right)) {
b20c4ee1
FC
2240 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2241 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
d0ba1bd2 2242 if (PL_op->op_private & HINT_INTEGER) {
3658c1f1
NC
2243 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2244 const IV r = SvIV_nomg(right);
2245 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2246 SETi(result);
d0ba1bd2
JH
2247 }
2248 else {
3658c1f1
NC
2249 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2250 const UV r = SvUV_nomg(right);
2251 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2252 SETu(result);
d0ba1bd2 2253 }
b20c4ee1
FC
2254 if (left_ro_nonnum) SvNIOK_off(left);
2255 if (right_ro_nonnum) SvNIOK_off(right);
a0d0e21e
LW
2256 }
2257 else {
3658c1f1 2258 do_vop(op_type, TARG, left, right);
a0d0e21e
LW
2259 SETTARG;
2260 }
2261 RETURN;
79072805 2262 }
a0d0e21e 2263}
79072805 2264
a0d0e21e
LW
2265PP(pp_negate)
2266{
6f1401dc
DM
2267 dVAR; dSP; dTARGET;
2268 tryAMAGICun_MG(neg_amg, AMGf_numeric);
a0d0e21e 2269 {
6f1401dc 2270 SV * const sv = TOPs;
1b6737cc 2271 const int flags = SvFLAGS(sv);
a5b92898 2272
886a4465 2273 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
a5b92898
RB
2274 SvIV_please( sv );
2275 }
2276
28e5dec8
JH
2277 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2278 /* It's publicly an integer, or privately an integer-not-float */
2279 oops_its_an_int:
9b0e499b
GS
2280 if (SvIsUV(sv)) {
2281 if (SvIVX(sv) == IV_MIN) {
28e5dec8 2282 /* 2s complement assumption. */
9b0e499b
GS
2283 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2284 RETURN;
2285 }
2286 else if (SvUVX(sv) <= IV_MAX) {
beccb14c 2287 SETi(-SvIVX(sv));
9b0e499b
GS
2288 RETURN;
2289 }
2290 }
2291 else if (SvIVX(sv) != IV_MIN) {
2292 SETi(-SvIVX(sv));
2293 RETURN;
2294 }
28e5dec8
JH
2295#ifdef PERL_PRESERVE_IVUV
2296 else {
2297 SETu((UV)IV_MIN);
2298 RETURN;
2299 }
2300#endif
9b0e499b
GS
2301 }
2302 if (SvNIOKp(sv))
6f1401dc 2303 SETn(-SvNV_nomg(sv));
4633a7c4 2304 else if (SvPOKp(sv)) {
a0d0e21e 2305 STRLEN len;
6f1401dc 2306 const char * const s = SvPV_nomg_const(sv, len);
bbce6d69 2307 if (isIDFIRST(*s)) {
76f68e9b 2308 sv_setpvs(TARG, "-");
a0d0e21e 2309 sv_catsv(TARG, sv);
79072805 2310 }
a0d0e21e 2311 else if (*s == '+' || *s == '-') {
6f1401dc
DM
2312 sv_setsv_nomg(TARG, sv);
2313 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
79072805 2314 }
8eb28a70 2315 else if (DO_UTF8(sv)) {
6f1401dc 2316 SvIV_please_nomg(sv);
8eb28a70
JH
2317 if (SvIOK(sv))
2318 goto oops_its_an_int;
2319 if (SvNOK(sv))
6f1401dc 2320 sv_setnv(TARG, -SvNV_nomg(sv));
8eb28a70 2321 else {
76f68e9b 2322 sv_setpvs(TARG, "-");
8eb28a70
JH
2323 sv_catsv(TARG, sv);
2324 }
834a4ddd 2325 }
28e5dec8 2326 else {
6f1401dc 2327 SvIV_please_nomg(sv);
8eb28a70
JH
2328 if (SvIOK(sv))
2329 goto oops_its_an_int;
6f1401dc 2330 sv_setnv(TARG, -SvNV_nomg(sv));
28e5dec8 2331 }
a0d0e21e 2332 SETTARG;
79072805 2333 }
4633a7c4 2334 else
6f1401dc 2335 SETn(-SvNV_nomg(sv));
79072805 2336 }
a0d0e21e 2337 RETURN;
79072805
LW
2338}
2339
a0d0e21e 2340PP(pp_not)
79072805 2341{
6f1401dc
DM
2342 dVAR; dSP;
2343 tryAMAGICun_MG(not_amg, AMGf_set);
06c841cf 2344 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
a0d0e21e 2345 return NORMAL;
79072805
LW
2346}
2347
a0d0e21e 2348PP(pp_complement)
79072805 2349{
6f1401dc 2350 dVAR; dSP; dTARGET;
a42d0242 2351 tryAMAGICun_MG(compl_amg, AMGf_numeric);
a0d0e21e
LW
2352 {
2353 dTOPss;
4633a7c4 2354 if (SvNIOKp(sv)) {
d0ba1bd2 2355 if (PL_op->op_private & HINT_INTEGER) {
1b6737cc 2356 const IV i = ~SvIV_nomg(sv);
972b05a9 2357 SETi(i);
d0ba1bd2
JH
2358 }
2359 else {
1b6737cc 2360 const UV u = ~SvUV_nomg(sv);
972b05a9 2361 SETu(u);
d0ba1bd2 2362 }
a0d0e21e
LW
2363 }
2364 else {
51723571 2365 register U8 *tmps;
55497cff 2366 register I32 anum;
a0d0e21e
LW
2367 STRLEN len;
2368
10516c54 2369 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
891f9566 2370 sv_setsv_nomg(TARG, sv);
6f1401dc 2371 tmps = (U8*)SvPV_force_nomg(TARG, len);
a0d0e21e 2372 anum = len;
1d68d6cd 2373 if (SvUTF8(TARG)) {
a1ca4561 2374 /* Calculate exact length, let's not estimate. */
1d68d6cd 2375 STRLEN targlen = 0;
ba210ebe 2376 STRLEN l;
a1ca4561
YST
2377 UV nchar = 0;
2378 UV nwide = 0;
01f6e806 2379 U8 * const send = tmps + len;
74d49cd0
TS
2380 U8 * const origtmps = tmps;
2381 const UV utf8flags = UTF8_ALLOW_ANYUV;
1d68d6cd 2382
1d68d6cd 2383 while (tmps < send) {
74d49cd0
TS
2384 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2385 tmps += l;
5bbb0b5a 2386 targlen += UNISKIP(~c);
a1ca4561
YST
2387 nchar++;
2388 if (c > 0xff)
2389 nwide++;
1d68d6cd
SC
2390 }
2391
2392 /* Now rewind strings and write them. */
74d49cd0 2393 tmps = origtmps;
a1ca4561
YST
2394
2395 if (nwide) {
01f6e806
AL
2396 U8 *result;
2397 U8 *p;
2398
74d49cd0 2399 Newx(result, targlen + 1, U8);
01f6e806 2400 p = result;
a1ca4561 2401 while (tmps < send) {
74d49cd0
TS
2402 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2403 tmps += l;
01f6e806 2404 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
a1ca4561 2405 }
01f6e806 2406 *p = '\0';
c1c21316
NC
2407 sv_usepvn_flags(TARG, (char*)result, targlen,
2408 SV_HAS_TRAILING_NUL);
a1ca4561
YST
2409 SvUTF8_on(TARG);
2410 }
2411 else {
01f6e806
AL
2412 U8 *result;
2413 U8 *p;
2414
74d49cd0 2415 Newx(result, nchar + 1, U8);
01f6e806 2416 p = result;
a1ca4561 2417 while (tmps < send) {
74d49cd0
TS
2418 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2419 tmps += l;
01f6e806 2420 *p++ = ~c;
a1ca4561 2421 }
01f6e806 2422 *p = '\0';
c1c21316 2423 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
d0a21e00 2424 SvUTF8_off(TARG);
1d68d6cd 2425 }
ec93b65f 2426 SETTARG;
1d68d6cd
SC
2427 RETURN;
2428 }
a0d0e21e 2429#ifdef LIBERAL
51723571
JH
2430 {
2431 register long *tmpl;
2432 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2433 *tmps = ~*tmps;
2434 tmpl = (long*)tmps;
bb7a0f54 2435 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
51723571
JH
2436 *tmpl = ~*tmpl;
2437 tmps = (U8*)tmpl;
2438 }
a0d0e21e
LW
2439#endif
2440 for ( ; anum > 0; anum--, tmps++)
2441 *tmps = ~*tmps;
ec93b65f 2442 SETTARG;
a0d0e21e
LW
2443 }
2444 RETURN;
2445 }
79072805
LW
2446}
2447
a0d0e21e
LW
2448/* integer versions of some of the above */
2449
a0d0e21e 2450PP(pp_i_multiply)
79072805 2451{
6f1401dc
DM
2452 dVAR; dSP; dATARGET;
2453 tryAMAGICbin_MG(mult_amg, AMGf_assign);
a0d0e21e 2454 {
6f1401dc 2455 dPOPTOPiirl_nomg;
a0d0e21e
LW
2456 SETi( left * right );
2457 RETURN;
2458 }
79072805
LW
2459}
2460
a0d0e21e 2461PP(pp_i_divide)
79072805 2462{
85935d8e 2463 IV num;
6f1401dc
DM
2464 dVAR; dSP; dATARGET;
2465 tryAMAGICbin_MG(div_amg, AMGf_assign);
a0d0e21e 2466 {
6f1401dc 2467 dPOPTOPssrl;
85935d8e 2468 IV value = SvIV_nomg(right);
a0d0e21e 2469 if (value == 0)
ece1bcef 2470 DIE(aTHX_ "Illegal division by zero");
85935d8e 2471 num = SvIV_nomg(left);
a0cec769
YST
2472
2473 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2474 if (value == -1)
2475 value = - num;
2476 else
2477 value = num / value;
6f1401dc 2478 SETi(value);
a0d0e21e
LW
2479 RETURN;
2480 }
79072805
LW
2481}
2482
befad5d1 2483#if defined(__GLIBC__) && IVSIZE == 8
224ec323
JH
2484STATIC
2485PP(pp_i_modulo_0)
befad5d1
NC
2486#else
2487PP(pp_i_modulo)
2488#endif
224ec323
JH
2489{
2490 /* This is the vanilla old i_modulo. */
6f1401dc
DM
2491 dVAR; dSP; dATARGET;
2492 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2493 {
6f1401dc 2494 dPOPTOPiirl_nomg;
224ec323
JH
2495 if (!right)
2496 DIE(aTHX_ "Illegal modulus zero");
a0cec769
YST
2497 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2498 if (right == -1)
2499 SETi( 0 );
2500 else
2501 SETi( left % right );
224ec323
JH
2502 RETURN;
2503 }
2504}
2505
11010fa3 2506#if defined(__GLIBC__) && IVSIZE == 8
224ec323
JH
2507STATIC
2508PP(pp_i_modulo_1)
befad5d1 2509
224ec323 2510{
224ec323 2511 /* This is the i_modulo with the workaround for the _moddi3 bug
fce2b89e 2512 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
224ec323 2513 * See below for pp_i_modulo. */
6f1401dc
DM
2514 dVAR; dSP; dATARGET;
2515 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2516 {
6f1401dc 2517 dPOPTOPiirl_nomg;
224ec323
JH
2518 if (!right)
2519 DIE(aTHX_ "Illegal modulus zero");
a0cec769
YST
2520 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2521 if (right == -1)
2522 SETi( 0 );
2523 else
2524 SETi( left % PERL_ABS(right) );
224ec323
JH
2525 RETURN;
2526 }
224ec323
JH
2527}
2528
a0d0e21e 2529PP(pp_i_modulo)
79072805 2530{
6f1401dc
DM
2531 dVAR; dSP; dATARGET;
2532 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2533 {
6f1401dc 2534 dPOPTOPiirl_nomg;
224ec323
JH
2535 if (!right)
2536 DIE(aTHX_ "Illegal modulus zero");
2537 /* The assumption is to use hereafter the old vanilla version... */
2538 PL_op->op_ppaddr =
2539 PL_ppaddr[OP_I_MODULO] =
1c127fab 2540 Perl_pp_i_modulo_0;
224ec323
JH
2541 /* .. but if we have glibc, we might have a buggy _moddi3
2542 * (at least glicb 2.2.5 is known to have this bug), in other
2543 * words our integer modulus with negative quad as the second
2544 * argument might be broken. Test for this and re-patch the
2545 * opcode dispatch table if that is the case, remembering to
2546 * also apply the workaround so that this first round works
2547 * right, too. See [perl #9402] for more information. */
224ec323
JH
2548 {
2549 IV l = 3;
2550 IV r = -10;
2551 /* Cannot do this check with inlined IV constants since
2552 * that seems to work correctly even with the buggy glibc. */
2553 if (l % r == -3) {
2554 /* Yikes, we have the bug.
2555 * Patch in the workaround version. */
2556 PL_op->op_ppaddr =
2557 PL_ppaddr[OP_I_MODULO] =
2558 &Perl_pp_i_modulo_1;
2559 /* Make certain we work right this time, too. */
32fdb065 2560 right = PERL_ABS(right);
224ec323
JH
2561 }
2562 }
a0cec769
YST
2563 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2564 if (right == -1)
2565 SETi( 0 );
2566 else
2567 SETi( left % right );
224ec323
JH
2568 RETURN;
2569 }
79072805 2570}
befad5d1 2571#endif
79072805 2572
a0d0e21e 2573PP(pp_i_add)
79072805 2574{
6f1401dc
DM
2575 dVAR; dSP; dATARGET;
2576 tryAMAGICbin_MG(add_amg, AMGf_assign);
a0d0e21e 2577 {
6f1401dc 2578 dPOPTOPiirl_ul_nomg;
a0d0e21e
LW
2579 SETi( left + right );
2580 RETURN;
79072805 2581 }
79072805
LW
2582}
2583
a0d0e21e 2584PP(pp_i_subtract)
79072805 2585{
6f1401dc
DM
2586 dVAR; dSP; dATARGET;
2587 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
a0d0e21e 2588 {
6f1401dc 2589 dPOPTOPiirl_ul_nomg;
a0d0e21e
LW
2590 SETi( left - right );
2591 RETURN;
79072805 2592 }
79072805
LW
2593}
2594
a0d0e21e 2595PP(pp_i_lt)
79072805 2596{
6f1401dc
DM
2597 dVAR; dSP;
2598 tryAMAGICbin_MG(lt_amg, AMGf_set);
a0d0e21e 2599 {
96b6b87f 2600 dPOPTOPiirl_nomg;
54310121 2601 SETs(boolSV(left < right));
a0d0e21e
LW
2602 RETURN;
2603 }
79072805
LW
2604}
2605
a0d0e21e 2606PP(pp_i_gt)
79072805 2607{
6f1401dc
DM
2608 dVAR; dSP;
2609 tryAMAGICbin_MG(gt_amg, AMGf_set);
a0d0e21e 2610 {
96b6b87f 2611 dPOPTOPiirl_nomg;
54310121 2612 SETs(boolSV(left > right));
a0d0e21e
LW
2613 RETURN;
2614 }
79072805
LW
2615}
2616
a0d0e21e 2617PP(pp_i_le)
79072805 2618{
6f1401dc
DM
2619 dVAR; dSP;
2620 tryAMAGICbin_MG(le_amg, AMGf_set);
a0d0e21e 2621 {
96b6b87f 2622 dPOPTOPiirl_nomg;
54310121 2623 SETs(boolSV(left <= right));
a0d0e21e 2624 RETURN;
85e6fe83 2625 }
79072805
LW
2626}
2627
a0d0e21e 2628PP(pp_i_ge)
79072805 2629{
6f1401dc
DM
2630 dVAR; dSP;
2631 tryAMAGICbin_MG(ge_amg, AMGf_set);
a0d0e21e 2632 {
96b6b87f 2633 dPOPTOPiirl_nomg;
54310121 2634 SETs(boolSV(left >= right));
a0d0e21e
LW
2635 RETURN;
2636 }
79072805
LW
2637}
2638
a0d0e21e 2639PP(pp_i_eq)
79072805 2640{
6f1401dc
DM
2641 dVAR; dSP;
2642 tryAMAGICbin_MG(eq_amg, AMGf_set);
a0d0e21e 2643 {
96b6b87f 2644 dPOPTOPiirl_nomg;
54310121 2645 SETs(boolSV(left == right));
a0d0e21e
LW
2646 RETURN;
2647 }
79072805
LW
2648}
2649
a0d0e21e 2650PP(pp_i_ne)
79072805 2651{
6f1401dc
DM
2652 dVAR; dSP;
2653 tryAMAGICbin_MG(ne_amg, AMGf_set);
a0d0e21e 2654 {
96b6b87f 2655 dPOPTOPiirl_nomg;
54310121 2656 SETs(boolSV(left != right));
a0d0e21e
LW
2657 RETURN;
2658 }
79072805
LW
2659}
2660
a0d0e21e 2661PP(pp_i_ncmp)
79072805 2662{
6f1401dc
DM
2663 dVAR; dSP; dTARGET;
2664 tryAMAGICbin_MG(ncmp_amg, 0);
a0d0e21e 2665 {
96b6b87f 2666 dPOPTOPiirl_nomg;
a0d0e21e 2667 I32 value;
79072805 2668
a0d0e21e 2669 if (left > right)
79072805 2670 value = 1;
a0d0e21e 2671 else if (left < right)
79072805 2672 value = -1;
a0d0e21e 2673 else
79072805 2674 value = 0;
a0d0e21e
LW
2675 SETi(value);
2676 RETURN;
79072805 2677 }
85e6fe83
LW
2678}
2679
2680PP(pp_i_negate)
2681{
6f1401dc
DM
2682 dVAR; dSP; dTARGET;
2683 tryAMAGICun_MG(neg_amg, 0);
2684 {
2685 SV * const sv = TOPs;
2686 IV const i = SvIV_nomg(sv);
2687 SETi(-i);
2688 RETURN;
2689 }
85e6fe83
LW
2690}
2691
79072805
LW
2692/* High falutin' math. */
2693
2694PP(pp_atan2)
2695{
6f1401dc
DM
2696 dVAR; dSP; dTARGET;
2697 tryAMAGICbin_MG(atan2_amg, 0);
a0d0e21e 2698 {
096c060c 2699 dPOPTOPnnrl_nomg;
a1021d57 2700 SETn(Perl_atan2(left, right));
a0d0e21e
LW
2701 RETURN;
2702 }
79072805
LW
2703}
2704
2705PP(pp_sin)
2706{
71302fe3
NC
2707 dVAR; dSP; dTARGET;
2708 int amg_type = sin_amg;
2709 const char *neg_report = NULL;
bc81784a 2710 NV (*func)(NV) = Perl_sin;
71302fe3
NC
2711 const int op_type = PL_op->op_type;
2712
2713 switch (op_type) {
2714 case OP_COS:
2715 amg_type = cos_amg;
bc81784a 2716 func = Perl_cos;
71302fe3
NC
2717 break;
2718 case OP_EXP:
2719 amg_type = exp_amg;
bc81784a 2720 func = Perl_exp;
71302fe3
NC
2721 break;
2722 case OP_LOG:
2723 amg_type = log_amg;
bc81784a 2724 func = Perl_log;
71302fe3
NC
2725 neg_report = "log";
2726 break;
2727 case OP_SQRT:
2728 amg_type = sqrt_amg;
bc81784a 2729 func = Perl_sqrt;
71302fe3
NC
2730 neg_report = "sqrt";
2731 break;
a0d0e21e 2732 }
79072805 2733
6f1401dc
DM
2734
2735 tryAMAGICun_MG(amg_type, 0);
a0d0e21e 2736 {
6f1401dc
DM
2737 SV * const arg = POPs;
2738 const NV value = SvNV_nomg(arg);
71302fe3
NC
2739 if (neg_report) {
2740 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2741 SET_NUMERIC_STANDARD();
2742 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2743 }
2744 }
2745 XPUSHn(func(value));
a0d0e21e
LW
2746 RETURN;
2747 }
79072805
LW
2748}
2749
56cb0a1c
AD
2750/* Support Configure command-line overrides for rand() functions.
2751 After 5.005, perhaps we should replace this by Configure support
2752 for drand48(), random(), or rand(). For 5.005, though, maintain
2753 compatibility by calling rand() but allow the user to override it.
2754 See INSTALL for details. --Andy Dougherty 15 July 1998
2755*/
85ab1d1d
JH
2756/* Now it's after 5.005, and Configure supports drand48() and random(),
2757 in addition to rand(). So the overrides should not be needed any more.
2758 --Jarkko Hietaniemi 27 September 1998
2759 */
2760
2761#ifndef HAS_DRAND48_PROTO
20ce7b12 2762extern double drand48 (void);
56cb0a1c
AD
2763#endif
2764
79072805
LW
2765PP(pp_rand)
2766{
97aff369 2767 dVAR; dSP; dTARGET;
65202027 2768 NV value;
79072805
LW
2769 if (MAXARG < 1)
2770 value = 1.0;
2771 else
2772 value = POPn;
2773 if (value == 0.0)
2774 value = 1.0;
80252599 2775 if (!PL_srand_called) {
85ab1d1d 2776 (void)seedDrand01((Rand_seed_t)seed());
80252599 2777 PL_srand_called = TRUE;
93dc8474 2778 }
85ab1d1d 2779 value *= Drand01();
79072805
LW
2780 XPUSHn(value);
2781 RETURN;
2782}
2783
2784PP(pp_srand)
2785{
83832992 2786 dVAR; dSP; dTARGET;
0bd48802 2787 const UV anum = (MAXARG < 1) ? seed() : POPu;
85ab1d1d 2788 (void)seedDrand01((Rand_seed_t)anum);
80252599 2789 PL_srand_called = TRUE;
da1010ec
NC
2790 if (anum)
2791 XPUSHu(anum);
2792 else {
2793 /* Historically srand always returned true. We can avoid breaking
2794 that like this: */
2795 sv_setpvs(TARG, "0 but true");
2796 XPUSHTARG;
2797 }
83832992 2798 RETURN;
79072805
LW
2799}
2800
79072805
LW
2801PP(pp_int)
2802{
6f1401dc
DM
2803 dVAR; dSP; dTARGET;
2804 tryAMAGICun_MG(int_amg, AMGf_numeric);
774d564b 2805 {
6f1401dc
DM
2806 SV * const sv = TOPs;
2807 const IV iv = SvIV_nomg(sv);
28e5dec8
JH
2808 /* XXX it's arguable that compiler casting to IV might be subtly
2809 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2810 else preferring IV has introduced a subtle behaviour change bug. OTOH
2811 relying on floating point to be accurate is a bug. */
2812
c781a409 2813 if (!SvOK(sv)) {
922c4365 2814 SETu(0);
c781a409
RD
2815 }
2816 else if (SvIOK(sv)) {
2817 if (SvIsUV(sv))
6f1401dc 2818 SETu(SvUV_nomg(sv));
c781a409 2819 else
28e5dec8 2820 SETi(iv);
c781a409 2821 }
c781a409 2822 else {
6f1401dc 2823 const NV value = SvNV_nomg(sv);
1048ea30 2824 if (value >= 0.0) {
28e5dec8
JH
2825 if (value < (NV)UV_MAX + 0.5) {
2826 SETu(U_V(value));
2827 } else {
059a1014 2828 SETn(Perl_floor(value));
28e5dec8 2829 }
1048ea30 2830 }
28e5dec8
JH
2831 else {
2832 if (value > (NV)IV_MIN - 0.5) {
2833 SETi(I_V(value));
2834 } else {
1bbae031 2835 SETn(Perl_ceil(value));
28e5dec8
JH
2836 }
2837 }
774d564b 2838 }
79072805 2839 }
79072805
LW
2840 RETURN;
2841}
2842
463ee0b2
LW
2843PP(pp_abs)
2844{
6f1401dc
DM
2845 dVAR; dSP; dTARGET;
2846 tryAMAGICun_MG(abs_amg, AMGf_numeric);
a0d0e21e 2847 {
6f1401dc 2848 SV * const sv = TOPs;
28e5dec8 2849 /* This will cache the NV value if string isn't actually integer */
6f1401dc 2850 const IV iv = SvIV_nomg(sv);
a227d84d 2851
800401ee 2852 if (!SvOK(sv)) {
922c4365 2853 SETu(0);
800401ee
JH
2854 }
2855 else if (SvIOK(sv)) {
28e5dec8 2856 /* IVX is precise */
800401ee 2857 if (SvIsUV(sv)) {
6f1401dc 2858 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
28e5dec8
JH
2859 } else {
2860 if (iv >= 0) {
2861 SETi(iv);
2862 } else {
2863 if (iv != IV_MIN) {
2864 SETi(-iv);
2865 } else {
2866 /* 2s complement assumption. Also, not really needed as
2867 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2868 SETu(IV_MIN);
2869 }
a227d84d 2870 }
28e5dec8
JH
2871 }
2872 } else{
6f1401dc 2873 const NV value = SvNV_nomg(sv);
774d564b 2874 if (value < 0.0)
1b6737cc 2875 SETn(-value);
a4474c9e
DD
2876 else
2877 SETn(value);
774d564b 2878 }
a0d0e21e 2879 }
774d564b 2880 RETURN;
463ee0b2
LW
2881}
2882
79072805
LW
2883PP(pp_oct)
2884{
97aff369 2885 dVAR; dSP; dTARGET;
5c144d81 2886 const char *tmps;
53305cf1 2887 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
6f894ead 2888 STRLEN len;
53305cf1
NC
2889 NV result_nv;
2890 UV result_uv;
1b6737cc 2891 SV* const sv = POPs;
79072805 2892
349d4f2f 2893 tmps = (SvPV_const(sv, len));
2bc69dc4
NIS
2894 if (DO_UTF8(sv)) {
2895 /* If Unicode, try to downgrade
2896 * If not possible, croak. */
1b6737cc 2897 SV* const tsv = sv_2mortal(newSVsv(sv));
2bc69dc4
NIS
2898
2899 SvUTF8_on(tsv);
2900 sv_utf8_downgrade(tsv, FALSE);
349d4f2f 2901 tmps = SvPV_const(tsv, len);
2bc69dc4 2902 }
daa2adfd
NC
2903 if (PL_op->op_type == OP_HEX)
2904 goto hex;
2905
6f894ead 2906 while (*tmps && len && isSPACE(*tmps))
53305cf1 2907 tmps++, len--;
9e24b6e2 2908 if (*tmps == '0')
53305cf1 2909 tmps++, len--;
a674e8db 2910 if (*tmps == 'x' || *tmps == 'X') {
daa2adfd 2911 hex:
53305cf1 2912 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
daa2adfd 2913 }
a674e8db 2914 else if (*tmps == 'b' || *tmps == 'B')
53305cf1 2915 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
464e2e8a 2916 else
53305cf1
NC
2917 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2918
2919 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2920 XPUSHn(result_nv);
2921 }
2922 else {
2923 XPUSHu(result_uv);
2924 }
79072805
LW
2925 RETURN;
2926}
2927
2928/* String stuff. */
2929
2930PP(pp_length)
2931{
97aff369 2932 dVAR; dSP; dTARGET;
0bd48802 2933 SV * const sv = TOPs;
a0ed51b3 2934
656266fc 2935 if (SvGAMAGIC(sv)) {
9f621bb0
NC
2936 /* For an overloaded or magic scalar, we can't know in advance if
2937 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2938 it likes to cache the length. Maybe that should be a documented
2939 feature of it.
92331800
NC
2940 */
2941 STRLEN len;
9f621bb0
NC
2942 const char *const p
2943 = sv_2pv_flags(sv, &len,
2944 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
92331800 2945
d88e091f 2946 if (!p) {
9407f9c1
DL
2947 if (!SvPADTMP(TARG)) {
2948 sv_setsv(TARG, &PL_sv_undef);
2949 SETTARG;
2950 }
2951 SETs(&PL_sv_undef);
d88e091f 2952 }
9f621bb0 2953 else if (DO_UTF8(sv)) {
899be101 2954 SETi(utf8_length((U8*)p, (U8*)p + len));
92331800
NC
2955 }
2956 else
2957 SETi(len);
656266fc 2958 } else if (SvOK(sv)) {
9f621bb0
NC
2959 /* Neither magic nor overloaded. */
2960 if (DO_UTF8(sv))
2961 SETi(sv_len_utf8(sv));
2962 else
2963 SETi(sv_len(sv));
656266fc 2964 } else {
9407f9c1
DL
2965 if (!SvPADTMP(TARG)) {
2966 sv_setsv_nomg(TARG, &PL_sv_undef);
2967 SETTARG;
2968 }
2969 SETs(&PL_sv_undef);
92331800 2970 }
79072805
LW
2971 RETURN;
2972}
2973
2974PP(pp_substr)
2975{
97aff369 2976 dVAR; dSP; dTARGET;
79072805 2977 SV *sv;
463ee0b2 2978 STRLEN curlen;
9402d6ed 2979 STRLEN utf8_curlen;
777f7c56
EB
2980 SV * pos_sv;
2981 IV pos1_iv;
2982 int pos1_is_uv;
2983 IV pos2_iv;
2984 int pos2_is_uv;
2985 SV * len_sv;
2986 IV len_iv = 0;
2987 int len_is_uv = 1;
050e6362 2988 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
e1ec3a88 2989 const char *tmps;
777f7c56 2990 const IV arybase = CopARYBASE_get(PL_curcop);
9402d6ed 2991 SV *repl_sv = NULL;
cbbf8932 2992 const char *repl = NULL;
7b8d334a 2993 STRLEN repl_len;
050e6362 2994 const int num_args = PL_op->op_private & 7;
13e30c65 2995 bool repl_need_utf8_upgrade = FALSE;
9402d6ed 2996 bool repl_is_utf8 = FALSE;
79072805 2997
78f9721b
SM
2998 if (num_args > 2) {
2999 if (num_args > 3) {
9402d6ed 3000 repl_sv = POPs;
83003860 3001 repl = SvPV_const(repl_sv, repl_len);
9402d6ed 3002 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
7b8d334a 3003 }
777f7c56
EB
3004 len_sv = POPs;
3005 len_iv = SvIV(len_sv);
3006 len_is_uv = SvIOK_UV(len_sv);
5d82c453 3007 }
777f7c56
EB
3008 pos_sv = POPs;
3009 pos1_iv = SvIV(pos_sv);
3010 pos1_is_uv = SvIOK_UV(pos_sv);
79072805 3011 sv = POPs;
849ca7ee 3012 PUTBACK;
9402d6ed
JH
3013 if (repl_sv) {
3014 if (repl_is_utf8) {
3015 if (!DO_UTF8(sv))
3016 sv_utf8_upgrade(sv);
3017 }
13e30c65
JH
3018 else if (DO_UTF8(sv))
3019 repl_need_utf8_upgrade = TRUE;
9402d6ed 3020 }
5c144d81 3021 tmps = SvPV_const(sv, curlen);
7e2040f0 3022 if (DO_UTF8(sv)) {
9402d6ed
JH
3023 utf8_curlen = sv_len_utf8(sv);
3024 if (utf8_curlen == curlen)
3025 utf8_curlen = 0;
a0ed51b3 3026 else
9402d6ed 3027 curlen = utf8_curlen;
a0ed51b3 3028 }
d1c2b58a 3029 else
9402d6ed 3030 utf8_curlen = 0;
a0ed51b3 3031
777f7c56
EB
3032 if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
3033 UV pos1_uv = pos1_iv-arybase;
3034 /* Overflow can occur when $[ < 0 */
3035 if (arybase < 0 && pos1_uv < (UV)pos1_iv)
1c900557 3036 goto bound_fail;
777f7c56
EB
3037 pos1_iv = pos1_uv;
3038 pos1_is_uv = 1;
3039 }
3040 else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
1c900557 3041 goto bound_fail; /* $[=3; substr($_,2,...) */
777f7c56
EB
3042 }
3043 else { /* pos < $[ */
3044 if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
3045 pos1_iv = curlen;
3046 pos1_is_uv = 1;
3047 } else {
3048 if (curlen) {
3049 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3050 pos1_iv += curlen;
3051 }
5d82c453 3052 }
68dc0745 3053 }
777f7c56
EB
3054 if (pos1_is_uv || pos1_iv > 0) {
3055 if ((UV)pos1_iv > curlen)
1c900557 3056 goto bound_fail;
777f7c56
EB
3057 }
3058
3059 if (num_args > 2) {
3060 if (!len_is_uv && len_iv < 0) {
3061 pos2_iv = curlen + len_iv;
3062 if (curlen)
3063 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3064 else
3065 pos2_is_uv = 0;
3066 } else { /* len_iv >= 0 */
3067 if (!pos1_is_uv && pos1_iv < 0) {
3068 pos2_iv = pos1_iv + len_iv;
3069 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3070 } else {
3071 if ((UV)len_iv > curlen-(UV)pos1_iv)
3072 pos2_iv = curlen;
3073 else
3074 pos2_iv = pos1_iv+len_iv;
3075 pos2_is_uv = 1;
3076 }
5d82c453 3077 }
2304df62 3078 }
79072805 3079 else {
777f7c56
EB
3080 pos2_iv = curlen;
3081 pos2_is_uv = 1;
3082 }
3083
3084 if (!pos2_is_uv && pos2_iv < 0) {
3085 if (!pos1_is_uv && pos1_iv < 0)
1c900557 3086 goto bound_fail;
777f7c56
EB
3087 pos2_iv = 0;
3088 }
3089 else if (!pos1_is_uv && pos1_iv < 0)
3090 pos1_iv = 0;
3091
3092 if ((UV)pos2_iv < (UV)pos1_iv)
3093 pos2_iv = pos1_iv;
3094 if ((UV)pos2_iv > curlen)
3095 pos2_iv = curlen;
3096
3097 {
3098 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3099 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3100 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
777f7c56 3101 STRLEN byte_len = len;
d931b1be
NC
3102 STRLEN byte_pos = utf8_curlen
3103 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3104
2154eca7
EB
3105 if (lvalue && !repl) {
3106 SV * ret;
3107
3108 if (!SvGMAGICAL(sv)) {
3109 if (SvROK(sv)) {
3110 SvPV_force_nolen(sv);
3111 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3112 "Attempt to use reference as lvalue in substr");
3113 }
3114 if (isGV_with_GP(sv))
3115 SvPV_force_nolen(sv);
3116 else if (SvOK(sv)) /* is it defined ? */
3117 (void)SvPOK_only_UTF8(sv);
3118 else
3119 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
781e7547 3120 }
2154eca7
EB
3121
3122 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3123 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3124 LvTYPE(ret) = 'x';
3125 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3126 LvTARGOFF(ret) = pos;
3127 LvTARGLEN(ret) = len;
3128
3129 SPAGAIN;
3130 PUSHs(ret); /* avoid SvSETMAGIC here */
3131 RETURN;
781e7547
DM
3132 }
3133
2154eca7
EB
3134 SvTAINTED_off(TARG); /* decontaminate */
3135 SvUTF8_off(TARG); /* decontaminate */
3136
3137 tmps += byte_pos;
777f7c56 3138 sv_setpvn(TARG, tmps, byte_len);
12aa1545 3139#ifdef USE_LOCALE_COLLATE
14befaf4 3140 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
12aa1545 3141#endif
9402d6ed 3142 if (utf8_curlen)
7f66633b 3143 SvUTF8_on(TARG);
2154eca7 3144
f7928d6c 3145 if (repl) {
13e30c65
JH
3146 SV* repl_sv_copy = NULL;
3147
3148 if (repl_need_utf8_upgrade) {
3149 repl_sv_copy = newSVsv(repl_sv);
3150 sv_utf8_upgrade(repl_sv_copy);
349d4f2f 3151 repl = SvPV_const(repl_sv_copy, repl_len);
13e30c65
JH
3152 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3153 }
502d9230
VP
3154 if (!SvOK(sv))
3155 sv_setpvs(sv, "");
777f7c56 3156 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
9402d6ed 3157 if (repl_is_utf8)
f7928d6c 3158 SvUTF8_on(sv);
ef8d46e8 3159 SvREFCNT_dec(repl_sv_copy);
f7928d6c 3160 }
79072805 3161 }
849ca7ee 3162 SPAGAIN;
e27c778f
FC
3163 SvSETMAGIC(TARG);
3164 PUSHs(TARG);
79072805 3165 RETURN;
777f7c56 3166
1c900557 3167bound_fail:
777f7c56
EB
3168 if (lvalue || repl)
3169 Perl_croak(aTHX_ "substr outside of string");
3170 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3171 RETPUSHUNDEF;
79072805
LW
3172}
3173
3174PP(pp_vec)
3175{
2154eca7 3176 dVAR; dSP;
1b6737cc
AL
3177 register const IV size = POPi;
3178 register const IV offset = POPi;
3179 register SV * const src = POPs;
3180 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
2154eca7 3181 SV * ret;
a0d0e21e 3182
81e118e0 3183 if (lvalue) { /* it's an lvalue! */
2154eca7
EB
3184 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3185 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3186 LvTYPE(ret) = 'v';
3187 LvTARG(ret) = SvREFCNT_inc_simple(src);
3188 LvTARGOFF(ret) = offset;
3189 LvTARGLEN(ret) = size;
3190 }
3191 else {
3192 dTARGET;
3193 SvTAINTED_off(TARG); /* decontaminate */
3194 ret = TARG;
79072805
LW
3195 }
3196
2154eca7
EB
3197 sv_setuv(ret, do_vecget(src, offset, size));
3198 PUSHs(ret);
79072805
LW
3199 RETURN;
3200}
3201
3202PP(pp_index)
3203{
97aff369 3204 dVAR; dSP; dTARGET;
79072805
LW
3205 SV *big;
3206 SV *little;
c445ea15 3207 SV *temp = NULL;
ad66a58c 3208 STRLEN biglen;
2723d216 3209 STRLEN llen = 0;
79072805
LW
3210 I32 offset;
3211 I32 retval;
73ee8be2
NC
3212 const char *big_p;
3213 const char *little_p;
fc15ae8f 3214 const I32 arybase = CopARYBASE_get(PL_curcop);
2f040f7f
NC
3215 bool big_utf8;
3216 bool little_utf8;
2723d216 3217 const bool is_index = PL_op->op_type == OP_INDEX;
79072805 3218
2723d216
NC
3219 if (MAXARG >= 3) {
3220 /* arybase is in characters, like offset, so combine prior to the
3221 UTF-8 to bytes calculation. */
79072805 3222 offset = POPi - arybase;
2723d216 3223 }
79072805
LW
3224 little = POPs;
3225 big = POPs;
73ee8be2
NC
3226 big_p = SvPV_const(big, biglen);
3227 little_p = SvPV_const(little, llen);
3228
e609e586
NC
3229 big_utf8 = DO_UTF8(big);
3230 little_utf8 = DO_UTF8(little);
3231 if (big_utf8 ^ little_utf8) {
3232 /* One needs to be upgraded. */
2f040f7f
NC
3233 if (little_utf8 && !PL_encoding) {
3234 /* Well, maybe instead we might be able to downgrade the small
3235 string? */
1eced8f8 3236 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
2f040f7f
NC
3237 &little_utf8);
3238 if (little_utf8) {
3239 /* If the large string is ISO-8859-1, and it's not possible to
3240 convert the small string to ISO-8859-1, then there is no
3241 way that it could be found anywhere by index. */
3242 retval = -1;
3243 goto fail;
3244 }
e609e586 3245
2f040f7f
NC
3246 /* At this point, pv is a malloc()ed string. So donate it to temp
3247 to ensure it will get free()d */
3248 little = temp = newSV(0);
73ee8be2
NC
3249 sv_usepvn(temp, pv, llen);
3250 little_p = SvPVX(little);
e609e586 3251 } else {
73ee8be2
NC
3252 temp = little_utf8
3253 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
2f040f7f
NC
3254
3255 if (PL_encoding) {
3256 sv_recode_to_utf8(temp, PL_encoding);
3257 } else {
3258 sv_utf8_upgrade(temp);
3259 }
3260 if (little_utf8) {
3261 big = temp;
3262 big_utf8 = TRUE;
73ee8be2 3263 big_p = SvPV_const(big, biglen);
2f040f7f
NC
3264 } else {
3265 little = temp;
73ee8be2 3266 little_p = SvPV_const(little, llen);
2f040f7f 3267 }
e609e586
NC
3268 }
3269 }
73ee8be2
NC
3270 if (SvGAMAGIC(big)) {
3271 /* Life just becomes a lot easier if I use a temporary here.
3272 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3273 will trigger magic and overloading again, as will fbm_instr()
3274 */
59cd0e26
NC
3275 big = newSVpvn_flags(big_p, biglen,
3276 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
73ee8be2
NC
3277 big_p = SvPVX(big);
3278 }
e4e44778 3279 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
73ee8be2
NC
3280 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3281 warn on undef, and we've already triggered a warning with the
3282 SvPV_const some lines above. We can't remove that, as we need to
3283 call some SvPV to trigger overloading early and find out if the
3284 string is UTF-8.
3285 This is all getting to messy. The API isn't quite clean enough,
3286 because data access has side effects.
3287 */
59cd0e26
NC
3288 little = newSVpvn_flags(little_p, llen,
3289 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
73ee8be2
NC
3290 little_p = SvPVX(little);
3291 }
e609e586 3292
79072805 3293 if (MAXARG < 3)
2723d216 3294 offset = is_index ? 0 : biglen;
a0ed51b3 3295 else {
ad66a58c 3296 if (big_utf8 && offset > 0)
a0ed51b3 3297 sv_pos_u2b(big, &offset, 0);
73ee8be2
NC
3298 if (!is_index)
3299 offset += llen;
a0ed51b3 3300 }
79072805
LW
3301 if (offset < 0)
3302 offset = 0;
ad66a58c
NC
3303 else if (offset > (I32)biglen)
3304 offset = biglen;
73ee8be2
NC
3305 if (!(little_p = is_index
3306 ? fbm_instr((unsigned char*)big_p + offset,
3307 (unsigned char*)big_p + biglen, little, 0)
3308 : rninstr(big_p, big_p + offset,
3309 little_p, little_p + llen)))
a0ed51b3 3310 retval = -1;
ad66a58c 3311 else {
73ee8be2 3312 retval = little_p - big_p;
ad66a58c
NC
3313 if (retval > 0 && big_utf8)
3314 sv_pos_b2u(big, &retval);
3315 }
ef8d46e8 3316 SvREFCNT_dec(temp);
2723d216 3317 fail:
a0ed51b3 3318 PUSHi(retval + arybase);
79072805
LW
3319 RETURN;
3320}
3321
3322PP(pp_sprintf)
3323{
97aff369 3324 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3e6bd4bf 3325 SvTAINTED_off(TARG);
79072805 3326 do_sprintf(TARG, SP-MARK, MARK+1);
bbce6d69 3327 TAINT_IF(SvTAINTED(TARG));
79072805
LW
3328 SP = ORIGMARK;
3329 PUSHTARG;
3330 RETURN;
3331}
3332
79072805
LW
3333PP(pp_ord)
3334{
97aff369 3335 dVAR; dSP; dTARGET;
1eced8f8 3336
7df053ec 3337 SV *argsv = POPs;
ba210ebe 3338 STRLEN len;
349d4f2f 3339 const U8 *s = (U8*)SvPV_const(argsv, len);
121910a4 3340
799ef3cb 3341 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
1eced8f8 3342 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
799ef3cb 3343 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
121910a4
JH
3344 argsv = tmpsv;
3345 }
79072805 3346
872c91ae 3347 XPUSHu(DO_UTF8(argsv) ?
89ebb4a3 3348 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
5fc32dea 3349 (UV)(*s & 0xff));
68795e93 3350
79072805
LW
3351 RETURN;
3352}
3353
463ee0b2
LW
3354PP(pp_chr)
3355{
97aff369 3356 dVAR; dSP; dTARGET;
463ee0b2 3357 char *tmps;
8a064bd6
JH
3358 UV value;
3359
3360 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3361 ||
3362 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3363 if (IN_BYTES) {
3364 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3365 } else {
3366 (void) POPs; /* Ignore the argument value. */
3367 value = UNICODE_REPLACEMENT;
3368 }
3369 } else {
3370 value = POPu;
3371 }
463ee0b2 3372
862a34c6 3373 SvUPGRADE(TARG,SVt_PV);
a0ed51b3 3374
0064a8a9 3375 if (value > 255 && !IN_BYTES) {
eb160463 3376 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
62961d2e 3377 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
349d4f2f 3378 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
a0ed51b3
LW
3379 *tmps = '\0';
3380 (void)SvPOK_only(TARG);
aa6ffa16 3381 SvUTF8_on(TARG);
a0ed51b3
LW
3382 XPUSHs(TARG);
3383 RETURN;
3384 }
3385
748a9306 3386 SvGROW(TARG,2);
463ee0b2
LW
3387 SvCUR_set(TARG, 1);
3388 tmps = SvPVX(TARG);
eb160463 3389 *tmps++ = (char)value;
748a9306 3390 *tmps = '\0';
a0d0e21e 3391 (void)SvPOK_only(TARG);
4c5ed6e2 3392
88632417 3393 if (PL_encoding && !IN_BYTES) {
799ef3cb 3394 sv_recode_to_utf8(TARG, PL_encoding);
88632417
JH
3395 tmps = SvPVX(TARG);
3396 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
4c5ed6e2
TS
3397 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3398 SvGROW(TARG, 2);
d5a15ac2 3399 tmps = SvPVX(TARG);
4c5ed6e2
TS
3400 SvCUR_set(TARG, 1);
3401 *tmps++ = (char)value;
88632417 3402 *tmps = '\0';
4c5ed6e2 3403 SvUTF8_off(TARG);
88632417
JH
3404 }
3405 }
4c5ed6e2 3406
463ee0b2
LW
3407 XPUSHs(TARG);
3408 RETURN;
3409}
3410
79072805
LW
3411PP(pp_crypt)
3412{
79072805 3413#ifdef HAS_CRYPT
97aff369 3414 dVAR; dSP; dTARGET;
5f74f29c 3415 dPOPTOPssrl;
85c16d83 3416 STRLEN len;
10516c54 3417 const char *tmps = SvPV_const(left, len);
2bc69dc4 3418
85c16d83 3419 if (DO_UTF8(left)) {
2bc69dc4 3420 /* If Unicode, try to downgrade.
f2791508
JH
3421 * If not possible, croak.
3422 * Yes, we made this up. */
1b6737cc 3423 SV* const tsv = sv_2mortal(newSVsv(left));
2bc69dc4 3424
f2791508 3425 SvUTF8_on(tsv);
2bc69dc4 3426 sv_utf8_downgrade(tsv, FALSE);
349d4f2f 3427 tmps = SvPV_const(tsv, len);
85c16d83 3428 }
05404ffe
JH
3429# ifdef USE_ITHREADS
3430# ifdef HAS_CRYPT_R
3431 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3432 /* This should be threadsafe because in ithreads there is only
3433 * one thread per interpreter. If this would not be true,
3434 * we would need a mutex to protect this malloc. */
3435 PL_reentrant_buffer->_crypt_struct_buffer =
3436 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3437#if defined(__GLIBC__) || defined(__EMX__)
3438 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3439 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3440 /* work around glibc-2.2.5 bug */
3441 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3442 }
05404ffe 3443#endif
6ab58e4d 3444 }
05404ffe
JH
3445# endif /* HAS_CRYPT_R */
3446# endif /* USE_ITHREADS */
5f74f29c 3447# ifdef FCRYPT
83003860 3448 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
5f74f29c 3449# else
83003860 3450 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
5f74f29c 3451# endif
ec93b65f 3452 SETTARG;
4808266b 3453 RETURN;
79072805 3454#else
b13b2135 3455 DIE(aTHX_
79072805
LW
3456 "The crypt() function is unimplemented due to excessive paranoia.");
3457#endif
79072805
LW
3458}
3459
00f254e2
KW
3460/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3461 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3462
00f254e2
KW
3463/* Below are several macros that generate code */
3464/* Generates code to store a unicode codepoint c that is known to occupy
3465 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3466#define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3467 STMT_START { \
3468 *(p) = UTF8_TWO_BYTE_HI(c); \
3469 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3470 } STMT_END
3471
3472/* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3473 * available byte after the two bytes */
3474#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3475 STMT_START { \
3476 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3477 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3478 } STMT_END
3479
3480/* Generates code to store the upper case of latin1 character l which is known
3481 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3482 * are only two characters that fit this description, and this macro knows
3483 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3484 * bytes */
3485#define STORE_NON_LATIN1_UC(p, l) \
3486STMT_START { \
3487 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3488 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3489 } else { /* Must be the following letter */ \
3490 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3491 } \
3492} STMT_END
3493
3494/* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3495 * after the character stored */
3496#define CAT_NON_LATIN1_UC(p, l) \
3497STMT_START { \
3498 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3499 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3500 } else { \
3501 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3502 } \
3503} STMT_END
3504
3505/* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3506 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3507 * and must require two bytes to store it. Advances p to point to the next
3508 * available position */
3509#define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3510STMT_START { \
3511 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3512 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3513 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3514 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3515 } else {/* else is one of the other two special cases */ \
3516 CAT_NON_LATIN1_UC((p), (l)); \
3517 } \
3518} STMT_END
3519
79072805
LW
3520PP(pp_ucfirst)
3521{
00f254e2
KW
3522 /* Actually is both lcfirst() and ucfirst(). Only the first character
3523 * changes. This means that possibly we can change in-place, ie., just
3524 * take the source and change that one character and store it back, but not
3525 * if read-only etc, or if the length changes */
3526
97aff369 3527 dVAR;
39644a26 3528 dSP;
d54190f6 3529 SV *source = TOPs;
00f254e2 3530 STRLEN slen; /* slen is the byte length of the whole SV. */
d54190f6
NC
3531 STRLEN need;
3532 SV *dest;
00f254e2
KW
3533 bool inplace; /* ? Convert first char only, in-place */
3534 bool doing_utf8 = FALSE; /* ? using utf8 */
3535 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
12e9c124 3536 const int op_type = PL_op->op_type;
d54190f6
NC
3537 const U8 *s;
3538 U8 *d;
3539 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
00f254e2
KW
3540 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3541 * stored as UTF-8 at s. */
3542 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3543 * lowercased) character stored in tmpbuf. May be either
3544 * UTF-8 or not, but in either case is the number of bytes */
d54190f6
NC
3545
3546 SvGETMAGIC(source);
3547 if (SvOK(source)) {
3548 s = (const U8*)SvPV_nomg_const(source, slen);
3549 } else {
0a0ffbce
RGS
3550 if (ckWARN(WARN_UNINITIALIZED))
3551 report_uninit(source);
1eced8f8 3552 s = (const U8*)"";
d54190f6
NC
3553 slen = 0;
3554 }
a0ed51b3 3555
00f254e2
KW
3556 /* We may be able to get away with changing only the first character, in
3557 * place, but not if read-only, etc. Later we may discover more reasons to
3558 * not convert in-place. */
3559 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3560
3561 /* First calculate what the changed first character should be. This affects
3562 * whether we can just swap it out, leaving the rest of the string unchanged,
3563 * or even if have to convert the dest to UTF-8 when the source isn't */
3564
3565 if (! slen) { /* If empty */
3566 need = 1; /* still need a trailing NUL */
3567 }
3568 else if (DO_UTF8(source)) { /* Is the source utf8? */
d54190f6 3569 doing_utf8 = TRUE;
00f254e2
KW
3570
3571/* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
3572 * and doesn't allow for the user to specify their own. When code is added to
3573 * detect if there is a user-defined mapping in force here, and if so to use
3574 * that, then the code below can be compiled. The detection would be a good
3575 * thing anyway, as currently the user-defined mappings only work on utf8
3576 * strings, and thus depend on the chosen internal storage method, which is a
3577 * bad thing */
3578#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3579 if (UTF8_IS_INVARIANT(*s)) {
3580
3581 /* An invariant source character is either ASCII or, in EBCDIC, an
3582 * ASCII equivalent or a caseless C1 control. In both these cases,
3583 * the lower and upper cases of any character are also invariants
3584 * (and title case is the same as upper case). So it is safe to
3585 * use the simple case change macros which avoid the overhead of
3586 * the general functions. Note that if perl were to be extended to
3587 * do locale handling in UTF-8 strings, this wouldn't be true in,
3588 * for example, Lithuanian or Turkic. */
3589 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3590 tculen = ulen = 1;
3591 need = slen + 1;
12e9c124 3592 }
00f254e2
KW
3593 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3594 U8 chr;
3595
3596 /* Similarly, if the source character isn't invariant but is in the
3597 * latin1 range (or EBCDIC equivalent thereof), we have the case
3598 * changes compiled into perl, and can avoid the overhead of the
3599 * general functions. In this range, the characters are stored as
3600 * two UTF-8 bytes, and it so happens that any changed-case version
3601 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3602 tculen = ulen = 2;
3603 need = slen + 1;
3604
3605 /* Convert the two source bytes to a single Unicode code point
3606 * value, change case and save for below */
356979f4 3607 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
00f254e2
KW
3608 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3609 U8 lower = toLOWER_LATIN1(chr);
3610 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3611 }
3612 else { /* ucfirst */
3613 U8 upper = toUPPER_LATIN1_MOD(chr);
3614
3615 /* Most of the latin1 range characters are well-behaved. Their
3616 * title and upper cases are the same, and are also in the
3617 * latin1 range. The macro above returns their upper (hence
3618 * title) case, and all that need be done is to save the result
3619 * for below. However, several characters are problematic, and
3620 * have to be handled specially. The MOD in the macro name
3621 * above means that these tricky characters all get mapped to
3622 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3623 * This mapping saves some tests for the majority of the
3624 * characters */
3625
3626 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3627
3628 /* Not tricky. Just save it. */
3629 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3630 }
3631 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3632
3633 /* This one is tricky because it is two characters long,
3634 * though the UTF-8 is still two bytes, so the stored
3635 * length doesn't change */
3636 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3637 *(tmpbuf + 1) = 's';
3638 }
3639 else {
3640
3641 /* The other two have their title and upper cases the same,
3642 * but are tricky because the changed-case characters
3643 * aren't in the latin1 range. They, however, do fit into
3644 * two UTF-8 bytes */
3645 STORE_NON_LATIN1_UC(tmpbuf, chr);
3646 }
3647 }
3648 }
3649 else {
3650#endif /* end of dont want to break user-defined casing */
3651
3652 /* Here, can't short-cut the general case */
3653
3654 utf8_to_uvchr(s, &ulen);
3655 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3656 else toLOWER_utf8(s, tmpbuf, &tculen);
3657
3658 /* we can't do in-place if the length changes. */
3659 if (ulen != tculen) inplace = FALSE;
3660 need = slen + 1 - ulen + tculen;
3661#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3662 }
3663#endif
d54190f6 3664 }
00f254e2
KW
3665 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3666 * latin1 is treated as caseless. Note that a locale takes
3667 * precedence */
3668 tculen = 1; /* Most characters will require one byte, but this will
3669 * need to be overridden for the tricky ones */
3670 need = slen + 1;
3671
3672 if (op_type == OP_LCFIRST) {
d54190f6 3673
00f254e2
KW
3674 /* lower case the first letter: no trickiness for any character */
3675 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3676 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3677 }
3678 /* is ucfirst() */
3679 else if (IN_LOCALE_RUNTIME) {
3680 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3681 * have upper and title case different
3682 */
3683 }
3684 else if (! IN_UNI_8_BIT) {
3685 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3686 * on EBCDIC machines whatever the
3687 * native function does */
3688 }
3689 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3690 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3691
3692 /* tmpbuf now has the correct title case for all latin1 characters
3693 * except for the several ones that have tricky handling. All
3694 * of these are mapped by the MOD to the letter below. */
3695 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3696
3697 /* The length is going to change, with all three of these, so
3698 * can't replace just the first character */
3699 inplace = FALSE;
3700
3701 /* We use the original to distinguish between these tricky
3702 * cases */
3703 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3704 /* Two character title case 'Ss', but can remain non-UTF-8 */
3705 need = slen + 2;
3706 *tmpbuf = 'S';
3707 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3708 tculen = 2;
3709 }
3710 else {
d54190f6 3711
00f254e2
KW
3712 /* The other two tricky ones have their title case outside
3713 * latin1. It is the same as their upper case. */
3714 doing_utf8 = TRUE;
3715 STORE_NON_LATIN1_UC(tmpbuf, *s);
3716
3717 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3718 * and their upper cases is 2. */
3719 tculen = ulen = 2;
3720
3721 /* The entire result will have to be in UTF-8. Assume worst
3722 * case sizing in conversion. (all latin1 characters occupy
3723 * at most two bytes in utf8) */
3724 convert_source_to_utf8 = TRUE;
3725 need = slen * 2 + 1;
3726 }
3727 } /* End of is one of the three special chars */
3728 } /* End of use Unicode (Latin1) semantics */
3729 } /* End of changing the case of the first character */
3730
3731 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3732 * generate the result */
3733 if (inplace) {
3734
3735 /* We can convert in place. This means we change just the first
3736 * character without disturbing the rest; no need to grow */
d54190f6
NC
3737 dest = source;
3738 s = d = (U8*)SvPV_force_nomg(source, slen);
3739 } else {
3740 dTARGET;
3741
3742 dest = TARG;
3743
00f254e2
KW
3744 /* Here, we can't convert in place; we earlier calculated how much
3745 * space we will need, so grow to accommodate that */
d54190f6 3746 SvUPGRADE(dest, SVt_PV);
3b416f41 3747 d = (U8*)SvGROW(dest, need);
d54190f6
NC
3748 (void)SvPOK_only(dest);
3749
3750 SETs(dest);
d54190f6 3751 }
44bc797b 3752
d54190f6 3753 if (doing_utf8) {
00f254e2
KW
3754 if (! inplace) {
3755 if (! convert_source_to_utf8) {
3756
3757 /* Here both source and dest are in UTF-8, but have to create
3758 * the entire output. We initialize the result to be the
3759 * title/lower cased first character, and then append the rest
3760 * of the string. */
3761 sv_setpvn(dest, (char*)tmpbuf, tculen);
3762 if (slen > ulen) {
3763 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3764 }
3765 }
3766 else {
3767 const U8 *const send = s + slen;
3768
3769 /* Here the dest needs to be in UTF-8, but the source isn't,
3770 * except we earlier UTF-8'd the first character of the source
3771 * into tmpbuf. First put that into dest, and then append the
3772 * rest of the source, converting it to UTF-8 as we go. */
3773
3774 /* Assert tculen is 2 here because the only two characters that
3775 * get to this part of the code have 2-byte UTF-8 equivalents */
3776 *d++ = *tmpbuf;
3777 *d++ = *(tmpbuf + 1);
3778 s++; /* We have just processed the 1st char */
3779
3780 for (; s < send; s++) {
3781 d = uvchr_to_utf8(d, *s);
3782 }
3783 *d = '\0';
3784 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3785 }
d54190f6 3786 SvUTF8_on(dest);
a0ed51b3 3787 }
00f254e2 3788 else { /* in-place UTF-8. Just overwrite the first character */
d54190f6
NC
3789 Copy(tmpbuf, d, tculen, U8);
3790 SvCUR_set(dest, need - 1);
a0ed51b3 3791 }
a0ed51b3 3792 }
00f254e2
KW
3793 else { /* Neither source nor dest are in or need to be UTF-8 */
3794 if (slen) {
2de3dbcc 3795 if (IN_LOCALE_RUNTIME) {
31351b04 3796 TAINT;
d54190f6 3797 SvTAINTED_on(dest);
31351b04 3798 }
00f254e2
KW
3799 if (inplace) { /* in-place, only need to change the 1st char */
3800 *d = *tmpbuf;
3801 }
3802 else { /* Not in-place */
3803
3804 /* Copy the case-changed character(s) from tmpbuf */
3805 Copy(tmpbuf, d, tculen, U8);
3806 d += tculen - 1; /* Code below expects d to point to final
3807 * character stored */
3808 }
3809 }
3810 else { /* empty source */
3811 /* See bug #39028: Don't taint if empty */
d54190f6
NC
3812 *d = *s;
3813 }
3814
00f254e2
KW
3815 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3816 * the destination to retain that flag */
d54190f6
NC
3817 if (SvUTF8(source))
3818 SvUTF8_on(dest);
3819
00f254e2 3820 if (!inplace) { /* Finish the rest of the string, unchanged */
d54190f6
NC
3821 /* This will copy the trailing NUL */
3822 Copy(s + 1, d + 1, slen, U8);
3823 SvCUR_set(dest, need - 1);
bbce6d69 3824 }
bbce6d69 3825 }
539689e7
FC
3826 if (dest != source && SvTAINTED(source))
3827 SvTAINT(dest);
d54190f6 3828 SvSETMAGIC(dest);
79072805
LW
3829 RETURN;
3830}
3831
67306194
NC
3832/* There's so much setup/teardown code common between uc and lc, I wonder if
3833 it would be worth merging the two, and just having a switch outside each
00f254e2 3834 of the three tight loops. There is less and less commonality though */
79072805
LW
3835PP(pp_uc)
3836{
97aff369 3837 dVAR;
39644a26 3838 dSP;
67306194 3839 SV *source = TOPs;
463ee0b2 3840 STRLEN len;
67306194
NC
3841 STRLEN min;
3842 SV *dest;
3843 const U8 *s;
3844 U8 *d;
79072805 3845
67306194
NC
3846 SvGETMAGIC(source);
3847
3848 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
00f254e2
KW
3849 && SvTEMP(source) && !DO_UTF8(source)
3850 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3851
3852 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3853 * make the loop tight, so we overwrite the source with the dest before
3854 * looking at it, and we need to look at the original source
3855 * afterwards. There would also need to be code added to handle
3856 * switching to not in-place in midstream if we run into characters
3857 * that change the length.
3858 */
67306194
NC
3859 dest = source;
3860 s = d = (U8*)SvPV_force_nomg(source, len);
3861 min = len + 1;
3862 } else {
a0ed51b3 3863 dTARGET;
a0ed51b3 3864
67306194 3865 dest = TARG;
128c9517 3866
67306194
NC
3867 /* The old implementation would copy source into TARG at this point.
3868 This had the side effect that if source was undef, TARG was now
3869 an undefined SV with PADTMP set, and they don't warn inside
3870 sv_2pv_flags(). However, we're now getting the PV direct from
3871 source, which doesn't have PADTMP set, so it would warn. Hence the
3872 little games. */
3873
3874 if (SvOK(source)) {
3875 s = (const U8*)SvPV_nomg_const(source, len);
3876 } else {
0a0ffbce
RGS
3877 if (ckWARN(WARN_UNINITIALIZED))
3878 report_uninit(source);
1eced8f8 3879 s = (const U8*)"";
67306194 3880 len = 0;
a0ed51b3 3881 }
67306194
NC
3882 min = len + 1;
3883
3884 SvUPGRADE(dest, SVt_PV);
3b416f41 3885 d = (U8*)SvGROW(dest, min);
67306194
NC
3886 (void)SvPOK_only(dest);
3887
3888 SETs(dest);
a0ed51b3 3889 }
31351b04 3890
67306194
NC
3891 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3892 to check DO_UTF8 again here. */
3893
3894 if (DO_UTF8(source)) {
3895 const U8 *const send = s + len;
3896 U8 tmpbuf[UTF8_MAXBYTES+1];
3897
4c8a458a
KW
3898 /* All occurrences of these are to be moved to follow any other marks.
3899 * This is context-dependent. We may not be passed enough context to
3900 * move the iota subscript beyond all of them, but we do the best we can
3901 * with what we're given. The result is always better than if we
3902 * hadn't done this. And, the problem would only arise if we are
3903 * passed a character without all its combining marks, which would be
3904 * the caller's mistake. The information this is based on comes from a
3905 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3906 * itself) and so can't be checked properly to see if it ever gets
3907 * revised. But the likelihood of it changing is remote */
00f254e2 3908 bool in_iota_subscript = FALSE;
00f254e2 3909
67306194 3910 while (s < send) {
00f254e2
KW
3911 if (in_iota_subscript && ! is_utf8_mark(s)) {
3912 /* A non-mark. Time to output the iota subscript */
3913#define GREEK_CAPITAL_LETTER_IOTA 0x0399
3914#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3915
3916 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3917 in_iota_subscript = FALSE;
3918 }
00f254e2
KW
3919
3920
3921/* See comments at the first instance in this file of this ifdef */
3922#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
67306194 3923
00f254e2
KW
3924 /* If the UTF-8 character is invariant, then it is in the range
3925 * known by the standard macro; result is only one byte long */
3926 if (UTF8_IS_INVARIANT(*s)) {
3927 *d++ = toUPPER(*s);
3928 s++;
3929 }
3930 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3931
3932 /* Likewise, if it fits in a byte, its case change is in our
3933 * table */
81367fea 3934 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++);
00f254e2
KW
3935 U8 upper = toUPPER_LATIN1_MOD(orig);
3936 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
81367fea 3937 s++;
00f254e2
KW
3938 }
3939 else {
3940#else
3941 {
3942#endif
3943
3944 /* Otherwise, need the general UTF-8 case. Get the changed
3945 * case value and copy it to the output buffer */
3946
3947 const STRLEN u = UTF8SKIP(s);
3948 STRLEN ulen;
67306194 3949
00f254e2 3950 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
4c8a458a
KW
3951 if (uv == GREEK_CAPITAL_LETTER_IOTA
3952 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
3953 {
00f254e2
KW
3954 in_iota_subscript = TRUE;
3955 }
3956 else {
00f254e2
KW
3957 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3958 /* If the eventually required minimum size outgrows
3959 * the available space, we need to grow. */
3960 const UV o = d - (U8*)SvPVX_const(dest);
3961
3962 /* If someone uppercases one million U+03B0s we
3963 * SvGROW() one million times. Or we could try
3964 * guessing how much to allocate without allocating too
4c8a458a
KW
3965 * much. Such is life. See corresponding comment in
3966 * lc code for another option */
00f254e2
KW
3967 SvGROW(dest, min);
3968 d = (U8*)SvPVX(dest) + o;
3969 }
3970 Copy(tmpbuf, d, ulen, U8);
3971 d += ulen;
00f254e2 3972 }
00f254e2 3973 s += u;
67306194 3974 }
67306194 3975 }
4c8a458a
KW
3976 if (in_iota_subscript) {
3977 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3978 }
67306194
NC
3979 SvUTF8_on(dest);
3980 *d = '\0';
3981 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4c8a458a
KW
3982 }
3983 else { /* Not UTF-8 */
67306194
NC
3984 if (len) {
3985 const U8 *const send = s + len;
00f254e2
KW
3986
3987 /* Use locale casing if in locale; regular style if not treating
3988 * latin1 as having case; otherwise the latin1 casing. Do the
3989 * whole thing in a tight loop, for speed, */
2de3dbcc 3990 if (IN_LOCALE_RUNTIME) {
31351b04 3991 TAINT;
67306194
NC
3992 SvTAINTED_on(dest);
3993 for (; s < send; d++, s++)
3994 *d = toUPPER_LC(*s);
31351b04 3995 }
00f254e2
KW
3996 else if (! IN_UNI_8_BIT) {
3997 for (; s < send; d++, s++) {
67306194 3998 *d = toUPPER(*s);
00f254e2 3999 }
31351b04 4000 }
00f254e2
KW
4001 else {
4002 for (; s < send; d++, s++) {
4003 *d = toUPPER_LATIN1_MOD(*s);
4004 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
4005
4006 /* The mainstream case is the tight loop above. To avoid
4007 * extra tests in that, all three characters that require
4008 * special handling are mapped by the MOD to the one tested
4009 * just above.
4010 * Use the source to distinguish between the three cases */
4011
4012 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4013
4014 /* uc() of this requires 2 characters, but they are
4015 * ASCII. If not enough room, grow the string */
4016 if (SvLEN(dest) < ++min) {
4017 const UV o = d - (U8*)SvPVX_const(dest);
4018 SvGROW(dest, min);
4019 d = (U8*)SvPVX(dest) + o;
4020 }
4021 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4022 continue; /* Back to the tight loop; still in ASCII */
4023 }
4024
4025 /* The other two special handling characters have their
4026 * upper cases outside the latin1 range, hence need to be
4027 * in UTF-8, so the whole result needs to be in UTF-8. So,
4028 * here we are somewhere in the middle of processing a
4029 * non-UTF-8 string, and realize that we will have to convert
4030 * the whole thing to UTF-8. What to do? There are
4031 * several possibilities. The simplest to code is to
4032 * convert what we have so far, set a flag, and continue on
4033 * in the loop. The flag would be tested each time through
4034 * the loop, and if set, the next character would be
4035 * converted to UTF-8 and stored. But, I (khw) didn't want
4036 * to slow down the mainstream case at all for this fairly
4037 * rare case, so I didn't want to add a test that didn't
4038 * absolutely have to be there in the loop, besides the
4039 * possibility that it would get too complicated for
4040 * optimizers to deal with. Another possibility is to just
4041 * give up, convert the source to UTF-8, and restart the
4042 * function that way. Another possibility is to convert
4043 * both what has already been processed and what is yet to
4044 * come separately to UTF-8, then jump into the loop that
4045 * handles UTF-8. But the most efficient time-wise of the
4046 * ones I could think of is what follows, and turned out to
4047 * not require much extra code. */
4048
4049 /* Convert what we have so far into UTF-8, telling the
4050 * function that we know it should be converted, and to
4051 * allow extra space for what we haven't processed yet.
4052 * Assume the worst case space requirements for converting
4053 * what we haven't processed so far: that it will require
4054 * two bytes for each remaining source character, plus the
4055 * NUL at the end. This may cause the string pointer to
4056 * move, so re-find it. */
4057
4058 len = d - (U8*)SvPVX_const(dest);
4059 SvCUR_set(dest, len);
4060 len = sv_utf8_upgrade_flags_grow(dest,
4061 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4062 (send -s) * 2 + 1);
4063 d = (U8*)SvPVX(dest) + len;
4064
4065 /* And append the current character's upper case in UTF-8 */
4066 CAT_NON_LATIN1_UC(d, *s);
4067
4068 /* Now process the remainder of the source, converting to
4069 * upper and UTF-8. If a resulting byte is invariant in
4070 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4071 * append it to the output. */
4072
4073 s++;
4074 for (; s < send; s++) {
4075 U8 upper = toUPPER_LATIN1_MOD(*s);
4076 if UTF8_IS_INVARIANT(upper) {
4077 *d++ = upper;
4078 }
4079 else {
4080 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4081 }
4082 }
4083
4084 /* Here have processed the whole source; no need to continue
4085 * with the outer loop. Each character has been converted
4086 * to upper case and converted to UTF-8 */
4087
4088 break;
4089 } /* End of processing all latin1-style chars */
4090 } /* End of processing all chars */
4091 } /* End of source is not empty */
4092
67306194 4093 if (source != dest) {
00f254e2 4094 *d = '\0'; /* Here d points to 1 after last char, add NUL */
67306194
NC
4095 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4096 }
00f254e2 4097 } /* End of isn't utf8 */
539689e7
FC
4098 if (dest != source && SvTAINTED(source))
4099 SvTAINT(dest);
67306194 4100 SvSETMAGIC(dest);
79072805
LW
4101 RETURN;
4102}
4103
4104PP(pp_lc)
4105{
97aff369 4106 dVAR;
39644a26 4107 dSP;
ec9af7d4 4108 SV *source = TOPs;
463ee0b2 4109 STRLEN len;
ec9af7d4
NC
4110 STRLEN min;
4111 SV *dest;
4112 const U8 *s;
4113 U8 *d;
79072805 4114
ec9af7d4
NC
4115 SvGETMAGIC(source);
4116
4117 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
17fa0776 4118 && SvTEMP(source) && !DO_UTF8(source)) {
ec9af7d4 4119
00f254e2
KW
4120 /* We can convert in place, as lowercasing anything in the latin1 range
4121 * (or else DO_UTF8 would have been on) doesn't lengthen it */
ec9af7d4
NC
4122 dest = source;
4123 s = d = (U8*)SvPV_force_nomg(source, len);
4124 min = len + 1;
4125 } else {
a0ed51b3 4126 dTARGET;
a0ed51b3 4127
ec9af7d4
NC
4128 dest = TARG;
4129
4130 /* The old implementation would copy source into TARG at this point.
4131 This had the side effect that if source was undef, TARG was now
4132 an undefined SV with PADTMP set, and they don't warn inside
4133 sv_2pv_flags(). However, we're now getting the PV direct from
4134 source, which doesn't have PADTMP set, so it would warn. Hence the
4135 little games. */
4136
4137 if (SvOK(source)) {
4138 s = (const U8*)SvPV_nomg_const(source, len);
4139 } else {
0a0ffbce
RGS
4140 if (ckWARN(WARN_UNINITIALIZED))
4141 report_uninit(source);
1eced8f8 4142 s = (const U8*)"";
ec9af7d4 4143 len = 0;
a0ed51b3 4144 }
ec9af7d4 4145 min = len + 1;
128c9517 4146
ec9af7d4 4147 SvUPGRADE(dest, SVt_PV);
3b416f41 4148 d = (U8*)SvGROW(dest, min);
ec9af7d4
NC
4149 (void)SvPOK_only(dest);
4150
4151 SETs(dest);
4152 }
4153
4154 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4155 to check DO_UTF8 again here. */
4156
4157 if (DO_UTF8(source)) {
4158 const U8 *const send = s + len;
4159 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4160
4161 while (s < send) {
00f254e2
KW
4162/* See comments at the first instance in this file of this ifdef */
4163#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4164 if (UTF8_IS_INVARIANT(*s)) {
89ebb4a3 4165
00f254e2 4166 /* Invariant characters use the standard mappings compiled in.
ec9af7d4 4167 */
00f254e2
KW
4168 *d++ = toLOWER(*s);
4169 s++;
ec9af7d4 4170 }
00f254e2 4171 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
89ebb4a3 4172
00f254e2 4173 /* As do the ones in the Latin1 range */
81367fea 4174 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++));
00f254e2 4175 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
81367fea 4176 s++;
a0ed51b3 4177 }
00f254e2
KW
4178 else {
4179#endif
4180 /* Here, is utf8 not in Latin-1 range, have to go out and get
4181 * the mappings from the tables. */
4182
4183 const STRLEN u = UTF8SKIP(s);
4184 STRLEN ulen;
4185
00f254e2
KW
4186#ifndef CONTEXT_DEPENDENT_CASING
4187 toLOWER_utf8(s, tmpbuf, &ulen);
4188#else
4c8a458a
KW
4189/* This is ifdefd out because it needs more work and thought. It isn't clear
4190 * that we should do it.
4191 * A minor objection is that this is based on a hard-coded rule from the
4192 * Unicode standard, and may change, but this is not very likely at all.
4193 * mktables should check and warn if it does.
4194 * More importantly, if the sigma occurs at the end of the string, we don't
4195 * have enough context to know whether it is part of a larger string or going
4196 * to be or not. It may be that we are passed a subset of the context, via
4197 * a \U...\E, for example, and we could conceivably know the larger context if
4198 * code were changed to pass that in. But, if the string passed in is an
4199 * intermediate result, and the user concatenates two strings together
4200 * after we have made a final sigma, that would be wrong. If the final sigma
4201 * occurs in the middle of the string we are working on, then we know that it
4202 * should be a final sigma, but otherwise we can't be sure. */
00f254e2
KW
4203
4204 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4205
4206 /* If the lower case is a small sigma, it may be that we need
4207 * to change it to a final sigma. This happens at the end of
4208 * a word that contains more than just this character, and only
4209 * when we started with a capital sigma. */
4210 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4211 s > send - len && /* Makes sure not the first letter */
4212 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4213 ) {
4214
4215 /* We use the algorithm in:
4216 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4217 * is a CAPITAL SIGMA): If C is preceded by a sequence
4218 * consisting of a cased letter and a case-ignorable
4219 * sequence, and C is not followed by a sequence consisting
4220 * of a case ignorable sequence and then a cased letter,
4221 * then when lowercasing C, C becomes a final sigma */
4222
4223 /* To determine if this is the end of a word, need to peek
4224 * ahead. Look at the next character */
4225 const U8 *peek = s + u;
4226
4227 /* Skip any case ignorable characters */
4228 while (peek < send && is_utf8_case_ignorable(peek)) {
4229 peek += UTF8SKIP(peek);
4230 }
4231
4232 /* If we reached the end of the string without finding any
4233 * non-case ignorable characters, or if the next such one
4234 * is not-cased, then we have met the conditions for it
4235 * being a final sigma with regards to peek ahead, and so
4236 * must do peek behind for the remaining conditions. (We
4237 * know there is stuff behind to look at since we tested
4238 * above that this isn't the first letter) */
4239 if (peek >= send || ! is_utf8_cased(peek)) {
4240 peek = utf8_hop(s, -1);
4241
4242 /* Here are at the beginning of the first character
4243 * before the original upper case sigma. Keep backing
4244 * up, skipping any case ignorable characters */
4245 while (is_utf8_case_ignorable(peek)) {
4246 peek = utf8_hop(peek, -1);
4247 }
4248
4249 /* Here peek points to the first byte of the closest
4250 * non-case-ignorable character before the capital
4251 * sigma. If it is cased, then by the Unicode
4252 * algorithm, we should use a small final sigma instead
4253 * of what we have */
4254 if (is_utf8_cased(peek)) {
4255 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4256 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4257 }
4258 }
4259 }
4260 else { /* Not a context sensitive mapping */
4261#endif /* End of commented out context sensitive */
4262 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4263
4264 /* If the eventually required minimum size outgrows
4265 * the available space, we need to grow. */
4266 const UV o = d - (U8*)SvPVX_const(dest);
4267
4268 /* If someone lowercases one million U+0130s we
4269 * SvGROW() one million times. Or we could try
4270 * guessing how much to allocate without allocating too
4271 * much. Such is life. Another option would be to
4272 * grow an extra byte or two more each time we need to
4273 * grow, which would cut down the million to 500K, with
4274 * little waste */
4275 SvGROW(dest, min);
4276 d = (U8*)SvPVX(dest) + o;
4277 }
4278#ifdef CONTEXT_DEPENDENT_CASING
4279 }
4280#endif
4281 /* Copy the newly lowercased letter to the output buffer we're
4282 * building */
4283 Copy(tmpbuf, d, ulen, U8);
4284 d += ulen;
4285 s += u;
4286#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4287 }
4288#endif
4289 } /* End of looping through the source string */
ec9af7d4
NC
4290 SvUTF8_on(dest);
4291 *d = '\0';
4292 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
00f254e2 4293 } else { /* Not utf8 */
31351b04 4294 if (len) {
ec9af7d4 4295 const U8 *const send = s + len;
00f254e2
KW
4296
4297 /* Use locale casing if in locale; regular style if not treating
4298 * latin1 as having case; otherwise the latin1 casing. Do the
4299 * whole thing in a tight loop, for speed, */
2de3dbcc 4300 if (IN_LOCALE_RUNTIME) {
31351b04 4301 TAINT;
ec9af7d4
NC
4302 SvTAINTED_on(dest);
4303 for (; s < send; d++, s++)
4304 *d = toLOWER_LC(*s);
31351b04 4305 }
00f254e2
KW
4306 else if (! IN_UNI_8_BIT) {
4307 for (; s < send; d++, s++) {
ec9af7d4 4308 *d = toLOWER(*s);
00f254e2
KW
4309 }
4310 }
4311 else {
4312 for (; s < send; d++, s++) {
4313 *d = toLOWER_LATIN1(*s);
4314 }
31351b04 4315 }
bbce6d69 4316 }
ec9af7d4
NC
4317 if (source != dest) {
4318 *d = '\0';
4319 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4320 }
79072805 4321 }
539689e7
FC
4322 if (dest != source && SvTAINTED(source))
4323 SvTAINT(dest);
ec9af7d4 4324 SvSETMAGIC(dest);
79072805
LW
4325 RETURN;
4326}
4327
a0d0e21e 4328PP(pp_quotemeta)
79072805 4329{
97aff369 4330 dVAR; dSP; dTARGET;
1b6737cc 4331 SV * const sv = TOPs;
a0d0e21e 4332 STRLEN len;
0d46e09a 4333 register const char *s = SvPV_const(sv,len);
79072805 4334
7e2040f0 4335 SvUTF8_off(TARG); /* decontaminate */
a0d0e21e 4336 if (len) {
1b6737cc 4337 register char *d;
862a34c6 4338 SvUPGRADE(TARG, SVt_PV);
c07a80fd 4339 SvGROW(TARG, (len * 2) + 1);
a0d0e21e 4340 d = SvPVX(TARG);
7e2040f0 4341 if (DO_UTF8(sv)) {
0dd2cdef 4342 while (len) {
fd400ab9 4343 if (UTF8_IS_CONTINUED(*s)) {
0dd2cdef
LW
4344 STRLEN ulen = UTF8SKIP(s);
4345 if (ulen > len)
4346 ulen = len;
4347 len -= ulen;
4348 while (ulen--)
4349 *d++ = *s++;
4350 }
4351 else {
4352 if (!isALNUM(*s))
4353 *d++ = '\\';
4354 *d++ = *s++;
4355 len--;
4356 }
4357 }
7e2040f0 4358 SvUTF8_on(TARG);
0dd2cdef
LW
4359 }
4360 else {
4361 while (len--) {
4362 if (!isALNUM(*s))
4363 *d++ = '\\';
4364 *d++ = *s++;
4365 }
79072805 4366 }
a0d0e21e 4367 *d = '\0';
349d4f2f 4368 SvCUR_set(TARG, d - SvPVX_const(TARG));
3aa33fe5 4369 (void)SvPOK_only_UTF8(TARG);
79072805 4370 }
a0d0e21e
LW
4371 else
4372 sv_setpvn(TARG, s, len);
ec93b65f 4373 SETTARG;
79072805
LW
4374 RETURN;
4375}
4376
a0d0e21e 4377/* Arrays. */
79072805 4378
a0d0e21e 4379PP(pp_aslice)
79072805 4380{
97aff369 4381 dVAR; dSP; dMARK; dORIGMARK;
502c6561 4382 register AV *const av = MUTABLE_AV(POPs);
1b6737cc 4383 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
79072805 4384
a0d0e21e 4385 if (SvTYPE(av) == SVt_PVAV) {
fc15ae8f 4386 const I32 arybase = CopARYBASE_get(PL_curcop);
4ad10a0b
VP
4387 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4388 bool can_preserve = FALSE;
4389
4390 if (localizing) {
4391 MAGIC *mg;
4392 HV *stash;
4393
4394 can_preserve = SvCANEXISTDELETE(av);
4395 }
4396
4397 if (lval && localizing) {
1b6737cc 4398 register SV **svp;
748a9306 4399 I32 max = -1;
924508f0 4400 for (svp = MARK + 1; svp <= SP; svp++) {
4ea561bc 4401 const I32 elem = SvIV(*svp);
748a9306
LW
4402 if (elem > max)
4403 max = elem;
4404 }
4405 if (max > AvMAX(av))
4406 av_extend(av, max);
4407 }
4ad10a0b 4408
a0d0e21e 4409 while (++MARK <= SP) {
1b6737cc 4410 register SV **svp;
4ea561bc 4411 I32 elem = SvIV(*MARK);
4ad10a0b 4412 bool preeminent = TRUE;
a0d0e21e 4413
748a9306
LW
4414 if (elem > 0)
4415 elem -= arybase;
4ad10a0b
VP
4416 if (localizing && can_preserve) {
4417 /* If we can determine whether the element exist,
4418 * Try to preserve the existenceness of a tied array
4419 * element by using EXISTS and DELETE if possible.
4420 * Fallback to FETCH and STORE otherwise. */
4421 preeminent = av_exists(av, elem);
4422 }
4423
a0d0e21e
LW
4424 svp = av_fetch(av, elem, lval);
4425 if (lval) {
3280af22 4426 if (!svp || *svp == &PL_sv_undef)
cea2e8a9 4427 DIE(aTHX_ PL_no_aelem, elem);
4ad10a0b
VP
4428 if (localizing) {
4429 if (preeminent)
4430 save_aelem(av, elem, svp);
4431 else
4432 SAVEADELETE(av, elem);
4433 }
79072805 4434 }
3280af22 4435 *MARK = svp ? *svp : &PL_sv_undef;
79072805
LW
4436 }
4437 }
748a9306 4438 if (GIMME != G_ARRAY) {
a0d0e21e 4439 MARK = ORIGMARK;
04ab2c87 4440 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
a0d0e21e
LW
4441 SP = MARK;
4442 }
79072805
LW
4443 RETURN;
4444}
4445
cba5a3b0
DG
4446/* Smart dereferencing for keys, values and each */
4447PP(pp_rkeys)
4448{
4449 dVAR;
4450 dSP;
4451 dPOPss;
4452
7ac5715b
FC
4453 SvGETMAGIC(sv);
4454
4455 if (
4456 !SvROK(sv)
4457 || (sv = SvRV(sv),
4458 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4459 || SvOBJECT(sv)
4460 )
4461 ) {
4462 DIE(aTHX_
4463 "Type of argument to %s must be unblessed hashref or arrayref",
4c540399 4464 PL_op_desc[PL_op->op_type] );
cba5a3b0
DG
4465 }
4466
d8065907
FC
4467 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4468 DIE(aTHX_
4469 "Can't modify %s in %s",
4470 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4471 );
4472
cba5a3b0
DG
4473 /* Delegate to correct function for op type */
4474 PUSHs(sv);
4475 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4476 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4477 }
4478 else {
4479 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4480 }
4481}
4482
878d132a
NC
4483PP(pp_aeach)
4484{
4485 dVAR;
4486 dSP;
502c6561 4487 AV *array = MUTABLE_AV(POPs);
878d132a 4488 const I32 gimme = GIMME_V;
453d94a9 4489 IV *iterp = Perl_av_iter_p(aTHX_ array);
878d132a
NC
4490 const IV current = (*iterp)++;
4491
4492 if (current > av_len(array)) {
4493 *iterp = 0;
4494 if (gimme == G_SCALAR)
4495 RETPUSHUNDEF;
4496 else
4497 RETURN;
4498 }
4499
4500 EXTEND(SP, 2);
4501 mPUSHi(CopARYBASE_get(PL_curcop) + current);
4502 if (gimme == G_ARRAY) {
4503 SV **const element = av_fetch(array, current, 0);
4504 PUSHs(element ? *element : &PL_sv_undef);
4505 }
4506 RETURN;
4507}
4508
4509PP(pp_akeys)
4510{
4511 dVAR;
4512 dSP;
502c6561 4513 AV *array = MUTABLE_AV(POPs);
878d132a
NC
4514 const I32 gimme = GIMME_V;
4515
4516 *Perl_av_iter_p(aTHX_ array) = 0;
4517
4518 if (gimme == G_SCALAR) {
4519 dTARGET;
4520 PUSHi(av_len(array) + 1);
4521 }
4522 else if (gimme == G_ARRAY) {
4523 IV n = Perl_av_len(aTHX_ array);
4524 IV i = CopARYBASE_get(PL_curcop);
4525
4526 EXTEND(SP, n + 1);
4527
cba5a3b0 4528 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
878d132a
NC
4529 n += i;
4530 for (; i <= n; i++) {
4531 mPUSHi(i);
4532 }
4533 }
4534 else {
4535 for (i = 0; i <= n; i++) {
4536 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4537 PUSHs(elem ? *elem : &PL_sv_undef);
4538 }
4539 }
4540 }
4541 RETURN;
4542}
4543
79072805
LW
4544/* Associative arrays. */
4545
4546PP(pp_each)
4547{
97aff369 4548 dVAR;
39644a26 4549 dSP;
85fbaab2 4550 HV * hash = MUTABLE_HV(POPs);
c07a80fd 4551 HE *entry;
f54cb97a 4552 const I32 gimme = GIMME_V;
8ec5e241 4553
c07a80fd 4554 PUTBACK;
c750a3ec 4555 /* might clobber stack_sp */
6d822dc4 4556 entry = hv_iternext(hash);
c07a80fd 4557 SPAGAIN;
79072805 4558
79072805
LW
4559 EXTEND(SP, 2);
4560 if (entry) {
1b6737cc 4561 SV* const sv = hv_iterkeysv(entry);
574c8022 4562 PUSHs(sv); /* won't clobber stack_sp */
54310121 4563 if (gimme == G_ARRAY) {
59af0135 4564 SV *val;
c07a80fd 4565 PUTBACK;
c750a3ec 4566 /* might clobber stack_sp */
6d822dc4 4567 val = hv_iterval(hash, entry);
c07a80fd 4568 SPAGAIN;
59af0135 4569 PUSHs(val);
79072805 4570 }
79072805 4571 }
54310121 4572 else if (gimme == G_SCALAR)
79072805
LW
4573 RETPUSHUNDEF;
4574
4575 RETURN;
4576}
4577
7332a6c4
VP
4578STATIC OP *
4579S_do_delete_local(pTHX)
79072805 4580{
97aff369 4581 dVAR;
39644a26 4582 dSP;
f54cb97a 4583 const I32 gimme = GIMME_V;
7332a6c4
VP
4584 const MAGIC *mg;
4585 HV *stash;
4586
4587 if (PL_op->op_private & OPpSLICE) {
4588 dMARK; dORIGMARK;
4589 SV * const osv = POPs;
4590 const bool tied = SvRMAGICAL(osv)
4591 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4592 const bool can_preserve = SvCANEXISTDELETE(osv)
4593 || mg_find((const SV *)osv, PERL_MAGIC_env);
4594 const U32 type = SvTYPE(osv);
4595 if (type == SVt_PVHV) { /* hash element */
4596 HV * const hv = MUTABLE_HV(osv);
4597 while (++MARK <= SP) {
4598 SV * const keysv = *MARK;
4599 SV *sv = NULL;
4600 bool preeminent = TRUE;
4601 if (can_preserve)
4602 preeminent = hv_exists_ent(hv, keysv, 0);
4603 if (tied) {
4604 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4605 if (he)
4606 sv = HeVAL(he);
4607 else
4608 preeminent = FALSE;
4609 }
4610 else {
4611 sv = hv_delete_ent(hv, keysv, 0, 0);
4612 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4613 }
4614 if (preeminent) {
4615 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4616 if (tied) {
4617 *MARK = sv_mortalcopy(sv);
4618 mg_clear(sv);
4619 } else
4620 *MARK = sv;
4621 }
4622 else {
4623 SAVEHDELETE(hv, keysv);
4624 *MARK = &PL_sv_undef;
4625 }
4626 }
4627 }
4628 else if (type == SVt_PVAV) { /* array element */
4629 if (PL_op->op_flags & OPf_SPECIAL) {
4630 AV * const av = MUTABLE_AV(osv);
4631 while (++MARK <= SP) {
4632 I32 idx = SvIV(*MARK);
4633 SV *sv = NULL;
4634 bool preeminent = TRUE;
4635 if (can_preserve)
4636 preeminent = av_exists(av, idx);
4637 if (tied) {
4638 SV **svp = av_fetch(av, idx, 1);
4639 if (svp)
4640 sv = *svp;
4641 else
4642 preeminent = FALSE;
4643 }
4644 else {
4645 sv = av_delete(av, idx, 0);
4646 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4647 }
4648 if (preeminent) {
4649 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4650 if (tied) {
4651 *MARK = sv_mortalcopy(sv);
4652 mg_clear(sv);
4653 } else
4654 *MARK = sv;
4655 }
4656 else {
4657 SAVEADELETE(av, idx);
4658 *MARK = &PL_sv_undef;
4659 }
4660 }
4661 }
4662 }
4663 else
4664 DIE(aTHX_ "Not a HASH reference");
4665 if (gimme == G_VOID)
4666 SP = ORIGMARK;
4667 else if (gimme == G_SCALAR) {
4668 MARK = ORIGMARK;
4669 if (SP > MARK)
4670 *++MARK = *SP;
4671 else
4672 *++MARK = &PL_sv_undef;
4673 SP = MARK;
4674 }
4675 }
4676 else {
4677 SV * const keysv = POPs;
4678 SV * const osv = POPs;
4679 const bool tied = SvRMAGICAL(osv)
4680 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4681 const bool can_preserve = SvCANEXISTDELETE(osv)
4682 || mg_find((const SV *)osv, PERL_MAGIC_env);
4683 const U32 type = SvTYPE(osv);
4684 SV *sv = NULL;
4685 if (type == SVt_PVHV) {
4686 HV * const hv = MUTABLE_HV(osv);
4687 bool preeminent = TRUE;
4688 if (can_preserve)
4689 preeminent = hv_exists_ent(hv, keysv, 0);
4690 if (tied) {
4691 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4692 if (he)
4693 sv = HeVAL(he);
4694 else
4695 preeminent = FALSE;
4696 }
4697 else {
4698 sv = hv_delete_ent(hv, keysv, 0, 0);
4699 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4700 }
4701 if (preeminent) {
4702 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4703 if (tied) {
4704 SV *nsv = sv_mortalcopy(sv);
4705 mg_clear(sv);
4706 sv = nsv;
4707 }
4708 }
4709 else
4710 SAVEHDELETE(hv, keysv);
4711 }
4712 else if (type == SVt_PVAV) {
4713 if (PL_op->op_flags & OPf_SPECIAL) {
4714 AV * const av = MUTABLE_AV(osv);
4715 I32 idx = SvIV(keysv);
4716 bool preeminent = TRUE;
4717 if (can_preserve)
4718 preeminent = av_exists(av, idx);
4719 if (tied) {
4720 SV **svp = av_fetch(av, idx, 1);
4721 if (svp)
4722 sv = *svp;
4723 else
4724 preeminent = FALSE;
4725 }
4726 else {
4727 sv = av_delete(av, idx, 0);
4728 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4729 }
4730 if (preeminent) {
4731 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4732 if (tied) {
4733 SV *nsv = sv_mortalcopy(sv);
4734 mg_clear(sv);
4735 sv = nsv;
4736 }
4737 }
4738 else
4739 SAVEADELETE(av, idx);
4740 }
4741 else
4742 DIE(aTHX_ "panic: avhv_delete no longer supported");
4743 }
4744 else
4745 DIE(aTHX_ "Not a HASH reference");
4746 if (!sv)
4747 sv = &PL_sv_undef;
4748 if (gimme != G_VOID)
4749 PUSHs(sv);
4750 }
4751
4752 RETURN;
4753}
4754
4755PP(pp_delete)
4756{
4757 dVAR;
4758 dSP;
4759 I32 gimme;
4760 I32 discard;
4761
4762 if (PL_op->op_private & OPpLVAL_INTRO)
4763 return do_delete_local();
4764
4765 gimme = GIMME_V;
4766 discard = (gimme == G_VOID) ? G_DISCARD : 0;
5f05dabc 4767
533c011a 4768 if (PL_op->op_private & OPpSLICE) {
5f05dabc 4769 dMARK; dORIGMARK;
85fbaab2 4770 HV * const hv = MUTABLE_HV(POPs);
1b6737cc 4771 const U32 hvtype = SvTYPE(hv);
01020589
GS
4772 if (hvtype == SVt_PVHV) { /* hash element */
4773 while (++MARK <= SP) {
1b6737cc 4774 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
01020589
GS
4775 *MARK = sv ? sv : &PL_sv_undef;
4776 }
5f05dabc 4777 }
6d822dc4
MS
4778 else if (hvtype == SVt_PVAV) { /* array element */
4779 if (PL_op->op_flags & OPf_SPECIAL) {
4780 while (++MARK <= SP) {
502c6561 4781 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
6d822dc4
MS
4782 *MARK = sv ? sv : &PL_sv_undef;
4783 }
4784 }
01020589
GS
4785 }
4786 else
4787 DIE(aTHX_ "Not a HASH reference");
54310121 4788 if (discard)
4789 SP = ORIGMARK;
4790 else if (gimme == G_SCALAR) {
5f05dabc 4791 MARK = ORIGMARK;
9111c9c0
DM
4792 if (SP > MARK)
4793 *++MARK = *SP;
4794 else
4795 *++MARK = &PL_sv_undef;
5f05dabc 4796 SP = MARK;
4797 }
4798 }
4799 else {
4800 SV *keysv = POPs;
85fbaab2 4801 HV * const hv = MUTABLE_HV(POPs);
295d248e 4802 SV *sv = NULL;
97fcbf96
MB
4803 if (SvTYPE(hv) == SVt_PVHV)
4804 sv = hv_delete_ent(hv, keysv, discard, 0);
01020589
GS
4805 else if (SvTYPE(hv) == SVt_PVAV) {
4806 if (PL_op->op_flags & OPf_SPECIAL)
502c6561 4807 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
af288a60
HS
4808 else
4809 DIE(aTHX_ "panic: avhv_delete no longer supported");
01020589 4810 }
97fcbf96 4811 else
cea2e8a9 4812 DIE(aTHX_ "Not a HASH reference");
5f05dabc 4813 if (!sv)
3280af22 4814 sv = &PL_sv_undef;
54310121 4815 if (!discard)
4816 PUSHs(sv);
79072805 4817 }
79072805
LW
4818 RETURN;
4819}
4820
a0d0e21e 4821PP(pp_exists)
79072805 4822{
97aff369 4823 dVAR;
39644a26 4824 dSP;
afebc493
GS
4825 SV *tmpsv;
4826 HV *hv;
4827
4828 if (PL_op->op_private & OPpEXISTS_SUB) {
4829 GV *gv;
0bd48802 4830 SV * const sv = POPs;
f2c0649b 4831 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
afebc493
GS
4832 if (cv)
4833 RETPUSHYES;
4834 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
4835 RETPUSHYES;
4836 RETPUSHNO;
4837 }
4838 tmpsv = POPs;
85fbaab2 4839 hv = MUTABLE_HV(POPs);
c750a3ec 4840 if (SvTYPE(hv) == SVt_PVHV) {
ae77835f 4841 if (hv_exists_ent(hv, tmpsv, 0))
c750a3ec 4842 RETPUSHYES;
ef54e1a4
JH
4843 }
4844 else if (SvTYPE(hv) == SVt_PVAV) {
01020589 4845 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
502c6561 4846 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
01020589
GS
4847 RETPUSHYES;
4848 }
ef54e1a4
JH
4849 }
4850 else {
cea2e8a9 4851 DIE(aTHX_ "Not a HASH reference");
a0d0e21e 4852 }
a0d0e21e
LW
4853 RETPUSHNO;
4854}
79072805 4855
a0d0e21e
LW
4856PP(pp_hslice)
4857{
97aff369 4858 dVAR; dSP; dMARK; dORIGMARK;
85fbaab2 4859 register HV * const hv = MUTABLE_HV(POPs);
1b6737cc
AL
4860 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4861 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
d30e492c 4862 bool can_preserve = FALSE;
79072805 4863
eb85dfd3
DM
4864 if (localizing) {
4865 MAGIC *mg;
4866 HV *stash;
4867
d30e492c
VP
4868 if (SvCANEXISTDELETE(hv) || mg_find((const SV *)hv, PERL_MAGIC_env))
4869 can_preserve = TRUE;
eb85dfd3
DM
4870 }
4871
6d822dc4 4872 while (++MARK <= SP) {
1b6737cc 4873 SV * const keysv = *MARK;
6d822dc4
MS
4874 SV **svp;
4875 HE *he;
d30e492c
VP
4876 bool preeminent = TRUE;
4877
4878 if (localizing && can_preserve) {
4879 /* If we can determine whether the element exist,
4880 * try to preserve the existenceness of a tied hash
4881 * element by using EXISTS and DELETE if possible.
4882 * Fallback to FETCH and STORE otherwise. */
4883 preeminent = hv_exists_ent(hv, keysv, 0);
6d822dc4 4884 }
eb85dfd3 4885
6d822dc4 4886 he = hv_fetch_ent(hv, keysv, lval, 0);
fe5bfecd 4887 svp = he ? &HeVAL(he) : NULL;
eb85dfd3 4888
6d822dc4
MS
4889 if (lval) {
4890 if (!svp || *svp == &PL_sv_undef) {
be2597df 4891 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
6d822dc4
MS
4892 }
4893 if (localizing) {
7a2e501a 4894 if (HvNAME_get(hv) && isGV(*svp))
159b6efe 4895 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
47cfc530
VP
4896 else if (preeminent)
4897 save_helem_flags(hv, keysv, svp,
4898 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
4899 else
4900 SAVEHDELETE(hv, keysv);
6d822dc4
MS
4901 }
4902 }
4903 *MARK = svp ? *svp : &PL_sv_undef;
79072805 4904 }
a0d0e21e
LW
4905 if (GIMME != G_ARRAY) {
4906 MARK = ORIGMARK;
04ab2c87 4907 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
a0d0e21e 4908 SP = MARK;
79072805 4909 }
a0d0e21e
LW
4910 RETURN;
4911}
4912
4913/* List operators. */
4914
4915PP(pp_list)
4916{
97aff369 4917 dVAR; dSP; dMARK;
a0d0e21e
LW
4918 if (GIMME != G_ARRAY) {
4919 if (++MARK <= SP)
4920 *MARK = *SP; /* unwanted list, return last item */
8990e307 4921 else
3280af22 4922 *MARK = &PL_sv_undef;
a0d0e21e 4923 SP = MARK;
79072805 4924 }
a0d0e21e 4925 RETURN;
79072805
LW
4926}
4927
a0d0e21e 4928PP(pp_lslice)
79072805 4929{
97aff369 4930 dVAR;
39644a26 4931 dSP;
1b6737cc
AL
4932 SV ** const lastrelem = PL_stack_sp;
4933 SV ** const lastlelem = PL_stack_base + POPMARK;
4934 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
4935 register SV ** const firstrelem = lastlelem + 1;
fc15ae8f 4936 const I32 arybase = CopARYBASE_get(PL_curcop);
42e73ed0 4937 I32 is_something_there = FALSE;
1b6737cc
AL
4938
4939 register const I32 max = lastrelem - lastlelem;
a0d0e21e 4940 register SV **lelem;
a0d0e21e
LW
4941
4942 if (GIMME != G_ARRAY) {
4ea561bc 4943 I32 ix = SvIV(*lastlelem);
748a9306
LW
4944 if (ix < 0)
4945 ix += max;
4946 else
4947 ix -= arybase;
a0d0e21e 4948 if (ix < 0 || ix >= max)
3280af22 4949 *firstlelem = &PL_sv_undef;
a0d0e21e
LW
4950 else
4951 *firstlelem = firstrelem[ix];
4952 SP = firstlelem;
4953 RETURN;
4954 }
4955
4956 if (max == 0) {
4957 SP = firstlelem - 1;
4958 RETURN;
4959 }
4960
4961 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
4ea561bc 4962 I32 ix = SvIV(*lelem);
c73bf8e3 4963 if (ix < 0)
a0d0e21e 4964 ix += max;
b13b2135 4965 else
748a9306 4966 ix -= arybase;
c73bf8e3
HS
4967 if (ix < 0 || ix >= max)
4968 *lelem = &PL_sv_undef;
4969 else {
4970 is_something_there = TRUE;
4971 if (!(*lelem = firstrelem[ix]))
3280af22 4972 *lelem = &PL_sv_undef;
748a9306 4973 }
79072805 4974 }
4633a7c4
LW
4975 if (is_something_there)
4976 SP = lastlelem;
4977 else
4978 SP = firstlelem - 1;
79072805
LW
4979 RETURN;
4980}
4981
a0d0e21e
LW
4982PP(pp_anonlist)
4983{
97aff369 4984 dVAR; dSP; dMARK; dORIGMARK;
1b6737cc 4985 const I32 items = SP - MARK;
ad64d0ec 4986 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
44a8e56a 4987 SP = ORIGMARK; /* av_make() might realloc stack_sp */
6e449a3a
MHM
4988 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
4989 ? newRV_noinc(av) : av);
a0d0e21e
LW
4990 RETURN;
4991}
4992
4993PP(pp_anonhash)
79072805 4994{
97aff369 4995 dVAR; dSP; dMARK; dORIGMARK;
78c72037 4996 HV* const hv = newHV();
a0d0e21e
LW
4997
4998 while (MARK < SP) {
1b6737cc 4999 SV * const key = *++MARK;
561b68a9 5000 SV * const val = newSV(0);
a0d0e21e
LW
5001 if (MARK < SP)
5002 sv_setsv(val, *++MARK);
a2a5de95
NC
5003 else
5004 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
f12c7020 5005 (void)hv_store_ent(hv,key,val,0);
79072805 5006 }
a0d0e21e 5007 SP = ORIGMARK;
6e449a3a 5008 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
ad64d0ec 5009 ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
79072805
LW
5010 RETURN;
5011}
5012
d4fc4415
FC
5013static AV *
5014S_deref_plain_array(pTHX_ AV *ary)
5015{
5016 if (SvTYPE(ary) == SVt_PVAV) return ary;
d2d95e13 5017 SvGETMAGIC((SV *)ary);
d4fc4415
FC
5018 if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV)
5019 Perl_die(aTHX_ "Not an ARRAY reference");
5020 else if (SvOBJECT(SvRV(ary)))
5021 Perl_die(aTHX_ "Not an unblessed ARRAY reference");
5022 return (AV *)SvRV(ary);
5023}
5024
5025#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
5026# define DEREF_PLAIN_ARRAY(ary) \
5027 ({ \
5028 AV *aRrRay = ary; \
5029 SvTYPE(aRrRay) == SVt_PVAV \
5030 ? aRrRay \
5031 : S_deref_plain_array(aTHX_ aRrRay); \
5032 })
5033#else
5034# define DEREF_PLAIN_ARRAY(ary) \
5035 ( \
3b0f6d32 5036 PL_Sv = (SV *)(ary), \
d4fc4415
FC
5037 SvTYPE(PL_Sv) == SVt_PVAV \
5038 ? (AV *)PL_Sv \
3b0f6d32 5039 : S_deref_plain_array(aTHX_ (AV *)PL_Sv) \
d4fc4415
FC
5040 )
5041#endif
5042
a0d0e21e 5043PP(pp_splice)
79072805 5044{
27da23d5 5045 dVAR; dSP; dMARK; dORIGMARK;
5cd408a2 5046 int num_args = (SP - MARK);
d4fc4415 5047 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
a0d0e21e
LW
5048 register SV **src;
5049 register SV **dst;
5050 register I32 i;
5051 register I32 offset;
5052 register I32 length;
5053 I32 newlen;
5054 I32 after;
5055 I32 diff;
ad64d0ec 5056 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
93965878 5057
1b6737cc 5058 if (mg) {
af71faff
NC
5059 return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg,
5060 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
5061 sp - mark);
93965878 5062 }
79072805 5063
a0d0e21e 5064 SP++;
79072805 5065
a0d0e21e 5066 if (++MARK < SP) {
4ea561bc 5067 offset = i = SvIV(*MARK);
a0d0e21e 5068 if (offset < 0)
93965878 5069 offset += AvFILLp(ary) + 1;
a0d0e21e 5070 else
fc15ae8f 5071 offset -= CopARYBASE_get(PL_curcop);
84902520 5072 if (offset < 0)
cea2e8a9 5073 DIE(aTHX_ PL_no_aelem, i);
a0d0e21e
LW
5074 if (++MARK < SP) {
5075 length = SvIVx(*MARK++);
48cdf507
GA
5076 if (length < 0) {
5077 length += AvFILLp(ary) - offset + 1;
5078 if (length < 0)
5079 length = 0;
5080 }
79072805
LW
5081 }
5082 else
a0d0e21e 5083 length = AvMAX(ary) + 1; /* close enough to infinity */
79072805 5084 }
a0d0e21e
LW
5085 else {
5086 offset = 0;
5087 length = AvMAX(ary) + 1;
5088 }
8cbc2e3b 5089 if (offset > AvFILLp(ary) + 1) {
5cd408a2
EB
5090 if (num_args > 2)
5091 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
93965878 5092 offset = AvFILLp(ary) + 1;
8cbc2e3b 5093 }
93965878 5094 after = AvFILLp(ary) + 1 - (offset + length);
a0d0e21e
LW
5095 if (after < 0) { /* not that much array */
5096 length += after; /* offset+length now in array */
5097 after = 0;
5098 if (!AvALLOC(ary))
5099 av_extend(ary, 0);
5100 }
5101
5102 /* At this point, MARK .. SP-1 is our new LIST */
5103
5104 newlen = SP - MARK;
5105 diff = newlen - length;
13d7cbc1
GS
5106 if (newlen && !AvREAL(ary) && AvREIFY(ary))
5107 av_reify(ary);
a0d0e21e 5108
50528de0
WL
5109 /* make new elements SVs now: avoid problems if they're from the array */
5110 for (dst = MARK, i = newlen; i; i--) {
1b6737cc 5111 SV * const h = *dst;
f2b990bf 5112 *dst++ = newSVsv(h);
50528de0
WL
5113 }
5114
a0d0e21e 5115 if (diff < 0) { /* shrinking the area */
95b63a38 5116 SV **tmparyval = NULL;
a0d0e21e 5117 if (newlen) {
a02a5408 5118 Newx(tmparyval, newlen, SV*); /* so remember insertion */
a0d0e21e 5119 Copy(MARK, tmparyval, newlen, SV*);
79072805 5120 }
a0d0e21e
LW
5121
5122 MARK = ORIGMARK + 1;
5123 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5124 MEXTEND(MARK, length);
5125 Copy(AvARRAY(ary)+offset, MARK, length, SV*);
5126 if (AvREAL(ary)) {
bbce6d69 5127 EXTEND_MORTAL(length);
36477c24 5128 for (i = length, dst = MARK; i; i--) {
486ec47a 5129 sv_2mortal(*dst); /* free them eventually */
36477c24 5130 dst++;
5131 }
a0d0e21e
LW
5132 }
5133 MARK += length - 1;
79072805 5134 }
a0d0e21e
LW
5135 else {
5136 *MARK = AvARRAY(ary)[offset+length-1];
5137 if (AvREAL(ary)) {
d689ffdd 5138 sv_2mortal(*MARK);
a0d0e21e
LW
5139 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
5140 SvREFCNT_dec(*dst++); /* free them now */
79072805 5141 }
a0d0e21e 5142 }
93965878 5143 AvFILLp(ary) += diff;
a0d0e21e
LW
5144
5145 /* pull up or down? */
5146
5147 if (offset < after) { /* easier to pull up */
5148 if (offset) { /* esp. if nothing to pull */
5149 src = &AvARRAY(ary)[offset-1];
5150 dst = src - diff; /* diff is negative */
5151 for (i = offset; i > 0; i--) /* can't trust Copy */
5152 *dst-- = *src--;
79072805 5153 }
a0d0e21e 5154 dst = AvARRAY(ary);
9c6bc640 5155 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
a0d0e21e
LW
5156 AvMAX(ary) += diff;
5157 }
5158 else {
5159 if (after) { /* anything to pull down? */
5160 src = AvARRAY(ary) + offset + length;
5161 dst = src + diff; /* diff is negative */
5162 Move(src, dst, after, SV*);
79072805 5163 }
93965878 5164 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
a0d0e21e
LW
5165 /* avoid later double free */
5166 }
5167 i = -diff;
5168 while (i)
3280af22 5169 dst[--i] = &PL_sv_undef;
a0d0e21e
LW
5170
5171 if (newlen) {
50528de0 5172 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
a0d0e21e
LW
5173 Safefree(tmparyval);
5174 }
5175 }
5176 else { /* no, expanding (or same) */
d3961450 5177 SV** tmparyval = NULL;
a0d0e21e 5178 if (length) {
a02a5408 5179 Newx(tmparyval, length, SV*); /* so remember deletion */
a0d0e21e
LW
5180 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
5181 }
5182
5183 if (diff > 0) { /* expanding */
a0d0e21e 5184 /* push up or down? */
a0d0e21e
LW
5185 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
5186 if (offset) {
5187 src = AvARRAY(ary);
5188 dst = src - diff;
5189 Move(src, dst, offset, SV*);
79072805 5190 }
9c6bc640 5191 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
a0d0e21e 5192 AvMAX(ary) += diff;
93965878 5193 AvFILLp(ary) += diff;
79072805
LW
5194 }
5195 else {
93965878
NIS
5196 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
5197 av_extend(ary, AvFILLp(ary) + diff);
5198 AvFILLp(ary) += diff;
a0d0e21e
LW
5199
5200 if (after) {
93965878 5201 dst = AvARRAY(ary) + AvFILLp(ary);
a0d0e21e
LW
5202 src = dst - diff;
5203 for (i = after; i; i--) {
5204 *dst-- = *src--;
5205 }
79072805
LW
5206 }
5207 }
a0d0e21e
LW
5208 }
5209
50528de0
WL
5210 if (newlen) {
5211 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
a0d0e21e 5212 }
50528de0 5213
a0d0e21e
LW
5214 MARK = ORIGMARK + 1;
5215 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5216 if (length) {
5217 Copy(tmparyval, MARK, length, SV*);
5218 if (AvREAL(ary)) {
bbce6d69 5219 EXTEND_MORTAL(length);
36477c24 5220 for (i = length, dst = MARK; i; i--) {
486ec47a 5221 sv_2mortal(*dst); /* free them eventually */
36477c24 5222 dst++;
5223 }
79072805
LW
5224 }
5225 }
a0d0e21e
LW
5226 MARK += length - 1;
5227 }
5228 else if (length--) {
5229 *MARK = tmparyval[length];
5230 if (AvREAL(ary)) {
d689ffdd 5231 sv_2mortal(*MARK);
a0d0e21e
LW
5232 while (length-- > 0)
5233 SvREFCNT_dec(tmparyval[length]);
79072805 5234 }
79072805 5235 }
a0d0e21e 5236 else
3280af22 5237 *MARK = &PL_sv_undef;
d3961450 5238 Safefree(tmparyval);
79072805 5239 }
474af990
FR
5240
5241 if (SvMAGICAL(ary))
5242 mg_set(MUTABLE_SV(ary));
5243
a0d0e21e 5244 SP = MARK;
79072805
LW
5245 RETURN;
5246}
5247
a0d0e21e 5248PP(pp_push)
79072805 5249{
27da23d5 5250 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
d4fc4415 5251 register AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
ad64d0ec 5252 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
79072805 5253
1b6737cc 5254 if (mg) {
ad64d0ec 5255 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
93965878
NIS
5256 PUSHMARK(MARK);
5257 PUTBACK;
d343c3ef 5258 ENTER_with_name("call_PUSH");
864dbfa3 5259 call_method("PUSH",G_SCALAR|G_DISCARD);
d343c3ef 5260 LEAVE_with_name("call_PUSH");
93965878 5261 SPAGAIN;
93965878 5262 }
a60c0954 5263 else {
89c14e2e 5264 PL_delaymagic = DM_DELAY;
a60c0954 5265 for (++MARK; MARK <= SP; MARK++) {
561b68a9 5266 SV * const sv = newSV(0);
a60c0954
NIS
5267 if (*MARK)
5268 sv_setsv(sv, *MARK);
0a75904b 5269 av_store(ary, AvFILLp(ary)+1, sv);
a60c0954 5270 }
354b0578 5271 if (PL_delaymagic & DM_ARRAY_ISA)
ad64d0ec 5272 mg_set(MUTABLE_SV(ary));
89c14e2e
BB
5273
5274 PL_delaymagic = 0;
6eeabd23
VP
5275 }
5276 SP = ORIGMARK;
5277 if (OP_GIMME(PL_op, 0) != G_VOID) {
5278 PUSHi( AvFILL(ary) + 1 );
79072805 5279 }
79072805
LW
5280 RETURN;
5281}
5282
a0d0e21e 5283PP(pp_shift)
79072805 5284{
97aff369 5285 dVAR;
39644a26 5286 dSP;
538f5756 5287 AV * const av = PL_op->op_flags & OPf_SPECIAL
d4fc4415 5288 ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs));
789b4bc9 5289 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
79072805 5290 EXTEND(SP, 1);
c2b4a044 5291 assert (sv);
d689ffdd 5292 if (AvREAL(av))
a0d0e21e
LW
5293 (void)sv_2mortal(sv);
5294 PUSHs(sv);
79072805 5295 RETURN;
79072805
LW
5296}
5297
a0d0e21e 5298PP(pp_unshift)
79072805 5299{
27da23d5 5300 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
d4fc4415 5301 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
ad64d0ec 5302 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
93965878 5303
1b6737cc 5304 if (mg) {
ad64d0ec 5305 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
7fd66d9d 5306 PUSHMARK(MARK);
93965878 5307 PUTBACK;
d343c3ef 5308 ENTER_with_name("call_UNSHIFT");
864dbfa3 5309 call_method("UNSHIFT",G_SCALAR|G_DISCARD);
d343c3ef 5310 LEAVE_with_name("call_UNSHIFT");
93965878 5311 SPAGAIN;
93965878 5312 }
a60c0954 5313 else {
1b6737cc 5314 register I32 i = 0;
a60c0954
NIS
5315 av_unshift(ary, SP - MARK);
5316 while (MARK < SP) {
1b6737cc 5317 SV * const sv = newSVsv(*++MARK);
a60c0954
NIS
5318 (void)av_store(ary, i++, sv);
5319 }
79072805 5320 }
a0d0e21e 5321 SP = ORIGMARK;
6eeabd23 5322 if (OP_GIMME(PL_op, 0) != G_VOID) {
5658d0a9
LR
5323 PUSHi( AvFILL(ary) + 1 );
5324 }
79072805 5325 RETURN;
79072805
LW
5326}
5327
a0d0e21e 5328PP(pp_reverse)
79072805 5329{
97aff369 5330 dVAR; dSP; dMARK;
79072805 5331
a0d0e21e 5332 if (GIMME == G_ARRAY) {
484c818f
VP
5333 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5334 AV *av;
5335
5336 /* See pp_sort() */
5337 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5338 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5339 av = MUTABLE_AV((*SP));
5340 /* In-place reversing only happens in void context for the array
5341 * assignment. We don't need to push anything on the stack. */
5342 SP = MARK;
5343
5344 if (SvMAGICAL(av)) {
5345 I32 i, j;
5346 register SV *tmp = sv_newmortal();
5347 /* For SvCANEXISTDELETE */
5348 HV *stash;
5349 const MAGIC *mg;
5350 bool can_preserve = SvCANEXISTDELETE(av);
5351
5352 for (i = 0, j = av_len(av); i < j; ++i, --j) {
5353 register SV *begin, *end;
5354
5355 if (can_preserve) {
5356 if (!av_exists(av, i)) {
5357 if (av_exists(av, j)) {
5358 register SV *sv = av_delete(av, j, 0);
5359 begin = *av_fetch(av, i, TRUE);
5360 sv_setsv_mg(begin, sv);
5361 }
5362 continue;
5363 }
5364 else if (!av_exists(av, j)) {
5365 register SV *sv = av_delete(av, i, 0);
5366 end = *av_fetch(av, j, TRUE);
5367 sv_setsv_mg(end, sv);
5368 continue;
5369 }
5370 }
5371
5372 begin = *av_fetch(av, i, TRUE);
5373 end = *av_fetch(av, j, TRUE);
5374 sv_setsv(tmp, begin);
5375 sv_setsv_mg(begin, end);
5376 sv_setsv_mg(end, tmp);
5377 }
5378 }
5379 else {
5380 SV **begin = AvARRAY(av);
484c818f 5381
95a26d8e
VP
5382 if (begin) {
5383 SV **end = begin + AvFILLp(av);
5384
5385 while (begin < end) {
5386 register SV * const tmp = *begin;
5387 *begin++ = *end;
5388 *end-- = tmp;
5389 }
484c818f
VP
5390 }
5391 }
5392 }
5393 else {
5394 SV **oldsp = SP;
5395 MARK++;
5396 while (MARK < SP) {
5397 register SV * const tmp = *MARK;
5398 *MARK++ = *SP;
5399 *SP-- = tmp;
5400 }
5401 /* safe as long as stack cannot get extended in the above */
5402 SP = oldsp;
a0d0e21e 5403 }
79072805
LW
5404 }
5405 else {
a0d0e21e
LW
5406 register char *up;
5407 register char *down;
5408 register I32 tmp;
5409 dTARGET;
5410 STRLEN len;
79072805 5411
7e2040f0 5412 SvUTF8_off(TARG); /* decontaminate */
a0d0e21e 5413 if (SP - MARK > 1)
3280af22 5414 do_join(TARG, &PL_sv_no, MARK, SP);
1e21d011 5415 else {
789bd863 5416 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv());
1e21d011
B
5417 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
5418 report_uninit(TARG);
5419 }
5420
a0d0e21e
LW
5421 up = SvPV_force(TARG, len);
5422 if (len > 1) {
7e2040f0 5423 if (DO_UTF8(TARG)) { /* first reverse each character */
dfe13c55 5424 U8* s = (U8*)SvPVX(TARG);
349d4f2f 5425 const U8* send = (U8*)(s + len);
a0ed51b3 5426 while (s < send) {
d742c382 5427 if (UTF8_IS_INVARIANT(*s)) {
a0ed51b3
LW
5428 s++;
5429 continue;
5430 }
5431 else {
9041c2e3 5432 if (!utf8_to_uvchr(s, 0))
a0dbb045 5433 break;
dfe13c55 5434 up = (char*)s;
a0ed51b3 5435 s += UTF8SKIP(s);
dfe13c55 5436 down = (char*)(s - 1);
a0dbb045 5437 /* reverse this character */
a0ed51b3
LW
5438 while (down > up) {
5439 tmp = *up;
5440 *up++ = *down;
eb160463 5441 *down-- = (char)tmp;
a0ed51b3
LW
5442 }
5443 }
5444 }
5445 up = SvPVX(TARG);
5446 }
a0d0e21e
LW
5447 down = SvPVX(TARG) + len - 1;
5448 while (down > up) {
5449 tmp = *up;
5450 *up++ = *down;
eb160463 5451 *down-- = (char)tmp;
a0d0e21e 5452 }
3aa33fe5 5453 (void)SvPOK_only_UTF8(TARG);
79072805 5454 }
a0d0e21e
LW
5455 SP = MARK + 1;
5456 SETTARG;
79072805 5457 }
a0d0e21e 5458 RETURN;
79072805
LW
5459}
5460
a0d0e21e 5461PP(pp_split)
79072805 5462{
27da23d5 5463 dVAR; dSP; dTARG;
a0d0e21e 5464 AV *ary;
467f0320 5465 register IV limit = POPi; /* note, negative is forever */
1b6737cc 5466 SV * const sv = POPs;
a0d0e21e 5467 STRLEN len;
727b7506 5468 register const char *s = SvPV_const(sv, len);
1b6737cc 5469 const bool do_utf8 = DO_UTF8(sv);
727b7506 5470 const char *strend = s + len;
44a8e56a 5471 register PMOP *pm;
d9f97599 5472 register REGEXP *rx;
a0d0e21e 5473 register SV *dstr;
727b7506 5474 register const char *m;
a0d0e21e 5475 I32 iters = 0;
bb7a0f54 5476 const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
792b2c16 5477 I32 maxiters = slen + 10;
c1a7495a 5478 I32 trailing_empty = 0;
727b7506 5479 const char *orig;
1b6737cc 5480 const I32 origlimit = limit;
a0d0e21e
LW
5481 I32 realarray = 0;
5482 I32 base;
f54cb97a 5483 const I32 gimme = GIMME_V;
941446f6 5484 bool gimme_scalar;
f54cb97a 5485 const I32 oldsave = PL_savestack_ix;
437d3b4e 5486 U32 make_mortal = SVs_TEMP;
7fba1cd6 5487 bool multiline = 0;
b37c2d43 5488 MAGIC *mg = NULL;
79072805 5489
44a8e56a 5490#ifdef DEBUGGING
5491 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*);
5492#else
5493 pm = (PMOP*)POPs;
5494#endif
a0d0e21e 5495 if (!pm || !s)
2269b42e 5496 DIE(aTHX_ "panic: pp_split");
aaa362c4 5497 rx = PM_GETRE(pm);
bbce6d69 5498
a62b1201 5499 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
07bc277f 5500 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
bbce6d69 5501
a30b2f1f 5502 RX_MATCH_UTF8_set(rx, do_utf8);
d9f424b2 5503
971a9dd3 5504#ifdef USE_ITHREADS
20e98b0f 5505 if (pm->op_pmreplrootu.op_pmtargetoff) {
159b6efe 5506 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
20e98b0f 5507 }
971a9dd3 5508#else
20e98b0f
NC
5509 if (pm->op_pmreplrootu.op_pmtargetgv) {
5510 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
971a9dd3 5511 }
20e98b0f 5512#endif
79072805 5513 else
7d49f689 5514 ary = NULL;
a0d0e21e
LW
5515 if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
5516 realarray = 1;
8ec5e241 5517 PUTBACK;
a0d0e21e
LW
5518 av_extend(ary,0);
5519 av_clear(ary);
8ec5e241 5520 SPAGAIN;
ad64d0ec 5521 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
8ec5e241 5522 PUSHMARK(SP);
ad64d0ec 5523 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
8ec5e241
NIS
5524 }
5525 else {
1c0b011c 5526 if (!AvREAL(ary)) {
1b6737cc 5527 I32 i;
1c0b011c 5528 AvREAL_on(ary);
abff13bb 5529 AvREIFY_off(ary);
1c0b011c 5530 for (i = AvFILLp(ary); i >= 0; i--)
3280af22 5531 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
1c0b011c
NIS
5532 }
5533 /* temporarily switch stacks */
8b7059b1 5534 SAVESWITCHSTACK(PL_curstack, ary);
8ec5e241 5535 make_mortal = 0;
1c0b011c 5536 }
79072805 5537 }
3280af22 5538 base = SP - PL_stack_base;
a0d0e21e 5539 orig = s;
07bc277f 5540 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
613f191e
TS
5541 if (do_utf8) {
5542 while (*s == ' ' || is_utf8_space((U8*)s))
5543 s += UTF8SKIP(s);
5544 }
a62b1201 5545 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
bbce6d69 5546 while (isSPACE_LC(*s))
5547 s++;
5548 }
5549 else {
5550 while (isSPACE(*s))
5551 s++;
5552 }
a0d0e21e 5553 }
73134a2e 5554 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) {
7fba1cd6 5555 multiline = 1;
c07a80fd 5556 }
5557
941446f6
FC
5558 gimme_scalar = gimme == G_SCALAR && !ary;
5559
a0d0e21e
LW
5560 if (!limit)
5561 limit = maxiters + 2;
07bc277f 5562 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
a0d0e21e 5563 while (--limit) {
bbce6d69 5564 m = s;
8727f688
YO
5565 /* this one uses 'm' and is a negative test */
5566 if (do_utf8) {
613f191e
TS
5567 while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
5568 const int t = UTF8SKIP(m);
5569 /* is_utf8_space returns FALSE for malform utf8 */
5570 if (strend - m < t)
5571 m = strend;
5572 else
5573 m += t;
5574 }
a62b1201
KW
5575 }
5576 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
8727f688
YO
5577 while (m < strend && !isSPACE_LC(*m))
5578 ++m;
5579 } else {
5580 while (m < strend && !isSPACE(*m))
5581 ++m;
5582 }
a0d0e21e
LW
5583 if (m >= strend)
5584 break;
bbce6d69 5585
c1a7495a
BB
5586 if (gimme_scalar) {
5587 iters++;
5588 if (m-s == 0)
5589 trailing_empty++;
5590 else
5591 trailing_empty = 0;
5592 } else {
5593 dstr = newSVpvn_flags(s, m-s,
5594 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5595 XPUSHs(dstr);
5596 }
bbce6d69 5597
613f191e
TS
5598 /* skip the whitespace found last */
5599 if (do_utf8)
5600 s = m + UTF8SKIP(m);
5601 else
5602 s = m + 1;
5603
8727f688
YO
5604 /* this one uses 's' and is a positive test */
5605 if (do_utf8) {
613f191e 5606 while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
8727f688 5607 s += UTF8SKIP(s);
a62b1201
KW
5608 }
5609 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
8727f688
YO
5610 while (s < strend && isSPACE_LC(*s))
5611 ++s;
5612 } else {
5613 while (s < strend && isSPACE(*s))
5614 ++s;
5615 }
79072805
LW
5616 }
5617 }
07bc277f 5618 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
a0d0e21e 5619 while (--limit) {
a6e20a40
AL
5620 for (m = s; m < strend && *m != '\n'; m++)
5621 ;
a0d0e21e
LW
5622 m++;
5623 if (m >= strend)
5624 break;
c1a7495a
BB
5625
5626 if (gimme_scalar) {
5627 iters++;
5628 if (m-s == 0)
5629 trailing_empty++;
5630 else
5631 trailing_empty = 0;
5632 } else {
5633 dstr = newSVpvn_flags(s, m-s,
5634 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5635 XPUSHs(dstr);
5636 }
a0d0e21e
LW
5637 s = m;
5638 }
5639 }
07bc277f 5640 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
640f820d
AB
5641 /*
5642 Pre-extend the stack, either the number of bytes or
5643 characters in the string or a limited amount, triggered by:
5644
5645 my ($x, $y) = split //, $str;
5646 or
5647 split //, $str, $i;
5648 */
c1a7495a
BB
5649 if (!gimme_scalar) {
5650 const U32 items = limit - 1;
5651 if (items < slen)
5652 EXTEND(SP, items);
5653 else
5654 EXTEND(SP, slen);
5655 }
640f820d 5656
e9515b0f
AB
5657 if (do_utf8) {
5658 while (--limit) {
5659 /* keep track of how many bytes we skip over */
5660 m = s;
640f820d 5661 s += UTF8SKIP(s);
c1a7495a
BB
5662 if (gimme_scalar) {
5663 iters++;
5664 if (s-m == 0)
5665 trailing_empty++;
5666 else
5667 trailing_empty = 0;
5668 } else {
5669 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
640f820d 5670
c1a7495a
BB
5671 PUSHs(dstr);
5672 }
640f820d 5673
e9515b0f
AB
5674 if (s >= strend)
5675 break;
5676 }
5677 } else {
5678 while (--limit) {
c1a7495a
BB
5679 if (gimme_scalar) {
5680 iters++;
5681 } else {
5682 dstr = newSVpvn(s, 1);
e9515b0f 5683
e9515b0f 5684
c1a7495a
BB
5685 if (make_mortal)
5686 sv_2mortal(dstr);
640f820d 5687
c1a7495a
BB
5688 PUSHs(dstr);
5689 }
5690
5691 s++;
e9515b0f
AB
5692
5693 if (s >= strend)
5694 break;
5695 }
640f820d
AB
5696 }
5697 }
3c8556c3 5698 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
07bc277f
NC
5699 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
5700 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
5701 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
5702 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
f9f4320a 5703 SV * const csv = CALLREG_INTUIT_STRING(rx);
cf93c79d 5704
07bc277f 5705 len = RX_MINLENRET(rx);
3c8556c3 5706 if (len == 1 && !RX_UTF8(rx) && !tail) {
1b6737cc 5707 const char c = *SvPV_nolen_const(csv);
a0d0e21e 5708 while (--limit) {
a6e20a40
AL
5709 for (m = s; m < strend && *m != c; m++)
5710 ;
a0d0e21e
LW
5711 if (m >= strend)
5712 break;
c1a7495a
BB
5713 if (gimme_scalar) {
5714 iters++;
5715 if (m-s == 0)
5716 trailing_empty++;
5717 else
5718 trailing_empty = 0;
5719 } else {
5720 dstr = newSVpvn_flags(s, m-s,
5721 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5722 XPUSHs(dstr);
5723 }
93f04dac
JH
5724 /* The rx->minlen is in characters but we want to step
5725 * s ahead by bytes. */
1aa99e6b
IH
5726 if (do_utf8)
5727 s = (char*)utf8_hop((U8*)m, len);
5728 else
5729 s = m + len; /* Fake \n at the end */
a0d0e21e
LW
5730 }
5731 }
5732 else {
a0d0e21e 5733 while (s < strend && --limit &&
f722798b 5734 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
7fba1cd6 5735 csv, multiline ? FBMrf_MULTILINE : 0)) )
a0d0e21e 5736 {
c1a7495a
BB
5737 if (gimme_scalar) {
5738 iters++;
5739 if (m-s == 0)
5740 trailing_empty++;
5741 else
5742 trailing_empty = 0;
5743 } else {
5744 dstr = newSVpvn_flags(s, m-s,
5745 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5746 XPUSHs(dstr);
5747 }
93f04dac
JH
5748 /* The rx->minlen is in characters but we want to step
5749 * s ahead by bytes. */
1aa99e6b
IH
5750 if (do_utf8)
5751 s = (char*)utf8_hop((U8*)m, len);
5752 else
5753 s = m + len; /* Fake \n at the end */
a0d0e21e 5754 }
463ee0b2 5755 }
463ee0b2 5756 }
a0d0e21e 5757 else {
07bc277f 5758 maxiters += slen * RX_NPARENS(rx);
080c2dec 5759 while (s < strend && --limit)
bbce6d69 5760 {
1b6737cc 5761 I32 rex_return;
080c2dec 5762 PUTBACK;
f9f4320a 5763 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
bfafcb9a 5764 sv, NULL, SvSCREAM(sv) ? REXEC_SCREAM : 0);
080c2dec 5765 SPAGAIN;
1b6737cc 5766 if (rex_return == 0)
080c2dec 5767 break;
d9f97599 5768 TAINT_IF(RX_MATCH_TAINTED(rx));
07bc277f 5769 if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
a0d0e21e
LW
5770 m = s;
5771 s = orig;
07bc277f 5772 orig = RX_SUBBEG(rx);
a0d0e21e
LW
5773 s = orig + (m - s);
5774 strend = s + (strend - m);
5775 }
07bc277f 5776 m = RX_OFFS(rx)[0].start + orig;
c1a7495a
BB
5777
5778 if (gimme_scalar) {
5779 iters++;
5780 if (m-s == 0)
5781 trailing_empty++;
5782 else
5783 trailing_empty = 0;
5784 } else {
5785 dstr = newSVpvn_flags(s, m-s,
5786 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5787 XPUSHs(dstr);
5788 }
07bc277f 5789 if (RX_NPARENS(rx)) {
1b6737cc 5790 I32 i;
07bc277f
NC
5791 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
5792 s = RX_OFFS(rx)[i].start + orig;
5793 m = RX_OFFS(rx)[i].end + orig;
6de67870
JP
5794
5795 /* japhy (07/27/01) -- the (m && s) test doesn't catch
5796 parens that didn't match -- they should be set to
5797 undef, not the empty string */
c1a7495a
BB
5798 if (gimme_scalar) {
5799 iters++;
5800 if (m-s == 0)
5801 trailing_empty++;
5802 else
5803 trailing_empty = 0;
5804 } else {
5805 if (m >= orig && s >= orig) {
5806 dstr = newSVpvn_flags(s, m-s,
5807 (do_utf8 ? SVf_UTF8 : 0)
5808 | make_mortal);
5809 }
5810 else
5811 dstr = &PL_sv_undef; /* undef, not "" */
5812 XPUSHs(dstr);
748a9306 5813 }
c1a7495a 5814
a0d0e21e
LW
5815 }
5816 }
07bc277f 5817 s = RX_OFFS(rx)[0].end + orig;
a0d0e21e 5818 }
79072805 5819 }
8ec5e241 5820
c1a7495a
BB
5821 if (!gimme_scalar) {
5822 iters = (SP - PL_stack_base) - base;
5823 }
a0d0e21e 5824 if (iters > maxiters)
cea2e8a9 5825 DIE(aTHX_ "Split loop");
8ec5e241 5826
a0d0e21e
LW
5827 /* keep field after final delim? */
5828 if (s < strend || (iters && origlimit)) {
c1a7495a
BB
5829 if (!gimme_scalar) {
5830 const STRLEN l = strend - s;
5831 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5832 XPUSHs(dstr);
5833 }
a0d0e21e 5834 iters++;
79072805 5835 }
a0d0e21e 5836 else if (!origlimit) {
c1a7495a
BB
5837 if (gimme_scalar) {
5838 iters -= trailing_empty;
5839 } else {
5840 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
5841 if (TOPs && !make_mortal)
5842 sv_2mortal(TOPs);
5843 *SP-- = &PL_sv_undef;
5844 iters--;
5845 }
89900bd3 5846 }
a0d0e21e 5847 }
8ec5e241 5848
8b7059b1
DM
5849 PUTBACK;
5850 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
5851 SPAGAIN;
a0d0e21e 5852 if (realarray) {
8ec5e241 5853 if (!mg) {
1c0b011c
NIS
5854 if (SvSMAGICAL(ary)) {
5855 PUTBACK;
ad64d0ec 5856 mg_set(MUTABLE_SV(ary));
1c0b011c
NIS
5857 SPAGAIN;
5858 }
5859 if (gimme == G_ARRAY) {
5860 EXTEND(SP, iters);
5861 Copy(AvARRAY(ary), SP + 1, iters, SV*);
5862 SP += iters;
5863 RETURN;
5864 }
8ec5e241 5865 }
1c0b011c 5866 else {
fb73857a 5867 PUTBACK;
d343c3ef 5868 ENTER_with_name("call_PUSH");
864dbfa3 5869 call_method("PUSH",G_SCALAR|G_DISCARD);
d343c3ef 5870 LEAVE_with_name("call_PUSH");
fb73857a 5871 SPAGAIN;
8ec5e241 5872 if (gimme == G_ARRAY) {
1b6737cc 5873 I32 i;
8ec5e241
NIS
5874 /* EXTEND should not be needed - we just popped them */
5875 EXTEND(SP, iters);
5876 for (i=0; i < iters; i++) {
5877 SV **svp = av_fetch(ary, i, FALSE);
3280af22 5878 PUSHs((svp) ? *svp : &PL_sv_undef);
8ec5e241 5879 }
1c0b011c
NIS
5880 RETURN;
5881 }
a0d0e21e
LW
5882 }
5883 }
5884 else {
5885 if (gimme == G_ARRAY)
5886 RETURN;
5887 }
7f18b612
YST
5888
5889 GETTARGET;
5890 PUSHi(iters);
5891 RETURN;
79072805 5892}
85e6fe83 5893
c5917253
NC
5894PP(pp_once)
5895{
5896 dSP;
5897 SV *const sv = PAD_SVl(PL_op->op_targ);
5898
5899 if (SvPADSTALE(sv)) {
5900 /* First time. */
5901 SvPADSTALE_off(sv);
5902 RETURNOP(cLOGOP->op_other);
5903 }
5904 RETURNOP(cLOGOP->op_next);
5905}
5906
c0329465
MB
5907PP(pp_lock)
5908{
97aff369 5909 dVAR;
39644a26 5910 dSP;
c0329465 5911 dTOPss;
e55aaa0e 5912 SV *retsv = sv;
076a2a80 5913 assert(SvTYPE(retsv) != SVt_PVCV);
68795e93 5914 SvLOCK(sv);
076a2a80 5915 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV) {
e55aaa0e
MB
5916 retsv = refto(retsv);
5917 }
5918 SETs(retsv);
c0329465
MB
5919 RETURN;
5920}
a863c7d1 5921
65bca31a
NC
5922
5923PP(unimplemented_op)
5924{
97aff369 5925 dVAR;
361ed549
NC
5926 const Optype op_type = PL_op->op_type;
5927 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope
5928 with out of range op numbers - it only "special" cases op_custom.
5929 Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
5930 if we get here for a custom op then that means that the custom op didn't
5931 have an implementation. Given that OP_NAME() looks up the custom op
5932 by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
5933 registers &PL_unimplemented_op as the address of their custom op.
5934 NULL doesn't generate a useful error message. "custom" does. */
5935 const char *const name = op_type >= OP_max
5936 ? "[out of range]" : PL_op_name[PL_op->op_type];
7627e6d0
NC
5937 if(OP_IS_SOCKET(op_type))
5938 DIE(aTHX_ PL_no_sock_func, name);
361ed549 5939 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
65bca31a
NC
5940}
5941
867fa1e2
YO
5942PP(pp_boolkeys)
5943{
5944 dVAR;
5945 dSP;
5946 HV * const hv = (HV*)POPs;
5947
fd1d9b5c
FC
5948 if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; }
5949
867fa1e2
YO
5950 if (SvRMAGICAL(hv)) {
5951 MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied);
5952 if (mg) {
5953 XPUSHs(magic_scalarpack(hv, mg));
5954 RETURN;
5955 }
5956 }
5957
1b95d04f 5958 XPUSHs(boolSV(HvUSEDKEYS(hv) != 0));
867fa1e2
YO
5959 RETURN;
5960}
5961
e609e586
NC
5962/*
5963 * Local variables:
5964 * c-indentation-style: bsd
5965 * c-basic-offset: 4
5966 * indent-tabs-mode: t
5967 * End:
5968 *
37442d52
RGS
5969 * ex: set ts=8 sts=4 sw=4 noet:
5970 */