This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Clean up MANIFEST before creating a new one.
[perl5.git] / pp.c
CommitLineData
a0d0e21e 1/* pp.c
79072805 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
79072805 5 *
a0d0e21e
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
79072805 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
15 *
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
a0d0e21e 17 */
79072805 18
166f8a29
DM
19/* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
24 */
25
79072805 26#include "EXTERN.h"
864dbfa3 27#define PERL_IN_PP_C
79072805 28#include "perl.h"
77bc9082 29#include "keywords.h"
79072805 30
a4af207c
JH
31#include "reentr.h"
32
dfe9444c
AD
33/* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
35 --AD 2/20/1998
36*/
37#ifdef NEED_GETPID_PROTO
38extern Pid_t getpid (void);
8ac85365
NIS
39#endif
40
0630166f
SP
41/*
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
44 */
45#if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
47#endif
48
13017935
SM
49/* variations on pp_null */
50
93a17b20
LW
51PP(pp_stub)
52{
97aff369 53 dVAR;
39644a26 54 dSP;
54310121 55 if (GIMME_V == G_SCALAR)
3280af22 56 XPUSHs(&PL_sv_undef);
93a17b20
LW
57 RETURN;
58}
59
79072805
LW
60/* Pushy stuff. */
61
93a17b20
LW
62PP(pp_padav)
63{
97aff369 64 dVAR; dSP; dTARGET;
13017935 65 I32 gimme;
e190e9b4 66 assert(SvTYPE(TARG) == SVt_PVAV);
533c011a 67 if (PL_op->op_private & OPpLVAL_INTRO)
a5911867
RGS
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
85e6fe83 70 EXTEND(SP, 1);
533c011a 71 if (PL_op->op_flags & OPf_REF) {
85e6fe83 72 PUSHs(TARG);
93a17b20 73 RETURN;
40c94d11
FC
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
78f9721b
SM
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
79 PUSHs(TARG);
80 RETURN;
40c94d11 81 }
85e6fe83 82 }
13017935
SM
83 gimme = GIMME_V;
84 if (gimme == G_ARRAY) {
502c6561 85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
85e6fe83 86 EXTEND(SP, maxarg);
93965878
NIS
87 if (SvMAGICAL(TARG)) {
88 U32 i;
eb160463 89 for (i=0; i < (U32)maxarg; i++) {
502c6561 90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
3280af22 91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
93965878
NIS
92 }
93 }
94 else {
502c6561 95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
93965878 96 }
85e6fe83
LW
97 SP += maxarg;
98 }
13017935 99 else if (gimme == G_SCALAR) {
1b6737cc 100 SV* const sv = sv_newmortal();
502c6561 101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
85e6fe83
LW
102 sv_setiv(sv, maxarg);
103 PUSHs(sv);
104 }
105 RETURN;
93a17b20
LW
106}
107
108PP(pp_padhv)
109{
97aff369 110 dVAR; dSP; dTARGET;
54310121 111 I32 gimme;
112
e190e9b4 113 assert(SvTYPE(TARG) == SVt_PVHV);
93a17b20 114 XPUSHs(TARG);
533c011a 115 if (PL_op->op_private & OPpLVAL_INTRO)
a5911867
RGS
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
533c011a 118 if (PL_op->op_flags & OPf_REF)
93a17b20 119 RETURN;
40c94d11
FC
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
78f9721b
SM
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
125 RETURN;
40c94d11 126 }
78f9721b 127 }
54310121 128 gimme = GIMME_V;
129 if (gimme == G_ARRAY) {
981b7185 130 RETURNOP(Perl_do_kv(aTHX));
85e6fe83 131 }
54310121 132 else if (gimme == G_SCALAR) {
85fbaab2 133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
85e6fe83 134 SETs(sv);
85e6fe83 135 }
54310121 136 RETURN;
93a17b20
LW
137}
138
79072805
LW
139/* Translations. */
140
4bdf8368 141static const char S_no_symref_sv[] =
def89bff
NC
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
143
79072805
LW
144PP(pp_rv2gv)
145{
97aff369 146 dVAR; dSP; dTOPss;
8ec5e241 147
f64c9ac5 148 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
ed6116ce 149 if (SvROK(sv)) {
a0d0e21e 150 wasref:
93d7320b
DM
151 if (SvAMAGIC(sv)) {
152 sv = amagic_deref_call(sv, to_gv_amg);
153 SPAGAIN;
154 }
ed6116ce 155 sv = SvRV(sv);
b1dadf13 156 if (SvTYPE(sv) == SVt_PVIO) {
159b6efe 157 GV * const gv = MUTABLE_GV(sv_newmortal());
b1dadf13 158 gv_init(gv, 0, "", 0, 0);
a45c7426 159 GvIOp(gv) = MUTABLE_IO(sv);
b37c2d43 160 SvREFCNT_inc_void_NN(sv);
ad64d0ec 161 sv = MUTABLE_SV(gv);
ef54e1a4 162 }
6e592b3a 163 else if (!isGV_with_GP(sv))
cea2e8a9 164 DIE(aTHX_ "Not a GLOB reference");
79072805
LW
165 }
166 else {
6e592b3a 167 if (!isGV_with_GP(sv)) {
afd1915d 168 if (!SvOK(sv) && sv != &PL_sv_undef) {
b13b2135 169 /* If this is a 'my' scalar and flag is set then vivify
853846ea 170 * NI-S 1999/05/07
b13b2135 171 */
ac53db4c 172 if (SvREADONLY(sv))
6ad8f254 173 Perl_croak_no_modify(aTHX);
1d8d4d2a 174 if (PL_op->op_private & OPpDEREF) {
2c8ac474
GS
175 GV *gv;
176 if (cUNOP->op_targ) {
177 STRLEN len;
0bd48802
AL
178 SV * const namesv = PAD_SV(cUNOP->op_targ);
179 const char * const name = SvPV(namesv, len);
159b6efe 180 gv = MUTABLE_GV(newSV(0));
2c8ac474
GS
181 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
182 }
183 else {
0bd48802 184 const char * const name = CopSTASHPV(PL_curcop);
2c8ac474 185 gv = newGVgen(name);
1d8d4d2a 186 }
43230e26 187 prepare_SV_for_RV(sv);
ad64d0ec 188 SvRV_set(sv, MUTABLE_SV(gv));
853846ea 189 SvROK_on(sv);
1d8d4d2a 190 SvSETMAGIC(sv);
853846ea 191 goto wasref;
2c8ac474 192 }
533c011a
NIS
193 if (PL_op->op_flags & OPf_REF ||
194 PL_op->op_private & HINT_STRICT_REFS)
cea2e8a9 195 DIE(aTHX_ PL_no_usym, "a symbol");
599cee73 196 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 197 report_uninit(sv);
a0d0e21e
LW
198 RETSETUNDEF;
199 }
35cd451c
GS
200 if ((PL_op->op_flags & OPf_SPECIAL) &&
201 !(PL_op->op_flags & OPf_MOD))
202 {
ad64d0ec 203 SV * const temp = MUTABLE_SV(gv_fetchsv(sv, 0, SVt_PVGV));
7a5fd60d
NC
204 if (!temp
205 && (!is_gv_magical_sv(sv,0)
ad64d0ec
NC
206 || !(sv = MUTABLE_SV(gv_fetchsv(sv, GV_ADD,
207 SVt_PVGV))))) {
35cd451c 208 RETSETUNDEF;
c9d5ac95 209 }
7a5fd60d 210 sv = temp;
35cd451c
GS
211 }
212 else {
213 if (PL_op->op_private & HINT_STRICT_REFS)
10b53e54 214 DIE(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), "a symbol");
e26df76a
NC
215 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
216 == OPpDONT_INIT_GV) {
217 /* We are the target of a coderef assignment. Return
218 the scalar unchanged, and let pp_sasssign deal with
219 things. */
220 RETURN;
221 }
7ffa7e75
FC
222 {
223 STRLEN len;
224 const char * const nambeg = SvPV_nomg_const(sv, len);
225 sv = MUTABLE_SV(
226 gv_fetchpvn_flags(
227 nambeg, len, GV_ADD | SvUTF8(sv), SVt_PVGV
228 )
229 );
230 }
35cd451c 231 }
2acc3314
FC
232 /* FAKE globs in the symbol table cause weird bugs (#77810) */
233 if (sv) SvFAKE_off(sv);
93a17b20 234 }
79072805 235 }
2acc3314
FC
236 if (sv && SvFAKE(sv)) {
237 SV *newsv = sv_newmortal();
5cf4b255 238 sv_setsv_flags(newsv, sv, 0);
2acc3314 239 SvFAKE_off(newsv);
d8906c05 240 sv = newsv;
2acc3314 241 }
d8906c05
FC
242 if (PL_op->op_private & OPpLVAL_INTRO)
243 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
244 SETs(sv);
79072805
LW
245 RETURN;
246}
247
dc3c76f8
NC
248/* Helper function for pp_rv2sv and pp_rv2av */
249GV *
fe9845cc
RB
250Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
251 const svtype type, SV ***spp)
dc3c76f8
NC
252{
253 dVAR;
254 GV *gv;
255
7918f24d
NC
256 PERL_ARGS_ASSERT_SOFTREF2XV;
257
dc3c76f8
NC
258 if (PL_op->op_private & HINT_STRICT_REFS) {
259 if (SvOK(sv))
10b53e54 260 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
dc3c76f8
NC
261 else
262 Perl_die(aTHX_ PL_no_usym, what);
263 }
264 if (!SvOK(sv)) {
fd1d9b5c
FC
265 if (
266 PL_op->op_flags & OPf_REF &&
267 PL_op->op_next->op_type != OP_BOOLKEYS
268 )
dc3c76f8
NC
269 Perl_die(aTHX_ PL_no_usym, what);
270 if (ckWARN(WARN_UNINITIALIZED))
271 report_uninit(sv);
272 if (type != SVt_PV && GIMME_V == G_ARRAY) {
273 (*spp)--;
274 return NULL;
275 }
276 **spp = &PL_sv_undef;
277 return NULL;
278 }
279 if ((PL_op->op_flags & OPf_SPECIAL) &&
280 !(PL_op->op_flags & OPf_MOD))
281 {
81e3fc25 282 gv = gv_fetchsv(sv, 0, type);
dc3c76f8
NC
283 if (!gv
284 && (!is_gv_magical_sv(sv,0)
81e3fc25 285 || !(gv = gv_fetchsv(sv, GV_ADD, type))))
dc3c76f8
NC
286 {
287 **spp = &PL_sv_undef;
288 return NULL;
289 }
290 }
291 else {
7ffa7e75
FC
292 STRLEN len;
293 const char * const nambeg = SvPV_nomg_const(sv, len);
294 gv = gv_fetchpvn_flags(nambeg, len, GV_ADD | SvUTF8(sv), type);
dc3c76f8
NC
295 }
296 return gv;
297}
298
79072805
LW
299PP(pp_rv2sv)
300{
97aff369 301 dVAR; dSP; dTOPss;
c445ea15 302 GV *gv = NULL;
79072805 303
0824d667
DM
304 if (!(PL_op->op_private & OPpDEREFed))
305 SvGETMAGIC(sv);
ed6116ce 306 if (SvROK(sv)) {
93d7320b
DM
307 if (SvAMAGIC(sv)) {
308 sv = amagic_deref_call(sv, to_sv_amg);
309 SPAGAIN;
310 }
f5284f61 311
ed6116ce 312 sv = SvRV(sv);
79072805
LW
313 switch (SvTYPE(sv)) {
314 case SVt_PVAV:
315 case SVt_PVHV:
316 case SVt_PVCV:
cbae9b9f
YST
317 case SVt_PVFM:
318 case SVt_PVIO:
cea2e8a9 319 DIE(aTHX_ "Not a SCALAR reference");
42d0e0b7 320 default: NOOP;
79072805
LW
321 }
322 }
323 else {
159b6efe 324 gv = MUTABLE_GV(sv);
748a9306 325
6e592b3a 326 if (!isGV_with_GP(gv)) {
dc3c76f8
NC
327 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
328 if (!gv)
329 RETURN;
463ee0b2 330 }
29c711a3 331 sv = GvSVn(gv);
a0d0e21e 332 }
533c011a 333 if (PL_op->op_flags & OPf_MOD) {
82d03984
RGS
334 if (PL_op->op_private & OPpLVAL_INTRO) {
335 if (cUNOP->op_first->op_type == OP_NULL)
159b6efe 336 sv = save_scalar(MUTABLE_GV(TOPs));
82d03984
RGS
337 else if (gv)
338 sv = save_scalar(gv);
339 else
f1f66076 340 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
82d03984 341 }
533c011a
NIS
342 else if (PL_op->op_private & OPpDEREF)
343 vivify_ref(sv, PL_op->op_private & OPpDEREF);
79072805 344 }
a0d0e21e 345 SETs(sv);
79072805
LW
346 RETURN;
347}
348
349PP(pp_av2arylen)
350{
97aff369 351 dVAR; dSP;
502c6561 352 AV * const av = MUTABLE_AV(TOPs);
02d85cc3
EB
353 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
354 if (lvalue) {
355 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
356 if (!*sv) {
357 *sv = newSV_type(SVt_PVMG);
358 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
359 }
360 SETs(*sv);
361 } else {
362 SETs(sv_2mortal(newSViv(
363 AvFILL(MUTABLE_AV(av)) + CopARYBASE_get(PL_curcop)
364 )));
79072805 365 }
79072805
LW
366 RETURN;
367}
368
a0d0e21e
LW
369PP(pp_pos)
370{
2154eca7 371 dVAR; dSP; dPOPss;
8ec5e241 372
78f9721b 373 if (PL_op->op_flags & OPf_MOD || LVRET) {
16eb5365
FC
374 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
375 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
376 LvTYPE(ret) = '.';
377 LvTARG(ret) = SvREFCNT_inc_simple(sv);
2154eca7 378 PUSHs(ret); /* no SvSETMAGIC */
a0d0e21e
LW
379 RETURN;
380 }
381 else {
a0d0e21e 382 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
1b6737cc 383 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
565764a8 384 if (mg && mg->mg_len >= 0) {
2154eca7 385 dTARGET;
a0ed51b3 386 I32 i = mg->mg_len;
7e2040f0 387 if (DO_UTF8(sv))
a0ed51b3 388 sv_pos_b2u(sv, &i);
fc15ae8f 389 PUSHi(i + CopARYBASE_get(PL_curcop));
a0d0e21e
LW
390 RETURN;
391 }
392 }
393 RETPUSHUNDEF;
394 }
395}
396
79072805
LW
397PP(pp_rv2cv)
398{
97aff369 399 dVAR; dSP;
79072805 400 GV *gv;
1eced8f8 401 HV *stash_unused;
c445ea15
AL
402 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
403 ? 0
404 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
405 ? GV_ADD|GV_NOEXPAND
406 : GV_ADD;
4633a7c4
LW
407 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
408 /* (But not in defined().) */
e26df76a 409
1eced8f8 410 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
07055b4c
CS
411 if (cv) {
412 if (CvCLONE(cv))
ad64d0ec 413 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
d32f2495
SC
414 if ((PL_op->op_private & OPpLVAL_INTRO)) {
415 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
416 cv = GvCV(gv);
417 if (!CvLVALUE(cv))
418 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
419 }
07055b4c 420 }
e26df76a 421 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
ea726b52 422 cv = MUTABLE_CV(gv);
e26df76a 423 }
07055b4c 424 else
ea726b52 425 cv = MUTABLE_CV(&PL_sv_undef);
ad64d0ec 426 SETs(MUTABLE_SV(cv));
79072805
LW
427 RETURN;
428}
429
c07a80fd 430PP(pp_prototype)
431{
97aff369 432 dVAR; dSP;
c07a80fd 433 CV *cv;
434 HV *stash;
435 GV *gv;
fabdb6c0 436 SV *ret = &PL_sv_undef;
c07a80fd 437
b6c543e3 438 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
e3f73d4e 439 const char * s = SvPVX_const(TOPs);
b6c543e3 440 if (strnEQ(s, "CORE::", 6)) {
b8c38f0a
FC
441 SV *const sv = core_prototype(NULL, s + 6, SvCUR(TOPs) - 6, 1);
442 if (sv) ret = sv;
443 goto set;
b6c543e3
IZ
444 }
445 }
f2c0649b 446 cv = sv_2cv(TOPs, &stash, &gv, 0);
5f05dabc 447 if (cv && SvPOK(cv))
59cd0e26 448 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
b6c543e3 449 set:
c07a80fd 450 SETs(ret);
451 RETURN;
452}
453
a0d0e21e
LW
454PP(pp_anoncode)
455{
97aff369 456 dVAR; dSP;
ea726b52 457 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
a5f75d66 458 if (CvCLONE(cv))
ad64d0ec 459 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
5f05dabc 460 EXTEND(SP,1);
ad64d0ec 461 PUSHs(MUTABLE_SV(cv));
a0d0e21e
LW
462 RETURN;
463}
464
465PP(pp_srefgen)
79072805 466{
97aff369 467 dVAR; dSP;
71be2cbc 468 *SP = refto(*SP);
79072805 469 RETURN;
8ec5e241 470}
a0d0e21e
LW
471
472PP(pp_refgen)
473{
97aff369 474 dVAR; dSP; dMARK;
a0d0e21e 475 if (GIMME != G_ARRAY) {
5f0b1d4e
GS
476 if (++MARK <= SP)
477 *MARK = *SP;
478 else
3280af22 479 *MARK = &PL_sv_undef;
5f0b1d4e
GS
480 *MARK = refto(*MARK);
481 SP = MARK;
482 RETURN;
a0d0e21e 483 }
bbce6d69 484 EXTEND_MORTAL(SP - MARK);
71be2cbc 485 while (++MARK <= SP)
486 *MARK = refto(*MARK);
a0d0e21e 487 RETURN;
79072805
LW
488}
489
76e3520e 490STATIC SV*
cea2e8a9 491S_refto(pTHX_ SV *sv)
71be2cbc 492{
97aff369 493 dVAR;
71be2cbc 494 SV* rv;
495
7918f24d
NC
496 PERL_ARGS_ASSERT_REFTO;
497
71be2cbc 498 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
499 if (LvTARGLEN(sv))
68dc0745 500 vivify_defelem(sv);
501 if (!(sv = LvTARG(sv)))
3280af22 502 sv = &PL_sv_undef;
0dd88869 503 else
b37c2d43 504 SvREFCNT_inc_void_NN(sv);
71be2cbc 505 }
d8b46c1b 506 else if (SvTYPE(sv) == SVt_PVAV) {
502c6561
NC
507 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
508 av_reify(MUTABLE_AV(sv));
d8b46c1b 509 SvTEMP_off(sv);
b37c2d43 510 SvREFCNT_inc_void_NN(sv);
d8b46c1b 511 }
f2933f5f
DM
512 else if (SvPADTMP(sv) && !IS_PADGV(sv))
513 sv = newSVsv(sv);
71be2cbc 514 else {
515 SvTEMP_off(sv);
b37c2d43 516 SvREFCNT_inc_void_NN(sv);
71be2cbc 517 }
518 rv = sv_newmortal();
4df7f6af 519 sv_upgrade(rv, SVt_IV);
b162af07 520 SvRV_set(rv, sv);
71be2cbc 521 SvROK_on(rv);
522 return rv;
523}
524
79072805
LW
525PP(pp_ref)
526{
97aff369 527 dVAR; dSP; dTARGET;
e1ec3a88 528 const char *pv;
1b6737cc 529 SV * const sv = POPs;
f12c7020 530
5b295bef
RD
531 if (sv)
532 SvGETMAGIC(sv);
f12c7020 533
a0d0e21e 534 if (!sv || !SvROK(sv))
4633a7c4 535 RETPUSHNO;
79072805 536
cba0b539
FR
537 pv = sv_reftype(SvRV(sv),TRUE);
538 PUSHp(pv, strlen(pv));
79072805
LW
539 RETURN;
540}
541
542PP(pp_bless)
543{
97aff369 544 dVAR; dSP;
463ee0b2 545 HV *stash;
79072805 546
463ee0b2 547 if (MAXARG == 1)
11faa288 548 stash = CopSTASH(PL_curcop);
7b8d334a 549 else {
1b6737cc 550 SV * const ssv = POPs;
7b8d334a 551 STRLEN len;
e1ec3a88 552 const char *ptr;
81689caa 553
016a42f3 554 if (ssv && !SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
81689caa 555 Perl_croak(aTHX_ "Attempt to bless into a reference");
5c144d81 556 ptr = SvPV_const(ssv,len);
a2a5de95
NC
557 if (len == 0)
558 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
559 "Explicit blessing to '' (assuming package main)");
da51bb9b 560 stash = gv_stashpvn(ptr, len, GV_ADD);
7b8d334a 561 }
a0d0e21e 562
5d3fdfeb 563 (void)sv_bless(TOPs, stash);
79072805
LW
564 RETURN;
565}
566
fb73857a 567PP(pp_gelem)
568{
97aff369 569 dVAR; dSP;
b13b2135 570
1b6737cc
AL
571 SV *sv = POPs;
572 const char * const elem = SvPV_nolen_const(sv);
159b6efe 573 GV * const gv = MUTABLE_GV(POPs);
c445ea15 574 SV * tmpRef = NULL;
1b6737cc 575
c445ea15 576 sv = NULL;
c4ba80c3
NC
577 if (elem) {
578 /* elem will always be NUL terminated. */
1b6737cc 579 const char * const second_letter = elem + 1;
c4ba80c3
NC
580 switch (*elem) {
581 case 'A':
1b6737cc 582 if (strEQ(second_letter, "RRAY"))
ad64d0ec 583 tmpRef = MUTABLE_SV(GvAV(gv));
c4ba80c3
NC
584 break;
585 case 'C':
1b6737cc 586 if (strEQ(second_letter, "ODE"))
ad64d0ec 587 tmpRef = MUTABLE_SV(GvCVu(gv));
c4ba80c3
NC
588 break;
589 case 'F':
1b6737cc 590 if (strEQ(second_letter, "ILEHANDLE")) {
c4ba80c3
NC
591 /* finally deprecated in 5.8.0 */
592 deprecate("*glob{FILEHANDLE}");
ad64d0ec 593 tmpRef = MUTABLE_SV(GvIOp(gv));
c4ba80c3
NC
594 }
595 else
1b6737cc 596 if (strEQ(second_letter, "ORMAT"))
ad64d0ec 597 tmpRef = MUTABLE_SV(GvFORM(gv));
c4ba80c3
NC
598 break;
599 case 'G':
1b6737cc 600 if (strEQ(second_letter, "LOB"))
ad64d0ec 601 tmpRef = MUTABLE_SV(gv);
c4ba80c3
NC
602 break;
603 case 'H':
1b6737cc 604 if (strEQ(second_letter, "ASH"))
ad64d0ec 605 tmpRef = MUTABLE_SV(GvHV(gv));
c4ba80c3
NC
606 break;
607 case 'I':
1b6737cc 608 if (*second_letter == 'O' && !elem[2])
ad64d0ec 609 tmpRef = MUTABLE_SV(GvIOp(gv));
c4ba80c3
NC
610 break;
611 case 'N':
1b6737cc 612 if (strEQ(second_letter, "AME"))
a663657d 613 sv = newSVhek(GvNAME_HEK(gv));
c4ba80c3
NC
614 break;
615 case 'P':
1b6737cc 616 if (strEQ(second_letter, "ACKAGE")) {
7fa3a4ab
NC
617 const HV * const stash = GvSTASH(gv);
618 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
396482e1 619 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
c4ba80c3
NC
620 }
621 break;
622 case 'S':
1b6737cc 623 if (strEQ(second_letter, "CALAR"))
f9d52e31 624 tmpRef = GvSVn(gv);
c4ba80c3 625 break;
39b99f21 626 }
fb73857a 627 }
76e3520e
GS
628 if (tmpRef)
629 sv = newRV(tmpRef);
fb73857a 630 if (sv)
631 sv_2mortal(sv);
632 else
3280af22 633 sv = &PL_sv_undef;
fb73857a 634 XPUSHs(sv);
635 RETURN;
636}
637
a0d0e21e 638/* Pattern matching */
79072805 639
a0d0e21e 640PP(pp_study)
79072805 641{
97aff369 642 dVAR; dSP; dPOPss;
a0d0e21e 643 register unsigned char *s;
72de20cd 644 char *sfirst_raw;
a0d0e21e 645 STRLEN len;
4185c919 646 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
72de20cd
NC
647 U8 quanta;
648 STRLEN size;
4185c919
NC
649
650 if (mg && SvSCREAM(sv))
651 RETPUSHYES;
a0d0e21e 652
a4f4e906 653 s = (unsigned char*)(SvPV(sv, len));
bc9a5256 654 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
a4f4e906
NC
655 /* No point in studying a zero length string, and not safe to study
656 anything that doesn't appear to be a simple scalar (and hence might
657 change between now and when the regexp engine runs without our set
bd473224 658 magic ever running) such as a reference to an object with overloaded
bc9a5256
NC
659 stringification. Also refuse to study an FBM scalar, as this gives
660 more flexibility in SV flag usage. No real-world code would ever
661 end up studying an FBM scalar, so this isn't a real pessimisation.
72de20cd
NC
662 Endemic use of I32 in Perl_screaminstr makes it hard to safely push
663 the study length limit from I32_MAX to U32_MAX - 1.
bc9a5256 664 */
a4f4e906
NC
665 RETPUSHNO;
666 }
667
72de20cd
NC
668 if (len < 0xFF) {
669 quanta = 1;
670 } else if (len < 0xFFFF) {
671 quanta = 2;
672 } else
673 quanta = 4;
a0d0e21e 674
72de20cd
NC
675 size = (256 + len) * quanta;
676 sfirst_raw = (char *)safemalloc(size);
677
678 if (!sfirst_raw)
cea2e8a9 679 DIE(aTHX_ "do_study: out of memory");
a0d0e21e 680
4185c919
NC
681 SvSCREAM_on(sv);
682 if (!mg)
683 mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
72de20cd
NC
684 mg->mg_ptr = sfirst_raw;
685 mg->mg_len = size;
686 mg->mg_private = quanta;
687
688 memset(sfirst_raw, ~0, 256 * quanta);
689
690 /* The assumption here is that most studied strings are fairly short, hence
691 the pain of the extra code is worth it, given the memory savings.
692 80 character string, 336 bytes as U8, down from 1344 as U32
693 800 character string, 2112 bytes as U16, down from 4224 as U32
694 */
695
696 if (quanta == 1) {
697 U8 *const sfirst = (U8 *)sfirst_raw;
698 U8 *const snext = sfirst + 256;
699 while (len-- > 0) {
700 const U8 ch = s[len];
701 snext[len] = sfirst[ch];
702 sfirst[ch] = len;
703 }
704 } else if (quanta == 2) {
705 U16 *const sfirst = (U16 *)sfirst_raw;
706 U16 *const snext = sfirst + 256;
707 while (len-- > 0) {
708 const U8 ch = s[len];
709 snext[len] = sfirst[ch];
710 sfirst[ch] = len;
711 }
712 } else {
713 U32 *const sfirst = (U32 *)sfirst_raw;
714 U32 *const snext = sfirst + 256;
715 while (len-- > 0) {
716 const U8 ch = s[len];
717 snext[len] = sfirst[ch];
718 sfirst[ch] = len;
719 }
79072805
LW
720 }
721
1e422769 722 RETPUSHYES;
79072805
LW
723}
724
a0d0e21e 725PP(pp_trans)
79072805 726{
97aff369 727 dVAR; dSP; dTARG;
a0d0e21e
LW
728 SV *sv;
729
533c011a 730 if (PL_op->op_flags & OPf_STACKED)
a0d0e21e 731 sv = POPs;
59f00321
RGS
732 else if (PL_op->op_private & OPpTARGET_MY)
733 sv = GETTARGET;
79072805 734 else {
54b9620d 735 sv = DEFSV;
a0d0e21e 736 EXTEND(SP,1);
79072805 737 }
adbc6bb1 738 TARG = sv_newmortal();
bb16bae8
FC
739 if(PL_op->op_type == OP_TRANSR) {
740 SV * const newsv = newSVsv(sv);
741 do_trans(newsv);
742 mPUSHs(newsv);
743 }
744 else PUSHi(do_trans(sv));
a0d0e21e 745 RETURN;
79072805
LW
746}
747
a0d0e21e 748/* Lvalue operators. */
79072805 749
81745e4e
NC
750static void
751S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
752{
753 dVAR;
754 STRLEN len;
755 char *s;
756
757 PERL_ARGS_ASSERT_DO_CHOMP;
758
759 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
760 return;
761 if (SvTYPE(sv) == SVt_PVAV) {
762 I32 i;
763 AV *const av = MUTABLE_AV(sv);
764 const I32 max = AvFILL(av);
765
766 for (i = 0; i <= max; i++) {
767 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
768 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
769 do_chomp(retval, sv, chomping);
770 }
771 return;
772 }
773 else if (SvTYPE(sv) == SVt_PVHV) {
774 HV* const hv = MUTABLE_HV(sv);
775 HE* entry;
776 (void)hv_iterinit(hv);
777 while ((entry = hv_iternext(hv)))
778 do_chomp(retval, hv_iterval(hv,entry), chomping);
779 return;
780 }
781 else if (SvREADONLY(sv)) {
782 if (SvFAKE(sv)) {
783 /* SV is copy-on-write */
784 sv_force_normal_flags(sv, 0);
785 }
786 if (SvREADONLY(sv))
787 Perl_croak_no_modify(aTHX);
788 }
789
790 if (PL_encoding) {
791 if (!SvUTF8(sv)) {
792 /* XXX, here sv is utf8-ized as a side-effect!
793 If encoding.pm is used properly, almost string-generating
794 operations, including literal strings, chr(), input data, etc.
795 should have been utf8-ized already, right?
796 */
797 sv_recode_to_utf8(sv, PL_encoding);
798 }
799 }
800
801 s = SvPV(sv, len);
802 if (chomping) {
803 char *temp_buffer = NULL;
804 SV *svrecode = NULL;
805
806 if (s && len) {
807 s += --len;
808 if (RsPARA(PL_rs)) {
809 if (*s != '\n')
810 goto nope;
811 ++SvIVX(retval);
812 while (len && s[-1] == '\n') {
813 --len;
814 --s;
815 ++SvIVX(retval);
816 }
817 }
818 else {
819 STRLEN rslen, rs_charlen;
820 const char *rsptr = SvPV_const(PL_rs, rslen);
821
822 rs_charlen = SvUTF8(PL_rs)
823 ? sv_len_utf8(PL_rs)
824 : rslen;
825
826 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
827 /* Assumption is that rs is shorter than the scalar. */
828 if (SvUTF8(PL_rs)) {
829 /* RS is utf8, scalar is 8 bit. */
830 bool is_utf8 = TRUE;
831 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
832 &rslen, &is_utf8);
833 if (is_utf8) {
834 /* Cannot downgrade, therefore cannot possibly match
835 */
836 assert (temp_buffer == rsptr);
837 temp_buffer = NULL;
838 goto nope;
839 }
840 rsptr = temp_buffer;
841 }
842 else if (PL_encoding) {
843 /* RS is 8 bit, encoding.pm is used.
844 * Do not recode PL_rs as a side-effect. */
845 svrecode = newSVpvn(rsptr, rslen);
846 sv_recode_to_utf8(svrecode, PL_encoding);
847 rsptr = SvPV_const(svrecode, rslen);
848 rs_charlen = sv_len_utf8(svrecode);
849 }
850 else {
851 /* RS is 8 bit, scalar is utf8. */
852 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
853 rsptr = temp_buffer;
854 }
855 }
856 if (rslen == 1) {
857 if (*s != *rsptr)
858 goto nope;
859 ++SvIVX(retval);
860 }
861 else {
862 if (len < rslen - 1)
863 goto nope;
864 len -= rslen - 1;
865 s -= rslen - 1;
866 if (memNE(s, rsptr, rslen))
867 goto nope;
868 SvIVX(retval) += rs_charlen;
869 }
870 }
871 s = SvPV_force_nolen(sv);
872 SvCUR_set(sv, len);
873 *SvEND(sv) = '\0';
874 SvNIOK_off(sv);
875 SvSETMAGIC(sv);
876 }
877 nope:
878
879 SvREFCNT_dec(svrecode);
880
881 Safefree(temp_buffer);
882 } else {
883 if (len && !SvPOK(sv))
884 s = SvPV_force_nomg(sv, len);
885 if (DO_UTF8(sv)) {
886 if (s && len) {
887 char * const send = s + len;
888 char * const start = s;
889 s = send - 1;
890 while (s > start && UTF8_IS_CONTINUATION(*s))
891 s--;
892 if (is_utf8_string((U8*)s, send - s)) {
893 sv_setpvn(retval, s, send - s);
894 *s = '\0';
895 SvCUR_set(sv, s - start);
896 SvNIOK_off(sv);
897 SvUTF8_on(retval);
898 }
899 }
900 else
901 sv_setpvs(retval, "");
902 }
903 else if (s && len) {
904 s += --len;
905 sv_setpvn(retval, s, 1);
906 *s = '\0';
907 SvCUR_set(sv, len);
908 SvUTF8_off(sv);
909 SvNIOK_off(sv);
910 }
911 else
912 sv_setpvs(retval, "");
913 SvSETMAGIC(sv);
914 }
915}
916
a0d0e21e
LW
917PP(pp_schop)
918{
97aff369 919 dVAR; dSP; dTARGET;
fa54efae
NC
920 const bool chomping = PL_op->op_type == OP_SCHOMP;
921
922 if (chomping)
923 sv_setiv(TARG, 0);
924 do_chomp(TARG, TOPs, chomping);
a0d0e21e
LW
925 SETTARG;
926 RETURN;
79072805
LW
927}
928
a0d0e21e 929PP(pp_chop)
79072805 930{
97aff369 931 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
fa54efae 932 const bool chomping = PL_op->op_type == OP_CHOMP;
8ec5e241 933
fa54efae
NC
934 if (chomping)
935 sv_setiv(TARG, 0);
20cf1f79 936 while (MARK < SP)
fa54efae 937 do_chomp(TARG, *++MARK, chomping);
20cf1f79
NC
938 SP = ORIGMARK;
939 XPUSHTARG;
a0d0e21e 940 RETURN;
79072805
LW
941}
942
a0d0e21e
LW
943PP(pp_undef)
944{
97aff369 945 dVAR; dSP;
a0d0e21e
LW
946 SV *sv;
947
533c011a 948 if (!PL_op->op_private) {
774d564b 949 EXTEND(SP, 1);
a0d0e21e 950 RETPUSHUNDEF;
774d564b 951 }
79072805 952
a0d0e21e
LW
953 sv = POPs;
954 if (!sv)
955 RETPUSHUNDEF;
85e6fe83 956
765f542d 957 SV_CHECK_THINKFIRST_COW_DROP(sv);
85e6fe83 958
a0d0e21e
LW
959 switch (SvTYPE(sv)) {
960 case SVt_NULL:
961 break;
962 case SVt_PVAV:
502c6561 963 av_undef(MUTABLE_AV(sv));
a0d0e21e
LW
964 break;
965 case SVt_PVHV:
85fbaab2 966 hv_undef(MUTABLE_HV(sv));
a0d0e21e
LW
967 break;
968 case SVt_PVCV:
a2a5de95
NC
969 if (cv_const_sv((const CV *)sv))
970 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
971 CvANON((const CV *)sv) ? "(anonymous)"
972 : GvENAME(CvGV((const CV *)sv)));
5f66b61c 973 /* FALLTHROUGH */
9607fc9c 974 case SVt_PVFM:
6fc92669
GS
975 {
976 /* let user-undef'd sub keep its identity */
ea726b52
NC
977 GV* const gv = CvGV((const CV *)sv);
978 cv_undef(MUTABLE_CV(sv));
b3f91e91 979 CvGV_set(MUTABLE_CV(sv), gv);
6fc92669 980 }
a0d0e21e 981 break;
8e07c86e 982 case SVt_PVGV:
6e592b3a 983 if (SvFAKE(sv)) {
3280af22 984 SvSetMagicSV(sv, &PL_sv_undef);
6e592b3a
BM
985 break;
986 }
987 else if (isGV_with_GP(sv)) {
20408e3c 988 GP *gp;
dd69841b
BB
989 HV *stash;
990
dd69841b 991 /* undef *Pkg::meth_name ... */
e530fb81
FC
992 bool method_changed
993 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
994 && HvENAME_get(stash);
995 /* undef *Foo:: */
996 if((stash = GvHV((const GV *)sv))) {
997 if(HvENAME_get(stash))
998 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
999 else stash = NULL;
1000 }
dd69841b 1001
159b6efe 1002 gp_free(MUTABLE_GV(sv));
a02a5408 1003 Newxz(gp, 1, GP);
c43ae56f 1004 GvGP_set(sv, gp_ref(gp));
561b68a9 1005 GvSV(sv) = newSV(0);
57843af0 1006 GvLINE(sv) = CopLINE(PL_curcop);
159b6efe 1007 GvEGV(sv) = MUTABLE_GV(sv);
20408e3c 1008 GvMULTI_on(sv);
e530fb81
FC
1009
1010 if(stash)
afdbe55d 1011 mro_package_moved(NULL, stash, (const GV *)sv, 0);
e530fb81
FC
1012 stash = NULL;
1013 /* undef *Foo::ISA */
1014 if( strEQ(GvNAME((const GV *)sv), "ISA")
1015 && (stash = GvSTASH((const GV *)sv))
1016 && (method_changed || HvENAME(stash)) )
1017 mro_isa_changed_in(stash);
1018 else if(method_changed)
1019 mro_method_changed_in(
da9043f5 1020 GvSTASH((const GV *)sv)
e530fb81
FC
1021 );
1022
6e592b3a 1023 break;
20408e3c 1024 }
6e592b3a 1025 /* FALL THROUGH */
a0d0e21e 1026 default:
b15aece3 1027 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
8bd4d4c5 1028 SvPV_free(sv);
c445ea15 1029 SvPV_set(sv, NULL);
4633a7c4 1030 SvLEN_set(sv, 0);
a0d0e21e 1031 }
0c34ef67 1032 SvOK_off(sv);
4633a7c4 1033 SvSETMAGIC(sv);
79072805 1034 }
a0d0e21e
LW
1035
1036 RETPUSHUNDEF;
79072805
LW
1037}
1038
a0d0e21e 1039PP(pp_predec)
79072805 1040{
97aff369 1041 dVAR; dSP;
6e592b3a 1042 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
6ad8f254 1043 Perl_croak_no_modify(aTHX);
3510b4a1
NC
1044 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1045 && SvIVX(TOPs) != IV_MIN)
55497cff 1046 {
45977657 1047 SvIV_set(TOPs, SvIVX(TOPs) - 1);
55497cff 1048 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
748a9306
LW
1049 }
1050 else
1051 sv_dec(TOPs);
a0d0e21e
LW
1052 SvSETMAGIC(TOPs);
1053 return NORMAL;
1054}
79072805 1055
a0d0e21e
LW
1056PP(pp_postinc)
1057{
97aff369 1058 dVAR; dSP; dTARGET;
6e592b3a 1059 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
6ad8f254 1060 Perl_croak_no_modify(aTHX);
7dcb9b98
DM
1061 if (SvROK(TOPs))
1062 TARG = sv_newmortal();
a0d0e21e 1063 sv_setsv(TARG, TOPs);
3510b4a1
NC
1064 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1065 && SvIVX(TOPs) != IV_MAX)
55497cff 1066 {
45977657 1067 SvIV_set(TOPs, SvIVX(TOPs) + 1);
55497cff 1068 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
748a9306
LW
1069 }
1070 else
6f1401dc 1071 sv_inc_nomg(TOPs);
a0d0e21e 1072 SvSETMAGIC(TOPs);
1e54a23f 1073 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
a0d0e21e
LW
1074 if (!SvOK(TARG))
1075 sv_setiv(TARG, 0);
1076 SETs(TARG);
1077 return NORMAL;
1078}
79072805 1079
a0d0e21e
LW
1080PP(pp_postdec)
1081{
97aff369 1082 dVAR; dSP; dTARGET;
6e592b3a 1083 if (SvTYPE(TOPs) >= SVt_PVAV || isGV_with_GP(TOPs))
6ad8f254 1084 Perl_croak_no_modify(aTHX);
7dcb9b98
DM
1085 if (SvROK(TOPs))
1086 TARG = sv_newmortal();
a0d0e21e 1087 sv_setsv(TARG, TOPs);
3510b4a1
NC
1088 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
1089 && SvIVX(TOPs) != IV_MIN)
55497cff 1090 {
45977657 1091 SvIV_set(TOPs, SvIVX(TOPs) - 1);
55497cff 1092 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
748a9306
LW
1093 }
1094 else
6f1401dc 1095 sv_dec_nomg(TOPs);
a0d0e21e
LW
1096 SvSETMAGIC(TOPs);
1097 SETs(TARG);
1098 return NORMAL;
1099}
79072805 1100
a0d0e21e
LW
1101/* Ordinary operators. */
1102
1103PP(pp_pow)
1104{
800401ee 1105 dVAR; dSP; dATARGET; SV *svl, *svr;
58d76dfd 1106#ifdef PERL_PRESERVE_IVUV
52a96ae6
HS
1107 bool is_int = 0;
1108#endif
6f1401dc
DM
1109 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1110 svr = TOPs;
1111 svl = TOPm1s;
52a96ae6
HS
1112#ifdef PERL_PRESERVE_IVUV
1113 /* For integer to integer power, we do the calculation by hand wherever
1114 we're sure it is safe; otherwise we call pow() and try to convert to
1115 integer afterwards. */
58d76dfd 1116 {
6f1401dc 1117 SvIV_please_nomg(svr);
800401ee 1118 if (SvIOK(svr)) {
6f1401dc 1119 SvIV_please_nomg(svl);
800401ee 1120 if (SvIOK(svl)) {
900658e3
PF
1121 UV power;
1122 bool baseuok;
1123 UV baseuv;
1124
800401ee
JH
1125 if (SvUOK(svr)) {
1126 power = SvUVX(svr);
900658e3 1127 } else {
800401ee 1128 const IV iv = SvIVX(svr);
900658e3
PF
1129 if (iv >= 0) {
1130 power = iv;
1131 } else {
1132 goto float_it; /* Can't do negative powers this way. */
1133 }
1134 }
1135
800401ee 1136 baseuok = SvUOK(svl);
900658e3 1137 if (baseuok) {
800401ee 1138 baseuv = SvUVX(svl);
900658e3 1139 } else {
800401ee 1140 const IV iv = SvIVX(svl);
900658e3
PF
1141 if (iv >= 0) {
1142 baseuv = iv;
1143 baseuok = TRUE; /* effectively it's a UV now */
1144 } else {
1145 baseuv = -iv; /* abs, baseuok == false records sign */
1146 }
1147 }
52a96ae6
HS
1148 /* now we have integer ** positive integer. */
1149 is_int = 1;
1150
1151 /* foo & (foo - 1) is zero only for a power of 2. */
58d76dfd 1152 if (!(baseuv & (baseuv - 1))) {
52a96ae6 1153 /* We are raising power-of-2 to a positive integer.
58d76dfd
JH
1154 The logic here will work for any base (even non-integer
1155 bases) but it can be less accurate than
1156 pow (base,power) or exp (power * log (base)) when the
1157 intermediate values start to spill out of the mantissa.
1158 With powers of 2 we know this can't happen.
1159 And powers of 2 are the favourite thing for perl
1160 programmers to notice ** not doing what they mean. */
1161 NV result = 1.0;
1162 NV base = baseuok ? baseuv : -(NV)baseuv;
900658e3
PF
1163
1164 if (power & 1) {
1165 result *= base;
1166 }
1167 while (power >>= 1) {
1168 base *= base;
1169 if (power & 1) {
1170 result *= base;
1171 }
1172 }
58d76dfd
JH
1173 SP--;
1174 SETn( result );
6f1401dc 1175 SvIV_please_nomg(svr);
58d76dfd 1176 RETURN;
52a96ae6
HS
1177 } else {
1178 register unsigned int highbit = 8 * sizeof(UV);
900658e3
PF
1179 register unsigned int diff = 8 * sizeof(UV);
1180 while (diff >>= 1) {
1181 highbit -= diff;
1182 if (baseuv >> highbit) {
1183 highbit += diff;
1184 }
52a96ae6
HS
1185 }
1186 /* we now have baseuv < 2 ** highbit */
1187 if (power * highbit <= 8 * sizeof(UV)) {
1188 /* result will definitely fit in UV, so use UV math
1189 on same algorithm as above */
1190 register UV result = 1;
1191 register UV base = baseuv;
f2338a2e 1192 const bool odd_power = cBOOL(power & 1);
900658e3
PF
1193 if (odd_power) {
1194 result *= base;
1195 }
1196 while (power >>= 1) {
1197 base *= base;
1198 if (power & 1) {
52a96ae6 1199 result *= base;
52a96ae6
HS
1200 }
1201 }
1202 SP--;
0615a994 1203 if (baseuok || !odd_power)
52a96ae6
HS
1204 /* answer is positive */
1205 SETu( result );
1206 else if (result <= (UV)IV_MAX)
1207 /* answer negative, fits in IV */
1208 SETi( -(IV)result );
1209 else if (result == (UV)IV_MIN)
1210 /* 2's complement assumption: special case IV_MIN */
1211 SETi( IV_MIN );
1212 else
1213 /* answer negative, doesn't fit */
1214 SETn( -(NV)result );
1215 RETURN;
1216 }
1217 }
1218 }
1219 }
58d76dfd 1220 }
52a96ae6 1221 float_it:
58d76dfd 1222#endif
a0d0e21e 1223 {
6f1401dc
DM
1224 NV right = SvNV_nomg(svr);
1225 NV left = SvNV_nomg(svl);
4efa5a16 1226 (void)POPs;
3aaeb624
JA
1227
1228#if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1229 /*
1230 We are building perl with long double support and are on an AIX OS
1231 afflicted with a powl() function that wrongly returns NaNQ for any
1232 negative base. This was reported to IBM as PMR #23047-379 on
1233 03/06/2006. The problem exists in at least the following versions
1234 of AIX and the libm fileset, and no doubt others as well:
1235
1236 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1237 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1238 AIX 5.2.0 bos.adt.libm 5.2.0.85
1239
1240 So, until IBM fixes powl(), we provide the following workaround to
1241 handle the problem ourselves. Our logic is as follows: for
1242 negative bases (left), we use fmod(right, 2) to check if the
1243 exponent is an odd or even integer:
1244
1245 - if odd, powl(left, right) == -powl(-left, right)
1246 - if even, powl(left, right) == powl(-left, right)
1247
1248 If the exponent is not an integer, the result is rightly NaNQ, so
1249 we just return that (as NV_NAN).
1250 */
1251
1252 if (left < 0.0) {
1253 NV mod2 = Perl_fmod( right, 2.0 );
1254 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1255 SETn( -Perl_pow( -left, right) );
1256 } else if (mod2 == 0.0) { /* even integer */
1257 SETn( Perl_pow( -left, right) );
1258 } else { /* fractional power */
1259 SETn( NV_NAN );
1260 }
1261 } else {
1262 SETn( Perl_pow( left, right) );
1263 }
1264#else
52a96ae6 1265 SETn( Perl_pow( left, right) );
3aaeb624
JA
1266#endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1267
52a96ae6
HS
1268#ifdef PERL_PRESERVE_IVUV
1269 if (is_int)
6f1401dc 1270 SvIV_please_nomg(svr);
52a96ae6
HS
1271#endif
1272 RETURN;
93a17b20 1273 }
a0d0e21e
LW
1274}
1275
1276PP(pp_multiply)
1277{
800401ee 1278 dVAR; dSP; dATARGET; SV *svl, *svr;
6f1401dc
DM
1279 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1280 svr = TOPs;
1281 svl = TOPm1s;
28e5dec8 1282#ifdef PERL_PRESERVE_IVUV
6f1401dc 1283 SvIV_please_nomg(svr);
800401ee 1284 if (SvIOK(svr)) {
28e5dec8
JH
1285 /* Unless the left argument is integer in range we are going to have to
1286 use NV maths. Hence only attempt to coerce the right argument if
1287 we know the left is integer. */
1288 /* Left operand is defined, so is it IV? */
6f1401dc 1289 SvIV_please_nomg(svl);
800401ee
JH
1290 if (SvIOK(svl)) {
1291 bool auvok = SvUOK(svl);
1292 bool buvok = SvUOK(svr);
28e5dec8
JH
1293 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1294 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1295 UV alow;
1296 UV ahigh;
1297 UV blow;
1298 UV bhigh;
1299
1300 if (auvok) {
800401ee 1301 alow = SvUVX(svl);
28e5dec8 1302 } else {
800401ee 1303 const IV aiv = SvIVX(svl);
28e5dec8
JH
1304 if (aiv >= 0) {
1305 alow = aiv;
1306 auvok = TRUE; /* effectively it's a UV now */
1307 } else {
1308 alow = -aiv; /* abs, auvok == false records sign */
1309 }
1310 }
1311 if (buvok) {
800401ee 1312 blow = SvUVX(svr);
28e5dec8 1313 } else {
800401ee 1314 const IV biv = SvIVX(svr);
28e5dec8
JH
1315 if (biv >= 0) {
1316 blow = biv;
1317 buvok = TRUE; /* effectively it's a UV now */
1318 } else {
1319 blow = -biv; /* abs, buvok == false records sign */
1320 }
1321 }
1322
1323 /* If this does sign extension on unsigned it's time for plan B */
1324 ahigh = alow >> (4 * sizeof (UV));
1325 alow &= botmask;
1326 bhigh = blow >> (4 * sizeof (UV));
1327 blow &= botmask;
1328 if (ahigh && bhigh) {
6f207bd3 1329 NOOP;
28e5dec8
JH
1330 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1331 which is overflow. Drop to NVs below. */
1332 } else if (!ahigh && !bhigh) {
1333 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1334 so the unsigned multiply cannot overflow. */
c445ea15 1335 const UV product = alow * blow;
28e5dec8
JH
1336 if (auvok == buvok) {
1337 /* -ve * -ve or +ve * +ve gives a +ve result. */
1338 SP--;
1339 SETu( product );
1340 RETURN;
1341 } else if (product <= (UV)IV_MIN) {
1342 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1343 /* -ve result, which could overflow an IV */
1344 SP--;
25716404 1345 SETi( -(IV)product );
28e5dec8
JH
1346 RETURN;
1347 } /* else drop to NVs below. */
1348 } else {
1349 /* One operand is large, 1 small */
1350 UV product_middle;
1351 if (bhigh) {
1352 /* swap the operands */
1353 ahigh = bhigh;
1354 bhigh = blow; /* bhigh now the temp var for the swap */
1355 blow = alow;
1356 alow = bhigh;
1357 }
1358 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1359 multiplies can't overflow. shift can, add can, -ve can. */
1360 product_middle = ahigh * blow;
1361 if (!(product_middle & topmask)) {
1362 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1363 UV product_low;
1364 product_middle <<= (4 * sizeof (UV));
1365 product_low = alow * blow;
1366
1367 /* as for pp_add, UV + something mustn't get smaller.
1368 IIRC ANSI mandates this wrapping *behaviour* for
1369 unsigned whatever the actual representation*/
1370 product_low += product_middle;
1371 if (product_low >= product_middle) {
1372 /* didn't overflow */
1373 if (auvok == buvok) {
1374 /* -ve * -ve or +ve * +ve gives a +ve result. */
1375 SP--;
1376 SETu( product_low );
1377 RETURN;
1378 } else if (product_low <= (UV)IV_MIN) {
1379 /* 2s complement assumption again */
1380 /* -ve result, which could overflow an IV */
1381 SP--;
25716404 1382 SETi( -(IV)product_low );
28e5dec8
JH
1383 RETURN;
1384 } /* else drop to NVs below. */
1385 }
1386 } /* product_middle too large */
1387 } /* ahigh && bhigh */
800401ee
JH
1388 } /* SvIOK(svl) */
1389 } /* SvIOK(svr) */
28e5dec8 1390#endif
a0d0e21e 1391 {
6f1401dc
DM
1392 NV right = SvNV_nomg(svr);
1393 NV left = SvNV_nomg(svl);
4efa5a16 1394 (void)POPs;
a0d0e21e
LW
1395 SETn( left * right );
1396 RETURN;
79072805 1397 }
a0d0e21e
LW
1398}
1399
1400PP(pp_divide)
1401{
800401ee 1402 dVAR; dSP; dATARGET; SV *svl, *svr;
6f1401dc
DM
1403 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1404 svr = TOPs;
1405 svl = TOPm1s;
5479d192 1406 /* Only try to do UV divide first
68795e93 1407 if ((SLOPPYDIVIDE is true) or
5479d192
NC
1408 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1409 to preserve))
1410 The assumption is that it is better to use floating point divide
1411 whenever possible, only doing integer divide first if we can't be sure.
1412 If NV_PRESERVES_UV is true then we know at compile time that no UV
1413 can be too large to preserve, so don't need to compile the code to
1414 test the size of UVs. */
1415
a0d0e21e 1416#ifdef SLOPPYDIVIDE
5479d192
NC
1417# define PERL_TRY_UV_DIVIDE
1418 /* ensure that 20./5. == 4. */
a0d0e21e 1419#else
5479d192
NC
1420# ifdef PERL_PRESERVE_IVUV
1421# ifndef NV_PRESERVES_UV
1422# define PERL_TRY_UV_DIVIDE
1423# endif
1424# endif
a0d0e21e 1425#endif
5479d192
NC
1426
1427#ifdef PERL_TRY_UV_DIVIDE
6f1401dc 1428 SvIV_please_nomg(svr);
800401ee 1429 if (SvIOK(svr)) {
6f1401dc 1430 SvIV_please_nomg(svl);
800401ee
JH
1431 if (SvIOK(svl)) {
1432 bool left_non_neg = SvUOK(svl);
1433 bool right_non_neg = SvUOK(svr);
5479d192
NC
1434 UV left;
1435 UV right;
1436
1437 if (right_non_neg) {
800401ee 1438 right = SvUVX(svr);
5479d192
NC
1439 }
1440 else {
800401ee 1441 const IV biv = SvIVX(svr);
5479d192
NC
1442 if (biv >= 0) {
1443 right = biv;
1444 right_non_neg = TRUE; /* effectively it's a UV now */
1445 }
1446 else {
1447 right = -biv;
1448 }
1449 }
1450 /* historically undef()/0 gives a "Use of uninitialized value"
1451 warning before dieing, hence this test goes here.
1452 If it were immediately before the second SvIV_please, then
1453 DIE() would be invoked before left was even inspected, so
486ec47a 1454 no inspection would give no warning. */
5479d192
NC
1455 if (right == 0)
1456 DIE(aTHX_ "Illegal division by zero");
1457
1458 if (left_non_neg) {
800401ee 1459 left = SvUVX(svl);
5479d192
NC
1460 }
1461 else {
800401ee 1462 const IV aiv = SvIVX(svl);
5479d192
NC
1463 if (aiv >= 0) {
1464 left = aiv;
1465 left_non_neg = TRUE; /* effectively it's a UV now */
1466 }
1467 else {
1468 left = -aiv;
1469 }
1470 }
1471
1472 if (left >= right
1473#ifdef SLOPPYDIVIDE
1474 /* For sloppy divide we always attempt integer division. */
1475#else
1476 /* Otherwise we only attempt it if either or both operands
1477 would not be preserved by an NV. If both fit in NVs
0c2ee62a
NC
1478 we fall through to the NV divide code below. However,
1479 as left >= right to ensure integer result here, we know that
1480 we can skip the test on the right operand - right big
1481 enough not to be preserved can't get here unless left is
1482 also too big. */
1483
1484 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
5479d192
NC
1485#endif
1486 ) {
1487 /* Integer division can't overflow, but it can be imprecise. */
1b6737cc 1488 const UV result = left / right;
5479d192
NC
1489 if (result * right == left) {
1490 SP--; /* result is valid */
1491 if (left_non_neg == right_non_neg) {
1492 /* signs identical, result is positive. */
1493 SETu( result );
1494 RETURN;
1495 }
1496 /* 2s complement assumption */
1497 if (result <= (UV)IV_MIN)
91f3b821 1498 SETi( -(IV)result );
5479d192
NC
1499 else {
1500 /* It's exact but too negative for IV. */
1501 SETn( -(NV)result );
1502 }
1503 RETURN;
1504 } /* tried integer divide but it was not an integer result */
32fdb065 1505 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
5479d192
NC
1506 } /* left wasn't SvIOK */
1507 } /* right wasn't SvIOK */
1508#endif /* PERL_TRY_UV_DIVIDE */
1509 {
6f1401dc
DM
1510 NV right = SvNV_nomg(svr);
1511 NV left = SvNV_nomg(svl);
4efa5a16 1512 (void)POPs;(void)POPs;
ebc6a117
PD
1513#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1514 if (! Perl_isnan(right) && right == 0.0)
1515#else
5479d192 1516 if (right == 0.0)
ebc6a117 1517#endif
5479d192
NC
1518 DIE(aTHX_ "Illegal division by zero");
1519 PUSHn( left / right );
1520 RETURN;
79072805 1521 }
a0d0e21e
LW
1522}
1523
1524PP(pp_modulo)
1525{
6f1401dc
DM
1526 dVAR; dSP; dATARGET;
1527 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
a0d0e21e 1528 {
9c5ffd7c
JH
1529 UV left = 0;
1530 UV right = 0;
dc656993
JH
1531 bool left_neg = FALSE;
1532 bool right_neg = FALSE;
e2c88acc
NC
1533 bool use_double = FALSE;
1534 bool dright_valid = FALSE;
9c5ffd7c
JH
1535 NV dright = 0.0;
1536 NV dleft = 0.0;
6f1401dc
DM
1537 SV * const svr = TOPs;
1538 SV * const svl = TOPm1s;
1539 SvIV_please_nomg(svr);
800401ee
JH
1540 if (SvIOK(svr)) {
1541 right_neg = !SvUOK(svr);
e2c88acc 1542 if (!right_neg) {
800401ee 1543 right = SvUVX(svr);
e2c88acc 1544 } else {
800401ee 1545 const IV biv = SvIVX(svr);
e2c88acc
NC
1546 if (biv >= 0) {
1547 right = biv;
1548 right_neg = FALSE; /* effectively it's a UV now */
1549 } else {
1550 right = -biv;
1551 }
1552 }
1553 }
1554 else {
6f1401dc 1555 dright = SvNV_nomg(svr);
787eafbd
IZ
1556 right_neg = dright < 0;
1557 if (right_neg)
1558 dright = -dright;
e2c88acc
NC
1559 if (dright < UV_MAX_P1) {
1560 right = U_V(dright);
1561 dright_valid = TRUE; /* In case we need to use double below. */
1562 } else {
1563 use_double = TRUE;
1564 }
787eafbd 1565 }
a0d0e21e 1566
e2c88acc
NC
1567 /* At this point use_double is only true if right is out of range for
1568 a UV. In range NV has been rounded down to nearest UV and
1569 use_double false. */
6f1401dc 1570 SvIV_please_nomg(svl);
800401ee
JH
1571 if (!use_double && SvIOK(svl)) {
1572 if (SvIOK(svl)) {
1573 left_neg = !SvUOK(svl);
e2c88acc 1574 if (!left_neg) {
800401ee 1575 left = SvUVX(svl);
e2c88acc 1576 } else {
800401ee 1577 const IV aiv = SvIVX(svl);
e2c88acc
NC
1578 if (aiv >= 0) {
1579 left = aiv;
1580 left_neg = FALSE; /* effectively it's a UV now */
1581 } else {
1582 left = -aiv;
1583 }
1584 }
1585 }
1586 }
787eafbd 1587 else {
6f1401dc 1588 dleft = SvNV_nomg(svl);
787eafbd
IZ
1589 left_neg = dleft < 0;
1590 if (left_neg)
1591 dleft = -dleft;
68dc0745 1592
e2c88acc
NC
1593 /* This should be exactly the 5.6 behaviour - if left and right are
1594 both in range for UV then use U_V() rather than floor. */
1595 if (!use_double) {
1596 if (dleft < UV_MAX_P1) {
1597 /* right was in range, so is dleft, so use UVs not double.
1598 */
1599 left = U_V(dleft);
1600 }
1601 /* left is out of range for UV, right was in range, so promote
1602 right (back) to double. */
1603 else {
1604 /* The +0.5 is used in 5.6 even though it is not strictly
1605 consistent with the implicit +0 floor in the U_V()
1606 inside the #if 1. */
1607 dleft = Perl_floor(dleft + 0.5);
1608 use_double = TRUE;
1609 if (dright_valid)
1610 dright = Perl_floor(dright + 0.5);
1611 else
1612 dright = right;
1613 }
1614 }
1615 }
6f1401dc 1616 sp -= 2;
787eafbd 1617 if (use_double) {
65202027 1618 NV dans;
787eafbd 1619
787eafbd 1620 if (!dright)
cea2e8a9 1621 DIE(aTHX_ "Illegal modulus zero");
787eafbd 1622
65202027 1623 dans = Perl_fmod(dleft, dright);
787eafbd
IZ
1624 if ((left_neg != right_neg) && dans)
1625 dans = dright - dans;
1626 if (right_neg)
1627 dans = -dans;
1628 sv_setnv(TARG, dans);
1629 }
1630 else {
1631 UV ans;
1632
787eafbd 1633 if (!right)
cea2e8a9 1634 DIE(aTHX_ "Illegal modulus zero");
787eafbd
IZ
1635
1636 ans = left % right;
1637 if ((left_neg != right_neg) && ans)
1638 ans = right - ans;
1639 if (right_neg) {
1640 /* XXX may warn: unary minus operator applied to unsigned type */
1641 /* could change -foo to be (~foo)+1 instead */
1642 if (ans <= ~((UV)IV_MAX)+1)
1643 sv_setiv(TARG, ~ans+1);
1644 else
65202027 1645 sv_setnv(TARG, -(NV)ans);
787eafbd
IZ
1646 }
1647 else
1648 sv_setuv(TARG, ans);
1649 }
1650 PUSHTARG;
1651 RETURN;
79072805 1652 }
a0d0e21e 1653}
79072805 1654
a0d0e21e
LW
1655PP(pp_repeat)
1656{
6f1401dc 1657 dVAR; dSP; dATARGET;
2b573ace 1658 register IV count;
6f1401dc
DM
1659 SV *sv;
1660
1661 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1662 /* TODO: think of some way of doing list-repeat overloading ??? */
1663 sv = POPs;
1664 SvGETMAGIC(sv);
1665 }
1666 else {
1667 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1668 sv = POPs;
1669 }
1670
2b573ace
JH
1671 if (SvIOKp(sv)) {
1672 if (SvUOK(sv)) {
6f1401dc 1673 const UV uv = SvUV_nomg(sv);
2b573ace
JH
1674 if (uv > IV_MAX)
1675 count = IV_MAX; /* The best we can do? */
1676 else
1677 count = uv;
1678 } else {
6f1401dc 1679 const IV iv = SvIV_nomg(sv);
2b573ace
JH
1680 if (iv < 0)
1681 count = 0;
1682 else
1683 count = iv;
1684 }
1685 }
1686 else if (SvNOKp(sv)) {
6f1401dc 1687 const NV nv = SvNV_nomg(sv);
2b573ace
JH
1688 if (nv < 0.0)
1689 count = 0;
1690 else
1691 count = (IV)nv;
1692 }
1693 else
6f1401dc
DM
1694 count = SvIV_nomg(sv);
1695
533c011a 1696 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
a0d0e21e 1697 dMARK;
0bd48802
AL
1698 static const char oom_list_extend[] = "Out of memory during list extend";
1699 const I32 items = SP - MARK;
1700 const I32 max = items * count;
79072805 1701
2b573ace
JH
1702 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1703 /* Did the max computation overflow? */
27d5b266 1704 if (items > 0 && max > 0 && (max < items || max < count))
2b573ace 1705 Perl_croak(aTHX_ oom_list_extend);
a0d0e21e
LW
1706 MEXTEND(MARK, max);
1707 if (count > 1) {
1708 while (SP > MARK) {
976c8a39
JH
1709#if 0
1710 /* This code was intended to fix 20010809.028:
1711
1712 $x = 'abcd';
1713 for (($x =~ /./g) x 2) {
1714 print chop; # "abcdabcd" expected as output.
1715 }
1716
1717 * but that change (#11635) broke this code:
1718
1719 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1720
1721 * I can't think of a better fix that doesn't introduce
1722 * an efficiency hit by copying the SVs. The stack isn't
1723 * refcounted, and mortalisation obviously doesn't
1724 * Do The Right Thing when the stack has more than
1725 * one pointer to the same mortal value.
1726 * .robin.
1727 */
e30acc16
RH
1728 if (*SP) {
1729 *SP = sv_2mortal(newSVsv(*SP));
1730 SvREADONLY_on(*SP);
1731 }
976c8a39
JH
1732#else
1733 if (*SP)
1734 SvTEMP_off((*SP));
1735#endif
a0d0e21e 1736 SP--;
79072805 1737 }
a0d0e21e
LW
1738 MARK++;
1739 repeatcpy((char*)(MARK + items), (char*)MARK,
ad64d0ec 1740 items * sizeof(const SV *), count - 1);
a0d0e21e 1741 SP += max;
79072805 1742 }
a0d0e21e
LW
1743 else if (count <= 0)
1744 SP -= items;
79072805 1745 }
a0d0e21e 1746 else { /* Note: mark already snarfed by pp_list */
0bd48802 1747 SV * const tmpstr = POPs;
a0d0e21e 1748 STRLEN len;
9b877dbb 1749 bool isutf;
2b573ace
JH
1750 static const char oom_string_extend[] =
1751 "Out of memory during string extend";
a0d0e21e 1752
6f1401dc
DM
1753 if (TARG != tmpstr)
1754 sv_setsv_nomg(TARG, tmpstr);
1755 SvPV_force_nomg(TARG, len);
9b877dbb 1756 isutf = DO_UTF8(TARG);
8ebc5c01 1757 if (count != 1) {
1758 if (count < 1)
1759 SvCUR_set(TARG, 0);
1760 else {
c445ea15 1761 const STRLEN max = (UV)count * len;
19a94d75 1762 if (len > MEM_SIZE_MAX / count)
2b573ace
JH
1763 Perl_croak(aTHX_ oom_string_extend);
1764 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
8569b9dc 1765 SvGROW(TARG, max + 1);
a0d0e21e 1766 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
b162af07 1767 SvCUR_set(TARG, SvCUR(TARG) * count);
7a4c00b4 1768 }
a0d0e21e 1769 *SvEND(TARG) = '\0';
a0d0e21e 1770 }
dfcb284a
GS
1771 if (isutf)
1772 (void)SvPOK_only_UTF8(TARG);
1773 else
1774 (void)SvPOK_only(TARG);
b80b6069
RH
1775
1776 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1777 /* The parser saw this as a list repeat, and there
1778 are probably several items on the stack. But we're
1779 in scalar context, and there's no pp_list to save us
1780 now. So drop the rest of the items -- robin@kitsite.com
1781 */
1782 dMARK;
1783 SP = MARK;
1784 }
a0d0e21e 1785 PUSHTARG;
79072805 1786 }
a0d0e21e
LW
1787 RETURN;
1788}
79072805 1789
a0d0e21e
LW
1790PP(pp_subtract)
1791{
800401ee 1792 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
6f1401dc
DM
1793 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1794 svr = TOPs;
1795 svl = TOPm1s;
800401ee 1796 useleft = USE_LEFT(svl);
28e5dec8 1797#ifdef PERL_PRESERVE_IVUV
7dca457a
NC
1798 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1799 "bad things" happen if you rely on signed integers wrapping. */
6f1401dc 1800 SvIV_please_nomg(svr);
800401ee 1801 if (SvIOK(svr)) {
28e5dec8
JH
1802 /* Unless the left argument is integer in range we are going to have to
1803 use NV maths. Hence only attempt to coerce the right argument if
1804 we know the left is integer. */
9c5ffd7c
JH
1805 register UV auv = 0;
1806 bool auvok = FALSE;
7dca457a
NC
1807 bool a_valid = 0;
1808
28e5dec8 1809 if (!useleft) {
7dca457a
NC
1810 auv = 0;
1811 a_valid = auvok = 1;
1812 /* left operand is undef, treat as zero. */
28e5dec8
JH
1813 } else {
1814 /* Left operand is defined, so is it IV? */
6f1401dc 1815 SvIV_please_nomg(svl);
800401ee
JH
1816 if (SvIOK(svl)) {
1817 if ((auvok = SvUOK(svl)))
1818 auv = SvUVX(svl);
7dca457a 1819 else {
800401ee 1820 register const IV aiv = SvIVX(svl);
7dca457a
NC
1821 if (aiv >= 0) {
1822 auv = aiv;
1823 auvok = 1; /* Now acting as a sign flag. */
1824 } else { /* 2s complement assumption for IV_MIN */
1825 auv = (UV)-aiv;
28e5dec8 1826 }
7dca457a
NC
1827 }
1828 a_valid = 1;
1829 }
1830 }
1831 if (a_valid) {
1832 bool result_good = 0;
1833 UV result;
1834 register UV buv;
800401ee 1835 bool buvok = SvUOK(svr);
9041c2e3 1836
7dca457a 1837 if (buvok)
800401ee 1838 buv = SvUVX(svr);
7dca457a 1839 else {
800401ee 1840 register const IV biv = SvIVX(svr);
7dca457a
NC
1841 if (biv >= 0) {
1842 buv = biv;
1843 buvok = 1;
1844 } else
1845 buv = (UV)-biv;
1846 }
1847 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
602f51c4 1848 else "IV" now, independent of how it came in.
7dca457a
NC
1849 if a, b represents positive, A, B negative, a maps to -A etc
1850 a - b => (a - b)
1851 A - b => -(a + b)
1852 a - B => (a + b)
1853 A - B => -(a - b)
1854 all UV maths. negate result if A negative.
1855 subtract if signs same, add if signs differ. */
1856
1857 if (auvok ^ buvok) {
1858 /* Signs differ. */
1859 result = auv + buv;
1860 if (result >= auv)
1861 result_good = 1;
1862 } else {
1863 /* Signs same */
1864 if (auv >= buv) {
1865 result = auv - buv;
1866 /* Must get smaller */
1867 if (result <= auv)
1868 result_good = 1;
1869 } else {
1870 result = buv - auv;
1871 if (result <= buv) {
1872 /* result really should be -(auv-buv). as its negation
1873 of true value, need to swap our result flag */
1874 auvok = !auvok;
1875 result_good = 1;
28e5dec8 1876 }
28e5dec8
JH
1877 }
1878 }
7dca457a
NC
1879 if (result_good) {
1880 SP--;
1881 if (auvok)
1882 SETu( result );
1883 else {
1884 /* Negate result */
1885 if (result <= (UV)IV_MIN)
1886 SETi( -(IV)result );
1887 else {
1888 /* result valid, but out of range for IV. */
1889 SETn( -(NV)result );
1890 }
1891 }
1892 RETURN;
1893 } /* Overflow, drop through to NVs. */
28e5dec8
JH
1894 }
1895 }
1896#endif
a0d0e21e 1897 {
6f1401dc 1898 NV value = SvNV_nomg(svr);
4efa5a16
RD
1899 (void)POPs;
1900
28e5dec8
JH
1901 if (!useleft) {
1902 /* left operand is undef, treat as zero - value */
1903 SETn(-value);
1904 RETURN;
1905 }
6f1401dc 1906 SETn( SvNV_nomg(svl) - value );
28e5dec8 1907 RETURN;
79072805 1908 }
a0d0e21e 1909}
79072805 1910
a0d0e21e
LW
1911PP(pp_left_shift)
1912{
6f1401dc 1913 dVAR; dSP; dATARGET; SV *svl, *svr;
a42d0242 1914 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
6f1401dc
DM
1915 svr = POPs;
1916 svl = TOPs;
a0d0e21e 1917 {
6f1401dc 1918 const IV shift = SvIV_nomg(svr);
d0ba1bd2 1919 if (PL_op->op_private & HINT_INTEGER) {
6f1401dc 1920 const IV i = SvIV_nomg(svl);
972b05a9 1921 SETi(i << shift);
d0ba1bd2
JH
1922 }
1923 else {
6f1401dc 1924 const UV u = SvUV_nomg(svl);
972b05a9 1925 SETu(u << shift);
d0ba1bd2 1926 }
55497cff 1927 RETURN;
79072805 1928 }
a0d0e21e 1929}
79072805 1930
a0d0e21e
LW
1931PP(pp_right_shift)
1932{
6f1401dc 1933 dVAR; dSP; dATARGET; SV *svl, *svr;
a42d0242 1934 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
6f1401dc
DM
1935 svr = POPs;
1936 svl = TOPs;
a0d0e21e 1937 {
6f1401dc 1938 const IV shift = SvIV_nomg(svr);
d0ba1bd2 1939 if (PL_op->op_private & HINT_INTEGER) {
6f1401dc 1940 const IV i = SvIV_nomg(svl);
972b05a9 1941 SETi(i >> shift);
d0ba1bd2
JH
1942 }
1943 else {
6f1401dc 1944 const UV u = SvUV_nomg(svl);
972b05a9 1945 SETu(u >> shift);
d0ba1bd2 1946 }
a0d0e21e 1947 RETURN;
93a17b20 1948 }
79072805
LW
1949}
1950
a0d0e21e 1951PP(pp_lt)
79072805 1952{
6f1401dc 1953 dVAR; dSP;
33efebe6
DM
1954 SV *left, *right;
1955
a42d0242 1956 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
33efebe6
DM
1957 right = POPs;
1958 left = TOPs;
1959 SETs(boolSV(
1960 (SvIOK_notUV(left) && SvIOK_notUV(right))
1961 ? (SvIVX(left) < SvIVX(right))
1962 : (do_ncmp(left, right) == -1)
1963 ));
1964 RETURN;
a0d0e21e 1965}
79072805 1966
a0d0e21e
LW
1967PP(pp_gt)
1968{
6f1401dc 1969 dVAR; dSP;
33efebe6 1970 SV *left, *right;
1b6737cc 1971
33efebe6
DM
1972 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1973 right = POPs;
1974 left = TOPs;
1975 SETs(boolSV(
1976 (SvIOK_notUV(left) && SvIOK_notUV(right))
1977 ? (SvIVX(left) > SvIVX(right))
1978 : (do_ncmp(left, right) == 1)
1979 ));
1980 RETURN;
a0d0e21e
LW
1981}
1982
1983PP(pp_le)
1984{
6f1401dc 1985 dVAR; dSP;
33efebe6 1986 SV *left, *right;
1b6737cc 1987
33efebe6
DM
1988 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
1989 right = POPs;
1990 left = TOPs;
1991 SETs(boolSV(
1992 (SvIOK_notUV(left) && SvIOK_notUV(right))
1993 ? (SvIVX(left) <= SvIVX(right))
1994 : (do_ncmp(left, right) <= 0)
1995 ));
1996 RETURN;
a0d0e21e
LW
1997}
1998
1999PP(pp_ge)
2000{
6f1401dc 2001 dVAR; dSP;
33efebe6
DM
2002 SV *left, *right;
2003
2004 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
2005 right = POPs;
2006 left = TOPs;
2007 SETs(boolSV(
2008 (SvIOK_notUV(left) && SvIOK_notUV(right))
2009 ? (SvIVX(left) >= SvIVX(right))
2010 : ( (do_ncmp(left, right) & 2) == 0)
2011 ));
2012 RETURN;
2013}
1b6737cc 2014
33efebe6
DM
2015PP(pp_ne)
2016{
2017 dVAR; dSP;
2018 SV *left, *right;
2019
2020 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2021 right = POPs;
2022 left = TOPs;
2023 SETs(boolSV(
2024 (SvIOK_notUV(left) && SvIOK_notUV(right))
2025 ? (SvIVX(left) != SvIVX(right))
2026 : (do_ncmp(left, right) != 0)
2027 ));
2028 RETURN;
2029}
1b6737cc 2030
33efebe6
DM
2031/* compare left and right SVs. Returns:
2032 * -1: <
2033 * 0: ==
2034 * 1: >
2035 * 2: left or right was a NaN
2036 */
2037I32
2038Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2039{
2040 dVAR;
1b6737cc 2041
33efebe6
DM
2042 PERL_ARGS_ASSERT_DO_NCMP;
2043#ifdef PERL_PRESERVE_IVUV
2044 SvIV_please_nomg(right);
2045 /* Fortunately it seems NaN isn't IOK */
2046 if (SvIOK(right)) {
2047 SvIV_please_nomg(left);
2048 if (SvIOK(left)) {
2049 if (!SvUOK(left)) {
2050 const IV leftiv = SvIVX(left);
2051 if (!SvUOK(right)) {
2052 /* ## IV <=> IV ## */
2053 const IV rightiv = SvIVX(right);
2054 return (leftiv > rightiv) - (leftiv < rightiv);
28e5dec8 2055 }
33efebe6
DM
2056 /* ## IV <=> UV ## */
2057 if (leftiv < 0)
2058 /* As (b) is a UV, it's >=0, so it must be < */
2059 return -1;
2060 {
2061 const UV rightuv = SvUVX(right);
2062 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
28e5dec8 2063 }
28e5dec8 2064 }
79072805 2065
33efebe6
DM
2066 if (SvUOK(right)) {
2067 /* ## UV <=> UV ## */
2068 const UV leftuv = SvUVX(left);
2069 const UV rightuv = SvUVX(right);
2070 return (leftuv > rightuv) - (leftuv < rightuv);
28e5dec8 2071 }
33efebe6
DM
2072 /* ## UV <=> IV ## */
2073 {
2074 const IV rightiv = SvIVX(right);
2075 if (rightiv < 0)
2076 /* As (a) is a UV, it's >=0, so it cannot be < */
2077 return 1;
2078 {
2079 const UV leftuv = SvUVX(left);
2080 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
28e5dec8 2081 }
28e5dec8 2082 }
33efebe6 2083 /* NOTREACHED */
28e5dec8
JH
2084 }
2085 }
2086#endif
a0d0e21e 2087 {
33efebe6
DM
2088 NV const rnv = SvNV_nomg(right);
2089 NV const lnv = SvNV_nomg(left);
2090
cab190d4 2091#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
33efebe6
DM
2092 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2093 return 2;
2094 }
2095 return (lnv > rnv) - (lnv < rnv);
cab190d4 2096#else
33efebe6
DM
2097 if (lnv < rnv)
2098 return -1;
2099 if (lnv > rnv)
2100 return 1;
2101 if (lnv == rnv)
2102 return 0;
2103 return 2;
cab190d4 2104#endif
a0d0e21e 2105 }
79072805
LW
2106}
2107
33efebe6 2108
a0d0e21e 2109PP(pp_ncmp)
79072805 2110{
33efebe6
DM
2111 dVAR; dSP;
2112 SV *left, *right;
2113 I32 value;
a42d0242 2114 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
33efebe6
DM
2115 right = POPs;
2116 left = TOPs;
2117 value = do_ncmp(left, right);
2118 if (value == 2) {
3280af22 2119 SETs(&PL_sv_undef);
79072805 2120 }
33efebe6
DM
2121 else {
2122 dTARGET;
2123 SETi(value);
2124 }
2125 RETURN;
a0d0e21e 2126}
79072805 2127
afd9910b 2128PP(pp_sle)
a0d0e21e 2129{
97aff369 2130 dVAR; dSP;
79072805 2131
afd9910b
NC
2132 int amg_type = sle_amg;
2133 int multiplier = 1;
2134 int rhs = 1;
79072805 2135
afd9910b
NC
2136 switch (PL_op->op_type) {
2137 case OP_SLT:
2138 amg_type = slt_amg;
2139 /* cmp < 0 */
2140 rhs = 0;
2141 break;
2142 case OP_SGT:
2143 amg_type = sgt_amg;
2144 /* cmp > 0 */
2145 multiplier = -1;
2146 rhs = 0;
2147 break;
2148 case OP_SGE:
2149 amg_type = sge_amg;
2150 /* cmp >= 0 */
2151 multiplier = -1;
2152 break;
79072805 2153 }
79072805 2154
6f1401dc 2155 tryAMAGICbin_MG(amg_type, AMGf_set);
a0d0e21e
LW
2156 {
2157 dPOPTOPssrl;
1b6737cc 2158 const int cmp = (IN_LOCALE_RUNTIME
078504b2
FC
2159 ? sv_cmp_locale_flags(left, right, 0)
2160 : sv_cmp_flags(left, right, 0));
afd9910b 2161 SETs(boolSV(cmp * multiplier < rhs));
a0d0e21e
LW
2162 RETURN;
2163 }
2164}
79072805 2165
36477c24 2166PP(pp_seq)
2167{
6f1401dc
DM
2168 dVAR; dSP;
2169 tryAMAGICbin_MG(seq_amg, AMGf_set);
36477c24 2170 {
2171 dPOPTOPssrl;
078504b2 2172 SETs(boolSV(sv_eq_flags(left, right, 0)));
a0d0e21e
LW
2173 RETURN;
2174 }
2175}
79072805 2176
a0d0e21e 2177PP(pp_sne)
79072805 2178{
6f1401dc
DM
2179 dVAR; dSP;
2180 tryAMAGICbin_MG(sne_amg, AMGf_set);
a0d0e21e
LW
2181 {
2182 dPOPTOPssrl;
078504b2 2183 SETs(boolSV(!sv_eq_flags(left, right, 0)));
a0d0e21e 2184 RETURN;
463ee0b2 2185 }
79072805
LW
2186}
2187
a0d0e21e 2188PP(pp_scmp)
79072805 2189{
6f1401dc
DM
2190 dVAR; dSP; dTARGET;
2191 tryAMAGICbin_MG(scmp_amg, 0);
a0d0e21e
LW
2192 {
2193 dPOPTOPssrl;
1b6737cc 2194 const int cmp = (IN_LOCALE_RUNTIME
078504b2
FC
2195 ? sv_cmp_locale_flags(left, right, 0)
2196 : sv_cmp_flags(left, right, 0));
bbce6d69 2197 SETi( cmp );
a0d0e21e
LW
2198 RETURN;
2199 }
2200}
79072805 2201
55497cff 2202PP(pp_bit_and)
2203{
6f1401dc
DM
2204 dVAR; dSP; dATARGET;
2205 tryAMAGICbin_MG(band_amg, AMGf_assign);
a0d0e21e
LW
2206 {
2207 dPOPTOPssrl;
4633a7c4 2208 if (SvNIOKp(left) || SvNIOKp(right)) {
b20c4ee1
FC
2209 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2210 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
d0ba1bd2 2211 if (PL_op->op_private & HINT_INTEGER) {
1b6737cc 2212 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
972b05a9 2213 SETi(i);
d0ba1bd2
JH
2214 }
2215 else {
1b6737cc 2216 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
972b05a9 2217 SETu(u);
d0ba1bd2 2218 }
b20c4ee1
FC
2219 if (left_ro_nonnum) SvNIOK_off(left);
2220 if (right_ro_nonnum) SvNIOK_off(right);
a0d0e21e
LW
2221 }
2222 else {
533c011a 2223 do_vop(PL_op->op_type, TARG, left, right);
a0d0e21e
LW
2224 SETTARG;
2225 }
2226 RETURN;
2227 }
2228}
79072805 2229
a0d0e21e
LW
2230PP(pp_bit_or)
2231{
3658c1f1
NC
2232 dVAR; dSP; dATARGET;
2233 const int op_type = PL_op->op_type;
2234
6f1401dc 2235 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
a0d0e21e
LW
2236 {
2237 dPOPTOPssrl;
4633a7c4 2238 if (SvNIOKp(left) || SvNIOKp(right)) {
b20c4ee1
FC
2239 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2240 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
d0ba1bd2 2241 if (PL_op->op_private & HINT_INTEGER) {
3658c1f1
NC
2242 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2243 const IV r = SvIV_nomg(right);
2244 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2245 SETi(result);
d0ba1bd2
JH
2246 }
2247 else {
3658c1f1
NC
2248 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2249 const UV r = SvUV_nomg(right);
2250 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2251 SETu(result);
d0ba1bd2 2252 }
b20c4ee1
FC
2253 if (left_ro_nonnum) SvNIOK_off(left);
2254 if (right_ro_nonnum) SvNIOK_off(right);
a0d0e21e
LW
2255 }
2256 else {
3658c1f1 2257 do_vop(op_type, TARG, left, right);
a0d0e21e
LW
2258 SETTARG;
2259 }
2260 RETURN;
79072805 2261 }
a0d0e21e 2262}
79072805 2263
a0d0e21e
LW
2264PP(pp_negate)
2265{
6f1401dc
DM
2266 dVAR; dSP; dTARGET;
2267 tryAMAGICun_MG(neg_amg, AMGf_numeric);
a0d0e21e 2268 {
6f1401dc 2269 SV * const sv = TOPs;
1b6737cc 2270 const int flags = SvFLAGS(sv);
a5b92898 2271
886a4465 2272 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
a5b92898
RB
2273 SvIV_please( sv );
2274 }
2275
28e5dec8
JH
2276 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2277 /* It's publicly an integer, or privately an integer-not-float */
2278 oops_its_an_int:
9b0e499b
GS
2279 if (SvIsUV(sv)) {
2280 if (SvIVX(sv) == IV_MIN) {
28e5dec8 2281 /* 2s complement assumption. */
9b0e499b
GS
2282 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2283 RETURN;
2284 }
2285 else if (SvUVX(sv) <= IV_MAX) {
beccb14c 2286 SETi(-SvIVX(sv));
9b0e499b
GS
2287 RETURN;
2288 }
2289 }
2290 else if (SvIVX(sv) != IV_MIN) {
2291 SETi(-SvIVX(sv));
2292 RETURN;
2293 }
28e5dec8
JH
2294#ifdef PERL_PRESERVE_IVUV
2295 else {
2296 SETu((UV)IV_MIN);
2297 RETURN;
2298 }
2299#endif
9b0e499b
GS
2300 }
2301 if (SvNIOKp(sv))
6f1401dc 2302 SETn(-SvNV_nomg(sv));
4633a7c4 2303 else if (SvPOKp(sv)) {
a0d0e21e 2304 STRLEN len;
6f1401dc 2305 const char * const s = SvPV_nomg_const(sv, len);
bbce6d69 2306 if (isIDFIRST(*s)) {
76f68e9b 2307 sv_setpvs(TARG, "-");
a0d0e21e 2308 sv_catsv(TARG, sv);
79072805 2309 }
a0d0e21e 2310 else if (*s == '+' || *s == '-') {
6f1401dc
DM
2311 sv_setsv_nomg(TARG, sv);
2312 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
79072805 2313 }
8eb28a70 2314 else if (DO_UTF8(sv)) {
6f1401dc 2315 SvIV_please_nomg(sv);
8eb28a70
JH
2316 if (SvIOK(sv))
2317 goto oops_its_an_int;
2318 if (SvNOK(sv))
6f1401dc 2319 sv_setnv(TARG, -SvNV_nomg(sv));
8eb28a70 2320 else {
76f68e9b 2321 sv_setpvs(TARG, "-");
8eb28a70
JH
2322 sv_catsv(TARG, sv);
2323 }
834a4ddd 2324 }
28e5dec8 2325 else {
6f1401dc 2326 SvIV_please_nomg(sv);
8eb28a70
JH
2327 if (SvIOK(sv))
2328 goto oops_its_an_int;
6f1401dc 2329 sv_setnv(TARG, -SvNV_nomg(sv));
28e5dec8 2330 }
a0d0e21e 2331 SETTARG;
79072805 2332 }
4633a7c4 2333 else
6f1401dc 2334 SETn(-SvNV_nomg(sv));
79072805 2335 }
a0d0e21e 2336 RETURN;
79072805
LW
2337}
2338
a0d0e21e 2339PP(pp_not)
79072805 2340{
6f1401dc
DM
2341 dVAR; dSP;
2342 tryAMAGICun_MG(not_amg, AMGf_set);
06c841cf 2343 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
a0d0e21e 2344 return NORMAL;
79072805
LW
2345}
2346
a0d0e21e 2347PP(pp_complement)
79072805 2348{
6f1401dc 2349 dVAR; dSP; dTARGET;
a42d0242 2350 tryAMAGICun_MG(compl_amg, AMGf_numeric);
a0d0e21e
LW
2351 {
2352 dTOPss;
4633a7c4 2353 if (SvNIOKp(sv)) {
d0ba1bd2 2354 if (PL_op->op_private & HINT_INTEGER) {
1b6737cc 2355 const IV i = ~SvIV_nomg(sv);
972b05a9 2356 SETi(i);
d0ba1bd2
JH
2357 }
2358 else {
1b6737cc 2359 const UV u = ~SvUV_nomg(sv);
972b05a9 2360 SETu(u);
d0ba1bd2 2361 }
a0d0e21e
LW
2362 }
2363 else {
51723571 2364 register U8 *tmps;
55497cff 2365 register I32 anum;
a0d0e21e
LW
2366 STRLEN len;
2367
10516c54 2368 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
891f9566 2369 sv_setsv_nomg(TARG, sv);
6f1401dc 2370 tmps = (U8*)SvPV_force_nomg(TARG, len);
a0d0e21e 2371 anum = len;
1d68d6cd 2372 if (SvUTF8(TARG)) {
a1ca4561 2373 /* Calculate exact length, let's not estimate. */
1d68d6cd 2374 STRLEN targlen = 0;
ba210ebe 2375 STRLEN l;
a1ca4561
YST
2376 UV nchar = 0;
2377 UV nwide = 0;
01f6e806 2378 U8 * const send = tmps + len;
74d49cd0
TS
2379 U8 * const origtmps = tmps;
2380 const UV utf8flags = UTF8_ALLOW_ANYUV;
1d68d6cd 2381
1d68d6cd 2382 while (tmps < send) {
74d49cd0
TS
2383 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2384 tmps += l;
5bbb0b5a 2385 targlen += UNISKIP(~c);
a1ca4561
YST
2386 nchar++;
2387 if (c > 0xff)
2388 nwide++;
1d68d6cd
SC
2389 }
2390
2391 /* Now rewind strings and write them. */
74d49cd0 2392 tmps = origtmps;
a1ca4561
YST
2393
2394 if (nwide) {
01f6e806
AL
2395 U8 *result;
2396 U8 *p;
2397
74d49cd0 2398 Newx(result, targlen + 1, U8);
01f6e806 2399 p = result;
a1ca4561 2400 while (tmps < send) {
74d49cd0
TS
2401 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2402 tmps += l;
01f6e806 2403 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
a1ca4561 2404 }
01f6e806 2405 *p = '\0';
c1c21316
NC
2406 sv_usepvn_flags(TARG, (char*)result, targlen,
2407 SV_HAS_TRAILING_NUL);
a1ca4561
YST
2408 SvUTF8_on(TARG);
2409 }
2410 else {
01f6e806
AL
2411 U8 *result;
2412 U8 *p;
2413
74d49cd0 2414 Newx(result, nchar + 1, U8);
01f6e806 2415 p = result;
a1ca4561 2416 while (tmps < send) {
74d49cd0
TS
2417 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2418 tmps += l;
01f6e806 2419 *p++ = ~c;
a1ca4561 2420 }
01f6e806 2421 *p = '\0';
c1c21316 2422 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
d0a21e00 2423 SvUTF8_off(TARG);
1d68d6cd 2424 }
ec93b65f 2425 SETTARG;
1d68d6cd
SC
2426 RETURN;
2427 }
a0d0e21e 2428#ifdef LIBERAL
51723571
JH
2429 {
2430 register long *tmpl;
2431 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2432 *tmps = ~*tmps;
2433 tmpl = (long*)tmps;
bb7a0f54 2434 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
51723571
JH
2435 *tmpl = ~*tmpl;
2436 tmps = (U8*)tmpl;
2437 }
a0d0e21e
LW
2438#endif
2439 for ( ; anum > 0; anum--, tmps++)
2440 *tmps = ~*tmps;
ec93b65f 2441 SETTARG;
a0d0e21e
LW
2442 }
2443 RETURN;
2444 }
79072805
LW
2445}
2446
a0d0e21e
LW
2447/* integer versions of some of the above */
2448
a0d0e21e 2449PP(pp_i_multiply)
79072805 2450{
6f1401dc
DM
2451 dVAR; dSP; dATARGET;
2452 tryAMAGICbin_MG(mult_amg, AMGf_assign);
a0d0e21e 2453 {
6f1401dc 2454 dPOPTOPiirl_nomg;
a0d0e21e
LW
2455 SETi( left * right );
2456 RETURN;
2457 }
79072805
LW
2458}
2459
a0d0e21e 2460PP(pp_i_divide)
79072805 2461{
85935d8e 2462 IV num;
6f1401dc
DM
2463 dVAR; dSP; dATARGET;
2464 tryAMAGICbin_MG(div_amg, AMGf_assign);
a0d0e21e 2465 {
6f1401dc 2466 dPOPTOPssrl;
85935d8e 2467 IV value = SvIV_nomg(right);
a0d0e21e 2468 if (value == 0)
ece1bcef 2469 DIE(aTHX_ "Illegal division by zero");
85935d8e 2470 num = SvIV_nomg(left);
a0cec769
YST
2471
2472 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2473 if (value == -1)
2474 value = - num;
2475 else
2476 value = num / value;
6f1401dc 2477 SETi(value);
a0d0e21e
LW
2478 RETURN;
2479 }
79072805
LW
2480}
2481
befad5d1 2482#if defined(__GLIBC__) && IVSIZE == 8
224ec323
JH
2483STATIC
2484PP(pp_i_modulo_0)
befad5d1
NC
2485#else
2486PP(pp_i_modulo)
2487#endif
224ec323
JH
2488{
2489 /* This is the vanilla old i_modulo. */
6f1401dc
DM
2490 dVAR; dSP; dATARGET;
2491 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2492 {
6f1401dc 2493 dPOPTOPiirl_nomg;
224ec323
JH
2494 if (!right)
2495 DIE(aTHX_ "Illegal modulus zero");
a0cec769
YST
2496 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2497 if (right == -1)
2498 SETi( 0 );
2499 else
2500 SETi( left % right );
224ec323
JH
2501 RETURN;
2502 }
2503}
2504
11010fa3 2505#if defined(__GLIBC__) && IVSIZE == 8
224ec323
JH
2506STATIC
2507PP(pp_i_modulo_1)
befad5d1 2508
224ec323 2509{
224ec323 2510 /* This is the i_modulo with the workaround for the _moddi3 bug
fce2b89e 2511 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
224ec323 2512 * See below for pp_i_modulo. */
6f1401dc
DM
2513 dVAR; dSP; dATARGET;
2514 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2515 {
6f1401dc 2516 dPOPTOPiirl_nomg;
224ec323
JH
2517 if (!right)
2518 DIE(aTHX_ "Illegal modulus zero");
a0cec769
YST
2519 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2520 if (right == -1)
2521 SETi( 0 );
2522 else
2523 SETi( left % PERL_ABS(right) );
224ec323
JH
2524 RETURN;
2525 }
224ec323
JH
2526}
2527
a0d0e21e 2528PP(pp_i_modulo)
79072805 2529{
6f1401dc
DM
2530 dVAR; dSP; dATARGET;
2531 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2532 {
6f1401dc 2533 dPOPTOPiirl_nomg;
224ec323
JH
2534 if (!right)
2535 DIE(aTHX_ "Illegal modulus zero");
2536 /* The assumption is to use hereafter the old vanilla version... */
2537 PL_op->op_ppaddr =
2538 PL_ppaddr[OP_I_MODULO] =
1c127fab 2539 Perl_pp_i_modulo_0;
224ec323
JH
2540 /* .. but if we have glibc, we might have a buggy _moddi3
2541 * (at least glicb 2.2.5 is known to have this bug), in other
2542 * words our integer modulus with negative quad as the second
2543 * argument might be broken. Test for this and re-patch the
2544 * opcode dispatch table if that is the case, remembering to
2545 * also apply the workaround so that this first round works
2546 * right, too. See [perl #9402] for more information. */
224ec323
JH
2547 {
2548 IV l = 3;
2549 IV r = -10;
2550 /* Cannot do this check with inlined IV constants since
2551 * that seems to work correctly even with the buggy glibc. */
2552 if (l % r == -3) {
2553 /* Yikes, we have the bug.
2554 * Patch in the workaround version. */
2555 PL_op->op_ppaddr =
2556 PL_ppaddr[OP_I_MODULO] =
2557 &Perl_pp_i_modulo_1;
2558 /* Make certain we work right this time, too. */
32fdb065 2559 right = PERL_ABS(right);
224ec323
JH
2560 }
2561 }
a0cec769
YST
2562 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2563 if (right == -1)
2564 SETi( 0 );
2565 else
2566 SETi( left % right );
224ec323
JH
2567 RETURN;
2568 }
79072805 2569}
befad5d1 2570#endif
79072805 2571
a0d0e21e 2572PP(pp_i_add)
79072805 2573{
6f1401dc
DM
2574 dVAR; dSP; dATARGET;
2575 tryAMAGICbin_MG(add_amg, AMGf_assign);
a0d0e21e 2576 {
6f1401dc 2577 dPOPTOPiirl_ul_nomg;
a0d0e21e
LW
2578 SETi( left + right );
2579 RETURN;
79072805 2580 }
79072805
LW
2581}
2582
a0d0e21e 2583PP(pp_i_subtract)
79072805 2584{
6f1401dc
DM
2585 dVAR; dSP; dATARGET;
2586 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
a0d0e21e 2587 {
6f1401dc 2588 dPOPTOPiirl_ul_nomg;
a0d0e21e
LW
2589 SETi( left - right );
2590 RETURN;
79072805 2591 }
79072805
LW
2592}
2593
a0d0e21e 2594PP(pp_i_lt)
79072805 2595{
6f1401dc
DM
2596 dVAR; dSP;
2597 tryAMAGICbin_MG(lt_amg, AMGf_set);
a0d0e21e 2598 {
96b6b87f 2599 dPOPTOPiirl_nomg;
54310121 2600 SETs(boolSV(left < right));
a0d0e21e
LW
2601 RETURN;
2602 }
79072805
LW
2603}
2604
a0d0e21e 2605PP(pp_i_gt)
79072805 2606{
6f1401dc
DM
2607 dVAR; dSP;
2608 tryAMAGICbin_MG(gt_amg, AMGf_set);
a0d0e21e 2609 {
96b6b87f 2610 dPOPTOPiirl_nomg;
54310121 2611 SETs(boolSV(left > right));
a0d0e21e
LW
2612 RETURN;
2613 }
79072805
LW
2614}
2615
a0d0e21e 2616PP(pp_i_le)
79072805 2617{
6f1401dc
DM
2618 dVAR; dSP;
2619 tryAMAGICbin_MG(le_amg, AMGf_set);
a0d0e21e 2620 {
96b6b87f 2621 dPOPTOPiirl_nomg;
54310121 2622 SETs(boolSV(left <= right));
a0d0e21e 2623 RETURN;
85e6fe83 2624 }
79072805
LW
2625}
2626
a0d0e21e 2627PP(pp_i_ge)
79072805 2628{
6f1401dc
DM
2629 dVAR; dSP;
2630 tryAMAGICbin_MG(ge_amg, AMGf_set);
a0d0e21e 2631 {
96b6b87f 2632 dPOPTOPiirl_nomg;
54310121 2633 SETs(boolSV(left >= right));
a0d0e21e
LW
2634 RETURN;
2635 }
79072805
LW
2636}
2637
a0d0e21e 2638PP(pp_i_eq)
79072805 2639{
6f1401dc
DM
2640 dVAR; dSP;
2641 tryAMAGICbin_MG(eq_amg, AMGf_set);
a0d0e21e 2642 {
96b6b87f 2643 dPOPTOPiirl_nomg;
54310121 2644 SETs(boolSV(left == right));
a0d0e21e
LW
2645 RETURN;
2646 }
79072805
LW
2647}
2648
a0d0e21e 2649PP(pp_i_ne)
79072805 2650{
6f1401dc
DM
2651 dVAR; dSP;
2652 tryAMAGICbin_MG(ne_amg, AMGf_set);
a0d0e21e 2653 {
96b6b87f 2654 dPOPTOPiirl_nomg;
54310121 2655 SETs(boolSV(left != right));
a0d0e21e
LW
2656 RETURN;
2657 }
79072805
LW
2658}
2659
a0d0e21e 2660PP(pp_i_ncmp)
79072805 2661{
6f1401dc
DM
2662 dVAR; dSP; dTARGET;
2663 tryAMAGICbin_MG(ncmp_amg, 0);
a0d0e21e 2664 {
96b6b87f 2665 dPOPTOPiirl_nomg;
a0d0e21e 2666 I32 value;
79072805 2667
a0d0e21e 2668 if (left > right)
79072805 2669 value = 1;
a0d0e21e 2670 else if (left < right)
79072805 2671 value = -1;
a0d0e21e 2672 else
79072805 2673 value = 0;
a0d0e21e
LW
2674 SETi(value);
2675 RETURN;
79072805 2676 }
85e6fe83
LW
2677}
2678
2679PP(pp_i_negate)
2680{
6f1401dc
DM
2681 dVAR; dSP; dTARGET;
2682 tryAMAGICun_MG(neg_amg, 0);
2683 {
2684 SV * const sv = TOPs;
2685 IV const i = SvIV_nomg(sv);
2686 SETi(-i);
2687 RETURN;
2688 }
85e6fe83
LW
2689}
2690
79072805
LW
2691/* High falutin' math. */
2692
2693PP(pp_atan2)
2694{
6f1401dc
DM
2695 dVAR; dSP; dTARGET;
2696 tryAMAGICbin_MG(atan2_amg, 0);
a0d0e21e 2697 {
096c060c 2698 dPOPTOPnnrl_nomg;
a1021d57 2699 SETn(Perl_atan2(left, right));
a0d0e21e
LW
2700 RETURN;
2701 }
79072805
LW
2702}
2703
2704PP(pp_sin)
2705{
71302fe3
NC
2706 dVAR; dSP; dTARGET;
2707 int amg_type = sin_amg;
2708 const char *neg_report = NULL;
bc81784a 2709 NV (*func)(NV) = Perl_sin;
71302fe3
NC
2710 const int op_type = PL_op->op_type;
2711
2712 switch (op_type) {
2713 case OP_COS:
2714 amg_type = cos_amg;
bc81784a 2715 func = Perl_cos;
71302fe3
NC
2716 break;
2717 case OP_EXP:
2718 amg_type = exp_amg;
bc81784a 2719 func = Perl_exp;
71302fe3
NC
2720 break;
2721 case OP_LOG:
2722 amg_type = log_amg;
bc81784a 2723 func = Perl_log;
71302fe3
NC
2724 neg_report = "log";
2725 break;
2726 case OP_SQRT:
2727 amg_type = sqrt_amg;
bc81784a 2728 func = Perl_sqrt;
71302fe3
NC
2729 neg_report = "sqrt";
2730 break;
a0d0e21e 2731 }
79072805 2732
6f1401dc
DM
2733
2734 tryAMAGICun_MG(amg_type, 0);
a0d0e21e 2735 {
6f1401dc
DM
2736 SV * const arg = POPs;
2737 const NV value = SvNV_nomg(arg);
71302fe3
NC
2738 if (neg_report) {
2739 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2740 SET_NUMERIC_STANDARD();
2741 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2742 }
2743 }
2744 XPUSHn(func(value));
a0d0e21e
LW
2745 RETURN;
2746 }
79072805
LW
2747}
2748
56cb0a1c
AD
2749/* Support Configure command-line overrides for rand() functions.
2750 After 5.005, perhaps we should replace this by Configure support
2751 for drand48(), random(), or rand(). For 5.005, though, maintain
2752 compatibility by calling rand() but allow the user to override it.
2753 See INSTALL for details. --Andy Dougherty 15 July 1998
2754*/
85ab1d1d
JH
2755/* Now it's after 5.005, and Configure supports drand48() and random(),
2756 in addition to rand(). So the overrides should not be needed any more.
2757 --Jarkko Hietaniemi 27 September 1998
2758 */
2759
2760#ifndef HAS_DRAND48_PROTO
20ce7b12 2761extern double drand48 (void);
56cb0a1c
AD
2762#endif
2763
79072805
LW
2764PP(pp_rand)
2765{
97aff369 2766 dVAR; dSP; dTARGET;
65202027 2767 NV value;
79072805
LW
2768 if (MAXARG < 1)
2769 value = 1.0;
2770 else
2771 value = POPn;
2772 if (value == 0.0)
2773 value = 1.0;
80252599 2774 if (!PL_srand_called) {
85ab1d1d 2775 (void)seedDrand01((Rand_seed_t)seed());
80252599 2776 PL_srand_called = TRUE;
93dc8474 2777 }
85ab1d1d 2778 value *= Drand01();
79072805
LW
2779 XPUSHn(value);
2780 RETURN;
2781}
2782
2783PP(pp_srand)
2784{
83832992 2785 dVAR; dSP; dTARGET;
0bd48802 2786 const UV anum = (MAXARG < 1) ? seed() : POPu;
85ab1d1d 2787 (void)seedDrand01((Rand_seed_t)anum);
80252599 2788 PL_srand_called = TRUE;
da1010ec
NC
2789 if (anum)
2790 XPUSHu(anum);
2791 else {
2792 /* Historically srand always returned true. We can avoid breaking
2793 that like this: */
2794 sv_setpvs(TARG, "0 but true");
2795 XPUSHTARG;
2796 }
83832992 2797 RETURN;
79072805
LW
2798}
2799
79072805
LW
2800PP(pp_int)
2801{
6f1401dc
DM
2802 dVAR; dSP; dTARGET;
2803 tryAMAGICun_MG(int_amg, AMGf_numeric);
774d564b 2804 {
6f1401dc
DM
2805 SV * const sv = TOPs;
2806 const IV iv = SvIV_nomg(sv);
28e5dec8
JH
2807 /* XXX it's arguable that compiler casting to IV might be subtly
2808 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2809 else preferring IV has introduced a subtle behaviour change bug. OTOH
2810 relying on floating point to be accurate is a bug. */
2811
c781a409 2812 if (!SvOK(sv)) {
922c4365 2813 SETu(0);
c781a409
RD
2814 }
2815 else if (SvIOK(sv)) {
2816 if (SvIsUV(sv))
6f1401dc 2817 SETu(SvUV_nomg(sv));
c781a409 2818 else
28e5dec8 2819 SETi(iv);
c781a409 2820 }
c781a409 2821 else {
6f1401dc 2822 const NV value = SvNV_nomg(sv);
1048ea30 2823 if (value >= 0.0) {
28e5dec8
JH
2824 if (value < (NV)UV_MAX + 0.5) {
2825 SETu(U_V(value));
2826 } else {
059a1014 2827 SETn(Perl_floor(value));
28e5dec8 2828 }
1048ea30 2829 }
28e5dec8
JH
2830 else {
2831 if (value > (NV)IV_MIN - 0.5) {
2832 SETi(I_V(value));
2833 } else {
1bbae031 2834 SETn(Perl_ceil(value));
28e5dec8
JH
2835 }
2836 }
774d564b 2837 }
79072805 2838 }
79072805
LW
2839 RETURN;
2840}
2841
463ee0b2
LW
2842PP(pp_abs)
2843{
6f1401dc
DM
2844 dVAR; dSP; dTARGET;
2845 tryAMAGICun_MG(abs_amg, AMGf_numeric);
a0d0e21e 2846 {
6f1401dc 2847 SV * const sv = TOPs;
28e5dec8 2848 /* This will cache the NV value if string isn't actually integer */
6f1401dc 2849 const IV iv = SvIV_nomg(sv);
a227d84d 2850
800401ee 2851 if (!SvOK(sv)) {
922c4365 2852 SETu(0);
800401ee
JH
2853 }
2854 else if (SvIOK(sv)) {
28e5dec8 2855 /* IVX is precise */
800401ee 2856 if (SvIsUV(sv)) {
6f1401dc 2857 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
28e5dec8
JH
2858 } else {
2859 if (iv >= 0) {
2860 SETi(iv);
2861 } else {
2862 if (iv != IV_MIN) {
2863 SETi(-iv);
2864 } else {
2865 /* 2s complement assumption. Also, not really needed as
2866 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2867 SETu(IV_MIN);
2868 }
a227d84d 2869 }
28e5dec8
JH
2870 }
2871 } else{
6f1401dc 2872 const NV value = SvNV_nomg(sv);
774d564b 2873 if (value < 0.0)
1b6737cc 2874 SETn(-value);
a4474c9e
DD
2875 else
2876 SETn(value);
774d564b 2877 }
a0d0e21e 2878 }
774d564b 2879 RETURN;
463ee0b2
LW
2880}
2881
79072805
LW
2882PP(pp_oct)
2883{
97aff369 2884 dVAR; dSP; dTARGET;
5c144d81 2885 const char *tmps;
53305cf1 2886 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
6f894ead 2887 STRLEN len;
53305cf1
NC
2888 NV result_nv;
2889 UV result_uv;
1b6737cc 2890 SV* const sv = POPs;
79072805 2891
349d4f2f 2892 tmps = (SvPV_const(sv, len));
2bc69dc4
NIS
2893 if (DO_UTF8(sv)) {
2894 /* If Unicode, try to downgrade
2895 * If not possible, croak. */
1b6737cc 2896 SV* const tsv = sv_2mortal(newSVsv(sv));
2bc69dc4
NIS
2897
2898 SvUTF8_on(tsv);
2899 sv_utf8_downgrade(tsv, FALSE);
349d4f2f 2900 tmps = SvPV_const(tsv, len);
2bc69dc4 2901 }
daa2adfd
NC
2902 if (PL_op->op_type == OP_HEX)
2903 goto hex;
2904
6f894ead 2905 while (*tmps && len && isSPACE(*tmps))
53305cf1 2906 tmps++, len--;
9e24b6e2 2907 if (*tmps == '0')
53305cf1 2908 tmps++, len--;
a674e8db 2909 if (*tmps == 'x' || *tmps == 'X') {
daa2adfd 2910 hex:
53305cf1 2911 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
daa2adfd 2912 }
a674e8db 2913 else if (*tmps == 'b' || *tmps == 'B')
53305cf1 2914 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
464e2e8a 2915 else
53305cf1
NC
2916 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2917
2918 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2919 XPUSHn(result_nv);
2920 }
2921 else {
2922 XPUSHu(result_uv);
2923 }
79072805
LW
2924 RETURN;
2925}
2926
2927/* String stuff. */
2928
2929PP(pp_length)
2930{
97aff369 2931 dVAR; dSP; dTARGET;
0bd48802 2932 SV * const sv = TOPs;
a0ed51b3 2933
656266fc 2934 if (SvGAMAGIC(sv)) {
9f621bb0
NC
2935 /* For an overloaded or magic scalar, we can't know in advance if
2936 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2937 it likes to cache the length. Maybe that should be a documented
2938 feature of it.
92331800
NC
2939 */
2940 STRLEN len;
9f621bb0
NC
2941 const char *const p
2942 = sv_2pv_flags(sv, &len,
2943 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
92331800 2944
d88e091f 2945 if (!p) {
9407f9c1
DL
2946 if (!SvPADTMP(TARG)) {
2947 sv_setsv(TARG, &PL_sv_undef);
2948 SETTARG;
2949 }
2950 SETs(&PL_sv_undef);
d88e091f 2951 }
9f621bb0 2952 else if (DO_UTF8(sv)) {
899be101 2953 SETi(utf8_length((U8*)p, (U8*)p + len));
92331800
NC
2954 }
2955 else
2956 SETi(len);
656266fc 2957 } else if (SvOK(sv)) {
9f621bb0
NC
2958 /* Neither magic nor overloaded. */
2959 if (DO_UTF8(sv))
2960 SETi(sv_len_utf8(sv));
2961 else
2962 SETi(sv_len(sv));
656266fc 2963 } else {
9407f9c1
DL
2964 if (!SvPADTMP(TARG)) {
2965 sv_setsv_nomg(TARG, &PL_sv_undef);
2966 SETTARG;
2967 }
2968 SETs(&PL_sv_undef);
92331800 2969 }
79072805
LW
2970 RETURN;
2971}
2972
2973PP(pp_substr)
2974{
97aff369 2975 dVAR; dSP; dTARGET;
79072805 2976 SV *sv;
463ee0b2 2977 STRLEN curlen;
9402d6ed 2978 STRLEN utf8_curlen;
777f7c56
EB
2979 SV * pos_sv;
2980 IV pos1_iv;
2981 int pos1_is_uv;
2982 IV pos2_iv;
2983 int pos2_is_uv;
2984 SV * len_sv;
2985 IV len_iv = 0;
2986 int len_is_uv = 1;
050e6362 2987 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
e1ec3a88 2988 const char *tmps;
777f7c56 2989 const IV arybase = CopARYBASE_get(PL_curcop);
9402d6ed 2990 SV *repl_sv = NULL;
cbbf8932 2991 const char *repl = NULL;
7b8d334a 2992 STRLEN repl_len;
050e6362 2993 const int num_args = PL_op->op_private & 7;
13e30c65 2994 bool repl_need_utf8_upgrade = FALSE;
9402d6ed 2995 bool repl_is_utf8 = FALSE;
79072805 2996
78f9721b
SM
2997 if (num_args > 2) {
2998 if (num_args > 3) {
9402d6ed 2999 repl_sv = POPs;
83003860 3000 repl = SvPV_const(repl_sv, repl_len);
9402d6ed 3001 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
7b8d334a 3002 }
777f7c56
EB
3003 len_sv = POPs;
3004 len_iv = SvIV(len_sv);
3005 len_is_uv = SvIOK_UV(len_sv);
5d82c453 3006 }
777f7c56
EB
3007 pos_sv = POPs;
3008 pos1_iv = SvIV(pos_sv);
3009 pos1_is_uv = SvIOK_UV(pos_sv);
79072805 3010 sv = POPs;
849ca7ee 3011 PUTBACK;
9402d6ed
JH
3012 if (repl_sv) {
3013 if (repl_is_utf8) {
3014 if (!DO_UTF8(sv))
3015 sv_utf8_upgrade(sv);
3016 }
13e30c65
JH
3017 else if (DO_UTF8(sv))
3018 repl_need_utf8_upgrade = TRUE;
9402d6ed 3019 }
5c144d81 3020 tmps = SvPV_const(sv, curlen);
7e2040f0 3021 if (DO_UTF8(sv)) {
9402d6ed
JH
3022 utf8_curlen = sv_len_utf8(sv);
3023 if (utf8_curlen == curlen)
3024 utf8_curlen = 0;
a0ed51b3 3025 else
9402d6ed 3026 curlen = utf8_curlen;
a0ed51b3 3027 }
d1c2b58a 3028 else
9402d6ed 3029 utf8_curlen = 0;
a0ed51b3 3030
777f7c56
EB
3031 if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
3032 UV pos1_uv = pos1_iv-arybase;
3033 /* Overflow can occur when $[ < 0 */
3034 if (arybase < 0 && pos1_uv < (UV)pos1_iv)
1c900557 3035 goto bound_fail;
777f7c56
EB
3036 pos1_iv = pos1_uv;
3037 pos1_is_uv = 1;
3038 }
3039 else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
1c900557 3040 goto bound_fail; /* $[=3; substr($_,2,...) */
777f7c56
EB
3041 }
3042 else { /* pos < $[ */
3043 if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
3044 pos1_iv = curlen;
3045 pos1_is_uv = 1;
3046 } else {
3047 if (curlen) {
3048 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3049 pos1_iv += curlen;
3050 }
5d82c453 3051 }
68dc0745 3052 }
777f7c56
EB
3053 if (pos1_is_uv || pos1_iv > 0) {
3054 if ((UV)pos1_iv > curlen)
1c900557 3055 goto bound_fail;
777f7c56
EB
3056 }
3057
3058 if (num_args > 2) {
3059 if (!len_is_uv && len_iv < 0) {
3060 pos2_iv = curlen + len_iv;
3061 if (curlen)
3062 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3063 else
3064 pos2_is_uv = 0;
3065 } else { /* len_iv >= 0 */
3066 if (!pos1_is_uv && pos1_iv < 0) {
3067 pos2_iv = pos1_iv + len_iv;
3068 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3069 } else {
3070 if ((UV)len_iv > curlen-(UV)pos1_iv)
3071 pos2_iv = curlen;
3072 else
3073 pos2_iv = pos1_iv+len_iv;
3074 pos2_is_uv = 1;
3075 }
5d82c453 3076 }
2304df62 3077 }
79072805 3078 else {
777f7c56
EB
3079 pos2_iv = curlen;
3080 pos2_is_uv = 1;
3081 }
3082
3083 if (!pos2_is_uv && pos2_iv < 0) {
3084 if (!pos1_is_uv && pos1_iv < 0)
1c900557 3085 goto bound_fail;
777f7c56
EB
3086 pos2_iv = 0;
3087 }
3088 else if (!pos1_is_uv && pos1_iv < 0)
3089 pos1_iv = 0;
3090
3091 if ((UV)pos2_iv < (UV)pos1_iv)
3092 pos2_iv = pos1_iv;
3093 if ((UV)pos2_iv > curlen)
3094 pos2_iv = curlen;
3095
3096 {
3097 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3098 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3099 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
777f7c56 3100 STRLEN byte_len = len;
d931b1be
NC
3101 STRLEN byte_pos = utf8_curlen
3102 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3103
2154eca7
EB
3104 if (lvalue && !repl) {
3105 SV * ret;
3106
3107 if (!SvGMAGICAL(sv)) {
3108 if (SvROK(sv)) {
3109 SvPV_force_nolen(sv);
3110 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3111 "Attempt to use reference as lvalue in substr");
3112 }
3113 if (isGV_with_GP(sv))
3114 SvPV_force_nolen(sv);
3115 else if (SvOK(sv)) /* is it defined ? */
3116 (void)SvPOK_only_UTF8(sv);
3117 else
3118 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
781e7547 3119 }
2154eca7
EB
3120
3121 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3122 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3123 LvTYPE(ret) = 'x';
3124 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3125 LvTARGOFF(ret) = pos;
3126 LvTARGLEN(ret) = len;
3127
3128 SPAGAIN;
3129 PUSHs(ret); /* avoid SvSETMAGIC here */
3130 RETURN;
781e7547
DM
3131 }
3132
2154eca7
EB
3133 SvTAINTED_off(TARG); /* decontaminate */
3134 SvUTF8_off(TARG); /* decontaminate */
3135
3136 tmps += byte_pos;
777f7c56 3137 sv_setpvn(TARG, tmps, byte_len);
12aa1545 3138#ifdef USE_LOCALE_COLLATE
14befaf4 3139 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
12aa1545 3140#endif
9402d6ed 3141 if (utf8_curlen)
7f66633b 3142 SvUTF8_on(TARG);
2154eca7 3143
f7928d6c 3144 if (repl) {
13e30c65
JH
3145 SV* repl_sv_copy = NULL;
3146
3147 if (repl_need_utf8_upgrade) {
3148 repl_sv_copy = newSVsv(repl_sv);
3149 sv_utf8_upgrade(repl_sv_copy);
349d4f2f 3150 repl = SvPV_const(repl_sv_copy, repl_len);
13e30c65
JH
3151 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3152 }
502d9230
VP
3153 if (!SvOK(sv))
3154 sv_setpvs(sv, "");
777f7c56 3155 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
9402d6ed 3156 if (repl_is_utf8)
f7928d6c 3157 SvUTF8_on(sv);
ef8d46e8 3158 SvREFCNT_dec(repl_sv_copy);
f7928d6c 3159 }
79072805 3160 }
849ca7ee 3161 SPAGAIN;
e27c778f
FC
3162 SvSETMAGIC(TARG);
3163 PUSHs(TARG);
79072805 3164 RETURN;
777f7c56 3165
1c900557 3166bound_fail:
777f7c56
EB
3167 if (lvalue || repl)
3168 Perl_croak(aTHX_ "substr outside of string");
3169 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3170 RETPUSHUNDEF;
79072805
LW
3171}
3172
3173PP(pp_vec)
3174{
2154eca7 3175 dVAR; dSP;
1b6737cc
AL
3176 register const IV size = POPi;
3177 register const IV offset = POPi;
3178 register SV * const src = POPs;
3179 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
2154eca7 3180 SV * ret;
a0d0e21e 3181
81e118e0 3182 if (lvalue) { /* it's an lvalue! */
2154eca7
EB
3183 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3184 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3185 LvTYPE(ret) = 'v';
3186 LvTARG(ret) = SvREFCNT_inc_simple(src);
3187 LvTARGOFF(ret) = offset;
3188 LvTARGLEN(ret) = size;
3189 }
3190 else {
3191 dTARGET;
3192 SvTAINTED_off(TARG); /* decontaminate */
3193 ret = TARG;
79072805
LW
3194 }
3195
2154eca7
EB
3196 sv_setuv(ret, do_vecget(src, offset, size));
3197 PUSHs(ret);
79072805
LW
3198 RETURN;
3199}
3200
3201PP(pp_index)
3202{
97aff369 3203 dVAR; dSP; dTARGET;
79072805
LW
3204 SV *big;
3205 SV *little;
c445ea15 3206 SV *temp = NULL;
ad66a58c 3207 STRLEN biglen;
2723d216 3208 STRLEN llen = 0;
79072805
LW
3209 I32 offset;
3210 I32 retval;
73ee8be2
NC
3211 const char *big_p;
3212 const char *little_p;
fc15ae8f 3213 const I32 arybase = CopARYBASE_get(PL_curcop);
2f040f7f
NC
3214 bool big_utf8;
3215 bool little_utf8;
2723d216 3216 const bool is_index = PL_op->op_type == OP_INDEX;
79072805 3217
2723d216
NC
3218 if (MAXARG >= 3) {
3219 /* arybase is in characters, like offset, so combine prior to the
3220 UTF-8 to bytes calculation. */
79072805 3221 offset = POPi - arybase;
2723d216 3222 }
79072805
LW
3223 little = POPs;
3224 big = POPs;
73ee8be2
NC
3225 big_p = SvPV_const(big, biglen);
3226 little_p = SvPV_const(little, llen);
3227
e609e586
NC
3228 big_utf8 = DO_UTF8(big);
3229 little_utf8 = DO_UTF8(little);
3230 if (big_utf8 ^ little_utf8) {
3231 /* One needs to be upgraded. */
2f040f7f
NC
3232 if (little_utf8 && !PL_encoding) {
3233 /* Well, maybe instead we might be able to downgrade the small
3234 string? */
1eced8f8 3235 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
2f040f7f
NC
3236 &little_utf8);
3237 if (little_utf8) {
3238 /* If the large string is ISO-8859-1, and it's not possible to
3239 convert the small string to ISO-8859-1, then there is no
3240 way that it could be found anywhere by index. */
3241 retval = -1;
3242 goto fail;
3243 }
e609e586 3244
2f040f7f
NC
3245 /* At this point, pv is a malloc()ed string. So donate it to temp
3246 to ensure it will get free()d */
3247 little = temp = newSV(0);
73ee8be2
NC
3248 sv_usepvn(temp, pv, llen);
3249 little_p = SvPVX(little);
e609e586 3250 } else {
73ee8be2
NC
3251 temp = little_utf8
3252 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
2f040f7f
NC
3253
3254 if (PL_encoding) {
3255 sv_recode_to_utf8(temp, PL_encoding);
3256 } else {
3257 sv_utf8_upgrade(temp);
3258 }
3259 if (little_utf8) {
3260 big = temp;
3261 big_utf8 = TRUE;
73ee8be2 3262 big_p = SvPV_const(big, biglen);
2f040f7f
NC
3263 } else {
3264 little = temp;
73ee8be2 3265 little_p = SvPV_const(little, llen);
2f040f7f 3266 }
e609e586
NC
3267 }
3268 }
73ee8be2
NC
3269 if (SvGAMAGIC(big)) {
3270 /* Life just becomes a lot easier if I use a temporary here.
3271 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3272 will trigger magic and overloading again, as will fbm_instr()
3273 */
59cd0e26
NC
3274 big = newSVpvn_flags(big_p, biglen,
3275 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
73ee8be2
NC
3276 big_p = SvPVX(big);
3277 }
e4e44778 3278 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
73ee8be2
NC
3279 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3280 warn on undef, and we've already triggered a warning with the
3281 SvPV_const some lines above. We can't remove that, as we need to
3282 call some SvPV to trigger overloading early and find out if the
3283 string is UTF-8.
3284 This is all getting to messy. The API isn't quite clean enough,
3285 because data access has side effects.
3286 */
59cd0e26
NC
3287 little = newSVpvn_flags(little_p, llen,
3288 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
73ee8be2
NC
3289 little_p = SvPVX(little);
3290 }
e609e586 3291
79072805 3292 if (MAXARG < 3)
2723d216 3293 offset = is_index ? 0 : biglen;
a0ed51b3 3294 else {
ad66a58c 3295 if (big_utf8 && offset > 0)
a0ed51b3 3296 sv_pos_u2b(big, &offset, 0);
73ee8be2
NC
3297 if (!is_index)
3298 offset += llen;
a0ed51b3 3299 }
79072805
LW
3300 if (offset < 0)
3301 offset = 0;
ad66a58c
NC
3302 else if (offset > (I32)biglen)
3303 offset = biglen;
73ee8be2
NC
3304 if (!(little_p = is_index
3305 ? fbm_instr((unsigned char*)big_p + offset,
3306 (unsigned char*)big_p + biglen, little, 0)
3307 : rninstr(big_p, big_p + offset,
3308 little_p, little_p + llen)))
a0ed51b3 3309 retval = -1;
ad66a58c 3310 else {
73ee8be2 3311 retval = little_p - big_p;
ad66a58c
NC
3312 if (retval > 0 && big_utf8)
3313 sv_pos_b2u(big, &retval);
3314 }
ef8d46e8 3315 SvREFCNT_dec(temp);
2723d216 3316 fail:
a0ed51b3 3317 PUSHi(retval + arybase);
79072805
LW
3318 RETURN;
3319}
3320
3321PP(pp_sprintf)
3322{
97aff369 3323 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3e6bd4bf 3324 SvTAINTED_off(TARG);
79072805 3325 do_sprintf(TARG, SP-MARK, MARK+1);
bbce6d69 3326 TAINT_IF(SvTAINTED(TARG));
79072805
LW
3327 SP = ORIGMARK;
3328 PUSHTARG;
3329 RETURN;
3330}
3331
79072805
LW
3332PP(pp_ord)
3333{
97aff369 3334 dVAR; dSP; dTARGET;
1eced8f8 3335
7df053ec 3336 SV *argsv = POPs;
ba210ebe 3337 STRLEN len;
349d4f2f 3338 const U8 *s = (U8*)SvPV_const(argsv, len);
121910a4 3339
799ef3cb 3340 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
1eced8f8 3341 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
799ef3cb 3342 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
121910a4
JH
3343 argsv = tmpsv;
3344 }
79072805 3345
872c91ae 3346 XPUSHu(DO_UTF8(argsv) ?
89ebb4a3 3347 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
5fc32dea 3348 (UV)(*s & 0xff));
68795e93 3349
79072805
LW
3350 RETURN;
3351}
3352
463ee0b2
LW
3353PP(pp_chr)
3354{
97aff369 3355 dVAR; dSP; dTARGET;
463ee0b2 3356 char *tmps;
8a064bd6
JH
3357 UV value;
3358
3359 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3360 ||
3361 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3362 if (IN_BYTES) {
3363 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3364 } else {
3365 (void) POPs; /* Ignore the argument value. */
3366 value = UNICODE_REPLACEMENT;
3367 }
3368 } else {
3369 value = POPu;
3370 }
463ee0b2 3371
862a34c6 3372 SvUPGRADE(TARG,SVt_PV);
a0ed51b3 3373
0064a8a9 3374 if (value > 255 && !IN_BYTES) {
eb160463 3375 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
62961d2e 3376 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
349d4f2f 3377 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
a0ed51b3
LW
3378 *tmps = '\0';
3379 (void)SvPOK_only(TARG);
aa6ffa16 3380 SvUTF8_on(TARG);
a0ed51b3
LW
3381 XPUSHs(TARG);
3382 RETURN;
3383 }
3384
748a9306 3385 SvGROW(TARG,2);
463ee0b2
LW
3386 SvCUR_set(TARG, 1);
3387 tmps = SvPVX(TARG);
eb160463 3388 *tmps++ = (char)value;
748a9306 3389 *tmps = '\0';
a0d0e21e 3390 (void)SvPOK_only(TARG);
4c5ed6e2 3391
88632417 3392 if (PL_encoding && !IN_BYTES) {
799ef3cb 3393 sv_recode_to_utf8(TARG, PL_encoding);
88632417
JH
3394 tmps = SvPVX(TARG);
3395 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
4c5ed6e2
TS
3396 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3397 SvGROW(TARG, 2);
d5a15ac2 3398 tmps = SvPVX(TARG);
4c5ed6e2
TS
3399 SvCUR_set(TARG, 1);
3400 *tmps++ = (char)value;
88632417 3401 *tmps = '\0';
4c5ed6e2 3402 SvUTF8_off(TARG);
88632417
JH
3403 }
3404 }
4c5ed6e2 3405
463ee0b2
LW
3406 XPUSHs(TARG);
3407 RETURN;
3408}
3409
79072805
LW
3410PP(pp_crypt)
3411{
79072805 3412#ifdef HAS_CRYPT
97aff369 3413 dVAR; dSP; dTARGET;
5f74f29c 3414 dPOPTOPssrl;
85c16d83 3415 STRLEN len;
10516c54 3416 const char *tmps = SvPV_const(left, len);
2bc69dc4 3417
85c16d83 3418 if (DO_UTF8(left)) {
2bc69dc4 3419 /* If Unicode, try to downgrade.
f2791508
JH
3420 * If not possible, croak.
3421 * Yes, we made this up. */
1b6737cc 3422 SV* const tsv = sv_2mortal(newSVsv(left));
2bc69dc4 3423
f2791508 3424 SvUTF8_on(tsv);
2bc69dc4 3425 sv_utf8_downgrade(tsv, FALSE);
349d4f2f 3426 tmps = SvPV_const(tsv, len);
85c16d83 3427 }
05404ffe
JH
3428# ifdef USE_ITHREADS
3429# ifdef HAS_CRYPT_R
3430 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3431 /* This should be threadsafe because in ithreads there is only
3432 * one thread per interpreter. If this would not be true,
3433 * we would need a mutex to protect this malloc. */
3434 PL_reentrant_buffer->_crypt_struct_buffer =
3435 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3436#if defined(__GLIBC__) || defined(__EMX__)
3437 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3438 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3439 /* work around glibc-2.2.5 bug */
3440 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3441 }
05404ffe 3442#endif
6ab58e4d 3443 }
05404ffe
JH
3444# endif /* HAS_CRYPT_R */
3445# endif /* USE_ITHREADS */
5f74f29c 3446# ifdef FCRYPT
83003860 3447 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
5f74f29c 3448# else
83003860 3449 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
5f74f29c 3450# endif
ec93b65f 3451 SETTARG;
4808266b 3452 RETURN;
79072805 3453#else
b13b2135 3454 DIE(aTHX_
79072805
LW
3455 "The crypt() function is unimplemented due to excessive paranoia.");
3456#endif
79072805
LW
3457}
3458
00f254e2
KW
3459/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3460 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3461
00f254e2
KW
3462/* Below are several macros that generate code */
3463/* Generates code to store a unicode codepoint c that is known to occupy
3464 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3465#define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3466 STMT_START { \
3467 *(p) = UTF8_TWO_BYTE_HI(c); \
3468 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3469 } STMT_END
3470
3471/* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3472 * available byte after the two bytes */
3473#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3474 STMT_START { \
3475 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3476 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3477 } STMT_END
3478
3479/* Generates code to store the upper case of latin1 character l which is known
3480 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3481 * are only two characters that fit this description, and this macro knows
3482 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3483 * bytes */
3484#define STORE_NON_LATIN1_UC(p, l) \
3485STMT_START { \
3486 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3487 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3488 } else { /* Must be the following letter */ \
3489 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3490 } \
3491} STMT_END
3492
3493/* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3494 * after the character stored */
3495#define CAT_NON_LATIN1_UC(p, l) \
3496STMT_START { \
3497 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3498 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3499 } else { \
3500 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3501 } \
3502} STMT_END
3503
3504/* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3505 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3506 * and must require two bytes to store it. Advances p to point to the next
3507 * available position */
3508#define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3509STMT_START { \
3510 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3511 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3512 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3513 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3514 } else {/* else is one of the other two special cases */ \
3515 CAT_NON_LATIN1_UC((p), (l)); \
3516 } \
3517} STMT_END
3518
79072805
LW
3519PP(pp_ucfirst)
3520{
00f254e2
KW
3521 /* Actually is both lcfirst() and ucfirst(). Only the first character
3522 * changes. This means that possibly we can change in-place, ie., just
3523 * take the source and change that one character and store it back, but not
3524 * if read-only etc, or if the length changes */
3525
97aff369 3526 dVAR;
39644a26 3527 dSP;
d54190f6 3528 SV *source = TOPs;
00f254e2 3529 STRLEN slen; /* slen is the byte length of the whole SV. */
d54190f6
NC
3530 STRLEN need;
3531 SV *dest;
00f254e2
KW
3532 bool inplace; /* ? Convert first char only, in-place */
3533 bool doing_utf8 = FALSE; /* ? using utf8 */
3534 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
12e9c124 3535 const int op_type = PL_op->op_type;
d54190f6
NC
3536 const U8 *s;
3537 U8 *d;
3538 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
00f254e2
KW
3539 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3540 * stored as UTF-8 at s. */
3541 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3542 * lowercased) character stored in tmpbuf. May be either
3543 * UTF-8 or not, but in either case is the number of bytes */
d54190f6
NC
3544
3545 SvGETMAGIC(source);
3546 if (SvOK(source)) {
3547 s = (const U8*)SvPV_nomg_const(source, slen);
3548 } else {
0a0ffbce
RGS
3549 if (ckWARN(WARN_UNINITIALIZED))
3550 report_uninit(source);
1eced8f8 3551 s = (const U8*)"";
d54190f6
NC
3552 slen = 0;
3553 }
a0ed51b3 3554
00f254e2
KW
3555 /* We may be able to get away with changing only the first character, in
3556 * place, but not if read-only, etc. Later we may discover more reasons to
3557 * not convert in-place. */
3558 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3559
3560 /* First calculate what the changed first character should be. This affects
3561 * whether we can just swap it out, leaving the rest of the string unchanged,
3562 * or even if have to convert the dest to UTF-8 when the source isn't */
3563
3564 if (! slen) { /* If empty */
3565 need = 1; /* still need a trailing NUL */
3566 }
3567 else if (DO_UTF8(source)) { /* Is the source utf8? */
d54190f6 3568 doing_utf8 = TRUE;
00f254e2
KW
3569
3570/* TODO: This is #ifdefd out because it has hard-coded the standard mappings,
3571 * and doesn't allow for the user to specify their own. When code is added to
3572 * detect if there is a user-defined mapping in force here, and if so to use
3573 * that, then the code below can be compiled. The detection would be a good
3574 * thing anyway, as currently the user-defined mappings only work on utf8
3575 * strings, and thus depend on the chosen internal storage method, which is a
3576 * bad thing */
3577#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3578 if (UTF8_IS_INVARIANT(*s)) {
3579
3580 /* An invariant source character is either ASCII or, in EBCDIC, an
3581 * ASCII equivalent or a caseless C1 control. In both these cases,
3582 * the lower and upper cases of any character are also invariants
3583 * (and title case is the same as upper case). So it is safe to
3584 * use the simple case change macros which avoid the overhead of
3585 * the general functions. Note that if perl were to be extended to
3586 * do locale handling in UTF-8 strings, this wouldn't be true in,
3587 * for example, Lithuanian or Turkic. */
3588 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3589 tculen = ulen = 1;
3590 need = slen + 1;
12e9c124 3591 }
00f254e2
KW
3592 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3593 U8 chr;
3594
3595 /* Similarly, if the source character isn't invariant but is in the
3596 * latin1 range (or EBCDIC equivalent thereof), we have the case
3597 * changes compiled into perl, and can avoid the overhead of the
3598 * general functions. In this range, the characters are stored as
3599 * two UTF-8 bytes, and it so happens that any changed-case version
3600 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3601 tculen = ulen = 2;
3602 need = slen + 1;
3603
3604 /* Convert the two source bytes to a single Unicode code point
3605 * value, change case and save for below */
356979f4 3606 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
00f254e2
KW
3607 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3608 U8 lower = toLOWER_LATIN1(chr);
3609 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3610 }
3611 else { /* ucfirst */
3612 U8 upper = toUPPER_LATIN1_MOD(chr);
3613
3614 /* Most of the latin1 range characters are well-behaved. Their
3615 * title and upper cases are the same, and are also in the
3616 * latin1 range. The macro above returns their upper (hence
3617 * title) case, and all that need be done is to save the result
3618 * for below. However, several characters are problematic, and
3619 * have to be handled specially. The MOD in the macro name
3620 * above means that these tricky characters all get mapped to
3621 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3622 * This mapping saves some tests for the majority of the
3623 * characters */
3624
3625 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3626
3627 /* Not tricky. Just save it. */
3628 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3629 }
3630 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3631
3632 /* This one is tricky because it is two characters long,
3633 * though the UTF-8 is still two bytes, so the stored
3634 * length doesn't change */
3635 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3636 *(tmpbuf + 1) = 's';
3637 }
3638 else {
3639
3640 /* The other two have their title and upper cases the same,
3641 * but are tricky because the changed-case characters
3642 * aren't in the latin1 range. They, however, do fit into
3643 * two UTF-8 bytes */
3644 STORE_NON_LATIN1_UC(tmpbuf, chr);
3645 }
3646 }
3647 }
3648 else {
3649#endif /* end of dont want to break user-defined casing */
3650
3651 /* Here, can't short-cut the general case */
3652
3653 utf8_to_uvchr(s, &ulen);
3654 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3655 else toLOWER_utf8(s, tmpbuf, &tculen);
3656
3657 /* we can't do in-place if the length changes. */
3658 if (ulen != tculen) inplace = FALSE;
3659 need = slen + 1 - ulen + tculen;
3660#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
3661 }
3662#endif
d54190f6 3663 }
00f254e2
KW
3664 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3665 * latin1 is treated as caseless. Note that a locale takes
3666 * precedence */
3667 tculen = 1; /* Most characters will require one byte, but this will
3668 * need to be overridden for the tricky ones */
3669 need = slen + 1;
3670
3671 if (op_type == OP_LCFIRST) {
d54190f6 3672
00f254e2
KW
3673 /* lower case the first letter: no trickiness for any character */
3674 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3675 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3676 }
3677 /* is ucfirst() */
3678 else if (IN_LOCALE_RUNTIME) {
3679 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3680 * have upper and title case different
3681 */
3682 }
3683 else if (! IN_UNI_8_BIT) {
3684 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3685 * on EBCDIC machines whatever the
3686 * native function does */
3687 }
3688 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3689 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3690
3691 /* tmpbuf now has the correct title case for all latin1 characters
3692 * except for the several ones that have tricky handling. All
3693 * of these are mapped by the MOD to the letter below. */
3694 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3695
3696 /* The length is going to change, with all three of these, so
3697 * can't replace just the first character */
3698 inplace = FALSE;
3699
3700 /* We use the original to distinguish between these tricky
3701 * cases */
3702 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3703 /* Two character title case 'Ss', but can remain non-UTF-8 */
3704 need = slen + 2;
3705 *tmpbuf = 'S';
3706 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3707 tculen = 2;
3708 }
3709 else {
d54190f6 3710
00f254e2
KW
3711 /* The other two tricky ones have their title case outside
3712 * latin1. It is the same as their upper case. */
3713 doing_utf8 = TRUE;
3714 STORE_NON_LATIN1_UC(tmpbuf, *s);
3715
3716 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3717 * and their upper cases is 2. */
3718 tculen = ulen = 2;
3719
3720 /* The entire result will have to be in UTF-8. Assume worst
3721 * case sizing in conversion. (all latin1 characters occupy
3722 * at most two bytes in utf8) */
3723 convert_source_to_utf8 = TRUE;
3724 need = slen * 2 + 1;
3725 }
3726 } /* End of is one of the three special chars */
3727 } /* End of use Unicode (Latin1) semantics */
3728 } /* End of changing the case of the first character */
3729
3730 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3731 * generate the result */
3732 if (inplace) {
3733
3734 /* We can convert in place. This means we change just the first
3735 * character without disturbing the rest; no need to grow */
d54190f6
NC
3736 dest = source;
3737 s = d = (U8*)SvPV_force_nomg(source, slen);
3738 } else {
3739 dTARGET;
3740
3741 dest = TARG;
3742
00f254e2
KW
3743 /* Here, we can't convert in place; we earlier calculated how much
3744 * space we will need, so grow to accommodate that */
d54190f6 3745 SvUPGRADE(dest, SVt_PV);
3b416f41 3746 d = (U8*)SvGROW(dest, need);
d54190f6
NC
3747 (void)SvPOK_only(dest);
3748
3749 SETs(dest);
d54190f6 3750 }
44bc797b 3751
d54190f6 3752 if (doing_utf8) {
00f254e2
KW
3753 if (! inplace) {
3754 if (! convert_source_to_utf8) {
3755
3756 /* Here both source and dest are in UTF-8, but have to create
3757 * the entire output. We initialize the result to be the
3758 * title/lower cased first character, and then append the rest
3759 * of the string. */
3760 sv_setpvn(dest, (char*)tmpbuf, tculen);
3761 if (slen > ulen) {
3762 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3763 }
3764 }
3765 else {
3766 const U8 *const send = s + slen;
3767
3768 /* Here the dest needs to be in UTF-8, but the source isn't,
3769 * except we earlier UTF-8'd the first character of the source
3770 * into tmpbuf. First put that into dest, and then append the
3771 * rest of the source, converting it to UTF-8 as we go. */
3772
3773 /* Assert tculen is 2 here because the only two characters that
3774 * get to this part of the code have 2-byte UTF-8 equivalents */
3775 *d++ = *tmpbuf;
3776 *d++ = *(tmpbuf + 1);
3777 s++; /* We have just processed the 1st char */
3778
3779 for (; s < send; s++) {
3780 d = uvchr_to_utf8(d, *s);
3781 }
3782 *d = '\0';
3783 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3784 }
d54190f6 3785 SvUTF8_on(dest);
a0ed51b3 3786 }
00f254e2 3787 else { /* in-place UTF-8. Just overwrite the first character */
d54190f6
NC
3788 Copy(tmpbuf, d, tculen, U8);
3789 SvCUR_set(dest, need - 1);
a0ed51b3 3790 }
a0ed51b3 3791 }
00f254e2
KW
3792 else { /* Neither source nor dest are in or need to be UTF-8 */
3793 if (slen) {
2de3dbcc 3794 if (IN_LOCALE_RUNTIME) {
31351b04 3795 TAINT;
d54190f6 3796 SvTAINTED_on(dest);
31351b04 3797 }
00f254e2
KW
3798 if (inplace) { /* in-place, only need to change the 1st char */
3799 *d = *tmpbuf;
3800 }
3801 else { /* Not in-place */
3802
3803 /* Copy the case-changed character(s) from tmpbuf */
3804 Copy(tmpbuf, d, tculen, U8);
3805 d += tculen - 1; /* Code below expects d to point to final
3806 * character stored */
3807 }
3808 }
3809 else { /* empty source */
3810 /* See bug #39028: Don't taint if empty */
d54190f6
NC
3811 *d = *s;
3812 }
3813
00f254e2
KW
3814 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3815 * the destination to retain that flag */
d54190f6
NC
3816 if (SvUTF8(source))
3817 SvUTF8_on(dest);
3818
00f254e2 3819 if (!inplace) { /* Finish the rest of the string, unchanged */
d54190f6
NC
3820 /* This will copy the trailing NUL */
3821 Copy(s + 1, d + 1, slen, U8);
3822 SvCUR_set(dest, need - 1);
bbce6d69 3823 }
bbce6d69 3824 }
539689e7
FC
3825 if (dest != source && SvTAINTED(source))
3826 SvTAINT(dest);
d54190f6 3827 SvSETMAGIC(dest);
79072805
LW
3828 RETURN;
3829}
3830
67306194
NC
3831/* There's so much setup/teardown code common between uc and lc, I wonder if
3832 it would be worth merging the two, and just having a switch outside each
00f254e2 3833 of the three tight loops. There is less and less commonality though */
79072805
LW
3834PP(pp_uc)
3835{
97aff369 3836 dVAR;
39644a26 3837 dSP;
67306194 3838 SV *source = TOPs;
463ee0b2 3839 STRLEN len;
67306194
NC
3840 STRLEN min;
3841 SV *dest;
3842 const U8 *s;
3843 U8 *d;
79072805 3844
67306194
NC
3845 SvGETMAGIC(source);
3846
3847 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
00f254e2
KW
3848 && SvTEMP(source) && !DO_UTF8(source)
3849 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3850
3851 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3852 * make the loop tight, so we overwrite the source with the dest before
3853 * looking at it, and we need to look at the original source
3854 * afterwards. There would also need to be code added to handle
3855 * switching to not in-place in midstream if we run into characters
3856 * that change the length.
3857 */
67306194
NC
3858 dest = source;
3859 s = d = (U8*)SvPV_force_nomg(source, len);
3860 min = len + 1;
3861 } else {
a0ed51b3 3862 dTARGET;
a0ed51b3 3863
67306194 3864 dest = TARG;
128c9517 3865
67306194
NC
3866 /* The old implementation would copy source into TARG at this point.
3867 This had the side effect that if source was undef, TARG was now
3868 an undefined SV with PADTMP set, and they don't warn inside
3869 sv_2pv_flags(). However, we're now getting the PV direct from
3870 source, which doesn't have PADTMP set, so it would warn. Hence the
3871 little games. */
3872
3873 if (SvOK(source)) {
3874 s = (const U8*)SvPV_nomg_const(source, len);
3875 } else {
0a0ffbce
RGS
3876 if (ckWARN(WARN_UNINITIALIZED))
3877 report_uninit(source);
1eced8f8 3878 s = (const U8*)"";
67306194 3879 len = 0;
a0ed51b3 3880 }
67306194
NC
3881 min = len + 1;
3882
3883 SvUPGRADE(dest, SVt_PV);
3b416f41 3884 d = (U8*)SvGROW(dest, min);
67306194
NC
3885 (void)SvPOK_only(dest);
3886
3887 SETs(dest);
a0ed51b3 3888 }
31351b04 3889
67306194
NC
3890 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3891 to check DO_UTF8 again here. */
3892
3893 if (DO_UTF8(source)) {
3894 const U8 *const send = s + len;
3895 U8 tmpbuf[UTF8_MAXBYTES+1];
3896
4c8a458a
KW
3897 /* All occurrences of these are to be moved to follow any other marks.
3898 * This is context-dependent. We may not be passed enough context to
3899 * move the iota subscript beyond all of them, but we do the best we can
3900 * with what we're given. The result is always better than if we
3901 * hadn't done this. And, the problem would only arise if we are
3902 * passed a character without all its combining marks, which would be
3903 * the caller's mistake. The information this is based on comes from a
3904 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3905 * itself) and so can't be checked properly to see if it ever gets
3906 * revised. But the likelihood of it changing is remote */
00f254e2 3907 bool in_iota_subscript = FALSE;
00f254e2 3908
67306194 3909 while (s < send) {
00f254e2
KW
3910 if (in_iota_subscript && ! is_utf8_mark(s)) {
3911 /* A non-mark. Time to output the iota subscript */
3912#define GREEK_CAPITAL_LETTER_IOTA 0x0399
3913#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3914
3915 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3916 in_iota_subscript = FALSE;
3917 }
00f254e2
KW
3918
3919
3920/* See comments at the first instance in this file of this ifdef */
3921#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
67306194 3922
00f254e2
KW
3923 /* If the UTF-8 character is invariant, then it is in the range
3924 * known by the standard macro; result is only one byte long */
3925 if (UTF8_IS_INVARIANT(*s)) {
3926 *d++ = toUPPER(*s);
3927 s++;
3928 }
3929 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3930
3931 /* Likewise, if it fits in a byte, its case change is in our
3932 * table */
81367fea 3933 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *s++);
00f254e2
KW
3934 U8 upper = toUPPER_LATIN1_MOD(orig);
3935 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
81367fea 3936 s++;
00f254e2
KW
3937 }
3938 else {
3939#else
3940 {
3941#endif
3942
3943 /* Otherwise, need the general UTF-8 case. Get the changed
3944 * case value and copy it to the output buffer */
3945
3946 const STRLEN u = UTF8SKIP(s);
3947 STRLEN ulen;
67306194 3948
00f254e2 3949 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
4c8a458a
KW
3950 if (uv == GREEK_CAPITAL_LETTER_IOTA
3951 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
3952 {
00f254e2
KW
3953 in_iota_subscript = TRUE;
3954 }
3955 else {
00f254e2
KW
3956 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3957 /* If the eventually required minimum size outgrows
3958 * the available space, we need to grow. */
3959 const UV o = d - (U8*)SvPVX_const(dest);
3960
3961 /* If someone uppercases one million U+03B0s we
3962 * SvGROW() one million times. Or we could try
3963 * guessing how much to allocate without allocating too
4c8a458a
KW
3964 * much. Such is life. See corresponding comment in
3965 * lc code for another option */
00f254e2
KW
3966 SvGROW(dest, min);
3967 d = (U8*)SvPVX(dest) + o;
3968 }
3969 Copy(tmpbuf, d, ulen, U8);
3970 d += ulen;
00f254e2 3971 }
00f254e2 3972 s += u;
67306194 3973 }
67306194 3974 }
4c8a458a
KW
3975 if (in_iota_subscript) {
3976 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3977 }
67306194
NC
3978 SvUTF8_on(dest);
3979 *d = '\0';
3980 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4c8a458a
KW
3981 }
3982 else { /* Not UTF-8 */
67306194
NC
3983 if (len) {
3984 const U8 *const send = s + len;
00f254e2
KW
3985
3986 /* Use locale casing if in locale; regular style if not treating
3987 * latin1 as having case; otherwise the latin1 casing. Do the
3988 * whole thing in a tight loop, for speed, */
2de3dbcc 3989 if (IN_LOCALE_RUNTIME) {
31351b04 3990 TAINT;
67306194
NC
3991 SvTAINTED_on(dest);
3992 for (; s < send; d++, s++)
3993 *d = toUPPER_LC(*s);
31351b04 3994 }
00f254e2
KW
3995 else if (! IN_UNI_8_BIT) {
3996 for (; s < send; d++, s++) {
67306194 3997 *d = toUPPER(*s);
00f254e2 3998 }
31351b04 3999 }
00f254e2
KW
4000 else {
4001 for (; s < send; d++, s++) {
4002 *d = toUPPER_LATIN1_MOD(*s);
4003 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
4004
4005 /* The mainstream case is the tight loop above. To avoid
4006 * extra tests in that, all three characters that require
4007 * special handling are mapped by the MOD to the one tested
4008 * just above.
4009 * Use the source to distinguish between the three cases */
4010
4011 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4012
4013 /* uc() of this requires 2 characters, but they are
4014 * ASCII. If not enough room, grow the string */
4015 if (SvLEN(dest) < ++min) {
4016 const UV o = d - (U8*)SvPVX_const(dest);
4017 SvGROW(dest, min);
4018 d = (U8*)SvPVX(dest) + o;
4019 }
4020 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4021 continue; /* Back to the tight loop; still in ASCII */
4022 }
4023
4024 /* The other two special handling characters have their
4025 * upper cases outside the latin1 range, hence need to be
4026 * in UTF-8, so the whole result needs to be in UTF-8. So,
4027 * here we are somewhere in the middle of processing a
4028 * non-UTF-8 string, and realize that we will have to convert
4029 * the whole thing to UTF-8. What to do? There are
4030 * several possibilities. The simplest to code is to
4031 * convert what we have so far, set a flag, and continue on
4032 * in the loop. The flag would be tested each time through
4033 * the loop, and if set, the next character would be
4034 * converted to UTF-8 and stored. But, I (khw) didn't want
4035 * to slow down the mainstream case at all for this fairly
4036 * rare case, so I didn't want to add a test that didn't
4037 * absolutely have to be there in the loop, besides the
4038 * possibility that it would get too complicated for
4039 * optimizers to deal with. Another possibility is to just
4040 * give up, convert the source to UTF-8, and restart the
4041 * function that way. Another possibility is to convert
4042 * both what has already been processed and what is yet to
4043 * come separately to UTF-8, then jump into the loop that
4044 * handles UTF-8. But the most efficient time-wise of the
4045 * ones I could think of is what follows, and turned out to
4046 * not require much extra code. */
4047
4048 /* Convert what we have so far into UTF-8, telling the
4049 * function that we know it should be converted, and to
4050 * allow extra space for what we haven't processed yet.
4051 * Assume the worst case space requirements for converting
4052 * what we haven't processed so far: that it will require
4053 * two bytes for each remaining source character, plus the
4054 * NUL at the end. This may cause the string pointer to
4055 * move, so re-find it. */
4056
4057 len = d - (U8*)SvPVX_const(dest);
4058 SvCUR_set(dest, len);
4059 len = sv_utf8_upgrade_flags_grow(dest,
4060 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4061 (send -s) * 2 + 1);
4062 d = (U8*)SvPVX(dest) + len;
4063
4064 /* And append the current character's upper case in UTF-8 */
4065 CAT_NON_LATIN1_UC(d, *s);
4066
4067 /* Now process the remainder of the source, converting to
4068 * upper and UTF-8. If a resulting byte is invariant in
4069 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4070 * append it to the output. */
4071
4072 s++;
4073 for (; s < send; s++) {
4074 U8 upper = toUPPER_LATIN1_MOD(*s);
4075 if UTF8_IS_INVARIANT(upper) {
4076 *d++ = upper;
4077 }
4078 else {
4079 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4080 }
4081 }
4082
4083 /* Here have processed the whole source; no need to continue
4084 * with the outer loop. Each character has been converted
4085 * to upper case and converted to UTF-8 */
4086
4087 break;
4088 } /* End of processing all latin1-style chars */
4089 } /* End of processing all chars */
4090 } /* End of source is not empty */
4091
67306194 4092 if (source != dest) {
00f254e2 4093 *d = '\0'; /* Here d points to 1 after last char, add NUL */
67306194
NC
4094 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4095 }
00f254e2 4096 } /* End of isn't utf8 */
539689e7
FC
4097 if (dest != source && SvTAINTED(source))
4098 SvTAINT(dest);
67306194 4099 SvSETMAGIC(dest);
79072805
LW
4100 RETURN;
4101}
4102
4103PP(pp_lc)
4104{
97aff369 4105 dVAR;
39644a26 4106 dSP;
ec9af7d4 4107 SV *source = TOPs;
463ee0b2 4108 STRLEN len;
ec9af7d4
NC
4109 STRLEN min;
4110 SV *dest;
4111 const U8 *s;
4112 U8 *d;
79072805 4113
ec9af7d4
NC
4114 SvGETMAGIC(source);
4115
4116 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
17fa0776 4117 && SvTEMP(source) && !DO_UTF8(source)) {
ec9af7d4 4118
00f254e2
KW
4119 /* We can convert in place, as lowercasing anything in the latin1 range
4120 * (or else DO_UTF8 would have been on) doesn't lengthen it */
ec9af7d4
NC
4121 dest = source;
4122 s = d = (U8*)SvPV_force_nomg(source, len);
4123 min = len + 1;
4124 } else {
a0ed51b3 4125 dTARGET;
a0ed51b3 4126
ec9af7d4
NC
4127 dest = TARG;
4128
4129 /* The old implementation would copy source into TARG at this point.
4130 This had the side effect that if source was undef, TARG was now
4131 an undefined SV with PADTMP set, and they don't warn inside
4132 sv_2pv_flags(). However, we're now getting the PV direct from
4133 source, which doesn't have PADTMP set, so it would warn. Hence the
4134 little games. */
4135
4136 if (SvOK(source)) {
4137 s = (const U8*)SvPV_nomg_const(source, len);
4138 } else {
0a0ffbce
RGS
4139 if (ckWARN(WARN_UNINITIALIZED))
4140 report_uninit(source);
1eced8f8 4141 s = (const U8*)"";
ec9af7d4 4142 len = 0;
a0ed51b3 4143 }
ec9af7d4 4144 min = len + 1;
128c9517 4145
ec9af7d4 4146 SvUPGRADE(dest, SVt_PV);
3b416f41 4147 d = (U8*)SvGROW(dest, min);
ec9af7d4
NC
4148 (void)SvPOK_only(dest);
4149
4150 SETs(dest);
4151 }
4152
4153 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4154 to check DO_UTF8 again here. */
4155
4156 if (DO_UTF8(source)) {
4157 const U8 *const send = s + len;
4158 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4159
4160 while (s < send) {
00f254e2
KW
4161/* See comments at the first instance in this file of this ifdef */
4162#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4163 if (UTF8_IS_INVARIANT(*s)) {
89ebb4a3 4164
00f254e2 4165 /* Invariant characters use the standard mappings compiled in.
ec9af7d4 4166 */
00f254e2
KW
4167 *d++ = toLOWER(*s);
4168 s++;
ec9af7d4 4169 }
00f254e2 4170 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
89ebb4a3 4171
00f254e2 4172 /* As do the ones in the Latin1 range */
81367fea 4173 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *s++));
00f254e2 4174 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
81367fea 4175 s++;
a0ed51b3 4176 }
00f254e2
KW
4177 else {
4178#endif
4179 /* Here, is utf8 not in Latin-1 range, have to go out and get
4180 * the mappings from the tables. */
4181
4182 const STRLEN u = UTF8SKIP(s);
4183 STRLEN ulen;
4184
00f254e2
KW
4185#ifndef CONTEXT_DEPENDENT_CASING
4186 toLOWER_utf8(s, tmpbuf, &ulen);
4187#else
4c8a458a
KW
4188/* This is ifdefd out because it needs more work and thought. It isn't clear
4189 * that we should do it.
4190 * A minor objection is that this is based on a hard-coded rule from the
4191 * Unicode standard, and may change, but this is not very likely at all.
4192 * mktables should check and warn if it does.
4193 * More importantly, if the sigma occurs at the end of the string, we don't
4194 * have enough context to know whether it is part of a larger string or going
4195 * to be or not. It may be that we are passed a subset of the context, via
4196 * a \U...\E, for example, and we could conceivably know the larger context if
4197 * code were changed to pass that in. But, if the string passed in is an
4198 * intermediate result, and the user concatenates two strings together
4199 * after we have made a final sigma, that would be wrong. If the final sigma
4200 * occurs in the middle of the string we are working on, then we know that it
4201 * should be a final sigma, but otherwise we can't be sure. */
00f254e2
KW
4202
4203 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4204
4205 /* If the lower case is a small sigma, it may be that we need
4206 * to change it to a final sigma. This happens at the end of
4207 * a word that contains more than just this character, and only
4208 * when we started with a capital sigma. */
4209 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4210 s > send - len && /* Makes sure not the first letter */
4211 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4212 ) {
4213
4214 /* We use the algorithm in:
4215 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4216 * is a CAPITAL SIGMA): If C is preceded by a sequence
4217 * consisting of a cased letter and a case-ignorable
4218 * sequence, and C is not followed by a sequence consisting
4219 * of a case ignorable sequence and then a cased letter,
4220 * then when lowercasing C, C becomes a final sigma */
4221
4222 /* To determine if this is the end of a word, need to peek
4223 * ahead. Look at the next character */
4224 const U8 *peek = s + u;
4225
4226 /* Skip any case ignorable characters */
4227 while (peek < send && is_utf8_case_ignorable(peek)) {
4228 peek += UTF8SKIP(peek);
4229 }
4230
4231 /* If we reached the end of the string without finding any
4232 * non-case ignorable characters, or if the next such one
4233 * is not-cased, then we have met the conditions for it
4234 * being a final sigma with regards to peek ahead, and so
4235 * must do peek behind for the remaining conditions. (We
4236 * know there is stuff behind to look at since we tested
4237 * above that this isn't the first letter) */
4238 if (peek >= send || ! is_utf8_cased(peek)) {
4239 peek = utf8_hop(s, -1);
4240
4241 /* Here are at the beginning of the first character
4242 * before the original upper case sigma. Keep backing
4243 * up, skipping any case ignorable characters */
4244 while (is_utf8_case_ignorable(peek)) {
4245 peek = utf8_hop(peek, -1);
4246 }
4247
4248 /* Here peek points to the first byte of the closest
4249 * non-case-ignorable character before the capital
4250 * sigma. If it is cased, then by the Unicode
4251 * algorithm, we should use a small final sigma instead
4252 * of what we have */
4253 if (is_utf8_cased(peek)) {
4254 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4255 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4256 }
4257 }
4258 }
4259 else { /* Not a context sensitive mapping */
4260#endif /* End of commented out context sensitive */
4261 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4262
4263 /* If the eventually required minimum size outgrows
4264 * the available space, we need to grow. */
4265 const UV o = d - (U8*)SvPVX_const(dest);
4266
4267 /* If someone lowercases one million U+0130s we
4268 * SvGROW() one million times. Or we could try
4269 * guessing how much to allocate without allocating too
4270 * much. Such is life. Another option would be to
4271 * grow an extra byte or two more each time we need to
4272 * grow, which would cut down the million to 500K, with
4273 * little waste */
4274 SvGROW(dest, min);
4275 d = (U8*)SvPVX(dest) + o;
4276 }
4277#ifdef CONTEXT_DEPENDENT_CASING
4278 }
4279#endif
4280 /* Copy the newly lowercased letter to the output buffer we're
4281 * building */
4282 Copy(tmpbuf, d, ulen, U8);
4283 d += ulen;
4284 s += u;
4285#ifdef GO_AHEAD_AND_BREAK_USER_DEFINED_CASE_MAPPINGS
4286 }
4287#endif
4288 } /* End of looping through the source string */
ec9af7d4
NC
4289 SvUTF8_on(dest);
4290 *d = '\0';
4291 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
00f254e2 4292 } else { /* Not utf8 */
31351b04 4293 if (len) {
ec9af7d4 4294 const U8 *const send = s + len;
00f254e2
KW
4295
4296 /* Use locale casing if in locale; regular style if not treating
4297 * latin1 as having case; otherwise the latin1 casing. Do the
4298 * whole thing in a tight loop, for speed, */
2de3dbcc 4299 if (IN_LOCALE_RUNTIME) {
31351b04 4300 TAINT;
ec9af7d4
NC
4301 SvTAINTED_on(dest);
4302 for (; s < send; d++, s++)
4303 *d = toLOWER_LC(*s);
31351b04 4304 }
00f254e2
KW
4305 else if (! IN_UNI_8_BIT) {
4306 for (; s < send; d++, s++) {
ec9af7d4 4307 *d = toLOWER(*s);
00f254e2
KW
4308 }
4309 }
4310 else {
4311 for (; s < send; d++, s++) {
4312 *d = toLOWER_LATIN1(*s);
4313 }
31351b04 4314 }
bbce6d69 4315 }
ec9af7d4
NC
4316 if (source != dest) {
4317 *d = '\0';
4318 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4319 }
79072805 4320 }
539689e7
FC
4321 if (dest != source && SvTAINTED(source))
4322 SvTAINT(dest);
ec9af7d4 4323 SvSETMAGIC(dest);
79072805
LW
4324 RETURN;
4325}
4326
a0d0e21e 4327PP(pp_quotemeta)
79072805 4328{
97aff369 4329 dVAR; dSP; dTARGET;
1b6737cc 4330 SV * const sv = TOPs;
a0d0e21e 4331 STRLEN len;
0d46e09a 4332 register const char *s = SvPV_const(sv,len);
79072805 4333
7e2040f0 4334 SvUTF8_off(TARG); /* decontaminate */
a0d0e21e 4335 if (len) {
1b6737cc 4336 register char *d;
862a34c6 4337 SvUPGRADE(TARG, SVt_PV);
c07a80fd 4338 SvGROW(TARG, (len * 2) + 1);
a0d0e21e 4339 d = SvPVX(TARG);
7e2040f0 4340 if (DO_UTF8(sv)) {
0dd2cdef 4341 while (len) {
fd400ab9 4342 if (UTF8_IS_CONTINUED(*s)) {
0dd2cdef
LW
4343 STRLEN ulen = UTF8SKIP(s);
4344 if (ulen > len)
4345 ulen = len;
4346 len -= ulen;
4347 while (ulen--)
4348 *d++ = *s++;
4349 }
4350 else {
4351 if (!isALNUM(*s))
4352 *d++ = '\\';
4353 *d++ = *s++;
4354 len--;
4355 }
4356 }
7e2040f0 4357 SvUTF8_on(TARG);
0dd2cdef
LW
4358 }
4359 else {
4360 while (len--) {
4361 if (!isALNUM(*s))
4362 *d++ = '\\';
4363 *d++ = *s++;
4364 }
79072805 4365 }
a0d0e21e 4366 *d = '\0';
349d4f2f 4367 SvCUR_set(TARG, d - SvPVX_const(TARG));
3aa33fe5 4368 (void)SvPOK_only_UTF8(TARG);
79072805 4369 }
a0d0e21e
LW
4370 else
4371 sv_setpvn(TARG, s, len);
ec93b65f 4372 SETTARG;
79072805
LW
4373 RETURN;
4374}
4375
a0d0e21e 4376/* Arrays. */
79072805 4377
a0d0e21e 4378PP(pp_aslice)
79072805 4379{
97aff369 4380 dVAR; dSP; dMARK; dORIGMARK;
502c6561 4381 register AV *const av = MUTABLE_AV(POPs);
1b6737cc 4382 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
79072805 4383
a0d0e21e 4384 if (SvTYPE(av) == SVt_PVAV) {
fc15ae8f 4385 const I32 arybase = CopARYBASE_get(PL_curcop);
4ad10a0b
VP
4386 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4387 bool can_preserve = FALSE;
4388
4389 if (localizing) {
4390 MAGIC *mg;
4391 HV *stash;
4392
4393 can_preserve = SvCANEXISTDELETE(av);
4394 }
4395
4396 if (lval && localizing) {
1b6737cc 4397 register SV **svp;
748a9306 4398 I32 max = -1;
924508f0 4399 for (svp = MARK + 1; svp <= SP; svp++) {
4ea561bc 4400 const I32 elem = SvIV(*svp);
748a9306
LW
4401 if (elem > max)
4402 max = elem;
4403 }
4404 if (max > AvMAX(av))
4405 av_extend(av, max);
4406 }
4ad10a0b 4407
a0d0e21e 4408 while (++MARK <= SP) {
1b6737cc 4409 register SV **svp;
4ea561bc 4410 I32 elem = SvIV(*MARK);
4ad10a0b 4411 bool preeminent = TRUE;
a0d0e21e 4412
748a9306
LW
4413 if (elem > 0)
4414 elem -= arybase;
4ad10a0b
VP
4415 if (localizing && can_preserve) {
4416 /* If we can determine whether the element exist,
4417 * Try to preserve the existenceness of a tied array
4418 * element by using EXISTS and DELETE if possible.
4419 * Fallback to FETCH and STORE otherwise. */
4420 preeminent = av_exists(av, elem);
4421 }
4422
a0d0e21e
LW
4423 svp = av_fetch(av, elem, lval);
4424 if (lval) {
3280af22 4425 if (!svp || *svp == &PL_sv_undef)
cea2e8a9 4426 DIE(aTHX_ PL_no_aelem, elem);
4ad10a0b
VP
4427 if (localizing) {
4428 if (preeminent)
4429 save_aelem(av, elem, svp);
4430 else
4431 SAVEADELETE(av, elem);
4432 }
79072805 4433 }
3280af22 4434 *MARK = svp ? *svp : &PL_sv_undef;
79072805
LW
4435 }
4436 }
748a9306 4437 if (GIMME != G_ARRAY) {
a0d0e21e 4438 MARK = ORIGMARK;
04ab2c87 4439 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
a0d0e21e
LW
4440 SP = MARK;
4441 }
79072805
LW
4442 RETURN;
4443}
4444
cba5a3b0
DG
4445/* Smart dereferencing for keys, values and each */
4446PP(pp_rkeys)
4447{
4448 dVAR;
4449 dSP;
4450 dPOPss;
4451
7ac5715b
FC
4452 SvGETMAGIC(sv);
4453
4454 if (
4455 !SvROK(sv)
4456 || (sv = SvRV(sv),
4457 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4458 || SvOBJECT(sv)
4459 )
4460 ) {
4461 DIE(aTHX_
4462 "Type of argument to %s must be unblessed hashref or arrayref",
4c540399 4463 PL_op_desc[PL_op->op_type] );
cba5a3b0
DG
4464 }
4465
d8065907
FC
4466 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4467 DIE(aTHX_
4468 "Can't modify %s in %s",
4469 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4470 );
4471
cba5a3b0
DG
4472 /* Delegate to correct function for op type */
4473 PUSHs(sv);
4474 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4475 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4476 }
4477 else {
4478 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4479 }
4480}
4481
878d132a
NC
4482PP(pp_aeach)
4483{
4484 dVAR;
4485 dSP;
502c6561 4486 AV *array = MUTABLE_AV(POPs);
878d132a 4487 const I32 gimme = GIMME_V;
453d94a9 4488 IV *iterp = Perl_av_iter_p(aTHX_ array);
878d132a
NC
4489 const IV current = (*iterp)++;
4490
4491 if (current > av_len(array)) {
4492 *iterp = 0;
4493 if (gimme == G_SCALAR)
4494 RETPUSHUNDEF;
4495 else
4496 RETURN;
4497 }
4498
4499 EXTEND(SP, 2);
4500 mPUSHi(CopARYBASE_get(PL_curcop) + current);
4501 if (gimme == G_ARRAY) {
4502 SV **const element = av_fetch(array, current, 0);
4503 PUSHs(element ? *element : &PL_sv_undef);
4504 }
4505 RETURN;
4506}
4507
4508PP(pp_akeys)
4509{
4510 dVAR;
4511 dSP;
502c6561 4512 AV *array = MUTABLE_AV(POPs);
878d132a
NC
4513 const I32 gimme = GIMME_V;
4514
4515 *Perl_av_iter_p(aTHX_ array) = 0;
4516
4517 if (gimme == G_SCALAR) {
4518 dTARGET;
4519 PUSHi(av_len(array) + 1);
4520 }
4521 else if (gimme == G_ARRAY) {
4522 IV n = Perl_av_len(aTHX_ array);
4523 IV i = CopARYBASE_get(PL_curcop);
4524
4525 EXTEND(SP, n + 1);
4526
cba5a3b0 4527 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
878d132a
NC
4528 n += i;
4529 for (; i <= n; i++) {
4530 mPUSHi(i);
4531 }
4532 }
4533 else {
4534 for (i = 0; i <= n; i++) {
4535 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4536 PUSHs(elem ? *elem : &PL_sv_undef);
4537 }
4538 }
4539 }
4540 RETURN;
4541}
4542
79072805
LW
4543/* Associative arrays. */
4544
4545PP(pp_each)
4546{
97aff369 4547 dVAR;
39644a26 4548 dSP;
85fbaab2 4549 HV * hash = MUTABLE_HV(POPs);
c07a80fd 4550 HE *entry;
f54cb97a 4551 const I32 gimme = GIMME_V;
8ec5e241 4552
c07a80fd 4553 PUTBACK;
c750a3ec 4554 /* might clobber stack_sp */
6d822dc4 4555 entry = hv_iternext(hash);
c07a80fd 4556 SPAGAIN;
79072805 4557
79072805
LW
4558 EXTEND(SP, 2);
4559 if (entry) {
1b6737cc 4560 SV* const sv = hv_iterkeysv(entry);
574c8022 4561 PUSHs(sv); /* won't clobber stack_sp */
54310121 4562 if (gimme == G_ARRAY) {
59af0135 4563 SV *val;
c07a80fd 4564 PUTBACK;
c750a3ec 4565 /* might clobber stack_sp */
6d822dc4 4566 val = hv_iterval(hash, entry);
c07a80fd 4567 SPAGAIN;
59af0135 4568 PUSHs(val);
79072805 4569 }
79072805 4570 }
54310121 4571 else if (gimme == G_SCALAR)
79072805
LW
4572 RETPUSHUNDEF;
4573
4574 RETURN;
4575}
4576
7332a6c4
VP
4577STATIC OP *
4578S_do_delete_local(pTHX)
79072805 4579{
97aff369 4580 dVAR;
39644a26 4581 dSP;
f54cb97a 4582 const I32 gimme = GIMME_V;
7332a6c4
VP
4583 const MAGIC *mg;
4584 HV *stash;
4585
4586 if (PL_op->op_private & OPpSLICE) {
4587 dMARK; dORIGMARK;
4588 SV * const osv = POPs;
4589 const bool tied = SvRMAGICAL(osv)
4590 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4591 const bool can_preserve = SvCANEXISTDELETE(osv)
4592 || mg_find((const SV *)osv, PERL_MAGIC_env);
4593 const U32 type = SvTYPE(osv);
4594 if (type == SVt_PVHV) { /* hash element */
4595 HV * const hv = MUTABLE_HV(osv);
4596 while (++MARK <= SP) {
4597 SV * const keysv = *MARK;
4598 SV *sv = NULL;
4599 bool preeminent = TRUE;
4600 if (can_preserve)
4601 preeminent = hv_exists_ent(hv, keysv, 0);
4602 if (tied) {
4603 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4604 if (he)
4605 sv = HeVAL(he);
4606 else
4607 preeminent = FALSE;
4608 }
4609 else {
4610 sv = hv_delete_ent(hv, keysv, 0, 0);
4611 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4612 }
4613 if (preeminent) {
4614 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4615 if (tied) {
4616 *MARK = sv_mortalcopy(sv);
4617 mg_clear(sv);
4618 } else
4619 *MARK = sv;
4620 }
4621 else {
4622 SAVEHDELETE(hv, keysv);
4623 *MARK = &PL_sv_undef;
4624 }
4625 }
4626 }
4627 else if (type == SVt_PVAV) { /* array element */
4628 if (PL_op->op_flags & OPf_SPECIAL) {
4629 AV * const av = MUTABLE_AV(osv);
4630 while (++MARK <= SP) {
4631 I32 idx = SvIV(*MARK);
4632 SV *sv = NULL;
4633 bool preeminent = TRUE;
4634 if (can_preserve)
4635 preeminent = av_exists(av, idx);
4636 if (tied) {
4637 SV **svp = av_fetch(av, idx, 1);
4638 if (svp)
4639 sv = *svp;
4640 else
4641 preeminent = FALSE;
4642 }
4643 else {
4644 sv = av_delete(av, idx, 0);
4645 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4646 }
4647 if (preeminent) {
4648 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4649 if (tied) {
4650 *MARK = sv_mortalcopy(sv);
4651 mg_clear(sv);
4652 } else
4653 *MARK = sv;
4654 }
4655 else {
4656 SAVEADELETE(av, idx);
4657 *MARK = &PL_sv_undef;
4658 }
4659 }
4660 }
4661 }
4662 else
4663 DIE(aTHX_ "Not a HASH reference");
4664 if (gimme == G_VOID)
4665 SP = ORIGMARK;
4666 else if (gimme == G_SCALAR) {
4667 MARK = ORIGMARK;
4668 if (SP > MARK)
4669 *++MARK = *SP;
4670 else
4671 *++MARK = &PL_sv_undef;
4672 SP = MARK;
4673 }
4674 }
4675 else {
4676 SV * const keysv = POPs;
4677 SV * const osv = POPs;
4678 const bool tied = SvRMAGICAL(osv)
4679 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4680 const bool can_preserve = SvCANEXISTDELETE(osv)
4681 || mg_find((const SV *)osv, PERL_MAGIC_env);
4682 const U32 type = SvTYPE(osv);
4683 SV *sv = NULL;
4684 if (type == SVt_PVHV) {
4685 HV * const hv = MUTABLE_HV(osv);
4686 bool preeminent = TRUE;
4687 if (can_preserve)
4688 preeminent = hv_exists_ent(hv, keysv, 0);
4689 if (tied) {
4690 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4691 if (he)
4692 sv = HeVAL(he);
4693 else
4694 preeminent = FALSE;
4695 }
4696 else {
4697 sv = hv_delete_ent(hv, keysv, 0, 0);
4698 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4699 }
4700 if (preeminent) {
4701 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4702 if (tied) {
4703 SV *nsv = sv_mortalcopy(sv);
4704 mg_clear(sv);
4705 sv = nsv;
4706 }
4707 }
4708 else
4709 SAVEHDELETE(hv, keysv);
4710 }
4711 else if (type == SVt_PVAV) {
4712 if (PL_op->op_flags & OPf_SPECIAL) {
4713 AV * const av = MUTABLE_AV(osv);
4714 I32 idx = SvIV(keysv);
4715 bool preeminent = TRUE;
4716 if (can_preserve)
4717 preeminent = av_exists(av, idx);
4718 if (tied) {
4719 SV **svp = av_fetch(av, idx, 1);
4720 if (svp)
4721 sv = *svp;
4722 else
4723 preeminent = FALSE;
4724 }
4725 else {
4726 sv = av_delete(av, idx, 0);
4727 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4728 }
4729 if (preeminent) {
4730 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4731 if (tied) {
4732 SV *nsv = sv_mortalcopy(sv);
4733 mg_clear(sv);
4734 sv = nsv;
4735 }
4736 }
4737 else
4738 SAVEADELETE(av, idx);
4739 }
4740 else
4741 DIE(aTHX_ "panic: avhv_delete no longer supported");
4742 }
4743 else
4744 DIE(aTHX_ "Not a HASH reference");
4745 if (!sv)
4746 sv = &PL_sv_undef;
4747 if (gimme != G_VOID)
4748 PUSHs(sv);
4749 }
4750
4751 RETURN;
4752}
4753
4754PP(pp_delete)
4755{
4756 dVAR;
4757 dSP;
4758 I32 gimme;
4759 I32 discard;
4760
4761 if (PL_op->op_private & OPpLVAL_INTRO)
4762 return do_delete_local();
4763
4764 gimme = GIMME_V;
4765 discard = (gimme == G_VOID) ? G_DISCARD : 0;
5f05dabc 4766
533c011a 4767 if (PL_op->op_private & OPpSLICE) {
5f05dabc 4768 dMARK; dORIGMARK;
85fbaab2 4769 HV * const hv = MUTABLE_HV(POPs);
1b6737cc 4770 const U32 hvtype = SvTYPE(hv);
01020589
GS
4771 if (hvtype == SVt_PVHV) { /* hash element */
4772 while (++MARK <= SP) {
1b6737cc 4773 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
01020589
GS
4774 *MARK = sv ? sv : &PL_sv_undef;
4775 }
5f05dabc 4776 }
6d822dc4
MS
4777 else if (hvtype == SVt_PVAV) { /* array element */
4778 if (PL_op->op_flags & OPf_SPECIAL) {
4779 while (++MARK <= SP) {
502c6561 4780 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
6d822dc4
MS
4781 *MARK = sv ? sv : &PL_sv_undef;
4782 }
4783 }
01020589
GS
4784 }
4785 else
4786 DIE(aTHX_ "Not a HASH reference");
54310121 4787 if (discard)
4788 SP = ORIGMARK;
4789 else if (gimme == G_SCALAR) {
5f05dabc 4790 MARK = ORIGMARK;
9111c9c0
DM
4791 if (SP > MARK)
4792 *++MARK = *SP;
4793 else
4794 *++MARK = &PL_sv_undef;
5f05dabc 4795 SP = MARK;
4796 }
4797 }
4798 else {
4799 SV *keysv = POPs;
85fbaab2 4800 HV * const hv = MUTABLE_HV(POPs);
295d248e 4801 SV *sv = NULL;
97fcbf96
MB
4802 if (SvTYPE(hv) == SVt_PVHV)
4803 sv = hv_delete_ent(hv, keysv, discard, 0);
01020589
GS
4804 else if (SvTYPE(hv) == SVt_PVAV) {
4805 if (PL_op->op_flags & OPf_SPECIAL)
502c6561 4806 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
af288a60
HS
4807 else
4808 DIE(aTHX_ "panic: avhv_delete no longer supported");
01020589 4809 }
97fcbf96 4810 else
cea2e8a9 4811 DIE(aTHX_ "Not a HASH reference");
5f05dabc 4812 if (!sv)
3280af22 4813 sv = &PL_sv_undef;
54310121 4814 if (!discard)
4815 PUSHs(sv);
79072805 4816 }
79072805
LW
4817 RETURN;
4818}
4819
a0d0e21e 4820PP(pp_exists)
79072805 4821{
97aff369 4822 dVAR;
39644a26 4823 dSP;
afebc493
GS
4824 SV *tmpsv;
4825 HV *hv;
4826
4827 if (PL_op->op_private & OPpEXISTS_SUB) {
4828 GV *gv;
0bd48802 4829 SV * const sv = POPs;
f2c0649b 4830 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
afebc493
GS
4831 if (cv)
4832 RETPUSHYES;
4833 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
4834 RETPUSHYES;
4835 RETPUSHNO;
4836 }
4837 tmpsv = POPs;
85fbaab2 4838 hv = MUTABLE_HV(POPs);
c750a3ec 4839 if (SvTYPE(hv) == SVt_PVHV) {
ae77835f 4840 if (hv_exists_ent(hv, tmpsv, 0))
c750a3ec 4841 RETPUSHYES;
ef54e1a4
JH
4842 }
4843 else if (SvTYPE(hv) == SVt_PVAV) {
01020589 4844 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
502c6561 4845 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
01020589
GS
4846 RETPUSHYES;
4847 }
ef54e1a4
JH
4848 }
4849 else {
cea2e8a9 4850 DIE(aTHX_ "Not a HASH reference");
a0d0e21e 4851 }
a0d0e21e
LW
4852 RETPUSHNO;
4853}
79072805 4854
a0d0e21e
LW
4855PP(pp_hslice)
4856{
97aff369 4857 dVAR; dSP; dMARK; dORIGMARK;
85fbaab2 4858 register HV * const hv = MUTABLE_HV(POPs);
1b6737cc
AL
4859 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4860 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
d30e492c 4861 bool can_preserve = FALSE;
79072805 4862
eb85dfd3
DM
4863 if (localizing) {
4864 MAGIC *mg;
4865 HV *stash;
4866
d30e492c
VP
4867 if (SvCANEXISTDELETE(hv) || mg_find((const SV *)hv, PERL_MAGIC_env))
4868 can_preserve = TRUE;
eb85dfd3
DM
4869 }
4870
6d822dc4 4871 while (++MARK <= SP) {
1b6737cc 4872 SV * const keysv = *MARK;
6d822dc4
MS
4873 SV **svp;
4874 HE *he;
d30e492c
VP
4875 bool preeminent = TRUE;
4876
4877 if (localizing && can_preserve) {
4878 /* If we can determine whether the element exist,
4879 * try to preserve the existenceness of a tied hash
4880 * element by using EXISTS and DELETE if possible.
4881 * Fallback to FETCH and STORE otherwise. */
4882 preeminent = hv_exists_ent(hv, keysv, 0);
6d822dc4 4883 }
eb85dfd3 4884
6d822dc4 4885 he = hv_fetch_ent(hv, keysv, lval, 0);
fe5bfecd 4886 svp = he ? &HeVAL(he) : NULL;
eb85dfd3 4887
6d822dc4
MS
4888 if (lval) {
4889 if (!svp || *svp == &PL_sv_undef) {
be2597df 4890 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
6d822dc4
MS
4891 }
4892 if (localizing) {
7a2e501a 4893 if (HvNAME_get(hv) && isGV(*svp))
159b6efe 4894 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
47cfc530
VP
4895 else if (preeminent)
4896 save_helem_flags(hv, keysv, svp,
4897 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
4898 else
4899 SAVEHDELETE(hv, keysv);
6d822dc4
MS
4900 }
4901 }
4902 *MARK = svp ? *svp : &PL_sv_undef;
79072805 4903 }
a0d0e21e
LW
4904 if (GIMME != G_ARRAY) {
4905 MARK = ORIGMARK;
04ab2c87 4906 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
a0d0e21e 4907 SP = MARK;
79072805 4908 }
a0d0e21e
LW
4909 RETURN;
4910}
4911
4912/* List operators. */
4913
4914PP(pp_list)
4915{
97aff369 4916 dVAR; dSP; dMARK;
a0d0e21e
LW
4917 if (GIMME != G_ARRAY) {
4918 if (++MARK <= SP)
4919 *MARK = *SP; /* unwanted list, return last item */
8990e307 4920 else
3280af22 4921 *MARK = &PL_sv_undef;
a0d0e21e 4922 SP = MARK;
79072805 4923 }
a0d0e21e 4924 RETURN;
79072805
LW
4925}
4926
a0d0e21e 4927PP(pp_lslice)
79072805 4928{
97aff369 4929 dVAR;
39644a26 4930 dSP;
1b6737cc
AL
4931 SV ** const lastrelem = PL_stack_sp;
4932 SV ** const lastlelem = PL_stack_base + POPMARK;
4933 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
4934 register SV ** const firstrelem = lastlelem + 1;
fc15ae8f 4935 const I32 arybase = CopARYBASE_get(PL_curcop);
42e73ed0 4936 I32 is_something_there = FALSE;
1b6737cc
AL
4937
4938 register const I32 max = lastrelem - lastlelem;
a0d0e21e 4939 register SV **lelem;
a0d0e21e
LW
4940
4941 if (GIMME != G_ARRAY) {
4ea561bc 4942 I32 ix = SvIV(*lastlelem);
748a9306
LW
4943 if (ix < 0)
4944 ix += max;
4945 else
4946 ix -= arybase;
a0d0e21e 4947 if (ix < 0 || ix >= max)
3280af22 4948 *firstlelem = &PL_sv_undef;
a0d0e21e
LW
4949 else
4950 *firstlelem = firstrelem[ix];
4951 SP = firstlelem;
4952 RETURN;
4953 }
4954
4955 if (max == 0) {
4956 SP = firstlelem - 1;
4957 RETURN;
4958 }
4959
4960 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
4ea561bc 4961 I32 ix = SvIV(*lelem);
c73bf8e3 4962 if (ix < 0)
a0d0e21e 4963 ix += max;
b13b2135 4964 else
748a9306 4965 ix -= arybase;
c73bf8e3
HS
4966 if (ix < 0 || ix >= max)
4967 *lelem = &PL_sv_undef;
4968 else {
4969 is_something_there = TRUE;
4970 if (!(*lelem = firstrelem[ix]))
3280af22 4971 *lelem = &PL_sv_undef;
748a9306 4972 }
79072805 4973 }
4633a7c4
LW
4974 if (is_something_there)
4975 SP = lastlelem;
4976 else
4977 SP = firstlelem - 1;
79072805
LW
4978 RETURN;
4979}
4980
a0d0e21e
LW
4981PP(pp_anonlist)
4982{
97aff369 4983 dVAR; dSP; dMARK; dORIGMARK;
1b6737cc 4984 const I32 items = SP - MARK;
ad64d0ec 4985 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
44a8e56a 4986 SP = ORIGMARK; /* av_make() might realloc stack_sp */
6e449a3a
MHM
4987 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
4988 ? newRV_noinc(av) : av);
a0d0e21e
LW
4989 RETURN;
4990}
4991
4992PP(pp_anonhash)
79072805 4993{
97aff369 4994 dVAR; dSP; dMARK; dORIGMARK;
78c72037 4995 HV* const hv = newHV();
a0d0e21e
LW
4996
4997 while (MARK < SP) {
1b6737cc 4998 SV * const key = *++MARK;
561b68a9 4999 SV * const val = newSV(0);
a0d0e21e
LW
5000 if (MARK < SP)
5001 sv_setsv(val, *++MARK);
a2a5de95
NC
5002 else
5003 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
f12c7020 5004 (void)hv_store_ent(hv,key,val,0);
79072805 5005 }
a0d0e21e 5006 SP = ORIGMARK;
6e449a3a 5007 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
ad64d0ec 5008 ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
79072805
LW
5009 RETURN;
5010}
5011
d4fc4415
FC
5012static AV *
5013S_deref_plain_array(pTHX_ AV *ary)
5014{
5015 if (SvTYPE(ary) == SVt_PVAV) return ary;
d2d95e13 5016 SvGETMAGIC((SV *)ary);
d4fc4415
FC
5017 if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV)
5018 Perl_die(aTHX_ "Not an ARRAY reference");
5019 else if (SvOBJECT(SvRV(ary)))
5020 Perl_die(aTHX_ "Not an unblessed ARRAY reference");
5021 return (AV *)SvRV(ary);
5022}
5023
5024#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
5025# define DEREF_PLAIN_ARRAY(ary) \
5026 ({ \
5027 AV *aRrRay = ary; \
5028 SvTYPE(aRrRay) == SVt_PVAV \
5029 ? aRrRay \
5030 : S_deref_plain_array(aTHX_ aRrRay); \
5031 })
5032#else
5033# define DEREF_PLAIN_ARRAY(ary) \
5034 ( \
3b0f6d32 5035 PL_Sv = (SV *)(ary), \
d4fc4415
FC
5036 SvTYPE(PL_Sv) == SVt_PVAV \
5037 ? (AV *)PL_Sv \
3b0f6d32 5038 : S_deref_plain_array(aTHX_ (AV *)PL_Sv) \
d4fc4415
FC
5039 )
5040#endif
5041
a0d0e21e 5042PP(pp_splice)
79072805 5043{
27da23d5 5044 dVAR; dSP; dMARK; dORIGMARK;
5cd408a2 5045 int num_args = (SP - MARK);
d4fc4415 5046 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
a0d0e21e
LW
5047 register SV **src;
5048 register SV **dst;
5049 register I32 i;
5050 register I32 offset;
5051 register I32 length;
5052 I32 newlen;
5053 I32 after;
5054 I32 diff;
ad64d0ec 5055 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
93965878 5056
1b6737cc 5057 if (mg) {
af71faff
NC
5058 return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg,
5059 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
5060 sp - mark);
93965878 5061 }
79072805 5062
a0d0e21e 5063 SP++;
79072805 5064
a0d0e21e 5065 if (++MARK < SP) {
4ea561bc 5066 offset = i = SvIV(*MARK);
a0d0e21e 5067 if (offset < 0)
93965878 5068 offset += AvFILLp(ary) + 1;
a0d0e21e 5069 else
fc15ae8f 5070 offset -= CopARYBASE_get(PL_curcop);
84902520 5071 if (offset < 0)
cea2e8a9 5072 DIE(aTHX_ PL_no_aelem, i);
a0d0e21e
LW
5073 if (++MARK < SP) {
5074 length = SvIVx(*MARK++);
48cdf507
GA
5075 if (length < 0) {
5076 length += AvFILLp(ary) - offset + 1;
5077 if (length < 0)
5078 length = 0;
5079 }
79072805
LW
5080 }
5081 else
a0d0e21e 5082 length = AvMAX(ary) + 1; /* close enough to infinity */
79072805 5083 }
a0d0e21e
LW
5084 else {
5085 offset = 0;
5086 length = AvMAX(ary) + 1;
5087 }
8cbc2e3b 5088 if (offset > AvFILLp(ary) + 1) {
5cd408a2
EB
5089 if (num_args > 2)
5090 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
93965878 5091 offset = AvFILLp(ary) + 1;
8cbc2e3b 5092 }
93965878 5093 after = AvFILLp(ary) + 1 - (offset + length);
a0d0e21e
LW
5094 if (after < 0) { /* not that much array */
5095 length += after; /* offset+length now in array */
5096 after = 0;
5097 if (!AvALLOC(ary))
5098 av_extend(ary, 0);
5099 }
5100
5101 /* At this point, MARK .. SP-1 is our new LIST */
5102
5103 newlen = SP - MARK;
5104 diff = newlen - length;
13d7cbc1
GS
5105 if (newlen && !AvREAL(ary) && AvREIFY(ary))
5106 av_reify(ary);
a0d0e21e 5107
50528de0
WL
5108 /* make new elements SVs now: avoid problems if they're from the array */
5109 for (dst = MARK, i = newlen; i; i--) {
1b6737cc 5110 SV * const h = *dst;
f2b990bf 5111 *dst++ = newSVsv(h);
50528de0
WL
5112 }
5113
a0d0e21e 5114 if (diff < 0) { /* shrinking the area */
95b63a38 5115 SV **tmparyval = NULL;
a0d0e21e 5116 if (newlen) {
a02a5408 5117 Newx(tmparyval, newlen, SV*); /* so remember insertion */
a0d0e21e 5118 Copy(MARK, tmparyval, newlen, SV*);
79072805 5119 }
a0d0e21e
LW
5120
5121 MARK = ORIGMARK + 1;
5122 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5123 MEXTEND(MARK, length);
5124 Copy(AvARRAY(ary)+offset, MARK, length, SV*);
5125 if (AvREAL(ary)) {
bbce6d69 5126 EXTEND_MORTAL(length);
36477c24 5127 for (i = length, dst = MARK; i; i--) {
486ec47a 5128 sv_2mortal(*dst); /* free them eventually */
36477c24 5129 dst++;
5130 }
a0d0e21e
LW
5131 }
5132 MARK += length - 1;
79072805 5133 }
a0d0e21e
LW
5134 else {
5135 *MARK = AvARRAY(ary)[offset+length-1];
5136 if (AvREAL(ary)) {
d689ffdd 5137 sv_2mortal(*MARK);
a0d0e21e
LW
5138 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
5139 SvREFCNT_dec(*dst++); /* free them now */
79072805 5140 }
a0d0e21e 5141 }
93965878 5142 AvFILLp(ary) += diff;
a0d0e21e
LW
5143
5144 /* pull up or down? */
5145
5146 if (offset < after) { /* easier to pull up */
5147 if (offset) { /* esp. if nothing to pull */
5148 src = &AvARRAY(ary)[offset-1];
5149 dst = src - diff; /* diff is negative */
5150 for (i = offset; i > 0; i--) /* can't trust Copy */
5151 *dst-- = *src--;
79072805 5152 }
a0d0e21e 5153 dst = AvARRAY(ary);
9c6bc640 5154 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
a0d0e21e
LW
5155 AvMAX(ary) += diff;
5156 }
5157 else {
5158 if (after) { /* anything to pull down? */
5159 src = AvARRAY(ary) + offset + length;
5160 dst = src + diff; /* diff is negative */
5161 Move(src, dst, after, SV*);
79072805 5162 }
93965878 5163 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
a0d0e21e
LW
5164 /* avoid later double free */
5165 }
5166 i = -diff;
5167 while (i)
3280af22 5168 dst[--i] = &PL_sv_undef;
a0d0e21e
LW
5169
5170 if (newlen) {
50528de0 5171 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
a0d0e21e
LW
5172 Safefree(tmparyval);
5173 }
5174 }
5175 else { /* no, expanding (or same) */
d3961450 5176 SV** tmparyval = NULL;
a0d0e21e 5177 if (length) {
a02a5408 5178 Newx(tmparyval, length, SV*); /* so remember deletion */
a0d0e21e
LW
5179 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
5180 }
5181
5182 if (diff > 0) { /* expanding */
a0d0e21e 5183 /* push up or down? */
a0d0e21e
LW
5184 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
5185 if (offset) {
5186 src = AvARRAY(ary);
5187 dst = src - diff;
5188 Move(src, dst, offset, SV*);
79072805 5189 }
9c6bc640 5190 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
a0d0e21e 5191 AvMAX(ary) += diff;
93965878 5192 AvFILLp(ary) += diff;
79072805
LW
5193 }
5194 else {
93965878
NIS
5195 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
5196 av_extend(ary, AvFILLp(ary) + diff);
5197 AvFILLp(ary) += diff;
a0d0e21e
LW
5198
5199 if (after) {
93965878 5200 dst = AvARRAY(ary) + AvFILLp(ary);
a0d0e21e
LW
5201 src = dst - diff;
5202 for (i = after; i; i--) {
5203 *dst-- = *src--;
5204 }
79072805
LW
5205 }
5206 }
a0d0e21e
LW
5207 }
5208
50528de0
WL
5209 if (newlen) {
5210 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
a0d0e21e 5211 }
50528de0 5212
a0d0e21e
LW
5213 MARK = ORIGMARK + 1;
5214 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5215 if (length) {
5216 Copy(tmparyval, MARK, length, SV*);
5217 if (AvREAL(ary)) {
bbce6d69 5218 EXTEND_MORTAL(length);
36477c24 5219 for (i = length, dst = MARK; i; i--) {
486ec47a 5220 sv_2mortal(*dst); /* free them eventually */
36477c24 5221 dst++;
5222 }
79072805
LW
5223 }
5224 }
a0d0e21e
LW
5225 MARK += length - 1;
5226 }
5227 else if (length--) {
5228 *MARK = tmparyval[length];
5229 if (AvREAL(ary)) {
d689ffdd 5230 sv_2mortal(*MARK);
a0d0e21e
LW
5231 while (length-- > 0)
5232 SvREFCNT_dec(tmparyval[length]);
79072805 5233 }
79072805 5234 }
a0d0e21e 5235 else
3280af22 5236 *MARK = &PL_sv_undef;
d3961450 5237 Safefree(tmparyval);
79072805 5238 }
474af990
FR
5239
5240 if (SvMAGICAL(ary))
5241 mg_set(MUTABLE_SV(ary));
5242
a0d0e21e 5243 SP = MARK;
79072805
LW
5244 RETURN;
5245}
5246
a0d0e21e 5247PP(pp_push)
79072805 5248{
27da23d5 5249 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
d4fc4415 5250 register AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
ad64d0ec 5251 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
79072805 5252
1b6737cc 5253 if (mg) {
ad64d0ec 5254 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
93965878
NIS
5255 PUSHMARK(MARK);
5256 PUTBACK;
d343c3ef 5257 ENTER_with_name("call_PUSH");
864dbfa3 5258 call_method("PUSH",G_SCALAR|G_DISCARD);
d343c3ef 5259 LEAVE_with_name("call_PUSH");
93965878 5260 SPAGAIN;
93965878 5261 }
a60c0954 5262 else {
89c14e2e 5263 PL_delaymagic = DM_DELAY;
a60c0954 5264 for (++MARK; MARK <= SP; MARK++) {
561b68a9 5265 SV * const sv = newSV(0);
a60c0954
NIS
5266 if (*MARK)
5267 sv_setsv(sv, *MARK);
0a75904b 5268 av_store(ary, AvFILLp(ary)+1, sv);
a60c0954 5269 }
354b0578 5270 if (PL_delaymagic & DM_ARRAY_ISA)
ad64d0ec 5271 mg_set(MUTABLE_SV(ary));
89c14e2e
BB
5272
5273 PL_delaymagic = 0;
6eeabd23
VP
5274 }
5275 SP = ORIGMARK;
5276 if (OP_GIMME(PL_op, 0) != G_VOID) {
5277 PUSHi( AvFILL(ary) + 1 );
79072805 5278 }
79072805
LW
5279 RETURN;
5280}
5281
a0d0e21e 5282PP(pp_shift)
79072805 5283{
97aff369 5284 dVAR;
39644a26 5285 dSP;
538f5756 5286 AV * const av = PL_op->op_flags & OPf_SPECIAL
d4fc4415 5287 ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs));
789b4bc9 5288 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
79072805 5289 EXTEND(SP, 1);
c2b4a044 5290 assert (sv);
d689ffdd 5291 if (AvREAL(av))
a0d0e21e
LW
5292 (void)sv_2mortal(sv);
5293 PUSHs(sv);
79072805 5294 RETURN;
79072805
LW
5295}
5296
a0d0e21e 5297PP(pp_unshift)
79072805 5298{
27da23d5 5299 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
d4fc4415 5300 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
ad64d0ec 5301 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
93965878 5302
1b6737cc 5303 if (mg) {
ad64d0ec 5304 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
7fd66d9d 5305 PUSHMARK(MARK);
93965878 5306 PUTBACK;
d343c3ef 5307 ENTER_with_name("call_UNSHIFT");
864dbfa3 5308 call_method("UNSHIFT",G_SCALAR|G_DISCARD);
d343c3ef 5309 LEAVE_with_name("call_UNSHIFT");
93965878 5310 SPAGAIN;
93965878 5311 }
a60c0954 5312 else {
1b6737cc 5313 register I32 i = 0;
a60c0954
NIS
5314 av_unshift(ary, SP - MARK);
5315 while (MARK < SP) {
1b6737cc 5316 SV * const sv = newSVsv(*++MARK);
a60c0954
NIS
5317 (void)av_store(ary, i++, sv);
5318 }
79072805 5319 }
a0d0e21e 5320 SP = ORIGMARK;
6eeabd23 5321 if (OP_GIMME(PL_op, 0) != G_VOID) {
5658d0a9
LR
5322 PUSHi( AvFILL(ary) + 1 );
5323 }
79072805 5324 RETURN;
79072805
LW
5325}
5326
a0d0e21e 5327PP(pp_reverse)
79072805 5328{
97aff369 5329 dVAR; dSP; dMARK;
79072805 5330
a0d0e21e 5331 if (GIMME == G_ARRAY) {
484c818f
VP
5332 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5333 AV *av;
5334
5335 /* See pp_sort() */
5336 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5337 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5338 av = MUTABLE_AV((*SP));
5339 /* In-place reversing only happens in void context for the array
5340 * assignment. We don't need to push anything on the stack. */
5341 SP = MARK;
5342
5343 if (SvMAGICAL(av)) {
5344 I32 i, j;
5345 register SV *tmp = sv_newmortal();
5346 /* For SvCANEXISTDELETE */
5347 HV *stash;
5348 const MAGIC *mg;
5349 bool can_preserve = SvCANEXISTDELETE(av);
5350
5351 for (i = 0, j = av_len(av); i < j; ++i, --j) {
5352 register SV *begin, *end;
5353
5354 if (can_preserve) {
5355 if (!av_exists(av, i)) {
5356 if (av_exists(av, j)) {
5357 register SV *sv = av_delete(av, j, 0);
5358 begin = *av_fetch(av, i, TRUE);
5359 sv_setsv_mg(begin, sv);
5360 }
5361 continue;
5362 }
5363 else if (!av_exists(av, j)) {
5364 register SV *sv = av_delete(av, i, 0);
5365 end = *av_fetch(av, j, TRUE);
5366 sv_setsv_mg(end, sv);
5367 continue;
5368 }
5369 }
5370
5371 begin = *av_fetch(av, i, TRUE);
5372 end = *av_fetch(av, j, TRUE);
5373 sv_setsv(tmp, begin);
5374 sv_setsv_mg(begin, end);
5375 sv_setsv_mg(end, tmp);
5376 }
5377 }
5378 else {
5379 SV **begin = AvARRAY(av);
484c818f 5380
95a26d8e
VP
5381 if (begin) {
5382 SV **end = begin + AvFILLp(av);
5383
5384 while (begin < end) {
5385 register SV * const tmp = *begin;
5386 *begin++ = *end;
5387 *end-- = tmp;
5388 }
484c818f
VP
5389 }
5390 }
5391 }
5392 else {
5393 SV **oldsp = SP;
5394 MARK++;
5395 while (MARK < SP) {
5396 register SV * const tmp = *MARK;
5397 *MARK++ = *SP;
5398 *SP-- = tmp;
5399 }
5400 /* safe as long as stack cannot get extended in the above */
5401 SP = oldsp;
a0d0e21e 5402 }
79072805
LW
5403 }
5404 else {
a0d0e21e
LW
5405 register char *up;
5406 register char *down;
5407 register I32 tmp;
5408 dTARGET;
5409 STRLEN len;
79072805 5410
7e2040f0 5411 SvUTF8_off(TARG); /* decontaminate */
a0d0e21e 5412 if (SP - MARK > 1)
3280af22 5413 do_join(TARG, &PL_sv_no, MARK, SP);
1e21d011 5414 else {
789bd863 5415 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv());
1e21d011
B
5416 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
5417 report_uninit(TARG);
5418 }
5419
a0d0e21e
LW
5420 up = SvPV_force(TARG, len);
5421 if (len > 1) {
7e2040f0 5422 if (DO_UTF8(TARG)) { /* first reverse each character */
dfe13c55 5423 U8* s = (U8*)SvPVX(TARG);
349d4f2f 5424 const U8* send = (U8*)(s + len);
a0ed51b3 5425 while (s < send) {
d742c382 5426 if (UTF8_IS_INVARIANT(*s)) {
a0ed51b3
LW
5427 s++;
5428 continue;
5429 }
5430 else {
9041c2e3 5431 if (!utf8_to_uvchr(s, 0))
a0dbb045 5432 break;
dfe13c55 5433 up = (char*)s;
a0ed51b3 5434 s += UTF8SKIP(s);
dfe13c55 5435 down = (char*)(s - 1);
a0dbb045 5436 /* reverse this character */
a0ed51b3
LW
5437 while (down > up) {
5438 tmp = *up;
5439 *up++ = *down;
eb160463 5440 *down-- = (char)tmp;
a0ed51b3
LW
5441 }
5442 }
5443 }
5444 up = SvPVX(TARG);
5445 }
a0d0e21e
LW
5446 down = SvPVX(TARG) + len - 1;
5447 while (down > up) {
5448 tmp = *up;
5449 *up++ = *down;
eb160463 5450 *down-- = (char)tmp;
a0d0e21e 5451 }
3aa33fe5 5452 (void)SvPOK_only_UTF8(TARG);
79072805 5453 }
a0d0e21e
LW
5454 SP = MARK + 1;
5455 SETTARG;
79072805 5456 }
a0d0e21e 5457 RETURN;
79072805
LW
5458}
5459
a0d0e21e 5460PP(pp_split)
79072805 5461{
27da23d5 5462 dVAR; dSP; dTARG;
a0d0e21e 5463 AV *ary;
467f0320 5464 register IV limit = POPi; /* note, negative is forever */
1b6737cc 5465 SV * const sv = POPs;
a0d0e21e 5466 STRLEN len;
727b7506 5467 register const char *s = SvPV_const(sv, len);
1b6737cc 5468 const bool do_utf8 = DO_UTF8(sv);
727b7506 5469 const char *strend = s + len;
44a8e56a 5470 register PMOP *pm;
d9f97599 5471 register REGEXP *rx;
a0d0e21e 5472 register SV *dstr;
727b7506 5473 register const char *m;
a0d0e21e 5474 I32 iters = 0;
bb7a0f54 5475 const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
792b2c16 5476 I32 maxiters = slen + 10;
c1a7495a 5477 I32 trailing_empty = 0;
727b7506 5478 const char *orig;
1b6737cc 5479 const I32 origlimit = limit;
a0d0e21e
LW
5480 I32 realarray = 0;
5481 I32 base;
f54cb97a 5482 const I32 gimme = GIMME_V;
941446f6 5483 bool gimme_scalar;
f54cb97a 5484 const I32 oldsave = PL_savestack_ix;
437d3b4e 5485 U32 make_mortal = SVs_TEMP;
7fba1cd6 5486 bool multiline = 0;
b37c2d43 5487 MAGIC *mg = NULL;
79072805 5488
44a8e56a 5489#ifdef DEBUGGING
5490 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*);
5491#else
5492 pm = (PMOP*)POPs;
5493#endif
a0d0e21e 5494 if (!pm || !s)
2269b42e 5495 DIE(aTHX_ "panic: pp_split");
aaa362c4 5496 rx = PM_GETRE(pm);
bbce6d69 5497
a62b1201 5498 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
07bc277f 5499 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
bbce6d69 5500
a30b2f1f 5501 RX_MATCH_UTF8_set(rx, do_utf8);
d9f424b2 5502
971a9dd3 5503#ifdef USE_ITHREADS
20e98b0f 5504 if (pm->op_pmreplrootu.op_pmtargetoff) {
159b6efe 5505 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
20e98b0f 5506 }
971a9dd3 5507#else
20e98b0f
NC
5508 if (pm->op_pmreplrootu.op_pmtargetgv) {
5509 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
971a9dd3 5510 }
20e98b0f 5511#endif
79072805 5512 else
7d49f689 5513 ary = NULL;
a0d0e21e
LW
5514 if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
5515 realarray = 1;
8ec5e241 5516 PUTBACK;
a0d0e21e
LW
5517 av_extend(ary,0);
5518 av_clear(ary);
8ec5e241 5519 SPAGAIN;
ad64d0ec 5520 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
8ec5e241 5521 PUSHMARK(SP);
ad64d0ec 5522 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
8ec5e241
NIS
5523 }
5524 else {
1c0b011c 5525 if (!AvREAL(ary)) {
1b6737cc 5526 I32 i;
1c0b011c 5527 AvREAL_on(ary);
abff13bb 5528 AvREIFY_off(ary);
1c0b011c 5529 for (i = AvFILLp(ary); i >= 0; i--)
3280af22 5530 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
1c0b011c
NIS
5531 }
5532 /* temporarily switch stacks */
8b7059b1 5533 SAVESWITCHSTACK(PL_curstack, ary);
8ec5e241 5534 make_mortal = 0;
1c0b011c 5535 }
79072805 5536 }
3280af22 5537 base = SP - PL_stack_base;
a0d0e21e 5538 orig = s;
07bc277f 5539 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
613f191e
TS
5540 if (do_utf8) {
5541 while (*s == ' ' || is_utf8_space((U8*)s))
5542 s += UTF8SKIP(s);
5543 }
a62b1201 5544 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
bbce6d69 5545 while (isSPACE_LC(*s))
5546 s++;
5547 }
5548 else {
5549 while (isSPACE(*s))
5550 s++;
5551 }
a0d0e21e 5552 }
73134a2e 5553 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) {
7fba1cd6 5554 multiline = 1;
c07a80fd 5555 }
5556
941446f6
FC
5557 gimme_scalar = gimme == G_SCALAR && !ary;
5558
a0d0e21e
LW
5559 if (!limit)
5560 limit = maxiters + 2;
07bc277f 5561 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
a0d0e21e 5562 while (--limit) {
bbce6d69 5563 m = s;
8727f688
YO
5564 /* this one uses 'm' and is a negative test */
5565 if (do_utf8) {
613f191e
TS
5566 while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
5567 const int t = UTF8SKIP(m);
5568 /* is_utf8_space returns FALSE for malform utf8 */
5569 if (strend - m < t)
5570 m = strend;
5571 else
5572 m += t;
5573 }
a62b1201
KW
5574 }
5575 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
8727f688
YO
5576 while (m < strend && !isSPACE_LC(*m))
5577 ++m;
5578 } else {
5579 while (m < strend && !isSPACE(*m))
5580 ++m;
5581 }
a0d0e21e
LW
5582 if (m >= strend)
5583 break;
bbce6d69 5584
c1a7495a
BB
5585 if (gimme_scalar) {
5586 iters++;
5587 if (m-s == 0)
5588 trailing_empty++;
5589 else
5590 trailing_empty = 0;
5591 } else {
5592 dstr = newSVpvn_flags(s, m-s,
5593 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5594 XPUSHs(dstr);
5595 }
bbce6d69 5596
613f191e
TS
5597 /* skip the whitespace found last */
5598 if (do_utf8)
5599 s = m + UTF8SKIP(m);
5600 else
5601 s = m + 1;
5602
8727f688
YO
5603 /* this one uses 's' and is a positive test */
5604 if (do_utf8) {
613f191e 5605 while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
8727f688 5606 s += UTF8SKIP(s);
a62b1201
KW
5607 }
5608 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
8727f688
YO
5609 while (s < strend && isSPACE_LC(*s))
5610 ++s;
5611 } else {
5612 while (s < strend && isSPACE(*s))
5613 ++s;
5614 }
79072805
LW
5615 }
5616 }
07bc277f 5617 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
a0d0e21e 5618 while (--limit) {
a6e20a40
AL
5619 for (m = s; m < strend && *m != '\n'; m++)
5620 ;
a0d0e21e
LW
5621 m++;
5622 if (m >= strend)
5623 break;
c1a7495a
BB
5624
5625 if (gimme_scalar) {
5626 iters++;
5627 if (m-s == 0)
5628 trailing_empty++;
5629 else
5630 trailing_empty = 0;
5631 } else {
5632 dstr = newSVpvn_flags(s, m-s,
5633 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5634 XPUSHs(dstr);
5635 }
a0d0e21e
LW
5636 s = m;
5637 }
5638 }
07bc277f 5639 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
640f820d
AB
5640 /*
5641 Pre-extend the stack, either the number of bytes or
5642 characters in the string or a limited amount, triggered by:
5643
5644 my ($x, $y) = split //, $str;
5645 or
5646 split //, $str, $i;
5647 */
c1a7495a
BB
5648 if (!gimme_scalar) {
5649 const U32 items = limit - 1;
5650 if (items < slen)
5651 EXTEND(SP, items);
5652 else
5653 EXTEND(SP, slen);
5654 }
640f820d 5655
e9515b0f
AB
5656 if (do_utf8) {
5657 while (--limit) {
5658 /* keep track of how many bytes we skip over */
5659 m = s;
640f820d 5660 s += UTF8SKIP(s);
c1a7495a
BB
5661 if (gimme_scalar) {
5662 iters++;
5663 if (s-m == 0)
5664 trailing_empty++;
5665 else
5666 trailing_empty = 0;
5667 } else {
5668 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
640f820d 5669
c1a7495a
BB
5670 PUSHs(dstr);
5671 }
640f820d 5672
e9515b0f
AB
5673 if (s >= strend)
5674 break;
5675 }
5676 } else {
5677 while (--limit) {
c1a7495a
BB
5678 if (gimme_scalar) {
5679 iters++;
5680 } else {
5681 dstr = newSVpvn(s, 1);
e9515b0f 5682
e9515b0f 5683
c1a7495a
BB
5684 if (make_mortal)
5685 sv_2mortal(dstr);
640f820d 5686
c1a7495a
BB
5687 PUSHs(dstr);
5688 }
5689
5690 s++;
e9515b0f
AB
5691
5692 if (s >= strend)
5693 break;
5694 }
640f820d
AB
5695 }
5696 }
3c8556c3 5697 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
07bc277f
NC
5698 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
5699 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
5700 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
5701 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
f9f4320a 5702 SV * const csv = CALLREG_INTUIT_STRING(rx);
cf93c79d 5703
07bc277f 5704 len = RX_MINLENRET(rx);
3c8556c3 5705 if (len == 1 && !RX_UTF8(rx) && !tail) {
1b6737cc 5706 const char c = *SvPV_nolen_const(csv);
a0d0e21e 5707 while (--limit) {
a6e20a40
AL
5708 for (m = s; m < strend && *m != c; m++)
5709 ;
a0d0e21e
LW
5710 if (m >= strend)
5711 break;
c1a7495a
BB
5712 if (gimme_scalar) {
5713 iters++;
5714 if (m-s == 0)
5715 trailing_empty++;
5716 else
5717 trailing_empty = 0;
5718 } else {
5719 dstr = newSVpvn_flags(s, m-s,
5720 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5721 XPUSHs(dstr);
5722 }
93f04dac
JH
5723 /* The rx->minlen is in characters but we want to step
5724 * s ahead by bytes. */
1aa99e6b
IH
5725 if (do_utf8)
5726 s = (char*)utf8_hop((U8*)m, len);
5727 else
5728 s = m + len; /* Fake \n at the end */
a0d0e21e
LW
5729 }
5730 }
5731 else {
a0d0e21e 5732 while (s < strend && --limit &&
f722798b 5733 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
7fba1cd6 5734 csv, multiline ? FBMrf_MULTILINE : 0)) )
a0d0e21e 5735 {
c1a7495a
BB
5736 if (gimme_scalar) {
5737 iters++;
5738 if (m-s == 0)
5739 trailing_empty++;
5740 else
5741 trailing_empty = 0;
5742 } else {
5743 dstr = newSVpvn_flags(s, m-s,
5744 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5745 XPUSHs(dstr);
5746 }
93f04dac
JH
5747 /* The rx->minlen is in characters but we want to step
5748 * s ahead by bytes. */
1aa99e6b
IH
5749 if (do_utf8)
5750 s = (char*)utf8_hop((U8*)m, len);
5751 else
5752 s = m + len; /* Fake \n at the end */
a0d0e21e 5753 }
463ee0b2 5754 }
463ee0b2 5755 }
a0d0e21e 5756 else {
07bc277f 5757 maxiters += slen * RX_NPARENS(rx);
080c2dec 5758 while (s < strend && --limit)
bbce6d69 5759 {
1b6737cc 5760 I32 rex_return;
080c2dec 5761 PUTBACK;
f9f4320a 5762 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
bfafcb9a 5763 sv, NULL, SvSCREAM(sv) ? REXEC_SCREAM : 0);
080c2dec 5764 SPAGAIN;
1b6737cc 5765 if (rex_return == 0)
080c2dec 5766 break;
d9f97599 5767 TAINT_IF(RX_MATCH_TAINTED(rx));
07bc277f 5768 if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
a0d0e21e
LW
5769 m = s;
5770 s = orig;
07bc277f 5771 orig = RX_SUBBEG(rx);
a0d0e21e
LW
5772 s = orig + (m - s);
5773 strend = s + (strend - m);
5774 }
07bc277f 5775 m = RX_OFFS(rx)[0].start + orig;
c1a7495a
BB
5776
5777 if (gimme_scalar) {
5778 iters++;
5779 if (m-s == 0)
5780 trailing_empty++;
5781 else
5782 trailing_empty = 0;
5783 } else {
5784 dstr = newSVpvn_flags(s, m-s,
5785 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5786 XPUSHs(dstr);
5787 }
07bc277f 5788 if (RX_NPARENS(rx)) {
1b6737cc 5789 I32 i;
07bc277f
NC
5790 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
5791 s = RX_OFFS(rx)[i].start + orig;
5792 m = RX_OFFS(rx)[i].end + orig;
6de67870
JP
5793
5794 /* japhy (07/27/01) -- the (m && s) test doesn't catch
5795 parens that didn't match -- they should be set to
5796 undef, not the empty string */
c1a7495a
BB
5797 if (gimme_scalar) {
5798 iters++;
5799 if (m-s == 0)
5800 trailing_empty++;
5801 else
5802 trailing_empty = 0;
5803 } else {
5804 if (m >= orig && s >= orig) {
5805 dstr = newSVpvn_flags(s, m-s,
5806 (do_utf8 ? SVf_UTF8 : 0)
5807 | make_mortal);
5808 }
5809 else
5810 dstr = &PL_sv_undef; /* undef, not "" */
5811 XPUSHs(dstr);
748a9306 5812 }
c1a7495a 5813
a0d0e21e
LW
5814 }
5815 }
07bc277f 5816 s = RX_OFFS(rx)[0].end + orig;
a0d0e21e 5817 }
79072805 5818 }
8ec5e241 5819
c1a7495a
BB
5820 if (!gimme_scalar) {
5821 iters = (SP - PL_stack_base) - base;
5822 }
a0d0e21e 5823 if (iters > maxiters)
cea2e8a9 5824 DIE(aTHX_ "Split loop");
8ec5e241 5825
a0d0e21e
LW
5826 /* keep field after final delim? */
5827 if (s < strend || (iters && origlimit)) {
c1a7495a
BB
5828 if (!gimme_scalar) {
5829 const STRLEN l = strend - s;
5830 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5831 XPUSHs(dstr);
5832 }
a0d0e21e 5833 iters++;
79072805 5834 }
a0d0e21e 5835 else if (!origlimit) {
c1a7495a
BB
5836 if (gimme_scalar) {
5837 iters -= trailing_empty;
5838 } else {
5839 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
5840 if (TOPs && !make_mortal)
5841 sv_2mortal(TOPs);
5842 *SP-- = &PL_sv_undef;
5843 iters--;
5844 }
89900bd3 5845 }
a0d0e21e 5846 }
8ec5e241 5847
8b7059b1
DM
5848 PUTBACK;
5849 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
5850 SPAGAIN;
a0d0e21e 5851 if (realarray) {
8ec5e241 5852 if (!mg) {
1c0b011c
NIS
5853 if (SvSMAGICAL(ary)) {
5854 PUTBACK;
ad64d0ec 5855 mg_set(MUTABLE_SV(ary));
1c0b011c
NIS
5856 SPAGAIN;
5857 }
5858 if (gimme == G_ARRAY) {
5859 EXTEND(SP, iters);
5860 Copy(AvARRAY(ary), SP + 1, iters, SV*);
5861 SP += iters;
5862 RETURN;
5863 }
8ec5e241 5864 }
1c0b011c 5865 else {
fb73857a 5866 PUTBACK;
d343c3ef 5867 ENTER_with_name("call_PUSH");
864dbfa3 5868 call_method("PUSH",G_SCALAR|G_DISCARD);
d343c3ef 5869 LEAVE_with_name("call_PUSH");
fb73857a 5870 SPAGAIN;
8ec5e241 5871 if (gimme == G_ARRAY) {
1b6737cc 5872 I32 i;
8ec5e241
NIS
5873 /* EXTEND should not be needed - we just popped them */
5874 EXTEND(SP, iters);
5875 for (i=0; i < iters; i++) {
5876 SV **svp = av_fetch(ary, i, FALSE);
3280af22 5877 PUSHs((svp) ? *svp : &PL_sv_undef);
8ec5e241 5878 }
1c0b011c
NIS
5879 RETURN;
5880 }
a0d0e21e
LW
5881 }
5882 }
5883 else {
5884 if (gimme == G_ARRAY)
5885 RETURN;
5886 }
7f18b612
YST
5887
5888 GETTARGET;
5889 PUSHi(iters);
5890 RETURN;
79072805 5891}
85e6fe83 5892
c5917253
NC
5893PP(pp_once)
5894{
5895 dSP;
5896 SV *const sv = PAD_SVl(PL_op->op_targ);
5897
5898 if (SvPADSTALE(sv)) {
5899 /* First time. */
5900 SvPADSTALE_off(sv);
5901 RETURNOP(cLOGOP->op_other);
5902 }
5903 RETURNOP(cLOGOP->op_next);
5904}
5905
c0329465
MB
5906PP(pp_lock)
5907{
97aff369 5908 dVAR;
39644a26 5909 dSP;
c0329465 5910 dTOPss;
e55aaa0e 5911 SV *retsv = sv;
076a2a80 5912 assert(SvTYPE(retsv) != SVt_PVCV);
68795e93 5913 SvLOCK(sv);
076a2a80 5914 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV) {
e55aaa0e
MB
5915 retsv = refto(retsv);
5916 }
5917 SETs(retsv);
c0329465
MB
5918 RETURN;
5919}
a863c7d1 5920
65bca31a
NC
5921
5922PP(unimplemented_op)
5923{
97aff369 5924 dVAR;
361ed549
NC
5925 const Optype op_type = PL_op->op_type;
5926 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope
5927 with out of range op numbers - it only "special" cases op_custom.
5928 Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
5929 if we get here for a custom op then that means that the custom op didn't
5930 have an implementation. Given that OP_NAME() looks up the custom op
5931 by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
5932 registers &PL_unimplemented_op as the address of their custom op.
5933 NULL doesn't generate a useful error message. "custom" does. */
5934 const char *const name = op_type >= OP_max
5935 ? "[out of range]" : PL_op_name[PL_op->op_type];
7627e6d0
NC
5936 if(OP_IS_SOCKET(op_type))
5937 DIE(aTHX_ PL_no_sock_func, name);
361ed549 5938 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
65bca31a
NC
5939}
5940
867fa1e2
YO
5941PP(pp_boolkeys)
5942{
5943 dVAR;
5944 dSP;
5945 HV * const hv = (HV*)POPs;
5946
fd1d9b5c
FC
5947 if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; }
5948
867fa1e2
YO
5949 if (SvRMAGICAL(hv)) {
5950 MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied);
5951 if (mg) {
5952 XPUSHs(magic_scalarpack(hv, mg));
5953 RETURN;
5954 }
5955 }
5956
1b95d04f 5957 XPUSHs(boolSV(HvUSEDKEYS(hv) != 0));
867fa1e2
YO
5958 RETURN;
5959}
5960
e609e586
NC
5961/*
5962 * Local variables:
5963 * c-indentation-style: bsd
5964 * c-basic-offset: 4
5965 * indent-tabs-mode: t
5966 * End:
5967 *
37442d52
RGS
5968 * ex: set ts=8 sts=4 sw=4 noet:
5969 */