This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
add a couple missing LEAVEs in perlio_async_run()
[perl5.git] / pp.c
CommitLineData
a0d0e21e 1/* pp.c
79072805 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
79072805 5 *
a0d0e21e
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
79072805 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
15 *
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
a0d0e21e 17 */
79072805 18
166f8a29
DM
19/* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
24 */
25
79072805 26#include "EXTERN.h"
864dbfa3 27#define PERL_IN_PP_C
79072805 28#include "perl.h"
77bc9082 29#include "keywords.h"
79072805 30
a4af207c
JH
31#include "reentr.h"
32
dfe9444c
AD
33/* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
35 --AD 2/20/1998
36*/
/* Declare getpid() ourselves when the build configuration asks for it
 * by defining NEED_GETPID_PROTO. */
#if defined(NEED_GETPID_PROTO)
extern Pid_t getpid(void);
#endif
40
0630166f
SP
41/*
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
44 */
/* Select IEEE math-library behaviour where LIBM_LIB_VERSION is defined. */
#ifdef LIBM_LIB_VERSION
_LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
#endif
48
13017935
SM
49/* variations on pp_null */
50
93a17b20
LW
51PP(pp_stub)
52{
97aff369 53 dVAR;
39644a26 54 dSP;
54310121 55 if (GIMME_V == G_SCALAR)
3280af22 56 XPUSHs(&PL_sv_undef);
93a17b20
LW
57 RETURN;
58}
59
79072805
LW
60/* Pushy stuff. */
61
93a17b20
LW
62PP(pp_padav)
63{
97aff369 64 dVAR; dSP; dTARGET;
13017935 65 I32 gimme;
e190e9b4 66 assert(SvTYPE(TARG) == SVt_PVAV);
533c011a 67 if (PL_op->op_private & OPpLVAL_INTRO)
a5911867
RGS
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
85e6fe83 70 EXTEND(SP, 1);
533c011a 71 if (PL_op->op_flags & OPf_REF) {
85e6fe83 72 PUSHs(TARG);
93a17b20 73 RETURN;
40c94d11
FC
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
78f9721b
SM
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
79 PUSHs(TARG);
80 RETURN;
40c94d11 81 }
85e6fe83 82 }
13017935
SM
83 gimme = GIMME_V;
84 if (gimme == G_ARRAY) {
502c6561 85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
85e6fe83 86 EXTEND(SP, maxarg);
93965878
NIS
87 if (SvMAGICAL(TARG)) {
88 U32 i;
eb160463 89 for (i=0; i < (U32)maxarg; i++) {
502c6561 90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
3280af22 91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
93965878
NIS
92 }
93 }
94 else {
502c6561 95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
93965878 96 }
85e6fe83
LW
97 SP += maxarg;
98 }
13017935 99 else if (gimme == G_SCALAR) {
1b6737cc 100 SV* const sv = sv_newmortal();
502c6561 101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
85e6fe83
LW
102 sv_setiv(sv, maxarg);
103 PUSHs(sv);
104 }
105 RETURN;
93a17b20
LW
106}
107
108PP(pp_padhv)
109{
97aff369 110 dVAR; dSP; dTARGET;
54310121 111 I32 gimme;
112
e190e9b4 113 assert(SvTYPE(TARG) == SVt_PVHV);
93a17b20 114 XPUSHs(TARG);
533c011a 115 if (PL_op->op_private & OPpLVAL_INTRO)
a5911867
RGS
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
533c011a 118 if (PL_op->op_flags & OPf_REF)
93a17b20 119 RETURN;
40c94d11
FC
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
78f9721b
SM
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
125 RETURN;
40c94d11 126 }
78f9721b 127 }
54310121 128 gimme = GIMME_V;
129 if (gimme == G_ARRAY) {
981b7185 130 RETURNOP(Perl_do_kv(aTHX));
85e6fe83 131 }
54310121 132 else if (gimme == G_SCALAR) {
85fbaab2 133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
85e6fe83 134 SETs(sv);
85e6fe83 135 }
54310121 136 RETURN;
93a17b20
LW
137}
138
79072805
LW
139/* Translations. */
140
4bdf8368 141static const char S_no_symref_sv[] =
def89bff
NC
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
143
6f7909da
FC
144/* In some cases this function inspects PL_op. If this function is called
145 for new op types, more bool parameters may need to be added in place of
146 the checks.
147
148 When noinit is true, the absence of a gv will cause a retval of undef.
149 This is unrelated to the cv-to-gv assignment case.
8ec5e241 150
6f7909da
FC
151 Make sure to use SPAGAIN after calling this.
152*/
153
154static SV *
155S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
156 const bool noinit)
157{
14f0f125 158 dVAR;
f64c9ac5 159 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
ed6116ce 160 if (SvROK(sv)) {
93d7320b
DM
161 if (SvAMAGIC(sv)) {
162 sv = amagic_deref_call(sv, to_gv_amg);
93d7320b 163 }
e4a1664f 164 wasref:
ed6116ce 165 sv = SvRV(sv);
b1dadf13 166 if (SvTYPE(sv) == SVt_PVIO) {
159b6efe 167 GV * const gv = MUTABLE_GV(sv_newmortal());
b1dadf13 168 gv_init(gv, 0, "", 0, 0);
a45c7426 169 GvIOp(gv) = MUTABLE_IO(sv);
b37c2d43 170 SvREFCNT_inc_void_NN(sv);
ad64d0ec 171 sv = MUTABLE_SV(gv);
ef54e1a4 172 }
6e592b3a 173 else if (!isGV_with_GP(sv))
6f7909da 174 return (SV *)Perl_die(aTHX_ "Not a GLOB reference");
79072805
LW
175 }
176 else {
6e592b3a 177 if (!isGV_with_GP(sv)) {
f132ae69 178 if (!SvOK(sv)) {
b13b2135 179 /* If this is a 'my' scalar and flag is set then vivify
853846ea 180 * NI-S 1999/05/07
b13b2135 181 */
f132ae69 182 if (vivify_sv && sv != &PL_sv_undef) {
2c8ac474 183 GV *gv;
ce74145d
FC
184 if (SvREADONLY(sv))
185 Perl_croak_no_modify(aTHX);
2c8ac474
GS
186 if (cUNOP->op_targ) {
187 STRLEN len;
0bd48802
AL
188 SV * const namesv = PAD_SV(cUNOP->op_targ);
189 const char * const name = SvPV(namesv, len);
159b6efe 190 gv = MUTABLE_GV(newSV(0));
2c8ac474
GS
191 gv_init(gv, CopSTASH(PL_curcop), name, len, 0);
192 }
193 else {
0bd48802 194 const char * const name = CopSTASHPV(PL_curcop);
2c8ac474 195 gv = newGVgen(name);
1d8d4d2a 196 }
43230e26 197 prepare_SV_for_RV(sv);
ad64d0ec 198 SvRV_set(sv, MUTABLE_SV(gv));
853846ea 199 SvROK_on(sv);
1d8d4d2a 200 SvSETMAGIC(sv);
853846ea 201 goto wasref;
2c8ac474 202 }
6f7909da
FC
203 if (PL_op->op_flags & OPf_REF || strict)
204 return (SV *)Perl_die(aTHX_ PL_no_usym, "a symbol");
599cee73 205 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 206 report_uninit(sv);
6f7909da 207 return &PL_sv_undef;
a0d0e21e 208 }
6f7909da 209 if (noinit)
35cd451c 210 {
77cb3b01
FC
211 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
212 sv, GV_ADDMG, SVt_PVGV
23496c6e 213 ))))
6f7909da 214 return &PL_sv_undef;
35cd451c
GS
215 }
216 else {
6f7909da
FC
217 if (strict)
218 return
219 (SV *)Perl_die(aTHX_
220 S_no_symref_sv,
221 sv,
222 (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""),
223 "a symbol"
224 );
e26df76a
NC
225 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
226 == OPpDONT_INIT_GV) {
227 /* We are the target of a coderef assignment. Return
228 the scalar unchanged, and let pp_sasssign deal with
229 things. */
6f7909da 230 return sv;
e26df76a 231 }
77cb3b01 232 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
35cd451c 233 }
2acc3314 234 /* FAKE globs in the symbol table cause weird bugs (#77810) */
96293f45 235 SvFAKE_off(sv);
93a17b20 236 }
79072805 237 }
96293f45 238 if (SvFAKE(sv)) {
2acc3314 239 SV *newsv = sv_newmortal();
5cf4b255 240 sv_setsv_flags(newsv, sv, 0);
2acc3314 241 SvFAKE_off(newsv);
d8906c05 242 sv = newsv;
2acc3314 243 }
6f7909da
FC
244 return sv;
245}
246
247PP(pp_rv2gv)
248{
249 dVAR; dSP; dTOPss;
250
251 sv = S_rv2gv(aTHX_
252 sv, PL_op->op_private & OPpDEREF,
253 PL_op->op_private & HINT_STRICT_REFS,
254 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
255 || PL_op->op_type == OP_READLINE
256 );
257 SPAGAIN;
d8906c05
FC
258 if (PL_op->op_private & OPpLVAL_INTRO)
259 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
260 SETs(sv);
79072805
LW
261 RETURN;
262}
263
dc3c76f8
NC
264/* Helper function for pp_rv2sv and pp_rv2av */
265GV *
fe9845cc
RB
266Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
267 const svtype type, SV ***spp)
dc3c76f8
NC
268{
269 dVAR;
270 GV *gv;
271
7918f24d
NC
272 PERL_ARGS_ASSERT_SOFTREF2XV;
273
dc3c76f8
NC
274 if (PL_op->op_private & HINT_STRICT_REFS) {
275 if (SvOK(sv))
10b53e54 276 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
dc3c76f8
NC
277 else
278 Perl_die(aTHX_ PL_no_usym, what);
279 }
280 if (!SvOK(sv)) {
fd1d9b5c
FC
281 if (
282 PL_op->op_flags & OPf_REF &&
283 PL_op->op_next->op_type != OP_BOOLKEYS
284 )
dc3c76f8
NC
285 Perl_die(aTHX_ PL_no_usym, what);
286 if (ckWARN(WARN_UNINITIALIZED))
287 report_uninit(sv);
288 if (type != SVt_PV && GIMME_V == G_ARRAY) {
289 (*spp)--;
290 return NULL;
291 }
292 **spp = &PL_sv_undef;
293 return NULL;
294 }
295 if ((PL_op->op_flags & OPf_SPECIAL) &&
296 !(PL_op->op_flags & OPf_MOD))
297 {
77cb3b01 298 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
dc3c76f8
NC
299 {
300 **spp = &PL_sv_undef;
301 return NULL;
302 }
303 }
304 else {
77cb3b01 305 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
dc3c76f8
NC
306 }
307 return gv;
308}
309
79072805
LW
310PP(pp_rv2sv)
311{
97aff369 312 dVAR; dSP; dTOPss;
c445ea15 313 GV *gv = NULL;
79072805 314
9026059d 315 SvGETMAGIC(sv);
ed6116ce 316 if (SvROK(sv)) {
93d7320b
DM
317 if (SvAMAGIC(sv)) {
318 sv = amagic_deref_call(sv, to_sv_amg);
319 SPAGAIN;
320 }
f5284f61 321
ed6116ce 322 sv = SvRV(sv);
79072805
LW
323 switch (SvTYPE(sv)) {
324 case SVt_PVAV:
325 case SVt_PVHV:
326 case SVt_PVCV:
cbae9b9f
YST
327 case SVt_PVFM:
328 case SVt_PVIO:
cea2e8a9 329 DIE(aTHX_ "Not a SCALAR reference");
42d0e0b7 330 default: NOOP;
79072805
LW
331 }
332 }
333 else {
159b6efe 334 gv = MUTABLE_GV(sv);
748a9306 335
6e592b3a 336 if (!isGV_with_GP(gv)) {
dc3c76f8
NC
337 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
338 if (!gv)
339 RETURN;
463ee0b2 340 }
29c711a3 341 sv = GvSVn(gv);
a0d0e21e 342 }
533c011a 343 if (PL_op->op_flags & OPf_MOD) {
82d03984
RGS
344 if (PL_op->op_private & OPpLVAL_INTRO) {
345 if (cUNOP->op_first->op_type == OP_NULL)
159b6efe 346 sv = save_scalar(MUTABLE_GV(TOPs));
82d03984
RGS
347 else if (gv)
348 sv = save_scalar(gv);
349 else
f1f66076 350 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
82d03984 351 }
533c011a 352 else if (PL_op->op_private & OPpDEREF)
9026059d 353 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
79072805 354 }
a0d0e21e 355 SETs(sv);
79072805
LW
356 RETURN;
357}
358
359PP(pp_av2arylen)
360{
97aff369 361 dVAR; dSP;
502c6561 362 AV * const av = MUTABLE_AV(TOPs);
02d85cc3
EB
363 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
364 if (lvalue) {
365 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
366 if (!*sv) {
367 *sv = newSV_type(SVt_PVMG);
368 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
369 }
370 SETs(*sv);
371 } else {
e1dccc0d 372 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
79072805 373 }
79072805
LW
374 RETURN;
375}
376
a0d0e21e
LW
377PP(pp_pos)
378{
2154eca7 379 dVAR; dSP; dPOPss;
8ec5e241 380
78f9721b 381 if (PL_op->op_flags & OPf_MOD || LVRET) {
16eb5365
FC
382 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
383 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
384 LvTYPE(ret) = '.';
385 LvTARG(ret) = SvREFCNT_inc_simple(sv);
2154eca7 386 PUSHs(ret); /* no SvSETMAGIC */
a0d0e21e
LW
387 RETURN;
388 }
389 else {
a0d0e21e 390 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
1b6737cc 391 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
565764a8 392 if (mg && mg->mg_len >= 0) {
2154eca7 393 dTARGET;
a0ed51b3 394 I32 i = mg->mg_len;
7e2040f0 395 if (DO_UTF8(sv))
a0ed51b3 396 sv_pos_b2u(sv, &i);
e1dccc0d 397 PUSHi(i);
a0d0e21e
LW
398 RETURN;
399 }
400 }
401 RETPUSHUNDEF;
402 }
403}
404
79072805
LW
405PP(pp_rv2cv)
406{
97aff369 407 dVAR; dSP;
79072805 408 GV *gv;
1eced8f8 409 HV *stash_unused;
c445ea15 410 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
9da346da 411 ? GV_ADDMG
c445ea15
AL
412 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
413 ? GV_ADD|GV_NOEXPAND
414 : GV_ADD;
4633a7c4
LW
415 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
416 /* (But not in defined().) */
e26df76a 417
1eced8f8 418 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
07055b4c
CS
419 if (cv) {
420 if (CvCLONE(cv))
ad64d0ec 421 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
d32f2495
SC
422 if ((PL_op->op_private & OPpLVAL_INTRO)) {
423 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
424 cv = GvCV(gv);
425 if (!CvLVALUE(cv))
426 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
427 }
07055b4c 428 }
e26df76a 429 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
ea726b52 430 cv = MUTABLE_CV(gv);
e26df76a 431 }
07055b4c 432 else
ea726b52 433 cv = MUTABLE_CV(&PL_sv_undef);
ad64d0ec 434 SETs(MUTABLE_SV(cv));
79072805
LW
435 RETURN;
436}
437
c07a80fd 438PP(pp_prototype)
439{
97aff369 440 dVAR; dSP;
c07a80fd 441 CV *cv;
442 HV *stash;
443 GV *gv;
fabdb6c0 444 SV *ret = &PL_sv_undef;
c07a80fd 445
b6c543e3 446 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
e3f73d4e 447 const char * s = SvPVX_const(TOPs);
b6c543e3 448 if (strnEQ(s, "CORE::", 6)) {
be1b855b 449 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
b66130dd
FC
450 if (!code || code == -KEY_CORE)
451 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
452 if (code < 0) { /* Overridable. */
453 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
454 if (sv) ret = sv;
455 }
b8c38f0a 456 goto set;
b6c543e3
IZ
457 }
458 }
f2c0649b 459 cv = sv_2cv(TOPs, &stash, &gv, 0);
5f05dabc 460 if (cv && SvPOK(cv))
59cd0e26 461 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
b6c543e3 462 set:
c07a80fd 463 SETs(ret);
464 RETURN;
465}
466
a0d0e21e
LW
467PP(pp_anoncode)
468{
97aff369 469 dVAR; dSP;
ea726b52 470 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
a5f75d66 471 if (CvCLONE(cv))
ad64d0ec 472 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
5f05dabc 473 EXTEND(SP,1);
ad64d0ec 474 PUSHs(MUTABLE_SV(cv));
a0d0e21e
LW
475 RETURN;
476}
477
478PP(pp_srefgen)
79072805 479{
97aff369 480 dVAR; dSP;
71be2cbc 481 *SP = refto(*SP);
79072805 482 RETURN;
8ec5e241 483}
a0d0e21e
LW
484
485PP(pp_refgen)
486{
97aff369 487 dVAR; dSP; dMARK;
a0d0e21e 488 if (GIMME != G_ARRAY) {
5f0b1d4e
GS
489 if (++MARK <= SP)
490 *MARK = *SP;
491 else
3280af22 492 *MARK = &PL_sv_undef;
5f0b1d4e
GS
493 *MARK = refto(*MARK);
494 SP = MARK;
495 RETURN;
a0d0e21e 496 }
bbce6d69 497 EXTEND_MORTAL(SP - MARK);
71be2cbc 498 while (++MARK <= SP)
499 *MARK = refto(*MARK);
a0d0e21e 500 RETURN;
79072805
LW
501}
502
76e3520e 503STATIC SV*
cea2e8a9 504S_refto(pTHX_ SV *sv)
71be2cbc 505{
97aff369 506 dVAR;
71be2cbc 507 SV* rv;
508
7918f24d
NC
509 PERL_ARGS_ASSERT_REFTO;
510
71be2cbc 511 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
512 if (LvTARGLEN(sv))
68dc0745 513 vivify_defelem(sv);
514 if (!(sv = LvTARG(sv)))
3280af22 515 sv = &PL_sv_undef;
0dd88869 516 else
b37c2d43 517 SvREFCNT_inc_void_NN(sv);
71be2cbc 518 }
d8b46c1b 519 else if (SvTYPE(sv) == SVt_PVAV) {
502c6561
NC
520 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
521 av_reify(MUTABLE_AV(sv));
d8b46c1b 522 SvTEMP_off(sv);
b37c2d43 523 SvREFCNT_inc_void_NN(sv);
d8b46c1b 524 }
f2933f5f
DM
525 else if (SvPADTMP(sv) && !IS_PADGV(sv))
526 sv = newSVsv(sv);
71be2cbc 527 else {
528 SvTEMP_off(sv);
b37c2d43 529 SvREFCNT_inc_void_NN(sv);
71be2cbc 530 }
531 rv = sv_newmortal();
4df7f6af 532 sv_upgrade(rv, SVt_IV);
b162af07 533 SvRV_set(rv, sv);
71be2cbc 534 SvROK_on(rv);
535 return rv;
536}
537
79072805
LW
538PP(pp_ref)
539{
97aff369 540 dVAR; dSP; dTARGET;
e1ec3a88 541 const char *pv;
1b6737cc 542 SV * const sv = POPs;
f12c7020 543
5b295bef
RD
544 if (sv)
545 SvGETMAGIC(sv);
f12c7020 546
a0d0e21e 547 if (!sv || !SvROK(sv))
4633a7c4 548 RETPUSHNO;
79072805 549
cba0b539
FR
550 pv = sv_reftype(SvRV(sv),TRUE);
551 PUSHp(pv, strlen(pv));
79072805
LW
552 RETURN;
553}
554
555PP(pp_bless)
556{
97aff369 557 dVAR; dSP;
463ee0b2 558 HV *stash;
79072805 559
463ee0b2 560 if (MAXARG == 1)
c2f922f1 561 curstash:
11faa288 562 stash = CopSTASH(PL_curcop);
7b8d334a 563 else {
1b6737cc 564 SV * const ssv = POPs;
7b8d334a 565 STRLEN len;
e1ec3a88 566 const char *ptr;
81689caa 567
c2f922f1
FC
568 if (!ssv) goto curstash;
569 if (!SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
81689caa 570 Perl_croak(aTHX_ "Attempt to bless into a reference");
5c144d81 571 ptr = SvPV_const(ssv,len);
a2a5de95
NC
572 if (len == 0)
573 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
574 "Explicit blessing to '' (assuming package main)");
da51bb9b 575 stash = gv_stashpvn(ptr, len, GV_ADD);
7b8d334a 576 }
a0d0e21e 577
5d3fdfeb 578 (void)sv_bless(TOPs, stash);
79072805
LW
579 RETURN;
580}
581
fb73857a 582PP(pp_gelem)
583{
97aff369 584 dVAR; dSP;
b13b2135 585
1b6737cc
AL
586 SV *sv = POPs;
587 const char * const elem = SvPV_nolen_const(sv);
159b6efe 588 GV * const gv = MUTABLE_GV(POPs);
c445ea15 589 SV * tmpRef = NULL;
1b6737cc 590
c445ea15 591 sv = NULL;
c4ba80c3
NC
592 if (elem) {
593 /* elem will always be NUL terminated. */
1b6737cc 594 const char * const second_letter = elem + 1;
c4ba80c3
NC
595 switch (*elem) {
596 case 'A':
1b6737cc 597 if (strEQ(second_letter, "RRAY"))
ad64d0ec 598 tmpRef = MUTABLE_SV(GvAV(gv));
c4ba80c3
NC
599 break;
600 case 'C':
1b6737cc 601 if (strEQ(second_letter, "ODE"))
ad64d0ec 602 tmpRef = MUTABLE_SV(GvCVu(gv));
c4ba80c3
NC
603 break;
604 case 'F':
1b6737cc 605 if (strEQ(second_letter, "ILEHANDLE")) {
c4ba80c3
NC
606 /* finally deprecated in 5.8.0 */
607 deprecate("*glob{FILEHANDLE}");
ad64d0ec 608 tmpRef = MUTABLE_SV(GvIOp(gv));
c4ba80c3
NC
609 }
610 else
1b6737cc 611 if (strEQ(second_letter, "ORMAT"))
ad64d0ec 612 tmpRef = MUTABLE_SV(GvFORM(gv));
c4ba80c3
NC
613 break;
614 case 'G':
1b6737cc 615 if (strEQ(second_letter, "LOB"))
ad64d0ec 616 tmpRef = MUTABLE_SV(gv);
c4ba80c3
NC
617 break;
618 case 'H':
1b6737cc 619 if (strEQ(second_letter, "ASH"))
ad64d0ec 620 tmpRef = MUTABLE_SV(GvHV(gv));
c4ba80c3
NC
621 break;
622 case 'I':
1b6737cc 623 if (*second_letter == 'O' && !elem[2])
ad64d0ec 624 tmpRef = MUTABLE_SV(GvIOp(gv));
c4ba80c3
NC
625 break;
626 case 'N':
1b6737cc 627 if (strEQ(second_letter, "AME"))
a663657d 628 sv = newSVhek(GvNAME_HEK(gv));
c4ba80c3
NC
629 break;
630 case 'P':
1b6737cc 631 if (strEQ(second_letter, "ACKAGE")) {
7fa3a4ab
NC
632 const HV * const stash = GvSTASH(gv);
633 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
396482e1 634 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
c4ba80c3
NC
635 }
636 break;
637 case 'S':
1b6737cc 638 if (strEQ(second_letter, "CALAR"))
f9d52e31 639 tmpRef = GvSVn(gv);
c4ba80c3 640 break;
39b99f21 641 }
fb73857a 642 }
76e3520e
GS
643 if (tmpRef)
644 sv = newRV(tmpRef);
fb73857a 645 if (sv)
646 sv_2mortal(sv);
647 else
3280af22 648 sv = &PL_sv_undef;
fb73857a 649 XPUSHs(sv);
650 RETURN;
651}
652
a0d0e21e 653/* Pattern matching */
79072805 654
a0d0e21e 655PP(pp_study)
79072805 656{
97aff369 657 dVAR; dSP; dPOPss;
a0d0e21e 658 register unsigned char *s;
72de20cd 659 char *sfirst_raw;
a0d0e21e 660 STRLEN len;
4185c919 661 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
72de20cd
NC
662 U8 quanta;
663 STRLEN size;
4185c919
NC
664
665 if (mg && SvSCREAM(sv))
666 RETPUSHYES;
a0d0e21e 667
a4f4e906 668 s = (unsigned char*)(SvPV(sv, len));
bc9a5256 669 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
a4f4e906
NC
670 /* No point in studying a zero length string, and not safe to study
671 anything that doesn't appear to be a simple scalar (and hence might
672 change between now and when the regexp engine runs without our set
bd473224 673 magic ever running) such as a reference to an object with overloaded
bc9a5256
NC
674 stringification. Also refuse to study an FBM scalar, as this gives
675 more flexibility in SV flag usage. No real-world code would ever
676 end up studying an FBM scalar, so this isn't a real pessimisation.
72de20cd
NC
677 Endemic use of I32 in Perl_screaminstr makes it hard to safely push
678 the study length limit from I32_MAX to U32_MAX - 1.
bc9a5256 679 */
a4f4e906
NC
680 RETPUSHNO;
681 }
682
72de20cd
NC
683 if (len < 0xFF) {
684 quanta = 1;
685 } else if (len < 0xFFFF) {
686 quanta = 2;
687 } else
688 quanta = 4;
a0d0e21e 689
72de20cd
NC
690 size = (256 + len) * quanta;
691 sfirst_raw = (char *)safemalloc(size);
692
693 if (!sfirst_raw)
cea2e8a9 694 DIE(aTHX_ "do_study: out of memory");
a0d0e21e 695
4185c919
NC
696 SvSCREAM_on(sv);
697 if (!mg)
698 mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
72de20cd
NC
699 mg->mg_ptr = sfirst_raw;
700 mg->mg_len = size;
701 mg->mg_private = quanta;
702
703 memset(sfirst_raw, ~0, 256 * quanta);
704
705 /* The assumption here is that most studied strings are fairly short, hence
706 the pain of the extra code is worth it, given the memory savings.
707 80 character string, 336 bytes as U8, down from 1344 as U32
708 800 character string, 2112 bytes as U16, down from 4224 as U32
709 */
710
711 if (quanta == 1) {
712 U8 *const sfirst = (U8 *)sfirst_raw;
713 U8 *const snext = sfirst + 256;
714 while (len-- > 0) {
715 const U8 ch = s[len];
716 snext[len] = sfirst[ch];
717 sfirst[ch] = len;
718 }
719 } else if (quanta == 2) {
720 U16 *const sfirst = (U16 *)sfirst_raw;
721 U16 *const snext = sfirst + 256;
722 while (len-- > 0) {
723 const U8 ch = s[len];
724 snext[len] = sfirst[ch];
725 sfirst[ch] = len;
726 }
727 } else {
728 U32 *const sfirst = (U32 *)sfirst_raw;
729 U32 *const snext = sfirst + 256;
730 while (len-- > 0) {
731 const U8 ch = s[len];
732 snext[len] = sfirst[ch];
733 sfirst[ch] = len;
734 }
79072805
LW
735 }
736
1e422769 737 RETPUSHYES;
79072805
LW
738}
739
a0d0e21e 740PP(pp_trans)
79072805 741{
97aff369 742 dVAR; dSP; dTARG;
a0d0e21e
LW
743 SV *sv;
744
533c011a 745 if (PL_op->op_flags & OPf_STACKED)
a0d0e21e 746 sv = POPs;
59f00321
RGS
747 else if (PL_op->op_private & OPpTARGET_MY)
748 sv = GETTARGET;
79072805 749 else {
54b9620d 750 sv = DEFSV;
a0d0e21e 751 EXTEND(SP,1);
79072805 752 }
adbc6bb1 753 TARG = sv_newmortal();
bb16bae8
FC
754 if(PL_op->op_type == OP_TRANSR) {
755 SV * const newsv = newSVsv(sv);
756 do_trans(newsv);
757 mPUSHs(newsv);
758 }
759 else PUSHi(do_trans(sv));
a0d0e21e 760 RETURN;
79072805
LW
761}
762
a0d0e21e 763/* Lvalue operators. */
79072805 764
81745e4e
NC
765static void
766S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
767{
768 dVAR;
769 STRLEN len;
770 char *s;
771
772 PERL_ARGS_ASSERT_DO_CHOMP;
773
774 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
775 return;
776 if (SvTYPE(sv) == SVt_PVAV) {
777 I32 i;
778 AV *const av = MUTABLE_AV(sv);
779 const I32 max = AvFILL(av);
780
781 for (i = 0; i <= max; i++) {
782 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
783 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
784 do_chomp(retval, sv, chomping);
785 }
786 return;
787 }
788 else if (SvTYPE(sv) == SVt_PVHV) {
789 HV* const hv = MUTABLE_HV(sv);
790 HE* entry;
791 (void)hv_iterinit(hv);
792 while ((entry = hv_iternext(hv)))
793 do_chomp(retval, hv_iterval(hv,entry), chomping);
794 return;
795 }
796 else if (SvREADONLY(sv)) {
797 if (SvFAKE(sv)) {
798 /* SV is copy-on-write */
799 sv_force_normal_flags(sv, 0);
800 }
801 if (SvREADONLY(sv))
802 Perl_croak_no_modify(aTHX);
803 }
804
805 if (PL_encoding) {
806 if (!SvUTF8(sv)) {
807 /* XXX, here sv is utf8-ized as a side-effect!
808 If encoding.pm is used properly, almost string-generating
809 operations, including literal strings, chr(), input data, etc.
810 should have been utf8-ized already, right?
811 */
812 sv_recode_to_utf8(sv, PL_encoding);
813 }
814 }
815
816 s = SvPV(sv, len);
817 if (chomping) {
818 char *temp_buffer = NULL;
819 SV *svrecode = NULL;
820
821 if (s && len) {
822 s += --len;
823 if (RsPARA(PL_rs)) {
824 if (*s != '\n')
825 goto nope;
826 ++SvIVX(retval);
827 while (len && s[-1] == '\n') {
828 --len;
829 --s;
830 ++SvIVX(retval);
831 }
832 }
833 else {
834 STRLEN rslen, rs_charlen;
835 const char *rsptr = SvPV_const(PL_rs, rslen);
836
837 rs_charlen = SvUTF8(PL_rs)
838 ? sv_len_utf8(PL_rs)
839 : rslen;
840
841 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
842 /* Assumption is that rs is shorter than the scalar. */
843 if (SvUTF8(PL_rs)) {
844 /* RS is utf8, scalar is 8 bit. */
845 bool is_utf8 = TRUE;
846 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
847 &rslen, &is_utf8);
848 if (is_utf8) {
849 /* Cannot downgrade, therefore cannot possibly match
850 */
851 assert (temp_buffer == rsptr);
852 temp_buffer = NULL;
853 goto nope;
854 }
855 rsptr = temp_buffer;
856 }
857 else if (PL_encoding) {
858 /* RS is 8 bit, encoding.pm is used.
859 * Do not recode PL_rs as a side-effect. */
860 svrecode = newSVpvn(rsptr, rslen);
861 sv_recode_to_utf8(svrecode, PL_encoding);
862 rsptr = SvPV_const(svrecode, rslen);
863 rs_charlen = sv_len_utf8(svrecode);
864 }
865 else {
866 /* RS is 8 bit, scalar is utf8. */
867 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
868 rsptr = temp_buffer;
869 }
870 }
871 if (rslen == 1) {
872 if (*s != *rsptr)
873 goto nope;
874 ++SvIVX(retval);
875 }
876 else {
877 if (len < rslen - 1)
878 goto nope;
879 len -= rslen - 1;
880 s -= rslen - 1;
881 if (memNE(s, rsptr, rslen))
882 goto nope;
883 SvIVX(retval) += rs_charlen;
884 }
885 }
886 s = SvPV_force_nolen(sv);
887 SvCUR_set(sv, len);
888 *SvEND(sv) = '\0';
889 SvNIOK_off(sv);
890 SvSETMAGIC(sv);
891 }
892 nope:
893
894 SvREFCNT_dec(svrecode);
895
896 Safefree(temp_buffer);
897 } else {
898 if (len && !SvPOK(sv))
899 s = SvPV_force_nomg(sv, len);
900 if (DO_UTF8(sv)) {
901 if (s && len) {
902 char * const send = s + len;
903 char * const start = s;
904 s = send - 1;
905 while (s > start && UTF8_IS_CONTINUATION(*s))
906 s--;
907 if (is_utf8_string((U8*)s, send - s)) {
908 sv_setpvn(retval, s, send - s);
909 *s = '\0';
910 SvCUR_set(sv, s - start);
911 SvNIOK_off(sv);
912 SvUTF8_on(retval);
913 }
914 }
915 else
916 sv_setpvs(retval, "");
917 }
918 else if (s && len) {
919 s += --len;
920 sv_setpvn(retval, s, 1);
921 *s = '\0';
922 SvCUR_set(sv, len);
923 SvUTF8_off(sv);
924 SvNIOK_off(sv);
925 }
926 else
927 sv_setpvs(retval, "");
928 SvSETMAGIC(sv);
929 }
930}
931
a0d0e21e
LW
932PP(pp_schop)
933{
97aff369 934 dVAR; dSP; dTARGET;
fa54efae
NC
935 const bool chomping = PL_op->op_type == OP_SCHOMP;
936
937 if (chomping)
938 sv_setiv(TARG, 0);
939 do_chomp(TARG, TOPs, chomping);
a0d0e21e
LW
940 SETTARG;
941 RETURN;
79072805
LW
942}
943
a0d0e21e 944PP(pp_chop)
79072805 945{
97aff369 946 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
fa54efae 947 const bool chomping = PL_op->op_type == OP_CHOMP;
8ec5e241 948
fa54efae
NC
949 if (chomping)
950 sv_setiv(TARG, 0);
20cf1f79 951 while (MARK < SP)
fa54efae 952 do_chomp(TARG, *++MARK, chomping);
20cf1f79
NC
953 SP = ORIGMARK;
954 XPUSHTARG;
a0d0e21e 955 RETURN;
79072805
LW
956}
957
a0d0e21e
LW
958PP(pp_undef)
959{
97aff369 960 dVAR; dSP;
a0d0e21e
LW
961 SV *sv;
962
533c011a 963 if (!PL_op->op_private) {
774d564b 964 EXTEND(SP, 1);
a0d0e21e 965 RETPUSHUNDEF;
774d564b 966 }
79072805 967
a0d0e21e
LW
968 sv = POPs;
969 if (!sv)
970 RETPUSHUNDEF;
85e6fe83 971
765f542d 972 SV_CHECK_THINKFIRST_COW_DROP(sv);
85e6fe83 973
a0d0e21e
LW
974 switch (SvTYPE(sv)) {
975 case SVt_NULL:
976 break;
977 case SVt_PVAV:
502c6561 978 av_undef(MUTABLE_AV(sv));
a0d0e21e
LW
979 break;
980 case SVt_PVHV:
85fbaab2 981 hv_undef(MUTABLE_HV(sv));
a0d0e21e
LW
982 break;
983 case SVt_PVCV:
a2a5de95
NC
984 if (cv_const_sv((const CV *)sv))
985 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
986 CvANON((const CV *)sv) ? "(anonymous)"
987 : GvENAME(CvGV((const CV *)sv)));
5f66b61c 988 /* FALLTHROUGH */
9607fc9c 989 case SVt_PVFM:
6fc92669
GS
990 {
991 /* let user-undef'd sub keep its identity */
ea726b52
NC
992 GV* const gv = CvGV((const CV *)sv);
993 cv_undef(MUTABLE_CV(sv));
b3f91e91 994 CvGV_set(MUTABLE_CV(sv), gv);
6fc92669 995 }
a0d0e21e 996 break;
8e07c86e 997 case SVt_PVGV:
6e592b3a 998 if (SvFAKE(sv)) {
3280af22 999 SvSetMagicSV(sv, &PL_sv_undef);
6e592b3a
BM
1000 break;
1001 }
1002 else if (isGV_with_GP(sv)) {
20408e3c 1003 GP *gp;
dd69841b
BB
1004 HV *stash;
1005
dd69841b 1006 /* undef *Pkg::meth_name ... */
e530fb81
FC
1007 bool method_changed
1008 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1009 && HvENAME_get(stash);
1010 /* undef *Foo:: */
1011 if((stash = GvHV((const GV *)sv))) {
1012 if(HvENAME_get(stash))
1013 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1014 else stash = NULL;
1015 }
dd69841b 1016
159b6efe 1017 gp_free(MUTABLE_GV(sv));
a02a5408 1018 Newxz(gp, 1, GP);
c43ae56f 1019 GvGP_set(sv, gp_ref(gp));
561b68a9 1020 GvSV(sv) = newSV(0);
57843af0 1021 GvLINE(sv) = CopLINE(PL_curcop);
159b6efe 1022 GvEGV(sv) = MUTABLE_GV(sv);
20408e3c 1023 GvMULTI_on(sv);
e530fb81
FC
1024
1025 if(stash)
afdbe55d 1026 mro_package_moved(NULL, stash, (const GV *)sv, 0);
e530fb81
FC
1027 stash = NULL;
1028 /* undef *Foo::ISA */
1029 if( strEQ(GvNAME((const GV *)sv), "ISA")
1030 && (stash = GvSTASH((const GV *)sv))
1031 && (method_changed || HvENAME(stash)) )
1032 mro_isa_changed_in(stash);
1033 else if(method_changed)
1034 mro_method_changed_in(
da9043f5 1035 GvSTASH((const GV *)sv)
e530fb81
FC
1036 );
1037
6e592b3a 1038 break;
20408e3c 1039 }
6e592b3a 1040 /* FALL THROUGH */
a0d0e21e 1041 default:
b15aece3 1042 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
8bd4d4c5 1043 SvPV_free(sv);
c445ea15 1044 SvPV_set(sv, NULL);
4633a7c4 1045 SvLEN_set(sv, 0);
a0d0e21e 1046 }
0c34ef67 1047 SvOK_off(sv);
4633a7c4 1048 SvSETMAGIC(sv);
79072805 1049 }
a0d0e21e
LW
1050
1051 RETPUSHUNDEF;
79072805
LW
1052}
1053
a0d0e21e
LW
1054PP(pp_postinc)
1055{
97aff369 1056 dVAR; dSP; dTARGET;
c22c99bc
FC
1057 const bool inc =
1058 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
60092ce4 1059 if (SvTYPE(TOPs) >= SVt_PVAV || (isGV_with_GP(TOPs) && !SvFAKE(TOPs)))
6ad8f254 1060 Perl_croak_no_modify(aTHX);
7dcb9b98
DM
1061 if (SvROK(TOPs))
1062 TARG = sv_newmortal();
a0d0e21e 1063 sv_setsv(TARG, TOPs);
3510b4a1 1064 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
c22c99bc 1065 && SvIVX(TOPs) != (inc ? IV_MAX : IV_MIN))
55497cff 1066 {
c22c99bc 1067 SvIV_set(TOPs, SvIVX(TOPs) + (inc ? 1 : -1));
55497cff 1068 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
748a9306 1069 }
c22c99bc 1070 else if (inc)
6f1401dc 1071 sv_inc_nomg(TOPs);
c22c99bc 1072 else sv_dec_nomg(TOPs);
a0d0e21e 1073 SvSETMAGIC(TOPs);
1e54a23f 1074 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
c22c99bc 1075 if (inc && !SvOK(TARG))
a0d0e21e
LW
1076 sv_setiv(TARG, 0);
1077 SETs(TARG);
1078 return NORMAL;
1079}
79072805 1080
a0d0e21e
LW
1081/* Ordinary operators. */
1082
1083PP(pp_pow)
1084{
800401ee 1085 dVAR; dSP; dATARGET; SV *svl, *svr;
58d76dfd 1086#ifdef PERL_PRESERVE_IVUV
52a96ae6
HS
1087 bool is_int = 0;
1088#endif
6f1401dc
DM
1089 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1090 svr = TOPs;
1091 svl = TOPm1s;
52a96ae6
HS
1092#ifdef PERL_PRESERVE_IVUV
1093 /* For integer to integer power, we do the calculation by hand wherever
1094 we're sure it is safe; otherwise we call pow() and try to convert to
1095 integer afterwards. */
58d76dfd 1096 {
6f1401dc 1097 SvIV_please_nomg(svr);
800401ee 1098 if (SvIOK(svr)) {
6f1401dc 1099 SvIV_please_nomg(svl);
800401ee 1100 if (SvIOK(svl)) {
900658e3
PF
1101 UV power;
1102 bool baseuok;
1103 UV baseuv;
1104
800401ee
JH
1105 if (SvUOK(svr)) {
1106 power = SvUVX(svr);
900658e3 1107 } else {
800401ee 1108 const IV iv = SvIVX(svr);
900658e3
PF
1109 if (iv >= 0) {
1110 power = iv;
1111 } else {
1112 goto float_it; /* Can't do negative powers this way. */
1113 }
1114 }
1115
800401ee 1116 baseuok = SvUOK(svl);
900658e3 1117 if (baseuok) {
800401ee 1118 baseuv = SvUVX(svl);
900658e3 1119 } else {
800401ee 1120 const IV iv = SvIVX(svl);
900658e3
PF
1121 if (iv >= 0) {
1122 baseuv = iv;
1123 baseuok = TRUE; /* effectively it's a UV now */
1124 } else {
1125 baseuv = -iv; /* abs, baseuok == false records sign */
1126 }
1127 }
52a96ae6
HS
1128 /* now we have integer ** positive integer. */
1129 is_int = 1;
1130
1131 /* foo & (foo - 1) is zero only for a power of 2. */
58d76dfd 1132 if (!(baseuv & (baseuv - 1))) {
52a96ae6 1133 /* We are raising power-of-2 to a positive integer.
58d76dfd
JH
1134 The logic here will work for any base (even non-integer
1135 bases) but it can be less accurate than
1136 pow (base,power) or exp (power * log (base)) when the
1137 intermediate values start to spill out of the mantissa.
1138 With powers of 2 we know this can't happen.
1139 And powers of 2 are the favourite thing for perl
1140 programmers to notice ** not doing what they mean. */
1141 NV result = 1.0;
1142 NV base = baseuok ? baseuv : -(NV)baseuv;
900658e3
PF
1143
1144 if (power & 1) {
1145 result *= base;
1146 }
1147 while (power >>= 1) {
1148 base *= base;
1149 if (power & 1) {
1150 result *= base;
1151 }
1152 }
58d76dfd
JH
1153 SP--;
1154 SETn( result );
6f1401dc 1155 SvIV_please_nomg(svr);
58d76dfd 1156 RETURN;
52a96ae6
HS
1157 } else {
1158 register unsigned int highbit = 8 * sizeof(UV);
900658e3
PF
1159 register unsigned int diff = 8 * sizeof(UV);
1160 while (diff >>= 1) {
1161 highbit -= diff;
1162 if (baseuv >> highbit) {
1163 highbit += diff;
1164 }
52a96ae6
HS
1165 }
1166 /* we now have baseuv < 2 ** highbit */
1167 if (power * highbit <= 8 * sizeof(UV)) {
1168 /* result will definitely fit in UV, so use UV math
1169 on same algorithm as above */
1170 register UV result = 1;
1171 register UV base = baseuv;
f2338a2e 1172 const bool odd_power = cBOOL(power & 1);
900658e3
PF
1173 if (odd_power) {
1174 result *= base;
1175 }
1176 while (power >>= 1) {
1177 base *= base;
1178 if (power & 1) {
52a96ae6 1179 result *= base;
52a96ae6
HS
1180 }
1181 }
1182 SP--;
0615a994 1183 if (baseuok || !odd_power)
52a96ae6
HS
1184 /* answer is positive */
1185 SETu( result );
1186 else if (result <= (UV)IV_MAX)
1187 /* answer negative, fits in IV */
1188 SETi( -(IV)result );
1189 else if (result == (UV)IV_MIN)
1190 /* 2's complement assumption: special case IV_MIN */
1191 SETi( IV_MIN );
1192 else
1193 /* answer negative, doesn't fit */
1194 SETn( -(NV)result );
1195 RETURN;
1196 }
1197 }
1198 }
1199 }
58d76dfd 1200 }
52a96ae6 1201 float_it:
58d76dfd 1202#endif
a0d0e21e 1203 {
6f1401dc
DM
1204 NV right = SvNV_nomg(svr);
1205 NV left = SvNV_nomg(svl);
4efa5a16 1206 (void)POPs;
3aaeb624
JA
1207
1208#if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1209 /*
1210 We are building perl with long double support and are on an AIX OS
1211 afflicted with a powl() function that wrongly returns NaNQ for any
1212 negative base. This was reported to IBM as PMR #23047-379 on
1213 03/06/2006. The problem exists in at least the following versions
1214 of AIX and the libm fileset, and no doubt others as well:
1215
1216 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1217 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1218 AIX 5.2.0 bos.adt.libm 5.2.0.85
1219
1220 So, until IBM fixes powl(), we provide the following workaround to
1221 handle the problem ourselves. Our logic is as follows: for
1222 negative bases (left), we use fmod(right, 2) to check if the
1223 exponent is an odd or even integer:
1224
1225 - if odd, powl(left, right) == -powl(-left, right)
1226 - if even, powl(left, right) == powl(-left, right)
1227
1228 If the exponent is not an integer, the result is rightly NaNQ, so
1229 we just return that (as NV_NAN).
1230 */
1231
1232 if (left < 0.0) {
1233 NV mod2 = Perl_fmod( right, 2.0 );
1234 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1235 SETn( -Perl_pow( -left, right) );
1236 } else if (mod2 == 0.0) { /* even integer */
1237 SETn( Perl_pow( -left, right) );
1238 } else { /* fractional power */
1239 SETn( NV_NAN );
1240 }
1241 } else {
1242 SETn( Perl_pow( left, right) );
1243 }
1244#else
52a96ae6 1245 SETn( Perl_pow( left, right) );
3aaeb624
JA
1246#endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1247
52a96ae6
HS
1248#ifdef PERL_PRESERVE_IVUV
1249 if (is_int)
6f1401dc 1250 SvIV_please_nomg(svr);
52a96ae6
HS
1251#endif
1252 RETURN;
93a17b20 1253 }
a0d0e21e
LW
1254}
1255
1256PP(pp_multiply)
1257{
800401ee 1258 dVAR; dSP; dATARGET; SV *svl, *svr;
6f1401dc
DM
1259 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1260 svr = TOPs;
1261 svl = TOPm1s;
28e5dec8 1262#ifdef PERL_PRESERVE_IVUV
6f1401dc 1263 SvIV_please_nomg(svr);
800401ee 1264 if (SvIOK(svr)) {
28e5dec8
JH
1265 /* Unless the left argument is integer in range we are going to have to
1266 use NV maths. Hence only attempt to coerce the right argument if
1267 we know the left is integer. */
1268 /* Left operand is defined, so is it IV? */
6f1401dc 1269 SvIV_please_nomg(svl);
800401ee
JH
1270 if (SvIOK(svl)) {
1271 bool auvok = SvUOK(svl);
1272 bool buvok = SvUOK(svr);
28e5dec8
JH
1273 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1274 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1275 UV alow;
1276 UV ahigh;
1277 UV blow;
1278 UV bhigh;
1279
1280 if (auvok) {
800401ee 1281 alow = SvUVX(svl);
28e5dec8 1282 } else {
800401ee 1283 const IV aiv = SvIVX(svl);
28e5dec8
JH
1284 if (aiv >= 0) {
1285 alow = aiv;
1286 auvok = TRUE; /* effectively it's a UV now */
1287 } else {
1288 alow = -aiv; /* abs, auvok == false records sign */
1289 }
1290 }
1291 if (buvok) {
800401ee 1292 blow = SvUVX(svr);
28e5dec8 1293 } else {
800401ee 1294 const IV biv = SvIVX(svr);
28e5dec8
JH
1295 if (biv >= 0) {
1296 blow = biv;
1297 buvok = TRUE; /* effectively it's a UV now */
1298 } else {
1299 blow = -biv; /* abs, buvok == false records sign */
1300 }
1301 }
1302
1303 /* If this does sign extension on unsigned it's time for plan B */
1304 ahigh = alow >> (4 * sizeof (UV));
1305 alow &= botmask;
1306 bhigh = blow >> (4 * sizeof (UV));
1307 blow &= botmask;
1308 if (ahigh && bhigh) {
6f207bd3 1309 NOOP;
28e5dec8
JH
1310 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1311 which is overflow. Drop to NVs below. */
1312 } else if (!ahigh && !bhigh) {
1313 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1314 so the unsigned multiply cannot overflow. */
c445ea15 1315 const UV product = alow * blow;
28e5dec8
JH
1316 if (auvok == buvok) {
1317 /* -ve * -ve or +ve * +ve gives a +ve result. */
1318 SP--;
1319 SETu( product );
1320 RETURN;
1321 } else if (product <= (UV)IV_MIN) {
1322 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1323 /* -ve result, which could overflow an IV */
1324 SP--;
25716404 1325 SETi( -(IV)product );
28e5dec8
JH
1326 RETURN;
1327 } /* else drop to NVs below. */
1328 } else {
1329 /* One operand is large, 1 small */
1330 UV product_middle;
1331 if (bhigh) {
1332 /* swap the operands */
1333 ahigh = bhigh;
1334 bhigh = blow; /* bhigh now the temp var for the swap */
1335 blow = alow;
1336 alow = bhigh;
1337 }
1338 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1339 multiplies can't overflow. shift can, add can, -ve can. */
1340 product_middle = ahigh * blow;
1341 if (!(product_middle & topmask)) {
1342 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1343 UV product_low;
1344 product_middle <<= (4 * sizeof (UV));
1345 product_low = alow * blow;
1346
1347 /* as for pp_add, UV + something mustn't get smaller.
1348 IIRC ANSI mandates this wrapping *behaviour* for
1349 unsigned whatever the actual representation*/
1350 product_low += product_middle;
1351 if (product_low >= product_middle) {
1352 /* didn't overflow */
1353 if (auvok == buvok) {
1354 /* -ve * -ve or +ve * +ve gives a +ve result. */
1355 SP--;
1356 SETu( product_low );
1357 RETURN;
1358 } else if (product_low <= (UV)IV_MIN) {
1359 /* 2s complement assumption again */
1360 /* -ve result, which could overflow an IV */
1361 SP--;
25716404 1362 SETi( -(IV)product_low );
28e5dec8
JH
1363 RETURN;
1364 } /* else drop to NVs below. */
1365 }
1366 } /* product_middle too large */
1367 } /* ahigh && bhigh */
800401ee
JH
1368 } /* SvIOK(svl) */
1369 } /* SvIOK(svr) */
28e5dec8 1370#endif
a0d0e21e 1371 {
6f1401dc
DM
1372 NV right = SvNV_nomg(svr);
1373 NV left = SvNV_nomg(svl);
4efa5a16 1374 (void)POPs;
a0d0e21e
LW
1375 SETn( left * right );
1376 RETURN;
79072805 1377 }
a0d0e21e
LW
1378}
1379
1380PP(pp_divide)
1381{
800401ee 1382 dVAR; dSP; dATARGET; SV *svl, *svr;
6f1401dc
DM
1383 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1384 svr = TOPs;
1385 svl = TOPm1s;
5479d192 1386 /* Only try to do UV divide first
68795e93 1387 if ((SLOPPYDIVIDE is true) or
5479d192
NC
1388 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1389 to preserve))
1390 The assumption is that it is better to use floating point divide
1391 whenever possible, only doing integer divide first if we can't be sure.
1392 If NV_PRESERVES_UV is true then we know at compile time that no UV
1393 can be too large to preserve, so don't need to compile the code to
1394 test the size of UVs. */
1395
a0d0e21e 1396#ifdef SLOPPYDIVIDE
5479d192
NC
1397# define PERL_TRY_UV_DIVIDE
1398 /* ensure that 20./5. == 4. */
a0d0e21e 1399#else
5479d192
NC
1400# ifdef PERL_PRESERVE_IVUV
1401# ifndef NV_PRESERVES_UV
1402# define PERL_TRY_UV_DIVIDE
1403# endif
1404# endif
a0d0e21e 1405#endif
5479d192
NC
1406
1407#ifdef PERL_TRY_UV_DIVIDE
6f1401dc 1408 SvIV_please_nomg(svr);
800401ee 1409 if (SvIOK(svr)) {
6f1401dc 1410 SvIV_please_nomg(svl);
800401ee
JH
1411 if (SvIOK(svl)) {
1412 bool left_non_neg = SvUOK(svl);
1413 bool right_non_neg = SvUOK(svr);
5479d192
NC
1414 UV left;
1415 UV right;
1416
1417 if (right_non_neg) {
800401ee 1418 right = SvUVX(svr);
5479d192
NC
1419 }
1420 else {
800401ee 1421 const IV biv = SvIVX(svr);
5479d192
NC
1422 if (biv >= 0) {
1423 right = biv;
1424 right_non_neg = TRUE; /* effectively it's a UV now */
1425 }
1426 else {
1427 right = -biv;
1428 }
1429 }
1430 /* historically undef()/0 gives a "Use of uninitialized value"
1431 warning before dieing, hence this test goes here.
1432 If it were immediately before the second SvIV_please, then
1433 DIE() would be invoked before left was even inspected, so
486ec47a 1434 no inspection would give no warning. */
5479d192
NC
1435 if (right == 0)
1436 DIE(aTHX_ "Illegal division by zero");
1437
1438 if (left_non_neg) {
800401ee 1439 left = SvUVX(svl);
5479d192
NC
1440 }
1441 else {
800401ee 1442 const IV aiv = SvIVX(svl);
5479d192
NC
1443 if (aiv >= 0) {
1444 left = aiv;
1445 left_non_neg = TRUE; /* effectively it's a UV now */
1446 }
1447 else {
1448 left = -aiv;
1449 }
1450 }
1451
1452 if (left >= right
1453#ifdef SLOPPYDIVIDE
1454 /* For sloppy divide we always attempt integer division. */
1455#else
1456 /* Otherwise we only attempt it if either or both operands
1457 would not be preserved by an NV. If both fit in NVs
0c2ee62a
NC
1458 we fall through to the NV divide code below. However,
1459 as left >= right to ensure integer result here, we know that
1460 we can skip the test on the right operand - right big
1461 enough not to be preserved can't get here unless left is
1462 also too big. */
1463
1464 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
5479d192
NC
1465#endif
1466 ) {
1467 /* Integer division can't overflow, but it can be imprecise. */
1b6737cc 1468 const UV result = left / right;
5479d192
NC
1469 if (result * right == left) {
1470 SP--; /* result is valid */
1471 if (left_non_neg == right_non_neg) {
1472 /* signs identical, result is positive. */
1473 SETu( result );
1474 RETURN;
1475 }
1476 /* 2s complement assumption */
1477 if (result <= (UV)IV_MIN)
91f3b821 1478 SETi( -(IV)result );
5479d192
NC
1479 else {
1480 /* It's exact but too negative for IV. */
1481 SETn( -(NV)result );
1482 }
1483 RETURN;
1484 } /* tried integer divide but it was not an integer result */
32fdb065 1485 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
5479d192
NC
1486 } /* left wasn't SvIOK */
1487 } /* right wasn't SvIOK */
1488#endif /* PERL_TRY_UV_DIVIDE */
1489 {
6f1401dc
DM
1490 NV right = SvNV_nomg(svr);
1491 NV left = SvNV_nomg(svl);
4efa5a16 1492 (void)POPs;(void)POPs;
ebc6a117
PD
1493#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1494 if (! Perl_isnan(right) && right == 0.0)
1495#else
5479d192 1496 if (right == 0.0)
ebc6a117 1497#endif
5479d192
NC
1498 DIE(aTHX_ "Illegal division by zero");
1499 PUSHn( left / right );
1500 RETURN;
79072805 1501 }
a0d0e21e
LW
1502}
1503
1504PP(pp_modulo)
1505{
6f1401dc
DM
1506 dVAR; dSP; dATARGET;
1507 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
a0d0e21e 1508 {
9c5ffd7c
JH
1509 UV left = 0;
1510 UV right = 0;
dc656993
JH
1511 bool left_neg = FALSE;
1512 bool right_neg = FALSE;
e2c88acc
NC
1513 bool use_double = FALSE;
1514 bool dright_valid = FALSE;
9c5ffd7c
JH
1515 NV dright = 0.0;
1516 NV dleft = 0.0;
6f1401dc
DM
1517 SV * const svr = TOPs;
1518 SV * const svl = TOPm1s;
1519 SvIV_please_nomg(svr);
800401ee
JH
1520 if (SvIOK(svr)) {
1521 right_neg = !SvUOK(svr);
e2c88acc 1522 if (!right_neg) {
800401ee 1523 right = SvUVX(svr);
e2c88acc 1524 } else {
800401ee 1525 const IV biv = SvIVX(svr);
e2c88acc
NC
1526 if (biv >= 0) {
1527 right = biv;
1528 right_neg = FALSE; /* effectively it's a UV now */
1529 } else {
1530 right = -biv;
1531 }
1532 }
1533 }
1534 else {
6f1401dc 1535 dright = SvNV_nomg(svr);
787eafbd
IZ
1536 right_neg = dright < 0;
1537 if (right_neg)
1538 dright = -dright;
e2c88acc
NC
1539 if (dright < UV_MAX_P1) {
1540 right = U_V(dright);
1541 dright_valid = TRUE; /* In case we need to use double below. */
1542 } else {
1543 use_double = TRUE;
1544 }
787eafbd 1545 }
a0d0e21e 1546
e2c88acc
NC
1547 /* At this point use_double is only true if right is out of range for
1548 a UV. In range NV has been rounded down to nearest UV and
1549 use_double false. */
6f1401dc 1550 SvIV_please_nomg(svl);
800401ee
JH
1551 if (!use_double && SvIOK(svl)) {
1552 if (SvIOK(svl)) {
1553 left_neg = !SvUOK(svl);
e2c88acc 1554 if (!left_neg) {
800401ee 1555 left = SvUVX(svl);
e2c88acc 1556 } else {
800401ee 1557 const IV aiv = SvIVX(svl);
e2c88acc
NC
1558 if (aiv >= 0) {
1559 left = aiv;
1560 left_neg = FALSE; /* effectively it's a UV now */
1561 } else {
1562 left = -aiv;
1563 }
1564 }
1565 }
1566 }
787eafbd 1567 else {
6f1401dc 1568 dleft = SvNV_nomg(svl);
787eafbd
IZ
1569 left_neg = dleft < 0;
1570 if (left_neg)
1571 dleft = -dleft;
68dc0745 1572
e2c88acc
NC
1573 /* This should be exactly the 5.6 behaviour - if left and right are
1574 both in range for UV then use U_V() rather than floor. */
1575 if (!use_double) {
1576 if (dleft < UV_MAX_P1) {
1577 /* right was in range, so is dleft, so use UVs not double.
1578 */
1579 left = U_V(dleft);
1580 }
1581 /* left is out of range for UV, right was in range, so promote
1582 right (back) to double. */
1583 else {
1584 /* The +0.5 is used in 5.6 even though it is not strictly
1585 consistent with the implicit +0 floor in the U_V()
1586 inside the #if 1. */
1587 dleft = Perl_floor(dleft + 0.5);
1588 use_double = TRUE;
1589 if (dright_valid)
1590 dright = Perl_floor(dright + 0.5);
1591 else
1592 dright = right;
1593 }
1594 }
1595 }
6f1401dc 1596 sp -= 2;
787eafbd 1597 if (use_double) {
65202027 1598 NV dans;
787eafbd 1599
787eafbd 1600 if (!dright)
cea2e8a9 1601 DIE(aTHX_ "Illegal modulus zero");
787eafbd 1602
65202027 1603 dans = Perl_fmod(dleft, dright);
787eafbd
IZ
1604 if ((left_neg != right_neg) && dans)
1605 dans = dright - dans;
1606 if (right_neg)
1607 dans = -dans;
1608 sv_setnv(TARG, dans);
1609 }
1610 else {
1611 UV ans;
1612
787eafbd 1613 if (!right)
cea2e8a9 1614 DIE(aTHX_ "Illegal modulus zero");
787eafbd
IZ
1615
1616 ans = left % right;
1617 if ((left_neg != right_neg) && ans)
1618 ans = right - ans;
1619 if (right_neg) {
1620 /* XXX may warn: unary minus operator applied to unsigned type */
1621 /* could change -foo to be (~foo)+1 instead */
1622 if (ans <= ~((UV)IV_MAX)+1)
1623 sv_setiv(TARG, ~ans+1);
1624 else
65202027 1625 sv_setnv(TARG, -(NV)ans);
787eafbd
IZ
1626 }
1627 else
1628 sv_setuv(TARG, ans);
1629 }
1630 PUSHTARG;
1631 RETURN;
79072805 1632 }
a0d0e21e 1633}
79072805 1634
a0d0e21e
LW
1635PP(pp_repeat)
1636{
6f1401dc 1637 dVAR; dSP; dATARGET;
2b573ace 1638 register IV count;
6f1401dc
DM
1639 SV *sv;
1640
1641 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1642 /* TODO: think of some way of doing list-repeat overloading ??? */
1643 sv = POPs;
1644 SvGETMAGIC(sv);
1645 }
1646 else {
1647 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1648 sv = POPs;
1649 }
1650
2b573ace
JH
1651 if (SvIOKp(sv)) {
1652 if (SvUOK(sv)) {
6f1401dc 1653 const UV uv = SvUV_nomg(sv);
2b573ace
JH
1654 if (uv > IV_MAX)
1655 count = IV_MAX; /* The best we can do? */
1656 else
1657 count = uv;
1658 } else {
6f1401dc 1659 const IV iv = SvIV_nomg(sv);
2b573ace
JH
1660 if (iv < 0)
1661 count = 0;
1662 else
1663 count = iv;
1664 }
1665 }
1666 else if (SvNOKp(sv)) {
6f1401dc 1667 const NV nv = SvNV_nomg(sv);
2b573ace
JH
1668 if (nv < 0.0)
1669 count = 0;
1670 else
1671 count = (IV)nv;
1672 }
1673 else
6f1401dc
DM
1674 count = SvIV_nomg(sv);
1675
533c011a 1676 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
a0d0e21e 1677 dMARK;
0bd48802
AL
1678 static const char oom_list_extend[] = "Out of memory during list extend";
1679 const I32 items = SP - MARK;
1680 const I32 max = items * count;
79072805 1681
2b573ace
JH
1682 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1683 /* Did the max computation overflow? */
27d5b266 1684 if (items > 0 && max > 0 && (max < items || max < count))
2b573ace 1685 Perl_croak(aTHX_ oom_list_extend);
a0d0e21e
LW
1686 MEXTEND(MARK, max);
1687 if (count > 1) {
1688 while (SP > MARK) {
976c8a39
JH
1689#if 0
1690 /* This code was intended to fix 20010809.028:
1691
1692 $x = 'abcd';
1693 for (($x =~ /./g) x 2) {
1694 print chop; # "abcdabcd" expected as output.
1695 }
1696
1697 * but that change (#11635) broke this code:
1698
1699 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1700
1701 * I can't think of a better fix that doesn't introduce
1702 * an efficiency hit by copying the SVs. The stack isn't
1703 * refcounted, and mortalisation obviously doesn't
1704 * Do The Right Thing when the stack has more than
1705 * one pointer to the same mortal value.
1706 * .robin.
1707 */
e30acc16
RH
1708 if (*SP) {
1709 *SP = sv_2mortal(newSVsv(*SP));
1710 SvREADONLY_on(*SP);
1711 }
976c8a39
JH
1712#else
1713 if (*SP)
1714 SvTEMP_off((*SP));
1715#endif
a0d0e21e 1716 SP--;
79072805 1717 }
a0d0e21e
LW
1718 MARK++;
1719 repeatcpy((char*)(MARK + items), (char*)MARK,
ad64d0ec 1720 items * sizeof(const SV *), count - 1);
a0d0e21e 1721 SP += max;
79072805 1722 }
a0d0e21e
LW
1723 else if (count <= 0)
1724 SP -= items;
79072805 1725 }
a0d0e21e 1726 else { /* Note: mark already snarfed by pp_list */
0bd48802 1727 SV * const tmpstr = POPs;
a0d0e21e 1728 STRLEN len;
9b877dbb 1729 bool isutf;
2b573ace
JH
1730 static const char oom_string_extend[] =
1731 "Out of memory during string extend";
a0d0e21e 1732
6f1401dc
DM
1733 if (TARG != tmpstr)
1734 sv_setsv_nomg(TARG, tmpstr);
1735 SvPV_force_nomg(TARG, len);
9b877dbb 1736 isutf = DO_UTF8(TARG);
8ebc5c01 1737 if (count != 1) {
1738 if (count < 1)
1739 SvCUR_set(TARG, 0);
1740 else {
c445ea15 1741 const STRLEN max = (UV)count * len;
19a94d75 1742 if (len > MEM_SIZE_MAX / count)
2b573ace
JH
1743 Perl_croak(aTHX_ oom_string_extend);
1744 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
8569b9dc 1745 SvGROW(TARG, max + 1);
a0d0e21e 1746 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
b162af07 1747 SvCUR_set(TARG, SvCUR(TARG) * count);
7a4c00b4 1748 }
a0d0e21e 1749 *SvEND(TARG) = '\0';
a0d0e21e 1750 }
dfcb284a
GS
1751 if (isutf)
1752 (void)SvPOK_only_UTF8(TARG);
1753 else
1754 (void)SvPOK_only(TARG);
b80b6069
RH
1755
1756 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1757 /* The parser saw this as a list repeat, and there
1758 are probably several items on the stack. But we're
1759 in scalar context, and there's no pp_list to save us
1760 now. So drop the rest of the items -- robin@kitsite.com
1761 */
1762 dMARK;
1763 SP = MARK;
1764 }
a0d0e21e 1765 PUSHTARG;
79072805 1766 }
a0d0e21e
LW
1767 RETURN;
1768}
79072805 1769
a0d0e21e
LW
1770PP(pp_subtract)
1771{
800401ee 1772 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
6f1401dc
DM
1773 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1774 svr = TOPs;
1775 svl = TOPm1s;
800401ee 1776 useleft = USE_LEFT(svl);
28e5dec8 1777#ifdef PERL_PRESERVE_IVUV
7dca457a
NC
1778 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1779 "bad things" happen if you rely on signed integers wrapping. */
6f1401dc 1780 SvIV_please_nomg(svr);
800401ee 1781 if (SvIOK(svr)) {
28e5dec8
JH
1782 /* Unless the left argument is integer in range we are going to have to
1783 use NV maths. Hence only attempt to coerce the right argument if
1784 we know the left is integer. */
9c5ffd7c
JH
1785 register UV auv = 0;
1786 bool auvok = FALSE;
7dca457a
NC
1787 bool a_valid = 0;
1788
28e5dec8 1789 if (!useleft) {
7dca457a
NC
1790 auv = 0;
1791 a_valid = auvok = 1;
1792 /* left operand is undef, treat as zero. */
28e5dec8
JH
1793 } else {
1794 /* Left operand is defined, so is it IV? */
6f1401dc 1795 SvIV_please_nomg(svl);
800401ee
JH
1796 if (SvIOK(svl)) {
1797 if ((auvok = SvUOK(svl)))
1798 auv = SvUVX(svl);
7dca457a 1799 else {
800401ee 1800 register const IV aiv = SvIVX(svl);
7dca457a
NC
1801 if (aiv >= 0) {
1802 auv = aiv;
1803 auvok = 1; /* Now acting as a sign flag. */
1804 } else { /* 2s complement assumption for IV_MIN */
1805 auv = (UV)-aiv;
28e5dec8 1806 }
7dca457a
NC
1807 }
1808 a_valid = 1;
1809 }
1810 }
1811 if (a_valid) {
1812 bool result_good = 0;
1813 UV result;
1814 register UV buv;
800401ee 1815 bool buvok = SvUOK(svr);
9041c2e3 1816
7dca457a 1817 if (buvok)
800401ee 1818 buv = SvUVX(svr);
7dca457a 1819 else {
800401ee 1820 register const IV biv = SvIVX(svr);
7dca457a
NC
1821 if (biv >= 0) {
1822 buv = biv;
1823 buvok = 1;
1824 } else
1825 buv = (UV)-biv;
1826 }
1827 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
602f51c4 1828 else "IV" now, independent of how it came in.
7dca457a
NC
1829 if a, b represents positive, A, B negative, a maps to -A etc
1830 a - b => (a - b)
1831 A - b => -(a + b)
1832 a - B => (a + b)
1833 A - B => -(a - b)
1834 all UV maths. negate result if A negative.
1835 subtract if signs same, add if signs differ. */
1836
1837 if (auvok ^ buvok) {
1838 /* Signs differ. */
1839 result = auv + buv;
1840 if (result >= auv)
1841 result_good = 1;
1842 } else {
1843 /* Signs same */
1844 if (auv >= buv) {
1845 result = auv - buv;
1846 /* Must get smaller */
1847 if (result <= auv)
1848 result_good = 1;
1849 } else {
1850 result = buv - auv;
1851 if (result <= buv) {
1852 /* result really should be -(auv-buv). as its negation
1853 of true value, need to swap our result flag */
1854 auvok = !auvok;
1855 result_good = 1;
28e5dec8 1856 }
28e5dec8
JH
1857 }
1858 }
7dca457a
NC
1859 if (result_good) {
1860 SP--;
1861 if (auvok)
1862 SETu( result );
1863 else {
1864 /* Negate result */
1865 if (result <= (UV)IV_MIN)
1866 SETi( -(IV)result );
1867 else {
1868 /* result valid, but out of range for IV. */
1869 SETn( -(NV)result );
1870 }
1871 }
1872 RETURN;
1873 } /* Overflow, drop through to NVs. */
28e5dec8
JH
1874 }
1875 }
1876#endif
a0d0e21e 1877 {
6f1401dc 1878 NV value = SvNV_nomg(svr);
4efa5a16
RD
1879 (void)POPs;
1880
28e5dec8
JH
1881 if (!useleft) {
1882 /* left operand is undef, treat as zero - value */
1883 SETn(-value);
1884 RETURN;
1885 }
6f1401dc 1886 SETn( SvNV_nomg(svl) - value );
28e5dec8 1887 RETURN;
79072805 1888 }
a0d0e21e 1889}
79072805 1890
a0d0e21e
LW
1891PP(pp_left_shift)
1892{
6f1401dc 1893 dVAR; dSP; dATARGET; SV *svl, *svr;
a42d0242 1894 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
6f1401dc
DM
1895 svr = POPs;
1896 svl = TOPs;
a0d0e21e 1897 {
6f1401dc 1898 const IV shift = SvIV_nomg(svr);
d0ba1bd2 1899 if (PL_op->op_private & HINT_INTEGER) {
6f1401dc 1900 const IV i = SvIV_nomg(svl);
972b05a9 1901 SETi(i << shift);
d0ba1bd2
JH
1902 }
1903 else {
6f1401dc 1904 const UV u = SvUV_nomg(svl);
972b05a9 1905 SETu(u << shift);
d0ba1bd2 1906 }
55497cff 1907 RETURN;
79072805 1908 }
a0d0e21e 1909}
79072805 1910
a0d0e21e
LW
1911PP(pp_right_shift)
1912{
6f1401dc 1913 dVAR; dSP; dATARGET; SV *svl, *svr;
a42d0242 1914 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
6f1401dc
DM
1915 svr = POPs;
1916 svl = TOPs;
a0d0e21e 1917 {
6f1401dc 1918 const IV shift = SvIV_nomg(svr);
d0ba1bd2 1919 if (PL_op->op_private & HINT_INTEGER) {
6f1401dc 1920 const IV i = SvIV_nomg(svl);
972b05a9 1921 SETi(i >> shift);
d0ba1bd2
JH
1922 }
1923 else {
6f1401dc 1924 const UV u = SvUV_nomg(svl);
972b05a9 1925 SETu(u >> shift);
d0ba1bd2 1926 }
a0d0e21e 1927 RETURN;
93a17b20 1928 }
79072805
LW
1929}
1930
a0d0e21e 1931PP(pp_lt)
79072805 1932{
6f1401dc 1933 dVAR; dSP;
33efebe6
DM
1934 SV *left, *right;
1935
a42d0242 1936 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
33efebe6
DM
1937 right = POPs;
1938 left = TOPs;
1939 SETs(boolSV(
1940 (SvIOK_notUV(left) && SvIOK_notUV(right))
1941 ? (SvIVX(left) < SvIVX(right))
1942 : (do_ncmp(left, right) == -1)
1943 ));
1944 RETURN;
a0d0e21e 1945}
79072805 1946
a0d0e21e
LW
1947PP(pp_gt)
1948{
6f1401dc 1949 dVAR; dSP;
33efebe6 1950 SV *left, *right;
1b6737cc 1951
33efebe6
DM
1952 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1953 right = POPs;
1954 left = TOPs;
1955 SETs(boolSV(
1956 (SvIOK_notUV(left) && SvIOK_notUV(right))
1957 ? (SvIVX(left) > SvIVX(right))
1958 : (do_ncmp(left, right) == 1)
1959 ));
1960 RETURN;
a0d0e21e
LW
1961}
1962
1963PP(pp_le)
1964{
6f1401dc 1965 dVAR; dSP;
33efebe6 1966 SV *left, *right;
1b6737cc 1967
33efebe6
DM
1968 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
1969 right = POPs;
1970 left = TOPs;
1971 SETs(boolSV(
1972 (SvIOK_notUV(left) && SvIOK_notUV(right))
1973 ? (SvIVX(left) <= SvIVX(right))
1974 : (do_ncmp(left, right) <= 0)
1975 ));
1976 RETURN;
a0d0e21e
LW
1977}
1978
1979PP(pp_ge)
1980{
6f1401dc 1981 dVAR; dSP;
33efebe6
DM
1982 SV *left, *right;
1983
1984 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
1985 right = POPs;
1986 left = TOPs;
1987 SETs(boolSV(
1988 (SvIOK_notUV(left) && SvIOK_notUV(right))
1989 ? (SvIVX(left) >= SvIVX(right))
1990 : ( (do_ncmp(left, right) & 2) == 0)
1991 ));
1992 RETURN;
1993}
1b6737cc 1994
33efebe6
DM
1995PP(pp_ne)
1996{
1997 dVAR; dSP;
1998 SV *left, *right;
1999
2000 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2001 right = POPs;
2002 left = TOPs;
2003 SETs(boolSV(
2004 (SvIOK_notUV(left) && SvIOK_notUV(right))
2005 ? (SvIVX(left) != SvIVX(right))
2006 : (do_ncmp(left, right) != 0)
2007 ));
2008 RETURN;
2009}
1b6737cc 2010
33efebe6
DM
2011/* compare left and right SVs. Returns:
2012 * -1: <
2013 * 0: ==
2014 * 1: >
2015 * 2: left or right was a NaN
2016 */
2017I32
2018Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2019{
2020 dVAR;
1b6737cc 2021
33efebe6
DM
2022 PERL_ARGS_ASSERT_DO_NCMP;
2023#ifdef PERL_PRESERVE_IVUV
2024 SvIV_please_nomg(right);
2025 /* Fortunately it seems NaN isn't IOK */
2026 if (SvIOK(right)) {
2027 SvIV_please_nomg(left);
2028 if (SvIOK(left)) {
2029 if (!SvUOK(left)) {
2030 const IV leftiv = SvIVX(left);
2031 if (!SvUOK(right)) {
2032 /* ## IV <=> IV ## */
2033 const IV rightiv = SvIVX(right);
2034 return (leftiv > rightiv) - (leftiv < rightiv);
28e5dec8 2035 }
33efebe6
DM
2036 /* ## IV <=> UV ## */
2037 if (leftiv < 0)
2038 /* As (b) is a UV, it's >=0, so it must be < */
2039 return -1;
2040 {
2041 const UV rightuv = SvUVX(right);
2042 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
28e5dec8 2043 }
28e5dec8 2044 }
79072805 2045
33efebe6
DM
2046 if (SvUOK(right)) {
2047 /* ## UV <=> UV ## */
2048 const UV leftuv = SvUVX(left);
2049 const UV rightuv = SvUVX(right);
2050 return (leftuv > rightuv) - (leftuv < rightuv);
28e5dec8 2051 }
33efebe6
DM
2052 /* ## UV <=> IV ## */
2053 {
2054 const IV rightiv = SvIVX(right);
2055 if (rightiv < 0)
2056 /* As (a) is a UV, it's >=0, so it cannot be < */
2057 return 1;
2058 {
2059 const UV leftuv = SvUVX(left);
2060 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
28e5dec8 2061 }
28e5dec8 2062 }
33efebe6 2063 /* NOTREACHED */
28e5dec8
JH
2064 }
2065 }
2066#endif
a0d0e21e 2067 {
33efebe6
DM
2068 NV const rnv = SvNV_nomg(right);
2069 NV const lnv = SvNV_nomg(left);
2070
cab190d4 2071#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
33efebe6
DM
2072 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2073 return 2;
2074 }
2075 return (lnv > rnv) - (lnv < rnv);
cab190d4 2076#else
33efebe6
DM
2077 if (lnv < rnv)
2078 return -1;
2079 if (lnv > rnv)
2080 return 1;
2081 if (lnv == rnv)
2082 return 0;
2083 return 2;
cab190d4 2084#endif
a0d0e21e 2085 }
79072805
LW
2086}
2087
33efebe6 2088
a0d0e21e 2089PP(pp_ncmp)
79072805 2090{
33efebe6
DM
2091 dVAR; dSP;
2092 SV *left, *right;
2093 I32 value;
a42d0242 2094 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
33efebe6
DM
2095 right = POPs;
2096 left = TOPs;
2097 value = do_ncmp(left, right);
2098 if (value == 2) {
3280af22 2099 SETs(&PL_sv_undef);
79072805 2100 }
33efebe6
DM
2101 else {
2102 dTARGET;
2103 SETi(value);
2104 }
2105 RETURN;
a0d0e21e 2106}
79072805 2107
afd9910b 2108PP(pp_sle)
a0d0e21e 2109{
97aff369 2110 dVAR; dSP;
79072805 2111
afd9910b
NC
2112 int amg_type = sle_amg;
2113 int multiplier = 1;
2114 int rhs = 1;
79072805 2115
afd9910b
NC
2116 switch (PL_op->op_type) {
2117 case OP_SLT:
2118 amg_type = slt_amg;
2119 /* cmp < 0 */
2120 rhs = 0;
2121 break;
2122 case OP_SGT:
2123 amg_type = sgt_amg;
2124 /* cmp > 0 */
2125 multiplier = -1;
2126 rhs = 0;
2127 break;
2128 case OP_SGE:
2129 amg_type = sge_amg;
2130 /* cmp >= 0 */
2131 multiplier = -1;
2132 break;
79072805 2133 }
79072805 2134
6f1401dc 2135 tryAMAGICbin_MG(amg_type, AMGf_set);
a0d0e21e
LW
2136 {
2137 dPOPTOPssrl;
1b6737cc 2138 const int cmp = (IN_LOCALE_RUNTIME
078504b2
FC
2139 ? sv_cmp_locale_flags(left, right, 0)
2140 : sv_cmp_flags(left, right, 0));
afd9910b 2141 SETs(boolSV(cmp * multiplier < rhs));
a0d0e21e
LW
2142 RETURN;
2143 }
2144}
79072805 2145
36477c24 2146PP(pp_seq)
2147{
6f1401dc
DM
2148 dVAR; dSP;
2149 tryAMAGICbin_MG(seq_amg, AMGf_set);
36477c24 2150 {
2151 dPOPTOPssrl;
078504b2 2152 SETs(boolSV(sv_eq_flags(left, right, 0)));
a0d0e21e
LW
2153 RETURN;
2154 }
2155}
79072805 2156
a0d0e21e 2157PP(pp_sne)
79072805 2158{
6f1401dc
DM
2159 dVAR; dSP;
2160 tryAMAGICbin_MG(sne_amg, AMGf_set);
a0d0e21e
LW
2161 {
2162 dPOPTOPssrl;
078504b2 2163 SETs(boolSV(!sv_eq_flags(left, right, 0)));
a0d0e21e 2164 RETURN;
463ee0b2 2165 }
79072805
LW
2166}
2167
a0d0e21e 2168PP(pp_scmp)
79072805 2169{
6f1401dc
DM
2170 dVAR; dSP; dTARGET;
2171 tryAMAGICbin_MG(scmp_amg, 0);
a0d0e21e
LW
2172 {
2173 dPOPTOPssrl;
1b6737cc 2174 const int cmp = (IN_LOCALE_RUNTIME
078504b2
FC
2175 ? sv_cmp_locale_flags(left, right, 0)
2176 : sv_cmp_flags(left, right, 0));
bbce6d69 2177 SETi( cmp );
a0d0e21e
LW
2178 RETURN;
2179 }
2180}
79072805 2181
55497cff 2182PP(pp_bit_and)
2183{
6f1401dc
DM
2184 dVAR; dSP; dATARGET;
2185 tryAMAGICbin_MG(band_amg, AMGf_assign);
a0d0e21e
LW
2186 {
2187 dPOPTOPssrl;
4633a7c4 2188 if (SvNIOKp(left) || SvNIOKp(right)) {
b20c4ee1
FC
2189 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2190 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
d0ba1bd2 2191 if (PL_op->op_private & HINT_INTEGER) {
1b6737cc 2192 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
972b05a9 2193 SETi(i);
d0ba1bd2
JH
2194 }
2195 else {
1b6737cc 2196 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
972b05a9 2197 SETu(u);
d0ba1bd2 2198 }
b20c4ee1
FC
2199 if (left_ro_nonnum) SvNIOK_off(left);
2200 if (right_ro_nonnum) SvNIOK_off(right);
a0d0e21e
LW
2201 }
2202 else {
533c011a 2203 do_vop(PL_op->op_type, TARG, left, right);
a0d0e21e
LW
2204 SETTARG;
2205 }
2206 RETURN;
2207 }
2208}
79072805 2209
a0d0e21e
LW
2210PP(pp_bit_or)
2211{
3658c1f1
NC
2212 dVAR; dSP; dATARGET;
2213 const int op_type = PL_op->op_type;
2214
6f1401dc 2215 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
a0d0e21e
LW
2216 {
2217 dPOPTOPssrl;
4633a7c4 2218 if (SvNIOKp(left) || SvNIOKp(right)) {
b20c4ee1
FC
2219 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2220 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
d0ba1bd2 2221 if (PL_op->op_private & HINT_INTEGER) {
3658c1f1
NC
2222 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2223 const IV r = SvIV_nomg(right);
2224 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2225 SETi(result);
d0ba1bd2
JH
2226 }
2227 else {
3658c1f1
NC
2228 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2229 const UV r = SvUV_nomg(right);
2230 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2231 SETu(result);
d0ba1bd2 2232 }
b20c4ee1
FC
2233 if (left_ro_nonnum) SvNIOK_off(left);
2234 if (right_ro_nonnum) SvNIOK_off(right);
a0d0e21e
LW
2235 }
2236 else {
3658c1f1 2237 do_vop(op_type, TARG, left, right);
a0d0e21e
LW
2238 SETTARG;
2239 }
2240 RETURN;
79072805 2241 }
a0d0e21e 2242}
79072805 2243
/* pp_negate: unary minus.
 *
 * Works on the SV at the top of the stack and replaces it with the
 * negated value.  Three broad cases are visible below:
 *   - integer values are negated as integers, with explicit handling of
 *     IV_MIN and of UVs too large to negate as an IV;
 *   - other numeric values are negated as NVs;
 *   - strings get string-style negation: an identifier-like string gets
 *     a "-" prefix, a leading '+'/'-' is flipped, anything else is
 *     converted to a number first.
 */
PP(pp_negate)
{
    dVAR; dSP; dTARGET;
    tryAMAGICun_MG(neg_amg, AMGf_numeric);
    {
        SV * const sv = TOPs;
        /* NOTE(review): this is a snapshot of the flags taken *before* the
         * SvIV_please() call below, so the integer test that follows does
         * not see any flags that call may switch on -- confirm intended. */
        const int flags = SvFLAGS(sv);

        if( !SvNIOK( sv ) && looks_like_number( sv ) ){
            SvIV_please( sv );
        }

        if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
            /* It's publicly an integer, or privately an integer-not-float */
            /* Also the jump target for the string branches further down,
             * once a string has been successfully converted to an integer. */
        oops_its_an_int:
            if (SvIsUV(sv)) {
                if (SvIVX(sv) == IV_MIN) {
                    /* 2s complement assumption. */
                    SETi(SvIVX(sv));    /* special case: -((UV)IV_MAX+1) == IV_MIN */
                    RETURN;
                }
                else if (SvUVX(sv) <= IV_MAX) {
                    SETi(-SvIVX(sv));
                    RETURN;
                }
                /* UVs above (UV)IV_MAX+1 fall out of this block and are
                 * negated as NVs below. */
            }
            else if (SvIVX(sv) != IV_MIN) {
                SETi(-SvIVX(sv));
                RETURN;
            }
#ifdef PERL_PRESERVE_IVUV
            else {
                /* -IV_MIN does not fit an IV; return it as a UV. */
                SETu((UV)IV_MIN);
                RETURN;
            }
#endif
        }
        if (SvNIOKp(sv))
            SETn(-SvNV_nomg(sv));
        else if (SvPOKp(sv)) {
            STRLEN len;
            const char * const s = SvPV_nomg_const(sv, len);
            if (isIDFIRST(*s)) {
                /* Identifier-like string: result is "-" . string */
                sv_setpvs(TARG, "-");
                sv_catsv(TARG, sv);
            }
            else if (*s == '+' || *s == '-') {
                /* Copy the string and flip its leading sign character. */
                sv_setsv_nomg(TARG, sv);
                *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
            }
            else if (DO_UTF8(sv)) {
                /* UTF-8 string: try numeric conversion; if it is neither an
                 * integer nor an NV afterwards, prefix "-" as for the
                 * identifier case. */
                SvIV_please_nomg(sv);
                if (SvIOK(sv))
                    goto oops_its_an_int;
                if (SvNOK(sv))
                    sv_setnv(TARG, -SvNV_nomg(sv));
                else {
                    sv_setpvs(TARG, "-");
                    sv_catsv(TARG, sv);
                }
            }
            else {
                /* Any other string: integer if possible, otherwise NV. */
                SvIV_please_nomg(sv);
                if (SvIOK(sv))
                    goto oops_its_an_int;
                sv_setnv(TARG, -SvNV_nomg(sv));
            }
            SETTARG;
        }
        else
            SETn(-SvNV_nomg(sv));
    }
    RETURN;
}
2318
a0d0e21e 2319PP(pp_not)
79072805 2320{
6f1401dc
DM
2321 dVAR; dSP;
2322 tryAMAGICun_MG(not_amg, AMGf_set);
06c841cf 2323 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
a0d0e21e 2324 return NORMAL;
79072805
LW
2325}
2326
/* pp_complement: bitwise complement (~).
 *
 * Numeric operand: complemented as an IV when HINT_INTEGER is set in
 * op_private, as a UV otherwise.
 *
 * String operand: the string is copied into TARG and complemented there.
 *   - UTF-8 strings are complemented code point by code point.  A first
 *     pass counts the characters and the exact encoded size; a second
 *     pass writes the result into a freshly allocated buffer that is then
 *     handed over to TARG.
 *   - Byte strings are complemented in place, byte by byte (optionally a
 *     long at a time when LIBERAL is defined).
 */
PP(pp_complement)
{
    dVAR; dSP; dTARGET;
    tryAMAGICun_MG(compl_amg, AMGf_numeric);
    {
      dTOPss;
      if (SvNIOKp(sv)) {
        if (PL_op->op_private & HINT_INTEGER) {
          const IV i = ~SvIV_nomg(sv);
          SETi(i);
        }
        else {
          const UV u = ~SvUV_nomg(sv);
          SETu(u);
        }
      }
      else {
        register U8 *tmps;
        register I32 anum;
        STRLEN len;

        (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
        sv_setsv_nomg(TARG, sv);
        tmps = (U8*)SvPV_force_nomg(TARG, len);
        /* NOTE(review): STRLEN narrowed to I32 here; only the byte-string
         * path below uses anum. */
        anum = len;
        if (SvUTF8(TARG)) {
          /* Calculate exact length, let's not estimate. */
          STRLEN targlen = 0;   /* bytes needed if the result stays UTF-8 */
          STRLEN l;             /* byte length of the character just decoded */
          UV nchar = 0;         /* number of characters in the input */
          UV nwide = 0;         /* number of input characters above 0xff */
          U8 * const send = tmps + len;
          U8 * const origtmps = tmps;
          const UV utf8flags = UTF8_ALLOW_ANYUV;

          /* Pass 1: measure. */
          while (tmps < send) {
            const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
            tmps += l;
            targlen += UNISKIP(~c);
            nchar++;
            if (c > 0xff)
                nwide++;
          }

          /* Now rewind strings and write them. */
          tmps = origtmps;

          if (nwide) {
              /* At least one wide character: the result is UTF-8 encoded. */
              U8 *result;
              U8 *p;

              Newx(result, targlen + 1, U8);
              p = result;
              while (tmps < send) {
                  const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
                  tmps += l;
                  p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
              }
              *p = '\0';
              /* TARG takes over the buffer allocated above. */
              sv_usepvn_flags(TARG, (char*)result, targlen,
                              SV_HAS_TRAILING_NUL);
              SvUTF8_on(TARG);
          }
          else {
              /* Every character fits in one byte: complement each as a U8
               * and produce a non-UTF-8 result of nchar bytes. */
              U8 *result;
              U8 *p;

              Newx(result, nchar + 1, U8);
              p = result;
              while (tmps < send) {
                  const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
                  tmps += l;
                  *p++ = ~c;
              }
              *p = '\0';
              sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
              SvUTF8_off(TARG);
          }
          SETTARG;
          RETURN;
        }
#ifdef LIBERAL
        {
            /* Complement leading bytes until tmps is long-aligned, then
             * whole longs; the trailing bytes are done by the loop after
             * the #endif. */
            register long *tmpl;
            for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
                *tmps = ~*tmps;
            tmpl = (long*)tmps;
            for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
                *tmpl = ~*tmpl;
            tmps = (U8*)tmpl;
        }
#endif
        for ( ; anum > 0; anum--, tmps++)
            *tmps = ~*tmps;
        SETTARG;
      }
      RETURN;
    }
}
2426
a0d0e21e
LW
2427/* integer versions of some of the above */
2428
a0d0e21e 2429PP(pp_i_multiply)
79072805 2430{
6f1401dc
DM
2431 dVAR; dSP; dATARGET;
2432 tryAMAGICbin_MG(mult_amg, AMGf_assign);
a0d0e21e 2433 {
6f1401dc 2434 dPOPTOPiirl_nomg;
a0d0e21e
LW
2435 SETi( left * right );
2436 RETURN;
2437 }
79072805
LW
2438}
2439
a0d0e21e 2440PP(pp_i_divide)
79072805 2441{
85935d8e 2442 IV num;
6f1401dc
DM
2443 dVAR; dSP; dATARGET;
2444 tryAMAGICbin_MG(div_amg, AMGf_assign);
a0d0e21e 2445 {
6f1401dc 2446 dPOPTOPssrl;
85935d8e 2447 IV value = SvIV_nomg(right);
a0d0e21e 2448 if (value == 0)
ece1bcef 2449 DIE(aTHX_ "Illegal division by zero");
85935d8e 2450 num = SvIV_nomg(left);
a0cec769
YST
2451
2452 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2453 if (value == -1)
2454 value = - num;
2455 else
2456 value = num / value;
6f1401dc 2457 SETi(value);
a0d0e21e
LW
2458 RETURN;
2459 }
79072805
LW
2460}
2461
befad5d1 2462#if defined(__GLIBC__) && IVSIZE == 8
224ec323
JH
2463STATIC
2464PP(pp_i_modulo_0)
befad5d1
NC
2465#else
2466PP(pp_i_modulo)
2467#endif
224ec323
JH
2468{
2469 /* This is the vanilla old i_modulo. */
6f1401dc
DM
2470 dVAR; dSP; dATARGET;
2471 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2472 {
6f1401dc 2473 dPOPTOPiirl_nomg;
224ec323
JH
2474 if (!right)
2475 DIE(aTHX_ "Illegal modulus zero");
a0cec769
YST
2476 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2477 if (right == -1)
2478 SETi( 0 );
2479 else
2480 SETi( left % right );
224ec323
JH
2481 RETURN;
2482 }
2483}
2484
#if defined(__GLIBC__) && IVSIZE == 8
/* Integer modulus with the workaround for the _moddi3 bug in (at least)
 * glibc 2.2.5: the right operand is passed through PERL_ABS() before the
 * % operation.  Installed by pp_i_modulo below when the bug is detected. */
STATIC
PP(pp_i_modulo_1)

{
    dVAR; dSP; dATARGET;
    tryAMAGICbin_MG(modulo_amg, AMGf_assign);
    {
        dPOPTOPiirl_nomg;
        IV remainder = 0;

        if (!right)
            DIE(aTHX_ "Illegal modulus zero");

        /* x % -1 is always 0; skipping the real operation avoids
         * FPE_INTOVF on some platforms when left is IV_MIN */
        if (right != -1)
            remainder = left % PERL_ABS(right);

        SETi( remainder );
        RETURN;
    }
}

/* First-call dispatcher for integer modulus on glibc.  It re-points both
 * this op and the global dispatch table entry at pp_i_modulo_0, probes at
 * run time for the buggy _moddi3, switches to pp_i_modulo_1 if the probe
 * fails, and then performs this one operation itself. */
PP(pp_i_modulo)
{
    dVAR; dSP; dATARGET;
    tryAMAGICbin_MG(modulo_amg, AMGf_assign);
    {
        dPOPTOPiirl_nomg;
        IV remainder = 0;

        if (!right)
            DIE(aTHX_ "Illegal modulus zero");

        /* The assumption is to use hereafter the old vanilla version... */
        PL_ppaddr[OP_I_MODULO] = Perl_pp_i_modulo_0;
        PL_op->op_ppaddr       = Perl_pp_i_modulo_0;

        /* .. but if we have glibc, we might have a buggy _moddi3
         * (at least glibc 2.2.5 is known to have this bug), in other
         * words our integer modulus with negative quad as the second
         * argument might be broken.  Test for this and re-patch the
         * opcode dispatch table if that is the case, remembering to
         * also apply the workaround so that this first round works
         * right, too.  See [perl #9402] for more information. */
        {
            /* Cannot do this check with inlined IV constants since
             * that seems to work correctly even with the buggy glibc. */
            IV probe_l = 3;
            IV probe_r = -10;

            if (probe_l % probe_r == -3) {
                /* Yikes, we have the bug.
                 * Patch in the workaround version. */
                PL_ppaddr[OP_I_MODULO] = &Perl_pp_i_modulo_1;
                PL_op->op_ppaddr       = &Perl_pp_i_modulo_1;
                /* Make certain we work right this time, too. */
                right = PERL_ABS(right);
            }
        }

        /* x % -1 is always 0; skipping the real operation avoids
         * FPE_INTOVF on some platforms when left is IV_MIN */
        if (right != -1)
            remainder = left % right;

        SETi( remainder );
        RETURN;
    }
}
#endif
79072805 2551
a0d0e21e 2552PP(pp_i_add)
79072805 2553{
6f1401dc
DM
2554 dVAR; dSP; dATARGET;
2555 tryAMAGICbin_MG(add_amg, AMGf_assign);
a0d0e21e 2556 {
6f1401dc 2557 dPOPTOPiirl_ul_nomg;
a0d0e21e
LW
2558 SETi( left + right );
2559 RETURN;
79072805 2560 }
79072805
LW
2561}
2562
a0d0e21e 2563PP(pp_i_subtract)
79072805 2564{
6f1401dc
DM
2565 dVAR; dSP; dATARGET;
2566 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
a0d0e21e 2567 {
6f1401dc 2568 dPOPTOPiirl_ul_nomg;
a0d0e21e
LW
2569 SETi( left - right );
2570 RETURN;
79072805 2571 }
79072805
LW
2572}
2573
a0d0e21e 2574PP(pp_i_lt)
79072805 2575{
6f1401dc
DM
2576 dVAR; dSP;
2577 tryAMAGICbin_MG(lt_amg, AMGf_set);
a0d0e21e 2578 {
96b6b87f 2579 dPOPTOPiirl_nomg;
54310121 2580 SETs(boolSV(left < right));
a0d0e21e
LW
2581 RETURN;
2582 }
79072805
LW
2583}
2584
a0d0e21e 2585PP(pp_i_gt)
79072805 2586{
6f1401dc
DM
2587 dVAR; dSP;
2588 tryAMAGICbin_MG(gt_amg, AMGf_set);
a0d0e21e 2589 {
96b6b87f 2590 dPOPTOPiirl_nomg;
54310121 2591 SETs(boolSV(left > right));
a0d0e21e
LW
2592 RETURN;
2593 }
79072805
LW
2594}
2595
a0d0e21e 2596PP(pp_i_le)
79072805 2597{
6f1401dc
DM
2598 dVAR; dSP;
2599 tryAMAGICbin_MG(le_amg, AMGf_set);
a0d0e21e 2600 {
96b6b87f 2601 dPOPTOPiirl_nomg;
54310121 2602 SETs(boolSV(left <= right));
a0d0e21e 2603 RETURN;
85e6fe83 2604 }
79072805
LW
2605}
2606
a0d0e21e 2607PP(pp_i_ge)
79072805 2608{
6f1401dc
DM
2609 dVAR; dSP;
2610 tryAMAGICbin_MG(ge_amg, AMGf_set);
a0d0e21e 2611 {
96b6b87f 2612 dPOPTOPiirl_nomg;
54310121 2613 SETs(boolSV(left >= right));
a0d0e21e
LW
2614 RETURN;
2615 }
79072805
LW
2616}
2617
a0d0e21e 2618PP(pp_i_eq)
79072805 2619{
6f1401dc
DM
2620 dVAR; dSP;
2621 tryAMAGICbin_MG(eq_amg, AMGf_set);
a0d0e21e 2622 {
96b6b87f 2623 dPOPTOPiirl_nomg;
54310121 2624 SETs(boolSV(left == right));
a0d0e21e
LW
2625 RETURN;
2626 }
79072805
LW
2627}
2628
a0d0e21e 2629PP(pp_i_ne)
79072805 2630{
6f1401dc
DM
2631 dVAR; dSP;
2632 tryAMAGICbin_MG(ne_amg, AMGf_set);
a0d0e21e 2633 {
96b6b87f 2634 dPOPTOPiirl_nomg;
54310121 2635 SETs(boolSV(left != right));
a0d0e21e
LW
2636 RETURN;
2637 }
79072805
LW
2638}
2639
a0d0e21e 2640PP(pp_i_ncmp)
79072805 2641{
6f1401dc
DM
2642 dVAR; dSP; dTARGET;
2643 tryAMAGICbin_MG(ncmp_amg, 0);
a0d0e21e 2644 {
96b6b87f 2645 dPOPTOPiirl_nomg;
a0d0e21e 2646 I32 value;
79072805 2647
a0d0e21e 2648 if (left > right)
79072805 2649 value = 1;
a0d0e21e 2650 else if (left < right)
79072805 2651 value = -1;
a0d0e21e 2652 else
79072805 2653 value = 0;
a0d0e21e
LW
2654 SETi(value);
2655 RETURN;
79072805 2656 }
85e6fe83
LW
2657}
2658
2659PP(pp_i_negate)
2660{
6f1401dc
DM
2661 dVAR; dSP; dTARGET;
2662 tryAMAGICun_MG(neg_amg, 0);
2663 {
2664 SV * const sv = TOPs;
2665 IV const i = SvIV_nomg(sv);
2666 SETi(-i);
2667 RETURN;
2668 }
85e6fe83
LW
2669}
2670
79072805
LW
2671/* High falutin' math. */
2672
2673PP(pp_atan2)
2674{
6f1401dc
DM
2675 dVAR; dSP; dTARGET;
2676 tryAMAGICbin_MG(atan2_amg, 0);
a0d0e21e 2677 {
096c060c 2678 dPOPTOPnnrl_nomg;
a1021d57 2679 SETn(Perl_atan2(left, right));
a0d0e21e
LW
2680 RETURN;
2681 }
79072805
LW
2682}
2683
2684PP(pp_sin)
2685{
71302fe3
NC
2686 dVAR; dSP; dTARGET;
2687 int amg_type = sin_amg;
2688 const char *neg_report = NULL;
bc81784a 2689 NV (*func)(NV) = Perl_sin;
71302fe3
NC
2690 const int op_type = PL_op->op_type;
2691
2692 switch (op_type) {
2693 case OP_COS:
2694 amg_type = cos_amg;
bc81784a 2695 func = Perl_cos;
71302fe3
NC
2696 break;
2697 case OP_EXP:
2698 amg_type = exp_amg;
bc81784a 2699 func = Perl_exp;
71302fe3
NC
2700 break;
2701 case OP_LOG:
2702 amg_type = log_amg;
bc81784a 2703 func = Perl_log;
71302fe3
NC
2704 neg_report = "log";
2705 break;
2706 case OP_SQRT:
2707 amg_type = sqrt_amg;
bc81784a 2708 func = Perl_sqrt;
71302fe3
NC
2709 neg_report = "sqrt";
2710 break;
a0d0e21e 2711 }
79072805 2712
6f1401dc
DM
2713
2714 tryAMAGICun_MG(amg_type, 0);
a0d0e21e 2715 {
6f1401dc
DM
2716 SV * const arg = POPs;
2717 const NV value = SvNV_nomg(arg);
71302fe3
NC
2718 if (neg_report) {
2719 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2720 SET_NUMERIC_STANDARD();
2721 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2722 }
2723 }
2724 XPUSHn(func(value));
a0d0e21e
LW
2725 RETURN;
2726 }
79072805
LW
2727}
2728
56cb0a1c
AD
2729/* Support Configure command-line overrides for rand() functions.
2730 After 5.005, perhaps we should replace this by Configure support
2731 for drand48(), random(), or rand(). For 5.005, though, maintain
2732 compatibility by calling rand() but allow the user to override it.
2733 See INSTALL for details. --Andy Dougherty 15 July 1998
2734*/
85ab1d1d
JH
2735/* Now it's after 5.005, and Configure supports drand48() and random(),
2736 in addition to rand(). So the overrides should not be needed any more.
2737 --Jarkko Hietaniemi 27 September 1998
2738 */
2739
2740#ifndef HAS_DRAND48_PROTO
20ce7b12 2741extern double drand48 (void);
56cb0a1c
AD
2742#endif
2743
79072805
LW
2744PP(pp_rand)
2745{
97aff369 2746 dVAR; dSP; dTARGET;
65202027 2747 NV value;
79072805
LW
2748 if (MAXARG < 1)
2749 value = 1.0;
94ec06bc
FC
2750 else if (!TOPs) {
2751 value = 1.0; (void)POPs;
2752 }
79072805
LW
2753 else
2754 value = POPn;
2755 if (value == 0.0)
2756 value = 1.0;
80252599 2757 if (!PL_srand_called) {
85ab1d1d 2758 (void)seedDrand01((Rand_seed_t)seed());
80252599 2759 PL_srand_called = TRUE;
93dc8474 2760 }
85ab1d1d 2761 value *= Drand01();
79072805
LW
2762 XPUSHn(value);
2763 RETURN;
2764}
2765
2766PP(pp_srand)
2767{
83832992 2768 dVAR; dSP; dTARGET;
d22667bf 2769 const UV anum = (MAXARG < 1 || (!TOPs && !POPs)) ? seed() : POPu;
85ab1d1d 2770 (void)seedDrand01((Rand_seed_t)anum);
80252599 2771 PL_srand_called = TRUE;
da1010ec
NC
2772 if (anum)
2773 XPUSHu(anum);
2774 else {
2775 /* Historically srand always returned true. We can avoid breaking
2776 that like this: */
2777 sv_setpvs(TARG, "0 but true");
2778 XPUSHTARG;
2779 }
83832992 2780 RETURN;
79072805
LW
2781}
2782
79072805
LW
2783PP(pp_int)
2784{
6f1401dc
DM
2785 dVAR; dSP; dTARGET;
2786 tryAMAGICun_MG(int_amg, AMGf_numeric);
774d564b 2787 {
6f1401dc
DM
2788 SV * const sv = TOPs;
2789 const IV iv = SvIV_nomg(sv);
28e5dec8
JH
2790 /* XXX it's arguable that compiler casting to IV might be subtly
2791 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2792 else preferring IV has introduced a subtle behaviour change bug. OTOH
2793 relying on floating point to be accurate is a bug. */
2794
c781a409 2795 if (!SvOK(sv)) {
922c4365 2796 SETu(0);
c781a409
RD
2797 }
2798 else if (SvIOK(sv)) {
2799 if (SvIsUV(sv))
6f1401dc 2800 SETu(SvUV_nomg(sv));
c781a409 2801 else
28e5dec8 2802 SETi(iv);
c781a409 2803 }
c781a409 2804 else {
6f1401dc 2805 const NV value = SvNV_nomg(sv);
1048ea30 2806 if (value >= 0.0) {
28e5dec8
JH
2807 if (value < (NV)UV_MAX + 0.5) {
2808 SETu(U_V(value));
2809 } else {
059a1014 2810 SETn(Perl_floor(value));
28e5dec8 2811 }
1048ea30 2812 }
28e5dec8
JH
2813 else {
2814 if (value > (NV)IV_MIN - 0.5) {
2815 SETi(I_V(value));
2816 } else {
1bbae031 2817 SETn(Perl_ceil(value));
28e5dec8
JH
2818 }
2819 }
774d564b 2820 }
79072805 2821 }
79072805
LW
2822 RETURN;
2823}
2824
463ee0b2
LW
2825PP(pp_abs)
2826{
6f1401dc
DM
2827 dVAR; dSP; dTARGET;
2828 tryAMAGICun_MG(abs_amg, AMGf_numeric);
a0d0e21e 2829 {
6f1401dc 2830 SV * const sv = TOPs;
28e5dec8 2831 /* This will cache the NV value if string isn't actually integer */
6f1401dc 2832 const IV iv = SvIV_nomg(sv);
a227d84d 2833
800401ee 2834 if (!SvOK(sv)) {
922c4365 2835 SETu(0);
800401ee
JH
2836 }
2837 else if (SvIOK(sv)) {
28e5dec8 2838 /* IVX is precise */
800401ee 2839 if (SvIsUV(sv)) {
6f1401dc 2840 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
28e5dec8
JH
2841 } else {
2842 if (iv >= 0) {
2843 SETi(iv);
2844 } else {
2845 if (iv != IV_MIN) {
2846 SETi(-iv);
2847 } else {
2848 /* 2s complement assumption. Also, not really needed as
2849 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2850 SETu(IV_MIN);
2851 }
a227d84d 2852 }
28e5dec8
JH
2853 }
2854 } else{
6f1401dc 2855 const NV value = SvNV_nomg(sv);
774d564b 2856 if (value < 0.0)
1b6737cc 2857 SETn(-value);
a4474c9e
DD
2858 else
2859 SETn(value);
774d564b 2860 }
a0d0e21e 2861 }
774d564b 2862 RETURN;
463ee0b2
LW
2863}
2864
79072805
LW
2865PP(pp_oct)
2866{
97aff369 2867 dVAR; dSP; dTARGET;
5c144d81 2868 const char *tmps;
53305cf1 2869 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
6f894ead 2870 STRLEN len;
53305cf1
NC
2871 NV result_nv;
2872 UV result_uv;
1b6737cc 2873 SV* const sv = POPs;
79072805 2874
349d4f2f 2875 tmps = (SvPV_const(sv, len));
2bc69dc4
NIS
2876 if (DO_UTF8(sv)) {
2877 /* If Unicode, try to downgrade
2878 * If not possible, croak. */
1b6737cc 2879 SV* const tsv = sv_2mortal(newSVsv(sv));
2bc69dc4
NIS
2880
2881 SvUTF8_on(tsv);
2882 sv_utf8_downgrade(tsv, FALSE);
349d4f2f 2883 tmps = SvPV_const(tsv, len);
2bc69dc4 2884 }
daa2adfd
NC
2885 if (PL_op->op_type == OP_HEX)
2886 goto hex;
2887
6f894ead 2888 while (*tmps && len && isSPACE(*tmps))
53305cf1 2889 tmps++, len--;
9e24b6e2 2890 if (*tmps == '0')
53305cf1 2891 tmps++, len--;
a674e8db 2892 if (*tmps == 'x' || *tmps == 'X') {
daa2adfd 2893 hex:
53305cf1 2894 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
daa2adfd 2895 }
a674e8db 2896 else if (*tmps == 'b' || *tmps == 'B')
53305cf1 2897 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
464e2e8a 2898 else
53305cf1
NC
2899 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2900
2901 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2902 XPUSHn(result_nv);
2903 }
2904 else {
2905 XPUSHu(result_uv);
2906 }
79072805
LW
2907 RETURN;
2908}
2909
2910/* String stuff. */
2911
2912PP(pp_length)
2913{
97aff369 2914 dVAR; dSP; dTARGET;
0bd48802 2915 SV * const sv = TOPs;
a0ed51b3 2916
656266fc 2917 if (SvGAMAGIC(sv)) {
9f621bb0
NC
2918 /* For an overloaded or magic scalar, we can't know in advance if
2919 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2920 it likes to cache the length. Maybe that should be a documented
2921 feature of it.
92331800
NC
2922 */
2923 STRLEN len;
9f621bb0
NC
2924 const char *const p
2925 = sv_2pv_flags(sv, &len,
2926 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
92331800 2927
d88e091f 2928 if (!p) {
9407f9c1
DL
2929 if (!SvPADTMP(TARG)) {
2930 sv_setsv(TARG, &PL_sv_undef);
2931 SETTARG;
2932 }
2933 SETs(&PL_sv_undef);
d88e091f 2934 }
9f621bb0 2935 else if (DO_UTF8(sv)) {
899be101 2936 SETi(utf8_length((U8*)p, (U8*)p + len));
92331800
NC
2937 }
2938 else
2939 SETi(len);
656266fc 2940 } else if (SvOK(sv)) {
9f621bb0
NC
2941 /* Neither magic nor overloaded. */
2942 if (DO_UTF8(sv))
2943 SETi(sv_len_utf8(sv));
2944 else
2945 SETi(sv_len(sv));
656266fc 2946 } else {
9407f9c1
DL
2947 if (!SvPADTMP(TARG)) {
2948 sv_setsv_nomg(TARG, &PL_sv_undef);
2949 SETTARG;
2950 }
2951 SETs(&PL_sv_undef);
92331800 2952 }
79072805
LW
2953 RETURN;
2954}
2955
/* pp_substr: substr with 2, 3 or 4 arguments, in rvalue and lvalue form.
 *
 * Stack on entry (top last): string, position[, length[, replacement]].
 * The argument count comes from the low three bits of op_private; a NULL
 * length or replacement slot is treated as if that argument were absent.
 *
 * The requested range is normalised into a start (pos1) and an end (pos2)
 * position, each kept as an IV plus an "is really a UV" flag so that the
 * whole UV range can be represented.  Both are clamped to 0..curlen,
 * where curlen counts characters for UTF-8 strings and bytes otherwise.
 *
 * Result:
 *   - lvalue context without replacement: a new mortal PVLV carrying
 *     substr magic that refers to the source string;
 *   - otherwise: TARG set to the selected substring, after which the
 *     replacement (if any) is spliced into the source string;
 *   - start position outside the string: croak when in lvalue context or
 *     with a replacement, otherwise warn and return undef.
 */
PP(pp_substr)
{
    dVAR; dSP; dTARGET;
    SV *sv;
    STRLEN curlen;
    STRLEN utf8_curlen;     /* character length; 0 means "treat as bytes" */
    SV *   pos_sv;
    IV     pos1_iv;
    int    pos1_is_uv;
    IV     pos2_iv;
    int    pos2_is_uv;
    SV *   len_sv;
    IV     len_iv = 0;
    int    len_is_uv = 1;
    const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
    const char *tmps;
    SV *repl_sv = NULL;
    const char *repl = NULL;
    STRLEN repl_len;
    int num_args = PL_op->op_private & 7;
    bool repl_need_utf8_upgrade = FALSE;
    bool repl_is_utf8 = FALSE;

    /* Pop the optional arguments, last one first. */
    if (num_args > 2) {
        if (num_args > 3) {
          if((repl_sv = POPs)) {
            repl = SvPV_const(repl_sv, repl_len);
            repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
          }
          else num_args--;
        }
        if ((len_sv = POPs)) {
            len_iv    = SvIV(len_sv);
            len_is_uv = SvIOK_UV(len_sv);
        }
        else num_args--;
    }
    pos_sv     = POPs;
    pos1_iv    = SvIV(pos_sv);
    pos1_is_uv = SvIOK_UV(pos_sv);
    sv = POPs;
    PUTBACK;
    /* Reconcile the UTF-8 state of the string and the replacement: either
     * upgrade the string now, or note that the replacement must be
     * upgraded before it is inserted. */
    if (repl_sv) {
        if (repl_is_utf8) {
            if (!DO_UTF8(sv))
                sv_utf8_upgrade(sv);
        }
        else if (DO_UTF8(sv))
            repl_need_utf8_upgrade = TRUE;
    }
    tmps = SvPV_const(sv, curlen);
    if (DO_UTF8(sv)) {
        utf8_curlen = sv_len_utf8(sv);
        /* Same character and byte count: offsets need no translation. */
        if (utf8_curlen == curlen)
            utf8_curlen = 0;
        else
            curlen = utf8_curlen;
    }
    else
        utf8_curlen = 0;

    /* A negative start counts back from the end of the string. */
    if (!pos1_is_uv && pos1_iv < 0 && curlen) {
        pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
        pos1_iv += curlen;
    }
    if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
        goto bound_fail;

    /* Work out the end position, pos2. */
    if (num_args > 2) {
        if (!len_is_uv && len_iv < 0) {
            /* Negative length: the end is that far before the string end. */
            pos2_iv = curlen + len_iv;
            if (curlen)
                pos2_is_uv = curlen-1 > ~(UV)len_iv;
            else
                pos2_is_uv = 0;
        } else {  /* len_iv >= 0 */
            if (!pos1_is_uv && pos1_iv < 0) {
                pos2_iv = pos1_iv + len_iv;
                pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
            } else {
                /* Clamp instead of letting pos1 + len run past the end. */
                if ((UV)len_iv > curlen-(UV)pos1_iv)
                    pos2_iv = curlen;
                else
                    pos2_iv = pos1_iv+len_iv;
                pos2_is_uv = 1;
            }
        }
    }
    else {
        /* No length given: the range extends to the end of the string. */
        pos2_iv = curlen;
        pos2_is_uv = 1;
    }

    if (!pos2_is_uv && pos2_iv < 0) {
        /* Both ends lie before the start of the string. */
        if (!pos1_is_uv && pos1_iv < 0)
            goto bound_fail;
        pos2_iv = 0;
    }
    else if (!pos1_is_uv && pos1_iv < 0)
        pos1_iv = 0;

    if ((UV)pos2_iv < (UV)pos1_iv)
        pos2_iv = pos1_iv;
    if ((UV)pos2_iv > curlen)
        pos2_iv = curlen;

    {
        /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
        const STRLEN pos = (STRLEN)( (UV)pos1_iv );
        const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
        STRLEN byte_len = len;
        /* Translate character offsets to byte offsets when needed. */
        STRLEN byte_pos = utf8_curlen
            ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;

        if (lvalue && !repl) {
            SV * ret;

            if (!SvGMAGICAL(sv)) {
                if (SvROK(sv)) {
                    SvPV_force_nolen(sv);
                    Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
                                   "Attempt to use reference as lvalue in substr");
                }
                if (isGV_with_GP(sv))
                    SvPV_force_nolen(sv);
                else if (SvOK(sv))      /* is it defined ? */
                    (void)SvPOK_only_UTF8(sv);
                else
                    sv_setpvs(sv, ""); /* avoid lexical reincarnation */
            }

            ret = sv_2mortal(newSV_type(SVt_PVLV));  /* Not TARG RT#67838 */
            sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
            LvTYPE(ret) = 'x';
            LvTARG(ret) = SvREFCNT_inc_simple(sv);
            /* Offset and length are stored in characters (pos/len), not in
             * bytes (byte_pos/byte_len). */
            LvTARGOFF(ret) = pos;
            LvTARGLEN(ret) = len;

            SPAGAIN;
            PUSHs(ret);    /* avoid SvSETMAGIC here */
            RETURN;
        }

        SvTAINTED_off(TARG);                    /* decontaminate */
        SvUTF8_off(TARG);                       /* decontaminate */

        tmps += byte_pos;
        sv_setpvn(TARG, tmps, byte_len);
#ifdef USE_LOCALE_COLLATE
        sv_unmagic(TARG, PERL_MAGIC_collxfrm);
#endif
        if (utf8_curlen)
            SvUTF8_on(TARG);

        if (repl) {
            SV* repl_sv_copy = NULL;

            if (repl_need_utf8_upgrade) {
                /* Upgrade a copy so that the caller's replacement SV is
                 * left untouched. */
                repl_sv_copy = newSVsv(repl_sv);
                sv_utf8_upgrade(repl_sv_copy);
                repl = SvPV_const(repl_sv_copy, repl_len);
                /* NOTE(review): this tests SvCUR(sv), whereas the earlier
                 * computation of repl_is_utf8 tested SvCUR(repl_sv) --
                 * confirm which is intended. */
                repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
            }
            if (!SvOK(sv))
                sv_setpvs(sv, "");
            sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
            if (repl_is_utf8)
                SvUTF8_on(sv);
            SvREFCNT_dec(repl_sv_copy);
        }
    }
    SPAGAIN;
    SvSETMAGIC(TARG);
    PUSHs(TARG);
    RETURN;

bound_fail:
    /* Fatal in lvalue context or with a replacement; otherwise only a
     * warning, and undef is returned. */
    if (lvalue || repl)
        Perl_croak(aTHX_ "substr outside of string");
    Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
    RETPUSHUNDEF;
}
3138
3139PP(pp_vec)
3140{
2154eca7 3141 dVAR; dSP;
1b6737cc
AL
3142 register const IV size = POPi;
3143 register const IV offset = POPi;
3144 register SV * const src = POPs;
3145 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
2154eca7 3146 SV * ret;
a0d0e21e 3147
81e118e0 3148 if (lvalue) { /* it's an lvalue! */
2154eca7
EB
3149 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3150 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3151 LvTYPE(ret) = 'v';
3152 LvTARG(ret) = SvREFCNT_inc_simple(src);
3153 LvTARGOFF(ret) = offset;
3154 LvTARGLEN(ret) = size;
3155 }
3156 else {
3157 dTARGET;
3158 SvTAINTED_off(TARG); /* decontaminate */
3159 ret = TARG;
79072805
LW
3160 }
3161
2154eca7
EB
3162 sv_setuv(ret, do_vecget(src, offset, size));
3163 PUSHs(ret);
79072805
LW
3164 RETURN;
3165}
3166
/* pp_index: shared implementation of the forward search (op type
 * OP_INDEX, done with fbm_instr()) and the other op that dispatches here
 * (presumably rindex; done with rninstr()).
 *
 * Stack on entry (top last): big, little[, offset].  Pushes the position
 * of little within big, or -1 when it is not found.
 *
 * Most of the body brings both strings into the same encoding before the
 * search, and replaces magical/overloaded operands with plain temporary
 * copies so that their values are fetched only once.
 */
PP(pp_index)
{
    dVAR; dSP; dTARGET;
    SV *big;
    SV *little;
    SV *temp = NULL;    /* converted copy owned by this function, if any */
    STRLEN biglen;
    STRLEN llen = 0;
    I32 offset;
    I32 retval;
    const char *big_p;
    const char *little_p;
    bool big_utf8;
    bool little_utf8;
    const bool is_index = PL_op->op_type == OP_INDEX;
    /* A NULL third slot is popped here and treated as "no offset given". */
    const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));

    if (threeargs)
        offset = POPi;
    little = POPs;
    big = POPs;
    big_p = SvPV_const(big, biglen);
    little_p = SvPV_const(little, llen);

    big_utf8 = DO_UTF8(big);
    little_utf8 = DO_UTF8(little);
    if (big_utf8 ^ little_utf8) {
        /* One needs to be upgraded.  */
        if (little_utf8 && !PL_encoding) {
            /* Well, maybe instead we might be able to downgrade the small
               string?  */
            char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
                                                     &little_utf8);
            if (little_utf8) {
                /* If the large string is ISO-8859-1, and it's not possible to
                   convert the small string to ISO-8859-1, then there is no
                   way that it could be found anywhere by index.  */
                retval = -1;
                /* temp is still NULL here, so skipping the SvREFCNT_dec()
                 * before the label loses nothing. */
                goto fail;
            }

            /* At this point, pv is a malloc()ed string. So donate it to temp
               to ensure it will get free()d  */
            little = temp = newSV(0);
            sv_usepvn(temp, pv, llen);
            little_p = SvPVX(little);
        } else {
            /* Make a copy of whichever string is not UTF-8 and convert
             * that copy. */
            temp = little_utf8
                ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);

            if (PL_encoding) {
                sv_recode_to_utf8(temp, PL_encoding);
            } else {
                sv_utf8_upgrade(temp);
            }
            if (little_utf8) {
                big = temp;
                big_utf8 = TRUE;
                big_p = SvPV_const(big, biglen);
            } else {
                little = temp;
                little_p = SvPV_const(little, llen);
            }
        }
    }
    if (SvGAMAGIC(big)) {
        /* Life just becomes a lot easier if I use a temporary here.
           Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
           will trigger magic and overloading again, as will fbm_instr()
        */
        big = newSVpvn_flags(big_p, biglen,
                             SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
        big_p = SvPVX(big);
    }
    if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
        /* The "is_index && !SvOK()" test is a hack. fbm_instr() calls
           SvPV_const, which will warn on undef, and we've already triggered
           a warning with the SvPV_const some lines above. We can't remove
           that, as we need to call some SvPV to trigger overloading early
           and find out if the string is UTF-8.
           This is all getting too messy. The API isn't quite clean enough,
           because data access has side effects.
        */
        little = newSVpvn_flags(little_p, llen,
                                SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
        little_p = SvPVX(little);
    }

    /* Default start: the beginning for the forward search, the end for
     * the reverse one. */
    if (!threeargs)
        offset = is_index ? 0 : biglen;
    else {
        if (big_utf8 && offset > 0)
            sv_pos_u2b(big, &offset, 0);
        if (!is_index)
            offset += llen;
    }
    /* Clamp the starting offset to 0..biglen. */
    if (offset < 0)
        offset = 0;
    else if (offset > (I32)biglen)
        offset = biglen;
    /* little_p is reused from here on to hold the address of the match. */
    if (!(little_p = is_index
          ? fbm_instr((unsigned char*)big_p + offset,
                      (unsigned char*)big_p + biglen, little, 0)
          : rninstr(big_p,  big_p  + offset,
                    little_p, little_p + llen)))
        retval = -1;
    else {
        retval = little_p - big_p;
        /* sv_pos_b2u(): presumably converts the byte offset to a character
         * offset for UTF-8 strings. */
        if (retval > 0 && big_utf8)
            sv_pos_b2u(big, &retval);
    }
    SvREFCNT_dec(temp);
 fail:
    PUSHi(retval);
    RETURN;
}
3283
3284PP(pp_sprintf)
3285{
97aff369 3286 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3e6bd4bf 3287 SvTAINTED_off(TARG);
79072805 3288 do_sprintf(TARG, SP-MARK, MARK+1);
bbce6d69 3289 TAINT_IF(SvTAINTED(TARG));
79072805
LW
3290 SP = ORIGMARK;
3291 PUSHTARG;
3292 RETURN;
3293}
3294
79072805
LW
3295PP(pp_ord)
3296{
97aff369 3297 dVAR; dSP; dTARGET;
1eced8f8 3298
7df053ec 3299 SV *argsv = POPs;
ba210ebe 3300 STRLEN len;
349d4f2f 3301 const U8 *s = (U8*)SvPV_const(argsv, len);
121910a4 3302
799ef3cb 3303 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
1eced8f8 3304 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
799ef3cb 3305 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
121910a4
JH
3306 argsv = tmpsv;
3307 }
79072805 3308
872c91ae 3309 XPUSHu(DO_UTF8(argsv) ?
89ebb4a3 3310 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
5fc32dea 3311 (UV)(*s & 0xff));
68795e93 3312
79072805
LW
3313 RETURN;
3314}
3315
463ee0b2
LW
3316PP(pp_chr)
3317{
97aff369 3318 dVAR; dSP; dTARGET;
463ee0b2 3319 char *tmps;
8a064bd6
JH
3320 UV value;
3321
3322 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3323 ||
3324 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3325 if (IN_BYTES) {
3326 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3327 } else {
3328 (void) POPs; /* Ignore the argument value. */
3329 value = UNICODE_REPLACEMENT;
3330 }
3331 } else {
3332 value = POPu;
3333 }
463ee0b2 3334
862a34c6 3335 SvUPGRADE(TARG,SVt_PV);
a0ed51b3 3336
0064a8a9 3337 if (value > 255 && !IN_BYTES) {
eb160463 3338 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
62961d2e 3339 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
349d4f2f 3340 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
a0ed51b3
LW
3341 *tmps = '\0';
3342 (void)SvPOK_only(TARG);
aa6ffa16 3343 SvUTF8_on(TARG);
a0ed51b3
LW
3344 XPUSHs(TARG);
3345 RETURN;
3346 }
3347
748a9306 3348 SvGROW(TARG,2);
463ee0b2
LW
3349 SvCUR_set(TARG, 1);
3350 tmps = SvPVX(TARG);
eb160463 3351 *tmps++ = (char)value;
748a9306 3352 *tmps = '\0';
a0d0e21e 3353 (void)SvPOK_only(TARG);
4c5ed6e2 3354
88632417 3355 if (PL_encoding && !IN_BYTES) {
799ef3cb 3356 sv_recode_to_utf8(TARG, PL_encoding);
88632417
JH
3357 tmps = SvPVX(TARG);
3358 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
4c5ed6e2
TS
3359 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3360 SvGROW(TARG, 2);
d5a15ac2 3361 tmps = SvPVX(TARG);
4c5ed6e2
TS
3362 SvCUR_set(TARG, 1);
3363 *tmps++ = (char)value;
88632417 3364 *tmps = '\0';
4c5ed6e2 3365 SvUTF8_off(TARG);
88632417
JH
3366 }
3367 }
4c5ed6e2 3368
463ee0b2
LW
3369 XPUSHs(TARG);
3370 RETURN;
3371}
3372
79072805
LW
3373PP(pp_crypt)
3374{
79072805 3375#ifdef HAS_CRYPT
97aff369 3376 dVAR; dSP; dTARGET;
5f74f29c 3377 dPOPTOPssrl;
85c16d83 3378 STRLEN len;
10516c54 3379 const char *tmps = SvPV_const(left, len);
2bc69dc4 3380
85c16d83 3381 if (DO_UTF8(left)) {
2bc69dc4 3382 /* If Unicode, try to downgrade.
f2791508
JH
3383 * If not possible, croak.
3384 * Yes, we made this up. */
1b6737cc 3385 SV* const tsv = sv_2mortal(newSVsv(left));
2bc69dc4 3386
f2791508 3387 SvUTF8_on(tsv);
2bc69dc4 3388 sv_utf8_downgrade(tsv, FALSE);
349d4f2f 3389 tmps = SvPV_const(tsv, len);
85c16d83 3390 }
05404ffe
JH
3391# ifdef USE_ITHREADS
3392# ifdef HAS_CRYPT_R
3393 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3394 /* This should be threadsafe because in ithreads there is only
3395 * one thread per interpreter. If this would not be true,
3396 * we would need a mutex to protect this malloc. */
3397 PL_reentrant_buffer->_crypt_struct_buffer =
3398 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3399#if defined(__GLIBC__) || defined(__EMX__)
3400 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3401 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3402 /* work around glibc-2.2.5 bug */
3403 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3404 }
05404ffe 3405#endif
6ab58e4d 3406 }
05404ffe
JH
3407# endif /* HAS_CRYPT_R */
3408# endif /* USE_ITHREADS */
5f74f29c 3409# ifdef FCRYPT
83003860 3410 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
5f74f29c 3411# else
83003860 3412 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
5f74f29c 3413# endif
ec93b65f 3414 SETTARG;
4808266b 3415 RETURN;
79072805 3416#else
b13b2135 3417 DIE(aTHX_
79072805
LW
3418 "The crypt() function is unimplemented due to excessive paranoia.");
3419#endif
79072805
LW
3420}
3421
00f254e2
KW
3422/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3423 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3424
00f254e2
KW
3425/* Below are several macros that generate code */
/* Writes the encoding of code point c into p[0] and p[1].  The caller
 * guarantees that c occupies exactly two bytes in both UTF-8 and
 * UTF-EBCDIC.  p is left unchanged.  Note that p and c each appear twice in
 * the expansion. */
#define STORE_UNI_TO_UTF8_TWO_BYTE(p, c)                                    \
    STMT_START {                                                            \
        (p)[0] = UTF8_TWO_BYTE_HI(c);                                       \
        (p)[1] = UTF8_TWO_BYTE_LO(c);                                       \
    } STMT_END
3433
/* Appending variant of STORE_UNI_TO_UTF8_TWO_BYTE: writes the two bytes of
 * code point c at p and then moves p past them, so that it points to the
 * next available byte.  p must be a modifiable pointer lvalue. */
#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c)                                      \
    STMT_START {                                                            \
        (p)[0] = UTF8_TWO_BYTE_HI(c);                                       \
        (p)[1] = UTF8_TWO_BYTE_LO(c);                                       \
        (p) += 2;                                                           \
    } STMT_END
3441
/* Stores, into p[0] and p[1], the upper case of the latin1 character l,
 * where l is known to be one of the only two latin1 characters whose upper
 * case lies outside latin1.  This macro knows both of them, and that each
 * upper case fits in two UTF-8 or UTF-EBCDIC bytes.  p is not advanced. */
#define STORE_NON_LATIN1_UC(p, l)                                           \
STMT_START {                                                                \
    if ((l) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {                       \
        /* Not y-diaeresis, so must be the other one */                     \
        STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU);           \
    } else {                                                                \
        STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
    }                                                                       \
} STMT_END
3455
/* Appending variant of STORE_NON_LATIN1_UC: writes the two-byte upper case
 * of l at p and leaves p pointing to the next available byte after it. */
#define CAT_NON_LATIN1_UC(p, l)                                             \
STMT_START {                                                                \
    if ((l) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {                       \
        CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU);             \
    } else {                                                                \
        CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
    }                                                                       \
} STMT_END
3466
/* Appends at p the upper case of the latin1 character l, and advances p
 * past what was written (always two bytes).
 *   p  modifiable pointer lvalue into the output buffer
 *   l  the original latin1 character
 *   u  must be toUPPER_LATIN1_MOD(l), and the result must need two bytes
 * If u is not the LATIN_SMALL_LETTER_Y_WITH_DIAERESIS marker value, it is
 * the upper case itself and is stored as two UTF-8 bytes.  Otherwise l is
 * one of the special cases: sharp s becomes the two ASCII bytes 'SS', and
 * the remaining two are delegated to CAT_NON_LATIN1_UC.
 * Every use of an argument is parenthesized so that any expression may be
 * passed (for instance a conditional expression for l). */
#define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u)                                 \
STMT_START {                                                                \
    if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {                       \
        CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
    } else if ((l) == LATIN_SMALL_LETTER_SHARP_S) {                         \
        *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */                \
    } else {/* else is one of the other two special cases */                \
        CAT_NON_LATIN1_UC((p), (l));                                        \
    }                                                                       \
} STMT_END
3481
79072805
LW
/* pp_ucfirst: shared implementation of ucfirst() and lcfirst(); which one is
 * selected by PL_op->op_type.  The operand is the SV on top of the stack.
 * The case-changed first character is first computed into tmpbuf, then the
 * result is produced either by overwriting the start of the operand in place
 * or by building the whole string in TARG, which then replaces the operand
 * on the stack. */
3482PP(pp_ucfirst)
3483{
00f254e2
KW
3484 /* Actually is both lcfirst() and ucfirst(). Only the first character
3485 * changes. This means that possibly we can change in-place, ie., just
3486 * take the source and change that one character and store it back, but not
3487 * if read-only etc, or if the length changes */
3488
97aff369 3489 dVAR;
39644a26 3490 dSP;
d54190f6 3491 SV *source = TOPs;
00f254e2 3492 STRLEN slen; /* slen is the byte length of the whole SV. */
d54190f6
NC
3493 STRLEN need;
3494 SV *dest;
00f254e2
KW
3495 bool inplace; /* ? Convert first char only, in-place */
3496 bool doing_utf8 = FALSE; /* ? using utf8 */
3497 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
12e9c124 3498 const int op_type = PL_op->op_type;
d54190f6
NC
3499 const U8 *s;
3500 U8 *d;
3501 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
00f254e2
KW
3502 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3503 * stored as UTF-8 at s. */
3504 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3505 * lowercased) character stored in tmpbuf. May be either
3506 * UTF-8 or not, but in either case is the number of bytes */
d54190f6
NC
3507
3508 SvGETMAGIC(source);
/* An undefined operand is treated as the empty string, after the optional
 * "uninitialized" warning */
3509 if (SvOK(source)) {
3510 s = (const U8*)SvPV_nomg_const(source, slen);
3511 } else {
0a0ffbce
RGS
3512 if (ckWARN(WARN_UNINITIALIZED))
3513 report_uninit(source);
1eced8f8 3514 s = (const U8*)"";
d54190f6
NC
3515 slen = 0;
3516 }
a0ed51b3 3517
00f254e2
KW
3518 /* We may be able to get away with changing only the first character, in
3519 * place, but not if read-only, etc. Later we may discover more reasons to
3520 * not convert in-place. */
3521 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3522
3523 /* First calculate what the changed first character should be. This affects
3524 * whether we can just swap it out, leaving the rest of the string unchanged,
3525 * or even if have to convert the dest to UTF-8 when the source isn't */
3526
3527 if (! slen) { /* If empty */
3528 need = 1; /* still need a trailing NUL */
3529 }
3530 else if (DO_UTF8(source)) { /* Is the source utf8? */
d54190f6 3531 doing_utf8 = TRUE;
00f254e2 3532
00f254e2
KW
3533 if (UTF8_IS_INVARIANT(*s)) {
3534
3535 /* An invariant source character is either ASCII or, in EBCDIC, an
3536 * ASCII equivalent or a caseless C1 control. In both these cases,
3537 * the lower and upper cases of any character are also invariants
3538 * (and title case is the same as upper case). So it is safe to
3539 * use the simple case change macros which avoid the overhead of
3540 * the general functions. Note that if perl were to be extended to
3541 * do locale handling in UTF-8 strings, this wouldn't be true in,
3542 * for example, Lithuanian or Turkic. */
3543 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3544 tculen = ulen = 1;
3545 need = slen + 1;
12e9c124 3546 }
00f254e2
KW
3547 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3548 U8 chr;
3549
3550 /* Similarly, if the source character isn't invariant but is in the
3551 * latin1 range (or EBCDIC equivalent thereof), we have the case
3552 * changes compiled into perl, and can avoid the overhead of the
3553 * general functions. In this range, the characters are stored as
3554 * two UTF-8 bytes, and it so happens that any changed-case version
3555 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3556 tculen = ulen = 2;
3557 need = slen + 1;
3558
3559 /* Convert the two source bytes to a single Unicode code point
3560 * value, change case and save for below */
356979f4 3561 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
00f254e2
KW
3562 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3563 U8 lower = toLOWER_LATIN1(chr);
3564 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3565 }
3566 else { /* ucfirst */
3567 U8 upper = toUPPER_LATIN1_MOD(chr);
3568
3569 /* Most of the latin1 range characters are well-behaved. Their
3570 * title and upper cases are the same, and are also in the
3571 * latin1 range. The macro above returns their upper (hence
3572 * title) case, and all that need be done is to save the result
3573 * for below. However, several characters are problematic, and
3574 * have to be handled specially. The MOD in the macro name
3575 * above means that these tricky characters all get mapped to
3576 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3577 * This mapping saves some tests for the majority of the
3578 * characters */
3579
3580 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3581
3582 /* Not tricky. Just save it. */
3583 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3584 }
3585 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3586
3587 /* This one is tricky because it is two characters long,
3588 * though the UTF-8 is still two bytes, so the stored
3589 * length doesn't change */
3590 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3591 *(tmpbuf + 1) = 's';
3592 }
3593 else {
3594
3595 /* The other two have their title and upper cases the same,
3596 * but are tricky because the changed-case characters
3597 * aren't in the latin1 range. They, however, do fit into
3598 * two UTF-8 bytes */
3599 STORE_NON_LATIN1_UC(tmpbuf, chr);
3600 }
3601 }
3602 }
3603 else {
00f254e2
KW
3604
3605 /* Here, can't short-cut the general case */
3606
/* Called only to set ulen, the byte length of the first character; the
 * returned code point is discarded */
3607 utf8_to_uvchr(s, &ulen);
3608 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3609 else toLOWER_utf8(s, tmpbuf, &tculen);
3610
3611 /* we can't do in-place if the length changes. */
3612 if (ulen != tculen) inplace = FALSE;
3613 need = slen + 1 - ulen + tculen;
00f254e2 3614 }
d54190f6 3615 }
00f254e2
KW
3616 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3617 * latin1 is treated as caseless. Note that a locale takes
3618 * precedence */
3619 tculen = 1; /* Most characters will require one byte, but this will
3620 * need to be overridden for the tricky ones */
3621 need = slen + 1;
3622
3623 if (op_type == OP_LCFIRST) {
d54190f6 3624
00f254e2
KW
3625 /* lower case the first letter: no trickiness for any character */
3626 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3627 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3628 }
3629 /* is ucfirst() */
3630 else if (IN_LOCALE_RUNTIME) {
3631 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3632 * have upper and title case different
3633 */
3634 }
3635 else if (! IN_UNI_8_BIT) {
3636 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3637 * on EBCDIC machines whatever the
3638 * native function does */
3639 }
3640 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3641 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3642
3643 /* tmpbuf now has the correct title case for all latin1 characters
3644 * except for the several ones that have tricky handling. All
3645 * of these are mapped by the MOD to the letter below. */
3646 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3647
3648 /* The length is going to change, with all three of these, so
3649 * can't replace just the first character */
3650 inplace = FALSE;
3651
3652 /* We use the original to distinguish between these tricky
3653 * cases */
3654 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3655 /* Two character title case 'Ss', but can remain non-UTF-8 */
3656 need = slen + 2;
3657 *tmpbuf = 'S';
3658 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3659 tculen = 2;
3660 }
3661 else {
d54190f6 3662
00f254e2
KW
3663 /* The other two tricky ones have their title case outside
3664 * latin1. It is the same as their upper case. */
3665 doing_utf8 = TRUE;
3666 STORE_NON_LATIN1_UC(tmpbuf, *s);
3667
3668 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3669 * and their upper cases is 2. */
3670 tculen = ulen = 2;
3671
3672 /* The entire result will have to be in UTF-8. Assume worst
3673 * case sizing in conversion. (all latin1 characters occupy
3674 * at most two bytes in utf8) */
3675 convert_source_to_utf8 = TRUE;
3676 need = slen * 2 + 1;
3677 }
3678 } /* End of is one of the three special chars */
3679 } /* End of use Unicode (Latin1) semantics */
3680 } /* End of changing the case of the first character */
3681
3682 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3683 * generate the result */
3684 if (inplace) {
3685
3686 /* We can convert in place. This means we change just the first
3687 * character without disturbing the rest; no need to grow */
d54190f6
NC
3688 dest = source;
3689 s = d = (U8*)SvPV_force_nomg(source, slen);
3690 } else {
3691 dTARGET;
3692
3693 dest = TARG;
3694
00f254e2
KW
3695 /* Here, we can't convert in place; we earlier calculated how much
3696 * space we will need, so grow to accommodate that */
d54190f6 3697 SvUPGRADE(dest, SVt_PV);
3b416f41 3698 d = (U8*)SvGROW(dest, need);
d54190f6
NC
3699 (void)SvPOK_only(dest);
3700
/* The result SV takes the operand's place on top of the stack */
3701 SETs(dest);
d54190f6 3702 }
44bc797b 3703
d54190f6 3704 if (doing_utf8) {
00f254e2
KW
3705 if (! inplace) {
3706 if (! convert_source_to_utf8) {
3707
3708 /* Here both source and dest are in UTF-8, but have to create
3709 * the entire output. We initialize the result to be the
3710 * title/lower cased first character, and then append the rest
3711 * of the string. */
3712 sv_setpvn(dest, (char*)tmpbuf, tculen);
3713 if (slen > ulen) {
3714 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3715 }
3716 }
3717 else {
3718 const U8 *const send = s + slen;
3719
3720 /* Here the dest needs to be in UTF-8, but the source isn't,
3721 * except we earlier UTF-8'd the first character of the source
3722 * into tmpbuf. First put that into dest, and then append the
3723 * rest of the source, converting it to UTF-8 as we go. */
3724
3725 /* Assert tculen is 2 here because the only two characters that
3726 * get to this part of the code have 2-byte UTF-8 equivalents */
3727 *d++ = *tmpbuf;
3728 *d++ = *(tmpbuf + 1);
3729 s++; /* We have just processed the 1st char */
3730
3731 for (; s < send; s++) {
3732 d = uvchr_to_utf8(d, *s);
3733 }
3734 *d = '\0';
3735 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3736 }
d54190f6 3737 SvUTF8_on(dest);
a0ed51b3 3738 }
00f254e2 3739 else { /* in-place UTF-8. Just overwrite the first character */
d54190f6
NC
3740 Copy(tmpbuf, d, tculen, U8);
3741 SvCUR_set(dest, need - 1);
a0ed51b3 3742 }
a0ed51b3 3743 }
00f254e2
KW
3744 else { /* Neither source nor dest are in or need to be UTF-8 */
3745 if (slen) {
2de3dbcc 3746 if (IN_LOCALE_RUNTIME) {
31351b04 3747 TAINT;
d54190f6 3748 SvTAINTED_on(dest);
31351b04 3749 }
00f254e2
KW
3750 if (inplace) { /* in-place, only need to change the 1st char */
3751 *d = *tmpbuf;
3752 }
3753 else { /* Not in-place */
3754
3755 /* Copy the case-changed character(s) from tmpbuf */
3756 Copy(tmpbuf, d, tculen, U8);
3757 d += tculen - 1; /* Code below expects d to point to final
3758 * character stored */
3759 }
3760 }
3761 else { /* empty source */
3762 /* See bug #39028: Don't taint if empty */
d54190f6
NC
3763 *d = *s;
3764 }
3765
00f254e2
KW
3766 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3767 * the destination to retain that flag */
d54190f6
NC
3768 if (SvUTF8(source))
3769 SvUTF8_on(dest);
3770
00f254e2 3771 if (!inplace) { /* Finish the rest of the string, unchanged */
d54190f6
NC
/* d is at the last byte stored from tmpbuf, so the tail goes at d + 1.
 * Only the first source byte was consumed, so the tail starts at s + 1 */
3772 /* This will copy the trailing NUL */
3773 Copy(s + 1, d + 1, slen, U8);
3774 SvCUR_set(dest, need - 1);
bbce6d69 3775 }
bbce6d69 3776 }
539689e7
FC
/* When the result is a separate SV, carry the operand's taint over to it */
3777 if (dest != source && SvTAINTED(source))
3778 SvTAINT(dest);
d54190f6 3779 SvSETMAGIC(dest);
79072805
LW
3780 RETURN;
3781}
3782
67306194
NC
3783/* There's so much setup/teardown code common between uc and lc, I wonder if
3784 it would be worth merging the two, and just having a switch outside each
00f254e2 3785 of the three tight loops. There is less and less commonality though */
79072805
LW
3786PP(pp_uc)
3787{
97aff369 3788 dVAR;
39644a26 3789 dSP;
67306194 3790 SV *source = TOPs;
463ee0b2 3791 STRLEN len;
67306194
NC
3792 STRLEN min;
3793 SV *dest;
3794 const U8 *s;
3795 U8 *d;
79072805 3796
67306194
NC
3797 SvGETMAGIC(source);
3798
3799 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
00f254e2
KW
3800 && SvTEMP(source) && !DO_UTF8(source)
3801 && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
3802
3803 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
3804 * make the loop tight, so we overwrite the source with the dest before
3805 * looking at it, and we need to look at the original source
3806 * afterwards. There would also need to be code added to handle
3807 * switching to not in-place in midstream if we run into characters
3808 * that change the length.
3809 */
67306194
NC
3810 dest = source;
3811 s = d = (U8*)SvPV_force_nomg(source, len);
3812 min = len + 1;
3813 } else {
a0ed51b3 3814 dTARGET;
a0ed51b3 3815
67306194 3816 dest = TARG;
128c9517 3817
67306194
NC
3818 /* The old implementation would copy source into TARG at this point.
3819 This had the side effect that if source was undef, TARG was now
3820 an undefined SV with PADTMP set, and they don't warn inside
3821 sv_2pv_flags(). However, we're now getting the PV direct from
3822 source, which doesn't have PADTMP set, so it would warn. Hence the
3823 little games. */
3824
3825 if (SvOK(source)) {
3826 s = (const U8*)SvPV_nomg_const(source, len);
3827 } else {
0a0ffbce
RGS
3828 if (ckWARN(WARN_UNINITIALIZED))
3829 report_uninit(source);
1eced8f8 3830 s = (const U8*)"";
67306194 3831 len = 0;
a0ed51b3 3832 }
67306194
NC
3833 min = len + 1;
3834
3835 SvUPGRADE(dest, SVt_PV);
3b416f41 3836 d = (U8*)SvGROW(dest, min);
67306194
NC
3837 (void)SvPOK_only(dest);
3838
3839 SETs(dest);
a0ed51b3 3840 }
31351b04 3841
67306194
NC
3842 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
3843 to check DO_UTF8 again here. */
3844
3845 if (DO_UTF8(source)) {
3846 const U8 *const send = s + len;
3847 U8 tmpbuf[UTF8_MAXBYTES+1];
3848
4c8a458a
KW
3849 /* All occurrences of these are to be moved to follow any other marks.
3850 * This is context-dependent. We may not be passed enough context to
3851 * move the iota subscript beyond all of them, but we do the best we can
3852 * with what we're given. The result is always better than if we
3853 * hadn't done this. And, the problem would only arise if we are
3854 * passed a character without all its combining marks, which would be
3855 * the caller's mistake. The information this is based on comes from a
3856 * comment in Unicode SpecialCasing.txt, (and the Standard's text
3857 * itself) and so can't be checked properly to see if it ever gets
3858 * revised. But the likelihood of it changing is remote */
00f254e2 3859 bool in_iota_subscript = FALSE;
00f254e2 3860
67306194 3861 while (s < send) {
00f254e2
KW
3862 if (in_iota_subscript && ! is_utf8_mark(s)) {
3863 /* A non-mark. Time to output the iota subscript */
3864#define GREEK_CAPITAL_LETTER_IOTA 0x0399
3865#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
3866
3867 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3868 in_iota_subscript = FALSE;
3869 }
00f254e2 3870
00f254e2
KW
3871 /* If the UTF-8 character is invariant, then it is in the range
3872 * known by the standard macro; result is only one byte long */
3873 if (UTF8_IS_INVARIANT(*s)) {
3874 *d++ = toUPPER(*s);
3875 s++;
3876 }
3877 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3878
3879 /* Likewise, if it fits in a byte, its case change is in our
3880 * table */
e1a8dbf5 3881 U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
00f254e2
KW
3882 U8 upper = toUPPER_LATIN1_MOD(orig);
3883 CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
e1a8dbf5 3884 s += 2;
00f254e2
KW
3885 }
3886 else {
00f254e2
KW
3887
3888 /* Otherwise, need the general UTF-8 case. Get the changed
3889 * case value and copy it to the output buffer */
3890
3891 const STRLEN u = UTF8SKIP(s);
3892 STRLEN ulen;
67306194 3893
00f254e2 3894 const UV uv = toUPPER_utf8(s, tmpbuf, &ulen);
4c8a458a
KW
3895 if (uv == GREEK_CAPITAL_LETTER_IOTA
3896 && utf8_to_uvchr(s, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
3897 {
00f254e2
KW
3898 in_iota_subscript = TRUE;
3899 }
3900 else {
00f254e2
KW
3901 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
3902 /* If the eventually required minimum size outgrows
3903 * the available space, we need to grow. */
3904 const UV o = d - (U8*)SvPVX_const(dest);
3905
3906 /* If someone uppercases one million U+03B0s we
3907 * SvGROW() one million times. Or we could try
3908 * guessing how much to allocate without allocating too
4c8a458a
KW
3909 * much. Such is life. See corresponding comment in
3910 * lc code for another option */
00f254e2
KW
3911 SvGROW(dest, min);
3912 d = (U8*)SvPVX(dest) + o;
3913 }
3914 Copy(tmpbuf, d, ulen, U8);
3915 d += ulen;
00f254e2 3916 }
00f254e2 3917 s += u;
67306194 3918 }
67306194 3919 }
4c8a458a
KW
3920 if (in_iota_subscript) {
3921 CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
3922 }
67306194
NC
3923 SvUTF8_on(dest);
3924 *d = '\0';
3925 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4c8a458a
KW
3926 }
3927 else { /* Not UTF-8 */
67306194
NC
3928 if (len) {
3929 const U8 *const send = s + len;
00f254e2
KW
3930
3931 /* Use locale casing if in locale; regular style if not treating
3932 * latin1 as having case; otherwise the latin1 casing. Do the
3933 * whole thing in a tight loop, for speed, */
2de3dbcc 3934 if (IN_LOCALE_RUNTIME) {
31351b04 3935 TAINT;
67306194
NC
3936 SvTAINTED_on(dest);
3937 for (; s < send; d++, s++)
3938 *d = toUPPER_LC(*s);
31351b04 3939 }
00f254e2
KW
3940 else if (! IN_UNI_8_BIT) {
3941 for (; s < send; d++, s++) {
67306194 3942 *d = toUPPER(*s);
00f254e2 3943 }
31351b04 3944 }
00f254e2
KW
3945 else {
3946 for (; s < send; d++, s++) {
3947 *d = toUPPER_LATIN1_MOD(*s);
3948 if (*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) continue;
3949
3950 /* The mainstream case is the tight loop above. To avoid
3951 * extra tests in that, all three characters that require
3952 * special handling are mapped by the MOD to the one tested
3953 * just above.
3954 * Use the source to distinguish between the three cases */
3955
3956 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3957
3958 /* uc() of this requires 2 characters, but they are
3959 * ASCII. If not enough room, grow the string */
3960 if (SvLEN(dest) < ++min) {
3961 const UV o = d - (U8*)SvPVX_const(dest);
3962 SvGROW(dest, min);
3963 d = (U8*)SvPVX(dest) + o;
3964 }
3965 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
3966 continue; /* Back to the tight loop; still in ASCII */
3967 }
3968
3969 /* The other two special handling characters have their
3970 * upper cases outside the latin1 range, hence need to be
3971 * in UTF-8, so the whole result needs to be in UTF-8. So,
3972 * here we are somewhere in the middle of processing a
3973 * non-UTF-8 string, and realize that we will have to convert
3974 * the whole thing to UTF-8. What to do? There are
3975 * several possibilities. The simplest to code is to
3976 * convert what we have so far, set a flag, and continue on
3977 * in the loop. The flag would be tested each time through
3978 * the loop, and if set, the next character would be
3979 * converted to UTF-8 and stored. But, I (khw) didn't want
3980 * to slow down the mainstream case at all for this fairly
3981 * rare case, so I didn't want to add a test that didn't
3982 * absolutely have to be there in the loop, besides the
3983 * possibility that it would get too complicated for
3984 * optimizers to deal with. Another possibility is to just
3985 * give up, convert the source to UTF-8, and restart the
3986 * function that way. Another possibility is to convert
3987 * both what has already been processed and what is yet to
3988 * come separately to UTF-8, then jump into the loop that
3989 * handles UTF-8. But the most efficient time-wise of the
3990 * ones I could think of is what follows, and turned out to
3991 * not require much extra code. */
3992
3993 /* Convert what we have so far into UTF-8, telling the
3994 * function that we know it should be converted, and to
3995 * allow extra space for what we haven't processed yet.
3996 * Assume the worst case space requirements for converting
3997 * what we haven't processed so far: that it will require
3998 * two bytes for each remaining source character, plus the
3999 * NUL at the end. This may cause the string pointer to
4000 * move, so re-find it. */
4001
4002 len = d - (U8*)SvPVX_const(dest);
4003 SvCUR_set(dest, len);
4004 len = sv_utf8_upgrade_flags_grow(dest,
4005 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4006 (send -s) * 2 + 1);
4007 d = (U8*)SvPVX(dest) + len;
4008
4009 /* And append the current character's upper case in UTF-8 */
4010 CAT_NON_LATIN1_UC(d, *s);
4011
4012 /* Now process the remainder of the source, converting to
4013 * upper and UTF-8. If a resulting byte is invariant in
4014 * UTF-8, output it as-is, otherwise convert to UTF-8 and
4015 * append it to the output. */
4016
4017 s++;
4018 for (; s < send; s++) {
4019 U8 upper = toUPPER_LATIN1_MOD(*s);
4020 if UTF8_IS_INVARIANT(upper) {
4021 *d++ = upper;
4022 }
4023 else {
4024 CAT_TWO_BYTE_UNI_UPPER_MOD(d, *s, upper);
4025 }
4026 }
4027
4028 /* Here have processed the whole source; no need to continue
4029 * with the outer loop. Each character has been converted
4030 * to upper case and converted to UTF-8 */
4031
4032 break;
4033 } /* End of processing all latin1-style chars */
4034 } /* End of processing all chars */
4035 } /* End of source is not empty */
4036
67306194 4037 if (source != dest) {
00f254e2 4038 *d = '\0'; /* Here d points to 1 after last char, add NUL */
67306194
NC
4039 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4040 }
00f254e2 4041 } /* End of isn't utf8 */
539689e7
FC
4042 if (dest != source && SvTAINTED(source))
4043 SvTAINT(dest);
67306194 4044 SvSETMAGIC(dest);
79072805
LW
4045 RETURN;
4046}
4047
4048PP(pp_lc)
4049{
97aff369 4050 dVAR;
39644a26 4051 dSP;
ec9af7d4 4052 SV *source = TOPs;
463ee0b2 4053 STRLEN len;
ec9af7d4
NC
4054 STRLEN min;
4055 SV *dest;
4056 const U8 *s;
4057 U8 *d;
79072805 4058
ec9af7d4
NC
4059 SvGETMAGIC(source);
4060
4061 if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
17fa0776 4062 && SvTEMP(source) && !DO_UTF8(source)) {
ec9af7d4 4063
00f254e2
KW
4064 /* We can convert in place, as lowercasing anything in the latin1 range
4065 * (or else DO_UTF8 would have been on) doesn't lengthen it */
ec9af7d4
NC
4066 dest = source;
4067 s = d = (U8*)SvPV_force_nomg(source, len);
4068 min = len + 1;
4069 } else {
a0ed51b3 4070 dTARGET;
a0ed51b3 4071
ec9af7d4
NC
4072 dest = TARG;
4073
4074 /* The old implementation would copy source into TARG at this point.
4075 This had the side effect that if source was undef, TARG was now
4076 an undefined SV with PADTMP set, and they don't warn inside
4077 sv_2pv_flags(). However, we're now getting the PV direct from
4078 source, which doesn't have PADTMP set, so it would warn. Hence the
4079 little games. */
4080
4081 if (SvOK(source)) {
4082 s = (const U8*)SvPV_nomg_const(source, len);
4083 } else {
0a0ffbce
RGS
4084 if (ckWARN(WARN_UNINITIALIZED))
4085 report_uninit(source);
1eced8f8 4086 s = (const U8*)"";
ec9af7d4 4087 len = 0;
a0ed51b3 4088 }
ec9af7d4 4089 min = len + 1;
128c9517 4090
ec9af7d4 4091 SvUPGRADE(dest, SVt_PV);
3b416f41 4092 d = (U8*)SvGROW(dest, min);
ec9af7d4
NC
4093 (void)SvPOK_only(dest);
4094
4095 SETs(dest);
4096 }
4097
4098 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4099 to check DO_UTF8 again here. */
4100
4101 if (DO_UTF8(source)) {
4102 const U8 *const send = s + len;
4103 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4104
4105 while (s < send) {
00f254e2 4106 if (UTF8_IS_INVARIANT(*s)) {
89ebb4a3 4107
00f254e2 4108 /* Invariant characters use the standard mappings compiled in.
ec9af7d4 4109 */
00f254e2
KW
4110 *d++ = toLOWER(*s);
4111 s++;
ec9af7d4 4112 }
00f254e2 4113 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
89ebb4a3 4114
00f254e2 4115 /* As do the ones in the Latin1 range */
e1a8dbf5 4116 U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)));
00f254e2 4117 CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
e1a8dbf5 4118 s += 2;
a0ed51b3 4119 }
00f254e2 4120 else {
00f254e2
KW
4121 /* Here, is utf8 not in Latin-1 range, have to go out and get
4122 * the mappings from the tables. */
4123
4124 const STRLEN u = UTF8SKIP(s);
4125 STRLEN ulen;
4126
00f254e2
KW
4127#ifndef CONTEXT_DEPENDENT_CASING
4128 toLOWER_utf8(s, tmpbuf, &ulen);
4129#else
9d2ba0fa
KW
4130/* This is ifdefd out because it probably is the wrong thing to do. The right
4131 * thing is probably to have an I/O layer that converts final sigma to regular
4132 * on input and vice versa (under the correct circumstances) on output. In
4133 * effect, the final sigma is just a glyph variation when the regular one
4134 * occurs at the end of a word. And we don't really know what's going to be
4135 * the end of the word until it is finally output, as splitting and joining can
4136 * occur at any time and change what once was the word end to be in the middle,
4137 * and vice versa. */
00f254e2
KW
4138
4139 const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
4140
4141 /* If the lower case is a small sigma, it may be that we need
4142 * to change it to a final sigma. This happens at the end of
4143 * a word that contains more than just this character, and only
4144 * when we started with a capital sigma. */
4145 if (uv == UNICODE_GREEK_SMALL_LETTER_SIGMA &&
4146 s > send - len && /* Makes sure not the first letter */
4147 utf8_to_uvchr(s, 0) == UNICODE_GREEK_CAPITAL_LETTER_SIGMA
4148 ) {
4149
4150 /* We use the algorithm in:
4151 * http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf (C
4152 * is a CAPITAL SIGMA): If C is preceded by a sequence
4153 * consisting of a cased letter and a case-ignorable
4154 * sequence, and C is not followed by a sequence consisting
4155 * of a case ignorable sequence and then a cased letter,
4156 * then when lowercasing C, C becomes a final sigma */
4157
4158 /* To determine if this is the end of a word, need to peek
4159 * ahead. Look at the next character */
4160 const U8 *peek = s + u;
4161
4162 /* Skip any case ignorable characters */
4163 while (peek < send && is_utf8_case_ignorable(peek)) {
4164 peek += UTF8SKIP(peek);
4165 }
4166
4167 /* If we reached the end of the string without finding any
4168 * non-case ignorable characters, or if the next such one
4169 * is not-cased, then we have met the conditions for it
4170 * being a final sigma with regards to peek ahead, and so
4171 * must do peek behind for the remaining conditions. (We
4172 * know there is stuff behind to look at since we tested
4173 * above that this isn't the first letter) */
4174 if (peek >= send || ! is_utf8_cased(peek)) {
4175 peek = utf8_hop(s, -1);
4176
4177 /* Here are at the beginning of the first character
4178 * before the original upper case sigma. Keep backing
4179 * up, skipping any case ignorable characters */
4180 while (is_utf8_case_ignorable(peek)) {
4181 peek = utf8_hop(peek, -1);
4182 }
4183
4184 /* Here peek points to the first byte of the closest
4185 * non-case-ignorable character before the capital
4186 * sigma. If it is cased, then by the Unicode
4187 * algorithm, we should use a small final sigma instead
4188 * of what we have */
4189 if (is_utf8_cased(peek)) {
4190 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf,
4191 UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA);
4192 }
4193 }
4194 }
4195 else { /* Not a context sensitive mapping */
4196#endif /* End of commented out context sensitive */
4197 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4198
4199 /* If the eventually required minimum size outgrows
4200 * the available space, we need to grow. */
4201 const UV o = d - (U8*)SvPVX_const(dest);
4202
4203 /* If someone lowercases one million U+0130s we
4204 * SvGROW() one million times. Or we could try
4205 * guessing how much to allocate without allocating too
4206 * much. Such is life. Another option would be to
4207 * grow an extra byte or two more each time we need to
4208 * grow, which would cut down the million to 500K, with
4209 * little waste */
4210 SvGROW(dest, min);
4211 d = (U8*)SvPVX(dest) + o;
4212 }
4213#ifdef CONTEXT_DEPENDENT_CASING
4214 }
4215#endif
4216 /* Copy the newly lowercased letter to the output buffer we're
4217 * building */
4218 Copy(tmpbuf, d, ulen, U8);
4219 d += ulen;
4220 s += u;
00f254e2 4221 }
00f254e2 4222 } /* End of looping through the source string */
ec9af7d4
NC
4223 SvUTF8_on(dest);
4224 *d = '\0';
4225 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
00f254e2 4226 } else { /* Not utf8 */
31351b04 4227 if (len) {
ec9af7d4 4228 const U8 *const send = s + len;
00f254e2
KW
4229
4230 /* Use locale casing if in locale; regular style if not treating
4231 * latin1 as having case; otherwise the latin1 casing. Do the
4232 * whole thing in a tight loop, for speed, */
2de3dbcc 4233 if (IN_LOCALE_RUNTIME) {
31351b04 4234 TAINT;
ec9af7d4
NC
4235 SvTAINTED_on(dest);
4236 for (; s < send; d++, s++)
4237 *d = toLOWER_LC(*s);
31351b04 4238 }
00f254e2
KW
4239 else if (! IN_UNI_8_BIT) {
4240 for (; s < send; d++, s++) {
ec9af7d4 4241 *d = toLOWER(*s);
00f254e2
KW
4242 }
4243 }
4244 else {
4245 for (; s < send; d++, s++) {
4246 *d = toLOWER_LATIN1(*s);
4247 }
31351b04 4248 }
bbce6d69 4249 }
ec9af7d4
NC
4250 if (source != dest) {
4251 *d = '\0';
4252 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4253 }
79072805 4254 }
539689e7
FC
4255 if (dest != source && SvTAINTED(source))
4256 SvTAINT(dest);
ec9af7d4 4257 SvSETMAGIC(dest);
79072805
LW
4258 RETURN;
4259}
4260
a0d0e21e 4261PP(pp_quotemeta)
79072805 4262{
97aff369 4263 dVAR; dSP; dTARGET;
1b6737cc 4264 SV * const sv = TOPs;
a0d0e21e 4265 STRLEN len;
0d46e09a 4266 register const char *s = SvPV_const(sv,len);
79072805 4267
7e2040f0 4268 SvUTF8_off(TARG); /* decontaminate */
a0d0e21e 4269 if (len) {
1b6737cc 4270 register char *d;
862a34c6 4271 SvUPGRADE(TARG, SVt_PV);
c07a80fd 4272 SvGROW(TARG, (len * 2) + 1);
a0d0e21e 4273 d = SvPVX(TARG);
7e2040f0 4274 if (DO_UTF8(sv)) {
0dd2cdef 4275 while (len) {
fd400ab9 4276 if (UTF8_IS_CONTINUED(*s)) {
0dd2cdef
LW
4277 STRLEN ulen = UTF8SKIP(s);
4278 if (ulen > len)
4279 ulen = len;
4280 len -= ulen;
4281 while (ulen--)
4282 *d++ = *s++;
4283 }
4284 else {
4285 if (!isALNUM(*s))
4286 *d++ = '\\';
4287 *d++ = *s++;
4288 len--;
4289 }
4290 }
7e2040f0 4291 SvUTF8_on(TARG);
0dd2cdef
LW
4292 }
4293 else {
4294 while (len--) {
4295 if (!isALNUM(*s))
4296 *d++ = '\\';
4297 *d++ = *s++;
4298 }
79072805 4299 }
a0d0e21e 4300 *d = '\0';
349d4f2f 4301 SvCUR_set(TARG, d - SvPVX_const(TARG));
3aa33fe5 4302 (void)SvPOK_only_UTF8(TARG);
79072805 4303 }
a0d0e21e
LW
4304 else
4305 sv_setpvn(TARG, s, len);
ec93b65f 4306 SETTARG;
79072805
LW
4307 RETURN;
4308}
4309
a0d0e21e 4310/* Arrays. */
79072805 4311
a0d0e21e 4312PP(pp_aslice)
79072805 4313{
97aff369 4314 dVAR; dSP; dMARK; dORIGMARK;
502c6561 4315 register AV *const av = MUTABLE_AV(POPs);
1b6737cc 4316 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
79072805 4317
a0d0e21e 4318 if (SvTYPE(av) == SVt_PVAV) {
4ad10a0b
VP
4319 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4320 bool can_preserve = FALSE;
4321
4322 if (localizing) {
4323 MAGIC *mg;
4324 HV *stash;
4325
4326 can_preserve = SvCANEXISTDELETE(av);
4327 }
4328
4329 if (lval && localizing) {
1b6737cc 4330 register SV **svp;
748a9306 4331 I32 max = -1;
924508f0 4332 for (svp = MARK + 1; svp <= SP; svp++) {
4ea561bc 4333 const I32 elem = SvIV(*svp);
748a9306
LW
4334 if (elem > max)
4335 max = elem;
4336 }
4337 if (max > AvMAX(av))
4338 av_extend(av, max);
4339 }
4ad10a0b 4340
a0d0e21e 4341 while (++MARK <= SP) {
1b6737cc 4342 register SV **svp;
4ea561bc 4343 I32 elem = SvIV(*MARK);
4ad10a0b 4344 bool preeminent = TRUE;
a0d0e21e 4345
4ad10a0b
VP
4346 if (localizing && can_preserve) {
4347 /* If we can determine whether the element exist,
4348 * Try to preserve the existenceness of a tied array
4349 * element by using EXISTS and DELETE if possible.
4350 * Fallback to FETCH and STORE otherwise. */
4351 preeminent = av_exists(av, elem);
4352 }
4353
a0d0e21e
LW
4354 svp = av_fetch(av, elem, lval);
4355 if (lval) {
3280af22 4356 if (!svp || *svp == &PL_sv_undef)
cea2e8a9 4357 DIE(aTHX_ PL_no_aelem, elem);
4ad10a0b
VP
4358 if (localizing) {
4359 if (preeminent)
4360 save_aelem(av, elem, svp);
4361 else
4362 SAVEADELETE(av, elem);
4363 }
79072805 4364 }
3280af22 4365 *MARK = svp ? *svp : &PL_sv_undef;
79072805
LW
4366 }
4367 }
748a9306 4368 if (GIMME != G_ARRAY) {
a0d0e21e 4369 MARK = ORIGMARK;
04ab2c87 4370 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
a0d0e21e
LW
4371 SP = MARK;
4372 }
79072805
LW
4373 RETURN;
4374}
4375
cba5a3b0
DG
4376/* Smart dereferencing for keys, values and each */
4377PP(pp_rkeys)
4378{
4379 dVAR;
4380 dSP;
4381 dPOPss;
4382
7ac5715b
FC
4383 SvGETMAGIC(sv);
4384
4385 if (
4386 !SvROK(sv)
4387 || (sv = SvRV(sv),
4388 (SvTYPE(sv) != SVt_PVHV && SvTYPE(sv) != SVt_PVAV)
4389 || SvOBJECT(sv)
4390 )
4391 ) {
4392 DIE(aTHX_
4393 "Type of argument to %s must be unblessed hashref or arrayref",
4c540399 4394 PL_op_desc[PL_op->op_type] );
cba5a3b0
DG
4395 }
4396
d8065907
FC
4397 if (PL_op->op_flags & OPf_SPECIAL && SvTYPE(sv) == SVt_PVAV)
4398 DIE(aTHX_
4399 "Can't modify %s in %s",
4400 PL_op_desc[PL_op->op_type], PL_op_desc[PL_op->op_next->op_type]
4401 );
4402
cba5a3b0
DG
4403 /* Delegate to correct function for op type */
4404 PUSHs(sv);
4405 if (PL_op->op_type == OP_RKEYS || PL_op->op_type == OP_RVALUES) {
4406 return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
4407 }
4408 else {
4409 return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
4410 }
4411}
4412
878d132a
NC
4413PP(pp_aeach)
4414{
4415 dVAR;
4416 dSP;
502c6561 4417 AV *array = MUTABLE_AV(POPs);
878d132a 4418 const I32 gimme = GIMME_V;
453d94a9 4419 IV *iterp = Perl_av_iter_p(aTHX_ array);
878d132a
NC
4420 const IV current = (*iterp)++;
4421
4422 if (current > av_len(array)) {
4423 *iterp = 0;
4424 if (gimme == G_SCALAR)
4425 RETPUSHUNDEF;
4426 else
4427 RETURN;
4428 }
4429
4430 EXTEND(SP, 2);
e1dccc0d 4431 mPUSHi(current);
878d132a
NC
4432 if (gimme == G_ARRAY) {
4433 SV **const element = av_fetch(array, current, 0);
4434 PUSHs(element ? *element : &PL_sv_undef);
4435 }
4436 RETURN;
4437}
4438
4439PP(pp_akeys)
4440{
4441 dVAR;
4442 dSP;
502c6561 4443 AV *array = MUTABLE_AV(POPs);
878d132a
NC
4444 const I32 gimme = GIMME_V;
4445
4446 *Perl_av_iter_p(aTHX_ array) = 0;
4447
4448 if (gimme == G_SCALAR) {
4449 dTARGET;
4450 PUSHi(av_len(array) + 1);
4451 }
4452 else if (gimme == G_ARRAY) {
4453 IV n = Perl_av_len(aTHX_ array);
e1dccc0d 4454 IV i;
878d132a
NC
4455
4456 EXTEND(SP, n + 1);
4457
cba5a3b0 4458 if (PL_op->op_type == OP_AKEYS || PL_op->op_type == OP_RKEYS) {
e1dccc0d 4459 for (i = 0; i <= n; i++) {
878d132a
NC
4460 mPUSHi(i);
4461 }
4462 }
4463 else {
4464 for (i = 0; i <= n; i++) {
4465 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
4466 PUSHs(elem ? *elem : &PL_sv_undef);
4467 }
4468 }
4469 }
4470 RETURN;
4471}
4472
79072805
LW
4473/* Associative arrays. */
4474
4475PP(pp_each)
4476{
97aff369 4477 dVAR;
39644a26 4478 dSP;
85fbaab2 4479 HV * hash = MUTABLE_HV(POPs);
c07a80fd 4480 HE *entry;
f54cb97a 4481 const I32 gimme = GIMME_V;
8ec5e241 4482
c07a80fd 4483 PUTBACK;
c750a3ec 4484 /* might clobber stack_sp */
6d822dc4 4485 entry = hv_iternext(hash);
c07a80fd 4486 SPAGAIN;
79072805 4487
79072805
LW
4488 EXTEND(SP, 2);
4489 if (entry) {
1b6737cc 4490 SV* const sv = hv_iterkeysv(entry);
574c8022 4491 PUSHs(sv); /* won't clobber stack_sp */
54310121 4492 if (gimme == G_ARRAY) {
59af0135 4493 SV *val;
c07a80fd 4494 PUTBACK;
c750a3ec 4495 /* might clobber stack_sp */
6d822dc4 4496 val = hv_iterval(hash, entry);
c07a80fd 4497 SPAGAIN;
59af0135 4498 PUSHs(val);
79072805 4499 }
79072805 4500 }
54310121 4501 else if (gimme == G_SCALAR)
79072805
LW
4502 RETPUSHUNDEF;
4503
4504 RETURN;
4505}
4506
7332a6c4
VP
4507STATIC OP *
4508S_do_delete_local(pTHX)
79072805 4509{
97aff369 4510 dVAR;
39644a26 4511 dSP;
f54cb97a 4512 const I32 gimme = GIMME_V;
7332a6c4
VP
4513 const MAGIC *mg;
4514 HV *stash;
4515
4516 if (PL_op->op_private & OPpSLICE) {
4517 dMARK; dORIGMARK;
4518 SV * const osv = POPs;
4519 const bool tied = SvRMAGICAL(osv)
4520 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4521 const bool can_preserve = SvCANEXISTDELETE(osv)
4522 || mg_find((const SV *)osv, PERL_MAGIC_env);
4523 const U32 type = SvTYPE(osv);
4524 if (type == SVt_PVHV) { /* hash element */
4525 HV * const hv = MUTABLE_HV(osv);
4526 while (++MARK <= SP) {
4527 SV * const keysv = *MARK;
4528 SV *sv = NULL;
4529 bool preeminent = TRUE;
4530 if (can_preserve)
4531 preeminent = hv_exists_ent(hv, keysv, 0);
4532 if (tied) {
4533 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4534 if (he)
4535 sv = HeVAL(he);
4536 else
4537 preeminent = FALSE;
4538 }
4539 else {
4540 sv = hv_delete_ent(hv, keysv, 0, 0);
4541 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4542 }
4543 if (preeminent) {
4544 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4545 if (tied) {
4546 *MARK = sv_mortalcopy(sv);
4547 mg_clear(sv);
4548 } else
4549 *MARK = sv;
4550 }
4551 else {
4552 SAVEHDELETE(hv, keysv);
4553 *MARK = &PL_sv_undef;
4554 }
4555 }
4556 }
4557 else if (type == SVt_PVAV) { /* array element */
4558 if (PL_op->op_flags & OPf_SPECIAL) {
4559 AV * const av = MUTABLE_AV(osv);
4560 while (++MARK <= SP) {
4561 I32 idx = SvIV(*MARK);
4562 SV *sv = NULL;
4563 bool preeminent = TRUE;
4564 if (can_preserve)
4565 preeminent = av_exists(av, idx);
4566 if (tied) {
4567 SV **svp = av_fetch(av, idx, 1);
4568 if (svp)
4569 sv = *svp;
4570 else
4571 preeminent = FALSE;
4572 }
4573 else {
4574 sv = av_delete(av, idx, 0);
4575 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4576 }
4577 if (preeminent) {
4578 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4579 if (tied) {
4580 *MARK = sv_mortalcopy(sv);
4581 mg_clear(sv);
4582 } else
4583 *MARK = sv;
4584 }
4585 else {
4586 SAVEADELETE(av, idx);
4587 *MARK = &PL_sv_undef;
4588 }
4589 }
4590 }
4591 }
4592 else
4593 DIE(aTHX_ "Not a HASH reference");
4594 if (gimme == G_VOID)
4595 SP = ORIGMARK;
4596 else if (gimme == G_SCALAR) {
4597 MARK = ORIGMARK;
4598 if (SP > MARK)
4599 *++MARK = *SP;
4600 else
4601 *++MARK = &PL_sv_undef;
4602 SP = MARK;
4603 }
4604 }
4605 else {
4606 SV * const keysv = POPs;
4607 SV * const osv = POPs;
4608 const bool tied = SvRMAGICAL(osv)
4609 && mg_find((const SV *)osv, PERL_MAGIC_tied);
4610 const bool can_preserve = SvCANEXISTDELETE(osv)
4611 || mg_find((const SV *)osv, PERL_MAGIC_env);
4612 const U32 type = SvTYPE(osv);
4613 SV *sv = NULL;
4614 if (type == SVt_PVHV) {
4615 HV * const hv = MUTABLE_HV(osv);
4616 bool preeminent = TRUE;
4617 if (can_preserve)
4618 preeminent = hv_exists_ent(hv, keysv, 0);
4619 if (tied) {
4620 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
4621 if (he)
4622 sv = HeVAL(he);
4623 else
4624 preeminent = FALSE;
4625 }
4626 else {
4627 sv = hv_delete_ent(hv, keysv, 0, 0);
4628 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4629 }
4630 if (preeminent) {
4631 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
4632 if (tied) {
4633 SV *nsv = sv_mortalcopy(sv);
4634 mg_clear(sv);
4635 sv = nsv;
4636 }
4637 }
4638 else
4639 SAVEHDELETE(hv, keysv);
4640 }
4641 else if (type == SVt_PVAV) {
4642 if (PL_op->op_flags & OPf_SPECIAL) {
4643 AV * const av = MUTABLE_AV(osv);
4644 I32 idx = SvIV(keysv);
4645 bool preeminent = TRUE;
4646 if (can_preserve)
4647 preeminent = av_exists(av, idx);
4648 if (tied) {
4649 SV **svp = av_fetch(av, idx, 1);
4650 if (svp)
4651 sv = *svp;
4652 else
4653 preeminent = FALSE;
4654 }
4655 else {
4656 sv = av_delete(av, idx, 0);
4657 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
4658 }
4659 if (preeminent) {
4660 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
4661 if (tied) {
4662 SV *nsv = sv_mortalcopy(sv);
4663 mg_clear(sv);
4664 sv = nsv;
4665 }
4666 }
4667 else
4668 SAVEADELETE(av, idx);
4669 }
4670 else
4671 DIE(aTHX_ "panic: avhv_delete no longer supported");
4672 }
4673 else
4674 DIE(aTHX_ "Not a HASH reference");
4675 if (!sv)
4676 sv = &PL_sv_undef;
4677 if (gimme != G_VOID)
4678 PUSHs(sv);
4679 }
4680
4681 RETURN;
4682}
4683
4684PP(pp_delete)
4685{
4686 dVAR;
4687 dSP;
4688 I32 gimme;
4689 I32 discard;
4690
4691 if (PL_op->op_private & OPpLVAL_INTRO)
4692 return do_delete_local();
4693
4694 gimme = GIMME_V;
4695 discard = (gimme == G_VOID) ? G_DISCARD : 0;
5f05dabc 4696
533c011a 4697 if (PL_op->op_private & OPpSLICE) {
5f05dabc 4698 dMARK; dORIGMARK;
85fbaab2 4699 HV * const hv = MUTABLE_HV(POPs);
1b6737cc 4700 const U32 hvtype = SvTYPE(hv);
01020589
GS
4701 if (hvtype == SVt_PVHV) { /* hash element */
4702 while (++MARK <= SP) {
1b6737cc 4703 SV * const sv = hv_delete_ent(hv, *MARK, discard, 0);
01020589
GS
4704 *MARK = sv ? sv : &PL_sv_undef;
4705 }
5f05dabc 4706 }
6d822dc4
MS
4707 else if (hvtype == SVt_PVAV) { /* array element */
4708 if (PL_op->op_flags & OPf_SPECIAL) {
4709 while (++MARK <= SP) {
502c6561 4710 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*MARK), discard);
6d822dc4
MS
4711 *MARK = sv ? sv : &PL_sv_undef;
4712 }
4713 }
01020589
GS
4714 }
4715 else
4716 DIE(aTHX_ "Not a HASH reference");
54310121 4717 if (discard)
4718 SP = ORIGMARK;
4719 else if (gimme == G_SCALAR) {
5f05dabc 4720 MARK = ORIGMARK;
9111c9c0
DM
4721 if (SP > MARK)
4722 *++MARK = *SP;
4723 else
4724 *++MARK = &PL_sv_undef;
5f05dabc 4725 SP = MARK;
4726 }
4727 }
4728 else {
4729 SV *keysv = POPs;
85fbaab2 4730 HV * const hv = MUTABLE_HV(POPs);
295d248e 4731 SV *sv = NULL;
97fcbf96
MB
4732 if (SvTYPE(hv) == SVt_PVHV)
4733 sv = hv_delete_ent(hv, keysv, discard, 0);
01020589
GS
4734 else if (SvTYPE(hv) == SVt_PVAV) {
4735 if (PL_op->op_flags & OPf_SPECIAL)
502c6561 4736 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
af288a60
HS
4737 else
4738 DIE(aTHX_ "panic: avhv_delete no longer supported");
01020589 4739 }
97fcbf96 4740 else
cea2e8a9 4741 DIE(aTHX_ "Not a HASH reference");
5f05dabc 4742 if (!sv)
3280af22 4743 sv = &PL_sv_undef;
54310121 4744 if (!discard)
4745 PUSHs(sv);
79072805 4746 }
79072805
LW
4747 RETURN;
4748}
4749
a0d0e21e 4750PP(pp_exists)
79072805 4751{
97aff369 4752 dVAR;
39644a26 4753 dSP;
afebc493
GS
4754 SV *tmpsv;
4755 HV *hv;
4756
4757 if (PL_op->op_private & OPpEXISTS_SUB) {
4758 GV *gv;
0bd48802 4759 SV * const sv = POPs;
f2c0649b 4760 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
afebc493
GS
4761 if (cv)
4762 RETPUSHYES;
4763 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
4764 RETPUSHYES;
4765 RETPUSHNO;
4766 }
4767 tmpsv = POPs;
85fbaab2 4768 hv = MUTABLE_HV(POPs);
c750a3ec 4769 if (SvTYPE(hv) == SVt_PVHV) {
ae77835f 4770 if (hv_exists_ent(hv, tmpsv, 0))
c750a3ec 4771 RETPUSHYES;
ef54e1a4
JH
4772 }
4773 else if (SvTYPE(hv) == SVt_PVAV) {
01020589 4774 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
502c6561 4775 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
01020589
GS
4776 RETPUSHYES;
4777 }
ef54e1a4
JH
4778 }
4779 else {
cea2e8a9 4780 DIE(aTHX_ "Not a HASH reference");
a0d0e21e 4781 }
a0d0e21e
LW
4782 RETPUSHNO;
4783}
79072805 4784
a0d0e21e
LW
4785PP(pp_hslice)
4786{
97aff369 4787 dVAR; dSP; dMARK; dORIGMARK;
85fbaab2 4788 register HV * const hv = MUTABLE_HV(POPs);
1b6737cc
AL
4789 register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4790 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
d30e492c 4791 bool can_preserve = FALSE;
79072805 4792
eb85dfd3
DM
4793 if (localizing) {
4794 MAGIC *mg;
4795 HV *stash;
4796
d30e492c
VP
4797 if (SvCANEXISTDELETE(hv) || mg_find((const SV *)hv, PERL_MAGIC_env))
4798 can_preserve = TRUE;
eb85dfd3
DM
4799 }
4800
6d822dc4 4801 while (++MARK <= SP) {
1b6737cc 4802 SV * const keysv = *MARK;
6d822dc4
MS
4803 SV **svp;
4804 HE *he;
d30e492c
VP
4805 bool preeminent = TRUE;
4806
4807 if (localizing && can_preserve) {
4808 /* If we can determine whether the element exist,
4809 * try to preserve the existenceness of a tied hash
4810 * element by using EXISTS and DELETE if possible.
4811 * Fallback to FETCH and STORE otherwise. */
4812 preeminent = hv_exists_ent(hv, keysv, 0);
6d822dc4 4813 }
eb85dfd3 4814
6d822dc4 4815 he = hv_fetch_ent(hv, keysv, lval, 0);
fe5bfecd 4816 svp = he ? &HeVAL(he) : NULL;
eb85dfd3 4817
6d822dc4
MS
4818 if (lval) {
4819 if (!svp || *svp == &PL_sv_undef) {
be2597df 4820 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
6d822dc4
MS
4821 }
4822 if (localizing) {
7a2e501a 4823 if (HvNAME_get(hv) && isGV(*svp))
159b6efe 4824 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
47cfc530
VP
4825 else if (preeminent)
4826 save_helem_flags(hv, keysv, svp,
4827 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
4828 else
4829 SAVEHDELETE(hv, keysv);
6d822dc4
MS
4830 }
4831 }
4832 *MARK = svp ? *svp : &PL_sv_undef;
79072805 4833 }
a0d0e21e
LW
4834 if (GIMME != G_ARRAY) {
4835 MARK = ORIGMARK;
04ab2c87 4836 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
a0d0e21e 4837 SP = MARK;
79072805 4838 }
a0d0e21e
LW
4839 RETURN;
4840}
4841
4842/* List operators. */
4843
4844PP(pp_list)
4845{
97aff369 4846 dVAR; dSP; dMARK;
a0d0e21e
LW
4847 if (GIMME != G_ARRAY) {
4848 if (++MARK <= SP)
4849 *MARK = *SP; /* unwanted list, return last item */
8990e307 4850 else
3280af22 4851 *MARK = &PL_sv_undef;
a0d0e21e 4852 SP = MARK;
79072805 4853 }
a0d0e21e 4854 RETURN;
79072805
LW
4855}
4856
a0d0e21e 4857PP(pp_lslice)
79072805 4858{
97aff369 4859 dVAR;
39644a26 4860 dSP;
1b6737cc
AL
4861 SV ** const lastrelem = PL_stack_sp;
4862 SV ** const lastlelem = PL_stack_base + POPMARK;
4863 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
4864 register SV ** const firstrelem = lastlelem + 1;
42e73ed0 4865 I32 is_something_there = FALSE;
1b6737cc
AL
4866
4867 register const I32 max = lastrelem - lastlelem;
a0d0e21e 4868 register SV **lelem;
a0d0e21e
LW
4869
4870 if (GIMME != G_ARRAY) {
4ea561bc 4871 I32 ix = SvIV(*lastlelem);
748a9306
LW
4872 if (ix < 0)
4873 ix += max;
a0d0e21e 4874 if (ix < 0 || ix >= max)
3280af22 4875 *firstlelem = &PL_sv_undef;
a0d0e21e
LW
4876 else
4877 *firstlelem = firstrelem[ix];
4878 SP = firstlelem;
4879 RETURN;
4880 }
4881
4882 if (max == 0) {
4883 SP = firstlelem - 1;
4884 RETURN;
4885 }
4886
4887 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
4ea561bc 4888 I32 ix = SvIV(*lelem);
c73bf8e3 4889 if (ix < 0)
a0d0e21e 4890 ix += max;
c73bf8e3
HS
4891 if (ix < 0 || ix >= max)
4892 *lelem = &PL_sv_undef;
4893 else {
4894 is_something_there = TRUE;
4895 if (!(*lelem = firstrelem[ix]))
3280af22 4896 *lelem = &PL_sv_undef;
748a9306 4897 }
79072805 4898 }
4633a7c4
LW
4899 if (is_something_there)
4900 SP = lastlelem;
4901 else
4902 SP = firstlelem - 1;
79072805
LW
4903 RETURN;
4904}
4905
a0d0e21e
LW
4906PP(pp_anonlist)
4907{
97aff369 4908 dVAR; dSP; dMARK; dORIGMARK;
1b6737cc 4909 const I32 items = SP - MARK;
ad64d0ec 4910 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
44a8e56a 4911 SP = ORIGMARK; /* av_make() might realloc stack_sp */
6e449a3a
MHM
4912 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
4913 ? newRV_noinc(av) : av);
a0d0e21e
LW
4914 RETURN;
4915}
4916
4917PP(pp_anonhash)
79072805 4918{
97aff369 4919 dVAR; dSP; dMARK; dORIGMARK;
78c72037 4920 HV* const hv = newHV();
a0d0e21e
LW
4921
4922 while (MARK < SP) {
1b6737cc 4923 SV * const key = *++MARK;
561b68a9 4924 SV * const val = newSV(0);
a0d0e21e
LW
4925 if (MARK < SP)
4926 sv_setsv(val, *++MARK);
a2a5de95
NC
4927 else
4928 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
f12c7020 4929 (void)hv_store_ent(hv,key,val,0);
79072805 4930 }
a0d0e21e 4931 SP = ORIGMARK;
6e449a3a 4932 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
ad64d0ec 4933 ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
79072805
LW
4934 RETURN;
4935}
4936
d4fc4415
FC
4937static AV *
4938S_deref_plain_array(pTHX_ AV *ary)
4939{
4940 if (SvTYPE(ary) == SVt_PVAV) return ary;
d2d95e13 4941 SvGETMAGIC((SV *)ary);
d4fc4415
FC
4942 if (!SvROK(ary) || SvTYPE(SvRV(ary)) != SVt_PVAV)
4943 Perl_die(aTHX_ "Not an ARRAY reference");
4944 else if (SvOBJECT(SvRV(ary)))
4945 Perl_die(aTHX_ "Not an unblessed ARRAY reference");
4946 return (AV *)SvRV(ary);
4947}
4948
/* DEREF_PLAIN_ARRAY(ary)
 *
 * Inline fast path in front of S_deref_plain_array(): yields ary itself
 * when it already is an SVt_PVAV and only calls the function otherwise.
 * Both variants evaluate their argument exactly once, so an argument with
 * side effects (such as *++MARK) is safe.
 */
#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
/* GCC brace-group form: the argument is captured in a block-local
 * variable. */
#  define DEREF_PLAIN_ARRAY(ary)                   \
    ({                                             \
        AV *aRrRay = ary;                          \
        SvTYPE(aRrRay) == SVt_PVAV                 \
            ? aRrRay                               \
            : S_deref_plain_array(aTHX_ aRrRay);   \
    })
#else
/* Portable form: the argument is captured in the PL_Sv scratch variable. */
#  define DEREF_PLAIN_ARRAY(ary)                          \
    (                                                     \
        PL_Sv = (SV *)(ary),                              \
        SvTYPE(PL_Sv) == SVt_PVAV                         \
            ? (AV *)PL_Sv                                 \
            : S_deref_plain_array(aTHX_ (AV *)PL_Sv)      \
    )
#endif
4966
a0d0e21e 4967PP(pp_splice)
79072805 4968{
27da23d5 4969 dVAR; dSP; dMARK; dORIGMARK;
5cd408a2 4970 int num_args = (SP - MARK);
d4fc4415 4971 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
a0d0e21e
LW
4972 register SV **src;
4973 register SV **dst;
4974 register I32 i;
4975 register I32 offset;
4976 register I32 length;
4977 I32 newlen;
4978 I32 after;
4979 I32 diff;
ad64d0ec 4980 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
93965878 4981
1b6737cc 4982 if (mg) {
af71faff
NC
4983 return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg,
4984 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
4985 sp - mark);
93965878 4986 }
79072805 4987
a0d0e21e 4988 SP++;
79072805 4989
a0d0e21e 4990 if (++MARK < SP) {
4ea561bc 4991 offset = i = SvIV(*MARK);
a0d0e21e 4992 if (offset < 0)
93965878 4993 offset += AvFILLp(ary) + 1;
84902520 4994 if (offset < 0)
cea2e8a9 4995 DIE(aTHX_ PL_no_aelem, i);
a0d0e21e
LW
4996 if (++MARK < SP) {
4997 length = SvIVx(*MARK++);
48cdf507
GA
4998 if (length < 0) {
4999 length += AvFILLp(ary) - offset + 1;
5000 if (length < 0)
5001 length = 0;
5002 }
79072805
LW
5003 }
5004 else
a0d0e21e 5005 length = AvMAX(ary) + 1; /* close enough to infinity */
79072805 5006 }
a0d0e21e
LW
5007 else {
5008 offset = 0;
5009 length = AvMAX(ary) + 1;
5010 }
8cbc2e3b 5011 if (offset > AvFILLp(ary) + 1) {
5cd408a2
EB
5012 if (num_args > 2)
5013 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
93965878 5014 offset = AvFILLp(ary) + 1;
8cbc2e3b 5015 }
93965878 5016 after = AvFILLp(ary) + 1 - (offset + length);
a0d0e21e
LW
5017 if (after < 0) { /* not that much array */
5018 length += after; /* offset+length now in array */
5019 after = 0;
5020 if (!AvALLOC(ary))
5021 av_extend(ary, 0);
5022 }
5023
5024 /* At this point, MARK .. SP-1 is our new LIST */
5025
5026 newlen = SP - MARK;
5027 diff = newlen - length;
13d7cbc1
GS
5028 if (newlen && !AvREAL(ary) && AvREIFY(ary))
5029 av_reify(ary);
a0d0e21e 5030
50528de0
WL
5031 /* make new elements SVs now: avoid problems if they're from the array */
5032 for (dst = MARK, i = newlen; i; i--) {
1b6737cc 5033 SV * const h = *dst;
f2b990bf 5034 *dst++ = newSVsv(h);
50528de0
WL
5035 }
5036
a0d0e21e 5037 if (diff < 0) { /* shrinking the area */
95b63a38 5038 SV **tmparyval = NULL;
a0d0e21e 5039 if (newlen) {
a02a5408 5040 Newx(tmparyval, newlen, SV*); /* so remember insertion */
a0d0e21e 5041 Copy(MARK, tmparyval, newlen, SV*);
79072805 5042 }
a0d0e21e
LW
5043
5044 MARK = ORIGMARK + 1;
5045 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5046 MEXTEND(MARK, length);
5047 Copy(AvARRAY(ary)+offset, MARK, length, SV*);
5048 if (AvREAL(ary)) {
bbce6d69 5049 EXTEND_MORTAL(length);
36477c24 5050 for (i = length, dst = MARK; i; i--) {
486ec47a 5051 sv_2mortal(*dst); /* free them eventually */
36477c24 5052 dst++;
5053 }
a0d0e21e
LW
5054 }
5055 MARK += length - 1;
79072805 5056 }
a0d0e21e
LW
5057 else {
5058 *MARK = AvARRAY(ary)[offset+length-1];
5059 if (AvREAL(ary)) {
d689ffdd 5060 sv_2mortal(*MARK);
a0d0e21e
LW
5061 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
5062 SvREFCNT_dec(*dst++); /* free them now */
79072805 5063 }
a0d0e21e 5064 }
93965878 5065 AvFILLp(ary) += diff;
a0d0e21e
LW
5066
5067 /* pull up or down? */
5068
5069 if (offset < after) { /* easier to pull up */
5070 if (offset) { /* esp. if nothing to pull */
5071 src = &AvARRAY(ary)[offset-1];
5072 dst = src - diff; /* diff is negative */
5073 for (i = offset; i > 0; i--) /* can't trust Copy */
5074 *dst-- = *src--;
79072805 5075 }
a0d0e21e 5076 dst = AvARRAY(ary);
9c6bc640 5077 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
a0d0e21e
LW
5078 AvMAX(ary) += diff;
5079 }
5080 else {
5081 if (after) { /* anything to pull down? */
5082 src = AvARRAY(ary) + offset + length;
5083 dst = src + diff; /* diff is negative */
5084 Move(src, dst, after, SV*);
79072805 5085 }
93965878 5086 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
a0d0e21e
LW
5087 /* avoid later double free */
5088 }
5089 i = -diff;
5090 while (i)
3280af22 5091 dst[--i] = &PL_sv_undef;
a0d0e21e
LW
5092
5093 if (newlen) {
50528de0 5094 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
a0d0e21e
LW
5095 Safefree(tmparyval);
5096 }
5097 }
5098 else { /* no, expanding (or same) */
d3961450 5099 SV** tmparyval = NULL;
a0d0e21e 5100 if (length) {
a02a5408 5101 Newx(tmparyval, length, SV*); /* so remember deletion */
a0d0e21e
LW
5102 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
5103 }
5104
5105 if (diff > 0) { /* expanding */
a0d0e21e 5106 /* push up or down? */
a0d0e21e
LW
5107 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
5108 if (offset) {
5109 src = AvARRAY(ary);
5110 dst = src - diff;
5111 Move(src, dst, offset, SV*);
79072805 5112 }
9c6bc640 5113 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
a0d0e21e 5114 AvMAX(ary) += diff;
93965878 5115 AvFILLp(ary) += diff;
79072805
LW
5116 }
5117 else {
93965878
NIS
5118 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
5119 av_extend(ary, AvFILLp(ary) + diff);
5120 AvFILLp(ary) += diff;
a0d0e21e
LW
5121
5122 if (after) {
93965878 5123 dst = AvARRAY(ary) + AvFILLp(ary);
a0d0e21e
LW
5124 src = dst - diff;
5125 for (i = after; i; i--) {
5126 *dst-- = *src--;
5127 }
79072805
LW
5128 }
5129 }
a0d0e21e
LW
5130 }
5131
50528de0
WL
5132 if (newlen) {
5133 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
a0d0e21e 5134 }
50528de0 5135
a0d0e21e
LW
5136 MARK = ORIGMARK + 1;
5137 if (GIMME == G_ARRAY) { /* copy return vals to stack */
5138 if (length) {
5139 Copy(tmparyval, MARK, length, SV*);
5140 if (AvREAL(ary)) {
bbce6d69 5141 EXTEND_MORTAL(length);
36477c24 5142 for (i = length, dst = MARK; i; i--) {
486ec47a 5143 sv_2mortal(*dst); /* free them eventually */
36477c24 5144 dst++;
5145 }
79072805
LW
5146 }
5147 }
a0d0e21e
LW
5148 MARK += length - 1;
5149 }
5150 else if (length--) {
5151 *MARK = tmparyval[length];
5152 if (AvREAL(ary)) {
d689ffdd 5153 sv_2mortal(*MARK);
a0d0e21e
LW
5154 while (length-- > 0)
5155 SvREFCNT_dec(tmparyval[length]);
79072805 5156 }
79072805 5157 }
a0d0e21e 5158 else
3280af22 5159 *MARK = &PL_sv_undef;
d3961450 5160 Safefree(tmparyval);
79072805 5161 }
474af990
FR
5162
5163 if (SvMAGICAL(ary))
5164 mg_set(MUTABLE_SV(ary));
5165
a0d0e21e 5166 SP = MARK;
79072805
LW
5167 RETURN;
5168}
5169
a0d0e21e 5170PP(pp_push)
79072805 5171{
27da23d5 5172 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
d4fc4415 5173 register AV * const ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
ad64d0ec 5174 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
79072805 5175
1b6737cc 5176 if (mg) {
ad64d0ec 5177 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
93965878
NIS
5178 PUSHMARK(MARK);
5179 PUTBACK;
d343c3ef 5180 ENTER_with_name("call_PUSH");
864dbfa3 5181 call_method("PUSH",G_SCALAR|G_DISCARD);
d343c3ef 5182 LEAVE_with_name("call_PUSH");
93965878 5183 SPAGAIN;
93965878 5184 }
a60c0954 5185 else {
89c14e2e 5186 PL_delaymagic = DM_DELAY;
a60c0954 5187 for (++MARK; MARK <= SP; MARK++) {
561b68a9 5188 SV * const sv = newSV(0);
a60c0954
NIS
5189 if (*MARK)
5190 sv_setsv(sv, *MARK);
0a75904b 5191 av_store(ary, AvFILLp(ary)+1, sv);
a60c0954 5192 }
354b0578 5193 if (PL_delaymagic & DM_ARRAY_ISA)
ad64d0ec 5194 mg_set(MUTABLE_SV(ary));
89c14e2e
BB
5195
5196 PL_delaymagic = 0;
6eeabd23
VP
5197 }
5198 SP = ORIGMARK;
5199 if (OP_GIMME(PL_op, 0) != G_VOID) {
5200 PUSHi( AvFILL(ary) + 1 );
79072805 5201 }
79072805
LW
5202 RETURN;
5203}
5204
a0d0e21e 5205PP(pp_shift)
79072805 5206{
97aff369 5207 dVAR;
39644a26 5208 dSP;
538f5756 5209 AV * const av = PL_op->op_flags & OPf_SPECIAL
d4fc4415 5210 ? MUTABLE_AV(GvAV(PL_defgv)) : DEREF_PLAIN_ARRAY(MUTABLE_AV(POPs));
789b4bc9 5211 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
79072805 5212 EXTEND(SP, 1);
c2b4a044 5213 assert (sv);
d689ffdd 5214 if (AvREAL(av))
a0d0e21e
LW
5215 (void)sv_2mortal(sv);
5216 PUSHs(sv);
79072805 5217 RETURN;
79072805
LW
5218}
5219
a0d0e21e 5220PP(pp_unshift)
79072805 5221{
27da23d5 5222 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
d4fc4415 5223 register AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK));
ad64d0ec 5224 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
93965878 5225
1b6737cc 5226 if (mg) {
ad64d0ec 5227 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
7fd66d9d 5228 PUSHMARK(MARK);
93965878 5229 PUTBACK;
d343c3ef 5230 ENTER_with_name("call_UNSHIFT");
864dbfa3 5231 call_method("UNSHIFT",G_SCALAR|G_DISCARD);
d343c3ef 5232 LEAVE_with_name("call_UNSHIFT");
93965878 5233 SPAGAIN;
93965878 5234 }
a60c0954 5235 else {
1b6737cc 5236 register I32 i = 0;
a60c0954
NIS
5237 av_unshift(ary, SP - MARK);
5238 while (MARK < SP) {
1b6737cc 5239 SV * const sv = newSVsv(*++MARK);
a60c0954
NIS
5240 (void)av_store(ary, i++, sv);
5241 }
79072805 5242 }
a0d0e21e 5243 SP = ORIGMARK;
6eeabd23 5244 if (OP_GIMME(PL_op, 0) != G_VOID) {
5658d0a9
LR
5245 PUSHi( AvFILL(ary) + 1 );
5246 }
79072805 5247 RETURN;
79072805
LW
5248}
5249
a0d0e21e 5250PP(pp_reverse)
79072805 5251{
97aff369 5252 dVAR; dSP; dMARK;
79072805 5253
a0d0e21e 5254 if (GIMME == G_ARRAY) {
484c818f
VP
5255 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5256 AV *av;
5257
5258 /* See pp_sort() */
5259 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5260 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5261 av = MUTABLE_AV((*SP));
5262 /* In-place reversing only happens in void context for the array
5263 * assignment. We don't need to push anything on the stack. */
5264 SP = MARK;
5265
5266 if (SvMAGICAL(av)) {
5267 I32 i, j;
5268 register SV *tmp = sv_newmortal();
5269 /* For SvCANEXISTDELETE */
5270 HV *stash;
5271 const MAGIC *mg;
5272 bool can_preserve = SvCANEXISTDELETE(av);
5273
5274 for (i = 0, j = av_len(av); i < j; ++i, --j) {
5275 register SV *begin, *end;
5276
5277 if (can_preserve) {
5278 if (!av_exists(av, i)) {
5279 if (av_exists(av, j)) {
5280 register SV *sv = av_delete(av, j, 0);
5281 begin = *av_fetch(av, i, TRUE);
5282 sv_setsv_mg(begin, sv);
5283 }
5284 continue;
5285 }
5286 else if (!av_exists(av, j)) {
5287 register SV *sv = av_delete(av, i, 0);
5288 end = *av_fetch(av, j, TRUE);
5289 sv_setsv_mg(end, sv);
5290 continue;
5291 }
5292 }
5293
5294 begin = *av_fetch(av, i, TRUE);
5295 end = *av_fetch(av, j, TRUE);
5296 sv_setsv(tmp, begin);
5297 sv_setsv_mg(begin, end);
5298 sv_setsv_mg(end, tmp);
5299 }
5300 }
5301 else {
5302 SV **begin = AvARRAY(av);
484c818f 5303
95a26d8e
VP
5304 if (begin) {
5305 SV **end = begin + AvFILLp(av);
5306
5307 while (begin < end) {
5308 register SV * const tmp = *begin;
5309 *begin++ = *end;
5310 *end-- = tmp;
5311 }
484c818f
VP
5312 }
5313 }
5314 }
5315 else {
5316 SV **oldsp = SP;
5317 MARK++;
5318 while (MARK < SP) {
5319 register SV * const tmp = *MARK;
5320 *MARK++ = *SP;
5321 *SP-- = tmp;
5322 }
5323 /* safe as long as stack cannot get extended in the above */
5324 SP = oldsp;
a0d0e21e 5325 }
79072805
LW
5326 }
5327 else {
a0d0e21e
LW
5328 register char *up;
5329 register char *down;
5330 register I32 tmp;
5331 dTARGET;
5332 STRLEN len;
79072805 5333
7e2040f0 5334 SvUTF8_off(TARG); /* decontaminate */
a0d0e21e 5335 if (SP - MARK > 1)
3280af22 5336 do_join(TARG, &PL_sv_no, MARK, SP);
1e21d011 5337 else {
789bd863 5338 sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv());
1e21d011
B
5339 if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED))
5340 report_uninit(TARG);
5341 }
5342
a0d0e21e
LW
5343 up = SvPV_force(TARG, len);
5344 if (len > 1) {
7e2040f0 5345 if (DO_UTF8(TARG)) { /* first reverse each character */
dfe13c55 5346 U8* s = (U8*)SvPVX(TARG);
349d4f2f 5347 const U8* send = (U8*)(s + len);
a0ed51b3 5348 while (s < send) {
d742c382 5349 if (UTF8_IS_INVARIANT(*s)) {
a0ed51b3
LW
5350 s++;
5351 continue;
5352 }
5353 else {
9041c2e3 5354 if (!utf8_to_uvchr(s, 0))
a0dbb045 5355 break;
dfe13c55 5356 up = (char*)s;
a0ed51b3 5357 s += UTF8SKIP(s);
dfe13c55 5358 down = (char*)(s - 1);
a0dbb045 5359 /* reverse this character */
a0ed51b3
LW
5360 while (down > up) {
5361 tmp = *up;
5362 *up++ = *down;
eb160463 5363 *down-- = (char)tmp;
a0ed51b3
LW
5364 }
5365 }
5366 }
5367 up = SvPVX(TARG);
5368 }
a0d0e21e
LW
5369 down = SvPVX(TARG) + len - 1;
5370 while (down > up) {
5371 tmp = *up;
5372 *up++ = *down;
eb160463 5373 *down-- = (char)tmp;
a0d0e21e 5374 }
3aa33fe5 5375 (void)SvPOK_only_UTF8(TARG);
79072805 5376 }
a0d0e21e
LW
5377 SP = MARK + 1;
5378 SETTARG;
79072805 5379 }
a0d0e21e 5380 RETURN;
79072805
LW
5381}
5382
a0d0e21e 5383PP(pp_split)
79072805 5384{
27da23d5 5385 dVAR; dSP; dTARG;
a0d0e21e 5386 AV *ary;
467f0320 5387 register IV limit = POPi; /* note, negative is forever */
1b6737cc 5388 SV * const sv = POPs;
a0d0e21e 5389 STRLEN len;
727b7506 5390 register const char *s = SvPV_const(sv, len);
1b6737cc 5391 const bool do_utf8 = DO_UTF8(sv);
727b7506 5392 const char *strend = s + len;
44a8e56a 5393 register PMOP *pm;
d9f97599 5394 register REGEXP *rx;
a0d0e21e 5395 register SV *dstr;
727b7506 5396 register const char *m;
a0d0e21e 5397 I32 iters = 0;
bb7a0f54 5398 const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
792b2c16 5399 I32 maxiters = slen + 10;
c1a7495a 5400 I32 trailing_empty = 0;
727b7506 5401 const char *orig;
1b6737cc 5402 const I32 origlimit = limit;
a0d0e21e
LW
5403 I32 realarray = 0;
5404 I32 base;
f54cb97a 5405 const I32 gimme = GIMME_V;
941446f6 5406 bool gimme_scalar;
f54cb97a 5407 const I32 oldsave = PL_savestack_ix;
437d3b4e 5408 U32 make_mortal = SVs_TEMP;
7fba1cd6 5409 bool multiline = 0;
b37c2d43 5410 MAGIC *mg = NULL;
79072805 5411
44a8e56a 5412#ifdef DEBUGGING
5413 Copy(&LvTARGOFF(POPs), &pm, 1, PMOP*);
5414#else
5415 pm = (PMOP*)POPs;
5416#endif
a0d0e21e 5417 if (!pm || !s)
2269b42e 5418 DIE(aTHX_ "panic: pp_split");
aaa362c4 5419 rx = PM_GETRE(pm);
bbce6d69 5420
a62b1201 5421 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
07bc277f 5422 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
bbce6d69 5423
a30b2f1f 5424 RX_MATCH_UTF8_set(rx, do_utf8);
d9f424b2 5425
971a9dd3 5426#ifdef USE_ITHREADS
20e98b0f 5427 if (pm->op_pmreplrootu.op_pmtargetoff) {
159b6efe 5428 ary = GvAVn(MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff)));
20e98b0f 5429 }
971a9dd3 5430#else
20e98b0f
NC
5431 if (pm->op_pmreplrootu.op_pmtargetgv) {
5432 ary = GvAVn(pm->op_pmreplrootu.op_pmtargetgv);
971a9dd3 5433 }
20e98b0f 5434#endif
79072805 5435 else
7d49f689 5436 ary = NULL;
a0d0e21e
LW
5437 if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
5438 realarray = 1;
8ec5e241 5439 PUTBACK;
a0d0e21e
LW
5440 av_extend(ary,0);
5441 av_clear(ary);
8ec5e241 5442 SPAGAIN;
ad64d0ec 5443 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
8ec5e241 5444 PUSHMARK(SP);
ad64d0ec 5445 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
8ec5e241
NIS
5446 }
5447 else {
1c0b011c 5448 if (!AvREAL(ary)) {
1b6737cc 5449 I32 i;
1c0b011c 5450 AvREAL_on(ary);
abff13bb 5451 AvREIFY_off(ary);
1c0b011c 5452 for (i = AvFILLp(ary); i >= 0; i--)
3280af22 5453 AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
1c0b011c
NIS
5454 }
5455 /* temporarily switch stacks */
8b7059b1 5456 SAVESWITCHSTACK(PL_curstack, ary);
8ec5e241 5457 make_mortal = 0;
1c0b011c 5458 }
79072805 5459 }
3280af22 5460 base = SP - PL_stack_base;
a0d0e21e 5461 orig = s;
07bc277f 5462 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
613f191e
TS
5463 if (do_utf8) {
5464 while (*s == ' ' || is_utf8_space((U8*)s))
5465 s += UTF8SKIP(s);
5466 }
a62b1201 5467 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
bbce6d69 5468 while (isSPACE_LC(*s))
5469 s++;
5470 }
5471 else {
5472 while (isSPACE(*s))
5473 s++;
5474 }
a0d0e21e 5475 }
73134a2e 5476 if (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) {
7fba1cd6 5477 multiline = 1;
c07a80fd 5478 }
5479
941446f6
FC
5480 gimme_scalar = gimme == G_SCALAR && !ary;
5481
a0d0e21e
LW
5482 if (!limit)
5483 limit = maxiters + 2;
07bc277f 5484 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
a0d0e21e 5485 while (--limit) {
bbce6d69 5486 m = s;
8727f688
YO
5487 /* this one uses 'm' and is a negative test */
5488 if (do_utf8) {
613f191e
TS
5489 while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
5490 const int t = UTF8SKIP(m);
5491 /* is_utf8_space returns FALSE for malform utf8 */
5492 if (strend - m < t)
5493 m = strend;
5494 else
5495 m += t;
5496 }
a62b1201
KW
5497 }
5498 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
8727f688
YO
5499 while (m < strend && !isSPACE_LC(*m))
5500 ++m;
5501 } else {
5502 while (m < strend && !isSPACE(*m))
5503 ++m;
5504 }
a0d0e21e
LW
5505 if (m >= strend)
5506 break;
bbce6d69 5507
c1a7495a
BB
5508 if (gimme_scalar) {
5509 iters++;
5510 if (m-s == 0)
5511 trailing_empty++;
5512 else
5513 trailing_empty = 0;
5514 } else {
5515 dstr = newSVpvn_flags(s, m-s,
5516 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5517 XPUSHs(dstr);
5518 }
bbce6d69 5519
613f191e
TS
5520 /* skip the whitespace found last */
5521 if (do_utf8)
5522 s = m + UTF8SKIP(m);
5523 else
5524 s = m + 1;
5525
8727f688
YO
5526 /* this one uses 's' and is a positive test */
5527 if (do_utf8) {
613f191e 5528 while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
8727f688 5529 s += UTF8SKIP(s);
a62b1201
KW
5530 }
5531 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
8727f688
YO
5532 while (s < strend && isSPACE_LC(*s))
5533 ++s;
5534 } else {
5535 while (s < strend && isSPACE(*s))
5536 ++s;
5537 }
79072805
LW
5538 }
5539 }
07bc277f 5540 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
a0d0e21e 5541 while (--limit) {
a6e20a40
AL
5542 for (m = s; m < strend && *m != '\n'; m++)
5543 ;
a0d0e21e
LW
5544 m++;
5545 if (m >= strend)
5546 break;
c1a7495a
BB
5547
5548 if (gimme_scalar) {
5549 iters++;
5550 if (m-s == 0)
5551 trailing_empty++;
5552 else
5553 trailing_empty = 0;
5554 } else {
5555 dstr = newSVpvn_flags(s, m-s,
5556 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5557 XPUSHs(dstr);
5558 }
a0d0e21e
LW
5559 s = m;
5560 }
5561 }
07bc277f 5562 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
640f820d
AB
5563 /*
5564 Pre-extend the stack, either the number of bytes or
5565 characters in the string or a limited amount, triggered by:
5566
5567 my ($x, $y) = split //, $str;
5568 or
5569 split //, $str, $i;
5570 */
c1a7495a
BB
5571 if (!gimme_scalar) {
5572 const U32 items = limit - 1;
5573 if (items < slen)
5574 EXTEND(SP, items);
5575 else
5576 EXTEND(SP, slen);
5577 }
640f820d 5578
e9515b0f
AB
5579 if (do_utf8) {
5580 while (--limit) {
5581 /* keep track of how many bytes we skip over */
5582 m = s;
640f820d 5583 s += UTF8SKIP(s);
c1a7495a
BB
5584 if (gimme_scalar) {
5585 iters++;
5586 if (s-m == 0)
5587 trailing_empty++;
5588 else
5589 trailing_empty = 0;
5590 } else {
5591 dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
640f820d 5592
c1a7495a
BB
5593 PUSHs(dstr);
5594 }
640f820d 5595
e9515b0f
AB
5596 if (s >= strend)
5597 break;
5598 }
5599 } else {
5600 while (--limit) {
c1a7495a
BB
5601 if (gimme_scalar) {
5602 iters++;
5603 } else {
5604 dstr = newSVpvn(s, 1);
e9515b0f 5605
e9515b0f 5606
c1a7495a
BB
5607 if (make_mortal)
5608 sv_2mortal(dstr);
640f820d 5609
c1a7495a
BB
5610 PUSHs(dstr);
5611 }
5612
5613 s++;
e9515b0f
AB
5614
5615 if (s >= strend)
5616 break;
5617 }
640f820d
AB
5618 }
5619 }
3c8556c3 5620 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
07bc277f
NC
5621 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
5622 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
5623 && !(RX_EXTFLAGS(rx) & RXf_ANCH)) {
5624 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
f9f4320a 5625 SV * const csv = CALLREG_INTUIT_STRING(rx);
cf93c79d 5626
07bc277f 5627 len = RX_MINLENRET(rx);
3c8556c3 5628 if (len == 1 && !RX_UTF8(rx) && !tail) {
1b6737cc 5629 const char c = *SvPV_nolen_const(csv);
a0d0e21e 5630 while (--limit) {
a6e20a40
AL
5631 for (m = s; m < strend && *m != c; m++)
5632 ;
a0d0e21e
LW
5633 if (m >= strend)
5634 break;
c1a7495a
BB
5635 if (gimme_scalar) {
5636 iters++;
5637 if (m-s == 0)
5638 trailing_empty++;
5639 else
5640 trailing_empty = 0;
5641 } else {
5642 dstr = newSVpvn_flags(s, m-s,
5643 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5644 XPUSHs(dstr);
5645 }
93f04dac
JH
5646 /* The rx->minlen is in characters but we want to step
5647 * s ahead by bytes. */
1aa99e6b
IH
5648 if (do_utf8)
5649 s = (char*)utf8_hop((U8*)m, len);
5650 else
5651 s = m + len; /* Fake \n at the end */
a0d0e21e
LW
5652 }
5653 }
5654 else {
a0d0e21e 5655 while (s < strend && --limit &&
f722798b 5656 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
7fba1cd6 5657 csv, multiline ? FBMrf_MULTILINE : 0)) )
a0d0e21e 5658 {
c1a7495a
BB
5659 if (gimme_scalar) {
5660 iters++;
5661 if (m-s == 0)
5662 trailing_empty++;
5663 else
5664 trailing_empty = 0;
5665 } else {
5666 dstr = newSVpvn_flags(s, m-s,
5667 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5668 XPUSHs(dstr);
5669 }
93f04dac
JH
5670 /* The rx->minlen is in characters but we want to step
5671 * s ahead by bytes. */
1aa99e6b
IH
5672 if (do_utf8)
5673 s = (char*)utf8_hop((U8*)m, len);
5674 else
5675 s = m + len; /* Fake \n at the end */
a0d0e21e 5676 }
463ee0b2 5677 }
463ee0b2 5678 }
a0d0e21e 5679 else {
07bc277f 5680 maxiters += slen * RX_NPARENS(rx);
080c2dec 5681 while (s < strend && --limit)
bbce6d69 5682 {
1b6737cc 5683 I32 rex_return;
080c2dec 5684 PUTBACK;
f9f4320a 5685 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
bfafcb9a 5686 sv, NULL, SvSCREAM(sv) ? REXEC_SCREAM : 0);
080c2dec 5687 SPAGAIN;
1b6737cc 5688 if (rex_return == 0)
080c2dec 5689 break;
d9f97599 5690 TAINT_IF(RX_MATCH_TAINTED(rx));
07bc277f 5691 if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
a0d0e21e
LW
5692 m = s;
5693 s = orig;
07bc277f 5694 orig = RX_SUBBEG(rx);
a0d0e21e
LW
5695 s = orig + (m - s);
5696 strend = s + (strend - m);
5697 }
07bc277f 5698 m = RX_OFFS(rx)[0].start + orig;
c1a7495a
BB
5699
5700 if (gimme_scalar) {
5701 iters++;
5702 if (m-s == 0)
5703 trailing_empty++;
5704 else
5705 trailing_empty = 0;
5706 } else {
5707 dstr = newSVpvn_flags(s, m-s,
5708 (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5709 XPUSHs(dstr);
5710 }
07bc277f 5711 if (RX_NPARENS(rx)) {
1b6737cc 5712 I32 i;
07bc277f
NC
5713 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
5714 s = RX_OFFS(rx)[i].start + orig;
5715 m = RX_OFFS(rx)[i].end + orig;
6de67870
JP
5716
5717 /* japhy (07/27/01) -- the (m && s) test doesn't catch
5718 parens that didn't match -- they should be set to
5719 undef, not the empty string */
c1a7495a
BB
5720 if (gimme_scalar) {
5721 iters++;
5722 if (m-s == 0)
5723 trailing_empty++;
5724 else
5725 trailing_empty = 0;
5726 } else {
5727 if (m >= orig && s >= orig) {
5728 dstr = newSVpvn_flags(s, m-s,
5729 (do_utf8 ? SVf_UTF8 : 0)
5730 | make_mortal);
5731 }
5732 else
5733 dstr = &PL_sv_undef; /* undef, not "" */
5734 XPUSHs(dstr);
748a9306 5735 }
c1a7495a 5736
a0d0e21e
LW
5737 }
5738 }
07bc277f 5739 s = RX_OFFS(rx)[0].end + orig;
a0d0e21e 5740 }
79072805 5741 }
8ec5e241 5742
c1a7495a
BB
5743 if (!gimme_scalar) {
5744 iters = (SP - PL_stack_base) - base;
5745 }
a0d0e21e 5746 if (iters > maxiters)
cea2e8a9 5747 DIE(aTHX_ "Split loop");
8ec5e241 5748
a0d0e21e
LW
5749 /* keep field after final delim? */
5750 if (s < strend || (iters && origlimit)) {
c1a7495a
BB
5751 if (!gimme_scalar) {
5752 const STRLEN l = strend - s;
5753 dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
5754 XPUSHs(dstr);
5755 }
a0d0e21e 5756 iters++;
79072805 5757 }
a0d0e21e 5758 else if (!origlimit) {
c1a7495a
BB
5759 if (gimme_scalar) {
5760 iters -= trailing_empty;
5761 } else {
5762 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
5763 if (TOPs && !make_mortal)
5764 sv_2mortal(TOPs);
5765 *SP-- = &PL_sv_undef;
5766 iters--;
5767 }
89900bd3 5768 }
a0d0e21e 5769 }
8ec5e241 5770
8b7059b1
DM
5771 PUTBACK;
5772 LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
5773 SPAGAIN;
a0d0e21e 5774 if (realarray) {
8ec5e241 5775 if (!mg) {
1c0b011c
NIS
5776 if (SvSMAGICAL(ary)) {
5777 PUTBACK;
ad64d0ec 5778 mg_set(MUTABLE_SV(ary));
1c0b011c
NIS
5779 SPAGAIN;
5780 }
5781 if (gimme == G_ARRAY) {
5782 EXTEND(SP, iters);
5783 Copy(AvARRAY(ary), SP + 1, iters, SV*);
5784 SP += iters;
5785 RETURN;
5786 }
8ec5e241 5787 }
1c0b011c 5788 else {
fb73857a 5789 PUTBACK;
d343c3ef 5790 ENTER_with_name("call_PUSH");
864dbfa3 5791 call_method("PUSH",G_SCALAR|G_DISCARD);
d343c3ef 5792 LEAVE_with_name("call_PUSH");
fb73857a 5793 SPAGAIN;
8ec5e241 5794 if (gimme == G_ARRAY) {
1b6737cc 5795 I32 i;
8ec5e241
NIS
5796 /* EXTEND should not be needed - we just popped them */
5797 EXTEND(SP, iters);
5798 for (i=0; i < iters; i++) {
5799 SV **svp = av_fetch(ary, i, FALSE);
3280af22 5800 PUSHs((svp) ? *svp : &PL_sv_undef);
8ec5e241 5801 }
1c0b011c
NIS
5802 RETURN;
5803 }
a0d0e21e
LW
5804 }
5805 }
5806 else {
5807 if (gimme == G_ARRAY)
5808 RETURN;
5809 }
7f18b612
YST
5810
5811 GETTARGET;
5812 PUSHi(iters);
5813 RETURN;
79072805 5814}
85e6fe83 5815
c5917253
NC
5816PP(pp_once)
5817{
5818 dSP;
5819 SV *const sv = PAD_SVl(PL_op->op_targ);
5820
5821 if (SvPADSTALE(sv)) {
5822 /* First time. */
5823 SvPADSTALE_off(sv);
5824 RETURNOP(cLOGOP->op_other);
5825 }
5826 RETURNOP(cLOGOP->op_next);
5827}
5828
c0329465
MB
5829PP(pp_lock)
5830{
97aff369 5831 dVAR;
39644a26 5832 dSP;
c0329465 5833 dTOPss;
e55aaa0e 5834 SV *retsv = sv;
68795e93 5835 SvLOCK(sv);
f79aa60b
FC
5836 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV
5837 || SvTYPE(retsv) == SVt_PVCV) {
e55aaa0e
MB
5838 retsv = refto(retsv);
5839 }
5840 SETs(retsv);
c0329465
MB
5841 RETURN;
5842}
a863c7d1 5843
65bca31a
NC
5844
5845PP(unimplemented_op)
5846{
97aff369 5847 dVAR;
361ed549
NC
5848 const Optype op_type = PL_op->op_type;
5849 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope
5850 with out of range op numbers - it only "special" cases op_custom.
5851 Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
5852 if we get here for a custom op then that means that the custom op didn't
5853 have an implementation. Given that OP_NAME() looks up the custom op
5854 by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
5855 registers &PL_unimplemented_op as the address of their custom op.
5856 NULL doesn't generate a useful error message. "custom" does. */
5857 const char *const name = op_type >= OP_max
5858 ? "[out of range]" : PL_op_name[PL_op->op_type];
7627e6d0
NC
5859 if(OP_IS_SOCKET(op_type))
5860 DIE(aTHX_ PL_no_sock_func, name);
361ed549 5861 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
65bca31a
NC
5862}
5863
867fa1e2
YO
5864PP(pp_boolkeys)
5865{
5866 dVAR;
5867 dSP;
5868 HV * const hv = (HV*)POPs;
5869
fd1d9b5c
FC
5870 if (SvTYPE(hv) != SVt_PVHV) { XPUSHs(&PL_sv_no); RETURN; }
5871
867fa1e2
YO
5872 if (SvRMAGICAL(hv)) {
5873 MAGIC * const mg = mg_find((SV*)hv, PERL_MAGIC_tied);
5874 if (mg) {
5875 XPUSHs(magic_scalarpack(hv, mg));
5876 RETURN;
5877 }
5878 }
5879
1b95d04f 5880 XPUSHs(boolSV(HvUSEDKEYS(hv) != 0));
867fa1e2
YO
5881 RETURN;
5882}
5883
deb8a388
FC
5884/* For sorting out arguments passed to a &CORE:: subroutine */
5885PP(pp_coreargs)
5886{
5887 dSP;
7fa5bd9b 5888 int opnum = SvIOK(cSVOP_sv) ? (int)SvUV(cSVOP_sv) : 0;
19c481f4 5889 int defgv = PL_opargs[opnum] & OA_DEFGV, whicharg = 0;
7fa5bd9b 5890 AV * const at_ = GvAV(PL_defgv);
46e00a91 5891 SV **svp = AvARRAY(at_);
19c481f4 5892 I32 minargs = 0, maxargs = 0, numargs = AvFILLp(at_)+1;
7fa5bd9b 5893 I32 oa = opnum ? PL_opargs[opnum] >> OASHIFT : 0;
46e00a91 5894 bool seen_question = 0;
7fa5bd9b 5895 const char *err = NULL;
3e6568b4 5896 const bool pushmark = PL_op->op_private & OPpCOREARGS_PUSHMARK;
7fa5bd9b 5897
46e00a91
FC
5898 /* Count how many args there are first, to get some idea how far to
5899 extend the stack. */
7fa5bd9b 5900 while (oa) {
bf0571fd 5901 if ((oa & 7) == OA_LIST) { maxargs = I32_MAX; break; }
7fa5bd9b 5902 maxargs++;
46e00a91
FC
5903 if (oa & OA_OPTIONAL) seen_question = 1;
5904 if (!seen_question) minargs++;
7fa5bd9b
FC
5905 oa >>= 4;
5906 }
5907
5908 if(numargs < minargs) err = "Not enough";
5909 else if(numargs > maxargs) err = "Too many";
5910 if (err)
5911 /* diag_listed_as: Too many arguments for %s */
5912 Perl_croak(aTHX_
5913 "%s arguments for %s", err,
5914 opnum ? OP_DESC(PL_op->op_next) : SvPV_nolen_const(cSVOP_sv)
5915 );
5916
5917 /* Reset the stack pointer. Without this, we end up returning our own
5918 arguments in list context, in addition to the values we are supposed
5919 to return. nextstate usually does this on sub entry, but we need
5920 to run the next op with the caller’s hints, so we cannot have a
5921 nextstate. */
5922 SP = PL_stack_base + cxstack[cxstack_ix].blk_oldsp;
5923
46e00a91
FC
5924 if(!maxargs) RETURN;
5925
bf0571fd
FC
5926 /* We do this here, rather than with a separate pushmark op, as it has
5927 to come in between two things this function does (stack reset and
5928 arg pushing). This seems the easiest way to do it. */
3e6568b4 5929 if (pushmark) {
bf0571fd
FC
5930 PUTBACK;
5931 (void)Perl_pp_pushmark(aTHX);
5932 }
5933
5934 EXTEND(SP, maxargs == I32_MAX ? numargs : maxargs);
c931b036 5935 PUTBACK; /* The code below can die in various places. */
46e00a91
FC
5936
5937 oa = PL_opargs[opnum] >> OASHIFT;
3e6568b4 5938 for (; oa&&(numargs||!pushmark); (void)(numargs&&(++svp,--numargs))) {
c931b036 5939 whicharg++;
46e00a91
FC
5940 switch (oa & 7) {
5941 case OA_SCALAR:
d6d78e19
FC
5942 if (!numargs && defgv && whicharg == minargs + 1) {
5943 PERL_SI * const oldsi = PL_curstackinfo;
5944 I32 const oldcxix = oldsi->si_cxix;
5945 CV *caller;
5946 if (oldcxix) oldsi->si_cxix--;
5947 else PL_curstackinfo = oldsi->si_prev;
5948 caller = find_runcv(NULL);
5949 PL_curstackinfo = oldsi;
5950 oldsi->si_cxix = oldcxix;
5951 PUSHs(find_rundefsv2(
5952 caller,cxstack[cxstack_ix].blk_oldcop->cop_seq
5953 ));
5954 }
5955 else PUSHs(numargs ? svp && *svp ? *svp : &PL_sv_undef : NULL);
46e00a91 5956 break;
bf0571fd
FC
5957 case OA_LIST:
5958 while (numargs--) {
5959 PUSHs(svp && *svp ? *svp : &PL_sv_undef);
5960 svp++;
5961 }
5962 RETURN;
19c481f4
FC
5963 case OA_HVREF:
5964 if (!svp || !*svp || !SvROK(*svp)
5965 || SvTYPE(SvRV(*svp)) != SVt_PVHV)
5966 DIE(aTHX_
5967 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/
5968 "Type of arg %d to &CORE::%s must be hash reference",
5969 whicharg, OP_DESC(PL_op->op_next)
5970 );
5971 PUSHs(SvRV(*svp));
5972 break;
c931b036 5973 case OA_FILEREF:
30901a8a
FC
5974 if (!numargs) PUSHs(NULL);
5975 else if(svp && *svp && SvROK(*svp) && isGV_with_GP(SvRV(*svp)))
c931b036
FC
5976 /* no magic here, as the prototype will have added an extra
5977 refgen and we just want what was there before that */
5978 PUSHs(SvRV(*svp));
5979 else {
5980 const bool constr = PL_op->op_private & whicharg;
5981 PUSHs(S_rv2gv(aTHX_
5982 svp && *svp ? *svp : &PL_sv_undef,
5983 constr, CopHINTS_get(PL_curcop) & HINT_STRICT_REFS,
5984 !constr
5985 ));
5986 }
5987 break;
c72a5629 5988 case OA_SCALARREF:
17008668
FC
5989 {
5990 const bool wantscalar =
5991 PL_op->op_private & OPpCOREARGS_SCALARMOD;
c72a5629 5992 if (!svp || !*svp || !SvROK(*svp)
17008668
FC
5993 /* We have to permit globrefs even for the \$ proto, as
5994 *foo is indistinguishable from ${\*foo}, and the proto-
5995 type permits the latter. */
5996 || SvTYPE(SvRV(*svp)) > (
efe889ae
FC
5997 wantscalar ? SVt_PVLV
5998 : opnum == OP_LOCK ? SVt_PVCV
5999 : SVt_PVHV
17008668 6000 )
c72a5629
FC
6001 )
6002 DIE(aTHX_
6003 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/
17008668
FC
6004 "Type of arg %d to &CORE::%s must be %s",
6005 whicharg, OP_DESC(PL_op->op_next),
6006 wantscalar
6007 ? "scalar reference"
efe889ae
FC
6008 : opnum == OP_LOCK
6009 ? "reference to one of [$@%&*]"
6010 : "reference to one of [$@%*]"
c72a5629
FC
6011 );
6012 PUSHs(SvRV(*svp));
6013 break;
17008668 6014 }
46e00a91 6015 default:
46e00a91
FC
6016 DIE(aTHX_ "panic: unknown OA_*: %x", (unsigned)(oa&7));
6017 }
6018 oa = oa >> 4;
6019 }
6020
deb8a388
FC
6021 RETURN;
6022}
6023
e609e586
NC
6024/*
6025 * Local variables:
6026 * c-indentation-style: bsd
6027 * c-basic-offset: 4
6028 * indent-tabs-mode: t
6029 * End:
6030 *
37442d52
RGS
6031 * ex: set ts=8 sts=4 sw=4 noet:
6032 */