This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
pp.c: pp_rv2gv UTF8 cleanup.
[perl5.git] / pp.c
CommitLineData
a0d0e21e 1/* pp.c
79072805 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
79072805 5 *
a0d0e21e
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
79072805 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
15 *
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
a0d0e21e 17 */
79072805 18
166f8a29
DM
19/* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
24 */
25
79072805 26#include "EXTERN.h"
864dbfa3 27#define PERL_IN_PP_C
79072805 28#include "perl.h"
77bc9082 29#include "keywords.h"
79072805 30
a4af207c
JH
31#include "reentr.h"
32
dfe9444c
AD
33/* XXX I can't imagine anyone who doesn't have this actually _needs_
34 it, since pid_t is an integral type.
35 --AD 2/20/1998
36*/
37#ifdef NEED_GETPID_PROTO
38extern Pid_t getpid (void);
8ac85365
NIS
39#endif
40
0630166f
SP
41/*
42 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
43 * This switches them over to IEEE.
44 */
45#if defined(LIBM_LIB_VERSION)
46 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
47#endif
48
13017935
SM
49/* variations on pp_null */
50
93a17b20
LW
51PP(pp_stub)
52{
97aff369 53 dVAR;
39644a26 54 dSP;
54310121 55 if (GIMME_V == G_SCALAR)
3280af22 56 XPUSHs(&PL_sv_undef);
93a17b20
LW
57 RETURN;
58}
59
79072805
LW
60/* Pushy stuff. */
61
93a17b20
LW
62PP(pp_padav)
63{
97aff369 64 dVAR; dSP; dTARGET;
13017935 65 I32 gimme;
e190e9b4 66 assert(SvTYPE(TARG) == SVt_PVAV);
533c011a 67 if (PL_op->op_private & OPpLVAL_INTRO)
a5911867
RGS
68 if (!(PL_op->op_private & OPpPAD_STATE))
69 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
85e6fe83 70 EXTEND(SP, 1);
533c011a 71 if (PL_op->op_flags & OPf_REF) {
85e6fe83 72 PUSHs(TARG);
93a17b20 73 RETURN;
40c94d11
FC
74 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
75 const I32 flags = is_lvalue_sub();
76 if (flags && !(flags & OPpENTERSUB_INARGS)) {
78f9721b
SM
77 if (GIMME == G_SCALAR)
78 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
79 PUSHs(TARG);
80 RETURN;
40c94d11 81 }
85e6fe83 82 }
13017935
SM
83 gimme = GIMME_V;
84 if (gimme == G_ARRAY) {
502c6561 85 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
85e6fe83 86 EXTEND(SP, maxarg);
93965878
NIS
87 if (SvMAGICAL(TARG)) {
88 U32 i;
eb160463 89 for (i=0; i < (U32)maxarg; i++) {
502c6561 90 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
3280af22 91 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
93965878
NIS
92 }
93 }
94 else {
502c6561 95 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
93965878 96 }
85e6fe83
LW
97 SP += maxarg;
98 }
13017935 99 else if (gimme == G_SCALAR) {
1b6737cc 100 SV* const sv = sv_newmortal();
502c6561 101 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
85e6fe83
LW
102 sv_setiv(sv, maxarg);
103 PUSHs(sv);
104 }
105 RETURN;
93a17b20
LW
106}
107
108PP(pp_padhv)
109{
97aff369 110 dVAR; dSP; dTARGET;
54310121
PP
111 I32 gimme;
112
e190e9b4 113 assert(SvTYPE(TARG) == SVt_PVHV);
93a17b20 114 XPUSHs(TARG);
533c011a 115 if (PL_op->op_private & OPpLVAL_INTRO)
a5911867
RGS
116 if (!(PL_op->op_private & OPpPAD_STATE))
117 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
533c011a 118 if (PL_op->op_flags & OPf_REF)
93a17b20 119 RETURN;
40c94d11
FC
120 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
121 const I32 flags = is_lvalue_sub();
122 if (flags && !(flags & OPpENTERSUB_INARGS)) {
78f9721b
SM
123 if (GIMME == G_SCALAR)
124 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
125 RETURN;
40c94d11 126 }
78f9721b 127 }
54310121
PP
128 gimme = GIMME_V;
129 if (gimme == G_ARRAY) {
981b7185 130 RETURNOP(Perl_do_kv(aTHX));
85e6fe83 131 }
54310121 132 else if (gimme == G_SCALAR) {
85fbaab2 133 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
85e6fe83 134 SETs(sv);
85e6fe83 135 }
54310121 136 RETURN;
93a17b20
LW
137}
138
79072805
LW
139/* Translations. */
140
4bdf8368 141static const char S_no_symref_sv[] =
def89bff
NC
142 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
143
6f7909da
FC
144/* In some cases this function inspects PL_op. If this function is called
145 for new op types, more bool parameters may need to be added in place of
146 the checks.
147
148 When noinit is true, the absence of a gv will cause a retval of undef.
149 This is unrelated to the cv-to-gv assignment case.
8ec5e241 150
6f7909da
FC
151 Make sure to use SPAGAIN after calling this.
152*/
153
154static SV *
155S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
156 const bool noinit)
157{
14f0f125 158 dVAR;
f64c9ac5 159 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
ed6116ce 160 if (SvROK(sv)) {
93d7320b
DM
161 if (SvAMAGIC(sv)) {
162 sv = amagic_deref_call(sv, to_gv_amg);
93d7320b 163 }
e4a1664f 164 wasref:
ed6116ce 165 sv = SvRV(sv);
b1dadf13 166 if (SvTYPE(sv) == SVt_PVIO) {
159b6efe 167 GV * const gv = MUTABLE_GV(sv_newmortal());
b1dadf13 168 gv_init(gv, 0, "", 0, 0);
a45c7426 169 GvIOp(gv) = MUTABLE_IO(sv);
b37c2d43 170 SvREFCNT_inc_void_NN(sv);
ad64d0ec 171 sv = MUTABLE_SV(gv);
ef54e1a4 172 }
6e592b3a 173 else if (!isGV_with_GP(sv))
6f7909da 174 return (SV *)Perl_die(aTHX_ "Not a GLOB reference");
79072805
LW
175 }
176 else {
6e592b3a 177 if (!isGV_with_GP(sv)) {
f132ae69 178 if (!SvOK(sv)) {
b13b2135 179 /* If this is a 'my' scalar and flag is set then vivify
853846ea 180 * NI-S 1999/05/07
b13b2135 181 */
f132ae69 182 if (vivify_sv && sv != &PL_sv_undef) {
2c8ac474 183 GV *gv;
ce74145d
FC
184 if (SvREADONLY(sv))
185 Perl_croak_no_modify(aTHX);
2c8ac474 186 if (cUNOP->op_targ) {
0bd48802 187 SV * const namesv = PAD_SV(cUNOP->op_targ);
159b6efe 188 gv = MUTABLE_GV(newSV(0));
6b10071b 189 gv_init_sv(gv, CopSTASH(PL_curcop), namesv, 0);
2c8ac474
GS
190 }
191 else {
0bd48802 192 const char * const name = CopSTASHPV(PL_curcop);
6b10071b
BF
193 gv = newGVgen_flags(name,
194 HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
1d8d4d2a 195 }
43230e26 196 prepare_SV_for_RV(sv);
ad64d0ec 197 SvRV_set(sv, MUTABLE_SV(gv));
853846ea 198 SvROK_on(sv);
1d8d4d2a 199 SvSETMAGIC(sv);
853846ea 200 goto wasref;
2c8ac474 201 }
6f7909da
FC
202 if (PL_op->op_flags & OPf_REF || strict)
203 return (SV *)Perl_die(aTHX_ PL_no_usym, "a symbol");
599cee73 204 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 205 report_uninit(sv);
6f7909da 206 return &PL_sv_undef;
a0d0e21e 207 }
6f7909da 208 if (noinit)
35cd451c 209 {
77cb3b01
FC
210 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
211 sv, GV_ADDMG, SVt_PVGV
23496c6e 212 ))))
6f7909da 213 return &PL_sv_undef;
35cd451c
GS
214 }
215 else {
6f7909da
FC
216 if (strict)
217 return
218 (SV *)Perl_die(aTHX_
219 S_no_symref_sv,
220 sv,
221 (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""),
222 "a symbol"
223 );
e26df76a
NC
224 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
225 == OPpDONT_INIT_GV) {
226 /* We are the target of a coderef assignment. Return
227 the scalar unchanged, and let pp_sasssign deal with
228 things. */
6f7909da 229 return sv;
e26df76a 230 }
77cb3b01 231 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
35cd451c 232 }
2acc3314 233 /* FAKE globs in the symbol table cause weird bugs (#77810) */
96293f45 234 SvFAKE_off(sv);
93a17b20 235 }
79072805 236 }
96293f45 237 if (SvFAKE(sv)) {
2acc3314 238 SV *newsv = sv_newmortal();
5cf4b255 239 sv_setsv_flags(newsv, sv, 0);
2acc3314 240 SvFAKE_off(newsv);
d8906c05 241 sv = newsv;
2acc3314 242 }
6f7909da
FC
243 return sv;
244}
245
246PP(pp_rv2gv)
247{
248 dVAR; dSP; dTOPss;
249
250 sv = S_rv2gv(aTHX_
251 sv, PL_op->op_private & OPpDEREF,
252 PL_op->op_private & HINT_STRICT_REFS,
253 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
254 || PL_op->op_type == OP_READLINE
255 );
256 SPAGAIN;
d8906c05
FC
257 if (PL_op->op_private & OPpLVAL_INTRO)
258 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
259 SETs(sv);
79072805
LW
260 RETURN;
261}
262
dc3c76f8
NC
263/* Helper function for pp_rv2sv and pp_rv2av */
264GV *
fe9845cc
RB
265Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
266 const svtype type, SV ***spp)
dc3c76f8
NC
267{
268 dVAR;
269 GV *gv;
270
7918f24d
NC
271 PERL_ARGS_ASSERT_SOFTREF2XV;
272
dc3c76f8
NC
273 if (PL_op->op_private & HINT_STRICT_REFS) {
274 if (SvOK(sv))
10b53e54 275 Perl_die(aTHX_ S_no_symref_sv, sv, (SvPOK(sv) && SvCUR(sv)>32 ? "..." : ""), what);
dc3c76f8
NC
276 else
277 Perl_die(aTHX_ PL_no_usym, what);
278 }
279 if (!SvOK(sv)) {
fd1d9b5c
FC
280 if (
281 PL_op->op_flags & OPf_REF &&
282 PL_op->op_next->op_type != OP_BOOLKEYS
283 )
dc3c76f8
NC
284 Perl_die(aTHX_ PL_no_usym, what);
285 if (ckWARN(WARN_UNINITIALIZED))
286 report_uninit(sv);
287 if (type != SVt_PV && GIMME_V == G_ARRAY) {
288 (*spp)--;
289 return NULL;
290 }
291 **spp = &PL_sv_undef;
292 return NULL;
293 }
294 if ((PL_op->op_flags & OPf_SPECIAL) &&
295 !(PL_op->op_flags & OPf_MOD))
296 {
77cb3b01 297 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
dc3c76f8
NC
298 {
299 **spp = &PL_sv_undef;
300 return NULL;
301 }
302 }
303 else {
77cb3b01 304 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
dc3c76f8
NC
305 }
306 return gv;
307}
308
79072805
LW
309PP(pp_rv2sv)
310{
97aff369 311 dVAR; dSP; dTOPss;
c445ea15 312 GV *gv = NULL;
79072805 313
9026059d 314 SvGETMAGIC(sv);
ed6116ce 315 if (SvROK(sv)) {
93d7320b
DM
316 if (SvAMAGIC(sv)) {
317 sv = amagic_deref_call(sv, to_sv_amg);
318 SPAGAIN;
319 }
f5284f61 320
ed6116ce 321 sv = SvRV(sv);
79072805
LW
322 switch (SvTYPE(sv)) {
323 case SVt_PVAV:
324 case SVt_PVHV:
325 case SVt_PVCV:
cbae9b9f
YST
326 case SVt_PVFM:
327 case SVt_PVIO:
cea2e8a9 328 DIE(aTHX_ "Not a SCALAR reference");
42d0e0b7 329 default: NOOP;
79072805
LW
330 }
331 }
332 else {
159b6efe 333 gv = MUTABLE_GV(sv);
748a9306 334
6e592b3a 335 if (!isGV_with_GP(gv)) {
dc3c76f8
NC
336 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
337 if (!gv)
338 RETURN;
463ee0b2 339 }
29c711a3 340 sv = GvSVn(gv);
a0d0e21e 341 }
533c011a 342 if (PL_op->op_flags & OPf_MOD) {
82d03984
RGS
343 if (PL_op->op_private & OPpLVAL_INTRO) {
344 if (cUNOP->op_first->op_type == OP_NULL)
159b6efe 345 sv = save_scalar(MUTABLE_GV(TOPs));
82d03984
RGS
346 else if (gv)
347 sv = save_scalar(gv);
348 else
f1f66076 349 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
82d03984 350 }
533c011a 351 else if (PL_op->op_private & OPpDEREF)
9026059d 352 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
79072805 353 }
a0d0e21e 354 SETs(sv);
79072805
LW
355 RETURN;
356}
357
358PP(pp_av2arylen)
359{
97aff369 360 dVAR; dSP;
502c6561 361 AV * const av = MUTABLE_AV(TOPs);
02d85cc3
EB
362 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
363 if (lvalue) {
364 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
365 if (!*sv) {
366 *sv = newSV_type(SVt_PVMG);
367 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
368 }
369 SETs(*sv);
370 } else {
e1dccc0d 371 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
79072805 372 }
79072805
LW
373 RETURN;
374}
375
a0d0e21e
LW
376PP(pp_pos)
377{
2154eca7 378 dVAR; dSP; dPOPss;
8ec5e241 379
78f9721b 380 if (PL_op->op_flags & OPf_MOD || LVRET) {
16eb5365
FC
381 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
382 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
383 LvTYPE(ret) = '.';
384 LvTARG(ret) = SvREFCNT_inc_simple(sv);
2154eca7 385 PUSHs(ret); /* no SvSETMAGIC */
a0d0e21e
LW
386 RETURN;
387 }
388 else {
a0d0e21e 389 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
1b6737cc 390 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
565764a8 391 if (mg && mg->mg_len >= 0) {
2154eca7 392 dTARGET;
a0ed51b3 393 I32 i = mg->mg_len;
7e2040f0 394 if (DO_UTF8(sv))
a0ed51b3 395 sv_pos_b2u(sv, &i);
e1dccc0d 396 PUSHi(i);
a0d0e21e
LW
397 RETURN;
398 }
399 }
400 RETPUSHUNDEF;
401 }
402}
403
79072805
LW
404PP(pp_rv2cv)
405{
97aff369 406 dVAR; dSP;
79072805 407 GV *gv;
1eced8f8 408 HV *stash_unused;
c445ea15 409 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
9da346da 410 ? GV_ADDMG
c445ea15
AL
411 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
412 ? GV_ADD|GV_NOEXPAND
413 : GV_ADD;
4633a7c4
LW
414 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
415 /* (But not in defined().) */
e26df76a 416
1eced8f8 417 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
07055b4c
CS
418 if (cv) {
419 if (CvCLONE(cv))
ad64d0ec 420 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
d32f2495
SC
421 if ((PL_op->op_private & OPpLVAL_INTRO)) {
422 if (gv && GvCV(gv) == cv && (gv = gv_autoload4(GvSTASH(gv), GvNAME(gv), GvNAMELEN(gv), FALSE)))
423 cv = GvCV(gv);
424 if (!CvLVALUE(cv))
425 DIE(aTHX_ "Can't modify non-lvalue subroutine call");
426 }
07055b4c 427 }
e26df76a 428 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
ea726b52 429 cv = MUTABLE_CV(gv);
e26df76a 430 }
07055b4c 431 else
ea726b52 432 cv = MUTABLE_CV(&PL_sv_undef);
ad64d0ec 433 SETs(MUTABLE_SV(cv));
79072805
LW
434 RETURN;
435}
436
c07a80fd
PP
437PP(pp_prototype)
438{
97aff369 439 dVAR; dSP;
c07a80fd
PP
440 CV *cv;
441 HV *stash;
442 GV *gv;
fabdb6c0 443 SV *ret = &PL_sv_undef;
c07a80fd 444
b6c543e3 445 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
e3f73d4e 446 const char * s = SvPVX_const(TOPs);
b6c543e3 447 if (strnEQ(s, "CORE::", 6)) {
be1b855b 448 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
b66130dd
FC
449 if (!code || code == -KEY_CORE)
450 DIE(aTHX_ "Can't find an opnumber for \"%s\"", s+6);
451 if (code < 0) { /* Overridable. */
452 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
453 if (sv) ret = sv;
454 }
b8c38f0a 455 goto set;
b6c543e3
IZ
456 }
457 }
f2c0649b 458 cv = sv_2cv(TOPs, &stash, &gv, 0);
5f05dabc 459 if (cv && SvPOK(cv))
59cd0e26 460 ret = newSVpvn_flags(SvPVX_const(cv), SvCUR(cv), SVs_TEMP);
b6c543e3 461 set:
c07a80fd
PP
462 SETs(ret);
463 RETURN;
464}
465
a0d0e21e
LW
466PP(pp_anoncode)
467{
97aff369 468 dVAR; dSP;
ea726b52 469 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
a5f75d66 470 if (CvCLONE(cv))
ad64d0ec 471 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
5f05dabc 472 EXTEND(SP,1);
ad64d0ec 473 PUSHs(MUTABLE_SV(cv));
a0d0e21e
LW
474 RETURN;
475}
476
477PP(pp_srefgen)
79072805 478{
97aff369 479 dVAR; dSP;
71be2cbc 480 *SP = refto(*SP);
79072805 481 RETURN;
8ec5e241 482}
a0d0e21e
LW
483
484PP(pp_refgen)
485{
97aff369 486 dVAR; dSP; dMARK;
a0d0e21e 487 if (GIMME != G_ARRAY) {
5f0b1d4e
GS
488 if (++MARK <= SP)
489 *MARK = *SP;
490 else
3280af22 491 *MARK = &PL_sv_undef;
5f0b1d4e
GS
492 *MARK = refto(*MARK);
493 SP = MARK;
494 RETURN;
a0d0e21e 495 }
bbce6d69 496 EXTEND_MORTAL(SP - MARK);
71be2cbc
PP
497 while (++MARK <= SP)
498 *MARK = refto(*MARK);
a0d0e21e 499 RETURN;
79072805
LW
500}
501
76e3520e 502STATIC SV*
cea2e8a9 503S_refto(pTHX_ SV *sv)
71be2cbc 504{
97aff369 505 dVAR;
71be2cbc
PP
506 SV* rv;
507
7918f24d
NC
508 PERL_ARGS_ASSERT_REFTO;
509
71be2cbc
PP
510 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
511 if (LvTARGLEN(sv))
68dc0745
PP
512 vivify_defelem(sv);
513 if (!(sv = LvTARG(sv)))
3280af22 514 sv = &PL_sv_undef;
0dd88869 515 else
b37c2d43 516 SvREFCNT_inc_void_NN(sv);
71be2cbc 517 }
d8b46c1b 518 else if (SvTYPE(sv) == SVt_PVAV) {
502c6561
NC
519 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
520 av_reify(MUTABLE_AV(sv));
d8b46c1b 521 SvTEMP_off(sv);
b37c2d43 522 SvREFCNT_inc_void_NN(sv);
d8b46c1b 523 }
f2933f5f
DM
524 else if (SvPADTMP(sv) && !IS_PADGV(sv))
525 sv = newSVsv(sv);
71be2cbc
PP
526 else {
527 SvTEMP_off(sv);
b37c2d43 528 SvREFCNT_inc_void_NN(sv);
71be2cbc
PP
529 }
530 rv = sv_newmortal();
4df7f6af 531 sv_upgrade(rv, SVt_IV);
b162af07 532 SvRV_set(rv, sv);
71be2cbc
PP
533 SvROK_on(rv);
534 return rv;
535}
536
79072805
LW
537PP(pp_ref)
538{
97aff369 539 dVAR; dSP; dTARGET;
e1ec3a88 540 const char *pv;
1b6737cc 541 SV * const sv = POPs;
f12c7020 542
5b295bef
RD
543 if (sv)
544 SvGETMAGIC(sv);
f12c7020 545
a0d0e21e 546 if (!sv || !SvROK(sv))
4633a7c4 547 RETPUSHNO;
79072805 548
cba0b539
FR
549 pv = sv_reftype(SvRV(sv),TRUE);
550 PUSHp(pv, strlen(pv));
79072805
LW
551 RETURN;
552}
553
554PP(pp_bless)
555{
97aff369 556 dVAR; dSP;
463ee0b2 557 HV *stash;
79072805 558
463ee0b2 559 if (MAXARG == 1)
c2f922f1 560 curstash:
11faa288 561 stash = CopSTASH(PL_curcop);
7b8d334a 562 else {
1b6737cc 563 SV * const ssv = POPs;
7b8d334a 564 STRLEN len;
e1ec3a88 565 const char *ptr;
81689caa 566
c2f922f1
FC
567 if (!ssv) goto curstash;
568 if (!SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
81689caa 569 Perl_croak(aTHX_ "Attempt to bless into a reference");
5c144d81 570 ptr = SvPV_const(ssv,len);
a2a5de95
NC
571 if (len == 0)
572 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
573 "Explicit blessing to '' (assuming package main)");
da51bb9b 574 stash = gv_stashpvn(ptr, len, GV_ADD);
7b8d334a 575 }
a0d0e21e 576
5d3fdfeb 577 (void)sv_bless(TOPs, stash);
79072805
LW
578 RETURN;
579}
580
fb73857a
PP
581PP(pp_gelem)
582{
97aff369 583 dVAR; dSP;
b13b2135 584
1b6737cc
AL
585 SV *sv = POPs;
586 const char * const elem = SvPV_nolen_const(sv);
159b6efe 587 GV * const gv = MUTABLE_GV(POPs);
c445ea15 588 SV * tmpRef = NULL;
1b6737cc 589
c445ea15 590 sv = NULL;
c4ba80c3
NC
591 if (elem) {
592 /* elem will always be NUL terminated. */
1b6737cc 593 const char * const second_letter = elem + 1;
c4ba80c3
NC
594 switch (*elem) {
595 case 'A':
1b6737cc 596 if (strEQ(second_letter, "RRAY"))
ad64d0ec 597 tmpRef = MUTABLE_SV(GvAV(gv));
c4ba80c3
NC
598 break;
599 case 'C':
1b6737cc 600 if (strEQ(second_letter, "ODE"))
ad64d0ec 601 tmpRef = MUTABLE_SV(GvCVu(gv));
c4ba80c3
NC
602 break;
603 case 'F':
1b6737cc 604 if (strEQ(second_letter, "ILEHANDLE")) {
c4ba80c3
NC
605 /* finally deprecated in 5.8.0 */
606 deprecate("*glob{FILEHANDLE}");
ad64d0ec 607 tmpRef = MUTABLE_SV(GvIOp(gv));
c4ba80c3
NC
608 }
609 else
1b6737cc 610 if (strEQ(second_letter, "ORMAT"))
ad64d0ec 611 tmpRef = MUTABLE_SV(GvFORM(gv));
c4ba80c3
NC
612 break;
613 case 'G':
1b6737cc 614 if (strEQ(second_letter, "LOB"))
ad64d0ec 615 tmpRef = MUTABLE_SV(gv);
c4ba80c3
NC
616 break;
617 case 'H':
1b6737cc 618 if (strEQ(second_letter, "ASH"))
ad64d0ec 619 tmpRef = MUTABLE_SV(GvHV(gv));
c4ba80c3
NC
620 break;
621 case 'I':
1b6737cc 622 if (*second_letter == 'O' && !elem[2])
ad64d0ec 623 tmpRef = MUTABLE_SV(GvIOp(gv));
c4ba80c3
NC
624 break;
625 case 'N':
1b6737cc 626 if (strEQ(second_letter, "AME"))
a663657d 627 sv = newSVhek(GvNAME_HEK(gv));
c4ba80c3
NC
628 break;
629 case 'P':
1b6737cc 630 if (strEQ(second_letter, "ACKAGE")) {
7fa3a4ab
NC
631 const HV * const stash = GvSTASH(gv);
632 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
396482e1 633 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
c4ba80c3
NC
634 }
635 break;
636 case 'S':
1b6737cc 637 if (strEQ(second_letter, "CALAR"))
f9d52e31 638 tmpRef = GvSVn(gv);
c4ba80c3 639 break;
39b99f21 640 }
fb73857a 641 }
76e3520e
GS
642 if (tmpRef)
643 sv = newRV(tmpRef);
fb73857a
PP
644 if (sv)
645 sv_2mortal(sv);
646 else
3280af22 647 sv = &PL_sv_undef;
fb73857a
PP
648 XPUSHs(sv);
649 RETURN;
650}
651
a0d0e21e 652/* Pattern matching */
79072805 653
a0d0e21e 654PP(pp_study)
79072805 655{
97aff369 656 dVAR; dSP; dPOPss;
a0d0e21e 657 register unsigned char *s;
72de20cd 658 char *sfirst_raw;
a0d0e21e 659 STRLEN len;
4185c919 660 MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL;
72de20cd
NC
661 U8 quanta;
662 STRLEN size;
4185c919
NC
663
664 if (mg && SvSCREAM(sv))
665 RETPUSHYES;
a0d0e21e 666
a4f4e906 667 s = (unsigned char*)(SvPV(sv, len));
bc9a5256 668 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
a4f4e906
NC
669 /* No point in studying a zero length string, and not safe to study
670 anything that doesn't appear to be a simple scalar (and hence might
671 change between now and when the regexp engine runs without our set
bd473224 672 magic ever running) such as a reference to an object with overloaded
bc9a5256
NC
673 stringification. Also refuse to study an FBM scalar, as this gives
674 more flexibility in SV flag usage. No real-world code would ever
675 end up studying an FBM scalar, so this isn't a real pessimisation.
72de20cd
NC
676 Endemic use of I32 in Perl_screaminstr makes it hard to safely push
677 the study length limit from I32_MAX to U32_MAX - 1.
bc9a5256 678 */
a4f4e906
NC
679 RETPUSHNO;
680 }
681
72de20cd
NC
682 if (len < 0xFF) {
683 quanta = 1;
684 } else if (len < 0xFFFF) {
685 quanta = 2;
686 } else
687 quanta = 4;
a0d0e21e 688
72de20cd
NC
689 size = (256 + len) * quanta;
690 sfirst_raw = (char *)safemalloc(size);
691
692 if (!sfirst_raw)
cea2e8a9 693 DIE(aTHX_ "do_study: out of memory");
a0d0e21e 694
4185c919
NC
695 SvSCREAM_on(sv);
696 if (!mg)
697 mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0);
72de20cd
NC
698 mg->mg_ptr = sfirst_raw;
699 mg->mg_len = size;
700 mg->mg_private = quanta;
701
702 memset(sfirst_raw, ~0, 256 * quanta);
703
704 /* The assumption here is that most studied strings are fairly short, hence
705 the pain of the extra code is worth it, given the memory savings.
706 80 character string, 336 bytes as U8, down from 1344 as U32
707 800 character string, 2112 bytes as U16, down from 4224 as U32
708 */
709
710 if (quanta == 1) {
711 U8 *const sfirst = (U8 *)sfirst_raw;
712 U8 *const snext = sfirst + 256;
713 while (len-- > 0) {
714 const U8 ch = s[len];
715 snext[len] = sfirst[ch];
716 sfirst[ch] = len;
717 }
718 } else if (quanta == 2) {
719 U16 *const sfirst = (U16 *)sfirst_raw;
720 U16 *const snext = sfirst + 256;
721 while (len-- > 0) {
722 const U8 ch = s[len];
723 snext[len] = sfirst[ch];
724 sfirst[ch] = len;
725 }
726 } else {
727 U32 *const sfirst = (U32 *)sfirst_raw;
728 U32 *const snext = sfirst + 256;
729 while (len-- > 0) {
730 const U8 ch = s[len];
731 snext[len] = sfirst[ch];
732 sfirst[ch] = len;
733 }
79072805
LW
734 }
735
1e422769 736 RETPUSHYES;
79072805
LW
737}
738
a0d0e21e 739PP(pp_trans)
79072805 740{
97aff369 741 dVAR; dSP; dTARG;
a0d0e21e
LW
742 SV *sv;
743
533c011a 744 if (PL_op->op_flags & OPf_STACKED)
a0d0e21e 745 sv = POPs;
59f00321
RGS
746 else if (PL_op->op_private & OPpTARGET_MY)
747 sv = GETTARGET;
79072805 748 else {
54b9620d 749 sv = DEFSV;
a0d0e21e 750 EXTEND(SP,1);
79072805 751 }
adbc6bb1 752 TARG = sv_newmortal();
bb16bae8
FC
753 if(PL_op->op_type == OP_TRANSR) {
754 SV * const newsv = newSVsv(sv);
755 do_trans(newsv);
756 mPUSHs(newsv);
757 }
758 else PUSHi(do_trans(sv));
a0d0e21e 759 RETURN;
79072805
LW
760}
761
a0d0e21e 762/* Lvalue operators. */
79072805 763
81745e4e
NC
764static void
765S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
766{
767 dVAR;
768 STRLEN len;
769 char *s;
770
771 PERL_ARGS_ASSERT_DO_CHOMP;
772
773 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
774 return;
775 if (SvTYPE(sv) == SVt_PVAV) {
776 I32 i;
777 AV *const av = MUTABLE_AV(sv);
778 const I32 max = AvFILL(av);
779
780 for (i = 0; i <= max; i++) {
781 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
782 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
783 do_chomp(retval, sv, chomping);
784 }
785 return;
786 }
787 else if (SvTYPE(sv) == SVt_PVHV) {
788 HV* const hv = MUTABLE_HV(sv);
789 HE* entry;
790 (void)hv_iterinit(hv);
791 while ((entry = hv_iternext(hv)))
792 do_chomp(retval, hv_iterval(hv,entry), chomping);
793 return;
794 }
795 else if (SvREADONLY(sv)) {
796 if (SvFAKE(sv)) {
797 /* SV is copy-on-write */
798 sv_force_normal_flags(sv, 0);
799 }
800 if (SvREADONLY(sv))
801 Perl_croak_no_modify(aTHX);
802 }
803
804 if (PL_encoding) {
805 if (!SvUTF8(sv)) {
806 /* XXX, here sv is utf8-ized as a side-effect!
807 If encoding.pm is used properly, almost string-generating
808 operations, including literal strings, chr(), input data, etc.
809 should have been utf8-ized already, right?
810 */
811 sv_recode_to_utf8(sv, PL_encoding);
812 }
813 }
814
815 s = SvPV(sv, len);
816 if (chomping) {
817 char *temp_buffer = NULL;
818 SV *svrecode = NULL;
819
820 if (s && len) {
821 s += --len;
822 if (RsPARA(PL_rs)) {
823 if (*s != '\n')
824 goto nope;
825 ++SvIVX(retval);
826 while (len && s[-1] == '\n') {
827 --len;
828 --s;
829 ++SvIVX(retval);
830 }
831 }
832 else {
833 STRLEN rslen, rs_charlen;
834 const char *rsptr = SvPV_const(PL_rs, rslen);
835
836 rs_charlen = SvUTF8(PL_rs)
837 ? sv_len_utf8(PL_rs)
838 : rslen;
839
840 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
841 /* Assumption is that rs is shorter than the scalar. */
842 if (SvUTF8(PL_rs)) {
843 /* RS is utf8, scalar is 8 bit. */
844 bool is_utf8 = TRUE;
845 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
846 &rslen, &is_utf8);
847 if (is_utf8) {
848 /* Cannot downgrade, therefore cannot possibly match
849 */
850 assert (temp_buffer == rsptr);
851 temp_buffer = NULL;
852 goto nope;
853 }
854 rsptr = temp_buffer;
855 }
856 else if (PL_encoding) {
857 /* RS is 8 bit, encoding.pm is used.
858 * Do not recode PL_rs as a side-effect. */
859 svrecode = newSVpvn(rsptr, rslen);
860 sv_recode_to_utf8(svrecode, PL_encoding);
861 rsptr = SvPV_const(svrecode, rslen);
862 rs_charlen = sv_len_utf8(svrecode);
863 }
864 else {
865 /* RS is 8 bit, scalar is utf8. */
866 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
867 rsptr = temp_buffer;
868 }
869 }
870 if (rslen == 1) {
871 if (*s != *rsptr)
872 goto nope;
873 ++SvIVX(retval);
874 }
875 else {
876 if (len < rslen - 1)
877 goto nope;
878 len -= rslen - 1;
879 s -= rslen - 1;
880 if (memNE(s, rsptr, rslen))
881 goto nope;
882 SvIVX(retval) += rs_charlen;
883 }
884 }
885 s = SvPV_force_nolen(sv);
886 SvCUR_set(sv, len);
887 *SvEND(sv) = '\0';
888 SvNIOK_off(sv);
889 SvSETMAGIC(sv);
890 }
891 nope:
892
893 SvREFCNT_dec(svrecode);
894
895 Safefree(temp_buffer);
896 } else {
897 if (len && !SvPOK(sv))
898 s = SvPV_force_nomg(sv, len);
899 if (DO_UTF8(sv)) {
900 if (s && len) {
901 char * const send = s + len;
902 char * const start = s;
903 s = send - 1;
904 while (s > start && UTF8_IS_CONTINUATION(*s))
905 s--;
906 if (is_utf8_string((U8*)s, send - s)) {
907 sv_setpvn(retval, s, send - s);
908 *s = '\0';
909 SvCUR_set(sv, s - start);
910 SvNIOK_off(sv);
911 SvUTF8_on(retval);
912 }
913 }
914 else
915 sv_setpvs(retval, "");
916 }
917 else if (s && len) {
918 s += --len;
919 sv_setpvn(retval, s, 1);
920 *s = '\0';
921 SvCUR_set(sv, len);
922 SvUTF8_off(sv);
923 SvNIOK_off(sv);
924 }
925 else
926 sv_setpvs(retval, "");
927 SvSETMAGIC(sv);
928 }
929}
930
a0d0e21e
LW
931PP(pp_schop)
932{
97aff369 933 dVAR; dSP; dTARGET;
fa54efae
NC
934 const bool chomping = PL_op->op_type == OP_SCHOMP;
935
936 if (chomping)
937 sv_setiv(TARG, 0);
938 do_chomp(TARG, TOPs, chomping);
a0d0e21e
LW
939 SETTARG;
940 RETURN;
79072805
LW
941}
942
a0d0e21e 943PP(pp_chop)
79072805 944{
97aff369 945 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
fa54efae 946 const bool chomping = PL_op->op_type == OP_CHOMP;
8ec5e241 947
fa54efae
NC
948 if (chomping)
949 sv_setiv(TARG, 0);
20cf1f79 950 while (MARK < SP)
fa54efae 951 do_chomp(TARG, *++MARK, chomping);
20cf1f79
NC
952 SP = ORIGMARK;
953 XPUSHTARG;
a0d0e21e 954 RETURN;
79072805
LW
955}
956
a0d0e21e
LW
957PP(pp_undef)
958{
97aff369 959 dVAR; dSP;
a0d0e21e
LW
960 SV *sv;
961
533c011a 962 if (!PL_op->op_private) {
774d564b 963 EXTEND(SP, 1);
a0d0e21e 964 RETPUSHUNDEF;
774d564b 965 }
79072805 966
a0d0e21e
LW
967 sv = POPs;
968 if (!sv)
969 RETPUSHUNDEF;
85e6fe83 970
765f542d 971 SV_CHECK_THINKFIRST_COW_DROP(sv);
85e6fe83 972
a0d0e21e
LW
973 switch (SvTYPE(sv)) {
974 case SVt_NULL:
975 break;
976 case SVt_PVAV:
502c6561 977 av_undef(MUTABLE_AV(sv));
a0d0e21e
LW
978 break;
979 case SVt_PVHV:
85fbaab2 980 hv_undef(MUTABLE_HV(sv));
a0d0e21e
LW
981 break;
982 case SVt_PVCV:
a2a5de95
NC
983 if (cv_const_sv((const CV *)sv))
984 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Constant subroutine %s undefined",
985 CvANON((const CV *)sv) ? "(anonymous)"
986 : GvENAME(CvGV((const CV *)sv)));
5f66b61c 987 /* FALLTHROUGH */
9607fc9c 988 case SVt_PVFM:
6fc92669
GS
989 {
990 /* let user-undef'd sub keep its identity */
ea726b52
NC
991 GV* const gv = CvGV((const CV *)sv);
992 cv_undef(MUTABLE_CV(sv));
b3f91e91 993 CvGV_set(MUTABLE_CV(sv), gv);
6fc92669 994 }
a0d0e21e 995 break;
8e07c86e 996 case SVt_PVGV:
6e592b3a 997 if (SvFAKE(sv)) {
3280af22 998 SvSetMagicSV(sv, &PL_sv_undef);
6e592b3a
BM
999 break;
1000 }
1001 else if (isGV_with_GP(sv)) {
20408e3c 1002 GP *gp;
dd69841b
BB
1003 HV *stash;
1004
dd69841b 1005 /* undef *Pkg::meth_name ... */
e530fb81
FC
1006 bool method_changed
1007 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
1008 && HvENAME_get(stash);
1009 /* undef *Foo:: */
1010 if((stash = GvHV((const GV *)sv))) {
1011 if(HvENAME_get(stash))
1012 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
1013 else stash = NULL;
1014 }
dd69841b 1015
159b6efe 1016 gp_free(MUTABLE_GV(sv));
a02a5408 1017 Newxz(gp, 1, GP);
c43ae56f 1018 GvGP_set(sv, gp_ref(gp));
561b68a9 1019 GvSV(sv) = newSV(0);
57843af0 1020 GvLINE(sv) = CopLINE(PL_curcop);
159b6efe 1021 GvEGV(sv) = MUTABLE_GV(sv);
20408e3c 1022 GvMULTI_on(sv);
e530fb81
FC
1023
1024 if(stash)
afdbe55d 1025 mro_package_moved(NULL, stash, (const GV *)sv, 0);
e530fb81
FC
1026 stash = NULL;
1027 /* undef *Foo::ISA */
1028 if( strEQ(GvNAME((const GV *)sv), "ISA")
1029 && (stash = GvSTASH((const GV *)sv))
1030 && (method_changed || HvENAME(stash)) )
1031 mro_isa_changed_in(stash);
1032 else if(method_changed)
1033 mro_method_changed_in(
da9043f5 1034 GvSTASH((const GV *)sv)
e530fb81
FC
1035 );
1036
6e592b3a 1037 break;
20408e3c 1038 }
6e592b3a 1039 /* FALL THROUGH */
a0d0e21e 1040 default:
b15aece3 1041 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
8bd4d4c5 1042 SvPV_free(sv);
c445ea15 1043 SvPV_set(sv, NULL);
4633a7c4 1044 SvLEN_set(sv, 0);
a0d0e21e 1045 }
0c34ef67 1046 SvOK_off(sv);
4633a7c4 1047 SvSETMAGIC(sv);
79072805 1048 }
a0d0e21e
LW
1049
1050 RETPUSHUNDEF;
79072805
LW
1051}
1052
a0d0e21e
LW
1053PP(pp_postinc)
1054{
97aff369 1055 dVAR; dSP; dTARGET;
c22c99bc
FC
1056 const bool inc =
1057 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
60092ce4 1058 if (SvTYPE(TOPs) >= SVt_PVAV || (isGV_with_GP(TOPs) && !SvFAKE(TOPs)))
6ad8f254 1059 Perl_croak_no_modify(aTHX);
7dcb9b98
DM
1060 if (SvROK(TOPs))
1061 TARG = sv_newmortal();
a0d0e21e 1062 sv_setsv(TARG, TOPs);
3510b4a1 1063 if (!SvREADONLY(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
c22c99bc 1064 && SvIVX(TOPs) != (inc ? IV_MAX : IV_MIN))
55497cff 1065 {
c22c99bc 1066 SvIV_set(TOPs, SvIVX(TOPs) + (inc ? 1 : -1));
55497cff 1067 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
748a9306 1068 }
c22c99bc 1069 else if (inc)
6f1401dc 1070 sv_inc_nomg(TOPs);
c22c99bc 1071 else sv_dec_nomg(TOPs);
a0d0e21e 1072 SvSETMAGIC(TOPs);
1e54a23f 1073 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
c22c99bc 1074 if (inc && !SvOK(TARG))
a0d0e21e
LW
1075 sv_setiv(TARG, 0);
1076 SETs(TARG);
1077 return NORMAL;
1078}
79072805 1079
a0d0e21e
LW
1080/* Ordinary operators. */
1081
1082PP(pp_pow)
1083{
800401ee 1084 dVAR; dSP; dATARGET; SV *svl, *svr;
58d76dfd 1085#ifdef PERL_PRESERVE_IVUV
52a96ae6
HS
1086 bool is_int = 0;
1087#endif
6f1401dc
DM
1088 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1089 svr = TOPs;
1090 svl = TOPm1s;
52a96ae6
HS
1091#ifdef PERL_PRESERVE_IVUV
1092 /* For integer to integer power, we do the calculation by hand wherever
1093 we're sure it is safe; otherwise we call pow() and try to convert to
1094 integer afterwards. */
58d76dfd 1095 {
6f1401dc 1096 SvIV_please_nomg(svr);
800401ee 1097 if (SvIOK(svr)) {
6f1401dc 1098 SvIV_please_nomg(svl);
800401ee 1099 if (SvIOK(svl)) {
900658e3
PF
1100 UV power;
1101 bool baseuok;
1102 UV baseuv;
1103
800401ee
JH
1104 if (SvUOK(svr)) {
1105 power = SvUVX(svr);
900658e3 1106 } else {
800401ee 1107 const IV iv = SvIVX(svr);
900658e3
PF
1108 if (iv >= 0) {
1109 power = iv;
1110 } else {
1111 goto float_it; /* Can't do negative powers this way. */
1112 }
1113 }
1114
800401ee 1115 baseuok = SvUOK(svl);
900658e3 1116 if (baseuok) {
800401ee 1117 baseuv = SvUVX(svl);
900658e3 1118 } else {
800401ee 1119 const IV iv = SvIVX(svl);
900658e3
PF
1120 if (iv >= 0) {
1121 baseuv = iv;
1122 baseuok = TRUE; /* effectively it's a UV now */
1123 } else {
1124 baseuv = -iv; /* abs, baseuok == false records sign */
1125 }
1126 }
52a96ae6
HS
1127 /* now we have integer ** positive integer. */
1128 is_int = 1;
1129
1130 /* foo & (foo - 1) is zero only for a power of 2. */
58d76dfd 1131 if (!(baseuv & (baseuv - 1))) {
52a96ae6 1132 /* We are raising power-of-2 to a positive integer.
58d76dfd
JH
1133 The logic here will work for any base (even non-integer
1134 bases) but it can be less accurate than
1135 pow (base,power) or exp (power * log (base)) when the
1136 intermediate values start to spill out of the mantissa.
1137 With powers of 2 we know this can't happen.
1138 And powers of 2 are the favourite thing for perl
1139 programmers to notice ** not doing what they mean. */
1140 NV result = 1.0;
1141 NV base = baseuok ? baseuv : -(NV)baseuv;
900658e3
PF
1142
1143 if (power & 1) {
1144 result *= base;
1145 }
1146 while (power >>= 1) {
1147 base *= base;
1148 if (power & 1) {
1149 result *= base;
1150 }
1151 }
58d76dfd
JH
1152 SP--;
1153 SETn( result );
6f1401dc 1154 SvIV_please_nomg(svr);
58d76dfd 1155 RETURN;
52a96ae6
HS
1156 } else {
1157 register unsigned int highbit = 8 * sizeof(UV);
900658e3
PF
1158 register unsigned int diff = 8 * sizeof(UV);
1159 while (diff >>= 1) {
1160 highbit -= diff;
1161 if (baseuv >> highbit) {
1162 highbit += diff;
1163 }
52a96ae6
HS
1164 }
1165 /* we now have baseuv < 2 ** highbit */
1166 if (power * highbit <= 8 * sizeof(UV)) {
1167 /* result will definitely fit in UV, so use UV math
1168 on same algorithm as above */
1169 register UV result = 1;
1170 register UV base = baseuv;
f2338a2e 1171 const bool odd_power = cBOOL(power & 1);
900658e3
PF
1172 if (odd_power) {
1173 result *= base;
1174 }
1175 while (power >>= 1) {
1176 base *= base;
1177 if (power & 1) {
52a96ae6 1178 result *= base;
52a96ae6
HS
1179 }
1180 }
1181 SP--;
0615a994 1182 if (baseuok || !odd_power)
52a96ae6
HS
1183 /* answer is positive */
1184 SETu( result );
1185 else if (result <= (UV)IV_MAX)
1186 /* answer negative, fits in IV */
1187 SETi( -(IV)result );
1188 else if (result == (UV)IV_MIN)
1189 /* 2's complement assumption: special case IV_MIN */
1190 SETi( IV_MIN );
1191 else
1192 /* answer negative, doesn't fit */
1193 SETn( -(NV)result );
1194 RETURN;
1195 }
1196 }
1197 }
1198 }
58d76dfd 1199 }
52a96ae6 1200 float_it:
58d76dfd 1201#endif
a0d0e21e 1202 {
6f1401dc
DM
1203 NV right = SvNV_nomg(svr);
1204 NV left = SvNV_nomg(svl);
4efa5a16 1205 (void)POPs;
3aaeb624
JA
1206
1207#if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1208 /*
1209 We are building perl with long double support and are on an AIX OS
1210 afflicted with a powl() function that wrongly returns NaNQ for any
1211 negative base. This was reported to IBM as PMR #23047-379 on
1212 03/06/2006. The problem exists in at least the following versions
1213 of AIX and the libm fileset, and no doubt others as well:
1214
1215 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1216 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1217 AIX 5.2.0 bos.adt.libm 5.2.0.85
1218
1219 So, until IBM fixes powl(), we provide the following workaround to
1220 handle the problem ourselves. Our logic is as follows: for
1221 negative bases (left), we use fmod(right, 2) to check if the
1222 exponent is an odd or even integer:
1223
1224 - if odd, powl(left, right) == -powl(-left, right)
1225 - if even, powl(left, right) == powl(-left, right)
1226
1227 If the exponent is not an integer, the result is rightly NaNQ, so
1228 we just return that (as NV_NAN).
1229 */
1230
1231 if (left < 0.0) {
1232 NV mod2 = Perl_fmod( right, 2.0 );
1233 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1234 SETn( -Perl_pow( -left, right) );
1235 } else if (mod2 == 0.0) { /* even integer */
1236 SETn( Perl_pow( -left, right) );
1237 } else { /* fractional power */
1238 SETn( NV_NAN );
1239 }
1240 } else {
1241 SETn( Perl_pow( left, right) );
1242 }
1243#else
52a96ae6 1244 SETn( Perl_pow( left, right) );
3aaeb624
JA
1245#endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1246
52a96ae6
HS
1247#ifdef PERL_PRESERVE_IVUV
1248 if (is_int)
6f1401dc 1249 SvIV_please_nomg(svr);
52a96ae6
HS
1250#endif
1251 RETURN;
93a17b20 1252 }
a0d0e21e
LW
1253}
1254
1255PP(pp_multiply)
1256{
800401ee 1257 dVAR; dSP; dATARGET; SV *svl, *svr;
6f1401dc
DM
1258 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1259 svr = TOPs;
1260 svl = TOPm1s;
28e5dec8 1261#ifdef PERL_PRESERVE_IVUV
6f1401dc 1262 SvIV_please_nomg(svr);
800401ee 1263 if (SvIOK(svr)) {
28e5dec8
JH
1264 /* Unless the left argument is integer in range we are going to have to
1265 use NV maths. Hence only attempt to coerce the right argument if
1266 we know the left is integer. */
1267 /* Left operand is defined, so is it IV? */
6f1401dc 1268 SvIV_please_nomg(svl);
800401ee
JH
1269 if (SvIOK(svl)) {
1270 bool auvok = SvUOK(svl);
1271 bool buvok = SvUOK(svr);
28e5dec8
JH
1272 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1273 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1274 UV alow;
1275 UV ahigh;
1276 UV blow;
1277 UV bhigh;
1278
1279 if (auvok) {
800401ee 1280 alow = SvUVX(svl);
28e5dec8 1281 } else {
800401ee 1282 const IV aiv = SvIVX(svl);
28e5dec8
JH
1283 if (aiv >= 0) {
1284 alow = aiv;
1285 auvok = TRUE; /* effectively it's a UV now */
1286 } else {
1287 alow = -aiv; /* abs, auvok == false records sign */
1288 }
1289 }
1290 if (buvok) {
800401ee 1291 blow = SvUVX(svr);
28e5dec8 1292 } else {
800401ee 1293 const IV biv = SvIVX(svr);
28e5dec8
JH
1294 if (biv >= 0) {
1295 blow = biv;
1296 buvok = TRUE; /* effectively it's a UV now */
1297 } else {
1298 blow = -biv; /* abs, buvok == false records sign */
1299 }
1300 }
1301
1302 /* If this does sign extension on unsigned it's time for plan B */
1303 ahigh = alow >> (4 * sizeof (UV));
1304 alow &= botmask;
1305 bhigh = blow >> (4 * sizeof (UV));
1306 blow &= botmask;
1307 if (ahigh && bhigh) {
6f207bd3 1308 NOOP;
28e5dec8
JH
1309 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1310 which is overflow. Drop to NVs below. */
1311 } else if (!ahigh && !bhigh) {
1312 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1313 so the unsigned multiply cannot overflow. */
c445ea15 1314 const UV product = alow * blow;
28e5dec8
JH
1315 if (auvok == buvok) {
1316 /* -ve * -ve or +ve * +ve gives a +ve result. */
1317 SP--;
1318 SETu( product );
1319 RETURN;
1320 } else if (product <= (UV)IV_MIN) {
1321 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1322 /* -ve result, which could overflow an IV */
1323 SP--;
25716404 1324 SETi( -(IV)product );
28e5dec8
JH
1325 RETURN;
1326 } /* else drop to NVs below. */
1327 } else {
1328 /* One operand is large, 1 small */
1329 UV product_middle;
1330 if (bhigh) {
1331 /* swap the operands */
1332 ahigh = bhigh;
1333 bhigh = blow; /* bhigh now the temp var for the swap */
1334 blow = alow;
1335 alow = bhigh;
1336 }
1337 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1338 multiplies can't overflow. shift can, add can, -ve can. */
1339 product_middle = ahigh * blow;
1340 if (!(product_middle & topmask)) {
1341 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1342 UV product_low;
1343 product_middle <<= (4 * sizeof (UV));
1344 product_low = alow * blow;
1345
1346 /* as for pp_add, UV + something mustn't get smaller.
1347 IIRC ANSI mandates this wrapping *behaviour* for
1348 unsigned whatever the actual representation*/
1349 product_low += product_middle;
1350 if (product_low >= product_middle) {
1351 /* didn't overflow */
1352 if (auvok == buvok) {
1353 /* -ve * -ve or +ve * +ve gives a +ve result. */
1354 SP--;
1355 SETu( product_low );
1356 RETURN;
1357 } else if (product_low <= (UV)IV_MIN) {
1358 /* 2s complement assumption again */
1359 /* -ve result, which could overflow an IV */
1360 SP--;
25716404 1361 SETi( -(IV)product_low );
28e5dec8
JH
1362 RETURN;
1363 } /* else drop to NVs below. */
1364 }
1365 } /* product_middle too large */
1366 } /* ahigh && bhigh */
800401ee
JH
1367 } /* SvIOK(svl) */
1368 } /* SvIOK(svr) */
28e5dec8 1369#endif
a0d0e21e 1370 {
6f1401dc
DM
1371 NV right = SvNV_nomg(svr);
1372 NV left = SvNV_nomg(svl);
4efa5a16 1373 (void)POPs;
a0d0e21e
LW
1374 SETn( left * right );
1375 RETURN;
79072805 1376 }
a0d0e21e
LW
1377}
1378
1379PP(pp_divide)
1380{
800401ee 1381 dVAR; dSP; dATARGET; SV *svl, *svr;
6f1401dc
DM
1382 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1383 svr = TOPs;
1384 svl = TOPm1s;
5479d192 1385 /* Only try to do UV divide first
68795e93 1386 if ((SLOPPYDIVIDE is true) or
5479d192
NC
1387 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1388 to preserve))
1389 The assumption is that it is better to use floating point divide
1390 whenever possible, only doing integer divide first if we can't be sure.
1391 If NV_PRESERVES_UV is true then we know at compile time that no UV
1392 can be too large to preserve, so don't need to compile the code to
1393 test the size of UVs. */
1394
a0d0e21e 1395#ifdef SLOPPYDIVIDE
5479d192
NC
1396# define PERL_TRY_UV_DIVIDE
1397 /* ensure that 20./5. == 4. */
a0d0e21e 1398#else
5479d192
NC
1399# ifdef PERL_PRESERVE_IVUV
1400# ifndef NV_PRESERVES_UV
1401# define PERL_TRY_UV_DIVIDE
1402# endif
1403# endif
a0d0e21e 1404#endif
5479d192
NC
1405
1406#ifdef PERL_TRY_UV_DIVIDE
6f1401dc 1407 SvIV_please_nomg(svr);
800401ee 1408 if (SvIOK(svr)) {
6f1401dc 1409 SvIV_please_nomg(svl);
800401ee
JH
1410 if (SvIOK(svl)) {
1411 bool left_non_neg = SvUOK(svl);
1412 bool right_non_neg = SvUOK(svr);
5479d192
NC
1413 UV left;
1414 UV right;
1415
1416 if (right_non_neg) {
800401ee 1417 right = SvUVX(svr);
5479d192
NC
1418 }
1419 else {
800401ee 1420 const IV biv = SvIVX(svr);
5479d192
NC
1421 if (biv >= 0) {
1422 right = biv;
1423 right_non_neg = TRUE; /* effectively it's a UV now */
1424 }
1425 else {
1426 right = -biv;
1427 }
1428 }
1429 /* historically undef()/0 gives a "Use of uninitialized value"
1430 warning before dieing, hence this test goes here.
1431 If it were immediately before the second SvIV_please, then
1432 DIE() would be invoked before left was even inspected, so
486ec47a 1433 no inspection would give no warning. */
5479d192
NC
1434 if (right == 0)
1435 DIE(aTHX_ "Illegal division by zero");
1436
1437 if (left_non_neg) {
800401ee 1438 left = SvUVX(svl);
5479d192
NC
1439 }
1440 else {
800401ee 1441 const IV aiv = SvIVX(svl);
5479d192
NC
1442 if (aiv >= 0) {
1443 left = aiv;
1444 left_non_neg = TRUE; /* effectively it's a UV now */
1445 }
1446 else {
1447 left = -aiv;
1448 }
1449 }
1450
1451 if (left >= right
1452#ifdef SLOPPYDIVIDE
1453 /* For sloppy divide we always attempt integer division. */
1454#else
1455 /* Otherwise we only attempt it if either or both operands
1456 would not be preserved by an NV. If both fit in NVs
0c2ee62a
NC
1457 we fall through to the NV divide code below. However,
1458 as left >= right to ensure integer result here, we know that
1459 we can skip the test on the right operand - right big
1460 enough not to be preserved can't get here unless left is
1461 also too big. */
1462
1463 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
5479d192
NC
1464#endif
1465 ) {
1466 /* Integer division can't overflow, but it can be imprecise. */
1b6737cc 1467 const UV result = left / right;
5479d192
NC
1468 if (result * right == left) {
1469 SP--; /* result is valid */
1470 if (left_non_neg == right_non_neg) {
1471 /* signs identical, result is positive. */
1472 SETu( result );
1473 RETURN;
1474 }
1475 /* 2s complement assumption */
1476 if (result <= (UV)IV_MIN)
91f3b821 1477 SETi( -(IV)result );
5479d192
NC
1478 else {
1479 /* It's exact but too negative for IV. */
1480 SETn( -(NV)result );
1481 }
1482 RETURN;
1483 } /* tried integer divide but it was not an integer result */
32fdb065 1484 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
5479d192
NC
1485 } /* left wasn't SvIOK */
1486 } /* right wasn't SvIOK */
1487#endif /* PERL_TRY_UV_DIVIDE */
1488 {
6f1401dc
DM
1489 NV right = SvNV_nomg(svr);
1490 NV left = SvNV_nomg(svl);
4efa5a16 1491 (void)POPs;(void)POPs;
ebc6a117
PD
1492#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1493 if (! Perl_isnan(right) && right == 0.0)
1494#else
5479d192 1495 if (right == 0.0)
ebc6a117 1496#endif
5479d192
NC
1497 DIE(aTHX_ "Illegal division by zero");
1498 PUSHn( left / right );
1499 RETURN;
79072805 1500 }
a0d0e21e
LW
1501}
1502
1503PP(pp_modulo)
1504{
6f1401dc
DM
1505 dVAR; dSP; dATARGET;
1506 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
a0d0e21e 1507 {
9c5ffd7c
JH
1508 UV left = 0;
1509 UV right = 0;
dc656993
JH
1510 bool left_neg = FALSE;
1511 bool right_neg = FALSE;
e2c88acc
NC
1512 bool use_double = FALSE;
1513 bool dright_valid = FALSE;
9c5ffd7c
JH
1514 NV dright = 0.0;
1515 NV dleft = 0.0;
6f1401dc
DM
1516 SV * const svr = TOPs;
1517 SV * const svl = TOPm1s;
1518 SvIV_please_nomg(svr);
800401ee
JH
1519 if (SvIOK(svr)) {
1520 right_neg = !SvUOK(svr);
e2c88acc 1521 if (!right_neg) {
800401ee 1522 right = SvUVX(svr);
e2c88acc 1523 } else {
800401ee 1524 const IV biv = SvIVX(svr);
e2c88acc
NC
1525 if (biv >= 0) {
1526 right = biv;
1527 right_neg = FALSE; /* effectively it's a UV now */
1528 } else {
1529 right = -biv;
1530 }
1531 }
1532 }
1533 else {
6f1401dc 1534 dright = SvNV_nomg(svr);
787eafbd
IZ
1535 right_neg = dright < 0;
1536 if (right_neg)
1537 dright = -dright;
e2c88acc
NC
1538 if (dright < UV_MAX_P1) {
1539 right = U_V(dright);
1540 dright_valid = TRUE; /* In case we need to use double below. */
1541 } else {
1542 use_double = TRUE;
1543 }
787eafbd 1544 }
a0d0e21e 1545
e2c88acc
NC
1546 /* At this point use_double is only true if right is out of range for
1547 a UV. In range NV has been rounded down to nearest UV and
1548 use_double false. */
6f1401dc 1549 SvIV_please_nomg(svl);
800401ee
JH
1550 if (!use_double && SvIOK(svl)) {
1551 if (SvIOK(svl)) {
1552 left_neg = !SvUOK(svl);
e2c88acc 1553 if (!left_neg) {
800401ee 1554 left = SvUVX(svl);
e2c88acc 1555 } else {
800401ee 1556 const IV aiv = SvIVX(svl);
e2c88acc
NC
1557 if (aiv >= 0) {
1558 left = aiv;
1559 left_neg = FALSE; /* effectively it's a UV now */
1560 } else {
1561 left = -aiv;
1562 }
1563 }
1564 }
1565 }
787eafbd 1566 else {
6f1401dc 1567 dleft = SvNV_nomg(svl);
787eafbd
IZ
1568 left_neg = dleft < 0;
1569 if (left_neg)
1570 dleft = -dleft;
68dc0745 1571
e2c88acc
NC
1572 /* This should be exactly the 5.6 behaviour - if left and right are
1573 both in range for UV then use U_V() rather than floor. */
1574 if (!use_double) {
1575 if (dleft < UV_MAX_P1) {
1576 /* right was in range, so is dleft, so use UVs not double.
1577 */
1578 left = U_V(dleft);
1579 }
1580 /* left is out of range for UV, right was in range, so promote
1581 right (back) to double. */
1582 else {
1583 /* The +0.5 is used in 5.6 even though it is not strictly
1584 consistent with the implicit +0 floor in the U_V()
1585 inside the #if 1. */
1586 dleft = Perl_floor(dleft + 0.5);
1587 use_double = TRUE;
1588 if (dright_valid)
1589 dright = Perl_floor(dright + 0.5);
1590 else
1591 dright = right;
1592 }
1593 }
1594 }
6f1401dc 1595 sp -= 2;
787eafbd 1596 if (use_double) {
65202027 1597 NV dans;
787eafbd 1598
787eafbd 1599 if (!dright)
cea2e8a9 1600 DIE(aTHX_ "Illegal modulus zero");
787eafbd 1601
65202027 1602 dans = Perl_fmod(dleft, dright);
787eafbd
IZ
1603 if ((left_neg != right_neg) && dans)
1604 dans = dright - dans;
1605 if (right_neg)
1606 dans = -dans;
1607 sv_setnv(TARG, dans);
1608 }
1609 else {
1610 UV ans;
1611
787eafbd 1612 if (!right)
cea2e8a9 1613 DIE(aTHX_ "Illegal modulus zero");
787eafbd
IZ
1614
1615 ans = left % right;
1616 if ((left_neg != right_neg) && ans)
1617 ans = right - ans;
1618 if (right_neg) {
1619 /* XXX may warn: unary minus operator applied to unsigned type */
1620 /* could change -foo to be (~foo)+1 instead */
1621 if (ans <= ~((UV)IV_MAX)+1)
1622 sv_setiv(TARG, ~ans+1);
1623 else
65202027 1624 sv_setnv(TARG, -(NV)ans);
787eafbd
IZ
1625 }
1626 else
1627 sv_setuv(TARG, ans);
1628 }
1629 PUSHTARG;
1630 RETURN;
79072805 1631 }
a0d0e21e 1632}
79072805 1633
a0d0e21e
LW
1634PP(pp_repeat)
1635{
6f1401dc 1636 dVAR; dSP; dATARGET;
2b573ace 1637 register IV count;
6f1401dc
DM
1638 SV *sv;
1639
1640 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1641 /* TODO: think of some way of doing list-repeat overloading ??? */
1642 sv = POPs;
1643 SvGETMAGIC(sv);
1644 }
1645 else {
1646 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1647 sv = POPs;
1648 }
1649
2b573ace
JH
1650 if (SvIOKp(sv)) {
1651 if (SvUOK(sv)) {
6f1401dc 1652 const UV uv = SvUV_nomg(sv);
2b573ace
JH
1653 if (uv > IV_MAX)
1654 count = IV_MAX; /* The best we can do? */
1655 else
1656 count = uv;
1657 } else {
6f1401dc 1658 const IV iv = SvIV_nomg(sv);
2b573ace
JH
1659 if (iv < 0)
1660 count = 0;
1661 else
1662 count = iv;
1663 }
1664 }
1665 else if (SvNOKp(sv)) {
6f1401dc 1666 const NV nv = SvNV_nomg(sv);
2b573ace
JH
1667 if (nv < 0.0)
1668 count = 0;
1669 else
1670 count = (IV)nv;
1671 }
1672 else
6f1401dc
DM
1673 count = SvIV_nomg(sv);
1674
533c011a 1675 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
a0d0e21e 1676 dMARK;
0bd48802
AL
1677 static const char oom_list_extend[] = "Out of memory during list extend";
1678 const I32 items = SP - MARK;
1679 const I32 max = items * count;
79072805 1680
2b573ace
JH
1681 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1682 /* Did the max computation overflow? */
27d5b266 1683 if (items > 0 && max > 0 && (max < items || max < count))
2b573ace 1684 Perl_croak(aTHX_ oom_list_extend);
a0d0e21e
LW
1685 MEXTEND(MARK, max);
1686 if (count > 1) {
1687 while (SP > MARK) {
976c8a39
JH
1688#if 0
1689 /* This code was intended to fix 20010809.028:
1690
1691 $x = 'abcd';
1692 for (($x =~ /./g) x 2) {
1693 print chop; # "abcdabcd" expected as output.
1694 }
1695
1696 * but that change (#11635) broke this code:
1697
1698 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1699
1700 * I can't think of a better fix that doesn't introduce
1701 * an efficiency hit by copying the SVs. The stack isn't
1702 * refcounted, and mortalisation obviously doesn't
1703 * Do The Right Thing when the stack has more than
1704 * one pointer to the same mortal value.
1705 * .robin.
1706 */
e30acc16
RH
1707 if (*SP) {
1708 *SP = sv_2mortal(newSVsv(*SP));
1709 SvREADONLY_on(*SP);
1710 }
976c8a39
JH
1711#else
1712 if (*SP)
1713 SvTEMP_off((*SP));
1714#endif
a0d0e21e 1715 SP--;
79072805 1716 }
a0d0e21e
LW
1717 MARK++;
1718 repeatcpy((char*)(MARK + items), (char*)MARK,
ad64d0ec 1719 items * sizeof(const SV *), count - 1);
a0d0e21e 1720 SP += max;
79072805 1721 }
a0d0e21e
LW
1722 else if (count <= 0)
1723 SP -= items;
79072805 1724 }
a0d0e21e 1725 else { /* Note: mark already snarfed by pp_list */
0bd48802 1726 SV * const tmpstr = POPs;
a0d0e21e 1727 STRLEN len;
9b877dbb 1728 bool isutf;
2b573ace
JH
1729 static const char oom_string_extend[] =
1730 "Out of memory during string extend";
a0d0e21e 1731
6f1401dc
DM
1732 if (TARG != tmpstr)
1733 sv_setsv_nomg(TARG, tmpstr);
1734 SvPV_force_nomg(TARG, len);
9b877dbb 1735 isutf = DO_UTF8(TARG);
8ebc5c01
PP
1736 if (count != 1) {
1737 if (count < 1)
1738 SvCUR_set(TARG, 0);
1739 else {
c445ea15 1740 const STRLEN max = (UV)count * len;
19a94d75 1741 if (len > MEM_SIZE_MAX / count)
2b573ace
JH
1742 Perl_croak(aTHX_ oom_string_extend);
1743 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
8569b9dc 1744 SvGROW(TARG, max + 1);
a0d0e21e 1745 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
b162af07 1746 SvCUR_set(TARG, SvCUR(TARG) * count);
7a4c00b4 1747 }
a0d0e21e 1748 *SvEND(TARG) = '\0';
a0d0e21e 1749 }
dfcb284a
GS
1750 if (isutf)
1751 (void)SvPOK_only_UTF8(TARG);
1752 else
1753 (void)SvPOK_only(TARG);
b80b6069
RH
1754
1755 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1756 /* The parser saw this as a list repeat, and there
1757 are probably several items on the stack. But we're
1758 in scalar context, and there's no pp_list to save us
1759 now. So drop the rest of the items -- robin@kitsite.com
1760 */
1761 dMARK;
1762 SP = MARK;
1763 }
a0d0e21e 1764 PUSHTARG;
79072805 1765 }
a0d0e21e
LW
1766 RETURN;
1767}
79072805 1768
a0d0e21e
LW
1769PP(pp_subtract)
1770{
800401ee 1771 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
6f1401dc
DM
1772 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1773 svr = TOPs;
1774 svl = TOPm1s;
800401ee 1775 useleft = USE_LEFT(svl);
28e5dec8 1776#ifdef PERL_PRESERVE_IVUV
7dca457a
NC
1777 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1778 "bad things" happen if you rely on signed integers wrapping. */
6f1401dc 1779 SvIV_please_nomg(svr);
800401ee 1780 if (SvIOK(svr)) {
28e5dec8
JH
1781 /* Unless the left argument is integer in range we are going to have to
1782 use NV maths. Hence only attempt to coerce the right argument if
1783 we know the left is integer. */
9c5ffd7c
JH
1784 register UV auv = 0;
1785 bool auvok = FALSE;
7dca457a
NC
1786 bool a_valid = 0;
1787
28e5dec8 1788 if (!useleft) {
7dca457a
NC
1789 auv = 0;
1790 a_valid = auvok = 1;
1791 /* left operand is undef, treat as zero. */
28e5dec8
JH
1792 } else {
1793 /* Left operand is defined, so is it IV? */
6f1401dc 1794 SvIV_please_nomg(svl);
800401ee
JH
1795 if (SvIOK(svl)) {
1796 if ((auvok = SvUOK(svl)))
1797 auv = SvUVX(svl);
7dca457a 1798 else {
800401ee 1799 register const IV aiv = SvIVX(svl);
7dca457a
NC
1800 if (aiv >= 0) {
1801 auv = aiv;
1802 auvok = 1; /* Now acting as a sign flag. */
1803 } else { /* 2s complement assumption for IV_MIN */
1804 auv = (UV)-aiv;
28e5dec8 1805 }
7dca457a
NC
1806 }
1807 a_valid = 1;
1808 }
1809 }
1810 if (a_valid) {
1811 bool result_good = 0;
1812 UV result;
1813 register UV buv;
800401ee 1814 bool buvok = SvUOK(svr);
9041c2e3 1815
7dca457a 1816 if (buvok)
800401ee 1817 buv = SvUVX(svr);
7dca457a 1818 else {
800401ee 1819 register const IV biv = SvIVX(svr);
7dca457a
NC
1820 if (biv >= 0) {
1821 buv = biv;
1822 buvok = 1;
1823 } else
1824 buv = (UV)-biv;
1825 }
1826 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
602f51c4 1827 else "IV" now, independent of how it came in.
7dca457a
NC
1828 if a, b represents positive, A, B negative, a maps to -A etc
1829 a - b => (a - b)
1830 A - b => -(a + b)
1831 a - B => (a + b)
1832 A - B => -(a - b)
1833 all UV maths. negate result if A negative.
1834 subtract if signs same, add if signs differ. */
1835
1836 if (auvok ^ buvok) {
1837 /* Signs differ. */
1838 result = auv + buv;
1839 if (result >= auv)
1840 result_good = 1;
1841 } else {
1842 /* Signs same */
1843 if (auv >= buv) {
1844 result = auv - buv;
1845 /* Must get smaller */
1846 if (result <= auv)
1847 result_good = 1;
1848 } else {
1849 result = buv - auv;
1850 if (result <= buv) {
1851 /* result really should be -(auv-buv). as its negation
1852 of true value, need to swap our result flag */
1853 auvok = !auvok;
1854 result_good = 1;
28e5dec8 1855 }
28e5dec8
JH
1856 }
1857 }
7dca457a
NC
1858 if (result_good) {
1859 SP--;
1860 if (auvok)
1861 SETu( result );
1862 else {
1863 /* Negate result */
1864 if (result <= (UV)IV_MIN)
1865 SETi( -(IV)result );
1866 else {
1867 /* result valid, but out of range for IV. */
1868 SETn( -(NV)result );
1869 }
1870 }
1871 RETURN;
1872 } /* Overflow, drop through to NVs. */
28e5dec8
JH
1873 }
1874 }
1875#endif
a0d0e21e 1876 {
6f1401dc 1877 NV value = SvNV_nomg(svr);
4efa5a16
RD
1878 (void)POPs;
1879
28e5dec8
JH
1880 if (!useleft) {
1881 /* left operand is undef, treat as zero - value */
1882 SETn(-value);
1883 RETURN;
1884 }
6f1401dc 1885 SETn( SvNV_nomg(svl) - value );
28e5dec8 1886 RETURN;
79072805 1887 }
a0d0e21e 1888}
79072805 1889
a0d0e21e
LW
1890PP(pp_left_shift)
1891{
6f1401dc 1892 dVAR; dSP; dATARGET; SV *svl, *svr;
a42d0242 1893 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
6f1401dc
DM
1894 svr = POPs;
1895 svl = TOPs;
a0d0e21e 1896 {
6f1401dc 1897 const IV shift = SvIV_nomg(svr);
d0ba1bd2 1898 if (PL_op->op_private & HINT_INTEGER) {
6f1401dc 1899 const IV i = SvIV_nomg(svl);
972b05a9 1900 SETi(i << shift);
d0ba1bd2
JH
1901 }
1902 else {
6f1401dc 1903 const UV u = SvUV_nomg(svl);
972b05a9 1904 SETu(u << shift);
d0ba1bd2 1905 }
55497cff 1906 RETURN;
79072805 1907 }
a0d0e21e 1908}
79072805 1909
a0d0e21e
LW
1910PP(pp_right_shift)
1911{
6f1401dc 1912 dVAR; dSP; dATARGET; SV *svl, *svr;
a42d0242 1913 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
6f1401dc
DM
1914 svr = POPs;
1915 svl = TOPs;
a0d0e21e 1916 {
6f1401dc 1917 const IV shift = SvIV_nomg(svr);
d0ba1bd2 1918 if (PL_op->op_private & HINT_INTEGER) {
6f1401dc 1919 const IV i = SvIV_nomg(svl);
972b05a9 1920 SETi(i >> shift);
d0ba1bd2
JH
1921 }
1922 else {
6f1401dc 1923 const UV u = SvUV_nomg(svl);
972b05a9 1924 SETu(u >> shift);
d0ba1bd2 1925 }
a0d0e21e 1926 RETURN;
93a17b20 1927 }
79072805
LW
1928}
1929
a0d0e21e 1930PP(pp_lt)
79072805 1931{
6f1401dc 1932 dVAR; dSP;
33efebe6
DM
1933 SV *left, *right;
1934
a42d0242 1935 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
33efebe6
DM
1936 right = POPs;
1937 left = TOPs;
1938 SETs(boolSV(
1939 (SvIOK_notUV(left) && SvIOK_notUV(right))
1940 ? (SvIVX(left) < SvIVX(right))
1941 : (do_ncmp(left, right) == -1)
1942 ));
1943 RETURN;
a0d0e21e 1944}
79072805 1945
a0d0e21e
LW
1946PP(pp_gt)
1947{
6f1401dc 1948 dVAR; dSP;
33efebe6 1949 SV *left, *right;
1b6737cc 1950
33efebe6
DM
1951 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1952 right = POPs;
1953 left = TOPs;
1954 SETs(boolSV(
1955 (SvIOK_notUV(left) && SvIOK_notUV(right))
1956 ? (SvIVX(left) > SvIVX(right))
1957 : (do_ncmp(left, right) == 1)
1958 ));
1959 RETURN;
a0d0e21e
LW
1960}
1961
1962PP(pp_le)
1963{
6f1401dc 1964 dVAR; dSP;
33efebe6 1965 SV *left, *right;
1b6737cc 1966
33efebe6
DM
1967 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
1968 right = POPs;
1969 left = TOPs;
1970 SETs(boolSV(
1971 (SvIOK_notUV(left) && SvIOK_notUV(right))
1972 ? (SvIVX(left) <= SvIVX(right))
1973 : (do_ncmp(left, right) <= 0)
1974 ));
1975 RETURN;
a0d0e21e
LW
1976}
1977
1978PP(pp_ge)
1979{
6f1401dc 1980 dVAR; dSP;
33efebe6
DM
1981 SV *left, *right;
1982
1983 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
1984 right = POPs;
1985 left = TOPs;
1986 SETs(boolSV(
1987 (SvIOK_notUV(left) && SvIOK_notUV(right))
1988 ? (SvIVX(left) >= SvIVX(right))
1989 : ( (do_ncmp(left, right) & 2) == 0)
1990 ));
1991 RETURN;
1992}
1b6737cc 1993
33efebe6
DM
1994PP(pp_ne)
1995{
1996 dVAR; dSP;
1997 SV *left, *right;
1998
1999 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
2000 right = POPs;
2001 left = TOPs;
2002 SETs(boolSV(
2003 (SvIOK_notUV(left) && SvIOK_notUV(right))
2004 ? (SvIVX(left) != SvIVX(right))
2005 : (do_ncmp(left, right) != 0)
2006 ));
2007 RETURN;
2008}
1b6737cc 2009
33efebe6
DM
2010/* compare left and right SVs. Returns:
2011 * -1: <
2012 * 0: ==
2013 * 1: >
2014 * 2: left or right was a NaN
2015 */
2016I32
2017Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2018{
2019 dVAR;
1b6737cc 2020
33efebe6
DM
2021 PERL_ARGS_ASSERT_DO_NCMP;
2022#ifdef PERL_PRESERVE_IVUV
2023 SvIV_please_nomg(right);
2024 /* Fortunately it seems NaN isn't IOK */
2025 if (SvIOK(right)) {
2026 SvIV_please_nomg(left);
2027 if (SvIOK(left)) {
2028 if (!SvUOK(left)) {
2029 const IV leftiv = SvIVX(left);
2030 if (!SvUOK(right)) {
2031 /* ## IV <=> IV ## */
2032 const IV rightiv = SvIVX(right);
2033 return (leftiv > rightiv) - (leftiv < rightiv);
28e5dec8 2034 }
33efebe6
DM
2035 /* ## IV <=> UV ## */
2036 if (leftiv < 0)
2037 /* As (b) is a UV, it's >=0, so it must be < */
2038 return -1;
2039 {
2040 const UV rightuv = SvUVX(right);
2041 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
28e5dec8 2042 }
28e5dec8 2043 }
79072805 2044
33efebe6
DM
2045 if (SvUOK(right)) {
2046 /* ## UV <=> UV ## */
2047 const UV leftuv = SvUVX(left);
2048 const UV rightuv = SvUVX(right);
2049 return (leftuv > rightuv) - (leftuv < rightuv);
28e5dec8 2050 }
33efebe6
DM
2051 /* ## UV <=> IV ## */
2052 {
2053 const IV rightiv = SvIVX(right);
2054 if (rightiv < 0)
2055 /* As (a) is a UV, it's >=0, so it cannot be < */
2056 return 1;
2057 {
2058 const UV leftuv = SvUVX(left);
2059 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
28e5dec8 2060 }
28e5dec8 2061 }
33efebe6 2062 /* NOTREACHED */
28e5dec8
JH
2063 }
2064 }
2065#endif
a0d0e21e 2066 {
33efebe6
DM
2067 NV const rnv = SvNV_nomg(right);
2068 NV const lnv = SvNV_nomg(left);
2069
cab190d4 2070#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
33efebe6
DM
2071 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2072 return 2;
2073 }
2074 return (lnv > rnv) - (lnv < rnv);
cab190d4 2075#else
33efebe6
DM
2076 if (lnv < rnv)
2077 return -1;
2078 if (lnv > rnv)
2079 return 1;
2080 if (lnv == rnv)
2081 return 0;
2082 return 2;
cab190d4 2083#endif
a0d0e21e 2084 }
79072805
LW
2085}
2086
33efebe6 2087
a0d0e21e 2088PP(pp_ncmp)
79072805 2089{
33efebe6
DM
2090 dVAR; dSP;
2091 SV *left, *right;
2092 I32 value;
a42d0242 2093 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
33efebe6
DM
2094 right = POPs;
2095 left = TOPs;
2096 value = do_ncmp(left, right);
2097 if (value == 2) {
3280af22 2098 SETs(&PL_sv_undef);
79072805 2099 }
33efebe6
DM
2100 else {
2101 dTARGET;
2102 SETi(value);
2103 }
2104 RETURN;
a0d0e21e 2105}
79072805 2106
afd9910b 2107PP(pp_sle)
a0d0e21e 2108{
97aff369 2109 dVAR; dSP;
79072805 2110
afd9910b
NC
2111 int amg_type = sle_amg;
2112 int multiplier = 1;
2113 int rhs = 1;
79072805 2114
afd9910b
NC
2115 switch (PL_op->op_type) {
2116 case OP_SLT:
2117 amg_type = slt_amg;
2118 /* cmp < 0 */
2119 rhs = 0;
2120 break;
2121 case OP_SGT:
2122 amg_type = sgt_amg;
2123 /* cmp > 0 */
2124 multiplier = -1;
2125 rhs = 0;
2126 break;
2127 case OP_SGE:
2128 amg_type = sge_amg;
2129 /* cmp >= 0 */
2130 multiplier = -1;
2131 break;
79072805 2132 }
79072805 2133
6f1401dc 2134 tryAMAGICbin_MG(amg_type, AMGf_set);
a0d0e21e
LW
2135 {
2136 dPOPTOPssrl;
1b6737cc 2137 const int cmp = (IN_LOCALE_RUNTIME
078504b2
FC
2138 ? sv_cmp_locale_flags(left, right, 0)
2139 : sv_cmp_flags(left, right, 0));
afd9910b 2140 SETs(boolSV(cmp * multiplier < rhs));
a0d0e21e
LW
2141 RETURN;
2142 }
2143}
79072805 2144
36477c24
PP
2145PP(pp_seq)
2146{
6f1401dc
DM
2147 dVAR; dSP;
2148 tryAMAGICbin_MG(seq_amg, AMGf_set);
36477c24
PP
2149 {
2150 dPOPTOPssrl;
078504b2 2151 SETs(boolSV(sv_eq_flags(left, right, 0)));
a0d0e21e
LW
2152 RETURN;
2153 }
2154}
79072805 2155
a0d0e21e 2156PP(pp_sne)
79072805 2157{
6f1401dc
DM
2158 dVAR; dSP;
2159 tryAMAGICbin_MG(sne_amg, AMGf_set);
a0d0e21e
LW
2160 {
2161 dPOPTOPssrl;
078504b2 2162 SETs(boolSV(!sv_eq_flags(left, right, 0)));
a0d0e21e 2163 RETURN;
463ee0b2 2164 }
79072805
LW
2165}
2166
a0d0e21e 2167PP(pp_scmp)
79072805 2168{
6f1401dc
DM
2169 dVAR; dSP; dTARGET;
2170 tryAMAGICbin_MG(scmp_amg, 0);
a0d0e21e
LW
2171 {
2172 dPOPTOPssrl;
1b6737cc 2173 const int cmp = (IN_LOCALE_RUNTIME
078504b2
FC
2174 ? sv_cmp_locale_flags(left, right, 0)
2175 : sv_cmp_flags(left, right, 0));
bbce6d69 2176 SETi( cmp );
a0d0e21e
LW
2177 RETURN;
2178 }
2179}
79072805 2180
55497cff
PP
2181PP(pp_bit_and)
2182{
6f1401dc
DM
2183 dVAR; dSP; dATARGET;
2184 tryAMAGICbin_MG(band_amg, AMGf_assign);
a0d0e21e
LW
2185 {
2186 dPOPTOPssrl;
4633a7c4 2187 if (SvNIOKp(left) || SvNIOKp(right)) {
b20c4ee1
FC
2188 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2189 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
d0ba1bd2 2190 if (PL_op->op_private & HINT_INTEGER) {
1b6737cc 2191 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
972b05a9 2192 SETi(i);
d0ba1bd2
JH
2193 }
2194 else {
1b6737cc 2195 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
972b05a9 2196 SETu(u);
d0ba1bd2 2197 }
b20c4ee1
FC
2198 if (left_ro_nonnum) SvNIOK_off(left);
2199 if (right_ro_nonnum) SvNIOK_off(right);
a0d0e21e
LW
2200 }
2201 else {
533c011a 2202 do_vop(PL_op->op_type, TARG, left, right);
a0d0e21e
LW
2203 SETTARG;
2204 }
2205 RETURN;
2206 }
2207}
79072805 2208
a0d0e21e
LW
2209PP(pp_bit_or)
2210{
3658c1f1
NC
2211 dVAR; dSP; dATARGET;
2212 const int op_type = PL_op->op_type;
2213
6f1401dc 2214 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
a0d0e21e
LW
2215 {
2216 dPOPTOPssrl;
4633a7c4 2217 if (SvNIOKp(left) || SvNIOKp(right)) {
b20c4ee1
FC
2218 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2219 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
d0ba1bd2 2220 if (PL_op->op_private & HINT_INTEGER) {
3658c1f1
NC
2221 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2222 const IV r = SvIV_nomg(right);
2223 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2224 SETi(result);
d0ba1bd2
JH
2225 }
2226 else {
3658c1f1
NC
2227 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2228 const UV r = SvUV_nomg(right);
2229 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2230 SETu(result);
d0ba1bd2 2231 }
b20c4ee1
FC
2232 if (left_ro_nonnum) SvNIOK_off(left);
2233 if (right_ro_nonnum) SvNIOK_off(right);
a0d0e21e
LW
2234 }
2235 else {
3658c1f1 2236 do_vop(op_type, TARG, left, right);
a0d0e21e
LW
2237 SETTARG;
2238 }
2239 RETURN;
79072805 2240 }
a0d0e21e 2241}
79072805 2242
a0d0e21e
LW
2243PP(pp_negate)
2244{
6f1401dc
DM
2245 dVAR; dSP; dTARGET;
2246 tryAMAGICun_MG(neg_amg, AMGf_numeric);
a0d0e21e 2247 {
6f1401dc 2248 SV * const sv = TOPs;
1b6737cc 2249 const int flags = SvFLAGS(sv);
a5b92898 2250
886a4465 2251 if( !SvNIOK( sv ) && looks_like_number( sv ) ){
a5b92898
R
2252 SvIV_please( sv );
2253 }
2254
28e5dec8
JH
2255 if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) {
2256 /* It's publicly an integer, or privately an integer-not-float */
2257 oops_its_an_int:
9b0e499b
GS
2258 if (SvIsUV(sv)) {
2259 if (SvIVX(sv) == IV_MIN) {
28e5dec8 2260 /* 2s complement assumption. */
9b0e499b
GS
2261 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2262 RETURN;
2263 }
2264 else if (SvUVX(sv) <= IV_MAX) {
beccb14c 2265 SETi(-SvIVX(sv));
9b0e499b
GS
2266 RETURN;
2267 }
2268 }
2269 else if (SvIVX(sv) != IV_MIN) {
2270 SETi(-SvIVX(sv));
2271 RETURN;
2272 }
28e5dec8
JH
2273#ifdef PERL_PRESERVE_IVUV
2274 else {
2275 SETu((UV)IV_MIN);
2276 RETURN;
2277 }
2278#endif
9b0e499b
GS
2279 }
2280 if (SvNIOKp(sv))
6f1401dc 2281 SETn(-SvNV_nomg(sv));
4633a7c4 2282 else if (SvPOKp(sv)) {
a0d0e21e 2283 STRLEN len;
6f1401dc 2284 const char * const s = SvPV_nomg_const(sv, len);
bbce6d69 2285 if (isIDFIRST(*s)) {
76f68e9b 2286 sv_setpvs(TARG, "-");
a0d0e21e 2287 sv_catsv(TARG, sv);
79072805 2288 }
a0d0e21e 2289 else if (*s == '+' || *s == '-') {
6f1401dc
DM
2290 sv_setsv_nomg(TARG, sv);
2291 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
79072805 2292 }
8eb28a70 2293 else if (DO_UTF8(sv)) {
6f1401dc 2294 SvIV_please_nomg(sv);
8eb28a70
JH
2295 if (SvIOK(sv))
2296 goto oops_its_an_int;
2297 if (SvNOK(sv))
6f1401dc 2298 sv_setnv(TARG, -SvNV_nomg(sv));
8eb28a70 2299 else {
76f68e9b 2300 sv_setpvs(TARG, "-");
8eb28a70
JH
2301 sv_catsv(TARG, sv);
2302 }
834a4ddd 2303 }
28e5dec8 2304 else {
6f1401dc 2305 SvIV_please_nomg(sv);
8eb28a70
JH
2306 if (SvIOK(sv))
2307 goto oops_its_an_int;
6f1401dc 2308 sv_setnv(TARG, -SvNV_nomg(sv));
28e5dec8 2309 }
a0d0e21e 2310 SETTARG;
79072805 2311 }
4633a7c4 2312 else
6f1401dc 2313 SETn(-SvNV_nomg(sv));
79072805 2314 }
a0d0e21e 2315 RETURN;
79072805
LW
2316}
2317
a0d0e21e 2318PP(pp_not)
79072805 2319{
6f1401dc
DM
2320 dVAR; dSP;
2321 tryAMAGICun_MG(not_amg, AMGf_set);
06c841cf 2322 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
a0d0e21e 2323 return NORMAL;
79072805
LW
2324}
2325
a0d0e21e 2326PP(pp_complement)
79072805 2327{
6f1401dc 2328 dVAR; dSP; dTARGET;
a42d0242 2329 tryAMAGICun_MG(compl_amg, AMGf_numeric);
a0d0e21e
LW
2330 {
2331 dTOPss;
4633a7c4 2332 if (SvNIOKp(sv)) {
d0ba1bd2 2333 if (PL_op->op_private & HINT_INTEGER) {
1b6737cc 2334 const IV i = ~SvIV_nomg(sv);
972b05a9 2335 SETi(i);
d0ba1bd2
JH
2336 }
2337 else {
1b6737cc 2338 const UV u = ~SvUV_nomg(sv);
972b05a9 2339 SETu(u);
d0ba1bd2 2340 }
a0d0e21e
LW
2341 }
2342 else {
51723571 2343 register U8 *tmps;
55497cff 2344 register I32 anum;
a0d0e21e
LW
2345 STRLEN len;
2346
10516c54 2347 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
891f9566 2348 sv_setsv_nomg(TARG, sv);
6f1401dc 2349 tmps = (U8*)SvPV_force_nomg(TARG, len);
a0d0e21e 2350 anum = len;
1d68d6cd 2351 if (SvUTF8(TARG)) {
a1ca4561 2352 /* Calculate exact length, let's not estimate. */
1d68d6cd 2353 STRLEN targlen = 0;
ba210ebe 2354 STRLEN l;
a1ca4561
YST
2355 UV nchar = 0;
2356 UV nwide = 0;
01f6e806 2357 U8 * const send = tmps + len;
74d49cd0
ST
2358 U8 * const origtmps = tmps;
2359 const UV utf8flags = UTF8_ALLOW_ANYUV;
1d68d6cd 2360
1d68d6cd 2361 while (tmps < send) {
74d49cd0
ST
2362 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2363 tmps += l;
5bbb0b5a 2364 targlen += UNISKIP(~c);
a1ca4561
YST
2365 nchar++;
2366 if (c > 0xff)
2367 nwide++;
1d68d6cd
SC
2368 }
2369
2370 /* Now rewind strings and write them. */
74d49cd0 2371 tmps = origtmps;
a1ca4561
YST
2372
2373 if (nwide) {
01f6e806
AL
2374 U8 *result;
2375 U8 *p;
2376
74d49cd0 2377 Newx(result, targlen + 1, U8);
01f6e806 2378 p = result;
a1ca4561 2379 while (tmps < send) {
74d49cd0
ST
2380 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2381 tmps += l;
01f6e806 2382 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
a1ca4561 2383 }
01f6e806 2384 *p = '\0';
c1c21316
NC
2385 sv_usepvn_flags(TARG, (char*)result, targlen,
2386 SV_HAS_TRAILING_NUL);
a1ca4561
YST
2387 SvUTF8_on(TARG);
2388 }
2389 else {
01f6e806
AL
2390 U8 *result;
2391 U8 *p;
2392
74d49cd0 2393 Newx(result, nchar + 1, U8);
01f6e806 2394 p = result;
a1ca4561 2395 while (tmps < send) {
74d49cd0
ST
2396 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2397 tmps += l;
01f6e806 2398 *p++ = ~c;
a1ca4561 2399 }
01f6e806 2400 *p = '\0';
c1c21316 2401 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
d0a21e00 2402 SvUTF8_off(TARG);
1d68d6cd 2403 }
ec93b65f 2404 SETTARG;
1d68d6cd
SC
2405 RETURN;
2406 }
a0d0e21e 2407#ifdef LIBERAL
51723571
JH
2408 {
2409 register long *tmpl;
2410 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2411 *tmps = ~*tmps;
2412 tmpl = (long*)tmps;
bb7a0f54 2413 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
51723571
JH
2414 *tmpl = ~*tmpl;
2415 tmps = (U8*)tmpl;
2416 }
a0d0e21e
LW
2417#endif
2418 for ( ; anum > 0; anum--, tmps++)
2419 *tmps = ~*tmps;
ec93b65f 2420 SETTARG;
a0d0e21e
LW
2421 }
2422 RETURN;
2423 }
79072805
LW
2424}
2425
a0d0e21e
LW
2426/* integer versions of some of the above */
2427
a0d0e21e 2428PP(pp_i_multiply)
79072805 2429{
6f1401dc
DM
2430 dVAR; dSP; dATARGET;
2431 tryAMAGICbin_MG(mult_amg, AMGf_assign);
a0d0e21e 2432 {
6f1401dc 2433 dPOPTOPiirl_nomg;
a0d0e21e
LW
2434 SETi( left * right );
2435 RETURN;
2436 }
79072805
LW
2437}
2438
a0d0e21e 2439PP(pp_i_divide)
79072805 2440{
85935d8e 2441 IV num;
6f1401dc
DM
2442 dVAR; dSP; dATARGET;
2443 tryAMAGICbin_MG(div_amg, AMGf_assign);
a0d0e21e 2444 {
6f1401dc 2445 dPOPTOPssrl;
85935d8e 2446 IV value = SvIV_nomg(right);
a0d0e21e 2447 if (value == 0)
ece1bcef 2448 DIE(aTHX_ "Illegal division by zero");
85935d8e 2449 num = SvIV_nomg(left);
a0cec769
YST
2450
2451 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2452 if (value == -1)
2453 value = - num;
2454 else
2455 value = num / value;
6f1401dc 2456 SETi(value);
a0d0e21e
LW
2457 RETURN;
2458 }
79072805
LW
2459}
2460
befad5d1 2461#if defined(__GLIBC__) && IVSIZE == 8
224ec323
JH
2462STATIC
2463PP(pp_i_modulo_0)
befad5d1
NC
2464#else
2465PP(pp_i_modulo)
2466#endif
224ec323
JH
2467{
2468 /* This is the vanilla old i_modulo. */
6f1401dc
DM
2469 dVAR; dSP; dATARGET;
2470 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2471 {
6f1401dc 2472 dPOPTOPiirl_nomg;
224ec323
JH
2473 if (!right)
2474 DIE(aTHX_ "Illegal modulus zero");
a0cec769
YST
2475 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2476 if (right == -1)
2477 SETi( 0 );
2478 else
2479 SETi( left % right );
224ec323
JH
2480 RETURN;
2481 }
2482}
2483
11010fa3 2484#if defined(__GLIBC__) && IVSIZE == 8
224ec323
JH
2485STATIC
2486PP(pp_i_modulo_1)
befad5d1 2487
224ec323 2488{
224ec323 2489 /* This is the i_modulo with the workaround for the _moddi3 bug
fce2b89e 2490 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
224ec323 2491 * See below for pp_i_modulo. */
6f1401dc
DM
2492 dVAR; dSP; dATARGET;
2493 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2494 {
6f1401dc 2495 dPOPTOPiirl_nomg;
224ec323
JH
2496 if (!right)
2497 DIE(aTHX_ "Illegal modulus zero");
a0cec769
YST
2498 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2499 if (right == -1)
2500 SETi( 0 );
2501 else
2502 SETi( left % PERL_ABS(right) );
224ec323
JH
2503 RETURN;
2504 }
224ec323
JH
2505}
2506
a0d0e21e 2507PP(pp_i_modulo)
79072805 2508{
6f1401dc
DM
2509 dVAR; dSP; dATARGET;
2510 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2511 {
6f1401dc 2512 dPOPTOPiirl_nomg;
224ec323
JH
2513 if (!right)
2514 DIE(aTHX_ "Illegal modulus zero");
2515 /* The assumption is to use hereafter the old vanilla version... */
2516 PL_op->op_ppaddr =
2517 PL_ppaddr[OP_I_MODULO] =
1c127fab 2518 Perl_pp_i_modulo_0;
224ec323
JH
2519 /* .. but if we have glibc, we might have a buggy _moddi3
2520 * (at least glicb 2.2.5 is known to have this bug), in other
2521 * words our integer modulus with negative quad as the second
2522 * argument might be broken. Test for this and re-patch the
2523 * opcode dispatch table if that is the case, remembering to
2524 * also apply the workaround so that this first round works
2525 * right, too. See [perl #9402] for more information. */
224ec323
JH
2526 {
2527 IV l = 3;
2528 IV r = -10;
2529 /* Cannot do this check with inlined IV constants since
2530 * that seems to work correctly even with the buggy glibc. */
2531 if (l % r == -3) {
2532 /* Yikes, we have the bug.
2533 * Patch in the workaround version. */
2534 PL_op->op_ppaddr =
2535 PL_ppaddr[OP_I_MODULO] =
2536 &Perl_pp_i_modulo_1;
2537 /* Make certain we work right this time, too. */
32fdb065 2538 right = PERL_ABS(right);
224ec323
JH
2539 }
2540 }
a0cec769
YST
2541 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2542 if (right == -1)
2543 SETi( 0 );
2544 else
2545 SETi( left % right );
224ec323
JH
2546 RETURN;
2547 }
79072805 2548}
befad5d1 2549#endif
79072805 2550
a0d0e21e 2551PP(pp_i_add)
79072805 2552{
6f1401dc
DM
2553 dVAR; dSP; dATARGET;
2554 tryAMAGICbin_MG(add_amg, AMGf_assign);
a0d0e21e 2555 {
6f1401dc 2556 dPOPTOPiirl_ul_nomg;
a0d0e21e
LW
2557 SETi( left + right );
2558 RETURN;
79072805 2559 }
79072805
LW
2560}
2561
a0d0e21e 2562PP(pp_i_subtract)
79072805 2563{
6f1401dc
DM
2564 dVAR; dSP; dATARGET;
2565 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
a0d0e21e 2566 {
6f1401dc 2567 dPOPTOPiirl_ul_nomg;
a0d0e21e
LW
2568 SETi( left - right );
2569 RETURN;
79072805 2570 }
79072805
LW
2571}
2572
a0d0e21e 2573PP(pp_i_lt)
79072805 2574{
6f1401dc
DM
2575 dVAR; dSP;
2576 tryAMAGICbin_MG(lt_amg, AMGf_set);
a0d0e21e 2577 {
96b6b87f 2578 dPOPTOPiirl_nomg;
54310121 2579 SETs(boolSV(left < right));
a0d0e21e
LW
2580 RETURN;
2581 }
79072805
LW
2582}
2583
a0d0e21e 2584PP(pp_i_gt)
79072805 2585{
6f1401dc
DM
2586 dVAR; dSP;
2587 tryAMAGICbin_MG(gt_amg, AMGf_set);
a0d0e21e 2588 {
96b6b87f 2589 dPOPTOPiirl_nomg;
54310121 2590 SETs(boolSV(left > right));
a0d0e21e
LW
2591 RETURN;
2592 }
79072805
LW
2593}
2594
a0d0e21e 2595PP(pp_i_le)
79072805 2596{
6f1401dc
DM
2597 dVAR; dSP;
2598 tryAMAGICbin_MG(le_amg, AMGf_set);
a0d0e21e 2599 {
96b6b87f 2600 dPOPTOPiirl_nomg;
54310121 2601 SETs(boolSV(left <= right));
a0d0e21e 2602 RETURN;
85e6fe83 2603 }
79072805
LW
2604}
2605
a0d0e21e 2606PP(pp_i_ge)
79072805 2607{
6f1401dc
DM
2608 dVAR; dSP;
2609 tryAMAGICbin_MG(ge_amg, AMGf_set);
a0d0e21e 2610 {
96b6b87f 2611 dPOPTOPiirl_nomg;
54310121 2612 SETs(boolSV(left >= right));
a0d0e21e
LW
2613 RETURN;
2614 }
79072805
LW
2615}
2616
a0d0e21e 2617PP(pp_i_eq)
79072805 2618{
6f1401dc
DM
2619 dVAR; dSP;
2620 tryAMAGICbin_MG(eq_amg, AMGf_set);
a0d0e21e 2621 {
96b6b87f 2622 dPOPTOPiirl_nomg;
54310121 2623 SETs(boolSV(left == right));
a0d0e21e
LW
2624 RETURN;
2625 }
79072805
LW
2626}
2627
a0d0e21e 2628PP(pp_i_ne)
79072805 2629{
6f1401dc
DM
2630 dVAR; dSP;
2631 tryAMAGICbin_MG(ne_amg, AMGf_set);
a0d0e21e 2632 {
96b6b87f 2633 dPOPTOPiirl_nomg;
54310121 2634 SETs(boolSV(left != right));
a0d0e21e
LW
2635 RETURN;
2636 }
79072805
LW
2637}
2638
a0d0e21e 2639PP(pp_i_ncmp)
79072805 2640{
6f1401dc
DM
2641 dVAR; dSP; dTARGET;
2642 tryAMAGICbin_MG(ncmp_amg, 0);
a0d0e21e 2643 {
96b6b87f 2644 dPOPTOPiirl_nomg;
a0d0e21e 2645 I32 value;
79072805 2646
a0d0e21e 2647 if (left > right)
79072805 2648 value = 1;
a0d0e21e 2649 else if (left < right)
79072805 2650 value = -1;
a0d0e21e 2651 else
79072805 2652 value = 0;
a0d0e21e
LW
2653 SETi(value);
2654 RETURN;
79072805 2655 }
85e6fe83
LW
2656}
2657
2658PP(pp_i_negate)
2659{
6f1401dc
DM
2660 dVAR; dSP; dTARGET;
2661 tryAMAGICun_MG(neg_amg, 0);
2662 {
2663 SV * const sv = TOPs;
2664 IV const i = SvIV_nomg(sv);
2665 SETi(-i);
2666 RETURN;
2667 }
85e6fe83
LW
2668}
2669
79072805
LW
2670/* High falutin' math. */
2671
2672PP(pp_atan2)
2673{
6f1401dc
DM
2674 dVAR; dSP; dTARGET;
2675 tryAMAGICbin_MG(atan2_amg, 0);
a0d0e21e 2676 {
096c060c 2677 dPOPTOPnnrl_nomg;
a1021d57 2678 SETn(Perl_atan2(left, right));
a0d0e21e
LW
2679 RETURN;
2680 }
79072805
LW
2681}
2682
2683PP(pp_sin)
2684{
71302fe3
NC
2685 dVAR; dSP; dTARGET;
2686 int amg_type = sin_amg;
2687 const char *neg_report = NULL;
bc81784a 2688 NV (*func)(NV) = Perl_sin;
71302fe3
NC
2689 const int op_type = PL_op->op_type;
2690
2691 switch (op_type) {
2692 case OP_COS:
2693 amg_type = cos_amg;
bc81784a 2694 func = Perl_cos;
71302fe3
NC
2695 break;
2696 case OP_EXP:
2697 amg_type = exp_amg;
bc81784a 2698 func = Perl_exp;
71302fe3
NC
2699 break;
2700 case OP_LOG:
2701 amg_type = log_amg;
bc81784a 2702 func = Perl_log;
71302fe3
NC
2703 neg_report = "log";
2704 break;
2705 case OP_SQRT:
2706 amg_type = sqrt_amg;
bc81784a 2707 func = Perl_sqrt;
71302fe3
NC
2708 neg_report = "sqrt";
2709 break;
a0d0e21e 2710 }
79072805 2711
6f1401dc
DM
2712
2713 tryAMAGICun_MG(amg_type, 0);
a0d0e21e 2714 {
6f1401dc
DM
2715 SV * const arg = POPs;
2716 const NV value = SvNV_nomg(arg);
71302fe3
NC
2717 if (neg_report) {
2718 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2719 SET_NUMERIC_STANDARD();
2720 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2721 }
2722 }
2723 XPUSHn(func(value));
a0d0e21e
LW
2724 RETURN;
2725 }
79072805
LW
2726}
2727
56cb0a1c
AD
2728/* Support Configure command-line overrides for rand() functions.
2729 After 5.005, perhaps we should replace this by Configure support
2730 for drand48(), random(), or rand(). For 5.005, though, maintain
2731 compatibility by calling rand() but allow the user to override it.
2732 See INSTALL for details. --Andy Dougherty 15 July 1998
2733*/
85ab1d1d
JH
2734/* Now it's after 5.005, and Configure supports drand48() and random(),
2735 in addition to rand(). So the overrides should not be needed any more.
2736 --Jarkko Hietaniemi 27 September 1998
2737 */
2738
2739#ifndef HAS_DRAND48_PROTO
20ce7b12 2740extern double drand48 (void);
56cb0a1c
AD
2741#endif
2742
79072805
LW
2743PP(pp_rand)
2744{
97aff369 2745 dVAR; dSP; dTARGET;
65202027 2746 NV value;
79072805
LW
2747 if (MAXARG < 1)
2748 value = 1.0;
94ec06bc
FC
2749 else if (!TOPs) {
2750 value = 1.0; (void)POPs;
2751 }
79072805
LW
2752 else
2753 value = POPn;
2754 if (value == 0.0)
2755 value = 1.0;
80252599 2756 if (!PL_srand_called) {
85ab1d1d 2757 (void)seedDrand01((Rand_seed_t)seed());
80252599 2758 PL_srand_called = TRUE;
93dc8474 2759 }
85ab1d1d 2760 value *= Drand01();
79072805
LW
2761 XPUSHn(value);
2762 RETURN;
2763}
2764
2765PP(pp_srand)
2766{
83832992 2767 dVAR; dSP; dTARGET;
d22667bf 2768 const UV anum = (MAXARG < 1 || (!TOPs && !POPs)) ? seed() : POPu;
85ab1d1d 2769 (void)seedDrand01((Rand_seed_t)anum);
80252599 2770 PL_srand_called = TRUE;
da1010ec
NC
2771 if (anum)
2772 XPUSHu(anum);
2773 else {
2774 /* Historically srand always returned true. We can avoid breaking
2775 that like this: */
2776 sv_setpvs(TARG, "0 but true");
2777 XPUSHTARG;
2778 }
83832992 2779 RETURN;
79072805
LW
2780}
2781
79072805
LW
2782PP(pp_int)
2783{
6f1401dc
DM
2784 dVAR; dSP; dTARGET;
2785 tryAMAGICun_MG(int_amg, AMGf_numeric);
774d564b 2786 {
6f1401dc
DM
2787 SV * const sv = TOPs;
2788 const IV iv = SvIV_nomg(sv);
28e5dec8
JH
2789 /* XXX it's arguable that compiler casting to IV might be subtly
2790 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2791 else preferring IV has introduced a subtle behaviour change bug. OTOH
2792 relying on floating point to be accurate is a bug. */
2793
c781a409 2794 if (!SvOK(sv)) {
922c4365 2795 SETu(0);
c781a409
RD
2796 }
2797 else if (SvIOK(sv)) {
2798 if (SvIsUV(sv))
6f1401dc 2799 SETu(SvUV_nomg(sv));
c781a409 2800 else
28e5dec8 2801 SETi(iv);
c781a409 2802 }
c781a409 2803 else {
6f1401dc 2804 const NV value = SvNV_nomg(sv);
1048ea30 2805 if (value >= 0.0) {
28e5dec8
JH
2806 if (value < (NV)UV_MAX + 0.5) {
2807 SETu(U_V(value));
2808 } else {
059a1014 2809 SETn(Perl_floor(value));
28e5dec8 2810 }
1048ea30 2811 }
28e5dec8
JH
2812 else {
2813 if (value > (NV)IV_MIN - 0.5) {
2814 SETi(I_V(value));
2815 } else {
1bbae031 2816 SETn(Perl_ceil(value));
28e5dec8
JH
2817 }
2818 }
774d564b 2819 }
79072805 2820 }
79072805
LW
2821 RETURN;
2822}
2823
463ee0b2
LW
2824PP(pp_abs)
2825{
6f1401dc
DM
2826 dVAR; dSP; dTARGET;
2827 tryAMAGICun_MG(abs_amg, AMGf_numeric);
a0d0e21e 2828 {
6f1401dc 2829 SV * const sv = TOPs;
28e5dec8 2830 /* This will cache the NV value if string isn't actually integer */
6f1401dc 2831 const IV iv = SvIV_nomg(sv);
a227d84d 2832
800401ee 2833 if (!SvOK(sv)) {
922c4365 2834 SETu(0);
800401ee
JH
2835 }
2836 else if (SvIOK(sv)) {
28e5dec8 2837 /* IVX is precise */
800401ee 2838 if (SvIsUV(sv)) {
6f1401dc 2839 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
28e5dec8
JH
2840 } else {
2841 if (iv >= 0) {
2842 SETi(iv);
2843 } else {
2844 if (iv != IV_MIN) {
2845 SETi(-iv);
2846 } else {
2847 /* 2s complement assumption. Also, not really needed as
2848 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2849 SETu(IV_MIN);
2850 }
a227d84d 2851 }
28e5dec8
JH
2852 }
2853 } else{
6f1401dc 2854 const NV value = SvNV_nomg(sv);
774d564b 2855 if (value < 0.0)
1b6737cc 2856 SETn(-value);
a4474c9e
DD
2857 else
2858 SETn(value);
774d564b 2859 }
a0d0e21e 2860 }
774d564b 2861 RETURN;
463ee0b2
LW
2862}
2863
79072805
LW
2864PP(pp_oct)
2865{
97aff369 2866 dVAR; dSP; dTARGET;
5c144d81 2867 const char *tmps;
53305cf1 2868 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
6f894ead 2869 STRLEN len;
53305cf1
NC
2870 NV result_nv;
2871 UV result_uv;
1b6737cc 2872 SV* const sv = POPs;
79072805 2873
349d4f2f 2874 tmps = (SvPV_const(sv, len));
2bc69dc4
NIS
2875 if (DO_UTF8(sv)) {
2876 /* If Unicode, try to downgrade
2877 * If not possible, croak. */
1b6737cc 2878 SV* const tsv = sv_2mortal(newSVsv(sv));
2bc69dc4
NIS
2879
2880 SvUTF8_on(tsv);
2881 sv_utf8_downgrade(tsv, FALSE);
349d4f2f 2882 tmps = SvPV_const(tsv, len);
2bc69dc4 2883 }
daa2adfd
NC
2884 if (PL_op->op_type == OP_HEX)
2885 goto hex;
2886
6f894ead 2887 while (*tmps && len && isSPACE(*tmps))
53305cf1 2888 tmps++, len--;
9e24b6e2 2889 if (*tmps == '0')
53305cf1 2890 tmps++, len--;
a674e8db 2891 if (*tmps == 'x' || *tmps == 'X') {
daa2adfd 2892 hex:
53305cf1 2893 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
daa2adfd 2894 }
a674e8db 2895 else if (*tmps == 'b' || *tmps == 'B')
53305cf1 2896 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
464e2e8a 2897 else
53305cf1
NC
2898 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2899
2900 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2901 XPUSHn(result_nv);
2902 }
2903 else {
2904 XPUSHu(result_uv);
2905 }
79072805
LW
2906 RETURN;
2907}
2908
2909/* String stuff. */
2910
2911PP(pp_length)
2912{
97aff369 2913 dVAR; dSP; dTARGET;
0bd48802 2914 SV * const sv = TOPs;
a0ed51b3 2915
656266fc 2916 if (SvGAMAGIC(sv)) {
9f621bb0
NC
2917 /* For an overloaded or magic scalar, we can't know in advance if
2918 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2919 it likes to cache the length. Maybe that should be a documented
2920 feature of it.
92331800
NC
2921 */
2922 STRLEN len;
9f621bb0
NC
2923 const char *const p
2924 = sv_2pv_flags(sv, &len,
2925 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
92331800 2926
d88e091f 2927 if (!p) {
9407f9c1
DL
2928 if (!SvPADTMP(TARG)) {
2929 sv_setsv(TARG, &PL_sv_undef);
2930 SETTARG;
2931 }
2932 SETs(&PL_sv_undef);
d88e091f 2933 }
9f621bb0 2934 else if (DO_UTF8(sv)) {
899be101 2935 SETi(utf8_length((U8*)p, (U8*)p + len));
92331800
NC
2936 }
2937 else
2938 SETi(len);
656266fc 2939 } else if (SvOK(sv)) {
9f621bb0
NC
2940 /* Neither magic nor overloaded. */
2941 if (DO_UTF8(sv))
2942 SETi(sv_len_utf8(sv));
2943 else
2944 SETi(sv_len(sv));
656266fc 2945 } else {
9407f9c1
DL
2946 if (!SvPADTMP(TARG)) {
2947 sv_setsv_nomg(TARG, &PL_sv_undef);
2948 SETTARG;
2949 }
2950 SETs(&PL_sv_undef);
92331800 2951 }
79072805
LW
2952 RETURN;
2953}
2954
2955PP(pp_substr)
2956{
97aff369 2957 dVAR; dSP; dTARGET;
79072805 2958 SV *sv;
463ee0b2 2959 STRLEN curlen;
9402d6ed 2960 STRLEN utf8_curlen;
777f7c56
EB
2961 SV * pos_sv;
2962 IV pos1_iv;
2963 int pos1_is_uv;
2964 IV pos2_iv;
2965 int pos2_is_uv;
2966 SV * len_sv;
2967 IV len_iv = 0;
2968 int len_is_uv = 1;
050e6362 2969 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
e1ec3a88 2970 const char *tmps;
9402d6ed 2971 SV *repl_sv = NULL;
cbbf8932 2972 const char *repl = NULL;
7b8d334a 2973 STRLEN repl_len;
7bc95ae1 2974 int num_args = PL_op->op_private & 7;
13e30c65 2975 bool repl_need_utf8_upgrade = FALSE;
9402d6ed 2976 bool repl_is_utf8 = FALSE;
79072805 2977
78f9721b
SM
2978 if (num_args > 2) {
2979 if (num_args > 3) {
7bc95ae1 2980 if((repl_sv = POPs)) {
83003860 2981 repl = SvPV_const(repl_sv, repl_len);
9402d6ed 2982 repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
7bc95ae1
FC
2983 }
2984 else num_args--;
2985 }
2986 if ((len_sv = POPs)) {
2987 len_iv = SvIV(len_sv);
2988 len_is_uv = SvIOK_UV(len_sv);
7b8d334a 2989 }
7bc95ae1 2990 else num_args--;
5d82c453 2991 }
777f7c56
EB
2992 pos_sv = POPs;
2993 pos1_iv = SvIV(pos_sv);
2994 pos1_is_uv = SvIOK_UV(pos_sv);
79072805 2995 sv = POPs;
849ca7ee 2996 PUTBACK;
9402d6ed
JH
2997 if (repl_sv) {
2998 if (repl_is_utf8) {
2999 if (!DO_UTF8(sv))
3000 sv_utf8_upgrade(sv);
3001 }
13e30c65
JH
3002 else if (DO_UTF8(sv))
3003 repl_need_utf8_upgrade = TRUE;
9402d6ed 3004 }
5c144d81 3005 tmps = SvPV_const(sv, curlen);
7e2040f0 3006 if (DO_UTF8(sv)) {
9402d6ed
JH
3007 utf8_curlen = sv_len_utf8(sv);
3008 if (utf8_curlen == curlen)
3009 utf8_curlen = 0;
a0ed51b3 3010 else
9402d6ed 3011 curlen = utf8_curlen;
a0ed51b3 3012 }
d1c2b58a 3013 else
9402d6ed 3014 utf8_curlen = 0;
a0ed51b3 3015
e1dccc0d
Z
3016 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
3017 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3018 pos1_iv += curlen;
777f7c56 3019 }
e1dccc0d
Z
3020 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
3021 goto bound_fail;
777f7c56
EB
3022
3023 if (num_args > 2) {
3024 if (!len_is_uv && len_iv < 0) {
3025 pos2_iv = curlen + len_iv;
3026 if (curlen)
3027 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3028 else
3029 pos2_is_uv = 0;
3030 } else { /* len_iv >= 0 */
3031 if (!pos1_is_uv && pos1_iv < 0) {
3032 pos2_iv = pos1_iv + len_iv;
3033 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3034 } else {
3035 if ((UV)len_iv > curlen-(UV)pos1_iv)
3036 pos2_iv = curlen;
3037 else
3038 pos2_iv = pos1_iv+len_iv;
3039 pos2_is_uv = 1;
3040 }
5d82c453 3041 }
2304df62 3042 }
79072805 3043 else {
777f7c56
EB
3044 pos2_iv = curlen;
3045 pos2_is_uv = 1;
3046 }
3047
3048 if (!pos2_is_uv && pos2_iv < 0) {
3049 if (!pos1_is_uv && pos1_iv < 0)
1c900557 3050 goto bound_fail;
777f7c56
EB
3051 pos2_iv = 0;
3052 }
3053 else if (!pos1_is_uv && pos1_iv < 0)
3054 pos1_iv = 0;
3055
3056 if ((UV)pos2_iv < (UV)pos1_iv)
3057 pos2_iv = pos1_iv;
3058 if ((UV)pos2_iv > curlen)
3059 pos2_iv = curlen;
3060
3061 {
3062 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3063 const STRLEN pos = (STRLEN)( (UV)pos1_iv );
3064 const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
777f7c56 3065 STRLEN byte_len = len;
d931b1be
NC
3066 STRLEN byte_pos = utf8_curlen
3067 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3068
2154eca7
EB
3069 if (lvalue && !repl) {
3070 SV * ret;
3071
3072 if (!SvGMAGICAL(sv)) {
3073 if (SvROK(sv)) {
3074 SvPV_force_nolen(sv);
3075 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3076 "Attempt to use reference as lvalue in substr");
3077 }
3078 if (isGV_with_GP(sv))
3079 SvPV_force_nolen(sv);
3080 else if (SvOK(sv)) /* is it defined ? */
3081 (void)SvPOK_only_UTF8(sv);
3082 else
3083 sv_setpvs(sv, ""); /* avoid lexical reincarnation */
781e7547 3084 }
2154eca7
EB
3085
3086 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3087 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3088 LvTYPE(ret) = 'x';
3089 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3090 LvTARGOFF(ret) = pos;
3091 LvTARGLEN(ret) = len;
3092
3093 SPAGAIN;
3094 PUSHs(ret); /* avoid SvSETMAGIC here */
3095 RETURN;
781e7547
DM
3096 }
3097
2154eca7
EB
3098 SvTAINTED_off(TARG); /* decontaminate */
3099 SvUTF8_off(TARG); /* decontaminate */
3100
3101 tmps += byte_pos;
777f7c56 3102 sv_setpvn(TARG, tmps, byte_len);
12aa1545 3103#ifdef USE_LOCALE_COLLATE
14befaf4 3104 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
12aa1545 3105#endif
9402d6ed 3106 if (utf8_curlen)
7f66633b 3107 SvUTF8_on(TARG);
2154eca7 3108
f7928d6c 3109 if (repl) {
13e30c65
JH
3110 SV* repl_sv_copy = NULL;
3111
3112 if (repl_need_utf8_upgrade) {
3113 repl_sv_copy = newSVsv(repl_sv);
3114 sv_utf8_upgrade(repl_sv_copy);
349d4f2f 3115 repl = SvPV_const(repl_sv_copy, repl_len);
13e30c65
JH
3116 repl_is_utf8 = DO_UTF8(repl_sv_copy) && SvCUR(sv);
3117 }
502d9230
VP
3118 if (!SvOK(sv))
3119 sv_setpvs(sv, "");
777f7c56 3120 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
9402d6ed 3121 if (repl_is_utf8)
f7928d6c 3122 SvUTF8_on(sv);
ef8d46e8 3123 SvREFCNT_dec(repl_sv_copy);
f7928d6c 3124 }
79072805 3125 }
849ca7ee 3126 SPAGAIN;
e27c778f
FC
3127 SvSETMAGIC(TARG);
3128 PUSHs(TARG);
79072805 3129 RETURN;
777f7c56 3130
1c900557 3131bound_fail:
777f7c56
EB
3132 if (lvalue || repl)
3133 Perl_croak(aTHX_ "substr outside of string");
3134 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3135 RETPUSHUNDEF;
79072805
LW
3136}
3137
3138PP(pp_vec)
3139{
2154eca7 3140 dVAR; dSP;
1b6737cc
AL
3141 register const IV size = POPi;
3142 register const IV offset = POPi;
3143 register SV * const src = POPs;
3144 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
2154eca7 3145 SV * ret;
a0d0e21e 3146
81e118e0 3147 if (lvalue) { /* it's an lvalue! */
2154eca7
EB
3148 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3149 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3150 LvTYPE(ret) = 'v';
3151 LvTARG(ret) = SvREFCNT_inc_simple(src);
3152 LvTARGOFF(ret) = offset;
3153 LvTARGLEN(ret) = size;
3154 }
3155 else {
3156 dTARGET;
3157 SvTAINTED_off(TARG); /* decontaminate */
3158 ret = TARG;
79072805
LW
3159 }
3160
2154eca7
EB
3161 sv_setuv(ret, do_vecget(src, offset, size));
3162 PUSHs(ret);
79072805
LW
3163 RETURN;
3164}
3165
3166PP(pp_index)
3167{
97aff369 3168 dVAR; dSP; dTARGET;
79072805
LW
3169 SV *big;
3170 SV *little;
c445ea15 3171 SV *temp = NULL;
ad66a58c 3172 STRLEN biglen;
2723d216 3173 STRLEN llen = 0;
79072805
LW
3174 I32 offset;
3175 I32 retval;
73ee8be2
NC
3176 const char *big_p;
3177 const char *little_p;
2f040f7f
NC
3178 bool big_utf8;
3179 bool little_utf8;
2723d216 3180 const bool is_index = PL_op->op_type == OP_INDEX;
d3e26383 3181 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
79072805 3182
e1dccc0d
Z
3183 if (threeargs)
3184 offset = POPi;
79072805
LW
3185 little = POPs;
3186 big = POPs;
73ee8be2
NC
3187 big_p = SvPV_const(big, biglen);
3188 little_p = SvPV_const(little, llen);
3189
e609e586
NC
3190 big_utf8 = DO_UTF8(big);
3191 little_utf8 = DO_UTF8(little);
3192 if (big_utf8 ^ little_utf8) {
3193 /* One needs to be upgraded. */
2f040f7f
NC
3194 if (little_utf8 && !PL_encoding) {
3195 /* Well, maybe instead we might be able to downgrade the small
3196 string? */
1eced8f8 3197 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
2f040f7f
NC
3198 &little_utf8);
3199 if (little_utf8) {
3200 /* If the large string is ISO-8859-1, and it's not possible to
3201 convert the small string to ISO-8859-1, then there is no
3202 way that it could be found anywhere by index. */
3203 retval = -1;
3204 goto fail;
3205 }
e609e586 3206
2f040f7f
NC
3207 /* At this point, pv is a malloc()ed string. So donate it to temp
3208 to ensure it will get free()d */
3209 little = temp = newSV(0);
73ee8be2
NC
3210 sv_usepvn(temp, pv, llen);
3211 little_p = SvPVX(little);
e609e586 3212 } else {
73ee8be2
NC
3213 temp = little_utf8
3214 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
2f040f7f
NC
3215
3216 if (PL_encoding) {
3217 sv_recode_to_utf8(temp, PL_encoding);
3218 } else {
3219 sv_utf8_upgrade(temp);
3220 }
3221 if (little_utf8) {
3222 big = temp;
3223 big_utf8 = TRUE;
73ee8be2 3224 big_p = SvPV_const(big, biglen);
2f040f7f
NC
3225 } else {
3226 little = temp;
73ee8be2 3227 little_p = SvPV_const(little, llen);
2f040f7f 3228 }
e609e586
NC
3229 }
3230 }
73ee8be2
NC
3231 if (SvGAMAGIC(big)) {
3232 /* Life just becomes a lot easier if I use a temporary here.
3233 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3234 will trigger magic and overloading again, as will fbm_instr()
3235 */
59cd0e26
NC
3236 big = newSVpvn_flags(big_p, biglen,
3237 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
73ee8be2
NC
3238 big_p = SvPVX(big);
3239 }
e4e44778 3240 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
73ee8be2
NC
3241 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3242 warn on undef, and we've already triggered a warning with the
3243 SvPV_const some lines above. We can't remove that, as we need to
3244 call some SvPV to trigger overloading early and find out if the
3245 string is UTF-8.
3246 This is all getting to messy. The API isn't quite clean enough,
3247 because data access has side effects.
3248 */
59cd0e26
NC
3249 little = newSVpvn_flags(little_p, llen,
3250 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
73ee8be2
NC
3251 little_p = SvPVX(little);
3252 }
e609e586 3253
d3e26383 3254 if (!threeargs)
2723d216 3255 offset = is_index ? 0 : biglen;
a0ed51b3 3256 else {
ad66a58c 3257 if (big_utf8 && offset > 0)
a0ed51b3 3258 sv_pos_u2b(big, &offset, 0);
73ee8be2
NC
3259 if (!is_index)
3260 offset += llen;
a0ed51b3 3261 }
79072805
LW
3262 if (offset < 0)
3263 offset = 0;
ad66a58c
NC
3264 else if (offset > (I32)biglen)
3265 offset = biglen;
73ee8be2
NC
3266 if (!(little_p = is_index
3267 ? fbm_instr((unsigned char*)big_p + offset,
3268 (unsigned char*)big_p + biglen, little, 0)
3269 : rninstr(big_p, big_p + offset,
3270 little_p, little_p + llen)))
a0ed51b3 3271 retval = -1;
ad66a58c 3272 else {
73ee8be2 3273 retval = little_p - big_p;
ad66a58c
NC
3274 if (retval > 0 && big_utf8)
3275 sv_pos_b2u(big, &retval);
3276 }
ef8d46e8 3277 SvREFCNT_dec(temp);
2723d216 3278 fail:
e1dccc0d 3279 PUSHi(retval);
79072805
LW
3280 RETURN;
3281}
3282
3283PP(pp_sprintf)
3284{
97aff369 3285 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3e6bd4bf 3286 SvTAINTED_off(TARG);
79072805 3287 do_sprintf(TARG, SP-MARK, MARK+1);
bbce6d69 3288 TAINT_IF(SvTAINTED(TARG));
79072805
LW
3289 SP = ORIGMARK;
3290 PUSHTARG;
3291 RETURN;
3292}
3293
79072805
LW
3294PP(pp_ord)
3295{
97aff369 3296 dVAR; dSP; dTARGET;
1eced8f8 3297
7df053ec 3298 SV *argsv = POPs;
ba210ebe 3299 STRLEN len;
349d4f2f 3300 const U8 *s = (U8*)SvPV_const(argsv, len);
121910a4 3301
799ef3cb 3302 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
1eced8f8 3303 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
799ef3cb 3304 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
121910a4
JH
3305 argsv = tmpsv;
3306 }
79072805 3307
872c91ae 3308 XPUSHu(DO_UTF8(argsv) ?
89ebb4a3 3309 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
5fc32dea 3310 (UV)(*s & 0xff));
68795e93 3311
79072805
LW
3312 RETURN;
3313}
3314
463ee0b2
LW
3315PP(pp_chr)
3316{
97aff369 3317 dVAR; dSP; dTARGET;
463ee0b2 3318 char *tmps;
8a064bd6
JH
3319 UV value;
3320
3321 if (((SvIOK_notUV(TOPs) && SvIV(TOPs) < 0)
3322 ||
3323 (SvNOK(TOPs) && SvNV(TOPs) < 0.0))) {
3324 if (IN_BYTES) {
3325 value = POPu; /* chr(-1) eq chr(0xff), etc. */
3326 } else {
3327 (void) POPs; /* Ignore the argument value. */
3328 value = UNICODE_REPLACEMENT;
3329 }
3330 } else {
3331 value = POPu;
3332 }
463ee0b2 3333
862a34c6 3334 SvUPGRADE(TARG,SVt_PV);
a0ed51b3 3335
0064a8a9 3336 if (value > 255 && !IN_BYTES) {
eb160463 3337 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
62961d2e 3338 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
349d4f2f 3339 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
a0ed51b3
LW
3340 *tmps = '\0';
3341 (void)SvPOK_only(TARG);
aa6ffa16 3342 SvUTF8_on(TARG);
a0ed51b3
LW
3343 XPUSHs(TARG);
3344 RETURN;
3345 }
3346
748a9306 3347 SvGROW(TARG,2);
463ee0b2
LW
3348 SvCUR_set(TARG, 1);
3349 tmps = SvPVX(TARG);
eb160463 3350 *tmps++ = (char)value;
748a9306 3351 *tmps = '\0';
a0d0e21e 3352 (void)SvPOK_only(TARG);
4c5ed6e2 3353
88632417 3354 if (PL_encoding && !IN_BYTES) {
799ef3cb 3355 sv_recode_to_utf8(TARG, PL_encoding);
88632417
JH
3356 tmps = SvPVX(TARG);
3357 if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
4c5ed6e2
ST
3358 UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
3359 SvGROW(TARG, 2);
d5a15ac2 3360 tmps = SvPVX(TARG);
4c5ed6e2
ST
3361 SvCUR_set(TARG, 1);
3362 *tmps++ = (char)value;
88632417 3363 *tmps = '\0';
4c5ed6e2 3364 SvUTF8_off(TARG);
88632417
JH
3365 }
3366 }
4c5ed6e2 3367
463ee0b2
LW
3368 XPUSHs(TARG);
3369 RETURN;
3370}
3371
79072805
LW
3372PP(pp_crypt)
3373{
79072805 3374#ifdef HAS_CRYPT
97aff369 3375 dVAR; dSP; dTARGET;
5f74f29c 3376 dPOPTOPssrl;
85c16d83 3377 STRLEN len;
10516c54 3378 const char *tmps = SvPV_const(left, len);
2bc69dc4 3379
85c16d83 3380 if (DO_UTF8(left)) {
2bc69dc4 3381 /* If Unicode, try to downgrade.
f2791508
JH
3382 * If not possible, croak.
3383 * Yes, we made this up. */
1b6737cc 3384 SV* const tsv = sv_2mortal(newSVsv(left));
2bc69dc4 3385
f2791508 3386 SvUTF8_on(tsv);
2bc69dc4 3387 sv_utf8_downgrade(tsv, FALSE);
349d4f2f 3388 tmps = SvPV_const(tsv, len);
85c16d83 3389 }
05404ffe
JH
3390# ifdef USE_ITHREADS
3391# ifdef HAS_CRYPT_R
3392 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3393 /* This should be threadsafe because in ithreads there is only
3394 * one thread per interpreter. If this would not be true,
3395 * we would need a mutex to protect this malloc. */
3396 PL_reentrant_buffer->_crypt_struct_buffer =
3397 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3398#if defined(__GLIBC__) || defined(__EMX__)
3399 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3400 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3401 /* work around glibc-2.2.5 bug */
3402 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3403 }
05404ffe 3404#endif
6ab58e4d 3405 }
05404ffe
JH
3406# endif /* HAS_CRYPT_R */
3407# endif /* USE_ITHREADS */
5f74f29c 3408# ifdef FCRYPT
83003860 3409 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
5f74f29c 3410# else
83003860 3411 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
5f74f29c 3412# endif
ec93b65f 3413 SETTARG;
4808266b 3414 RETURN;
79072805 3415#else
b13b2135 3416 DIE(aTHX_
79072805
LW
3417 "The crypt() function is unimplemented due to excessive paranoia.");
3418#endif
79072805
LW
3419}
3420
00f254e2
KW
3421/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3422 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3423
00f254e2
KW
3424/* Below are several macros that generate code */
3425/* Generates code to store a unicode codepoint c that is known to occupy
3426 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1. */
3427#define STORE_UNI_TO_UTF8_TWO_BYTE(p, c) \
3428 STMT_START { \
3429 *(p) = UTF8_TWO_BYTE_HI(c); \
3430 *((p)+1) = UTF8_TWO_BYTE_LO(c); \
3431 } STMT_END
3432
3433/* Like STORE_UNI_TO_UTF8_TWO_BYTE, but advances p to point to the next
3434 * available byte after the two bytes */
3435#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3436 STMT_START { \
3437 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3438 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3439 } STMT_END
3440
3441/* Generates code to store the upper case of latin1 character l which is known
3442 * to have its upper case be non-latin1 into the two bytes p and p+1. There
3443 * are only two characters that fit this description, and this macro knows
3444 * about them, and that the upper case values fit into two UTF-8 or UTF-EBCDIC
3445 * bytes */
3446#define STORE_NON_LATIN1_UC(p, l) \
3447STMT_START { \
3448 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3449 STORE_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3450 } else { /* Must be the following letter */ \
3451 STORE_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3452 } \
3453} STMT_END
3454
3455/* Like STORE_NON_LATIN1_UC, but advances p to point to the next available byte
3456 * after the character stored */
3457#define CAT_NON_LATIN1_UC(p, l) \
3458STMT_START { \
3459 if ((l) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3460 CAT_UNI_TO_UTF8_TWO_BYTE((p), LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS); \
3461 } else { \
3462 CAT_UNI_TO_UTF8_TWO_BYTE((p), GREEK_CAPITAL_LETTER_MU); \
3463 } \
3464} STMT_END
3465
3466/* Generates code to add the two UTF-8 bytes (probably u) that are the upper
3467 * case of l into p and p+1. u must be the result of toUPPER_LATIN1_MOD(l),
3468 * and must require two bytes to store it. Advances p to point to the next
3469 * available position */
3470#define CAT_TWO_BYTE_UNI_UPPER_MOD(p, l, u) \
3471STMT_START { \
3472 if ((u) != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) { \
3473 CAT_UNI_TO_UTF8_TWO_BYTE((p), (u)); /* not special, just save it */ \
3474 } else if (l == LATIN_SMALL_LETTER_SHARP_S) { \
3475 *(p)++ = 'S'; *(p)++ = 'S'; /* upper case is 'SS' */ \
3476 } else {/* else is one of the other two special cases */ \
3477 CAT_NON_LATIN1_UC((p), (l)); \
3478 } \
3479} STMT_END
3480
79072805
LW
3481PP(pp_ucfirst)
3482{
00f254e2
KW
3483 /* Actually is both lcfirst() and ucfirst(). Only the first character
3484 * changes. This means that possibly we can change in-place, ie., just
3485 * take the source and change that one character and store it back, but not
3486 * if read-only etc, or if the length changes */
3487
97aff369 3488 dVAR;
39644a26 3489 dSP;
d54190f6 3490 SV *source = TOPs;
00f254e2 3491 STRLEN slen; /* slen is the byte length of the whole SV. */
d54190f6
NC
3492 STRLEN need;
3493 SV *dest;
00f254e2
KW
3494 bool inplace; /* ? Convert first char only, in-place */
3495 bool doing_utf8 = FALSE; /* ? using utf8 */
3496 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
12e9c124 3497 const int op_type = PL_op->op_type;
d54190f6
NC
3498 const U8 *s;
3499 U8 *d;
3500 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
00f254e2
KW
3501 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3502 * stored as UTF-8 at s. */
3503 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3504 * lowercased) character stored in tmpbuf. May be either
3505 * UTF-8 or not, but in either case is the number of bytes */
d54190f6
NC
3506
3507 SvGETMAGIC(source);
3508 if (SvOK(source)) {
3509 s = (const U8*)SvPV_nomg_const(source, slen);
3510 } else {
0a0ffbce
RGS
3511 if (ckWARN(WARN_UNINITIALIZED))
3512 report_uninit(source);
1eced8f8 3513 s = (const U8*)"";
d54190f6
NC
3514 slen = 0;
3515 }
a0ed51b3 3516
00f254e2
KW
3517 /* We may be able to get away with changing only the first character, in
3518 * place, but not if read-only, etc. Later we may discover more reasons to
3519 * not convert in-place. */
3520 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3521
3522 /* First calculate what the changed first character should be. This affects
3523 * whether we can just swap it out, leaving the rest of the string unchanged,
3524 * or even if have to convert the dest to UTF-8 when the source isn't */
3525
3526 if (! slen) { /* If empty */
3527 need = 1; /* still need a trailing NUL */
3528 }
3529 else if (DO_UTF8(source)) { /* Is the source utf8? */
d54190f6 3530 doing_utf8 = TRUE;
00f254e2 3531
00f254e2
KW
3532 if (UTF8_IS_INVARIANT(*s)) {
3533
3534 /* An invariant source character is either ASCII or, in EBCDIC, an
3535 * ASCII equivalent or a caseless C1 control. In both these cases,
3536 * the lower and upper cases of any character are also invariants
3537 * (and title case is the same as upper case). So it is safe to
3538 * use the simple case change macros which avoid the overhead of
3539 * the general functions. Note that if perl were to be extended to
3540 * do locale handling in UTF-8 strings, this wouldn't be true in,
3541 * for example, Lithuanian or Turkic. */
3542 *tmpbuf = (op_type == OP_LCFIRST) ? toLOWER(*s) : toUPPER(*s);
3543 tculen = ulen = 1;
3544 need = slen + 1;
12e9c124 3545 }
00f254e2
KW
3546 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
3547 U8 chr;
3548
3549 /* Similarly, if the source character isn't invariant but is in the
3550 * latin1 range (or EBCDIC equivalent thereof), we have the case
3551 * changes compiled into perl, and can avoid the overhead of the
3552 * general functions. In this range, the characters are stored as
3553 * two UTF-8 bytes, and it so happens that any changed-case version
3554 * is also two bytes (in both ASCIIish and EBCDIC machines). */
3555 tculen = ulen = 2;
3556 need = slen + 1;
3557
3558 /* Convert the two source bytes to a single Unicode code point
3559 * value, change case and save for below */
356979f4 3560 chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
00f254e2
KW
3561 if (op_type == OP_LCFIRST) { /* lower casing is easy */
3562 U8 lower = toLOWER_LATIN1(chr);
3563 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
3564 }
3565 else { /* ucfirst */
3566 U8 upper = toUPPER_LATIN1_MOD(chr);
3567
3568 /* Most of the latin1 range characters are well-behaved. Their
3569 * title and upper cases are the same, and are also in the
3570 * latin1 range. The macro above returns their upper (hence
3571 * title) case, and all that need be done is to save the result
3572 * for below. However, several characters are problematic, and
3573 * have to be handled specially. The MOD in the macro name
3574 * above means that these tricky characters all get mapped to
3575 * the single character LATIN_SMALL_LETTER_Y_WITH_DIAERESIS.
3576 * This mapping saves some tests for the majority of the
3577 * characters */
3578
3579 if (upper != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3580
3581 /* Not tricky. Just save it. */
3582 STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, upper);
3583 }
3584 else if (chr == LATIN_SMALL_LETTER_SHARP_S) {
3585
3586 /* This one is tricky because it is two characters long,
3587 * though the UTF-8 is still two bytes, so the stored
3588 * length doesn't change */
3589 *tmpbuf = 'S'; /* The UTF-8 is 'Ss' */
3590 *(tmpbuf + 1) = 's';
3591 }
3592 else {
3593
3594 /* The other two have their title and upper cases the same,
3595 * but are tricky because the changed-case characters
3596 * aren't in the latin1 range. They, however, do fit into
3597 * two UTF-8 bytes */
3598 STORE_NON_LATIN1_UC(tmpbuf, chr);
3599 }
3600 }
3601 }
3602 else {
00f254e2
KW
3603
3604 /* Here, can't short-cut the general case */
3605
3606 utf8_to_uvchr(s, &ulen);
3607 if (op_type == OP_UCFIRST) toTITLE_utf8(s, tmpbuf, &tculen);
3608 else toLOWER_utf8(s, tmpbuf, &tculen);
3609
3610 /* we can't do in-place if the length changes. */
3611 if (ulen != tculen) inplace = FALSE;
3612 need = slen + 1 - ulen + tculen;
00f254e2 3613 }
d54190f6 3614 }
00f254e2
KW
3615 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3616 * latin1 is treated as caseless. Note that a locale takes
3617 * precedence */
3618 tculen = 1; /* Most characters will require one byte, but this will
3619 * need to be overridden for the tricky ones */
3620 need = slen + 1;
3621
3622 if (op_type == OP_LCFIRST) {
d54190f6 3623
00f254e2
KW
3624 /* lower case the first letter: no trickiness for any character */
3625 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3626 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3627 }
3628 /* is ucfirst() */
3629 else if (IN_LOCALE_RUNTIME) {
3630 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3631 * have upper and title case different
3632 */
3633 }
3634 else if (! IN_UNI_8_BIT) {
3635 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3636 * on EBCDIC machines whatever the
3637 * native function does */
3638 }
3639 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
3640 *tmpbuf = toUPPER_LATIN1_MOD(*s);
3641
3642 /* tmpbuf now has the correct title case for all latin1 characters
3643 * except for the several ones that have tricky handling. All
3644 * of these are mapped by the MOD to the letter below. */
3645 if (*tmpbuf == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
3646
3647 /* The length is going to change, with all three of these, so
3648 * can't replace just the first character */
3649 inplace = FALSE;
3650
3651 /* We use the original to distinguish between these tricky
3652 * cases */
3653 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
3654 /* Two character title case 'Ss', but can remain non-UTF-8 */
3655 need = slen + 2;
3656 *tmpbuf = 'S';
3657 *(tmpbuf + 1) = 's'; /* Assert: length(tmpbuf) >= 2 */
3658 tculen = 2;
3659 }
3660 else {
d54190f6 3661
00f254e2
KW
3662 /* The other two tricky ones have their title case outside
3663 * latin1. It is the same as their upper case. */
3664 doing_utf8 = TRUE;
3665 STORE_NON_LATIN1_UC(tmpbuf, *s);
3666
3667 /* The UTF-8 and UTF-EBCDIC lengths of both these characters
3668 * and their upper cases is 2. */
3669 tculen = ulen = 2;
3670
3671 /* The entire result will have to be in UTF-8. Assume worst
3672 * case sizing in conversion. (all latin1 characters occupy
3673 * at most two bytes in utf8) */
3674 convert_source_to_utf8 = TRUE;
3675 need = slen * 2 + 1;
3676 }
3677