This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Use macro not swash for utf8 quotemeta
[perl5.git] / pp.c
CommitLineData
a0d0e21e 1/* pp.c
79072805 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
79072805 5 *
a0d0e21e
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
79072805 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
15 *
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
a0d0e21e 17 */
79072805 18
166f8a29
DM
19/* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
24 */
25
79072805 26#include "EXTERN.h"
864dbfa3 27#define PERL_IN_PP_C
79072805 28#include "perl.h"
77bc9082 29#include "keywords.h"
79072805 30
a4af207c 31#include "reentr.h"
685289b5 32#include "regcharclass.h"
a4af207c 33
dfe9444c
AD
34/* XXX I can't imagine anyone who doesn't have this actually _needs_
35 it, since pid_t is an integral type.
36 --AD 2/20/1998
37*/
38#ifdef NEED_GETPID_PROTO
39extern Pid_t getpid (void);
8ac85365
NIS
40#endif
41
0630166f
SP
42/*
43 * Some BSDs and Cygwin default to POSIX math instead of IEEE.
44 * This switches them over to IEEE.
45 */
46#if defined(LIBM_LIB_VERSION)
47 _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
48#endif
49
13017935
SM
50/* variations on pp_null */
51
93a17b20
LW
52PP(pp_stub)
53{
97aff369 54 dVAR;
39644a26 55 dSP;
54310121 56 if (GIMME_V == G_SCALAR)
3280af22 57 XPUSHs(&PL_sv_undef);
93a17b20
LW
58 RETURN;
59}
60
79072805
LW
61/* Pushy stuff. */
62
93a17b20
LW
63PP(pp_padav)
64{
97aff369 65 dVAR; dSP; dTARGET;
13017935 66 I32 gimme;
e190e9b4 67 assert(SvTYPE(TARG) == SVt_PVAV);
533c011a 68 if (PL_op->op_private & OPpLVAL_INTRO)
a5911867
RGS
69 if (!(PL_op->op_private & OPpPAD_STATE))
70 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
85e6fe83 71 EXTEND(SP, 1);
533c011a 72 if (PL_op->op_flags & OPf_REF) {
85e6fe83 73 PUSHs(TARG);
93a17b20 74 RETURN;
40c94d11
FC
75 } else if (PL_op->op_private & OPpMAYBE_LVSUB) {
76 const I32 flags = is_lvalue_sub();
77 if (flags && !(flags & OPpENTERSUB_INARGS)) {
78f9721b 78 if (GIMME == G_SCALAR)
a84828f3 79 /* diag_listed_as: Can't return %s to lvalue scalar context */
78f9721b
SM
80 Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
81 PUSHs(TARG);
82 RETURN;
40c94d11 83 }
85e6fe83 84 }
13017935
SM
85 gimme = GIMME_V;
86 if (gimme == G_ARRAY) {
502c6561 87 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
85e6fe83 88 EXTEND(SP, maxarg);
93965878
NIS
89 if (SvMAGICAL(TARG)) {
90 U32 i;
eb160463 91 for (i=0; i < (U32)maxarg; i++) {
502c6561 92 SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
3280af22 93 SP[i+1] = (svp) ? *svp : &PL_sv_undef;
93965878
NIS
94 }
95 }
96 else {
502c6561 97 Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*);
93965878 98 }
85e6fe83
LW
99 SP += maxarg;
100 }
13017935 101 else if (gimme == G_SCALAR) {
1b6737cc 102 SV* const sv = sv_newmortal();
502c6561 103 const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
85e6fe83
LW
104 sv_setiv(sv, maxarg);
105 PUSHs(sv);
106 }
107 RETURN;
93a17b20
LW
108}
109
110PP(pp_padhv)
111{
97aff369 112 dVAR; dSP; dTARGET;
54310121
PP
113 I32 gimme;
114
e190e9b4 115 assert(SvTYPE(TARG) == SVt_PVHV);
93a17b20 116 XPUSHs(TARG);
533c011a 117 if (PL_op->op_private & OPpLVAL_INTRO)
a5911867
RGS
118 if (!(PL_op->op_private & OPpPAD_STATE))
119 SAVECLEARSV(PAD_SVl(PL_op->op_targ));
533c011a 120 if (PL_op->op_flags & OPf_REF)
93a17b20 121 RETURN;
40c94d11
FC
122 else if (PL_op->op_private & OPpMAYBE_LVSUB) {
123 const I32 flags = is_lvalue_sub();
124 if (flags && !(flags & OPpENTERSUB_INARGS)) {
78f9721b 125 if (GIMME == G_SCALAR)
a84828f3 126 /* diag_listed_as: Can't return %s to lvalue scalar context */
78f9721b
SM
127 Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
128 RETURN;
40c94d11 129 }
78f9721b 130 }
54310121
PP
131 gimme = GIMME_V;
132 if (gimme == G_ARRAY) {
981b7185 133 RETURNOP(Perl_do_kv(aTHX));
85e6fe83 134 }
c8fe3bdf 135 else if ((PL_op->op_private & OPpTRUEBOOL
adc42c31 136 || ( PL_op->op_private & OPpMAYBE_TRUEBOOL
c8fe3bdf
FC
137 && block_gimme() == G_VOID ))
138 && (!SvRMAGICAL(TARG) || !mg_find(TARG, PERL_MAGIC_tied)))
139 SETs(HvUSEDKEYS(TARG) ? &PL_sv_yes : sv_2mortal(newSViv(0)));
54310121 140 else if (gimme == G_SCALAR) {
85fbaab2 141 SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
85e6fe83 142 SETs(sv);
85e6fe83 143 }
54310121 144 RETURN;
93a17b20
LW
145}
146
79072805
LW
147/* Translations. */
148
4bdf8368 149static const char S_no_symref_sv[] =
def89bff
NC
150 "Can't use string (\"%" SVf32 "\"%s) as %s ref while \"strict refs\" in use";
151
6f7909da
FC
152/* In some cases this function inspects PL_op. If this function is called
153 for new op types, more bool parameters may need to be added in place of
154 the checks.
155
156 When noinit is true, the absence of a gv will cause a retval of undef.
157 This is unrelated to the cv-to-gv assignment case.
6f7909da
FC
158*/
159
160static SV *
161S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
162 const bool noinit)
163{
14f0f125 164 dVAR;
f64c9ac5 165 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
ed6116ce 166 if (SvROK(sv)) {
93d7320b
DM
167 if (SvAMAGIC(sv)) {
168 sv = amagic_deref_call(sv, to_gv_amg);
93d7320b 169 }
e4a1664f 170 wasref:
ed6116ce 171 sv = SvRV(sv);
b1dadf13 172 if (SvTYPE(sv) == SVt_PVIO) {
159b6efe 173 GV * const gv = MUTABLE_GV(sv_newmortal());
885f468a 174 gv_init(gv, 0, "__ANONIO__", 10, 0);
a45c7426 175 GvIOp(gv) = MUTABLE_IO(sv);
b37c2d43 176 SvREFCNT_inc_void_NN(sv);
ad64d0ec 177 sv = MUTABLE_SV(gv);
ef54e1a4 178 }
6e592b3a 179 else if (!isGV_with_GP(sv))
6f7909da 180 return (SV *)Perl_die(aTHX_ "Not a GLOB reference");
79072805
LW
181 }
182 else {
6e592b3a 183 if (!isGV_with_GP(sv)) {
f132ae69 184 if (!SvOK(sv)) {
b13b2135 185 /* If this is a 'my' scalar and flag is set then vivify
853846ea 186 * NI-S 1999/05/07
b13b2135 187 */
f132ae69 188 if (vivify_sv && sv != &PL_sv_undef) {
2c8ac474 189 GV *gv;
ce74145d
FC
190 if (SvREADONLY(sv))
191 Perl_croak_no_modify(aTHX);
2c8ac474 192 if (cUNOP->op_targ) {
0bd48802 193 SV * const namesv = PAD_SV(cUNOP->op_targ);
159b6efe 194 gv = MUTABLE_GV(newSV(0));
6b10071b 195 gv_init_sv(gv, CopSTASH(PL_curcop), namesv, 0);
2c8ac474
GS
196 }
197 else {
0bd48802 198 const char * const name = CopSTASHPV(PL_curcop);
6b10071b
BF
199 gv = newGVgen_flags(name,
200 HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
1d8d4d2a 201 }
43230e26 202 prepare_SV_for_RV(sv);
ad64d0ec 203 SvRV_set(sv, MUTABLE_SV(gv));
853846ea 204 SvROK_on(sv);
1d8d4d2a 205 SvSETMAGIC(sv);
853846ea 206 goto wasref;
2c8ac474 207 }
6f7909da
FC
208 if (PL_op->op_flags & OPf_REF || strict)
209 return (SV *)Perl_die(aTHX_ PL_no_usym, "a symbol");
599cee73 210 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 211 report_uninit(sv);
6f7909da 212 return &PL_sv_undef;
a0d0e21e 213 }
6f7909da 214 if (noinit)
35cd451c 215 {
77cb3b01
FC
216 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
217 sv, GV_ADDMG, SVt_PVGV
23496c6e 218 ))))
6f7909da 219 return &PL_sv_undef;
35cd451c
GS
220 }
221 else {
6f7909da
FC
222 if (strict)
223 return
224 (SV *)Perl_die(aTHX_
225 S_no_symref_sv,
226 sv,
bf3d870f 227 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""),
6f7909da
FC
228 "a symbol"
229 );
e26df76a
NC
230 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
231 == OPpDONT_INIT_GV) {
232 /* We are the target of a coderef assignment. Return
233 the scalar unchanged, and let pp_sasssign deal with
234 things. */
6f7909da 235 return sv;
e26df76a 236 }
77cb3b01 237 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
35cd451c 238 }
2acc3314 239 /* FAKE globs in the symbol table cause weird bugs (#77810) */
96293f45 240 SvFAKE_off(sv);
93a17b20 241 }
79072805 242 }
8dc99089 243 if (SvFAKE(sv) && !(PL_op->op_private & OPpALLOW_FAKE)) {
2acc3314 244 SV *newsv = sv_newmortal();
5cf4b255 245 sv_setsv_flags(newsv, sv, 0);
2acc3314 246 SvFAKE_off(newsv);
d8906c05 247 sv = newsv;
2acc3314 248 }
6f7909da
FC
249 return sv;
250}
251
252PP(pp_rv2gv)
253{
254 dVAR; dSP; dTOPss;
255
256 sv = S_rv2gv(aTHX_
257 sv, PL_op->op_private & OPpDEREF,
258 PL_op->op_private & HINT_STRICT_REFS,
259 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
260 || PL_op->op_type == OP_READLINE
261 );
d8906c05
FC
262 if (PL_op->op_private & OPpLVAL_INTRO)
263 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
264 SETs(sv);
79072805
LW
265 RETURN;
266}
267
dc3c76f8
NC
268/* Helper function for pp_rv2sv and pp_rv2av */
269GV *
fe9845cc
RB
270Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
271 const svtype type, SV ***spp)
dc3c76f8
NC
272{
273 dVAR;
274 GV *gv;
275
7918f24d
NC
276 PERL_ARGS_ASSERT_SOFTREF2XV;
277
dc3c76f8
NC
278 if (PL_op->op_private & HINT_STRICT_REFS) {
279 if (SvOK(sv))
bf3d870f
FC
280 Perl_die(aTHX_ S_no_symref_sv, sv,
281 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), what);
dc3c76f8
NC
282 else
283 Perl_die(aTHX_ PL_no_usym, what);
284 }
285 if (!SvOK(sv)) {
fd1d9b5c 286 if (
c8fe3bdf 287 PL_op->op_flags & OPf_REF
fd1d9b5c 288 )
dc3c76f8
NC
289 Perl_die(aTHX_ PL_no_usym, what);
290 if (ckWARN(WARN_UNINITIALIZED))
291 report_uninit(sv);
292 if (type != SVt_PV && GIMME_V == G_ARRAY) {
293 (*spp)--;
294 return NULL;
295 }
296 **spp = &PL_sv_undef;
297 return NULL;
298 }
299 if ((PL_op->op_flags & OPf_SPECIAL) &&
300 !(PL_op->op_flags & OPf_MOD))
301 {
77cb3b01 302 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
dc3c76f8
NC
303 {
304 **spp = &PL_sv_undef;
305 return NULL;
306 }
307 }
308 else {
77cb3b01 309 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
dc3c76f8
NC
310 }
311 return gv;
312}
313
79072805
LW
314PP(pp_rv2sv)
315{
97aff369 316 dVAR; dSP; dTOPss;
c445ea15 317 GV *gv = NULL;
79072805 318
9026059d 319 SvGETMAGIC(sv);
ed6116ce 320 if (SvROK(sv)) {
93d7320b
DM
321 if (SvAMAGIC(sv)) {
322 sv = amagic_deref_call(sv, to_sv_amg);
93d7320b 323 }
f5284f61 324
ed6116ce 325 sv = SvRV(sv);
79072805
LW
326 switch (SvTYPE(sv)) {
327 case SVt_PVAV:
328 case SVt_PVHV:
329 case SVt_PVCV:
cbae9b9f
YST
330 case SVt_PVFM:
331 case SVt_PVIO:
cea2e8a9 332 DIE(aTHX_ "Not a SCALAR reference");
42d0e0b7 333 default: NOOP;
79072805
LW
334 }
335 }
336 else {
159b6efe 337 gv = MUTABLE_GV(sv);
748a9306 338
6e592b3a 339 if (!isGV_with_GP(gv)) {
dc3c76f8
NC
340 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
341 if (!gv)
342 RETURN;
463ee0b2 343 }
29c711a3 344 sv = GvSVn(gv);
a0d0e21e 345 }
533c011a 346 if (PL_op->op_flags & OPf_MOD) {
82d03984
RGS
347 if (PL_op->op_private & OPpLVAL_INTRO) {
348 if (cUNOP->op_first->op_type == OP_NULL)
159b6efe 349 sv = save_scalar(MUTABLE_GV(TOPs));
82d03984
RGS
350 else if (gv)
351 sv = save_scalar(gv);
352 else
f1f66076 353 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
82d03984 354 }
533c011a 355 else if (PL_op->op_private & OPpDEREF)
9026059d 356 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
79072805 357 }
a0d0e21e 358 SETs(sv);
79072805
LW
359 RETURN;
360}
361
362PP(pp_av2arylen)
363{
97aff369 364 dVAR; dSP;
502c6561 365 AV * const av = MUTABLE_AV(TOPs);
02d85cc3
EB
366 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
367 if (lvalue) {
368 SV ** const sv = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
369 if (!*sv) {
370 *sv = newSV_type(SVt_PVMG);
371 sv_magic(*sv, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
372 }
373 SETs(*sv);
374 } else {
e1dccc0d 375 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
79072805 376 }
79072805
LW
377 RETURN;
378}
379
a0d0e21e
LW
380PP(pp_pos)
381{
2154eca7 382 dVAR; dSP; dPOPss;
8ec5e241 383
78f9721b 384 if (PL_op->op_flags & OPf_MOD || LVRET) {
16eb5365
FC
385 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
386 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
387 LvTYPE(ret) = '.';
388 LvTARG(ret) = SvREFCNT_inc_simple(sv);
2154eca7 389 PUSHs(ret); /* no SvSETMAGIC */
a0d0e21e
LW
390 RETURN;
391 }
392 else {
a0d0e21e 393 if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) {
1b6737cc 394 const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global);
565764a8 395 if (mg && mg->mg_len >= 0) {
2154eca7 396 dTARGET;
a0ed51b3 397 I32 i = mg->mg_len;
7e2040f0 398 if (DO_UTF8(sv))
a0ed51b3 399 sv_pos_b2u(sv, &i);
e1dccc0d 400 PUSHi(i);
a0d0e21e
LW
401 RETURN;
402 }
403 }
404 RETPUSHUNDEF;
405 }
406}
407
79072805
LW
408PP(pp_rv2cv)
409{
97aff369 410 dVAR; dSP;
79072805 411 GV *gv;
1eced8f8 412 HV *stash_unused;
c445ea15 413 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
9da346da 414 ? GV_ADDMG
c445ea15
AL
415 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
416 ? GV_ADD|GV_NOEXPAND
417 : GV_ADD;
4633a7c4
LW
418 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
419 /* (But not in defined().) */
e26df76a 420
1eced8f8 421 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
5a20ba3d 422 if (cv) NOOP;
e26df76a 423 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
ea726b52 424 cv = MUTABLE_CV(gv);
e26df76a 425 }
07055b4c 426 else
ea726b52 427 cv = MUTABLE_CV(&PL_sv_undef);
ad64d0ec 428 SETs(MUTABLE_SV(cv));
79072805
LW
429 RETURN;
430}
431
c07a80fd
PP
432PP(pp_prototype)
433{
97aff369 434 dVAR; dSP;
c07a80fd
PP
435 CV *cv;
436 HV *stash;
437 GV *gv;
fabdb6c0 438 SV *ret = &PL_sv_undef;
c07a80fd 439
6954f42f 440 if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs));
b6c543e3 441 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
e3f73d4e 442 const char * s = SvPVX_const(TOPs);
b6c543e3 443 if (strnEQ(s, "CORE::", 6)) {
be1b855b 444 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
b66130dd 445 if (!code || code == -KEY_CORE)
1b08e051
FC
446 DIE(aTHX_ "Can't find an opnumber for \"%"SVf"\"",
447 SVfARG(newSVpvn_flags(
448 s+6, SvCUR(TOPs)-6, SvFLAGS(TOPs) & SVf_UTF8
449 )));
4e338c21 450 {
b66130dd
FC
451 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
452 if (sv) ret = sv;
453 }
b8c38f0a 454 goto set;
b6c543e3
IZ
455 }
456 }
f2c0649b 457 cv = sv_2cv(TOPs, &stash, &gv, 0);
5f05dabc 458 if (cv && SvPOK(cv))
8fa6a409
FC
459 ret = newSVpvn_flags(
460 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv)
461 );
b6c543e3 462 set:
c07a80fd
PP
463 SETs(ret);
464 RETURN;
465}
466
a0d0e21e
LW
467PP(pp_anoncode)
468{
97aff369 469 dVAR; dSP;
ea726b52 470 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
a5f75d66 471 if (CvCLONE(cv))
ad64d0ec 472 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
5f05dabc 473 EXTEND(SP,1);
ad64d0ec 474 PUSHs(MUTABLE_SV(cv));
a0d0e21e
LW
475 RETURN;
476}
477
478PP(pp_srefgen)
79072805 479{
97aff369 480 dVAR; dSP;
71be2cbc 481 *SP = refto(*SP);
79072805 482 RETURN;
8ec5e241 483}
a0d0e21e
LW
484
485PP(pp_refgen)
486{
97aff369 487 dVAR; dSP; dMARK;
a0d0e21e 488 if (GIMME != G_ARRAY) {
5f0b1d4e
GS
489 if (++MARK <= SP)
490 *MARK = *SP;
491 else
3280af22 492 *MARK = &PL_sv_undef;
5f0b1d4e
GS
493 *MARK = refto(*MARK);
494 SP = MARK;
495 RETURN;
a0d0e21e 496 }
bbce6d69 497 EXTEND_MORTAL(SP - MARK);
71be2cbc
PP
498 while (++MARK <= SP)
499 *MARK = refto(*MARK);
a0d0e21e 500 RETURN;
79072805
LW
501}
502
76e3520e 503STATIC SV*
cea2e8a9 504S_refto(pTHX_ SV *sv)
71be2cbc 505{
97aff369 506 dVAR;
71be2cbc
PP
507 SV* rv;
508
7918f24d
NC
509 PERL_ARGS_ASSERT_REFTO;
510
71be2cbc
PP
511 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
512 if (LvTARGLEN(sv))
68dc0745
PP
513 vivify_defelem(sv);
514 if (!(sv = LvTARG(sv)))
3280af22 515 sv = &PL_sv_undef;
0dd88869 516 else
b37c2d43 517 SvREFCNT_inc_void_NN(sv);
71be2cbc 518 }
d8b46c1b 519 else if (SvTYPE(sv) == SVt_PVAV) {
502c6561
NC
520 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
521 av_reify(MUTABLE_AV(sv));
d8b46c1b 522 SvTEMP_off(sv);
b37c2d43 523 SvREFCNT_inc_void_NN(sv);
d8b46c1b 524 }
f2933f5f
DM
525 else if (SvPADTMP(sv) && !IS_PADGV(sv))
526 sv = newSVsv(sv);
71be2cbc
PP
527 else {
528 SvTEMP_off(sv);
b37c2d43 529 SvREFCNT_inc_void_NN(sv);
71be2cbc
PP
530 }
531 rv = sv_newmortal();
4df7f6af 532 sv_upgrade(rv, SVt_IV);
b162af07 533 SvRV_set(rv, sv);
71be2cbc
PP
534 SvROK_on(rv);
535 return rv;
536}
537
79072805
LW
538PP(pp_ref)
539{
97aff369 540 dVAR; dSP; dTARGET;
1b6737cc 541 SV * const sv = POPs;
f12c7020 542
5b295bef
RD
543 if (sv)
544 SvGETMAGIC(sv);
f12c7020 545
a0d0e21e 546 if (!sv || !SvROK(sv))
4633a7c4 547 RETPUSHNO;
79072805 548
a15456de
BF
549 (void)sv_ref(TARG,SvRV(sv),TRUE);
550 PUSHTARG;
79072805
LW
551 RETURN;
552}
553
554PP(pp_bless)
555{
97aff369 556 dVAR; dSP;
463ee0b2 557 HV *stash;
79072805 558
463ee0b2 559 if (MAXARG == 1)
c2f922f1 560 curstash:
11faa288 561 stash = CopSTASH(PL_curcop);
7b8d334a 562 else {
1b6737cc 563 SV * const ssv = POPs;
7b8d334a 564 STRLEN len;
e1ec3a88 565 const char *ptr;
81689caa 566
c2f922f1
FC
567 if (!ssv) goto curstash;
568 if (!SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv))
81689caa 569 Perl_croak(aTHX_ "Attempt to bless into a reference");
5c144d81 570 ptr = SvPV_const(ssv,len);
a2a5de95
NC
571 if (len == 0)
572 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
573 "Explicit blessing to '' (assuming package main)");
e69c50fe 574 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv));
7b8d334a 575 }
a0d0e21e 576
5d3fdfeb 577 (void)sv_bless(TOPs, stash);
79072805
LW
578 RETURN;
579}
580
fb73857a
PP
581PP(pp_gelem)
582{
97aff369 583 dVAR; dSP;
b13b2135 584
1b6737cc 585 SV *sv = POPs;
a180b31a
BF
586 STRLEN len;
587 const char * const elem = SvPV_const(sv, len);
159b6efe 588 GV * const gv = MUTABLE_GV(POPs);
c445ea15 589 SV * tmpRef = NULL;
1b6737cc 590
c445ea15 591 sv = NULL;
c4ba80c3
NC
592 if (elem) {
593 /* elem will always be NUL terminated. */
1b6737cc 594 const char * const second_letter = elem + 1;
c4ba80c3
NC
595 switch (*elem) {
596 case 'A':
a180b31a 597 if (len == 5 && strEQ(second_letter, "RRAY"))
ad64d0ec 598 tmpRef = MUTABLE_SV(GvAV(gv));
c4ba80c3
NC
599 break;
600 case 'C':
a180b31a 601 if (len == 4 && strEQ(second_letter, "ODE"))
ad64d0ec 602 tmpRef = MUTABLE_SV(GvCVu(gv));
c4ba80c3
NC
603 break;
604 case 'F':
a180b31a 605 if (len == 10 && strEQ(second_letter, "ILEHANDLE")) {
c4ba80c3
NC
606 /* finally deprecated in 5.8.0 */
607 deprecate("*glob{FILEHANDLE}");
ad64d0ec 608 tmpRef = MUTABLE_SV(GvIOp(gv));
c4ba80c3
NC
609 }
610 else
a180b31a 611 if (len == 6 && strEQ(second_letter, "ORMAT"))
ad64d0ec 612 tmpRef = MUTABLE_SV(GvFORM(gv));
c4ba80c3
NC
613 break;
614 case 'G':
a180b31a 615 if (len == 4 && strEQ(second_letter, "LOB"))
ad64d0ec 616 tmpRef = MUTABLE_SV(gv);
c4ba80c3
NC
617 break;
618 case 'H':
a180b31a 619 if (len == 4 && strEQ(second_letter, "ASH"))
ad64d0ec 620 tmpRef = MUTABLE_SV(GvHV(gv));
c4ba80c3
NC
621 break;
622 case 'I':
a180b31a 623 if (*second_letter == 'O' && !elem[2] && len == 2)
ad64d0ec 624 tmpRef = MUTABLE_SV(GvIOp(gv));
c4ba80c3
NC
625 break;
626 case 'N':
a180b31a 627 if (len == 4 && strEQ(second_letter, "AME"))
a663657d 628 sv = newSVhek(GvNAME_HEK(gv));
c4ba80c3
NC
629 break;
630 case 'P':
a180b31a 631 if (len == 7 && strEQ(second_letter, "ACKAGE")) {
7fa3a4ab
NC
632 const HV * const stash = GvSTASH(gv);
633 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
396482e1 634 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
c4ba80c3
NC
635 }
636 break;
637 case 'S':
a180b31a 638 if (len == 6 && strEQ(second_letter, "CALAR"))
f9d52e31 639 tmpRef = GvSVn(gv);
c4ba80c3 640 break;
39b99f21 641 }
fb73857a 642 }
76e3520e
GS
643 if (tmpRef)
644 sv = newRV(tmpRef);
fb73857a
PP
645 if (sv)
646 sv_2mortal(sv);
647 else
3280af22 648 sv = &PL_sv_undef;
fb73857a
PP
649 XPUSHs(sv);
650 RETURN;
651}
652
a0d0e21e 653/* Pattern matching */
79072805 654
a0d0e21e 655PP(pp_study)
79072805 656{
97aff369 657 dVAR; dSP; dPOPss;
a0d0e21e
LW
658 STRLEN len;
659
1fa930f2 660 (void)SvPV(sv, len);
bc9a5256 661 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
32f0ea87 662 /* Historically, study was skipped in these cases. */
a4f4e906
NC
663 RETPUSHNO;
664 }
665
a58a85fa 666 /* Make study a no-op. It's no longer useful and its existence
32f0ea87 667 complicates matters elsewhere. */
1e422769 668 RETPUSHYES;
79072805
LW
669}
670
a0d0e21e 671PP(pp_trans)
79072805 672{
97aff369 673 dVAR; dSP; dTARG;
a0d0e21e
LW
674 SV *sv;
675
533c011a 676 if (PL_op->op_flags & OPf_STACKED)
a0d0e21e 677 sv = POPs;
59f00321
RGS
678 else if (PL_op->op_private & OPpTARGET_MY)
679 sv = GETTARGET;
79072805 680 else {
54b9620d 681 sv = DEFSV;
a0d0e21e 682 EXTEND(SP,1);
79072805 683 }
bb16bae8 684 if(PL_op->op_type == OP_TRANSR) {
290797f7
FC
685 STRLEN len;
686 const char * const pv = SvPV(sv,len);
687 SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv));
bb16bae8 688 do_trans(newsv);
290797f7 689 PUSHs(newsv);
bb16bae8 690 }
5bbe7184
FC
691 else {
692 TARG = sv_newmortal();
693 PUSHi(do_trans(sv));
694 }
a0d0e21e 695 RETURN;
79072805
LW
696}
697
a0d0e21e 698/* Lvalue operators. */
79072805 699
81745e4e
NC
700static void
701S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
702{
703 dVAR;
704 STRLEN len;
705 char *s;
706
707 PERL_ARGS_ASSERT_DO_CHOMP;
708
709 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
710 return;
711 if (SvTYPE(sv) == SVt_PVAV) {
712 I32 i;
713 AV *const av = MUTABLE_AV(sv);
714 const I32 max = AvFILL(av);
715
716 for (i = 0; i <= max; i++) {
717 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
718 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
719 do_chomp(retval, sv, chomping);
720 }
721 return;
722 }
723 else if (SvTYPE(sv) == SVt_PVHV) {
724 HV* const hv = MUTABLE_HV(sv);
725 HE* entry;
726 (void)hv_iterinit(hv);
727 while ((entry = hv_iternext(hv)))
728 do_chomp(retval, hv_iterval(hv,entry), chomping);
729 return;
730 }
731 else if (SvREADONLY(sv)) {
732 if (SvFAKE(sv)) {
733 /* SV is copy-on-write */
734 sv_force_normal_flags(sv, 0);
735 }
a5f4b619 736 else
81745e4e
NC
737 Perl_croak_no_modify(aTHX);
738 }
739
740 if (PL_encoding) {
741 if (!SvUTF8(sv)) {
742 /* XXX, here sv is utf8-ized as a side-effect!
743 If encoding.pm is used properly, almost string-generating
744 operations, including literal strings, chr(), input data, etc.
745 should have been utf8-ized already, right?
746 */
747 sv_recode_to_utf8(sv, PL_encoding);
748 }
749 }
750
751 s = SvPV(sv, len);
752 if (chomping) {
753 char *temp_buffer = NULL;
754 SV *svrecode = NULL;
755
756 if (s && len) {
757 s += --len;
758 if (RsPARA(PL_rs)) {
759 if (*s != '\n')
760 goto nope;
761 ++SvIVX(retval);
762 while (len && s[-1] == '\n') {
763 --len;
764 --s;
765 ++SvIVX(retval);
766 }
767 }
768 else {
769 STRLEN rslen, rs_charlen;
770 const char *rsptr = SvPV_const(PL_rs, rslen);
771
772 rs_charlen = SvUTF8(PL_rs)
773 ? sv_len_utf8(PL_rs)
774 : rslen;
775
776 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
777 /* Assumption is that rs is shorter than the scalar. */
778 if (SvUTF8(PL_rs)) {
779 /* RS is utf8, scalar is 8 bit. */
780 bool is_utf8 = TRUE;
781 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
782 &rslen, &is_utf8);
783 if (is_utf8) {
784 /* Cannot downgrade, therefore cannot possibly match
785 */
786 assert (temp_buffer == rsptr);
787 temp_buffer = NULL;
788 goto nope;
789 }
790 rsptr = temp_buffer;
791 }
792 else if (PL_encoding) {
793 /* RS is 8 bit, encoding.pm is used.
794 * Do not recode PL_rs as a side-effect. */
795 svrecode = newSVpvn(rsptr, rslen);
796 sv_recode_to_utf8(svrecode, PL_encoding);
797 rsptr = SvPV_const(svrecode, rslen);
798 rs_charlen = sv_len_utf8(svrecode);
799 }
800 else {
801 /* RS is 8 bit, scalar is utf8. */
802 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
803 rsptr = temp_buffer;
804 }
805 }
806 if (rslen == 1) {
807 if (*s != *rsptr)
808 goto nope;
809 ++SvIVX(retval);
810 }
811 else {
812 if (len < rslen - 1)
813 goto nope;
814 len -= rslen - 1;
815 s -= rslen - 1;
816 if (memNE(s, rsptr, rslen))
817 goto nope;
818 SvIVX(retval) += rs_charlen;
819 }
820 }
fbac7ddf 821 s = SvPV_force_nomg_nolen(sv);
81745e4e
NC
822 SvCUR_set(sv, len);
823 *SvEND(sv) = '\0';
824 SvNIOK_off(sv);
825 SvSETMAGIC(sv);
826 }
827 nope:
828
829 SvREFCNT_dec(svrecode);
830
831 Safefree(temp_buffer);
832 } else {
833 if (len && !SvPOK(sv))
834 s = SvPV_force_nomg(sv, len);
835 if (DO_UTF8(sv)) {
836 if (s && len) {
837 char * const send = s + len;
838 char * const start = s;
839 s = send - 1;
840 while (s > start && UTF8_IS_CONTINUATION(*s))
841 s--;
842 if (is_utf8_string((U8*)s, send - s)) {
843 sv_setpvn(retval, s, send - s);
844 *s = '\0';
845 SvCUR_set(sv, s - start);
846 SvNIOK_off(sv);
847 SvUTF8_on(retval);
848 }
849 }
850 else
851 sv_setpvs(retval, "");
852 }
853 else if (s && len) {
854 s += --len;
855 sv_setpvn(retval, s, 1);
856 *s = '\0';
857 SvCUR_set(sv, len);
858 SvUTF8_off(sv);
859 SvNIOK_off(sv);
860 }
861 else
862 sv_setpvs(retval, "");
863 SvSETMAGIC(sv);
864 }
865}
866
a0d0e21e
LW
867PP(pp_schop)
868{
97aff369 869 dVAR; dSP; dTARGET;
fa54efae
NC
870 const bool chomping = PL_op->op_type == OP_SCHOMP;
871
872 if (chomping)
873 sv_setiv(TARG, 0);
874 do_chomp(TARG, TOPs, chomping);
a0d0e21e
LW
875 SETTARG;
876 RETURN;
79072805
LW
877}
878
a0d0e21e 879PP(pp_chop)
79072805 880{
97aff369 881 dVAR; dSP; dMARK; dTARGET; dORIGMARK;
fa54efae 882 const bool chomping = PL_op->op_type == OP_CHOMP;
8ec5e241 883
fa54efae
NC
884 if (chomping)
885 sv_setiv(TARG, 0);
20cf1f79 886 while (MARK < SP)
fa54efae 887 do_chomp(TARG, *++MARK, chomping);
20cf1f79
NC
888 SP = ORIGMARK;
889 XPUSHTARG;
a0d0e21e 890 RETURN;
79072805
LW
891}
892
a0d0e21e
LW
893PP(pp_undef)
894{
97aff369 895 dVAR; dSP;
a0d0e21e
LW
896 SV *sv;
897
533c011a 898 if (!PL_op->op_private) {
774d564b 899 EXTEND(SP, 1);
a0d0e21e 900 RETPUSHUNDEF;
774d564b 901 }
79072805 902
a0d0e21e
LW
903 sv = POPs;
904 if (!sv)
905 RETPUSHUNDEF;
85e6fe83 906
765f542d 907 SV_CHECK_THINKFIRST_COW_DROP(sv);
85e6fe83 908
a0d0e21e
LW
909 switch (SvTYPE(sv)) {
910 case SVt_NULL:
911 break;
912 case SVt_PVAV:
60edcf09 913 av_undef(MUTABLE_AV(sv));
a0d0e21e
LW
914 break;
915 case SVt_PVHV:
60edcf09 916 hv_undef(MUTABLE_HV(sv));
a0d0e21e
LW
917 break;
918 case SVt_PVCV:
a2a5de95 919 if (cv_const_sv((const CV *)sv))
714cd18f
BF
920 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
921 "Constant subroutine %"SVf" undefined",
922 SVfARG(CvANON((const CV *)sv)
923 ? newSVpvs_flags("(anonymous)", SVs_TEMP)
924 : sv_2mortal(newSVhek(GvENAME_HEK(CvGV((const CV *)sv))))));
5f66b61c 925 /* FALLTHROUGH */
9607fc9c 926 case SVt_PVFM:
6fc92669
GS
927 {
928 /* let user-undef'd sub keep its identity */
ea726b52
NC
929 GV* const gv = CvGV((const CV *)sv);
930 cv_undef(MUTABLE_CV(sv));
b3f91e91 931 CvGV_set(MUTABLE_CV(sv), gv);
6fc92669 932 }
a0d0e21e 933 break;
8e07c86e 934 case SVt_PVGV:
6e592b3a 935 if (SvFAKE(sv)) {
3280af22 936 SvSetMagicSV(sv, &PL_sv_undef);
6e592b3a
BM
937 break;
938 }
939 else if (isGV_with_GP(sv)) {
20408e3c 940 GP *gp;
dd69841b
BB
941 HV *stash;
942
dd69841b 943 /* undef *Pkg::meth_name ... */
e530fb81
FC
944 bool method_changed
945 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
946 && HvENAME_get(stash);
947 /* undef *Foo:: */
948 if((stash = GvHV((const GV *)sv))) {
949 if(HvENAME_get(stash))
950 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
951 else stash = NULL;
952 }
dd69841b 953
159b6efe 954 gp_free(MUTABLE_GV(sv));
a02a5408 955 Newxz(gp, 1, GP);
c43ae56f 956 GvGP_set(sv, gp_ref(gp));
561b68a9 957 GvSV(sv) = newSV(0);
57843af0 958 GvLINE(sv) = CopLINE(PL_curcop);
159b6efe 959 GvEGV(sv) = MUTABLE_GV(sv);
20408e3c 960 GvMULTI_on(sv);
e530fb81
FC
961
962 if(stash)
afdbe55d 963 mro_package_moved(NULL, stash, (const GV *)sv, 0);
e530fb81
FC
964 stash = NULL;
965 /* undef *Foo::ISA */
966 if( strEQ(GvNAME((const GV *)sv), "ISA")
967 && (stash = GvSTASH((const GV *)sv))
968 && (method_changed || HvENAME(stash)) )
969 mro_isa_changed_in(stash);
970 else if(method_changed)
971 mro_method_changed_in(
da9043f5 972 GvSTASH((const GV *)sv)
e530fb81
FC
973 );
974
6e592b3a 975 break;
20408e3c 976 }
6e592b3a 977 /* FALL THROUGH */
a0d0e21e 978 default:
b15aece3 979 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
8bd4d4c5 980 SvPV_free(sv);
c445ea15 981 SvPV_set(sv, NULL);
4633a7c4 982 SvLEN_set(sv, 0);
a0d0e21e 983 }
0c34ef67 984 SvOK_off(sv);
4633a7c4 985 SvSETMAGIC(sv);
79072805 986 }
a0d0e21e
LW
987
988 RETPUSHUNDEF;
79072805
LW
989}
990
a0d0e21e
LW
991PP(pp_postinc)
992{
97aff369 993 dVAR; dSP; dTARGET;
c22c99bc
FC
994 const bool inc =
995 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
60092ce4 996 if (SvTYPE(TOPs) >= SVt_PVAV || (isGV_with_GP(TOPs) && !SvFAKE(TOPs)))
6ad8f254 997 Perl_croak_no_modify(aTHX);
7dcb9b98
DM
998 if (SvROK(TOPs))
999 TARG = sv_newmortal();
a0d0e21e 1000 sv_setsv(TARG, TOPs);
4bac9ae4 1001 if (!SvREADONLY(TOPs) && !SvGMAGICAL(TOPs) && SvIOK_notUV(TOPs) && !SvNOK(TOPs) && !SvPOK(TOPs)
c22c99bc 1002 && SvIVX(TOPs) != (inc ? IV_MAX : IV_MIN))
55497cff 1003 {
c22c99bc 1004 SvIV_set(TOPs, SvIVX(TOPs) + (inc ? 1 : -1));
55497cff 1005 SvFLAGS(TOPs) &= ~(SVp_NOK|SVp_POK);
748a9306 1006 }
c22c99bc 1007 else if (inc)
6f1401dc 1008 sv_inc_nomg(TOPs);
c22c99bc 1009 else sv_dec_nomg(TOPs);
a0d0e21e 1010 SvSETMAGIC(TOPs);
1e54a23f 1011 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
c22c99bc 1012 if (inc && !SvOK(TARG))
a0d0e21e
LW
1013 sv_setiv(TARG, 0);
1014 SETs(TARG);
1015 return NORMAL;
1016}
79072805 1017
a0d0e21e
LW
1018/* Ordinary operators. */
1019
1020PP(pp_pow)
1021{
800401ee 1022 dVAR; dSP; dATARGET; SV *svl, *svr;
58d76dfd 1023#ifdef PERL_PRESERVE_IVUV
52a96ae6
HS
1024 bool is_int = 0;
1025#endif
6f1401dc
DM
1026 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1027 svr = TOPs;
1028 svl = TOPm1s;
52a96ae6
HS
1029#ifdef PERL_PRESERVE_IVUV
1030 /* For integer to integer power, we do the calculation by hand wherever
1031 we're sure it is safe; otherwise we call pow() and try to convert to
1032 integer afterwards. */
01f91bf2 1033 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
900658e3
PF
1034 UV power;
1035 bool baseuok;
1036 UV baseuv;
1037
800401ee
JH
1038 if (SvUOK(svr)) {
1039 power = SvUVX(svr);
900658e3 1040 } else {
800401ee 1041 const IV iv = SvIVX(svr);
900658e3
PF
1042 if (iv >= 0) {
1043 power = iv;
1044 } else {
1045 goto float_it; /* Can't do negative powers this way. */
1046 }
1047 }
1048
800401ee 1049 baseuok = SvUOK(svl);
900658e3 1050 if (baseuok) {
800401ee 1051 baseuv = SvUVX(svl);
900658e3 1052 } else {
800401ee 1053 const IV iv = SvIVX(svl);
900658e3
PF
1054 if (iv >= 0) {
1055 baseuv = iv;
1056 baseuok = TRUE; /* effectively it's a UV now */
1057 } else {
1058 baseuv = -iv; /* abs, baseuok == false records sign */
1059 }
1060 }
52a96ae6
HS
1061 /* now we have integer ** positive integer. */
1062 is_int = 1;
1063
1064 /* foo & (foo - 1) is zero only for a power of 2. */
58d76dfd 1065 if (!(baseuv & (baseuv - 1))) {
52a96ae6 1066 /* We are raising power-of-2 to a positive integer.
58d76dfd
JH
1067 The logic here will work for any base (even non-integer
1068 bases) but it can be less accurate than
1069 pow (base,power) or exp (power * log (base)) when the
1070 intermediate values start to spill out of the mantissa.
1071 With powers of 2 we know this can't happen.
1072 And powers of 2 are the favourite thing for perl
1073 programmers to notice ** not doing what they mean. */
1074 NV result = 1.0;
1075 NV base = baseuok ? baseuv : -(NV)baseuv;
900658e3
PF
1076
1077 if (power & 1) {
1078 result *= base;
1079 }
1080 while (power >>= 1) {
1081 base *= base;
1082 if (power & 1) {
1083 result *= base;
1084 }
1085 }
58d76dfd
JH
1086 SP--;
1087 SETn( result );
6f1401dc 1088 SvIV_please_nomg(svr);
58d76dfd 1089 RETURN;
52a96ae6 1090 } else {
eb578fdb
KW
1091 unsigned int highbit = 8 * sizeof(UV);
1092 unsigned int diff = 8 * sizeof(UV);
900658e3
PF
1093 while (diff >>= 1) {
1094 highbit -= diff;
1095 if (baseuv >> highbit) {
1096 highbit += diff;
1097 }
52a96ae6
HS
1098 }
1099 /* we now have baseuv < 2 ** highbit */
1100 if (power * highbit <= 8 * sizeof(UV)) {
1101 /* result will definitely fit in UV, so use UV math
1102 on same algorithm as above */
eb578fdb
KW
1103 UV result = 1;
1104 UV base = baseuv;
f2338a2e 1105 const bool odd_power = cBOOL(power & 1);
900658e3
PF
1106 if (odd_power) {
1107 result *= base;
1108 }
1109 while (power >>= 1) {
1110 base *= base;
1111 if (power & 1) {
52a96ae6 1112 result *= base;
52a96ae6
HS
1113 }
1114 }
1115 SP--;
0615a994 1116 if (baseuok || !odd_power)
52a96ae6
HS
1117 /* answer is positive */
1118 SETu( result );
1119 else if (result <= (UV)IV_MAX)
1120 /* answer negative, fits in IV */
1121 SETi( -(IV)result );
1122 else if (result == (UV)IV_MIN)
1123 /* 2's complement assumption: special case IV_MIN */
1124 SETi( IV_MIN );
1125 else
1126 /* answer negative, doesn't fit */
1127 SETn( -(NV)result );
1128 RETURN;
1129 }
1130 }
58d76dfd 1131 }
52a96ae6 1132 float_it:
58d76dfd 1133#endif
a0d0e21e 1134 {
6f1401dc
DM
1135 NV right = SvNV_nomg(svr);
1136 NV left = SvNV_nomg(svl);
4efa5a16 1137 (void)POPs;
3aaeb624
JA
1138
1139#if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1140 /*
1141 We are building perl with long double support and are on an AIX OS
1142 afflicted with a powl() function that wrongly returns NaNQ for any
1143 negative base. This was reported to IBM as PMR #23047-379 on
1144 03/06/2006. The problem exists in at least the following versions
1145 of AIX and the libm fileset, and no doubt others as well:
1146
1147 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1148 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1149 AIX 5.2.0 bos.adt.libm 5.2.0.85
1150
1151 So, until IBM fixes powl(), we provide the following workaround to
1152 handle the problem ourselves. Our logic is as follows: for
1153 negative bases (left), we use fmod(right, 2) to check if the
1154 exponent is an odd or even integer:
1155
1156 - if odd, powl(left, right) == -powl(-left, right)
1157 - if even, powl(left, right) == powl(-left, right)
1158
1159 If the exponent is not an integer, the result is rightly NaNQ, so
1160 we just return that (as NV_NAN).
1161 */
1162
1163 if (left < 0.0) {
1164 NV mod2 = Perl_fmod( right, 2.0 );
1165 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1166 SETn( -Perl_pow( -left, right) );
1167 } else if (mod2 == 0.0) { /* even integer */
1168 SETn( Perl_pow( -left, right) );
1169 } else { /* fractional power */
1170 SETn( NV_NAN );
1171 }
1172 } else {
1173 SETn( Perl_pow( left, right) );
1174 }
1175#else
52a96ae6 1176 SETn( Perl_pow( left, right) );
3aaeb624
JA
1177#endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1178
52a96ae6
HS
1179#ifdef PERL_PRESERVE_IVUV
1180 if (is_int)
6f1401dc 1181 SvIV_please_nomg(svr);
52a96ae6
HS
1182#endif
1183 RETURN;
93a17b20 1184 }
a0d0e21e
LW
1185}
1186
1187PP(pp_multiply)
1188{
800401ee 1189 dVAR; dSP; dATARGET; SV *svl, *svr;
6f1401dc
DM
1190 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1191 svr = TOPs;
1192 svl = TOPm1s;
28e5dec8 1193#ifdef PERL_PRESERVE_IVUV
01f91bf2 1194 if (SvIV_please_nomg(svr)) {
28e5dec8
JH
1195 /* Unless the left argument is integer in range we are going to have to
1196 use NV maths. Hence only attempt to coerce the right argument if
1197 we know the left is integer. */
1198 /* Left operand is defined, so is it IV? */
01f91bf2 1199 if (SvIV_please_nomg(svl)) {
800401ee
JH
1200 bool auvok = SvUOK(svl);
1201 bool buvok = SvUOK(svr);
28e5dec8
JH
1202 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1203 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1204 UV alow;
1205 UV ahigh;
1206 UV blow;
1207 UV bhigh;
1208
1209 if (auvok) {
800401ee 1210 alow = SvUVX(svl);
28e5dec8 1211 } else {
800401ee 1212 const IV aiv = SvIVX(svl);
28e5dec8
JH
1213 if (aiv >= 0) {
1214 alow = aiv;
1215 auvok = TRUE; /* effectively it's a UV now */
1216 } else {
1217 alow = -aiv; /* abs, auvok == false records sign */
1218 }
1219 }
1220 if (buvok) {
800401ee 1221 blow = SvUVX(svr);
28e5dec8 1222 } else {
800401ee 1223 const IV biv = SvIVX(svr);
28e5dec8
JH
1224 if (biv >= 0) {
1225 blow = biv;
1226 buvok = TRUE; /* effectively it's a UV now */
1227 } else {
1228 blow = -biv; /* abs, buvok == false records sign */
1229 }
1230 }
1231
1232 /* If this does sign extension on unsigned it's time for plan B */
1233 ahigh = alow >> (4 * sizeof (UV));
1234 alow &= botmask;
1235 bhigh = blow >> (4 * sizeof (UV));
1236 blow &= botmask;
1237 if (ahigh && bhigh) {
6f207bd3 1238 NOOP;
28e5dec8
JH
1239 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1240 which is overflow. Drop to NVs below. */
1241 } else if (!ahigh && !bhigh) {
1242 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1243 so the unsigned multiply cannot overflow. */
c445ea15 1244 const UV product = alow * blow;
28e5dec8
JH
1245 if (auvok == buvok) {
1246 /* -ve * -ve or +ve * +ve gives a +ve result. */
1247 SP--;
1248 SETu( product );
1249 RETURN;
1250 } else if (product <= (UV)IV_MIN) {
1251 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1252 /* -ve result, which could overflow an IV */
1253 SP--;
25716404 1254 SETi( -(IV)product );
28e5dec8
JH
1255 RETURN;
1256 } /* else drop to NVs below. */
1257 } else {
1258 /* One operand is large, 1 small */
1259 UV product_middle;
1260 if (bhigh) {
1261 /* swap the operands */
1262 ahigh = bhigh;
1263 bhigh = blow; /* bhigh now the temp var for the swap */
1264 blow = alow;
1265 alow = bhigh;
1266 }
1267 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1268 multiplies can't overflow. shift can, add can, -ve can. */
1269 product_middle = ahigh * blow;
1270 if (!(product_middle & topmask)) {
1271 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1272 UV product_low;
1273 product_middle <<= (4 * sizeof (UV));
1274 product_low = alow * blow;
1275
1276 /* as for pp_add, UV + something mustn't get smaller.
1277 IIRC ANSI mandates this wrapping *behaviour* for
1278 unsigned whatever the actual representation*/
1279 product_low += product_middle;
1280 if (product_low >= product_middle) {
1281 /* didn't overflow */
1282 if (auvok == buvok) {
1283 /* -ve * -ve or +ve * +ve gives a +ve result. */
1284 SP--;
1285 SETu( product_low );
1286 RETURN;
1287 } else if (product_low <= (UV)IV_MIN) {
1288 /* 2s complement assumption again */
1289 /* -ve result, which could overflow an IV */
1290 SP--;
25716404 1291 SETi( -(IV)product_low );
28e5dec8
JH
1292 RETURN;
1293 } /* else drop to NVs below. */
1294 }
1295 } /* product_middle too large */
1296 } /* ahigh && bhigh */
800401ee
JH
1297 } /* SvIOK(svl) */
1298 } /* SvIOK(svr) */
28e5dec8 1299#endif
a0d0e21e 1300 {
6f1401dc
DM
1301 NV right = SvNV_nomg(svr);
1302 NV left = SvNV_nomg(svl);
4efa5a16 1303 (void)POPs;
a0d0e21e
LW
1304 SETn( left * right );
1305 RETURN;
79072805 1306 }
a0d0e21e
LW
1307}
1308
1309PP(pp_divide)
1310{
800401ee 1311 dVAR; dSP; dATARGET; SV *svl, *svr;
6f1401dc
DM
1312 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1313 svr = TOPs;
1314 svl = TOPm1s;
5479d192 1315 /* Only try to do UV divide first
68795e93 1316 if ((SLOPPYDIVIDE is true) or
5479d192
NC
1317 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1318 to preserve))
1319 The assumption is that it is better to use floating point divide
1320 whenever possible, only doing integer divide first if we can't be sure.
1321 If NV_PRESERVES_UV is true then we know at compile time that no UV
1322 can be too large to preserve, so don't need to compile the code to
1323 test the size of UVs. */
1324
a0d0e21e 1325#ifdef SLOPPYDIVIDE
5479d192
NC
1326# define PERL_TRY_UV_DIVIDE
1327 /* ensure that 20./5. == 4. */
a0d0e21e 1328#else
5479d192
NC
1329# ifdef PERL_PRESERVE_IVUV
1330# ifndef NV_PRESERVES_UV
1331# define PERL_TRY_UV_DIVIDE
1332# endif
1333# endif
a0d0e21e 1334#endif
5479d192
NC
1335
1336#ifdef PERL_TRY_UV_DIVIDE
01f91bf2 1337 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
800401ee
JH
1338 bool left_non_neg = SvUOK(svl);
1339 bool right_non_neg = SvUOK(svr);
5479d192
NC
1340 UV left;
1341 UV right;
1342
1343 if (right_non_neg) {
800401ee 1344 right = SvUVX(svr);
5479d192
NC
1345 }
1346 else {
800401ee 1347 const IV biv = SvIVX(svr);
5479d192
NC
1348 if (biv >= 0) {
1349 right = biv;
1350 right_non_neg = TRUE; /* effectively it's a UV now */
1351 }
1352 else {
1353 right = -biv;
1354 }
1355 }
1356 /* historically undef()/0 gives a "Use of uninitialized value"
1357 warning before dieing, hence this test goes here.
1358 If it were immediately before the second SvIV_please, then
1359 DIE() would be invoked before left was even inspected, so
486ec47a 1360 no inspection would give no warning. */
5479d192
NC
1361 if (right == 0)
1362 DIE(aTHX_ "Illegal division by zero");
1363
1364 if (left_non_neg) {
800401ee 1365 left = SvUVX(svl);
5479d192
NC
1366 }
1367 else {
800401ee 1368 const IV aiv = SvIVX(svl);
5479d192
NC
1369 if (aiv >= 0) {
1370 left = aiv;
1371 left_non_neg = TRUE; /* effectively it's a UV now */
1372 }
1373 else {
1374 left = -aiv;
1375 }
1376 }
1377
1378 if (left >= right
1379#ifdef SLOPPYDIVIDE
1380 /* For sloppy divide we always attempt integer division. */
1381#else
1382 /* Otherwise we only attempt it if either or both operands
1383 would not be preserved by an NV. If both fit in NVs
0c2ee62a
NC
1384 we fall through to the NV divide code below. However,
1385 as left >= right to ensure integer result here, we know that
1386 we can skip the test on the right operand - right big
1387 enough not to be preserved can't get here unless left is
1388 also too big. */
1389
1390 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
5479d192
NC
1391#endif
1392 ) {
1393 /* Integer division can't overflow, but it can be imprecise. */
1b6737cc 1394 const UV result = left / right;
5479d192
NC
1395 if (result * right == left) {
1396 SP--; /* result is valid */
1397 if (left_non_neg == right_non_neg) {
1398 /* signs identical, result is positive. */
1399 SETu( result );
1400 RETURN;
1401 }
1402 /* 2s complement assumption */
1403 if (result <= (UV)IV_MIN)
91f3b821 1404 SETi( -(IV)result );
5479d192
NC
1405 else {
1406 /* It's exact but too negative for IV. */
1407 SETn( -(NV)result );
1408 }
1409 RETURN;
1410 } /* tried integer divide but it was not an integer result */
32fdb065 1411 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
01f91bf2 1412 } /* one operand wasn't SvIOK */
5479d192
NC
1413#endif /* PERL_TRY_UV_DIVIDE */
1414 {
6f1401dc
DM
1415 NV right = SvNV_nomg(svr);
1416 NV left = SvNV_nomg(svl);
4efa5a16 1417 (void)POPs;(void)POPs;
ebc6a117
PD
1418#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1419 if (! Perl_isnan(right) && right == 0.0)
1420#else
5479d192 1421 if (right == 0.0)
ebc6a117 1422#endif
5479d192
NC
1423 DIE(aTHX_ "Illegal division by zero");
1424 PUSHn( left / right );
1425 RETURN;
79072805 1426 }
a0d0e21e
LW
1427}
1428
1429PP(pp_modulo)
1430{
6f1401dc
DM
1431 dVAR; dSP; dATARGET;
1432 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
a0d0e21e 1433 {
9c5ffd7c
JH
1434 UV left = 0;
1435 UV right = 0;
dc656993
JH
1436 bool left_neg = FALSE;
1437 bool right_neg = FALSE;
e2c88acc
NC
1438 bool use_double = FALSE;
1439 bool dright_valid = FALSE;
9c5ffd7c
JH
1440 NV dright = 0.0;
1441 NV dleft = 0.0;
6f1401dc
DM
1442 SV * const svr = TOPs;
1443 SV * const svl = TOPm1s;
01f91bf2 1444 if (SvIV_please_nomg(svr)) {
800401ee 1445 right_neg = !SvUOK(svr);
e2c88acc 1446 if (!right_neg) {
800401ee 1447 right = SvUVX(svr);
e2c88acc 1448 } else {
800401ee 1449 const IV biv = SvIVX(svr);
e2c88acc
NC
1450 if (biv >= 0) {
1451 right = biv;
1452 right_neg = FALSE; /* effectively it's a UV now */
1453 } else {
1454 right = -biv;
1455 }
1456 }
1457 }
1458 else {
6f1401dc 1459 dright = SvNV_nomg(svr);
787eafbd
IZ
1460 right_neg = dright < 0;
1461 if (right_neg)
1462 dright = -dright;
e2c88acc
NC
1463 if (dright < UV_MAX_P1) {
1464 right = U_V(dright);
1465 dright_valid = TRUE; /* In case we need to use double below. */
1466 } else {
1467 use_double = TRUE;
1468 }
787eafbd 1469 }
a0d0e21e 1470
e2c88acc
NC
1471 /* At this point use_double is only true if right is out of range for
1472 a UV. In range NV has been rounded down to nearest UV and
1473 use_double false. */
01f91bf2 1474 if (!use_double && SvIV_please_nomg(svl)) {
800401ee 1475 left_neg = !SvUOK(svl);
e2c88acc 1476 if (!left_neg) {
800401ee 1477 left = SvUVX(svl);
e2c88acc 1478 } else {
800401ee 1479 const IV aiv = SvIVX(svl);
e2c88acc
NC
1480 if (aiv >= 0) {
1481 left = aiv;
1482 left_neg = FALSE; /* effectively it's a UV now */
1483 } else {
1484 left = -aiv;
1485 }
1486 }
e2c88acc 1487 }
787eafbd 1488 else {
6f1401dc 1489 dleft = SvNV_nomg(svl);
787eafbd
IZ
1490 left_neg = dleft < 0;
1491 if (left_neg)
1492 dleft = -dleft;
68dc0745 1493
e2c88acc
NC
1494 /* This should be exactly the 5.6 behaviour - if left and right are
1495 both in range for UV then use U_V() rather than floor. */
1496 if (!use_double) {
1497 if (dleft < UV_MAX_P1) {
1498 /* right was in range, so is dleft, so use UVs not double.
1499 */
1500 left = U_V(dleft);
1501 }
1502 /* left is out of range for UV, right was in range, so promote
1503 right (back) to double. */
1504 else {
1505 /* The +0.5 is used in 5.6 even though it is not strictly
1506 consistent with the implicit +0 floor in the U_V()
1507 inside the #if 1. */
1508 dleft = Perl_floor(dleft + 0.5);
1509 use_double = TRUE;
1510 if (dright_valid)
1511 dright = Perl_floor(dright + 0.5);
1512 else
1513 dright = right;
1514 }
1515 }
1516 }
6f1401dc 1517 sp -= 2;
787eafbd 1518 if (use_double) {
65202027 1519 NV dans;
787eafbd 1520
787eafbd 1521 if (!dright)
cea2e8a9 1522 DIE(aTHX_ "Illegal modulus zero");
787eafbd 1523
65202027 1524 dans = Perl_fmod(dleft, dright);
787eafbd
IZ
1525 if ((left_neg != right_neg) && dans)
1526 dans = dright - dans;
1527 if (right_neg)
1528 dans = -dans;
1529 sv_setnv(TARG, dans);
1530 }
1531 else {
1532 UV ans;
1533
787eafbd 1534 if (!right)
cea2e8a9 1535 DIE(aTHX_ "Illegal modulus zero");
787eafbd
IZ
1536
1537 ans = left % right;
1538 if ((left_neg != right_neg) && ans)
1539 ans = right - ans;
1540 if (right_neg) {
1541 /* XXX may warn: unary minus operator applied to unsigned type */
1542 /* could change -foo to be (~foo)+1 instead */
1543 if (ans <= ~((UV)IV_MAX)+1)
1544 sv_setiv(TARG, ~ans+1);
1545 else
65202027 1546 sv_setnv(TARG, -(NV)ans);
787eafbd
IZ
1547 }
1548 else
1549 sv_setuv(TARG, ans);
1550 }
1551 PUSHTARG;
1552 RETURN;
79072805 1553 }
a0d0e21e 1554}
79072805 1555
a0d0e21e
LW
1556PP(pp_repeat)
1557{
6f1401dc 1558 dVAR; dSP; dATARGET;
eb578fdb 1559 IV count;
6f1401dc
DM
1560 SV *sv;
1561
1562 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
1563 /* TODO: think of some way of doing list-repeat overloading ??? */
1564 sv = POPs;
1565 SvGETMAGIC(sv);
1566 }
1567 else {
1568 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1569 sv = POPs;
1570 }
1571
2b573ace
JH
1572 if (SvIOKp(sv)) {
1573 if (SvUOK(sv)) {
6f1401dc 1574 const UV uv = SvUV_nomg(sv);
2b573ace
JH
1575 if (uv > IV_MAX)
1576 count = IV_MAX; /* The best we can do? */
1577 else
1578 count = uv;
1579 } else {
6f1401dc 1580 const IV iv = SvIV_nomg(sv);
2b573ace
JH
1581 if (iv < 0)
1582 count = 0;
1583 else
1584 count = iv;
1585 }
1586 }
1587 else if (SvNOKp(sv)) {
6f1401dc 1588 const NV nv = SvNV_nomg(sv);
2b573ace
JH
1589 if (nv < 0.0)
1590 count = 0;
1591 else
1592 count = (IV)nv;
1593 }
1594 else
6f1401dc
DM
1595 count = SvIV_nomg(sv);
1596
533c011a 1597 if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
a0d0e21e 1598 dMARK;
0bd48802
AL
1599 static const char oom_list_extend[] = "Out of memory during list extend";
1600 const I32 items = SP - MARK;
1601 const I32 max = items * count;
79072805 1602
2b573ace
JH
1603 MEM_WRAP_CHECK_1(max, SV*, oom_list_extend);
1604 /* Did the max computation overflow? */
27d5b266 1605 if (items > 0 && max > 0 && (max < items || max < count))
2b573ace 1606 Perl_croak(aTHX_ oom_list_extend);
a0d0e21e
LW
1607 MEXTEND(MARK, max);
1608 if (count > 1) {
1609 while (SP > MARK) {
976c8a39
JH
1610#if 0
1611 /* This code was intended to fix 20010809.028:
1612
1613 $x = 'abcd';
1614 for (($x =~ /./g) x 2) {
1615 print chop; # "abcdabcd" expected as output.
1616 }
1617
1618 * but that change (#11635) broke this code:
1619
1620 $x = [("foo")x2]; # only one "foo" ended up in the anonlist.
1621
1622 * I can't think of a better fix that doesn't introduce
1623 * an efficiency hit by copying the SVs. The stack isn't
1624 * refcounted, and mortalisation obviously doesn't
1625 * Do The Right Thing when the stack has more than
1626 * one pointer to the same mortal value.
1627 * .robin.
1628 */
e30acc16
RH
1629 if (*SP) {
1630 *SP = sv_2mortal(newSVsv(*SP));
1631 SvREADONLY_on(*SP);
1632 }
976c8a39
JH
1633#else
1634 if (*SP)
1635 SvTEMP_off((*SP));
1636#endif
a0d0e21e 1637 SP--;
79072805 1638 }
a0d0e21e
LW
1639 MARK++;
1640 repeatcpy((char*)(MARK + items), (char*)MARK,
ad64d0ec 1641 items * sizeof(const SV *), count - 1);
a0d0e21e 1642 SP += max;
79072805 1643 }
a0d0e21e
LW
1644 else if (count <= 0)
1645 SP -= items;
79072805 1646 }
a0d0e21e 1647 else { /* Note: mark already snarfed by pp_list */
0bd48802 1648 SV * const tmpstr = POPs;
a0d0e21e 1649 STRLEN len;
9b877dbb 1650 bool isutf;
2b573ace
JH
1651 static const char oom_string_extend[] =
1652 "Out of memory during string extend";
a0d0e21e 1653
6f1401dc
DM
1654 if (TARG != tmpstr)
1655 sv_setsv_nomg(TARG, tmpstr);
1656 SvPV_force_nomg(TARG, len);
9b877dbb 1657 isutf = DO_UTF8(TARG);
8ebc5c01
PP
1658 if (count != 1) {
1659 if (count < 1)
1660 SvCUR_set(TARG, 0);
1661 else {
c445ea15 1662 const STRLEN max = (UV)count * len;
19a94d75 1663 if (len > MEM_SIZE_MAX / count)
2b573ace
JH
1664 Perl_croak(aTHX_ oom_string_extend);
1665 MEM_WRAP_CHECK_1(max, char, oom_string_extend);
8569b9dc 1666 SvGROW(TARG, max + 1);
a0d0e21e 1667 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
b162af07 1668 SvCUR_set(TARG, SvCUR(TARG) * count);
7a4c00b4 1669 }
a0d0e21e 1670 *SvEND(TARG) = '\0';
a0d0e21e 1671 }
dfcb284a
GS
1672 if (isutf)
1673 (void)SvPOK_only_UTF8(TARG);
1674 else
1675 (void)SvPOK_only(TARG);
b80b6069
RH
1676
1677 if (PL_op->op_private & OPpREPEAT_DOLIST) {
1678 /* The parser saw this as a list repeat, and there
1679 are probably several items on the stack. But we're
1680 in scalar context, and there's no pp_list to save us
1681 now. So drop the rest of the items -- robin@kitsite.com
1682 */
1683 dMARK;
1684 SP = MARK;
1685 }
a0d0e21e 1686 PUSHTARG;
79072805 1687 }
a0d0e21e
LW
1688 RETURN;
1689}
79072805 1690
a0d0e21e
LW
1691PP(pp_subtract)
1692{
800401ee 1693 dVAR; dSP; dATARGET; bool useleft; SV *svl, *svr;
6f1401dc
DM
1694 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1695 svr = TOPs;
1696 svl = TOPm1s;
800401ee 1697 useleft = USE_LEFT(svl);
28e5dec8 1698#ifdef PERL_PRESERVE_IVUV
7dca457a
NC
1699 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1700 "bad things" happen if you rely on signed integers wrapping. */
01f91bf2 1701 if (SvIV_please_nomg(svr)) {
28e5dec8
JH
1702 /* Unless the left argument is integer in range we are going to have to
1703 use NV maths. Hence only attempt to coerce the right argument if
1704 we know the left is integer. */
eb578fdb 1705 UV auv = 0;
9c5ffd7c 1706 bool auvok = FALSE;
7dca457a
NC
1707 bool a_valid = 0;
1708
28e5dec8 1709 if (!useleft) {
7dca457a
NC
1710 auv = 0;
1711 a_valid = auvok = 1;
1712 /* left operand is undef, treat as zero. */
28e5dec8
JH
1713 } else {
1714 /* Left operand is defined, so is it IV? */
01f91bf2 1715 if (SvIV_please_nomg(svl)) {
800401ee
JH
1716 if ((auvok = SvUOK(svl)))
1717 auv = SvUVX(svl);
7dca457a 1718 else {
eb578fdb 1719 const IV aiv = SvIVX(svl);
7dca457a
NC
1720 if (aiv >= 0) {
1721 auv = aiv;
1722 auvok = 1; /* Now acting as a sign flag. */
1723 } else { /* 2s complement assumption for IV_MIN */
1724 auv = (UV)-aiv;
28e5dec8 1725 }
7dca457a
NC
1726 }
1727 a_valid = 1;
1728 }
1729 }
1730 if (a_valid) {
1731 bool result_good = 0;
1732 UV result;
eb578fdb 1733 UV buv;
800401ee 1734 bool buvok = SvUOK(svr);
9041c2e3 1735
7dca457a 1736 if (buvok)
800401ee 1737 buv = SvUVX(svr);
7dca457a 1738 else {
eb578fdb 1739 const IV biv = SvIVX(svr);
7dca457a
NC
1740 if (biv >= 0) {
1741 buv = biv;
1742 buvok = 1;
1743 } else
1744 buv = (UV)-biv;
1745 }
1746 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
602f51c4 1747 else "IV" now, independent of how it came in.
7dca457a
NC
1748 if a, b represents positive, A, B negative, a maps to -A etc
1749 a - b => (a - b)
1750 A - b => -(a + b)
1751 a - B => (a + b)
1752 A - B => -(a - b)
1753 all UV maths. negate result if A negative.
1754 subtract if signs same, add if signs differ. */
1755
1756 if (auvok ^ buvok) {
1757 /* Signs differ. */
1758 result = auv + buv;
1759 if (result >= auv)
1760 result_good = 1;
1761 } else {
1762 /* Signs same */
1763 if (auv >= buv) {
1764 result = auv - buv;
1765 /* Must get smaller */
1766 if (result <= auv)
1767 result_good = 1;
1768 } else {
1769 result = buv - auv;
1770 if (result <= buv) {
1771 /* result really should be -(auv-buv). as its negation
1772 of true value, need to swap our result flag */
1773 auvok = !auvok;
1774 result_good = 1;
28e5dec8 1775 }
28e5dec8
JH
1776 }
1777 }
7dca457a
NC
1778 if (result_good) {
1779 SP--;
1780 if (auvok)
1781 SETu( result );
1782 else {
1783 /* Negate result */
1784 if (result <= (UV)IV_MIN)
1785 SETi( -(IV)result );
1786 else {
1787 /* result valid, but out of range for IV. */
1788 SETn( -(NV)result );
1789 }
1790 }
1791 RETURN;
1792 } /* Overflow, drop through to NVs. */
28e5dec8
JH
1793 }
1794 }
1795#endif
a0d0e21e 1796 {
6f1401dc 1797 NV value = SvNV_nomg(svr);
4efa5a16
RD
1798 (void)POPs;
1799
28e5dec8
JH
1800 if (!useleft) {
1801 /* left operand is undef, treat as zero - value */
1802 SETn(-value);
1803 RETURN;
1804 }
6f1401dc 1805 SETn( SvNV_nomg(svl) - value );
28e5dec8 1806 RETURN;
79072805 1807 }
a0d0e21e 1808}
79072805 1809
a0d0e21e
LW
1810PP(pp_left_shift)
1811{
6f1401dc 1812 dVAR; dSP; dATARGET; SV *svl, *svr;
a42d0242 1813 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
6f1401dc
DM
1814 svr = POPs;
1815 svl = TOPs;
a0d0e21e 1816 {
6f1401dc 1817 const IV shift = SvIV_nomg(svr);
d0ba1bd2 1818 if (PL_op->op_private & HINT_INTEGER) {
6f1401dc 1819 const IV i = SvIV_nomg(svl);
972b05a9 1820 SETi(i << shift);
d0ba1bd2
JH
1821 }
1822 else {
6f1401dc 1823 const UV u = SvUV_nomg(svl);
972b05a9 1824 SETu(u << shift);
d0ba1bd2 1825 }
55497cff 1826 RETURN;
79072805 1827 }
a0d0e21e 1828}
79072805 1829
a0d0e21e
LW
1830PP(pp_right_shift)
1831{
6f1401dc 1832 dVAR; dSP; dATARGET; SV *svl, *svr;
a42d0242 1833 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
6f1401dc
DM
1834 svr = POPs;
1835 svl = TOPs;
a0d0e21e 1836 {
6f1401dc 1837 const IV shift = SvIV_nomg(svr);
d0ba1bd2 1838 if (PL_op->op_private & HINT_INTEGER) {
6f1401dc 1839 const IV i = SvIV_nomg(svl);
972b05a9 1840 SETi(i >> shift);
d0ba1bd2
JH
1841 }
1842 else {
6f1401dc 1843 const UV u = SvUV_nomg(svl);
972b05a9 1844 SETu(u >> shift);
d0ba1bd2 1845 }
a0d0e21e 1846 RETURN;
93a17b20 1847 }
79072805
LW
1848}
1849
a0d0e21e 1850PP(pp_lt)
79072805 1851{
6f1401dc 1852 dVAR; dSP;
33efebe6
DM
1853 SV *left, *right;
1854
a42d0242 1855 tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
33efebe6
DM
1856 right = POPs;
1857 left = TOPs;
1858 SETs(boolSV(
1859 (SvIOK_notUV(left) && SvIOK_notUV(right))
1860 ? (SvIVX(left) < SvIVX(right))
1861 : (do_ncmp(left, right) == -1)
1862 ));
1863 RETURN;
a0d0e21e 1864}
79072805 1865
a0d0e21e
LW
1866PP(pp_gt)
1867{
6f1401dc 1868 dVAR; dSP;
33efebe6 1869 SV *left, *right;
1b6737cc 1870
33efebe6
DM
1871 tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
1872 right = POPs;
1873 left = TOPs;
1874 SETs(boolSV(
1875 (SvIOK_notUV(left) && SvIOK_notUV(right))
1876 ? (SvIVX(left) > SvIVX(right))
1877 : (do_ncmp(left, right) == 1)
1878 ));
1879 RETURN;
a0d0e21e
LW
1880}
1881
1882PP(pp_le)
1883{
6f1401dc 1884 dVAR; dSP;
33efebe6 1885 SV *left, *right;
1b6737cc 1886
33efebe6
DM
1887 tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
1888 right = POPs;
1889 left = TOPs;
1890 SETs(boolSV(
1891 (SvIOK_notUV(left) && SvIOK_notUV(right))
1892 ? (SvIVX(left) <= SvIVX(right))
1893 : (do_ncmp(left, right) <= 0)
1894 ));
1895 RETURN;
a0d0e21e
LW
1896}
1897
1898PP(pp_ge)
1899{
6f1401dc 1900 dVAR; dSP;
33efebe6
DM
1901 SV *left, *right;
1902
1903 tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
1904 right = POPs;
1905 left = TOPs;
1906 SETs(boolSV(
1907 (SvIOK_notUV(left) && SvIOK_notUV(right))
1908 ? (SvIVX(left) >= SvIVX(right))
1909 : ( (do_ncmp(left, right) & 2) == 0)
1910 ));
1911 RETURN;
1912}
1b6737cc 1913
33efebe6
DM
1914PP(pp_ne)
1915{
1916 dVAR; dSP;
1917 SV *left, *right;
1918
1919 tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
1920 right = POPs;
1921 left = TOPs;
1922 SETs(boolSV(
1923 (SvIOK_notUV(left) && SvIOK_notUV(right))
1924 ? (SvIVX(left) != SvIVX(right))
1925 : (do_ncmp(left, right) != 0)
1926 ));
1927 RETURN;
1928}
1b6737cc 1929
33efebe6
DM
1930/* compare left and right SVs. Returns:
1931 * -1: <
1932 * 0: ==
1933 * 1: >
1934 * 2: left or right was a NaN
1935 */
1936I32
1937Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
1938{
1939 dVAR;
1b6737cc 1940
33efebe6
DM
1941 PERL_ARGS_ASSERT_DO_NCMP;
1942#ifdef PERL_PRESERVE_IVUV
33efebe6 1943 /* Fortunately it seems NaN isn't IOK */
01f91bf2 1944 if (SvIV_please_nomg(right) && SvIV_please_nomg(left)) {
33efebe6
DM
1945 if (!SvUOK(left)) {
1946 const IV leftiv = SvIVX(left);
1947 if (!SvUOK(right)) {
1948 /* ## IV <=> IV ## */
1949 const IV rightiv = SvIVX(right);
1950 return (leftiv > rightiv) - (leftiv < rightiv);
28e5dec8 1951 }
33efebe6
DM
1952 /* ## IV <=> UV ## */
1953 if (leftiv < 0)
1954 /* As (b) is a UV, it's >=0, so it must be < */
1955 return -1;
1956 {
1957 const UV rightuv = SvUVX(right);
1958 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
28e5dec8 1959 }
28e5dec8 1960 }
79072805 1961
33efebe6
DM
1962 if (SvUOK(right)) {
1963 /* ## UV <=> UV ## */
1964 const UV leftuv = SvUVX(left);
1965 const UV rightuv = SvUVX(right);
1966 return (leftuv > rightuv) - (leftuv < rightuv);
28e5dec8 1967 }
33efebe6
DM
1968 /* ## UV <=> IV ## */
1969 {
1970 const IV rightiv = SvIVX(right);
1971 if (rightiv < 0)
1972 /* As (a) is a UV, it's >=0, so it cannot be < */
1973 return 1;
1974 {
1975 const UV leftuv = SvUVX(left);
1976 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
28e5dec8 1977 }
28e5dec8 1978 }
118e2215 1979 assert(0); /* NOTREACHED */
28e5dec8
JH
1980 }
1981#endif
a0d0e21e 1982 {
33efebe6
DM
1983 NV const rnv = SvNV_nomg(right);
1984 NV const lnv = SvNV_nomg(left);
1985
cab190d4 1986#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
33efebe6
DM
1987 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
1988 return 2;
1989 }
1990 return (lnv > rnv) - (lnv < rnv);
cab190d4 1991#else
33efebe6
DM
1992 if (lnv < rnv)
1993 return -1;
1994 if (lnv > rnv)
1995 return 1;
1996 if (lnv == rnv)
1997 return 0;
1998 return 2;
cab190d4 1999#endif
a0d0e21e 2000 }
79072805
LW
2001}
2002
33efebe6 2003
a0d0e21e 2004PP(pp_ncmp)
79072805 2005{
33efebe6
DM
2006 dVAR; dSP;
2007 SV *left, *right;
2008 I32 value;
a42d0242 2009 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
33efebe6
DM
2010 right = POPs;
2011 left = TOPs;
2012 value = do_ncmp(left, right);
2013 if (value == 2) {
3280af22 2014 SETs(&PL_sv_undef);
79072805 2015 }
33efebe6
DM
2016 else {
2017 dTARGET;
2018 SETi(value);
2019 }
2020 RETURN;
a0d0e21e 2021}
79072805 2022
afd9910b 2023PP(pp_sle)
a0d0e21e 2024{
97aff369 2025 dVAR; dSP;
79072805 2026
afd9910b
NC
2027 int amg_type = sle_amg;
2028 int multiplier = 1;
2029 int rhs = 1;
79072805 2030
afd9910b
NC
2031 switch (PL_op->op_type) {
2032 case OP_SLT:
2033 amg_type = slt_amg;
2034 /* cmp < 0 */
2035 rhs = 0;
2036 break;
2037 case OP_SGT:
2038 amg_type = sgt_amg;
2039 /* cmp > 0 */
2040 multiplier = -1;
2041 rhs = 0;
2042 break;
2043 case OP_SGE:
2044 amg_type = sge_amg;
2045 /* cmp >= 0 */
2046 multiplier = -1;
2047 break;
79072805 2048 }
79072805 2049
6f1401dc 2050 tryAMAGICbin_MG(amg_type, AMGf_set);
a0d0e21e
LW
2051 {
2052 dPOPTOPssrl;
1b6737cc 2053 const int cmp = (IN_LOCALE_RUNTIME
078504b2
FC
2054 ? sv_cmp_locale_flags(left, right, 0)
2055 : sv_cmp_flags(left, right, 0));
afd9910b 2056 SETs(boolSV(cmp * multiplier < rhs));
a0d0e21e
LW
2057 RETURN;
2058 }
2059}
79072805 2060
36477c24
PP
2061PP(pp_seq)
2062{
6f1401dc
DM
2063 dVAR; dSP;
2064 tryAMAGICbin_MG(seq_amg, AMGf_set);
36477c24
PP
2065 {
2066 dPOPTOPssrl;
078504b2 2067 SETs(boolSV(sv_eq_flags(left, right, 0)));
a0d0e21e
LW
2068 RETURN;
2069 }
2070}
79072805 2071
a0d0e21e 2072PP(pp_sne)
79072805 2073{
6f1401dc
DM
2074 dVAR; dSP;
2075 tryAMAGICbin_MG(sne_amg, AMGf_set);
a0d0e21e
LW
2076 {
2077 dPOPTOPssrl;
078504b2 2078 SETs(boolSV(!sv_eq_flags(left, right, 0)));
a0d0e21e 2079 RETURN;
463ee0b2 2080 }
79072805
LW
2081}
2082
a0d0e21e 2083PP(pp_scmp)
79072805 2084{
6f1401dc
DM
2085 dVAR; dSP; dTARGET;
2086 tryAMAGICbin_MG(scmp_amg, 0);
a0d0e21e
LW
2087 {
2088 dPOPTOPssrl;
1b6737cc 2089 const int cmp = (IN_LOCALE_RUNTIME
078504b2
FC
2090 ? sv_cmp_locale_flags(left, right, 0)
2091 : sv_cmp_flags(left, right, 0));
bbce6d69 2092 SETi( cmp );
a0d0e21e
LW
2093 RETURN;
2094 }
2095}
79072805 2096
55497cff
PP
2097PP(pp_bit_and)
2098{
6f1401dc
DM
2099 dVAR; dSP; dATARGET;
2100 tryAMAGICbin_MG(band_amg, AMGf_assign);
a0d0e21e
LW
2101 {
2102 dPOPTOPssrl;
4633a7c4 2103 if (SvNIOKp(left) || SvNIOKp(right)) {
b20c4ee1
FC
2104 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2105 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
d0ba1bd2 2106 if (PL_op->op_private & HINT_INTEGER) {
1b6737cc 2107 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
972b05a9 2108 SETi(i);
d0ba1bd2
JH
2109 }
2110 else {
1b6737cc 2111 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
972b05a9 2112 SETu(u);
d0ba1bd2 2113 }
5ee80e13 2114 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
b20c4ee1 2115 if (right_ro_nonnum) SvNIOK_off(right);
a0d0e21e
LW
2116 }
2117 else {
533c011a 2118 do_vop(PL_op->op_type, TARG, left, right);
a0d0e21e
LW
2119 SETTARG;
2120 }
2121 RETURN;
2122 }
2123}
79072805 2124
a0d0e21e
LW
2125PP(pp_bit_or)
2126{
3658c1f1
NC
2127 dVAR; dSP; dATARGET;
2128 const int op_type = PL_op->op_type;
2129
6f1401dc 2130 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
a0d0e21e
LW
2131 {
2132 dPOPTOPssrl;
4633a7c4 2133 if (SvNIOKp(left) || SvNIOKp(right)) {
b20c4ee1
FC
2134 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2135 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
d0ba1bd2 2136 if (PL_op->op_private & HINT_INTEGER) {
3658c1f1
NC
2137 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2138 const IV r = SvIV_nomg(right);
2139 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2140 SETi(result);
d0ba1bd2
JH
2141 }
2142 else {
3658c1f1
NC
2143 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2144 const UV r = SvUV_nomg(right);
2145 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2146 SETu(result);
d0ba1bd2 2147 }
5ee80e13 2148 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
b20c4ee1 2149 if (right_ro_nonnum) SvNIOK_off(right);
a0d0e21e
LW
2150 }
2151 else {
3658c1f1 2152 do_vop(op_type, TARG, left, right);
a0d0e21e
LW
2153 SETTARG;
2154 }
2155 RETURN;
79072805 2156 }
a0d0e21e 2157}
79072805 2158
1c2b3fd6
FC
2159PERL_STATIC_INLINE bool
2160S_negate_string(pTHX)
2161{
2162 dTARGET; dSP;
2163 STRLEN len;
2164 const char *s;
2165 SV * const sv = TOPs;
2166 if (!SvPOKp(sv) || SvNIOK(sv) || (!SvPOK(sv) && SvNIOKp(sv)))
2167 return FALSE;
2168 s = SvPV_nomg_const(sv, len);
2169 if (isIDFIRST(*s)) {
2170 sv_setpvs(TARG, "-");
2171 sv_catsv(TARG, sv);
2172 }
2173 else if (*s == '+' || (*s == '-' && !looks_like_number(sv))) {
2174 sv_setsv_nomg(TARG, sv);
2175 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2176 }
2177 else return FALSE;
2178 SETTARG; PUTBACK;
2179 return TRUE;
2180}
2181
a0d0e21e
LW
2182PP(pp_negate)
2183{
6f1401dc
DM
2184 dVAR; dSP; dTARGET;
2185 tryAMAGICun_MG(neg_amg, AMGf_numeric);
1c2b3fd6 2186 if (S_negate_string(aTHX)) return NORMAL;
a0d0e21e 2187 {
6f1401dc 2188 SV * const sv = TOPs;
a5b92898 2189
d96ab1b5 2190 if (SvIOK(sv)) {
7dbe3150 2191 /* It's publicly an integer */
28e5dec8 2192 oops_its_an_int:
9b0e499b
GS
2193 if (SvIsUV(sv)) {
2194 if (SvIVX(sv) == IV_MIN) {
28e5dec8 2195 /* 2s complement assumption. */
9b0e499b
GS
2196 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */
2197 RETURN;
2198 }
2199 else if (SvUVX(sv) <= IV_MAX) {
beccb14c 2200 SETi(-SvIVX(sv));
9b0e499b
GS
2201 RETURN;
2202 }
2203 }
2204 else if (SvIVX(sv) != IV_MIN) {
2205 SETi(-SvIVX(sv));
2206 RETURN;
2207 }
28e5dec8
JH
2208#ifdef PERL_PRESERVE_IVUV
2209 else {
2210 SETu((UV)IV_MIN);
2211 RETURN;
2212 }
2213#endif
9b0e499b 2214 }
8a5decd8 2215 if (SvNIOKp(sv) && (SvNIOK(sv) || !SvPOK(sv)))
6f1401dc 2216 SETn(-SvNV_nomg(sv));
1c2b3fd6 2217 else if (SvPOKp(sv) && SvIV_please_nomg(sv))
8eb28a70 2218 goto oops_its_an_int;
4633a7c4 2219 else
6f1401dc 2220 SETn(-SvNV_nomg(sv));
79072805 2221 }
a0d0e21e 2222 RETURN;
79072805
LW
2223}
2224
a0d0e21e 2225PP(pp_not)
79072805 2226{
6f1401dc
DM
2227 dVAR; dSP;
2228 tryAMAGICun_MG(not_amg, AMGf_set);
06c841cf 2229 *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
a0d0e21e 2230 return NORMAL;
79072805
LW
2231}
2232
a0d0e21e 2233PP(pp_complement)
79072805 2234{
6f1401dc 2235 dVAR; dSP; dTARGET;
a42d0242 2236 tryAMAGICun_MG(compl_amg, AMGf_numeric);
a0d0e21e
LW
2237 {
2238 dTOPss;
4633a7c4 2239 if (SvNIOKp(sv)) {
d0ba1bd2 2240 if (PL_op->op_private & HINT_INTEGER) {
1b6737cc 2241 const IV i = ~SvIV_nomg(sv);
972b05a9 2242 SETi(i);
d0ba1bd2
JH
2243 }
2244 else {
1b6737cc 2245 const UV u = ~SvUV_nomg(sv);
972b05a9 2246 SETu(u);
d0ba1bd2 2247 }
a0d0e21e
LW
2248 }
2249 else {
eb578fdb
KW
2250 U8 *tmps;
2251 I32 anum;
a0d0e21e
LW
2252 STRLEN len;
2253
10516c54 2254 (void)SvPV_nomg_const(sv,len); /* force check for uninit var */
891f9566 2255 sv_setsv_nomg(TARG, sv);
6f1401dc 2256 tmps = (U8*)SvPV_force_nomg(TARG, len);
a0d0e21e 2257 anum = len;
1d68d6cd 2258 if (SvUTF8(TARG)) {
a1ca4561 2259 /* Calculate exact length, let's not estimate. */
1d68d6cd 2260 STRLEN targlen = 0;
ba210ebe 2261 STRLEN l;
a1ca4561
YST
2262 UV nchar = 0;
2263 UV nwide = 0;
01f6e806 2264 U8 * const send = tmps + len;
74d49cd0
ST
2265 U8 * const origtmps = tmps;
2266 const UV utf8flags = UTF8_ALLOW_ANYUV;
1d68d6cd 2267
1d68d6cd 2268 while (tmps < send) {
74d49cd0
ST
2269 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2270 tmps += l;
5bbb0b5a 2271 targlen += UNISKIP(~c);
a1ca4561
YST
2272 nchar++;
2273 if (c > 0xff)
2274 nwide++;
1d68d6cd
SC
2275 }
2276
2277 /* Now rewind strings and write them. */
74d49cd0 2278 tmps = origtmps;
a1ca4561
YST
2279
2280 if (nwide) {
01f6e806
AL
2281 U8 *result;
2282 U8 *p;
2283
74d49cd0 2284 Newx(result, targlen + 1, U8);
01f6e806 2285 p = result;
a1ca4561 2286 while (tmps < send) {
74d49cd0
ST
2287 const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2288 tmps += l;
01f6e806 2289 p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
a1ca4561 2290 }
01f6e806 2291 *p = '\0';
c1c21316
NC
2292 sv_usepvn_flags(TARG, (char*)result, targlen,
2293 SV_HAS_TRAILING_NUL);
a1ca4561
YST
2294 SvUTF8_on(TARG);
2295 }
2296 else {
01f6e806
AL
2297 U8 *result;
2298 U8 *p;
2299
74d49cd0 2300 Newx(result, nchar + 1, U8);
01f6e806 2301 p = result;
a1ca4561 2302 while (tmps < send) {
74d49cd0
ST
2303 const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
2304 tmps += l;
01f6e806 2305 *p++ = ~c;
a1ca4561 2306 }
01f6e806 2307 *p = '\0';
c1c21316 2308 sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
d0a21e00 2309 SvUTF8_off(TARG);
1d68d6cd 2310 }
ec93b65f 2311 SETTARG;
1d68d6cd
SC
2312 RETURN;
2313 }
a0d0e21e 2314#ifdef LIBERAL
51723571 2315 {
eb578fdb 2316 long *tmpl;
51723571
JH
2317 for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
2318 *tmps = ~*tmps;
2319 tmpl = (long*)tmps;
bb7a0f54 2320 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
51723571
JH
2321 *tmpl = ~*tmpl;
2322 tmps = (U8*)tmpl;
2323 }
a0d0e21e
LW
2324#endif
2325 for ( ; anum > 0; anum--, tmps++)
2326 *tmps = ~*tmps;
ec93b65f 2327 SETTARG;
a0d0e21e
LW
2328 }
2329 RETURN;
2330 }
79072805
LW
2331}
2332
a0d0e21e
LW
2333/* integer versions of some of the above */
2334
a0d0e21e 2335PP(pp_i_multiply)
79072805 2336{
6f1401dc
DM
2337 dVAR; dSP; dATARGET;
2338 tryAMAGICbin_MG(mult_amg, AMGf_assign);
a0d0e21e 2339 {
6f1401dc 2340 dPOPTOPiirl_nomg;
a0d0e21e
LW
2341 SETi( left * right );
2342 RETURN;
2343 }
79072805
LW
2344}
2345
a0d0e21e 2346PP(pp_i_divide)
79072805 2347{
85935d8e 2348 IV num;
6f1401dc
DM
2349 dVAR; dSP; dATARGET;
2350 tryAMAGICbin_MG(div_amg, AMGf_assign);
a0d0e21e 2351 {
6f1401dc 2352 dPOPTOPssrl;
85935d8e 2353 IV value = SvIV_nomg(right);
a0d0e21e 2354 if (value == 0)
ece1bcef 2355 DIE(aTHX_ "Illegal division by zero");
85935d8e 2356 num = SvIV_nomg(left);
a0cec769
YST
2357
2358 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2359 if (value == -1)
2360 value = - num;
2361 else
2362 value = num / value;
6f1401dc 2363 SETi(value);
a0d0e21e
LW
2364 RETURN;
2365 }
79072805
LW
2366}
2367
a5bd31f4 2368#if defined(__GLIBC__) && IVSIZE == 8 && !defined(PERL_DEBUG_READONLY_OPS)
224ec323
JH
2369STATIC
2370PP(pp_i_modulo_0)
befad5d1
NC
2371#else
2372PP(pp_i_modulo)
2373#endif
224ec323
JH
2374{
2375 /* This is the vanilla old i_modulo. */
6f1401dc
DM
2376 dVAR; dSP; dATARGET;
2377 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2378 {
6f1401dc 2379 dPOPTOPiirl_nomg;
224ec323
JH
2380 if (!right)
2381 DIE(aTHX_ "Illegal modulus zero");
a0cec769
YST
2382 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2383 if (right == -1)
2384 SETi( 0 );
2385 else
2386 SETi( left % right );
224ec323
JH
2387 RETURN;
2388 }
2389}
2390
a5bd31f4 2391#if defined(__GLIBC__) && IVSIZE == 8 && !defined(PERL_DEBUG_READONLY_OPS)
224ec323
JH
2392STATIC
2393PP(pp_i_modulo_1)
befad5d1 2394
224ec323 2395{
224ec323 2396 /* This is the i_modulo with the workaround for the _moddi3 bug
fce2b89e 2397 * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
224ec323 2398 * See below for pp_i_modulo. */
6f1401dc
DM
2399 dVAR; dSP; dATARGET;
2400 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2401 {
6f1401dc 2402 dPOPTOPiirl_nomg;
224ec323
JH
2403 if (!right)
2404 DIE(aTHX_ "Illegal modulus zero");
a0cec769
YST
2405 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2406 if (right == -1)
2407 SETi( 0 );
2408 else
2409 SETi( left % PERL_ABS(right) );
224ec323
JH
2410 RETURN;
2411 }
224ec323
JH
2412}
2413
a0d0e21e 2414PP(pp_i_modulo)
79072805 2415{
6f1401dc
DM
2416 dVAR; dSP; dATARGET;
2417 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
224ec323 2418 {
6f1401dc 2419 dPOPTOPiirl_nomg;
224ec323
JH
2420 if (!right)
2421 DIE(aTHX_ "Illegal modulus zero");
2422 /* The assumption is to use hereafter the old vanilla version... */
2423 PL_op->op_ppaddr =
2424 PL_ppaddr[OP_I_MODULO] =
1c127fab 2425 Perl_pp_i_modulo_0;
224ec323
JH
2426 /* .. but if we have glibc, we might have a buggy _moddi3
2427 * (at least glicb 2.2.5 is known to have this bug), in other
2428 * words our integer modulus with negative quad as the second
2429 * argument might be broken. Test for this and re-patch the
2430 * opcode dispatch table if that is the case, remembering to
2431 * also apply the workaround so that this first round works
2432 * right, too. See [perl #9402] for more information. */
224ec323
JH
2433 {
2434 IV l = 3;
2435 IV r = -10;
2436 /* Cannot do this check with inlined IV constants since
2437 * that seems to work correctly even with the buggy glibc. */
2438 if (l % r == -3) {
2439 /* Yikes, we have the bug.
2440 * Patch in the workaround version. */
2441 PL_op->op_ppaddr =
2442 PL_ppaddr[OP_I_MODULO] =
2443 &Perl_pp_i_modulo_1;
2444 /* Make certain we work right this time, too. */
32fdb065 2445 right = PERL_ABS(right);
224ec323
JH
2446 }
2447 }
a0cec769
YST
2448 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2449 if (right == -1)
2450 SETi( 0 );
2451 else
2452 SETi( left % right );
224ec323
JH
2453 RETURN;
2454 }
79072805 2455}
befad5d1 2456#endif
79072805 2457
a0d0e21e 2458PP(pp_i_add)
79072805 2459{
6f1401dc
DM
2460 dVAR; dSP; dATARGET;
2461 tryAMAGICbin_MG(add_amg, AMGf_assign);
a0d0e21e 2462 {
6f1401dc 2463 dPOPTOPiirl_ul_nomg;
a0d0e21e
LW
2464 SETi( left + right );
2465 RETURN;
79072805 2466 }
79072805
LW
2467}
2468
a0d0e21e 2469PP(pp_i_subtract)
79072805 2470{
6f1401dc
DM
2471 dVAR; dSP; dATARGET;
2472 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
a0d0e21e 2473 {
6f1401dc 2474 dPOPTOPiirl_ul_nomg;
a0d0e21e
LW
2475 SETi( left - right );
2476 RETURN;
79072805 2477 }
79072805
LW
2478}
2479
a0d0e21e 2480PP(pp_i_lt)
79072805 2481{
6f1401dc
DM
2482 dVAR; dSP;
2483 tryAMAGICbin_MG(lt_amg, AMGf_set);
a0d0e21e 2484 {
96b6b87f 2485 dPOPTOPiirl_nomg;
54310121 2486 SETs(boolSV(left < right));
a0d0e21e
LW
2487 RETURN;
2488 }
79072805
LW
2489}
2490
a0d0e21e 2491PP(pp_i_gt)
79072805 2492{
6f1401dc
DM
2493 dVAR; dSP;
2494 tryAMAGICbin_MG(gt_amg, AMGf_set);
a0d0e21e 2495 {
96b6b87f 2496 dPOPTOPiirl_nomg;
54310121 2497 SETs(boolSV(left > right));
a0d0e21e
LW
2498 RETURN;
2499 }
79072805
LW
2500}
2501
a0d0e21e 2502PP(pp_i_le)
79072805 2503{
6f1401dc
DM
2504 dVAR; dSP;
2505 tryAMAGICbin_MG(le_amg, AMGf_set);
a0d0e21e 2506 {
96b6b87f 2507 dPOPTOPiirl_nomg;
54310121 2508 SETs(boolSV(left <= right));
a0d0e21e 2509 RETURN;
85e6fe83 2510 }
79072805
LW
2511}
2512
a0d0e21e 2513PP(pp_i_ge)
79072805 2514{
6f1401dc
DM
2515 dVAR; dSP;
2516 tryAMAGICbin_MG(ge_amg, AMGf_set);
a0d0e21e 2517 {
96b6b87f 2518 dPOPTOPiirl_nomg;
54310121 2519 SETs(boolSV(left >= right));
a0d0e21e
LW
2520 RETURN;
2521 }
79072805
LW
2522}
2523
a0d0e21e 2524PP(pp_i_eq)
79072805 2525{
6f1401dc
DM
2526 dVAR; dSP;
2527 tryAMAGICbin_MG(eq_amg, AMGf_set);
a0d0e21e 2528 {
96b6b87f 2529 dPOPTOPiirl_nomg;
54310121 2530 SETs(boolSV(left == right));
a0d0e21e
LW
2531 RETURN;
2532 }
79072805
LW
2533}
2534
a0d0e21e 2535PP(pp_i_ne)
79072805 2536{
6f1401dc
DM
2537 dVAR; dSP;
2538 tryAMAGICbin_MG(ne_amg, AMGf_set);
a0d0e21e 2539 {
96b6b87f 2540 dPOPTOPiirl_nomg;
54310121 2541 SETs(boolSV(left != right));
a0d0e21e
LW
2542 RETURN;
2543 }
79072805
LW
2544}
2545
a0d0e21e 2546PP(pp_i_ncmp)
79072805 2547{
6f1401dc
DM
2548 dVAR; dSP; dTARGET;
2549 tryAMAGICbin_MG(ncmp_amg, 0);
a0d0e21e 2550 {
96b6b87f 2551 dPOPTOPiirl_nomg;
a0d0e21e 2552 I32 value;
79072805 2553
a0d0e21e 2554 if (left > right)
79072805 2555 value = 1;
a0d0e21e 2556 else if (left < right)
79072805 2557 value = -1;
a0d0e21e 2558 else
79072805 2559 value = 0;
a0d0e21e
LW
2560 SETi(value);
2561 RETURN;
79072805 2562 }
85e6fe83
LW
2563}
2564
2565PP(pp_i_negate)
2566{
6f1401dc
DM
2567 dVAR; dSP; dTARGET;
2568 tryAMAGICun_MG(neg_amg, 0);
1c2b3fd6 2569 if (S_negate_string(aTHX)) return NORMAL;
6f1401dc
DM
2570 {
2571 SV * const sv = TOPs;
2572 IV const i = SvIV_nomg(sv);
2573 SETi(-i);
2574 RETURN;
2575 }
85e6fe83
LW
2576}
2577
79072805
LW
2578/* High falutin' math. */
2579
2580PP(pp_atan2)
2581{
6f1401dc
DM
2582 dVAR; dSP; dTARGET;
2583 tryAMAGICbin_MG(atan2_amg, 0);
a0d0e21e 2584 {
096c060c 2585 dPOPTOPnnrl_nomg;
a1021d57 2586 SETn(Perl_atan2(left, right));
a0d0e21e
LW
2587 RETURN;
2588 }
79072805
LW
2589}
2590
2591PP(pp_sin)
2592{
71302fe3
NC
2593 dVAR; dSP; dTARGET;
2594 int amg_type = sin_amg;
2595 const char *neg_report = NULL;
bc81784a 2596 NV (*func)(NV) = Perl_sin;
71302fe3
NC
2597 const int op_type = PL_op->op_type;
2598
2599 switch (op_type) {
2600 case OP_COS:
2601 amg_type = cos_amg;
bc81784a 2602 func = Perl_cos;
71302fe3
NC
2603 break;
2604 case OP_EXP:
2605 amg_type = exp_amg;
bc81784a 2606 func = Perl_exp;
71302fe3
NC
2607 break;
2608 case OP_LOG:
2609 amg_type = log_amg;
bc81784a 2610 func = Perl_log;
71302fe3
NC
2611 neg_report = "log";
2612 break;
2613 case OP_SQRT:
2614 amg_type = sqrt_amg;
bc81784a 2615 func = Perl_sqrt;
71302fe3
NC
2616 neg_report = "sqrt";
2617 break;
a0d0e21e 2618 }
79072805 2619
6f1401dc
DM
2620
2621 tryAMAGICun_MG(amg_type, 0);
a0d0e21e 2622 {
6f1401dc
DM
2623 SV * const arg = POPs;
2624 const NV value = SvNV_nomg(arg);
71302fe3
NC
2625 if (neg_report) {
2626 if (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0)) {
2627 SET_NUMERIC_STANDARD();
dcbac5bb 2628 /* diag_listed_as: Can't take log of %g */
71302fe3
NC
2629 DIE(aTHX_ "Can't take %s of %"NVgf, neg_report, value);
2630 }
2631 }
2632 XPUSHn(func(value));
a0d0e21e
LW
2633 RETURN;
2634 }
79072805
LW
2635}
2636
56cb0a1c
AD
2637/* Support Configure command-line overrides for rand() functions.
2638 After 5.005, perhaps we should replace this by Configure support
2639 for drand48(), random(), or rand(). For 5.005, though, maintain
2640 compatibility by calling rand() but allow the user to override it.
2641 See INSTALL for details. --Andy Dougherty 15 July 1998
2642*/
85ab1d1d
JH
2643/* Now it's after 5.005, and Configure supports drand48() and random(),
2644 in addition to rand(). So the overrides should not be needed any more.
2645 --Jarkko Hietaniemi 27 September 1998
2646 */
2647
2648#ifndef HAS_DRAND48_PROTO
20ce7b12 2649extern double drand48 (void);
56cb0a1c
AD
2650#endif
2651
79072805
LW
2652PP(pp_rand)
2653{
97aff369 2654 dVAR; dSP; dTARGET;
65202027 2655 NV value;
79072805
LW
2656 if (MAXARG < 1)
2657 value = 1.0;
94ec06bc
FC
2658 else if (!TOPs) {
2659 value = 1.0; (void)POPs;
2660 }
79072805
LW
2661 else
2662 value = POPn;
2663 if (value == 0.0)
2664 value = 1.0;
80252599 2665 if (!PL_srand_called) {
85ab1d1d 2666 (void)seedDrand01((Rand_seed_t)seed());
80252599 2667 PL_srand_called = TRUE;
93dc8474 2668 }
85ab1d1d 2669 value *= Drand01();
79072805
LW
2670 XPUSHn(value);
2671 RETURN;
2672}
2673
2674PP(pp_srand)
2675{
83832992 2676 dVAR; dSP; dTARGET;
f914a682
JL
2677 UV anum;
2678
0a5f3363 2679 if (MAXARG >= 1 && (TOPs || POPs)) {
f914a682
JL
2680 SV *top;
2681 char *pv;
2682 STRLEN len;
2683 int flags;
2684
2685 top = POPs;
2686 pv = SvPV(top, len);
2687 flags = grok_number(pv, len, &anum);
2688
2689 if (!(flags & IS_NUMBER_IN_UV)) {
2690 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
2691 "Integer overflow in srand");
2692 anum = UV_MAX;
2693 }
2694 }
2695 else {
2696 anum = seed();
2697 }
2698
85ab1d1d 2699 (void)seedDrand01((Rand_seed_t)anum);
80252599 2700 PL_srand_called = TRUE;
da1010ec
NC
2701 if (anum)
2702 XPUSHu(anum);
2703 else {
2704 /* Historically srand always returned true. We can avoid breaking
2705 that like this: */
2706 sv_setpvs(TARG, "0 but true");
2707 XPUSHTARG;
2708 }
83832992 2709 RETURN;
79072805
LW
2710}
2711
79072805
LW
2712PP(pp_int)
2713{
6f1401dc
DM
2714 dVAR; dSP; dTARGET;
2715 tryAMAGICun_MG(int_amg, AMGf_numeric);
774d564b 2716 {
6f1401dc
DM
2717 SV * const sv = TOPs;
2718 const IV iv = SvIV_nomg(sv);
28e5dec8
JH
2719 /* XXX it's arguable that compiler casting to IV might be subtly
2720 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2721 else preferring IV has introduced a subtle behaviour change bug. OTOH
2722 relying on floating point to be accurate is a bug. */
2723
c781a409 2724 if (!SvOK(sv)) {
922c4365 2725 SETu(0);
c781a409
RD
2726 }
2727 else if (SvIOK(sv)) {
2728 if (SvIsUV(sv))
6f1401dc 2729 SETu(SvUV_nomg(sv));
c781a409 2730 else
28e5dec8 2731 SETi(iv);
c781a409 2732 }
c781a409 2733 else {
6f1401dc 2734 const NV value = SvNV_nomg(sv);
1048ea30 2735 if (value >= 0.0) {
28e5dec8
JH
2736 if (value < (NV)UV_MAX + 0.5) {
2737 SETu(U_V(value));
2738 } else {
059a1014 2739 SETn(Perl_floor(value));
28e5dec8 2740 }
1048ea30 2741 }
28e5dec8
JH
2742 else {
2743 if (value > (NV)IV_MIN - 0.5) {
2744 SETi(I_V(value));
2745 } else {
1bbae031 2746 SETn(Perl_ceil(value));
28e5dec8
JH
2747 }
2748 }
774d564b 2749 }
79072805 2750 }
79072805
LW
2751 RETURN;
2752}
2753
463ee0b2
LW
2754PP(pp_abs)
2755{
6f1401dc
DM
2756 dVAR; dSP; dTARGET;
2757 tryAMAGICun_MG(abs_amg, AMGf_numeric);
a0d0e21e 2758 {
6f1401dc 2759 SV * const sv = TOPs;
28e5dec8 2760 /* This will cache the NV value if string isn't actually integer */
6f1401dc 2761 const IV iv = SvIV_nomg(sv);
a227d84d 2762
800401ee 2763 if (!SvOK(sv)) {
922c4365 2764 SETu(0);
800401ee
JH
2765 }
2766 else if (SvIOK(sv)) {
28e5dec8 2767 /* IVX is precise */
800401ee 2768 if (SvIsUV(sv)) {
6f1401dc 2769 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
28e5dec8
JH
2770 } else {
2771 if (iv >= 0) {
2772 SETi(iv);
2773 } else {
2774 if (iv != IV_MIN) {
2775 SETi(-iv);
2776 } else {
2777 /* 2s complement assumption. Also, not really needed as
2778 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
2779 SETu(IV_MIN);
2780 }
a227d84d 2781 }
28e5dec8
JH
2782 }
2783 } else{
6f1401dc 2784 const NV value = SvNV_nomg(sv);
774d564b 2785 if (value < 0.0)
1b6737cc 2786 SETn(-value);
a4474c9e
DD
2787 else
2788 SETn(value);
774d564b 2789 }
a0d0e21e 2790 }
774d564b 2791 RETURN;
463ee0b2
LW
2792}
2793
79072805
LW
2794PP(pp_oct)
2795{
97aff369 2796 dVAR; dSP; dTARGET;
5c144d81 2797 const char *tmps;
53305cf1 2798 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
6f894ead 2799 STRLEN len;
53305cf1
NC
2800 NV result_nv;
2801 UV result_uv;
1b6737cc 2802 SV* const sv = POPs;
79072805 2803
349d4f2f 2804 tmps = (SvPV_const(sv, len));
2bc69dc4
NIS
2805 if (DO_UTF8(sv)) {
2806 /* If Unicode, try to downgrade
2807 * If not possible, croak. */
1b6737cc 2808 SV* const tsv = sv_2mortal(newSVsv(sv));
2bc69dc4
NIS
2809
2810 SvUTF8_on(tsv);
2811 sv_utf8_downgrade(tsv, FALSE);
349d4f2f 2812 tmps = SvPV_const(tsv, len);
2bc69dc4 2813 }
daa2adfd
NC
2814 if (PL_op->op_type == OP_HEX)
2815 goto hex;
2816
6f894ead 2817 while (*tmps && len && isSPACE(*tmps))
53305cf1 2818 tmps++, len--;
9e24b6e2 2819 if (*tmps == '0')
53305cf1 2820 tmps++, len--;
a674e8db 2821 if (*tmps == 'x' || *tmps == 'X') {
daa2adfd 2822 hex:
53305cf1 2823 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
daa2adfd 2824 }
a674e8db 2825 else if (*tmps == 'b' || *tmps == 'B')
53305cf1 2826 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
464e2e8a 2827 else
53305cf1
NC
2828 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
2829
2830 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
2831 XPUSHn(result_nv);
2832 }
2833 else {
2834 XPUSHu(result_uv);
2835 }
79072805
LW
2836 RETURN;
2837}
2838
2839/* String stuff. */
2840
2841PP(pp_length)
2842{
97aff369 2843 dVAR; dSP; dTARGET;
0bd48802 2844 SV * const sv = TOPs;
a0ed51b3 2845
656266fc 2846 if (SvGAMAGIC(sv)) {
9f621bb0
NC
2847 /* For an overloaded or magic scalar, we can't know in advance if
2848 it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
2849 it likes to cache the length. Maybe that should be a documented
2850 feature of it.
92331800
NC
2851 */
2852 STRLEN len;
9f621bb0
NC
2853 const char *const p
2854 = sv_2pv_flags(sv, &len,
2855 SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
92331800 2856
d88e091f 2857 if (!p) {
9407f9c1
DL
2858 if (!SvPADTMP(TARG)) {
2859 sv_setsv(TARG, &PL_sv_undef);
2860 SETTARG;
2861 }
2862 SETs(&PL_sv_undef);
d88e091f 2863 }
9f621bb0 2864 else if (DO_UTF8(sv)) {
899be101 2865 SETi(utf8_length((U8*)p, (U8*)p + len));
92331800
NC
2866 }
2867 else
2868 SETi(len);
656266fc 2869 } else if (SvOK(sv)) {
9f621bb0
NC
2870 /* Neither magic nor overloaded. */
2871 if (DO_UTF8(sv))
2872 SETi(sv_len_utf8(sv));
2873 else
2874 SETi(sv_len(sv));
656266fc 2875 } else {
9407f9c1
DL
2876 if (!SvPADTMP(TARG)) {
2877 sv_setsv_nomg(TARG, &PL_sv_undef);
2878 SETTARG;
2879 }
2880 SETs(&PL_sv_undef);
92331800 2881 }
79072805
LW
2882 RETURN;
2883}
2884
83f78d1a
FC
2885/* Returns false if substring is completely outside original string.
2886 No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must
2887 always be true for an explicit 0.
2888*/
2889bool
2890Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV pos1_iv,
2891 bool pos1_is_uv, IV len_iv,
2892 bool len_is_uv, STRLEN *posp,
2893 STRLEN *lenp)
2894{
2895 IV pos2_iv;
2896 int pos2_is_uv;
2897
2898 PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
2899
2900 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
2901 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
2902 pos1_iv += curlen;
2903 }
2904 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
2905 return FALSE;
2906
2907 if (len_iv || len_is_uv) {
2908 if (!len_is_uv && len_iv < 0) {
2909 pos2_iv = curlen + len_iv;
2910 if (curlen)
2911 pos2_is_uv = curlen-1 > ~(UV)len_iv;
2912 else
2913 pos2_is_uv = 0;
2914 } else { /* len_iv >= 0 */
2915 if (!pos1_is_uv && pos1_iv < 0) {
2916 pos2_iv = pos1_iv + len_iv;
2917 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
2918 } else {
2919 if ((UV)len_iv > curlen-(UV)pos1_iv)
2920 pos2_iv = curlen;
2921 else
2922 pos2_iv = pos1_iv+len_iv;
2923 pos2_is_uv = 1;
2924 }
2925 }
2926 }
2927 else {
2928 pos2_iv = curlen;
2929 pos2_is_uv = 1;
2930 }
2931
2932 if (!pos2_is_uv && pos2_iv < 0) {
2933 if (!pos1_is_uv && pos1_iv < 0)
2934 return FALSE;
2935 pos2_iv = 0;
2936 }
2937 else if (!pos1_is_uv && pos1_iv < 0)
2938 pos1_iv = 0;
2939
2940 if ((UV)pos2_iv < (UV)pos1_iv)
2941 pos2_iv = pos1_iv;
2942 if ((UV)pos2_iv > curlen)
2943 pos2_iv = curlen;
2944
2945 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
2946 *posp = (STRLEN)( (UV)pos1_iv );
2947 *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
2948
2949 return TRUE;
2950}
2951
79072805
LW
2952PP(pp_substr)
2953{
97aff369 2954 dVAR; dSP; dTARGET;
79072805 2955 SV *sv;
463ee0b2 2956 STRLEN curlen;
9402d6ed 2957 STRLEN utf8_curlen;
777f7c56
EB
2958 SV * pos_sv;
2959 IV pos1_iv;
2960 int pos1_is_uv;
777f7c56
EB
2961 SV * len_sv;
2962 IV len_iv = 0;
83f78d1a 2963 int len_is_uv = 0;
24fcb59f 2964 I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
bbddc9e0 2965 const bool rvalue = (GIMME_V != G_VOID);
e1ec3a88 2966 const char *tmps;
9402d6ed 2967 SV *repl_sv = NULL;
cbbf8932 2968 const char *repl = NULL;
7b8d334a 2969 STRLEN repl_len;
7bc95ae1 2970 int num_args = PL_op->op_private & 7;
13e30c65 2971 bool repl_need_utf8_upgrade = FALSE;
9402d6ed 2972 bool repl_is_utf8 = FALSE;
79072805 2973
78f9721b
SM
2974 if (num_args > 2) {
2975 if (num_args > 3) {
24fcb59f 2976 if(!(repl_sv = POPs)) num_args--;
7bc95ae1
FC
2977 }
2978 if ((len_sv = POPs)) {
2979 len_iv = SvIV(len_sv);
83f78d1a 2980 len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1;
7b8d334a 2981 }
7bc95ae1 2982 else num_args--;
5d82c453 2983 }
777f7c56
EB
2984 pos_sv = POPs;
2985 pos1_iv = SvIV(pos_sv);
2986 pos1_is_uv = SvIOK_UV(pos_sv);
79072805 2987 sv = POPs;
24fcb59f
FC
2988 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) {
2989 assert(!repl_sv);
2990 repl_sv = POPs;
2991 }
849ca7ee 2992 PUTBACK;
9402d6ed 2993 if (repl_sv) {
24fcb59f
FC
2994 repl = SvPV_const(repl_sv, repl_len);
2995 repl_is_utf8 = DO_UTF8(repl_sv) && repl_len;
9402d6ed
JH
2996 if (repl_is_utf8) {
2997 if (!DO_UTF8(sv))
2998 sv_utf8_upgrade(sv);
2999 }
13e30c65
JH
3000 else if (DO_UTF8(sv))
3001 repl_need_utf8_upgrade = TRUE;
9402d6ed 3002 }
83f78d1a
FC
3003 else if (lvalue) {
3004 SV * ret;
3005 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3006 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3007 LvTYPE(ret) = 'x';
3008 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3009 LvTARGOFF(ret) =
3010 pos1_is_uv || pos1_iv >= 0
3011 ? (STRLEN)(UV)pos1_iv
3012 : (LvFLAGS(ret) |= 1, (STRLEN)(UV)-pos1_iv);
3013 LvTARGLEN(ret) =
3014 len_is_uv || len_iv > 0
3015 ? (STRLEN)(UV)len_iv
3016 : (LvFLAGS(ret) |= 2, (STRLEN)(UV)-len_iv);
3017
3018 SPAGAIN;
3019 PUSHs(ret); /* avoid SvSETMAGIC here */
3020 RETURN;
a74fb2cd 3021 }
83f78d1a 3022 tmps = SvPV_const(sv, curlen);
7e2040f0 3023 if (DO_UTF8(sv)) {
864329c3 3024 utf8_curlen = sv_len_utf8_nomg(sv);
9402d6ed
JH
3025 if (utf8_curlen == curlen)
3026 utf8_curlen = 0;
a0ed51b3 3027 else
9402d6ed 3028 curlen = utf8_curlen;
a0ed51b3 3029 }
d1c2b58a 3030 else
9402d6ed 3031 utf8_curlen = 0;
a0ed51b3 3032
83f78d1a
FC
3033 {
3034 STRLEN pos, len, byte_len, byte_pos;
777f7c56 3035
83f78d1a
FC
3036 if (!translate_substr_offsets(
3037 curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len
3038 )) goto bound_fail;
777f7c56 3039
83f78d1a
FC
3040 byte_len = len;
3041 byte_pos = utf8_curlen
d931b1be
NC
3042 ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
3043
2154eca7 3044 tmps += byte_pos;
bbddc9e0
CS
3045
3046 if (rvalue) {
3047 SvTAINTED_off(TARG); /* decontaminate */
3048 SvUTF8_off(TARG); /* decontaminate */
3049 sv_setpvn(TARG, tmps, byte_len);
12aa1545 3050#ifdef USE_LOCALE_COLLATE
bbddc9e0 3051 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
12aa1545 3052#endif
bbddc9e0
CS
3053 if (utf8_curlen)
3054 SvUTF8_on(TARG);
3055 }
2154eca7 3056
f7928d6c 3057 if (repl) {
13e30c65
JH
3058 SV* repl_sv_copy = NULL;
3059
3060 if (repl_need_utf8_upgrade) {
3061 repl_sv_copy = newSVsv(repl_sv);
3062 sv_utf8_upgrade(repl_sv_copy);
349d4f2f 3063 repl = SvPV_const(repl_sv_copy, repl_len);
bf32a30c 3064 repl_is_utf8 = DO_UTF8(repl_sv_copy) && repl_len;
13e30c65 3065 }
24fcb59f
FC
3066 if (SvROK(sv))
3067 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3068 "Attempt to use reference as lvalue in substr"
3069 );
502d9230
VP
3070 if (!SvOK(sv))
3071 sv_setpvs(sv, "");
777f7c56 3072 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
9402d6ed 3073 if (repl_is_utf8)
f7928d6c 3074 SvUTF8_on(sv);
ef8d46e8 3075 SvREFCNT_dec(repl_sv_copy);
f7928d6c 3076 }
79072805 3077 }
849ca7ee 3078 SPAGAIN;
bbddc9e0
CS
3079 if (rvalue) {
3080 SvSETMAGIC(TARG);
3081 PUSHs(TARG);
3082 }
79072805 3083 RETURN;
777f7c56 3084
1c900557 3085bound_fail:
83f78d1a 3086 if (repl)
777f7c56
EB
3087 Perl_croak(aTHX_ "substr outside of string");
3088 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3089 RETPUSHUNDEF;
79072805
LW
3090}
3091
3092PP(pp_vec)
3093{
2154eca7 3094 dVAR; dSP;
eb578fdb
KW
3095 const IV size = POPi;
3096 const IV offset = POPi;
3097 SV * const src = POPs;
1b6737cc 3098 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
2154eca7 3099 SV * ret;
a0d0e21e 3100
81e118e0 3101 if (lvalue) { /* it's an lvalue! */
2154eca7
EB
3102 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3103 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3104 LvTYPE(ret) = 'v';
3105 LvTARG(ret) = SvREFCNT_inc_simple(src);
3106 LvTARGOFF(ret) = offset;
3107 LvTARGLEN(ret) = size;
3108 }
3109 else {
3110 dTARGET;
3111 SvTAINTED_off(TARG); /* decontaminate */
3112 ret = TARG;
79072805
LW
3113 }
3114
2154eca7
EB
3115 sv_setuv(ret, do_vecget(src, offset, size));
3116 PUSHs(ret);
79072805
LW
3117 RETURN;
3118}
3119
3120PP(pp_index)
3121{
97aff369 3122 dVAR; dSP; dTARGET;
79072805
LW
3123 SV *big;
3124 SV *little;
c445ea15 3125 SV *temp = NULL;
ad66a58c 3126 STRLEN biglen;
2723d216 3127 STRLEN llen = 0;
79072805
LW
3128 I32 offset;
3129 I32 retval;
73ee8be2
NC
3130 const char *big_p;
3131 const char *little_p;
2f040f7f
NC
3132 bool big_utf8;
3133 bool little_utf8;
2723d216 3134 const bool is_index = PL_op->op_type == OP_INDEX;
d3e26383 3135 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
79072805 3136
e1dccc0d
Z
3137 if (threeargs)
3138 offset = POPi;
79072805
LW
3139 little = POPs;
3140 big = POPs;
73ee8be2
NC
3141 big_p = SvPV_const(big, biglen);
3142 little_p = SvPV_const(little, llen);
3143
e609e586
NC
3144 big_utf8 = DO_UTF8(big);
3145 little_utf8 = DO_UTF8(little);
3146 if (big_utf8 ^ little_utf8) {
3147 /* One needs to be upgraded. */
2f040f7f
NC
3148 if (little_utf8 && !PL_encoding) {
3149 /* Well, maybe instead we might be able to downgrade the small
3150 string? */
1eced8f8 3151 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
2f040f7f
NC
3152 &little_utf8);
3153 if (little_utf8) {
3154 /* If the large string is ISO-8859-1, and it's not possible to
3155 convert the small string to ISO-8859-1, then there is no
3156 way that it could be found anywhere by index. */
3157 retval = -1;
3158 goto fail;
3159 }
e609e586 3160
2f040f7f
NC
3161 /* At this point, pv is a malloc()ed string. So donate it to temp
3162 to ensure it will get free()d */
3163 little = temp = newSV(0);
73ee8be2
NC
3164 sv_usepvn(temp, pv, llen);
3165 little_p = SvPVX(little);
e609e586 3166 } else {
73ee8be2
NC
3167 temp = little_utf8
3168 ? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
2f040f7f
NC
3169
3170 if (PL_encoding) {
3171 sv_recode_to_utf8(temp, PL_encoding);
3172 } else {
3173 sv_utf8_upgrade(temp);
3174 }
3175 if (little_utf8) {
3176 big = temp;
3177 big_utf8 = TRUE;
73ee8be2 3178 big_p = SvPV_const(big, biglen);
2f040f7f
NC
3179 } else {
3180 little = temp;
73ee8be2 3181 little_p = SvPV_const(little, llen);
2f040f7f 3182 }
e609e586
NC
3183 }
3184 }
73ee8be2
NC
3185 if (SvGAMAGIC(big)) {
3186 /* Life just becomes a lot easier if I use a temporary here.
3187 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3188 will trigger magic and overloading again, as will fbm_instr()
3189 */
59cd0e26
NC
3190 big = newSVpvn_flags(big_p, biglen,
3191 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
73ee8be2
NC
3192 big_p = SvPVX(big);
3193 }
e4e44778 3194 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
73ee8be2
NC
3195 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3196 warn on undef, and we've already triggered a warning with the
3197 SvPV_const some lines above. We can't remove that, as we need to
3198 call some SvPV to trigger overloading early and find out if the
3199 string is UTF-8.
3200 This is all getting to messy. The API isn't quite clean enough,
3201 because data access has side effects.
3202 */
59cd0e26
NC
3203 little = newSVpvn_flags(little_p, llen,
3204 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
73ee8be2
NC
3205 little_p = SvPVX(little);
3206 }
e609e586 3207
d3e26383 3208 if (!threeargs)
2723d216 3209 offset = is_index ? 0 : biglen;
a0ed51b3 3210 else {
ad66a58c 3211 if (big_utf8 && offset > 0)
a0ed51b3 3212 sv_pos_u2b(big, &offset, 0);
73ee8be2
NC
3213 if (!is_index)
3214 offset += llen;
a0ed51b3 3215 }
79072805
LW
3216 if (offset < 0)
3217 offset = 0;
ad66a58c
NC
3218 else if (offset > (I32)biglen)
3219 offset = biglen;
73ee8be2
NC
3220 if (!(little_p = is_index
3221 ? fbm_instr((unsigned char*)big_p + offset,
3222 (unsigned char*)big_p + biglen, little, 0)
3223 : rninstr(big_p, big_p + offset,
3224 little_p, little_p + llen)))
a0ed51b3 3225 retval = -1;
ad66a58c 3226 else {
73ee8be2 3227 retval = little_p - big_p;
ad66a58c
NC
3228 if (retval > 0 && big_utf8)
3229 sv_pos_b2u(big, &retval);
3230 }
ef8d46e8 3231 SvREFCNT_dec(temp);
2723d216 3232 fail:
e1dccc0d 3233 PUSHi(retval);
79072805
LW
3234 RETURN;
3235}
3236
3237PP(pp_sprintf)
3238{
97aff369 3239 dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3e6bd4bf 3240 SvTAINTED_off(TARG);
79072805 3241 do_sprintf(TARG, SP-MARK, MARK+1);
bbce6d69 3242 TAINT_IF(SvTAINTED(TARG));
79072805
LW
3243 SP = ORIGMARK;
3244 PUSHTARG;
3245 RETURN;
3246}
3247
79072805
LW
3248PP(pp_ord)
3249{
97aff369 3250 dVAR; dSP; dTARGET;
1eced8f8 3251
7df053ec 3252 SV *argsv = POPs;
ba210ebe 3253 STRLEN len;
349d4f2f 3254 const U8 *s = (U8*)SvPV_const(argsv, len);
121910a4 3255
799ef3cb 3256 if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
1eced8f8 3257 SV * const tmpsv = sv_2mortal(newSVsv(argsv));
799ef3cb 3258 s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
121910a4
JH
3259 argsv = tmpsv;
3260 }
79072805 3261
872c91ae 3262 XPUSHu(DO_UTF8(argsv) ?
89ebb4a3 3263 utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) :
5fc32dea 3264 (UV)(*s & 0xff));
68795e93 3265
79072805
LW
3266 RETURN;
3267}
3268
463ee0b2
LW
3269PP(pp_chr)
3270{
97aff369 3271 dVAR; dSP; dTARGET;
463ee0b2 3272 char *tmps;
8a064bd6 3273 UV value;
71739502 3274 SV *top = POPs;
8a064bd6 3275
71739502
FC
3276 SvGETMAGIC(top);
3277 if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */
3278 && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0)
8a064bd6 3279 ||
71739502
FC
3280 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top)))
3281 && SvNV_nomg(top) < 0.0))) {
b3fe8680
FC
3282 if (ckWARN(WARN_UTF8)) {
3283 if (SvGMAGICAL(top)) {
3284 SV *top2 = sv_newmortal();
3285 sv_setsv_nomg(top2, top);
3286 top = top2;
3287 }
3288 Perl_warner(aTHX_ packWARN(WARN_UTF8),
3289 "Invalid negative number (%"SVf") in chr", top);
3290 }
8a064bd6 3291 value = UNICODE_REPLACEMENT;
8a064bd6 3292 } else {
71739502 3293 value = SvUV_nomg(top);
8a064bd6 3294 }
463ee0b2 3295
862a34c6 3296 SvUPGRADE(TARG,SVt_PV);
a0ed51b3 3297
0064a8a9 3298 if (value > 255 && !IN_BYTES) {
eb160463 3299 SvGROW(TARG, (STRLEN)UNISKIP(value)+1);
62961d2e 3300 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
349d4f2f 3301 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
a0ed51b3
LW
3302 *tmps = '\0';
3303 (void)SvPOK_only(TARG);
aa6ffa16 3304 SvUTF8_on(TARG);
a0ed51b3
LW
3305 XPUSHs(TARG);
3306 RETURN;
3307 }
3308
748a9306 3309 SvGROW(TARG,2);
463ee0b2
LW
3310 SvCUR_set(TARG, 1);
3311 tmps = SvPVX(TARG);
eb160463 3312 *tmps++ = (char)value;
748a9306 3313 *tmps = '\0';
a0d0e21e 3314 (void)SvPOK_only(TARG);
4c5ed6e2 3315
88632417 3316 if (PL_encoding && !IN_BYTES) {
799ef3cb 3317 sv_recode_to_utf8(TARG, PL_encoding);
88632417 3318 tmps = SvPVX(TARG);
28936164
KW
3319 if (SvCUR(TARG) == 0
3320 || ! is_utf8_string((U8*)tmps, SvCUR(TARG))
3321 || UTF8_IS_REPLACEMENT((U8*) tmps, (U8*) tmps + SvCUR(TARG)))
3322 {
4c5ed6e2 3323 SvGROW(TARG, 2);
d5a15ac2 3324 tmps = SvPVX(TARG);
4c5ed6e2
ST
3325 SvCUR_set(TARG, 1);
3326 *tmps++ = (char)value;
88632417 3327 *tmps = '\0';
4c5ed6e2 3328 SvUTF8_off(TARG);
88632417
JH
3329 }
3330 }
4c5ed6e2 3331
463ee0b2
LW
3332 XPUSHs(TARG);
3333 RETURN;
3334}
3335
79072805
LW
3336PP(pp_crypt)
3337{
79072805 3338#ifdef HAS_CRYPT
97aff369 3339 dVAR; dSP; dTARGET;
5f74f29c 3340 dPOPTOPssrl;
85c16d83 3341 STRLEN len;
10516c54 3342 const char *tmps = SvPV_const(left, len);
2bc69dc4 3343
85c16d83 3344 if (DO_UTF8(left)) {
2bc69dc4 3345 /* If Unicode, try to downgrade.
f2791508
JH
3346 * If not possible, croak.
3347 * Yes, we made this up. */
1b6737cc 3348 SV* const tsv = sv_2mortal(newSVsv(left));
2bc69dc4 3349
f2791508 3350 SvUTF8_on(tsv);
2bc69dc4 3351 sv_utf8_downgrade(tsv, FALSE);
349d4f2f 3352 tmps = SvPV_const(tsv, len);
85c16d83 3353 }
05404ffe
JH
3354# ifdef USE_ITHREADS
3355# ifdef HAS_CRYPT_R
3356 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3357 /* This should be threadsafe because in ithreads there is only
3358 * one thread per interpreter. If this would not be true,
3359 * we would need a mutex to protect this malloc. */
3360 PL_reentrant_buffer->_crypt_struct_buffer =
3361 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3362#if defined(__GLIBC__) || defined(__EMX__)
3363 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3364 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3365 /* work around glibc-2.2.5 bug */
3366 PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
3367 }
05404ffe 3368#endif
6ab58e4d 3369 }
05404ffe
JH
3370# endif /* HAS_CRYPT_R */
3371# endif /* USE_ITHREADS */
5f74f29c 3372# ifdef FCRYPT
83003860 3373 sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
5f74f29c 3374# else
83003860 3375 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
5f74f29c 3376# endif
ec93b65f 3377 SETTARG;
4808266b 3378 RETURN;
79072805 3379#else
b13b2135 3380 DIE(aTHX_
79072805
LW
3381 "The crypt() function is unimplemented due to excessive paranoia.");
3382#endif
79072805
LW
3383}
3384
00f254e2
KW
3385/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3386 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3387
00f254e2 3388/* Generates code to store a unicode codepoint c that is known to occupy
12b093a1
KW
3389 * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1,
3390 * and p is advanced to point to the next available byte after the two bytes */
00f254e2
KW
3391#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \
3392 STMT_START { \
3393 *(p)++ = UTF8_TWO_BYTE_HI(c); \
3394 *((p)++) = UTF8_TWO_BYTE_LO(c); \
3395 } STMT_END
3396
79072805
LW
3397PP(pp_ucfirst)
3398{
00f254e2
KW
3399 /* Actually is both lcfirst() and ucfirst(). Only the first character
3400 * changes. This means that possibly we can change in-place, ie., just
3401 * take the source and change that one character and store it back, but not
3402 * if read-only etc, or if the length changes */
3403
97aff369 3404 dVAR;
39644a26 3405 dSP;
d54190f6 3406 SV *source = TOPs;
00f254e2 3407 STRLEN slen; /* slen is the byte length of the whole SV. */
d54190f6
NC
3408 STRLEN need;
3409 SV *dest;
00f254e2
KW
3410 bool inplace; /* ? Convert first char only, in-place */
3411 bool doing_utf8 = FALSE; /* ? using utf8 */
3412 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
12e9c124 3413 const int op_type = PL_op->op_type;
d54190f6
NC
3414 const U8 *s;
3415 U8 *d;
3416 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
00f254e2
KW
3417 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3418 * stored as UTF-8 at s. */
3419 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3420 * lowercased) character stored in tmpbuf. May be either
3421 * UTF-8 or not, but in either case is the number of bytes */
094a2f8c 3422 bool tainted = FALSE;
d54190f6
NC
3423
3424 SvGETMAGIC(source);
3425 if (SvOK(source)) {
3426 s = (const U8*)SvPV_nomg_const(source, slen);
3427 } else {
0a0ffbce
RGS
3428 if (ckWARN(WARN_UNINITIALIZED))
3429 report_uninit(source);
1eced8f8 3430 s = (const U8*)"";
d54190f6
NC
3431 slen = 0;
3432 }
a0ed51b3 3433
00f254e2
KW
3434 /* We may be able to get away with changing only the first character, in
3435 * place, but not if read-only, etc. Later we may discover more reasons to
3436 * not convert in-place. */
3437 inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
3438
3439 /* First calculate what the changed first character should be. This affects
3440 * whether we can just swap it out, leaving the rest of the string unchanged,
3441 * or even if have to convert the dest to UTF-8 when the source isn't */
3442
3443 if (! slen) { /* If empty */
3444 need = 1; /* still need a trailing NUL */
b7576bcb 3445 ulen = 0;
00f254e2
KW
3446 }
3447 else if (DO_UTF8(source)) { /* Is the source utf8? */
d54190f6 3448 doing_utf8 = TRUE;
17e95c9d 3449 ulen = UTF8SKIP(s);
094a2f8c
KW
3450 if (op_type == OP_UCFIRST) {
3451 _to_utf8_title_flags(s, tmpbuf, &tculen,
3452 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3453 }
3454 else {
3455 _to_utf8_lower_flags(s, tmpbuf, &tculen,
3456 cBOOL(IN_LOCALE_RUNTIME), &tainted);
3457 }
00f254e2 3458
17e95c9d
KW
3459 /* we can't do in-place if the length changes. */
3460 if (ulen != tculen) inplace = FALSE;
3461 need = slen + 1 - ulen + tculen;
d54190f6 3462 }
00f254e2
KW
3463 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3464 * latin1 is treated as caseless. Note that a locale takes
3465 * precedence */
167d19f2 3466 ulen = 1; /* Original character is 1 byte */
00f254e2
KW
3467 tculen = 1; /* Most characters will require one byte, but this will
3468 * need to be overridden for the tricky ones */
3469 need = slen + 1;
3470
3471 if (op_type == OP_LCFIRST) {
d54190f6 3472
00f254e2
KW
3473 /* lower case the first letter: no trickiness for any character */
3474 *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) :
3475 ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s));
3476 }
3477 /* is ucfirst() */
3478 else if (IN_LOCALE_RUNTIME) {
3479 *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales
3480 * have upper and title case different
3481 */
3482 }
3483 else if (! IN_UNI_8_BIT) {
3484 *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or
3485 * on EBCDIC machines whatever the
3486 * native function does */
3487 }
3488 else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */
167d19f2
KW
3489 UV title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
3490 if (tculen > 1) {
3491 assert(tculen == 2);
3492
3493 /* If the result is an upper Latin1-range character, it can
3494 * still be represented in one byte, which is its ordinal */
3495 if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) {
3496 *tmpbuf = (U8) title_ord;
3497 tculen = 1;
00f254e2
KW
3498 }
3499 else {
167d19f2
KW
3500 /* Otherwise it became more than one ASCII character (in
3501 * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to
3502 * beyond Latin1, so the number of bytes changed, so can't
3503 * replace just the first character in place. */
3504 inplace = FALSE;
3505
3506 /* If the result won't fit in a byte, the entire result will
3507 * have to be in UTF-8. Assume worst case sizing in
3508 * conversion. (all latin1 characters occupy at most two bytes
3509 * in utf8) */
3510 if (title_ord > 255) {
3511 doing_utf8 = TRUE;
3512 convert_source_to_utf8 = TRUE;
3513 need = slen * 2 + 1;
3514
3515 /* The (converted) UTF-8 and UTF-EBCDIC lengths of all
3516 * (both) characters whose title case is above 255 is
3517 * 2. */
3518 ulen = 2;
3519 }
3520 else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */
3521 need = slen + 1 + 1;
3522 }
00f254e2 3523 }
167d19f2 3524 }
00f254e2
KW
3525 } /* End of use Unicode (Latin1) semantics */
3526 } /* End of changing the case of the first character */
3527
3528 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3529 * generate the result */
3530 if (inplace) {
3531
3532 /* We can convert in place. This means we change just the first
3533 * character without disturbing the rest; no need to grow */
d54190f6
NC
3534 dest = source;
3535 s = d = (U8*)SvPV_force_nomg(source, slen);
3536 } else {
3537 dTARGET;
3538
3539 dest = TARG;
3540
00f254e2
KW
3541 /* Here, we can't convert in place; we earlier calculated how much
3542 * space we will need, so grow to accommodate that */
d54190f6 3543 SvUPGRADE(dest, SVt_PV);
3b416f41 3544 d = (U8*)SvGROW(dest, need);
d54190f6
NC
3545 (void)SvPOK_only(dest);
3546
3547 SETs(dest);
d54190f6 3548 }
44bc797b 3549
d54190f6 3550 if (doing_utf8) {
00f254e2
KW
3551 if (! inplace) {
3552 if (! convert_source_to_utf8) {
3553
3554 /* Here both source and dest are in UTF-8, but have to create
3555 * the entire output. We initialize the result to be the
3556 * title/lower cased first character, and then append the rest
3557 * of the string. */
3558 sv_setpvn(dest, (char*)tmpbuf, tculen);
3559 if (slen > ulen) {
3560 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3561 }
3562 }
3563 else {
3564 const U8 *const send = s + slen;
3565
3566 /* Here the dest needs to be in UTF-8, but the source isn't,
3567 * except we earlier UTF-8'd the first character of the source
3568 * into tmpbuf. First put that into dest, and then append the
3569 * rest of the source, converting it to UTF-8 as we go. */
3570
3571 /* Assert tculen is 2 here because the only two characters that
3572 * get to this part of the code have 2-byte UTF-8 equivalents */
3573 *d++ = *tmpbuf;
3574 *d++ = *(tmpbuf + 1);
3575 s++; /* We have just processed the 1st char */
3576
3577 for (; s < send; s++) {
3578 d = uvchr_to_utf8(d, *s);
3579 }
3580 *d = '\0';
3581 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
3582 }
d54190f6 3583 SvUTF8_on(dest);
a0ed51b3 3584 }
00f254e2 3585 else { /* in-place UTF-8. Just overwrite the first character */
d54190f6
NC
3586 Copy(tmpbuf, d, tculen, U8);
3587 SvCUR_set(dest, need - 1);
a0ed51b3 3588 }
094a2f8c
KW
3589
3590 if (tainted) {
3591 TAINT;
3592 SvTAINTED_on(dest);
3593 }
a0ed51b3 3594 }
00f254e2
KW
3595 else { /* Neither source nor dest are in or need to be UTF-8 */
3596 if (slen) {
2de3dbcc 3597 if (IN_LOCALE_RUNTIME) {
31351b04 3598 TAINT;
d54190f6 3599 SvTAINTED_on(dest);
31351b04 3600 }
00f254e2
KW
3601 if (inplace) { /* in-place, only need to change the 1st char */
3602 *d = *tmpbuf;
3603 }
3604 else { /* Not in-place */
3605
3606 /* Copy the case-changed character(s) from tmpbuf */
3607 Copy(tmpbuf, d, tculen, U8);
3608 d += tculen - 1; /* Code below expects d to point to final
3609 * character stored */
3610 }
3611 }
3612 else { /* empty source */
3613 /* See bug #39028: Don't taint if empty */
d54190f6
NC
3614 *d = *s;
3615 }
3616
00f254e2
KW
3617 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
3618 * the destination to retain that flag */
d54190f6
NC
3619 if (SvUTF8(source))
3620 SvUTF8_on(dest);
3621
00f254e2 3622 if (!inplace) { /* Finish the rest of the string, unchanged */
d54190f6
NC
3623 /* This will copy the trailing NUL */
3624 Copy(s + 1, d + 1, slen, U8);
3625 SvCUR_set(dest, need - 1);
bbce6d69 3626 }
bbce6d69 3627 }
539689e7
FC
3628 if (dest != source && SvTAINTED(source))
3629 SvTAINT(dest);
d54190f6 3630 SvSETMAGIC(dest);
79072805
LW
3631 RETURN;
3632}
3633
67306194
NC
3634/* There's so much setup/teardown code common between uc and lc, I wonder if
3635 it would be worth merging the two, and just having a switch outside each
00f254e2 3636 of the three tight loops. There is less and less commonality though */
79072805
LW
3637