This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
create itervar_u union in struct block_loop
[perl5.git] / sv.c
CommitLineData
a0d0e21e 1/* sv.c
79072805 2 *
1129b882 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
83706693
RGS
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall
5 * and others
79072805
LW
6 *
7 * You may distribute under the terms of either the GNU General Public
8 * License or the Artistic License, as specified in the README file.
9 *
4ac71550
TC
10 */
11
12/*
13 * 'I wonder what the Entish is for "yes" and "no",' he thought.
14 * --Pippin
15 *
16 * [p.480 of _The Lord of the Rings_, III/iv: "Treebeard"]
17 */
18
19/*
645c22ef
DM
20 *
21 *
5e045b90
AMS
22 * This file contains the code that creates, manipulates and destroys
23 * scalar values (SVs). The other types (AV, HV, GV, etc.) reuse the
24 * structure of an SV, so their creation and destruction is handled
25 * here; higher-level functions are in av.c, hv.c, and so on. Opcode
26 * level functions (eg. substr, split, join) for each of the types are
27 * in the pp*.c files.
79072805
LW
28 */
29
30#include "EXTERN.h"
864dbfa3 31#define PERL_IN_SV_C
79072805 32#include "perl.h"
d2f185dc 33#include "regcomp.h"
79072805 34
51371543 35#define FCALL *f
2c5424a7 36
2f8ed50e
OS
37#ifdef __Lynx__
38/* Missing proto on LynxOS */
39 char *gconvert(double, int, int, char *);
40#endif
41
e23c8137 42#ifdef PERL_UTF8_CACHE_ASSERT
ab455f60 43/* if adding more checks watch out for the following tests:
e23c8137
JH
44 * t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t
45 * lib/utf8.t lib/Unicode/Collate/t/index.t
46 * --jhi
47 */
6f207bd3 48# define ASSERT_UTF8_CACHE(cache) \
ab455f60
NC
49 STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \
50 assert((cache)[2] <= (cache)[3]); \
51 assert((cache)[3] <= (cache)[1]);} \
52 } STMT_END
e23c8137 53#else
6f207bd3 54# define ASSERT_UTF8_CACHE(cache) NOOP
e23c8137
JH
55#endif
56
f8c7b90f 57#ifdef PERL_OLD_COPY_ON_WRITE
765f542d 58#define SV_COW_NEXT_SV(sv) INT2PTR(SV *,SvUVX(sv))
607fa7f2 59#define SV_COW_NEXT_SV_SET(current,next) SvUV_set(current, PTR2UV(next))
b5ccf5f2 60/* This is a pessimistic view. Scalar must be purely a read-write PV to copy-
765f542d 61 on-write. */
765f542d 62#endif
645c22ef
DM
63
64/* ============================================================================
65
66=head1 Allocation and deallocation of SVs.
67
d2a0f284
JC
68An SV (or AV, HV, etc.) is allocated in two parts: the head (struct
69sv, av, hv...) contains type and reference count information, and for
70many types, a pointer to the body (struct xrv, xpv, xpviv...), which
71contains fields specific to each type. Some types store all they need
72in the head, so don't have a body.
73
74In all but the most memory-paranoid configuations (ex: PURIFY), heads
75and bodies are allocated out of arenas, which by default are
76approximately 4K chunks of memory parcelled up into N heads or bodies.
93e68bfb
JC
77Sv-bodies are allocated by their sv-type, guaranteeing size
78consistency needed to allocate safely from arrays.
79
d2a0f284
JC
80For SV-heads, the first slot in each arena is reserved, and holds a
81link to the next arena, some flags, and a note of the number of slots.
82Snaked through each arena chain is a linked list of free items; when
83this becomes empty, an extra arena is allocated and divided up into N
84items which are threaded into the free list.
85
86SV-bodies are similar, but they use arena-sets by default, which
87separate the link and info from the arena itself, and reclaim the 1st
88slot in the arena. SV-bodies are further described later.
645c22ef
DM
89
90The following global variables are associated with arenas:
91
92 PL_sv_arenaroot pointer to list of SV arenas
93 PL_sv_root pointer to list of free SV structures
94
d2a0f284
JC
95 PL_body_arenas head of linked-list of body arenas
96 PL_body_roots[] array of pointers to list of free bodies of svtype
97 arrays are indexed by the svtype needed
93e68bfb 98
d2a0f284
JC
99A few special SV heads are not allocated from an arena, but are
100instead directly created in the interpreter structure, eg PL_sv_undef.
93e68bfb
JC
101The size of arenas can be changed from the default by setting
102PERL_ARENA_SIZE appropriately at compile time.
645c22ef
DM
103
104The SV arena serves the secondary purpose of allowing still-live SVs
105to be located and destroyed during final cleanup.
106
107At the lowest level, the macros new_SV() and del_SV() grab and free
108an SV head. (If debugging with -DD, del_SV() calls the function S_del_sv()
109to return the SV to the free list with error checking.) new_SV() calls
110more_sv() / sv_add_arena() to add an extra arena if the free list is empty.
111SVs in the free list have their SvTYPE field set to all ones.
112
ff276b08 113At the time of very final cleanup, sv_free_arenas() is called from
645c22ef 114perl_destruct() to physically free all the arenas allocated since the
6a93a7e5 115start of the interpreter.
645c22ef 116
645c22ef
DM
117The function visit() scans the SV arenas list, and calls a specified
118function for each SV it finds which is still live - ie which has an SvTYPE
119other than all 1's, and a non-zero SvREFCNT. visit() is used by the
120following functions (specified as [function that calls visit()] / [function
121called by visit() for each SV]):
122
123 sv_report_used() / do_report_used()
f2524eef 124 dump all remaining SVs (debugging aid)
645c22ef
DM
125
126 sv_clean_objs() / do_clean_objs(),do_clean_named_objs()
127 Attempt to free all objects pointed to by RVs,
128 and, unless DISABLE_DESTRUCTOR_KLUDGE is defined,
129 try to do the same for all objects indirectly
130 referenced by typeglobs too. Called once from
131 perl_destruct(), prior to calling sv_clean_all()
132 below.
133
134 sv_clean_all() / do_clean_all()
135 SvREFCNT_dec(sv) each remaining SV, possibly
136 triggering an sv_free(). It also sets the
137 SVf_BREAK flag on the SV to indicate that the
138 refcnt has been artificially lowered, and thus
139 stopping sv_free() from giving spurious warnings
140 about SVs which unexpectedly have a refcnt
141 of zero. called repeatedly from perl_destruct()
142 until there are no SVs left.
143
93e68bfb 144=head2 Arena allocator API Summary
645c22ef
DM
145
146Private API to rest of sv.c
147
148 new_SV(), del_SV(),
149
df0f0429 150 new_XPVNV(), del_XPVGV(),
645c22ef
DM
151 etc
152
153Public API:
154
8cf8f3d1 155 sv_report_used(), sv_clean_objs(), sv_clean_all(), sv_free_arenas()
645c22ef 156
645c22ef
DM
157=cut
158
3e8320cc 159 * ========================================================================= */
645c22ef 160
4561caa4
CS
161/*
162 * "A time to plant, and a time to uproot what was planted..."
163 */
164
d7a2c63c
MHM
165#ifdef PERL_MEM_LOG
166# define MEM_LOG_NEW_SV(sv, file, line, func) \
167 Perl_mem_log_new_sv(sv, file, line, func)
168# define MEM_LOG_DEL_SV(sv, file, line, func) \
169 Perl_mem_log_del_sv(sv, file, line, func)
170#else
171# define MEM_LOG_NEW_SV(sv, file, line, func) NOOP
172# define MEM_LOG_DEL_SV(sv, file, line, func) NOOP
173#endif
174
fd0854ff 175#ifdef DEBUG_LEAKING_SCALARS
22162ca8 176# define FREE_SV_DEBUG_FILE(sv) Safefree((sv)->sv_debug_file)
d7a2c63c
MHM
177# define DEBUG_SV_SERIAL(sv) \
178 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%"UVxf": (%05ld) del_SV\n", \
179 PTR2UV(sv), (long)(sv)->sv_debug_serial))
fd0854ff
DM
180#else
181# define FREE_SV_DEBUG_FILE(sv)
d7a2c63c 182# define DEBUG_SV_SERIAL(sv) NOOP
fd0854ff
DM
183#endif
184
48614a46
NC
185#ifdef PERL_POISON
186# define SvARENA_CHAIN(sv) ((sv)->sv_u.svu_rv)
daba3364 187# define SvARENA_CHAIN_SET(sv,val) (sv)->sv_u.svu_rv = MUTABLE_SV((val))
48614a46
NC
188/* Whilst I'd love to do this, it seems that things like to check on
189 unreferenced scalars
7e337ee0 190# define POSION_SV_HEAD(sv) PoisonNew(sv, 1, struct STRUCT_SV)
48614a46 191*/
7e337ee0
JH
192# define POSION_SV_HEAD(sv) PoisonNew(&SvANY(sv), 1, void *), \
193 PoisonNew(&SvREFCNT(sv), 1, U32)
48614a46
NC
194#else
195# define SvARENA_CHAIN(sv) SvANY(sv)
3eef1deb 196# define SvARENA_CHAIN_SET(sv,val) SvANY(sv) = (void *)(val)
48614a46
NC
197# define POSION_SV_HEAD(sv)
198#endif
199
990198f0
DM
200/* Mark an SV head as unused, and add to free list.
201 *
202 * If SVf_BREAK is set, skip adding it to the free list, as this SV had
203 * its refcount artificially decremented during global destruction, so
204 * there may be dangling pointers to it. The last thing we want in that
205 * case is for it to be reused. */
206
053fc874
GS
207#define plant_SV(p) \
208 STMT_START { \
990198f0 209 const U32 old_flags = SvFLAGS(p); \
d7a2c63c
MHM
210 MEM_LOG_DEL_SV(p, __FILE__, __LINE__, FUNCTION__); \
211 DEBUG_SV_SERIAL(p); \
fd0854ff 212 FREE_SV_DEBUG_FILE(p); \
48614a46 213 POSION_SV_HEAD(p); \
053fc874 214 SvFLAGS(p) = SVTYPEMASK; \
990198f0 215 if (!(old_flags & SVf_BREAK)) { \
3eef1deb 216 SvARENA_CHAIN_SET(p, PL_sv_root); \
990198f0
DM
217 PL_sv_root = (p); \
218 } \
053fc874
GS
219 --PL_sv_count; \
220 } STMT_END
a0d0e21e 221
053fc874
GS
222#define uproot_SV(p) \
223 STMT_START { \
224 (p) = PL_sv_root; \
daba3364 225 PL_sv_root = MUTABLE_SV(SvARENA_CHAIN(p)); \
053fc874
GS
226 ++PL_sv_count; \
227 } STMT_END
228
645c22ef 229
cac9b346
NC
230/* make some more SVs by adding another arena */
231
cac9b346
NC
232STATIC SV*
233S_more_sv(pTHX)
234{
97aff369 235 dVAR;
cac9b346 236 SV* sv;
9a87bd09
NC
237 char *chunk; /* must use New here to match call to */
238 Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */
239 sv_add_arena(chunk, PERL_ARENA_SIZE, 0);
cac9b346
NC
240 uproot_SV(sv);
241 return sv;
242}
243
645c22ef
DM
244/* new_SV(): return a new, empty SV head */
245
eba0f806
DM
246#ifdef DEBUG_LEAKING_SCALARS
247/* provide a real function for a debugger to play with */
248STATIC SV*
d7a2c63c 249S_new_SV(pTHX_ const char *file, int line, const char *func)
eba0f806
DM
250{
251 SV* sv;
252
eba0f806
DM
253 if (PL_sv_root)
254 uproot_SV(sv);
255 else
cac9b346 256 sv = S_more_sv(aTHX);
eba0f806
DM
257 SvANY(sv) = 0;
258 SvREFCNT(sv) = 1;
259 SvFLAGS(sv) = 0;
fd0854ff 260 sv->sv_debug_optype = PL_op ? PL_op->op_type : 0;
e385c3bf
DM
261 sv->sv_debug_line = (U16) (PL_parser && PL_parser->copline != NOLINE
262 ? PL_parser->copline
263 : PL_curcop
f24aceb1
DM
264 ? CopLINE(PL_curcop)
265 : 0
e385c3bf 266 );
fd0854ff 267 sv->sv_debug_inpad = 0;
cd676548 268 sv->sv_debug_parent = NULL;
fd0854ff 269 sv->sv_debug_file = PL_curcop ? savepv(CopFILE(PL_curcop)): NULL;
d7a2c63c
MHM
270
271 sv->sv_debug_serial = PL_sv_serial++;
272
273 MEM_LOG_NEW_SV(sv, file, line, func);
274 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%"UVxf": (%05ld) new_SV (from %s:%d [%s])\n",
275 PTR2UV(sv), (long)sv->sv_debug_serial, file, line, func));
276
eba0f806
DM
277 return sv;
278}
d7a2c63c 279# define new_SV(p) (p)=S_new_SV(aTHX_ __FILE__, __LINE__, FUNCTION__)
eba0f806
DM
280
281#else
282# define new_SV(p) \
053fc874 283 STMT_START { \
053fc874
GS
284 if (PL_sv_root) \
285 uproot_SV(p); \
286 else \
cac9b346 287 (p) = S_more_sv(aTHX); \
053fc874
GS
288 SvANY(p) = 0; \
289 SvREFCNT(p) = 1; \
290 SvFLAGS(p) = 0; \
d7a2c63c 291 MEM_LOG_NEW_SV(p, __FILE__, __LINE__, FUNCTION__); \
053fc874 292 } STMT_END
eba0f806 293#endif
463ee0b2 294
645c22ef
DM
295
296/* del_SV(): return an empty SV head to the free list */
297
a0d0e21e 298#ifdef DEBUGGING
4561caa4 299
053fc874
GS
300#define del_SV(p) \
301 STMT_START { \
aea4f609 302 if (DEBUG_D_TEST) \
053fc874
GS
303 del_sv(p); \
304 else \
305 plant_SV(p); \
053fc874 306 } STMT_END
a0d0e21e 307
76e3520e 308STATIC void
cea2e8a9 309S_del_sv(pTHX_ SV *p)
463ee0b2 310{
97aff369 311 dVAR;
7918f24d
NC
312
313 PERL_ARGS_ASSERT_DEL_SV;
314
aea4f609 315 if (DEBUG_D_TEST) {
4633a7c4 316 SV* sva;
a3b680e6 317 bool ok = 0;
daba3364 318 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
53c1dcc0
AL
319 const SV * const sv = sva + 1;
320 const SV * const svend = &sva[SvREFCNT(sva)];
c0ff570e 321 if (p >= sv && p < svend) {
a0d0e21e 322 ok = 1;
c0ff570e
NC
323 break;
324 }
a0d0e21e
LW
325 }
326 if (!ok) {
9b387841
NC
327 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
328 "Attempt to free non-arena SV: 0x%"UVxf
329 pTHX__FORMAT, PTR2UV(p) pTHX__VALUE);
a0d0e21e
LW
330 return;
331 }
332 }
4561caa4 333 plant_SV(p);
463ee0b2 334}
a0d0e21e 335
4561caa4
CS
336#else /* ! DEBUGGING */
337
338#define del_SV(p) plant_SV(p)
339
340#endif /* DEBUGGING */
463ee0b2 341
645c22ef
DM
342
343/*
ccfc67b7
JH
344=head1 SV Manipulation Functions
345
645c22ef
DM
346=for apidoc sv_add_arena
347
348Given a chunk of memory, link it to the head of the list of arenas,
349and split it into a list of free SVs.
350
351=cut
352*/
353
d2bd4e7f
NC
354static void
355S_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags)
463ee0b2 356{
97aff369 357 dVAR;
daba3364 358 SV *const sva = MUTABLE_SV(ptr);
463ee0b2
LW
359 register SV* sv;
360 register SV* svend;
4633a7c4 361
7918f24d
NC
362 PERL_ARGS_ASSERT_SV_ADD_ARENA;
363
4633a7c4 364 /* The first SV in an arena isn't an SV. */
3280af22 365 SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */
4633a7c4
LW
366 SvREFCNT(sva) = size / sizeof(SV); /* number of SV slots */
367 SvFLAGS(sva) = flags; /* FAKE if not to be freed */
368
3280af22
NIS
369 PL_sv_arenaroot = sva;
370 PL_sv_root = sva + 1;
4633a7c4
LW
371
372 svend = &sva[SvREFCNT(sva) - 1];
373 sv = sva + 1;
463ee0b2 374 while (sv < svend) {
3eef1deb 375 SvARENA_CHAIN_SET(sv, (sv + 1));
03e36789 376#ifdef DEBUGGING
978b032e 377 SvREFCNT(sv) = 0;
03e36789 378#endif
4b69cbe3 379 /* Must always set typemask because it's always checked in on cleanup
03e36789 380 when the arenas are walked looking for objects. */
8990e307 381 SvFLAGS(sv) = SVTYPEMASK;
463ee0b2
LW
382 sv++;
383 }
3eef1deb 384 SvARENA_CHAIN_SET(sv, 0);
03e36789
NC
385#ifdef DEBUGGING
386 SvREFCNT(sv) = 0;
387#endif
4633a7c4
LW
388 SvFLAGS(sv) = SVTYPEMASK;
389}
390
055972dc
DM
391/* visit(): call the named function for each non-free SV in the arenas
392 * whose flags field matches the flags/mask args. */
645c22ef 393
5226ed68 394STATIC I32
de37a194 395S_visit(pTHX_ SVFUNC_t f, const U32 flags, const U32 mask)
8990e307 396{
97aff369 397 dVAR;
4633a7c4 398 SV* sva;
5226ed68 399 I32 visited = 0;
8990e307 400
7918f24d
NC
401 PERL_ARGS_ASSERT_VISIT;
402
daba3364 403 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
53c1dcc0 404 register const SV * const svend = &sva[SvREFCNT(sva)];
a3b680e6 405 register SV* sv;
4561caa4 406 for (sv = sva + 1; sv < svend; ++sv) {
055972dc
DM
407 if (SvTYPE(sv) != SVTYPEMASK
408 && (sv->sv_flags & mask) == flags
409 && SvREFCNT(sv))
410 {
acfe0abc 411 (FCALL)(aTHX_ sv);
5226ed68
JH
412 ++visited;
413 }
8990e307
LW
414 }
415 }
5226ed68 416 return visited;
8990e307
LW
417}
418
758a08c3
JH
419#ifdef DEBUGGING
420
645c22ef
DM
421/* called by sv_report_used() for each live SV */
422
423static void
5fa45a31 424do_report_used(pTHX_ SV *const sv)
645c22ef
DM
425{
426 if (SvTYPE(sv) != SVTYPEMASK) {
427 PerlIO_printf(Perl_debug_log, "****\n");
428 sv_dump(sv);
429 }
430}
758a08c3 431#endif
645c22ef
DM
432
433/*
434=for apidoc sv_report_used
435
436Dump the contents of all SVs not yet freed. (Debugging aid).
437
438=cut
439*/
440
8990e307 441void
864dbfa3 442Perl_sv_report_used(pTHX)
4561caa4 443{
ff270d3a 444#ifdef DEBUGGING
055972dc 445 visit(do_report_used, 0, 0);
96a5add6
AL
446#else
447 PERL_UNUSED_CONTEXT;
ff270d3a 448#endif
4561caa4
CS
449}
450
645c22ef
DM
451/* called by sv_clean_objs() for each live SV */
452
453static void
de37a194 454do_clean_objs(pTHX_ SV *const ref)
645c22ef 455{
97aff369 456 dVAR;
ea724faa
NC
457 assert (SvROK(ref));
458 {
823a54a3
AL
459 SV * const target = SvRV(ref);
460 if (SvOBJECT(target)) {
461 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(ref)));
462 if (SvWEAKREF(ref)) {
463 sv_del_backref(target, ref);
464 SvWEAKREF_off(ref);
465 SvRV_set(ref, NULL);
466 } else {
467 SvROK_off(ref);
468 SvRV_set(ref, NULL);
469 SvREFCNT_dec(target);
470 }
645c22ef
DM
471 }
472 }
473
474 /* XXX Might want to check arrays, etc. */
475}
476
477/* called by sv_clean_objs() for each live SV */
478
479#ifndef DISABLE_DESTRUCTOR_KLUDGE
480static void
f30de749 481do_clean_named_objs(pTHX_ SV *const sv)
645c22ef 482{
97aff369 483 dVAR;
ea724faa 484 assert(SvTYPE(sv) == SVt_PVGV);
d011219a
NC
485 assert(isGV_with_GP(sv));
486 if (GvGP(sv)) {
c69033f2
NC
487 if ((
488#ifdef PERL_DONT_CREATE_GVSV
489 GvSV(sv) &&
490#endif
491 SvOBJECT(GvSV(sv))) ||
645c22ef
DM
492 (GvAV(sv) && SvOBJECT(GvAV(sv))) ||
493 (GvHV(sv) && SvOBJECT(GvHV(sv))) ||
9c12f1e5
RGS
494 /* In certain rare cases GvIOp(sv) can be NULL, which would make SvOBJECT(GvIO(sv)) dereference NULL. */
495 (GvIO(sv) ? (SvFLAGS(GvIOp(sv)) & SVs_OBJECT) : 0) ||
645c22ef
DM
496 (GvCV(sv) && SvOBJECT(GvCV(sv))) )
497 {
498 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning named glob object:\n "), sv_dump(sv)));
ec5f3c78 499 SvFLAGS(sv) |= SVf_BREAK;
645c22ef
DM
500 SvREFCNT_dec(sv);
501 }
502 }
503}
504#endif
505
506/*
507=for apidoc sv_clean_objs
508
509Attempt to destroy all objects not yet freed
510
511=cut
512*/
513
4561caa4 514void
864dbfa3 515Perl_sv_clean_objs(pTHX)
4561caa4 516{
97aff369 517 dVAR;
3280af22 518 PL_in_clean_objs = TRUE;
055972dc 519 visit(do_clean_objs, SVf_ROK, SVf_ROK);
4561caa4 520#ifndef DISABLE_DESTRUCTOR_KLUDGE
2d0f3c12 521 /* some barnacles may yet remain, clinging to typeglobs */
d011219a 522 visit(do_clean_named_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
4561caa4 523#endif
3280af22 524 PL_in_clean_objs = FALSE;
4561caa4
CS
525}
526
645c22ef
DM
527/* called by sv_clean_all() for each live SV */
528
529static void
de37a194 530do_clean_all(pTHX_ SV *const sv)
645c22ef 531{
97aff369 532 dVAR;
daba3364 533 if (sv == (const SV *) PL_fdpid || sv == (const SV *)PL_strtab) {
cddfcddc 534 /* don't clean pid table and strtab */
d17ea597 535 return;
cddfcddc 536 }
645c22ef
DM
537 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%"UVxf"\n", PTR2UV(sv)) ));
538 SvFLAGS(sv) |= SVf_BREAK;
539 SvREFCNT_dec(sv);
540}
541
542/*
543=for apidoc sv_clean_all
544
545Decrement the refcnt of each remaining SV, possibly triggering a
546cleanup. This function may have to be called multiple times to free
ff276b08 547SVs which are in complex self-referential hierarchies.
645c22ef
DM
548
549=cut
550*/
551
5226ed68 552I32
864dbfa3 553Perl_sv_clean_all(pTHX)
8990e307 554{
97aff369 555 dVAR;
5226ed68 556 I32 cleaned;
3280af22 557 PL_in_clean_all = TRUE;
055972dc 558 cleaned = visit(do_clean_all, 0,0);
3280af22 559 PL_in_clean_all = FALSE;
5226ed68 560 return cleaned;
8990e307 561}
463ee0b2 562
5e258f8c
JC
563/*
564 ARENASETS: a meta-arena implementation which separates arena-info
565 into struct arena_set, which contains an array of struct
566 arena_descs, each holding info for a single arena. By separating
567 the meta-info from the arena, we recover the 1st slot, formerly
568 borrowed for list management. The arena_set is about the size of an
39244528 569 arena, avoiding the needless malloc overhead of a naive linked-list.
5e258f8c
JC
570
571 The cost is 1 arena-set malloc per ~320 arena-mallocs, + the unused
572 memory in the last arena-set (1/2 on average). In trade, we get
573 back the 1st slot in each arena (ie 1.7% of a CV-arena, less for
d2a0f284 574 smaller types). The recovery of the wasted space allows use of
e15dad31
JC
575 small arenas for large, rare body types, by changing array* fields
576 in body_details_by_type[] below.
5e258f8c 577*/
5e258f8c 578struct arena_desc {
398c677b
NC
579 char *arena; /* the raw storage, allocated aligned */
580 size_t size; /* its size ~4k typ */
e5973ed5 581 svtype utype; /* bodytype stored in arena */
5e258f8c
JC
582};
583
e6148039
NC
584struct arena_set;
585
586/* Get the maximum number of elements in set[] such that struct arena_set
e15dad31 587 will fit within PERL_ARENA_SIZE, which is probably just under 4K, and
e6148039
NC
588 therefore likely to be 1 aligned memory page. */
589
590#define ARENAS_PER_SET ((PERL_ARENA_SIZE - sizeof(struct arena_set*) \
591 - 2 * sizeof(int)) / sizeof (struct arena_desc))
5e258f8c
JC
592
593struct arena_set {
594 struct arena_set* next;
0a848332
NC
595 unsigned int set_size; /* ie ARENAS_PER_SET */
596 unsigned int curr; /* index of next available arena-desc */
5e258f8c
JC
597 struct arena_desc set[ARENAS_PER_SET];
598};
599
645c22ef
DM
600/*
601=for apidoc sv_free_arenas
602
603Deallocate the memory used by all arenas. Note that all the individual SV
604heads and bodies within the arenas must already have been freed.
605
606=cut
607*/
4633a7c4 608void
864dbfa3 609Perl_sv_free_arenas(pTHX)
4633a7c4 610{
97aff369 611 dVAR;
4633a7c4
LW
612 SV* sva;
613 SV* svanext;
0a848332 614 unsigned int i;
4633a7c4
LW
615
616 /* Free arenas here, but be careful about fake ones. (We assume
617 contiguity of the fake ones with the corresponding real ones.) */
618
3280af22 619 for (sva = PL_sv_arenaroot; sva; sva = svanext) {
daba3364 620 svanext = MUTABLE_SV(SvANY(sva));
4633a7c4 621 while (svanext && SvFAKE(svanext))
daba3364 622 svanext = MUTABLE_SV(SvANY(svanext));
4633a7c4
LW
623
624 if (!SvFAKE(sva))
1df70142 625 Safefree(sva);
4633a7c4 626 }
93e68bfb 627
5e258f8c 628 {
0a848332
NC
629 struct arena_set *aroot = (struct arena_set*) PL_body_arenas;
630
631 while (aroot) {
632 struct arena_set *current = aroot;
633 i = aroot->curr;
634 while (i--) {
5e258f8c
JC
635 assert(aroot->set[i].arena);
636 Safefree(aroot->set[i].arena);
637 }
0a848332
NC
638 aroot = aroot->next;
639 Safefree(current);
5e258f8c
JC
640 }
641 }
dc8220bf 642 PL_body_arenas = 0;
fdda85ca 643
0a848332
NC
644 i = PERL_ARENA_ROOTS_SIZE;
645 while (i--)
93e68bfb 646 PL_body_roots[i] = 0;
93e68bfb 647
3280af22
NIS
648 PL_sv_arenaroot = 0;
649 PL_sv_root = 0;
4633a7c4
LW
650}
651
bd81e77b
NC
652/*
653 Here are mid-level routines that manage the allocation of bodies out
654 of the various arenas. There are 5 kinds of arenas:
29489e7c 655
bd81e77b
NC
656 1. SV-head arenas, which are discussed and handled above
657 2. regular body arenas
658 3. arenas for reduced-size bodies
659 4. Hash-Entry arenas
29489e7c 660
bd81e77b
NC
661 Arena types 2 & 3 are chained by body-type off an array of
662 arena-root pointers, which is indexed by svtype. Some of the
663 larger/less used body types are malloced singly, since a large
664 unused block of them is wasteful. Also, several svtypes dont have
665 bodies; the data fits into the sv-head itself. The arena-root
666 pointer thus has a few unused root-pointers (which may be hijacked
667 later for arena types 4,5)
29489e7c 668
bd81e77b
NC
669 3 differs from 2 as an optimization; some body types have several
670 unused fields in the front of the structure (which are kept in-place
671 for consistency). These bodies can be allocated in smaller chunks,
672 because the leading fields arent accessed. Pointers to such bodies
673 are decremented to point at the unused 'ghost' memory, knowing that
674 the pointers are used with offsets to the real memory.
29489e7c 675
d2a0f284
JC
676
677=head1 SV-Body Allocation
678
679Allocation of SV-bodies is similar to SV-heads, differing as follows;
680the allocation mechanism is used for many body types, so is somewhat
681more complicated, it uses arena-sets, and has no need for still-live
682SV detection.
683
684At the outermost level, (new|del)_X*V macros return bodies of the
685appropriate type. These macros call either (new|del)_body_type or
686(new|del)_body_allocated macro pairs, depending on specifics of the
687type. Most body types use the former pair, the latter pair is used to
688allocate body types with "ghost fields".
689
690"ghost fields" are fields that are unused in certain types, and
69ba284b 691consequently don't need to actually exist. They are declared because
d2a0f284
JC
692they're part of a "base type", which allows use of functions as
693methods. The simplest examples are AVs and HVs, 2 aggregate types
694which don't use the fields which support SCALAR semantics.
695
69ba284b 696For these types, the arenas are carved up into appropriately sized
d2a0f284
JC
697chunks, we thus avoid wasted memory for those unaccessed members.
698When bodies are allocated, we adjust the pointer back in memory by the
69ba284b 699size of the part not allocated, so it's as if we allocated the full
d2a0f284
JC
700structure. (But things will all go boom if you write to the part that
701is "not there", because you'll be overwriting the last members of the
702preceding structure in memory.)
703
69ba284b
NC
704We calculate the correction using the STRUCT_OFFSET macro on the first
705member present. If the allocated structure is smaller (no initial NV
706actually allocated) then the net effect is to subtract the size of the NV
707from the pointer, to return a new pointer as if an initial NV were actually
708allocated. (We were using structures named *_allocated for this, but
709this turned out to be a subtle bug, because a structure without an NV
710could have a lower alignment constraint, but the compiler is allowed to
711optimised accesses based on the alignment constraint of the actual pointer
712to the full structure, for example, using a single 64 bit load instruction
713because it "knows" that two adjacent 32 bit members will be 8-byte aligned.)
d2a0f284
JC
714
715This is the same trick as was used for NV and IV bodies. Ironically it
716doesn't need to be used for NV bodies any more, because NV is now at
717the start of the structure. IV bodies don't need it either, because
718they are no longer allocated.
719
720In turn, the new_body_* allocators call S_new_body(), which invokes
721new_body_inline macro, which takes a lock, and takes a body off the
1e30fcd5 722linked list at PL_body_roots[sv_type], calling Perl_more_bodies() if
d2a0f284
JC
723necessary to refresh an empty list. Then the lock is released, and
724the body is returned.
725
99816f8d 726Perl_more_bodies allocates a new arena, and carves it up into an array of N
d2a0f284
JC
727bodies, which it strings into a linked list. It looks up arena-size
728and body-size from the body_details table described below, thus
729supporting the multiple body-types.
730
731If PURIFY is defined, or PERL_ARENA_SIZE=0, arenas are not used, and
732the (new|del)_X*V macros are mapped directly to malloc/free.
733
d2a0f284
JC
734For each sv-type, struct body_details bodies_by_type[] carries
735parameters which control these aspects of SV handling:
736
737Arena_size determines whether arenas are used for this body type, and if
738so, how big they are. PURIFY or PERL_ARENA_SIZE=0 set this field to
739zero, forcing individual mallocs and frees.
740
741Body_size determines how big a body is, and therefore how many fit into
742each arena. Offset carries the body-pointer adjustment needed for
69ba284b 743"ghost fields", and is used in *_allocated macros.
d2a0f284
JC
744
745But its main purpose is to parameterize info needed in
746Perl_sv_upgrade(). The info here dramatically simplifies the function
69ba284b 747vs the implementation in 5.8.8, making it table-driven. All fields
d2a0f284
JC
748are used for this, except for arena_size.
749
750For the sv-types that have no bodies, arenas are not used, so those
751PL_body_roots[sv_type] are unused, and can be overloaded. In
752something of a special case, SVt_NULL is borrowed for HE arenas;
c6f8b1d0 753PL_body_roots[HE_SVSLOT=SVt_NULL] is filled by S_more_he, but the
d2a0f284 754bodies_by_type[SVt_NULL] slot is not used, as the table is not
c6f8b1d0 755available in hv.c.
d2a0f284 756
29489e7c
DM
757*/
758
bd81e77b 759struct body_details {
0fb58b32 760 U8 body_size; /* Size to allocate */
10666ae3 761 U8 copy; /* Size of structure to copy (may be shorter) */
0fb58b32 762 U8 offset;
10666ae3
NC
763 unsigned int type : 4; /* We have space for a sanity check. */
764 unsigned int cant_upgrade : 1; /* Cannot upgrade this type */
765 unsigned int zero_nv : 1; /* zero the NV when upgrading from this */
766 unsigned int arena : 1; /* Allocated from an arena */
767 size_t arena_size; /* Size of arena to allocate */
bd81e77b 768};
29489e7c 769
bd81e77b
NC
770#define HADNV FALSE
771#define NONV TRUE
29489e7c 772
d2a0f284 773
bd81e77b
NC
774#ifdef PURIFY
775/* With -DPURFIY we allocate everything directly, and don't use arenas.
776 This seems a rather elegant way to simplify some of the code below. */
777#define HASARENA FALSE
778#else
779#define HASARENA TRUE
780#endif
781#define NOARENA FALSE
29489e7c 782
d2a0f284
JC
783/* Size the arenas to exactly fit a given number of bodies. A count
784 of 0 fits the max number bodies into a PERL_ARENA_SIZE.block,
785 simplifying the default. If count > 0, the arena is sized to fit
786 only that many bodies, allowing arenas to be used for large, rare
787 bodies (XPVFM, XPVIO) without undue waste. The arena size is
788 limited by PERL_ARENA_SIZE, so we can safely oversize the
789 declarations.
790 */
95db5f15
MB
791#define FIT_ARENA0(body_size) \
792 ((size_t)(PERL_ARENA_SIZE / body_size) * body_size)
793#define FIT_ARENAn(count,body_size) \
794 ( count * body_size <= PERL_ARENA_SIZE) \
795 ? count * body_size \
796 : FIT_ARENA0 (body_size)
797#define FIT_ARENA(count,body_size) \
798 count \
799 ? FIT_ARENAn (count, body_size) \
800 : FIT_ARENA0 (body_size)
d2a0f284 801
bd81e77b
NC
802/* Calculate the length to copy. Specifically work out the length less any
803 final padding the compiler needed to add. See the comment in sv_upgrade
804 for why copying the padding proved to be a bug. */
29489e7c 805
bd81e77b
NC
806#define copy_length(type, last_member) \
807 STRUCT_OFFSET(type, last_member) \
daba3364 808 + sizeof (((type*)SvANY((const SV *)0))->last_member)
29489e7c 809
bd81e77b 810static const struct body_details bodies_by_type[] = {
829cd18a
NC
811 /* HEs use this offset for their arena. */
812 { 0, 0, 0, SVt_NULL, FALSE, NONV, NOARENA, 0 },
d2a0f284 813
1cb9cd50 814 /* The bind placeholder pretends to be an RV for now.
c6f8b1d0 815 Also it's marked as "can't upgrade" to stop anyone using it before it's
1cb9cd50
NC
816 implemented. */
817 { 0, 0, 0, SVt_BIND, TRUE, NONV, NOARENA, 0 },
818
db93c0c4
NC
819 /* IVs are in the head, so the allocation size is 0. */
820 { 0,
d2a0f284 821 sizeof(IV), /* This is used to copy out the IV body. */
10666ae3 822 STRUCT_OFFSET(XPVIV, xiv_iv), SVt_IV, FALSE, NONV,
db93c0c4 823 NOARENA /* IVS don't need an arena */, 0
d2a0f284
JC
824 },
825
bd81e77b 826 /* 8 bytes on most ILP32 with IEEE doubles */
6e128786
NC
827 { sizeof(NV), sizeof(NV),
828 STRUCT_OFFSET(XPVNV, xnv_u),
829 SVt_NV, FALSE, HADNV, HASARENA, FIT_ARENA(0, sizeof(NV)) },
d2a0f284 830
bd81e77b 831 /* 8 bytes on most ILP32 with IEEE doubles */
bc337e5c 832 { sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
833 copy_length(XPV, xpv_len) - STRUCT_OFFSET(XPV, xpv_cur),
834 + STRUCT_OFFSET(XPV, xpv_cur),
69ba284b 835 SVt_PV, FALSE, NONV, HASARENA,
889d28b2 836 FIT_ARENA(0, sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 837
bd81e77b 838 /* 12 */
bc337e5c 839 { sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
840 copy_length(XPVIV, xiv_u) - STRUCT_OFFSET(XPV, xpv_cur),
841 + STRUCT_OFFSET(XPV, xpv_cur),
842 SVt_PVIV, FALSE, NONV, HASARENA,
843 FIT_ARENA(0, sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 844
889d28b2 845 /* 20 */
bc337e5c 846 { sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
847 copy_length(XPVNV, xnv_u) - STRUCT_OFFSET(XPV, xpv_cur),
848 + STRUCT_OFFSET(XPV, xpv_cur),
849 SVt_PVNV, FALSE, HADNV, HASARENA,
850 FIT_ARENA(0, sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 851
bd81e77b 852 /* 28 */
6e128786 853 { sizeof(XPVMG), copy_length(XPVMG, xnv_u), 0, SVt_PVMG, FALSE, HADNV,
d2a0f284 854 HASARENA, FIT_ARENA(0, sizeof(XPVMG)) },
4df7f6af 855
288b8c02 856 /* something big */
601dfd0a
NC
857 { sizeof(regexp),
858 sizeof(regexp),
859 0,
08e44740 860 SVt_REGEXP, FALSE, NONV, HASARENA,
eaeb1e7f 861 FIT_ARENA(0, sizeof(regexp))
5c35adbb 862 },
4df7f6af 863
bd81e77b 864 /* 48 */
10666ae3 865 { sizeof(XPVGV), sizeof(XPVGV), 0, SVt_PVGV, TRUE, HADNV,
d2a0f284
JC
866 HASARENA, FIT_ARENA(0, sizeof(XPVGV)) },
867
bd81e77b 868 /* 64 */
10666ae3 869 { sizeof(XPVLV), sizeof(XPVLV), 0, SVt_PVLV, TRUE, HADNV,
d2a0f284
JC
870 HASARENA, FIT_ARENA(0, sizeof(XPVLV)) },
871
601dfd0a 872 { sizeof(XPVAV),
4f7003f5 873 copy_length(XPVAV, xav_alloc),
601dfd0a 874 0,
69ba284b 875 SVt_PVAV, TRUE, NONV, HASARENA,
601dfd0a 876 FIT_ARENA(0, sizeof(XPVAV)) },
d2a0f284 877
601dfd0a 878 { sizeof(XPVHV),
359164a0 879 copy_length(XPVHV, xhv_max),
601dfd0a 880 0,
69ba284b 881 SVt_PVHV, TRUE, NONV, HASARENA,
601dfd0a 882 FIT_ARENA(0, sizeof(XPVHV)) },
d2a0f284 883
c84c4652 884 /* 56 */
601dfd0a
NC
885 { sizeof(XPVCV),
886 sizeof(XPVCV),
887 0,
69ba284b 888 SVt_PVCV, TRUE, NONV, HASARENA,
601dfd0a 889 FIT_ARENA(0, sizeof(XPVCV)) },
69ba284b 890
601dfd0a
NC
891 { sizeof(XPVFM),
892 sizeof(XPVFM),
893 0,
69ba284b 894 SVt_PVFM, TRUE, NONV, NOARENA,
601dfd0a 895 FIT_ARENA(20, sizeof(XPVFM)) },
d2a0f284
JC
896
897 /* XPVIO is 84 bytes, fits 48x */
601dfd0a
NC
898 { sizeof(XPVIO),
899 sizeof(XPVIO),
900 0,
b6f60916 901 SVt_PVIO, TRUE, NONV, HASARENA,
601dfd0a 902 FIT_ARENA(24, sizeof(XPVIO)) },
bd81e77b 903};
29489e7c 904
bd81e77b 905#define new_body_allocated(sv_type) \
d2a0f284 906 (void *)((char *)S_new_body(aTHX_ sv_type) \
bd81e77b 907 - bodies_by_type[sv_type].offset)
29489e7c 908
26359cfa
NC
909/* return a thing to the free list */
910
911#define del_body(thing, root) \
912 STMT_START { \
913 void ** const thing_copy = (void **)thing; \
914 *thing_copy = *root; \
915 *root = (void*)thing_copy; \
916 } STMT_END
29489e7c 917
bd81e77b 918#ifdef PURIFY
29489e7c 919
beeec492
NC
920#define new_XNV() safemalloc(sizeof(XPVNV))
921#define new_XPVNV() safemalloc(sizeof(XPVNV))
922#define new_XPVMG() safemalloc(sizeof(XPVMG))
29489e7c 923
beeec492 924#define del_XPVGV(p) safefree(p)
29489e7c 925
bd81e77b 926#else /* !PURIFY */
29489e7c 927
65ac1738 928#define new_XNV() new_body_allocated(SVt_NV)
65ac1738 929#define new_XPVNV() new_body_allocated(SVt_PVNV)
65ac1738 930#define new_XPVMG() new_body_allocated(SVt_PVMG)
645c22ef 931
26359cfa
NC
932#define del_XPVGV(p) del_body(p + bodies_by_type[SVt_PVGV].offset, \
933 &PL_body_roots[SVt_PVGV])
1d7c1841 934
bd81e77b 935#endif /* PURIFY */
93e68bfb 936
bd81e77b 937/* no arena for you! */
93e68bfb 938
bd81e77b 939#define new_NOARENA(details) \
beeec492 940 safemalloc((details)->body_size + (details)->offset)
bd81e77b 941#define new_NOARENAZ(details) \
beeec492 942 safecalloc((details)->body_size + (details)->offset, 1)
d2a0f284 943
1e30fcd5
NC
944void *
945Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size,
946 const size_t arena_size)
d2a0f284
JC
947{
948 dVAR;
949 void ** const root = &PL_body_roots[sv_type];
99816f8d
NC
950 struct arena_desc *adesc;
951 struct arena_set *aroot = (struct arena_set *) PL_body_arenas;
952 unsigned int curr;
d2a0f284
JC
953 char *start;
954 const char *end;
02982131 955 const size_t good_arena_size = Perl_malloc_good_size(arena_size);
0b2d3faa 956#if defined(DEBUGGING) && !defined(PERL_GLOBAL_STRUCT_PRIVATE)
23e9d66c
NC
957 static bool done_sanity_check;
958
0b2d3faa
JH
959 /* PERL_GLOBAL_STRUCT_PRIVATE cannot coexist with global
960 * variables like done_sanity_check. */
10666ae3 961 if (!done_sanity_check) {
ea471437 962 unsigned int i = SVt_LAST;
10666ae3
NC
963
964 done_sanity_check = TRUE;
965
966 while (i--)
967 assert (bodies_by_type[i].type == i);
968 }
969#endif
970
02982131 971 assert(arena_size);
23e9d66c 972
99816f8d
NC
973 /* may need new arena-set to hold new arena */
974 if (!aroot || aroot->curr >= aroot->set_size) {
975 struct arena_set *newroot;
976 Newxz(newroot, 1, struct arena_set);
977 newroot->set_size = ARENAS_PER_SET;
978 newroot->next = aroot;
979 aroot = newroot;
980 PL_body_arenas = (void *) newroot;
981 DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", (void*)aroot));
982 }
983
984 /* ok, now have arena-set with at least 1 empty/available arena-desc */
985 curr = aroot->curr++;
986 adesc = &(aroot->set[curr]);
987 assert(!adesc->arena);
988
989 Newx(adesc->arena, good_arena_size, char);
990 adesc->size = good_arena_size;
991 adesc->utype = sv_type;
992 DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %"UVuf"\n",
993 curr, (void*)adesc->arena, (UV)good_arena_size));
994
995 start = (char *) adesc->arena;
d2a0f284 996
29657bb6
NC
997 /* Get the address of the byte after the end of the last body we can fit.
998 Remember, this is integer division: */
02982131 999 end = start + good_arena_size / body_size * body_size;
d2a0f284 1000
d2a0f284 1001 /* computed count doesnt reflect the 1st slot reservation */
d8fca402
NC
1002#if defined(MYMALLOC) || defined(HAS_MALLOC_GOOD_SIZE)
1003 DEBUG_m(PerlIO_printf(Perl_debug_log,
1004 "arena %p end %p arena-size %d (from %d) type %d "
1005 "size %d ct %d\n",
02982131
NC
1006 (void*)start, (void*)end, (int)good_arena_size,
1007 (int)arena_size, sv_type, (int)body_size,
1008 (int)good_arena_size / (int)body_size));
d8fca402 1009#else
d2a0f284
JC
1010 DEBUG_m(PerlIO_printf(Perl_debug_log,
1011 "arena %p end %p arena-size %d type %d size %d ct %d\n",
6c9570dc 1012 (void*)start, (void*)end,
02982131
NC
1013 (int)arena_size, sv_type, (int)body_size,
1014 (int)good_arena_size / (int)body_size));
d8fca402 1015#endif
d2a0f284
JC
1016 *root = (void *)start;
1017
29657bb6
NC
1018 while (1) {
1019 /* Where the next body would start: */
d2a0f284 1020 char * const next = start + body_size;
29657bb6
NC
1021
1022 if (next >= end) {
1023 /* This is the last body: */
1024 assert(next == end);
1025
1026 *(void **)start = 0;
1027 return *root;
1028 }
1029
d2a0f284
JC
1030 *(void**) start = (void *)next;
1031 start = next;
1032 }
d2a0f284
JC
1033}
1034
1035/* grab a new thing from the free list, allocating more if necessary.
1036 The inline version is used for speed in hot routines, and the
1037 function using it serves the rest (unless PURIFY).
1038*/
1039#define new_body_inline(xpv, sv_type) \
1040 STMT_START { \
1041 void ** const r3wt = &PL_body_roots[sv_type]; \
11b79775 1042 xpv = (PTR_TBL_ENT_t*) (*((void **)(r3wt)) \
1e30fcd5 1043 ? *((void **)(r3wt)) : Perl_more_bodies(aTHX_ sv_type, \
02982131
NC
1044 bodies_by_type[sv_type].body_size,\
1045 bodies_by_type[sv_type].arena_size)); \
d2a0f284 1046 *(r3wt) = *(void**)(xpv); \
d2a0f284
JC
1047 } STMT_END
1048
1049#ifndef PURIFY
1050
1051STATIC void *
de37a194 1052S_new_body(pTHX_ const svtype sv_type)
d2a0f284
JC
1053{
1054 dVAR;
1055 void *xpv;
1056 new_body_inline(xpv, sv_type);
1057 return xpv;
1058}
1059
1060#endif
93e68bfb 1061
238b27b3
NC
1062static const struct body_details fake_rv =
1063 { 0, 0, 0, SVt_IV, FALSE, NONV, NOARENA, 0 };
1064
bd81e77b
NC
1065/*
1066=for apidoc sv_upgrade
93e68bfb 1067
bd81e77b
NC
1068Upgrade an SV to a more complex form. Generally adds a new body type to the
1069SV, then copies across as much information as possible from the old body.
1070You generally want to use the C<SvUPGRADE> macro wrapper. See also C<svtype>.
93e68bfb 1071
bd81e77b 1072=cut
93e68bfb 1073*/
93e68bfb 1074
bd81e77b 1075void
aad570aa 1076Perl_sv_upgrade(pTHX_ register SV *const sv, svtype new_type)
cac9b346 1077{
97aff369 1078 dVAR;
bd81e77b
NC
1079 void* old_body;
1080 void* new_body;
42d0e0b7 1081 const svtype old_type = SvTYPE(sv);
d2a0f284 1082 const struct body_details *new_type_details;
238b27b3 1083 const struct body_details *old_type_details
bd81e77b 1084 = bodies_by_type + old_type;
4df7f6af 1085 SV *referant = NULL;
cac9b346 1086
7918f24d
NC
1087 PERL_ARGS_ASSERT_SV_UPGRADE;
1088
1776cbe8
NC
1089 if (old_type == new_type)
1090 return;
1091
1092 /* This clause was purposefully added ahead of the early return above to
1093 the shared string hackery for (sort {$a <=> $b} keys %hash), with the
1094 inference by Nick I-S that it would fix other troublesome cases. See
1095 changes 7162, 7163 (f130fd4589cf5fbb24149cd4db4137c8326f49c1 and parent)
1096
1097 Given that shared hash key scalars are no longer PVIV, but PV, there is
1098 no longer need to unshare so as to free up the IVX slot for its proper
1099 purpose. So it's safe to move the early return earlier. */
1100
bd81e77b
NC
1101 if (new_type != SVt_PV && SvIsCOW(sv)) {
1102 sv_force_normal_flags(sv, 0);
1103 }
cac9b346 1104
bd81e77b 1105 old_body = SvANY(sv);
de042e1d 1106
bd81e77b
NC
1107 /* Copying structures onto other structures that have been neatly zeroed
1108 has a subtle gotcha. Consider XPVMG
cac9b346 1109
bd81e77b
NC
1110 +------+------+------+------+------+-------+-------+
1111 | NV | CUR | LEN | IV | MAGIC | STASH |
1112 +------+------+------+------+------+-------+-------+
1113 0 4 8 12 16 20 24 28
645c22ef 1114
bd81e77b
NC
1115 where NVs are aligned to 8 bytes, so that sizeof that structure is
1116 actually 32 bytes long, with 4 bytes of padding at the end:
08742458 1117
bd81e77b
NC
1118 +------+------+------+------+------+-------+-------+------+
1119 | NV | CUR | LEN | IV | MAGIC | STASH | ??? |
1120 +------+------+------+------+------+-------+-------+------+
1121 0 4 8 12 16 20 24 28 32
08742458 1122
bd81e77b 1123 so what happens if you allocate memory for this structure:
30f9da9e 1124
bd81e77b
NC
1125 +------+------+------+------+------+-------+-------+------+------+...
1126 | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME |
1127 +------+------+------+------+------+-------+-------+------+------+...
1128 0 4 8 12 16 20 24 28 32 36
bfc44f79 1129
bd81e77b
NC
1130 zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you
1131 expect, because you copy the area marked ??? onto GP. Now, ??? may have
1132 started out as zero once, but it's quite possible that it isn't. So now,
1133 rather than a nicely zeroed GP, you have it pointing somewhere random.
1134 Bugs ensue.
bfc44f79 1135
bd81e77b
NC
1136 (In fact, GP ends up pointing at a previous GP structure, because the
1137 principle cause of the padding in XPVMG getting garbage is a copy of
6c9e42f7
NC
1138 sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob. Right now
1139 this happens to be moot because XPVGV has been re-ordered, with GP
1140 no longer after STASH)
30f9da9e 1141
bd81e77b
NC
1142 So we are careful and work out the size of used parts of all the
1143 structures. */
bfc44f79 1144
bd81e77b
NC
1145 switch (old_type) {
1146 case SVt_NULL:
1147 break;
1148 case SVt_IV:
4df7f6af
NC
1149 if (SvROK(sv)) {
1150 referant = SvRV(sv);
238b27b3
NC
1151 old_type_details = &fake_rv;
1152 if (new_type == SVt_NV)
1153 new_type = SVt_PVNV;
4df7f6af
NC
1154 } else {
1155 if (new_type < SVt_PVIV) {
1156 new_type = (new_type == SVt_NV)
1157 ? SVt_PVNV : SVt_PVIV;
1158 }
bd81e77b
NC
1159 }
1160 break;
1161 case SVt_NV:
1162 if (new_type < SVt_PVNV) {
1163 new_type = SVt_PVNV;
bd81e77b
NC
1164 }
1165 break;
bd81e77b
NC
1166 case SVt_PV:
1167 assert(new_type > SVt_PV);
1168 assert(SVt_IV < SVt_PV);
1169 assert(SVt_NV < SVt_PV);
1170 break;
1171 case SVt_PVIV:
1172 break;
1173 case SVt_PVNV:
1174 break;
1175 case SVt_PVMG:
1176 /* Because the XPVMG of PL_mess_sv isn't allocated from the arena,
1177 there's no way that it can be safely upgraded, because perl.c
1178 expects to Safefree(SvANY(PL_mess_sv)) */
1179 assert(sv != PL_mess_sv);
1180 /* This flag bit is used to mean other things in other scalar types.
1181 Given that it only has meaning inside the pad, it shouldn't be set
1182 on anything that can get upgraded. */
00b1698f 1183 assert(!SvPAD_TYPED(sv));
bd81e77b
NC
1184 break;
1185 default:
1186 if (old_type_details->cant_upgrade)
c81225bc
NC
1187 Perl_croak(aTHX_ "Can't upgrade %s (%" UVuf ") to %" UVuf,
1188 sv_reftype(sv, 0), (UV) old_type, (UV) new_type);
bd81e77b 1189 }
3376de98
NC
1190
1191 if (old_type > new_type)
1192 Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d",
1193 (int)old_type, (int)new_type);
1194
2fa1109b 1195 new_type_details = bodies_by_type + new_type;
645c22ef 1196
bd81e77b
NC
1197 SvFLAGS(sv) &= ~SVTYPEMASK;
1198 SvFLAGS(sv) |= new_type;
932e9ff9 1199
ab4416c0
NC
1200 /* This can't happen, as SVt_NULL is <= all values of new_type, so one of
1201 the return statements above will have triggered. */
1202 assert (new_type != SVt_NULL);
bd81e77b 1203 switch (new_type) {
bd81e77b
NC
1204 case SVt_IV:
1205 assert(old_type == SVt_NULL);
1206 SvANY(sv) = (XPVIV*)((char*)&(sv->sv_u.svu_iv) - STRUCT_OFFSET(XPVIV, xiv_iv));
1207 SvIV_set(sv, 0);
1208 return;
1209 case SVt_NV:
1210 assert(old_type == SVt_NULL);
1211 SvANY(sv) = new_XNV();
1212 SvNV_set(sv, 0);
1213 return;
bd81e77b 1214 case SVt_PVHV:
bd81e77b 1215 case SVt_PVAV:
d2a0f284 1216 assert(new_type_details->body_size);
c1ae03ae
NC
1217
1218#ifndef PURIFY
1219 assert(new_type_details->arena);
d2a0f284 1220 assert(new_type_details->arena_size);
c1ae03ae 1221 /* This points to the start of the allocated area. */
d2a0f284
JC
1222 new_body_inline(new_body, new_type);
1223 Zero(new_body, new_type_details->body_size, char);
c1ae03ae
NC
1224 new_body = ((char *)new_body) - new_type_details->offset;
1225#else
1226 /* We always allocated the full length item with PURIFY. To do this
1227 we fake things so that arena is false for all 16 types.. */
1228 new_body = new_NOARENAZ(new_type_details);
1229#endif
1230 SvANY(sv) = new_body;
1231 if (new_type == SVt_PVAV) {
1232 AvMAX(sv) = -1;
1233 AvFILLp(sv) = -1;
1234 AvREAL_only(sv);
64484faa 1235 if (old_type_details->body_size) {
ac572bf4
NC
1236 AvALLOC(sv) = 0;
1237 } else {
1238 /* It will have been zeroed when the new body was allocated.
1239 Lets not write to it, in case it confuses a write-back
1240 cache. */
1241 }
78ac7dd9
NC
1242 } else {
1243 assert(!SvOK(sv));
1244 SvOK_off(sv);
1245#ifndef NODEFAULT_SHAREKEYS
1246 HvSHAREKEYS_on(sv); /* key-sharing on by default */
1247#endif
1248 HvMAX(sv) = 7; /* (start with 8 buckets) */
c1ae03ae 1249 }
aeb18a1e 1250
bd81e77b
NC
1251 /* SVt_NULL isn't the only thing upgraded to AV or HV.
1252 The target created by newSVrv also is, and it can have magic.
1253 However, it never has SvPVX set.
1254 */
4df7f6af
NC
1255 if (old_type == SVt_IV) {
1256 assert(!SvROK(sv));
1257 } else if (old_type >= SVt_PV) {
bd81e77b
NC
1258 assert(SvPVX_const(sv) == 0);
1259 }
aeb18a1e 1260
bd81e77b 1261 if (old_type >= SVt_PVMG) {
e736a858 1262 SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_u.xmg_magic);
bd81e77b 1263 SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash);
797c7171
NC
1264 } else {
1265 sv->sv_u.svu_array = NULL; /* or svu_hash */
bd81e77b
NC
1266 }
1267 break;
93e68bfb 1268
93e68bfb 1269
b9ad13ac
NC
1270 case SVt_REGEXP:
1271 /* This ensures that SvTHINKFIRST(sv) is true, and hence that
1272 sv_force_normal_flags(sv) is called. */
1273 SvFAKE_on(sv);
bd81e77b
NC
1274 case SVt_PVIV:
1275 /* XXX Is this still needed? Was it ever needed? Surely as there is
1276 no route from NV to PVIV, NOK can never be true */
1277 assert(!SvNOKp(sv));
1278 assert(!SvNOK(sv));
1279 case SVt_PVIO:
1280 case SVt_PVFM:
bd81e77b
NC
1281 case SVt_PVGV:
1282 case SVt_PVCV:
1283 case SVt_PVLV:
1284 case SVt_PVMG:
1285 case SVt_PVNV:
1286 case SVt_PV:
93e68bfb 1287
d2a0f284 1288 assert(new_type_details->body_size);
bd81e77b
NC
1289 /* We always allocated the full length item with PURIFY. To do this
1290 we fake things so that arena is false for all 16 types.. */
1291 if(new_type_details->arena) {
1292 /* This points to the start of the allocated area. */
d2a0f284
JC
1293 new_body_inline(new_body, new_type);
1294 Zero(new_body, new_type_details->body_size, char);
bd81e77b
NC
1295 new_body = ((char *)new_body) - new_type_details->offset;
1296 } else {
1297 new_body = new_NOARENAZ(new_type_details);
1298 }
1299 SvANY(sv) = new_body;
5e2fc214 1300
bd81e77b 1301 if (old_type_details->copy) {
f9ba3d20
NC
1302 /* There is now the potential for an upgrade from something without
1303 an offset (PVNV or PVMG) to something with one (PVCV, PVFM) */
1304 int offset = old_type_details->offset;
1305 int length = old_type_details->copy;
1306
1307 if (new_type_details->offset > old_type_details->offset) {
d4c19fe8 1308 const int difference
f9ba3d20
NC
1309 = new_type_details->offset - old_type_details->offset;
1310 offset += difference;
1311 length -= difference;
1312 }
1313 assert (length >= 0);
1314
1315 Copy((char *)old_body + offset, (char *)new_body + offset, length,
1316 char);
bd81e77b
NC
1317 }
1318
1319#ifndef NV_ZERO_IS_ALLBITS_ZERO
f2524eef 1320 /* If NV 0.0 is stores as all bits 0 then Zero() already creates a
e5ce394c
NC
1321 * correct 0.0 for us. Otherwise, if the old body didn't have an
1322 * NV slot, but the new one does, then we need to initialise the
1323 * freshly created NV slot with whatever the correct bit pattern is
1324 * for 0.0 */
e22a937e
NC
1325 if (old_type_details->zero_nv && !new_type_details->zero_nv
1326 && !isGV_with_GP(sv))
bd81e77b 1327 SvNV_set(sv, 0);
82048762 1328#endif
5e2fc214 1329
85dca89a
NC
1330 if (new_type == SVt_PVIO) {
1331 IO * const io = MUTABLE_IO(sv);
d963bf01 1332 GV *iogv = gv_fetchpvs("IO::File::", GV_ADD, SVt_PVHV);
85dca89a
NC
1333
1334 SvOBJECT_on(io);
1335 /* Clear the stashcache because a new IO could overrule a package
1336 name */
1337 hv_clear(PL_stashcache);
1338
85dca89a 1339 SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv))));
f2524eef 1340 IoPAGE_LEN(sv) = 60;
85dca89a 1341 }
4df7f6af
NC
1342 if (old_type < SVt_PV) {
1343 /* referant will be NULL unless the old type was SVt_IV emulating
1344 SVt_RV */
1345 sv->sv_u.svu_rv = referant;
1346 }
bd81e77b
NC
1347 break;
1348 default:
afd78fd5
JH
1349 Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu",
1350 (unsigned long)new_type);
bd81e77b 1351 }
73171d91 1352
db93c0c4 1353 if (old_type > SVt_IV) {
bd81e77b 1354#ifdef PURIFY
beeec492 1355 safefree(old_body);
bd81e77b 1356#else
bc786448
GG
1357 /* Note that there is an assumption that all bodies of types that
1358 can be upgraded came from arenas. Only the more complex non-
1359 upgradable types are allowed to be directly malloc()ed. */
1360 assert(old_type_details->arena);
bd81e77b
NC
1361 del_body((void*)((char*)old_body + old_type_details->offset),
1362 &PL_body_roots[old_type]);
1363#endif
1364 }
1365}
73171d91 1366
bd81e77b
NC
1367/*
1368=for apidoc sv_backoff
73171d91 1369
bd81e77b
NC
1370Remove any string offset. You should normally use the C<SvOOK_off> macro
1371wrapper instead.
73171d91 1372
bd81e77b 1373=cut
73171d91
NC
1374*/
1375
bd81e77b 1376int
aad570aa 1377Perl_sv_backoff(pTHX_ register SV *const sv)
bd81e77b 1378{
69240efd 1379 STRLEN delta;
7a4bba22 1380 const char * const s = SvPVX_const(sv);
7918f24d
NC
1381
1382 PERL_ARGS_ASSERT_SV_BACKOFF;
96a5add6 1383 PERL_UNUSED_CONTEXT;
7918f24d 1384
bd81e77b
NC
1385 assert(SvOOK(sv));
1386 assert(SvTYPE(sv) != SVt_PVHV);
1387 assert(SvTYPE(sv) != SVt_PVAV);
7a4bba22 1388
69240efd
NC
1389 SvOOK_offset(sv, delta);
1390
7a4bba22
NC
1391 SvLEN_set(sv, SvLEN(sv) + delta);
1392 SvPV_set(sv, SvPVX(sv) - delta);
1393 Move(s, SvPVX(sv), SvCUR(sv)+1, char);
bd81e77b
NC
1394 SvFLAGS(sv) &= ~SVf_OOK;
1395 return 0;
1396}
73171d91 1397
bd81e77b
NC
1398/*
1399=for apidoc sv_grow
73171d91 1400
bd81e77b
NC
1401Expands the character buffer in the SV. If necessary, uses C<sv_unref> and
1402upgrades the SV to C<SVt_PV>. Returns a pointer to the character buffer.
1403Use the C<SvGROW> wrapper instead.
93e68bfb 1404
bd81e77b
NC
1405=cut
1406*/
93e68bfb 1407
bd81e77b 1408char *
aad570aa 1409Perl_sv_grow(pTHX_ register SV *const sv, register STRLEN newlen)
bd81e77b
NC
1410{
1411 register char *s;
93e68bfb 1412
7918f24d
NC
1413 PERL_ARGS_ASSERT_SV_GROW;
1414
5db06880
NC
1415 if (PL_madskills && newlen >= 0x100000) {
1416 PerlIO_printf(Perl_debug_log,
1417 "Allocation too large: %"UVxf"\n", (UV)newlen);
1418 }
bd81e77b
NC
1419#ifdef HAS_64K_LIMIT
1420 if (newlen >= 0x10000) {
1421 PerlIO_printf(Perl_debug_log,
1422 "Allocation too large: %"UVxf"\n", (UV)newlen);
1423 my_exit(1);
1424 }
1425#endif /* HAS_64K_LIMIT */
1426 if (SvROK(sv))
1427 sv_unref(sv);
1428 if (SvTYPE(sv) < SVt_PV) {
1429 sv_upgrade(sv, SVt_PV);
1430 s = SvPVX_mutable(sv);
1431 }
1432 else if (SvOOK(sv)) { /* pv is offset? */
1433 sv_backoff(sv);
1434 s = SvPVX_mutable(sv);
1435 if (newlen > SvLEN(sv))
1436 newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */
1437#ifdef HAS_64K_LIMIT
1438 if (newlen >= 0x10000)
1439 newlen = 0xFFFF;
1440#endif
1441 }
1442 else
1443 s = SvPVX_mutable(sv);
aeb18a1e 1444
bd81e77b 1445 if (newlen > SvLEN(sv)) { /* need more room? */
f1200559
WH
1446 STRLEN minlen = SvCUR(sv);
1447 minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + 10;
1448 if (newlen < minlen)
1449 newlen = minlen;
aedff202 1450#ifndef Perl_safesysmalloc_size
bd81e77b 1451 newlen = PERL_STRLEN_ROUNDUP(newlen);
bd81e77b 1452#endif
98653f18 1453 if (SvLEN(sv) && s) {
10edeb5d 1454 s = (char*)saferealloc(s, newlen);
bd81e77b
NC
1455 }
1456 else {
10edeb5d 1457 s = (char*)safemalloc(newlen);
bd81e77b
NC
1458 if (SvPVX_const(sv) && SvCUR(sv)) {
1459 Move(SvPVX_const(sv), s, (newlen < SvCUR(sv)) ? newlen : SvCUR(sv), char);
1460 }
1461 }
1462 SvPV_set(sv, s);
ca7c1a29 1463#ifdef Perl_safesysmalloc_size
98653f18
NC
1464 /* Do this here, do it once, do it right, and then we will never get
1465 called back into sv_grow() unless there really is some growing
1466 needed. */
ca7c1a29 1467 SvLEN_set(sv, Perl_safesysmalloc_size(s));
98653f18 1468#else
bd81e77b 1469 SvLEN_set(sv, newlen);
98653f18 1470#endif
bd81e77b
NC
1471 }
1472 return s;
1473}
aeb18a1e 1474
bd81e77b
NC
1475/*
1476=for apidoc sv_setiv
932e9ff9 1477
bd81e77b
NC
1478Copies an integer into the given SV, upgrading first if necessary.
1479Does not handle 'set' magic. See also C<sv_setiv_mg>.
463ee0b2 1480
bd81e77b
NC
1481=cut
1482*/
463ee0b2 1483
bd81e77b 1484void
aad570aa 1485Perl_sv_setiv(pTHX_ register SV *const sv, const IV i)
bd81e77b 1486{
97aff369 1487 dVAR;
7918f24d
NC
1488
1489 PERL_ARGS_ASSERT_SV_SETIV;
1490
bd81e77b
NC
1491 SV_CHECK_THINKFIRST_COW_DROP(sv);
1492 switch (SvTYPE(sv)) {
1493 case SVt_NULL:
bd81e77b 1494 case SVt_NV:
3376de98 1495 sv_upgrade(sv, SVt_IV);
bd81e77b 1496 break;
bd81e77b
NC
1497 case SVt_PV:
1498 sv_upgrade(sv, SVt_PVIV);
1499 break;
463ee0b2 1500
bd81e77b 1501 case SVt_PVGV:
6e592b3a
BM
1502 if (!isGV_with_GP(sv))
1503 break;
bd81e77b
NC
1504 case SVt_PVAV:
1505 case SVt_PVHV:
1506 case SVt_PVCV:
1507 case SVt_PVFM:
1508 case SVt_PVIO:
1509 Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0),
1510 OP_DESC(PL_op));
42d0e0b7 1511 default: NOOP;
bd81e77b
NC
1512 }
1513 (void)SvIOK_only(sv); /* validate number */
1514 SvIV_set(sv, i);
1515 SvTAINT(sv);
1516}
932e9ff9 1517
bd81e77b
NC
1518/*
1519=for apidoc sv_setiv_mg
d33b2eba 1520
bd81e77b 1521Like C<sv_setiv>, but also handles 'set' magic.
1c846c1f 1522
bd81e77b
NC
1523=cut
1524*/
d33b2eba 1525
bd81e77b 1526void
aad570aa 1527Perl_sv_setiv_mg(pTHX_ register SV *const sv, const IV i)
bd81e77b 1528{
7918f24d
NC
1529 PERL_ARGS_ASSERT_SV_SETIV_MG;
1530
bd81e77b
NC
1531 sv_setiv(sv,i);
1532 SvSETMAGIC(sv);
1533}
727879eb 1534
bd81e77b
NC
1535/*
1536=for apidoc sv_setuv
d33b2eba 1537
bd81e77b
NC
1538Copies an unsigned integer into the given SV, upgrading first if necessary.
1539Does not handle 'set' magic. See also C<sv_setuv_mg>.
9b94d1dd 1540
bd81e77b
NC
1541=cut
1542*/
d33b2eba 1543
bd81e77b 1544void
aad570aa 1545Perl_sv_setuv(pTHX_ register SV *const sv, const UV u)
bd81e77b 1546{
7918f24d
NC
1547 PERL_ARGS_ASSERT_SV_SETUV;
1548
bd81e77b
NC
1549 /* With these two if statements:
1550 u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865
d33b2eba 1551
bd81e77b
NC
1552 without
1553 u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865
1c846c1f 1554
bd81e77b
NC
1555 If you wish to remove them, please benchmark to see what the effect is
1556 */
1557 if (u <= (UV)IV_MAX) {
1558 sv_setiv(sv, (IV)u);
1559 return;
1560 }
1561 sv_setiv(sv, 0);
1562 SvIsUV_on(sv);
1563 SvUV_set(sv, u);
1564}
d33b2eba 1565
bd81e77b
NC
1566/*
1567=for apidoc sv_setuv_mg
727879eb 1568
bd81e77b 1569Like C<sv_setuv>, but also handles 'set' magic.
9b94d1dd 1570
bd81e77b
NC
1571=cut
1572*/
5e2fc214 1573
bd81e77b 1574void
aad570aa 1575Perl_sv_setuv_mg(pTHX_ register SV *const sv, const UV u)
bd81e77b 1576{
7918f24d
NC
1577 PERL_ARGS_ASSERT_SV_SETUV_MG;
1578
bd81e77b
NC
1579 sv_setuv(sv,u);
1580 SvSETMAGIC(sv);
1581}
5e2fc214 1582
954c1994 1583/*
bd81e77b 1584=for apidoc sv_setnv
954c1994 1585
bd81e77b
NC
1586Copies a double into the given SV, upgrading first if necessary.
1587Does not handle 'set' magic. See also C<sv_setnv_mg>.
954c1994
GS
1588
1589=cut
1590*/
1591
63f97190 1592void
aad570aa 1593Perl_sv_setnv(pTHX_ register SV *const sv, const NV num)
79072805 1594{
97aff369 1595 dVAR;
7918f24d
NC
1596
1597 PERL_ARGS_ASSERT_SV_SETNV;
1598
bd81e77b
NC
1599 SV_CHECK_THINKFIRST_COW_DROP(sv);
1600 switch (SvTYPE(sv)) {
79072805 1601 case SVt_NULL:
79072805 1602 case SVt_IV:
bd81e77b 1603 sv_upgrade(sv, SVt_NV);
79072805
LW
1604 break;
1605 case SVt_PV:
79072805 1606 case SVt_PVIV:
bd81e77b 1607 sv_upgrade(sv, SVt_PVNV);
79072805 1608 break;
bd4b1eb5 1609
bd4b1eb5 1610 case SVt_PVGV:
6e592b3a
BM
1611 if (!isGV_with_GP(sv))
1612 break;
bd81e77b
NC
1613 case SVt_PVAV:
1614 case SVt_PVHV:
79072805 1615 case SVt_PVCV:
bd81e77b
NC
1616 case SVt_PVFM:
1617 case SVt_PVIO:
1618 Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0),
94bbb3f4 1619 OP_DESC(PL_op));
42d0e0b7 1620 default: NOOP;
2068cd4d 1621 }
bd81e77b
NC
1622 SvNV_set(sv, num);
1623 (void)SvNOK_only(sv); /* validate number */
1624 SvTAINT(sv);
79072805
LW
1625}
1626
645c22ef 1627/*
bd81e77b 1628=for apidoc sv_setnv_mg
645c22ef 1629
bd81e77b 1630Like C<sv_setnv>, but also handles 'set' magic.
645c22ef
DM
1631
1632=cut
1633*/
1634
bd81e77b 1635void
aad570aa 1636Perl_sv_setnv_mg(pTHX_ register SV *const sv, const NV num)
79072805 1637{
7918f24d
NC
1638 PERL_ARGS_ASSERT_SV_SETNV_MG;
1639
bd81e77b
NC
1640 sv_setnv(sv,num);
1641 SvSETMAGIC(sv);
79072805
LW
1642}
1643
bd81e77b
NC
1644/* Print an "isn't numeric" warning, using a cleaned-up,
1645 * printable version of the offending string
1646 */
954c1994 1647
bd81e77b 1648STATIC void
aad570aa 1649S_not_a_number(pTHX_ SV *const sv)
79072805 1650{
97aff369 1651 dVAR;
bd81e77b
NC
1652 SV *dsv;
1653 char tmpbuf[64];
1654 const char *pv;
94463019 1655
7918f24d
NC
1656 PERL_ARGS_ASSERT_NOT_A_NUMBER;
1657
94463019 1658 if (DO_UTF8(sv)) {
84bafc02 1659 dsv = newSVpvs_flags("", SVs_TEMP);
94463019
JH
1660 pv = sv_uni_display(dsv, sv, 10, 0);
1661 } else {
1662 char *d = tmpbuf;
551405c4 1663 const char * const limit = tmpbuf + sizeof(tmpbuf) - 8;
94463019
JH
1664 /* each *s can expand to 4 chars + "...\0",
1665 i.e. need room for 8 chars */
ecdeb87c 1666
00b6aa41
AL
1667 const char *s = SvPVX_const(sv);
1668 const char * const end = s + SvCUR(sv);
1669 for ( ; s < end && d < limit; s++ ) {
94463019
JH
1670 int ch = *s & 0xFF;
1671 if (ch & 128 && !isPRINT_LC(ch)) {
1672 *d++ = 'M';
1673 *d++ = '-';
1674 ch &= 127;
1675 }
1676 if (ch == '\n') {
1677 *d++ = '\\';
1678 *d++ = 'n';
1679 }
1680 else if (ch == '\r') {
1681 *d++ = '\\';
1682 *d++ = 'r';
1683 }
1684 else if (ch == '\f') {
1685 *d++ = '\\';
1686 *d++ = 'f';
1687 }
1688 else if (ch == '\\') {
1689 *d++ = '\\';
1690 *d++ = '\\';
1691 }
1692 else if (ch == '\0') {
1693 *d++ = '\\';
1694 *d++ = '0';
1695 }
1696 else if (isPRINT_LC(ch))
1697 *d++ = ch;
1698 else {
1699 *d++ = '^';
1700 *d++ = toCTRL(ch);
1701 }
1702 }
1703 if (s < end) {
1704 *d++ = '.';
1705 *d++ = '.';
1706 *d++ = '.';
1707 }
1708 *d = '\0';
1709 pv = tmpbuf;
a0d0e21e 1710 }
a0d0e21e 1711
533c011a 1712 if (PL_op)
9014280d 1713 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
94463019
JH
1714 "Argument \"%s\" isn't numeric in %s", pv,
1715 OP_DESC(PL_op));
a0d0e21e 1716 else
9014280d 1717 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
94463019 1718 "Argument \"%s\" isn't numeric", pv);
a0d0e21e
LW
1719}
1720
c2988b20
NC
1721/*
1722=for apidoc looks_like_number
1723
645c22ef
DM
1724Test if the content of an SV looks like a number (or is a number).
1725C<Inf> and C<Infinity> are treated as numbers (so will not issue a
1726non-numeric warning), even if your atof() doesn't grok them.
c2988b20
NC
1727
1728=cut
1729*/
1730
1731I32
aad570aa 1732Perl_looks_like_number(pTHX_ SV *const sv)
c2988b20 1733{
a3b680e6 1734 register const char *sbegin;
c2988b20
NC
1735 STRLEN len;
1736
7918f24d
NC
1737 PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER;
1738
c2988b20 1739 if (SvPOK(sv)) {
3f7c398e 1740 sbegin = SvPVX_const(sv);
c2988b20
NC
1741 len = SvCUR(sv);
1742 }
1743 else if (SvPOKp(sv))
83003860 1744 sbegin = SvPV_const(sv, len);
c2988b20 1745 else
e0ab1c0e 1746 return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK);
c2988b20
NC
1747 return grok_number(sbegin, len, NULL);
1748}
25da4f38 1749
19f6321d
NC
1750STATIC bool
1751S_glob_2number(pTHX_ GV * const gv)
180488f8
NC
1752{
1753 const U32 wasfake = SvFLAGS(gv) & SVf_FAKE;
1754 SV *const buffer = sv_newmortal();
1755
7918f24d
NC
1756 PERL_ARGS_ASSERT_GLOB_2NUMBER;
1757
180488f8
NC
1758 /* FAKE globs can get coerced, so need to turn this off temporarily if it
1759 is on. */
1760 SvFAKE_off(gv);
1761 gv_efullname3(buffer, gv, "*");
1762 SvFLAGS(gv) |= wasfake;
1763
675c862f
AL
1764 /* We know that all GVs stringify to something that is not-a-number,
1765 so no need to test that. */
1766 if (ckWARN(WARN_NUMERIC))
1767 not_a_number(buffer);
1768 /* We just want something true to return, so that S_sv_2iuv_common
1769 can tail call us and return true. */
19f6321d 1770 return TRUE;
675c862f
AL
1771}
1772
25da4f38
IZ
1773/* Actually, ISO C leaves conversion of UV to IV undefined, but
1774 until proven guilty, assume that things are not that bad... */
1775
645c22ef
DM
1776/*
1777 NV_PRESERVES_UV:
1778
1779 As 64 bit platforms often have an NV that doesn't preserve all bits of
28e5dec8
JH
1780 an IV (an assumption perl has been based on to date) it becomes necessary
1781 to remove the assumption that the NV always carries enough precision to
1782 recreate the IV whenever needed, and that the NV is the canonical form.
1783 Instead, IV/UV and NV need to be given equal rights. So as to not lose
645c22ef 1784 precision as a side effect of conversion (which would lead to insanity
28e5dec8
JH
1785 and the dragon(s) in t/op/numconvert.t getting very angry) the intent is
1786 1) to distinguish between IV/UV/NV slots that have cached a valid
1787 conversion where precision was lost and IV/UV/NV slots that have a
1788 valid conversion which has lost no precision
645c22ef 1789 2) to ensure that if a numeric conversion to one form is requested that
28e5dec8
JH
1790 would lose precision, the precise conversion (or differently
1791 imprecise conversion) is also performed and cached, to prevent
1792 requests for different numeric formats on the same SV causing
1793 lossy conversion chains. (lossless conversion chains are perfectly
1794 acceptable (still))
1795
1796
1797 flags are used:
1798 SvIOKp is true if the IV slot contains a valid value
1799 SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true)
1800 SvNOKp is true if the NV slot contains a valid value
1801 SvNOK is true only if the NV value is accurate
1802
1803 so
645c22ef 1804 while converting from PV to NV, check to see if converting that NV to an
28e5dec8
JH
1805 IV(or UV) would lose accuracy over a direct conversion from PV to
1806 IV(or UV). If it would, cache both conversions, return NV, but mark
1807 SV as IOK NOKp (ie not NOK).
1808
645c22ef 1809 While converting from PV to IV, check to see if converting that IV to an
28e5dec8
JH
1810 NV would lose accuracy over a direct conversion from PV to NV. If it
1811 would, cache both conversions, flag similarly.
1812
1813 Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite
1814 correctly because if IV & NV were set NV *always* overruled.
645c22ef
DM
1815 Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning
1816 changes - now IV and NV together means that the two are interchangeable:
28e5dec8 1817 SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX;
d460ef45 1818
645c22ef
DM
1819 The benefit of this is that operations such as pp_add know that if
1820 SvIOK is true for both left and right operands, then integer addition
1821 can be used instead of floating point (for cases where the result won't
1822 overflow). Before, floating point was always used, which could lead to
28e5dec8
JH
1823 loss of precision compared with integer addition.
1824
1825 * making IV and NV equal status should make maths accurate on 64 bit
1826 platforms
1827 * may speed up maths somewhat if pp_add and friends start to use
645c22ef 1828 integers when possible instead of fp. (Hopefully the overhead in
28e5dec8
JH
1829 looking for SvIOK and checking for overflow will not outweigh the
1830 fp to integer speedup)
1831 * will slow down integer operations (callers of SvIV) on "inaccurate"
1832 values, as the change from SvIOK to SvIOKp will cause a call into
1833 sv_2iv each time rather than a macro access direct to the IV slot
1834 * should speed up number->string conversion on integers as IV is
645c22ef 1835 favoured when IV and NV are equally accurate
28e5dec8
JH
1836
1837 ####################################################################
645c22ef
DM
1838 You had better be using SvIOK_notUV if you want an IV for arithmetic:
1839 SvIOK is true if (IV or UV), so you might be getting (IV)SvUV.
1840 On the other hand, SvUOK is true iff UV.
28e5dec8
JH
1841 ####################################################################
1842
645c22ef 1843 Your mileage will vary depending your CPU's relative fp to integer
28e5dec8
JH
1844 performance ratio.
1845*/
1846
1847#ifndef NV_PRESERVES_UV
645c22ef
DM
1848# define IS_NUMBER_UNDERFLOW_IV 1
1849# define IS_NUMBER_UNDERFLOW_UV 2
1850# define IS_NUMBER_IV_AND_UV 2
1851# define IS_NUMBER_OVERFLOW_IV 4
1852# define IS_NUMBER_OVERFLOW_UV 5
1853
1854/* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */
28e5dec8
JH
1855
1856/* For sv_2nv these three cases are "SvNOK and don't bother casting" */
1857STATIC int
5de3775c 1858S_sv_2iuv_non_preserve(pTHX_ register SV *const sv
47031da6
NC
1859# ifdef DEBUGGING
1860 , I32 numtype
1861# endif
1862 )
28e5dec8 1863{
97aff369 1864 dVAR;
7918f24d
NC
1865
1866 PERL_ARGS_ASSERT_SV_2IUV_NON_PRESERVE;
1867
3f7c398e 1868 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%"NVgf" inttype=%"UVXf"\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype));
28e5dec8
JH
1869 if (SvNVX(sv) < (NV)IV_MIN) {
1870 (void)SvIOKp_on(sv);
1871 (void)SvNOK_on(sv);
45977657 1872 SvIV_set(sv, IV_MIN);
28e5dec8
JH
1873 return IS_NUMBER_UNDERFLOW_IV;
1874 }
1875 if (SvNVX(sv) > (NV)UV_MAX) {
1876 (void)SvIOKp_on(sv);
1877 (void)SvNOK_on(sv);
1878 SvIsUV_on(sv);
607fa7f2 1879 SvUV_set(sv, UV_MAX);
28e5dec8
JH
1880 return IS_NUMBER_OVERFLOW_UV;
1881 }
c2988b20
NC
1882 (void)SvIOKp_on(sv);
1883 (void)SvNOK_on(sv);
1884 /* Can't use strtol etc to convert this string. (See truth table in
1885 sv_2iv */
1886 if (SvNVX(sv) <= (UV)IV_MAX) {
45977657 1887 SvIV_set(sv, I_V(SvNVX(sv)));
c2988b20
NC
1888 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
1889 SvIOK_on(sv); /* Integer is precise. NOK, IOK */
1890 } else {
1891 /* Integer is imprecise. NOK, IOKp */
1892 }
1893 return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV;
1894 }
1895 SvIsUV_on(sv);
607fa7f2 1896 SvUV_set(sv, U_V(SvNVX(sv)));
c2988b20
NC
1897 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
1898 if (SvUVX(sv) == UV_MAX) {
1899 /* As we know that NVs don't preserve UVs, UV_MAX cannot
1900 possibly be preserved by NV. Hence, it must be overflow.
1901 NOK, IOKp */
1902 return IS_NUMBER_OVERFLOW_UV;
1903 }
1904 SvIOK_on(sv); /* Integer is precise. NOK, UOK */
1905 } else {
1906 /* Integer is imprecise. NOK, IOKp */
28e5dec8 1907 }
c2988b20 1908 return IS_NUMBER_OVERFLOW_IV;
28e5dec8 1909}
645c22ef
DM
1910#endif /* !NV_PRESERVES_UV*/
1911
af359546 1912STATIC bool
7918f24d
NC
1913S_sv_2iuv_common(pTHX_ SV *const sv)
1914{
97aff369 1915 dVAR;
7918f24d
NC
1916
1917 PERL_ARGS_ASSERT_SV_2IUV_COMMON;
1918
af359546 1919 if (SvNOKp(sv)) {
28e5dec8
JH
1920 /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv
1921 * without also getting a cached IV/UV from it at the same time
1922 * (ie PV->NV conversion should detect loss of accuracy and cache
af359546
NC
1923 * IV or UV at same time to avoid this. */
1924 /* IV-over-UV optimisation - choose to cache IV if possible */
25da4f38
IZ
1925
1926 if (SvTYPE(sv) == SVt_NV)
1927 sv_upgrade(sv, SVt_PVNV);
1928
28e5dec8
JH
1929 (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */
1930 /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost
1931 certainly cast into the IV range at IV_MAX, whereas the correct
1932 answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary
1933 cases go to UV */
cab190d4
JD
1934#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1935 if (Perl_isnan(SvNVX(sv))) {
1936 SvUV_set(sv, 0);
1937 SvIsUV_on(sv);
fdbe6d7c 1938 return FALSE;
cab190d4 1939 }
cab190d4 1940#endif
28e5dec8 1941 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
45977657 1942 SvIV_set(sv, I_V(SvNVX(sv)));
28e5dec8
JH
1943 if (SvNVX(sv) == (NV) SvIVX(sv)
1944#ifndef NV_PRESERVES_UV
1945 && (((UV)1 << NV_PRESERVES_UV_BITS) >
1946 (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv)))
1947 /* Don't flag it as "accurately an integer" if the number
1948 came from a (by definition imprecise) NV operation, and
1949 we're outside the range of NV integer precision */
1950#endif
1951 ) {
a43d94f2
NC
1952 if (SvNOK(sv))
1953 SvIOK_on(sv); /* Can this go wrong with rounding? NWC */
1954 else {
1955 /* scalar has trailing garbage, eg "42a" */
1956 }
28e5dec8 1957 DEBUG_c(PerlIO_printf(Perl_debug_log,
7234c960 1958 "0x%"UVxf" iv(%"NVgf" => %"IVdf") (precise)\n",
28e5dec8
JH
1959 PTR2UV(sv),
1960 SvNVX(sv),
1961 SvIVX(sv)));
1962
1963 } else {
1964 /* IV not precise. No need to convert from PV, as NV
1965 conversion would already have cached IV if it detected
1966 that PV->IV would be better than PV->NV->IV
1967 flags already correct - don't set public IOK. */
1968 DEBUG_c(PerlIO_printf(Perl_debug_log,
7234c960 1969 "0x%"UVxf" iv(%"NVgf" => %"IVdf") (imprecise)\n",
28e5dec8
JH
1970 PTR2UV(sv),
1971 SvNVX(sv),
1972 SvIVX(sv)));
1973 }
1974 /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN,
1975 but the cast (NV)IV_MIN rounds to a the value less (more
1976 negative) than IV_MIN which happens to be equal to SvNVX ??
1977 Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and
1978 NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and
1979 (NV)UVX == NVX are both true, but the values differ. :-(
1980 Hopefully for 2s complement IV_MIN is something like
1981 0x8000000000000000 which will be exact. NWC */
d460ef45 1982 }
25da4f38 1983 else {
607fa7f2 1984 SvUV_set(sv, U_V(SvNVX(sv)));
28e5dec8
JH
1985 if (
1986 (SvNVX(sv) == (NV) SvUVX(sv))
1987#ifndef NV_PRESERVES_UV
1988 /* Make sure it's not 0xFFFFFFFFFFFFFFFF */
1989 /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */
1990 && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv))
1991 /* Don't flag it as "accurately an integer" if the number
1992 came from a (by definition imprecise) NV operation, and
1993 we're outside the range of NV integer precision */
1994#endif
a43d94f2 1995 && SvNOK(sv)
28e5dec8
JH
1996 )
1997 SvIOK_on(sv);
25da4f38 1998 SvIsUV_on(sv);
1c846c1f 1999 DEBUG_c(PerlIO_printf(Perl_debug_log,
57def98f 2000 "0x%"UVxf" 2iv(%"UVuf" => %"IVdf") (as unsigned)\n",
56431972 2001 PTR2UV(sv),
57def98f
JH
2002 SvUVX(sv),
2003 SvUVX(sv)));
25da4f38 2004 }
748a9306
LW
2005 }
2006 else if (SvPOKp(sv) && SvLEN(sv)) {
c2988b20 2007 UV value;
504618e9 2008 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
af359546 2009 /* We want to avoid a possible problem when we cache an IV/ a UV which
25da4f38 2010 may be later translated to an NV, and the resulting NV is not
c2988b20
NC
2011 the same as the direct translation of the initial string
2012 (eg 123.456 can shortcut to the IV 123 with atol(), but we must
2013 be careful to ensure that the value with the .456 is around if the
2014 NV value is requested in the future).
1c846c1f 2015
af359546 2016 This means that if we cache such an IV/a UV, we need to cache the
25da4f38 2017 NV as well. Moreover, we trade speed for space, and do not
28e5dec8 2018 cache the NV if we are sure it's not needed.
25da4f38 2019 */
16b7a9a4 2020
c2988b20
NC
2021 /* SVt_PVNV is one higher than SVt_PVIV, hence this order */
2022 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2023 == IS_NUMBER_IN_UV) {
5e045b90 2024 /* It's definitely an integer, only upgrade to PVIV */
28e5dec8
JH
2025 if (SvTYPE(sv) < SVt_PVIV)
2026 sv_upgrade(sv, SVt_PVIV);
f7bbb42a 2027 (void)SvIOK_on(sv);
c2988b20
NC
2028 } else if (SvTYPE(sv) < SVt_PVNV)
2029 sv_upgrade(sv, SVt_PVNV);
28e5dec8 2030
f2524eef 2031 /* If NVs preserve UVs then we only use the UV value if we know that
c2988b20
NC
2032 we aren't going to call atof() below. If NVs don't preserve UVs
2033 then the value returned may have more precision than atof() will
2034 return, even though value isn't perfectly accurate. */
2035 if ((numtype & (IS_NUMBER_IN_UV
2036#ifdef NV_PRESERVES_UV
2037 | IS_NUMBER_NOT_INT
2038#endif
2039 )) == IS_NUMBER_IN_UV) {
2040 /* This won't turn off the public IOK flag if it was set above */
2041 (void)SvIOKp_on(sv);
2042
2043 if (!(numtype & IS_NUMBER_NEG)) {
2044 /* positive */;
2045 if (value <= (UV)IV_MAX) {
45977657 2046 SvIV_set(sv, (IV)value);
c2988b20 2047 } else {
af359546 2048 /* it didn't overflow, and it was positive. */
607fa7f2 2049 SvUV_set(sv, value);
c2988b20
NC
2050 SvIsUV_on(sv);
2051 }
2052 } else {
2053 /* 2s complement assumption */
2054 if (value <= (UV)IV_MIN) {
45977657 2055 SvIV_set(sv, -(IV)value);
c2988b20
NC
2056 } else {
2057 /* Too negative for an IV. This is a double upgrade, but
d1be9408 2058 I'm assuming it will be rare. */
c2988b20
NC
2059 if (SvTYPE(sv) < SVt_PVNV)
2060 sv_upgrade(sv, SVt_PVNV);
2061 SvNOK_on(sv);
2062 SvIOK_off(sv);
2063 SvIOKp_on(sv);
9d6ce603 2064 SvNV_set(sv, -(NV)value);
45977657 2065 SvIV_set(sv, IV_MIN);
c2988b20
NC
2066 }
2067 }
2068 }
2069 /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we
2070 will be in the previous block to set the IV slot, and the next
2071 block to set the NV slot. So no else here. */
2072
2073 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2074 != IS_NUMBER_IN_UV) {
2075 /* It wasn't an (integer that doesn't overflow the UV). */
3f7c398e 2076 SvNV_set(sv, Atof(SvPVX_const(sv)));
28e5dec8 2077
c2988b20
NC
2078 if (! numtype && ckWARN(WARN_NUMERIC))
2079 not_a_number(sv);
28e5dec8 2080
65202027 2081#if defined(USE_LONG_DOUBLE)
c2988b20
NC
2082 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" PERL_PRIgldbl ")\n",
2083 PTR2UV(sv), SvNVX(sv)));
65202027 2084#else
1779d84d 2085 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"NVgf")\n",
c2988b20 2086 PTR2UV(sv), SvNVX(sv)));
65202027 2087#endif
28e5dec8 2088
28e5dec8 2089#ifdef NV_PRESERVES_UV
af359546
NC
2090 (void)SvIOKp_on(sv);
2091 (void)SvNOK_on(sv);
2092 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2093 SvIV_set(sv, I_V(SvNVX(sv)));
2094 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
2095 SvIOK_on(sv);
2096 } else {
6f207bd3 2097 NOOP; /* Integer is imprecise. NOK, IOKp */
af359546
NC
2098 }
2099 /* UV will not work better than IV */
2100 } else {
2101 if (SvNVX(sv) > (NV)UV_MAX) {
2102 SvIsUV_on(sv);
2103 /* Integer is inaccurate. NOK, IOKp, is UV */
2104 SvUV_set(sv, UV_MAX);
af359546
NC
2105 } else {
2106 SvUV_set(sv, U_V(SvNVX(sv)));
2107 /* 0xFFFFFFFFFFFFFFFF not an issue in here, NVs
2108 NV preservse UV so can do correct comparison. */
2109 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
2110 SvIOK_on(sv);
af359546 2111 } else {
6f207bd3 2112 NOOP; /* Integer is imprecise. NOK, IOKp, is UV */
af359546
NC
2113 }
2114 }
4b0c9573 2115 SvIsUV_on(sv);
af359546 2116 }
28e5dec8 2117#else /* NV_PRESERVES_UV */
c2988b20
NC
2118 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2119 == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) {
af359546 2120 /* The IV/UV slot will have been set from value returned by
c2988b20
NC
2121 grok_number above. The NV slot has just been set using
2122 Atof. */
560b0c46 2123 SvNOK_on(sv);
c2988b20
NC
2124 assert (SvIOKp(sv));
2125 } else {
2126 if (((UV)1 << NV_PRESERVES_UV_BITS) >
2127 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
2128 /* Small enough to preserve all bits. */
2129 (void)SvIOKp_on(sv);
2130 SvNOK_on(sv);
45977657 2131 SvIV_set(sv, I_V(SvNVX(sv)));
c2988b20
NC
2132 if ((NV)(SvIVX(sv)) == SvNVX(sv))
2133 SvIOK_on(sv);
2134 /* Assumption: first non-preserved integer is < IV_MAX,
2135 this NV is in the preserved range, therefore: */
2136 if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))
2137 < (UV)IV_MAX)) {
32fdb065 2138 Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX);
c2988b20
NC
2139 }
2140 } else {
2141 /* IN_UV NOT_INT
2142 0 0 already failed to read UV.
2143 0 1 already failed to read UV.
2144 1 0 you won't get here in this case. IV/UV
2145 slot set, public IOK, Atof() unneeded.
2146 1 1 already read UV.
2147 so there's no point in sv_2iuv_non_preserve() attempting
2148 to use atol, strtol, strtoul etc. */
47031da6 2149# ifdef DEBUGGING
40a17c4c 2150 sv_2iuv_non_preserve (sv, numtype);
47031da6
NC
2151# else
2152 sv_2iuv_non_preserve (sv);
2153# endif
c2988b20
NC
2154 }
2155 }
28e5dec8 2156#endif /* NV_PRESERVES_UV */
a43d94f2
NC
2157 /* It might be more code efficient to go through the entire logic above
2158 and conditionally set with SvIOKp_on() rather than SvIOK(), but it
2159 gets complex and potentially buggy, so more programmer efficient
2160 to do it this way, by turning off the public flags: */
2161 if (!numtype)
2162 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
25da4f38 2163 }
af359546
NC
2164 }
2165 else {
675c862f 2166 if (isGV_with_GP(sv))
159b6efe 2167 return glob_2number(MUTABLE_GV(sv));
180488f8 2168
af359546
NC
2169 if (!(SvFLAGS(sv) & SVs_PADTMP)) {
2170 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
2171 report_uninit(sv);
2172 }
25da4f38
IZ
2173 if (SvTYPE(sv) < SVt_IV)
2174 /* Typically the caller expects that sv_any is not NULL now. */
2175 sv_upgrade(sv, SVt_IV);
af359546
NC
2176 /* Return 0 from the caller. */
2177 return TRUE;
2178 }
2179 return FALSE;
2180}
2181
2182/*
2183=for apidoc sv_2iv_flags
2184
2185Return the integer value of an SV, doing any necessary string
2186conversion. If flags includes SV_GMAGIC, does an mg_get() first.
2187Normally used via the C<SvIV(sv)> and C<SvIVx(sv)> macros.
2188
2189=cut
2190*/
2191
2192IV
5de3775c 2193Perl_sv_2iv_flags(pTHX_ register SV *const sv, const I32 flags)
af359546 2194{
97aff369 2195 dVAR;
af359546 2196 if (!sv)
a0d0e21e 2197 return 0;
cecf5685
NC
2198 if (SvGMAGICAL(sv) || (SvTYPE(sv) == SVt_PVGV && SvVALID(sv))) {
2199 /* FBMs use the same flag bit as SVf_IVisUV, so must let them
50caf62e
NC
2200 cache IVs just in case. In practice it seems that they never
2201 actually anywhere accessible by user Perl code, let alone get used
2202 in anything other than a string context. */
af359546
NC
2203 if (flags & SV_GMAGIC)
2204 mg_get(sv);
2205 if (SvIOKp(sv))
2206 return SvIVX(sv);
2207 if (SvNOKp(sv)) {
2208 return I_V(SvNVX(sv));
2209 }
71c558c3
NC
2210 if (SvPOKp(sv) && SvLEN(sv)) {
2211 UV value;
2212 const int numtype
2213 = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
2214
2215 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2216 == IS_NUMBER_IN_UV) {
2217 /* It's definitely an integer */
2218 if (numtype & IS_NUMBER_NEG) {
2219 if (value < (UV)IV_MIN)
2220 return -(IV)value;
2221 } else {
2222 if (value < (UV)IV_MAX)
2223 return (IV)value;
2224 }
2225 }
2226 if (!numtype) {
2227 if (ckWARN(WARN_NUMERIC))
2228 not_a_number(sv);
2229 }
2230 return I_V(Atof(SvPVX_const(sv)));
2231 }
1c7ff15e
NC
2232 if (SvROK(sv)) {
2233 goto return_rok;
af359546 2234 }
1c7ff15e
NC
2235 assert(SvTYPE(sv) >= SVt_PVMG);
2236 /* This falls through to the report_uninit inside S_sv_2iuv_common. */
4cb1ec55 2237 } else if (SvTHINKFIRST(sv)) {
af359546 2238 if (SvROK(sv)) {
1c7ff15e 2239 return_rok:
af359546 2240 if (SvAMAGIC(sv)) {
aee036bb
DM
2241 SV * tmpstr;
2242 if (flags & SV_SKIP_OVERLOAD)
2243 return 0;
2244 tmpstr=AMG_CALLun(sv,numer);
af359546
NC
2245 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2246 return SvIV(tmpstr);
2247 }
2248 }
2249 return PTR2IV(SvRV(sv));
2250 }
2251 if (SvIsCOW(sv)) {
2252 sv_force_normal_flags(sv, 0);
2253 }
2254 if (SvREADONLY(sv) && !SvOK(sv)) {
2255 if (ckWARN(WARN_UNINITIALIZED))
2256 report_uninit(sv);
2257 return 0;
2258 }
2259 }
2260 if (!SvIOKp(sv)) {
2261 if (S_sv_2iuv_common(aTHX_ sv))
2262 return 0;
79072805 2263 }
1d7c1841
GS
2264 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"IVdf")\n",
2265 PTR2UV(sv),SvIVX(sv)));
25da4f38 2266 return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv);
79072805
LW
2267}
2268
645c22ef 2269/*
891f9566 2270=for apidoc sv_2uv_flags
645c22ef
DM
2271
2272Return the unsigned integer value of an SV, doing any necessary string
891f9566
YST
2273conversion. If flags includes SV_GMAGIC, does an mg_get() first.
2274Normally used via the C<SvUV(sv)> and C<SvUVx(sv)> macros.
645c22ef
DM
2275
2276=cut
2277*/
2278
ff68c719 2279UV
5de3775c 2280Perl_sv_2uv_flags(pTHX_ register SV *const sv, const I32 flags)
ff68c719 2281{
97aff369 2282 dVAR;
ff68c719 2283 if (!sv)
2284 return 0;
cecf5685
NC
2285 if (SvGMAGICAL(sv) || (SvTYPE(sv) == SVt_PVGV && SvVALID(sv))) {
2286 /* FBMs use the same flag bit as SVf_IVisUV, so must let them
50caf62e 2287 cache IVs just in case. */
891f9566
YST
2288 if (flags & SV_GMAGIC)
2289 mg_get(sv);
ff68c719 2290 if (SvIOKp(sv))
2291 return SvUVX(sv);
2292 if (SvNOKp(sv))
2293 return U_V(SvNVX(sv));
71c558c3
NC
2294 if (SvPOKp(sv) && SvLEN(sv)) {
2295 UV value;
2296 const int numtype
2297 = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
2298
2299 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2300 == IS_NUMBER_IN_UV) {
2301 /* It's definitely an integer */
2302 if (!(numtype & IS_NUMBER_NEG))
2303 return value;
2304 }
2305 if (!numtype) {
2306 if (ckWARN(WARN_NUMERIC))
2307 not_a_number(sv);
2308 }
2309 return U_V(Atof(SvPVX_const(sv)));
2310 }
1c7ff15e
NC
2311 if (SvROK(sv)) {
2312 goto return_rok;
3fe9a6f1 2313 }
1c7ff15e
NC
2314 assert(SvTYPE(sv) >= SVt_PVMG);
2315 /* This falls through to the report_uninit inside S_sv_2iuv_common. */
4cb1ec55 2316 } else if (SvTHINKFIRST(sv)) {
ff68c719 2317 if (SvROK(sv)) {
1c7ff15e 2318 return_rok:
deb46114 2319 if (SvAMAGIC(sv)) {
aee036bb
DM
2320 SV *tmpstr;
2321 if (flags & SV_SKIP_OVERLOAD)
2322 return 0;
2323 tmpstr = AMG_CALLun(sv,numer);
deb46114
NC
2324 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2325 return SvUV(tmpstr);
2326 }
2327 }
2328 return PTR2UV(SvRV(sv));
ff68c719 2329 }
765f542d
NC
2330 if (SvIsCOW(sv)) {
2331 sv_force_normal_flags(sv, 0);
8a818333 2332 }
0336b60e 2333 if (SvREADONLY(sv) && !SvOK(sv)) {
0336b60e 2334 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 2335 report_uninit(sv);
ff68c719 2336 return 0;
2337 }
2338 }
af359546
NC
2339 if (!SvIOKp(sv)) {
2340 if (S_sv_2iuv_common(aTHX_ sv))
2341 return 0;
ff68c719 2342 }
25da4f38 2343
1d7c1841
GS
2344 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2uv(%"UVuf")\n",
2345 PTR2UV(sv),SvUVX(sv)));
25da4f38 2346 return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv);
ff68c719 2347}
2348
645c22ef 2349/*
196007d1 2350=for apidoc sv_2nv_flags
645c22ef
DM
2351
2352Return the num value of an SV, doing any necessary string or integer
39d5de13
DM
2353conversion. If flags includes SV_GMAGIC, does an mg_get() first.
2354Normally used via the C<SvNV(sv)> and C<SvNVx(sv)> macros.
645c22ef
DM
2355
2356=cut
2357*/
2358
65202027 2359NV
39d5de13 2360Perl_sv_2nv_flags(pTHX_ register SV *const sv, const I32 flags)
79072805 2361{
97aff369 2362 dVAR;
79072805
LW
2363 if (!sv)
2364 return 0.0;
cecf5685
NC
2365 if (SvGMAGICAL(sv) || (SvTYPE(sv) == SVt_PVGV && SvVALID(sv))) {
2366 /* FBMs use the same flag bit as SVf_IVisUV, so must let them
50caf62e 2367 cache IVs just in case. */
39d5de13
DM
2368 if (flags & SV_GMAGIC)
2369 mg_get(sv);
463ee0b2
LW
2370 if (SvNOKp(sv))
2371 return SvNVX(sv);
0aa395f8 2372 if ((SvPOKp(sv) && SvLEN(sv)) && !SvIOKp(sv)) {
041457d9 2373 if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) &&
504618e9 2374 !grok_number(SvPVX_const(sv), SvCUR(sv), NULL))
a0d0e21e 2375 not_a_number(sv);
3f7c398e 2376 return Atof(SvPVX_const(sv));
a0d0e21e 2377 }
25da4f38 2378 if (SvIOKp(sv)) {
1c846c1f 2379 if (SvIsUV(sv))
65202027 2380 return (NV)SvUVX(sv);
25da4f38 2381 else
65202027 2382 return (NV)SvIVX(sv);
47a72cb8
NC
2383 }
2384 if (SvROK(sv)) {
2385 goto return_rok;
2386 }
2387 assert(SvTYPE(sv) >= SVt_PVMG);
2388 /* This falls through to the report_uninit near the end of the
2389 function. */
2390 } else if (SvTHINKFIRST(sv)) {
a0d0e21e 2391 if (SvROK(sv)) {
47a72cb8 2392 return_rok:
deb46114 2393 if (SvAMAGIC(sv)) {
aee036bb
DM
2394 SV *tmpstr;
2395 if (flags & SV_SKIP_OVERLOAD)
2396 return 0;
2397 tmpstr = AMG_CALLun(sv,numer);
deb46114
NC
2398 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2399 return SvNV(tmpstr);
2400 }
2401 }
2402 return PTR2NV(SvRV(sv));
a0d0e21e 2403 }
765f542d
NC
2404 if (SvIsCOW(sv)) {
2405 sv_force_normal_flags(sv, 0);
8a818333 2406 }
0336b60e 2407 if (SvREADONLY(sv) && !SvOK(sv)) {
599cee73 2408 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 2409 report_uninit(sv);
ed6116ce
LW
2410 return 0.0;
2411 }
79072805
LW
2412 }
2413 if (SvTYPE(sv) < SVt_NV) {
7e25a7e9
NC
2414 /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */
2415 sv_upgrade(sv, SVt_NV);
906f284f 2416#ifdef USE_LONG_DOUBLE
097ee67d 2417 DEBUG_c({
f93f4e46 2418 STORE_NUMERIC_LOCAL_SET_STANDARD();
1d7c1841
GS
2419 PerlIO_printf(Perl_debug_log,
2420 "0x%"UVxf" num(%" PERL_PRIgldbl ")\n",
2421 PTR2UV(sv), SvNVX(sv));
572bbb43
GS
2422 RESTORE_NUMERIC_LOCAL();
2423 });
65202027 2424#else
572bbb43 2425 DEBUG_c({
f93f4e46 2426 STORE_NUMERIC_LOCAL_SET_STANDARD();
1779d84d 2427 PerlIO_printf(Perl_debug_log, "0x%"UVxf" num(%"NVgf")\n",
1d7c1841 2428 PTR2UV(sv), SvNVX(sv));
097ee67d
JH
2429 RESTORE_NUMERIC_LOCAL();
2430 });
572bbb43 2431#endif
79072805
LW
2432 }
2433 else if (SvTYPE(sv) < SVt_PVNV)
2434 sv_upgrade(sv, SVt_PVNV);
59d8ce62
NC
2435 if (SvNOKp(sv)) {
2436 return SvNVX(sv);
61604483 2437 }
59d8ce62 2438 if (SvIOKp(sv)) {
9d6ce603 2439 SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv));
28e5dec8 2440#ifdef NV_PRESERVES_UV
a43d94f2
NC
2441 if (SvIOK(sv))
2442 SvNOK_on(sv);
2443 else
2444 SvNOKp_on(sv);
28e5dec8
JH
2445#else
2446 /* Only set the public NV OK flag if this NV preserves the IV */
2447 /* Check it's not 0xFFFFFFFFFFFFFFFF */
a43d94f2
NC
2448 if (SvIOK(sv) &&
2449 SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv))))
28e5dec8
JH
2450 : (SvIVX(sv) == I_V(SvNVX(sv))))
2451 SvNOK_on(sv);
2452 else
2453 SvNOKp_on(sv);
2454#endif
93a17b20 2455 }
748a9306 2456 else if (SvPOKp(sv) && SvLEN(sv)) {
c2988b20 2457 UV value;
3f7c398e 2458 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
041457d9 2459 if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC))
a0d0e21e 2460 not_a_number(sv);
28e5dec8 2461#ifdef NV_PRESERVES_UV
c2988b20
NC
2462 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2463 == IS_NUMBER_IN_UV) {
5e045b90 2464 /* It's definitely an integer */
9d6ce603 2465 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value);
c2988b20 2466 } else
3f7c398e 2467 SvNV_set(sv, Atof(SvPVX_const(sv)));
a43d94f2
NC
2468 if (numtype)
2469 SvNOK_on(sv);
2470 else
2471 SvNOKp_on(sv);
28e5dec8 2472#else
3f7c398e 2473 SvNV_set(sv, Atof(SvPVX_const(sv)));
28e5dec8
JH
2474 /* Only set the public NV OK flag if this NV preserves the value in
2475 the PV at least as well as an IV/UV would.
2476 Not sure how to do this 100% reliably. */
2477 /* if that shift count is out of range then Configure's test is
2478 wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS ==
2479 UV_BITS */
2480 if (((UV)1 << NV_PRESERVES_UV_BITS) >
c2988b20 2481 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
28e5dec8 2482 SvNOK_on(sv); /* Definitely small enough to preserve all bits */
c2988b20
NC
2483 } else if (!(numtype & IS_NUMBER_IN_UV)) {
2484 /* Can't use strtol etc to convert this string, so don't try.
2485 sv_2iv and sv_2uv will use the NV to convert, not the PV. */
2486 SvNOK_on(sv);
2487 } else {
2488 /* value has been set. It may not be precise. */
2489 if ((numtype & IS_NUMBER_NEG) && (value > (UV)IV_MIN)) {
2490 /* 2s complement assumption for (UV)IV_MIN */
2491 SvNOK_on(sv); /* Integer is too negative. */
2492 } else {
2493 SvNOKp_on(sv);
2494 SvIOKp_on(sv);
6fa402ec 2495
c2988b20 2496 if (numtype & IS_NUMBER_NEG) {
45977657 2497 SvIV_set(sv, -(IV)value);
c2988b20 2498 } else if (value <= (UV)IV_MAX) {
45977657 2499 SvIV_set(sv, (IV)value);
c2988b20 2500 } else {
607fa7f2 2501 SvUV_set(sv, value);
c2988b20
NC
2502 SvIsUV_on(sv);
2503 }
2504
2505 if (numtype & IS_NUMBER_NOT_INT) {
2506 /* I believe that even if the original PV had decimals,
2507 they are lost beyond the limit of the FP precision.
2508 However, neither is canonical, so both only get p
2509 flags. NWC, 2000/11/25 */
2510 /* Both already have p flags, so do nothing */
2511 } else {
66a1b24b 2512 const NV nv = SvNVX(sv);
c2988b20
NC
2513 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2514 if (SvIVX(sv) == I_V(nv)) {
2515 SvNOK_on(sv);
c2988b20 2516 } else {
c2988b20
NC
2517 /* It had no "." so it must be integer. */
2518 }
00b6aa41 2519 SvIOK_on(sv);
c2988b20
NC
2520 } else {
2521 /* between IV_MAX and NV(UV_MAX).
2522 Could be slightly > UV_MAX */
6fa402ec 2523
c2988b20
NC
2524 if (numtype & IS_NUMBER_NOT_INT) {
2525 /* UV and NV both imprecise. */
2526 } else {
66a1b24b 2527 const UV nv_as_uv = U_V(nv);
c2988b20
NC
2528
2529 if (value == nv_as_uv && SvUVX(sv) != UV_MAX) {
2530 SvNOK_on(sv);
c2988b20 2531 }
00b6aa41 2532 SvIOK_on(sv);
c2988b20
NC
2533 }
2534 }
2535 }
2536 }
2537 }
a43d94f2
NC
2538 /* It might be more code efficient to go through the entire logic above
2539 and conditionally set with SvNOKp_on() rather than SvNOK(), but it
2540 gets complex and potentially buggy, so more programmer efficient
2541 to do it this way, by turning off the public flags: */
2542 if (!numtype)
2543 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
28e5dec8 2544#endif /* NV_PRESERVES_UV */
93a17b20 2545 }
79072805 2546 else {
f7877b28 2547 if (isGV_with_GP(sv)) {
159b6efe 2548 glob_2number(MUTABLE_GV(sv));
180488f8
NC
2549 return 0.0;
2550 }
2551
041457d9 2552 if (!PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP) && ckWARN(WARN_UNINITIALIZED))
29489e7c 2553 report_uninit(sv);
7e25a7e9
NC
2554 assert (SvTYPE(sv) >= SVt_NV);
2555 /* Typically the caller expects that sv_any is not NULL now. */
2556 /* XXX Ilya implies that this is a bug in callers that assume this
2557 and ideally should be fixed. */
a0d0e21e 2558 return 0.0;
79072805 2559 }
572bbb43 2560#if defined(USE_LONG_DOUBLE)
097ee67d 2561 DEBUG_c({
f93f4e46 2562 STORE_NUMERIC_LOCAL_SET_STANDARD();
1d7c1841
GS
2563 PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" PERL_PRIgldbl ")\n",
2564 PTR2UV(sv), SvNVX(sv));
572bbb43
GS
2565 RESTORE_NUMERIC_LOCAL();
2566 });
65202027 2567#else
572bbb43 2568 DEBUG_c({
f93f4e46 2569 STORE_NUMERIC_LOCAL_SET_STANDARD();
1779d84d 2570 PerlIO_printf(Perl_debug_log, "0x%"UVxf" 1nv(%"NVgf")\n",
1d7c1841 2571 PTR2UV(sv), SvNVX(sv));
097ee67d
JH
2572 RESTORE_NUMERIC_LOCAL();
2573 });
572bbb43 2574#endif
463ee0b2 2575 return SvNVX(sv);
79072805
LW
2576}
2577
800401ee
JH
2578/*
2579=for apidoc sv_2num
2580
2581Return an SV with the numeric value of the source SV, doing any necessary
a196a5fa
JH
2582reference or overload conversion. You must use the C<SvNUM(sv)> macro to
2583access this function.
800401ee
JH
2584
2585=cut
2586*/
2587
2588SV *
5de3775c 2589Perl_sv_2num(pTHX_ register SV *const sv)
800401ee 2590{
7918f24d
NC
2591 PERL_ARGS_ASSERT_SV_2NUM;
2592
b9ee0594
RGS
2593 if (!SvROK(sv))
2594 return sv;
800401ee
JH
2595 if (SvAMAGIC(sv)) {
2596 SV * const tmpsv = AMG_CALLun(sv,numer);
a02ec77a 2597 TAINT_IF(tmpsv && SvTAINTED(tmpsv));
800401ee
JH
2598 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv))))
2599 return sv_2num(tmpsv);
2600 }
2601 return sv_2mortal(newSVuv(PTR2UV(SvRV(sv))));
2602}
2603
645c22ef
DM
2604/* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or
2605 * UV as a string towards the end of buf, and return pointers to start and
2606 * end of it.
2607 *
2608 * We assume that buf is at least TYPE_CHARS(UV) long.
2609 */
2610
864dbfa3 2611static char *
5de3775c 2612S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob)
25da4f38 2613{
25da4f38 2614 char *ptr = buf + TYPE_CHARS(UV);
823a54a3 2615 char * const ebuf = ptr;
25da4f38 2616 int sign;
25da4f38 2617
7918f24d
NC
2618 PERL_ARGS_ASSERT_UIV_2BUF;
2619
25da4f38
IZ
2620 if (is_uv)
2621 sign = 0;
2622 else if (iv >= 0) {
2623 uv = iv;
2624 sign = 0;
2625 } else {
2626 uv = -iv;
2627 sign = 1;
2628 }
2629 do {
eb160463 2630 *--ptr = '0' + (char)(uv % 10);
25da4f38
IZ
2631 } while (uv /= 10);
2632 if (sign)
2633 *--ptr = '-';
2634 *peob = ebuf;
2635 return ptr;
2636}
2637
645c22ef
DM
2638/*
2639=for apidoc sv_2pv_flags
2640
ff276b08 2641Returns a pointer to the string value of an SV, and sets *lp to its length.
645c22ef
DM
2642If flags includes SV_GMAGIC, does an mg_get() first. Coerces sv to a string
2643if necessary.
2644Normally invoked via the C<SvPV_flags> macro. C<sv_2pv()> and C<sv_2pv_nomg>
2645usually end up here too.
2646
2647=cut
2648*/
2649
8d6d96c1 2650char *
5de3775c 2651Perl_sv_2pv_flags(pTHX_ register SV *const sv, STRLEN *const lp, const I32 flags)
8d6d96c1 2652{
97aff369 2653 dVAR;
79072805 2654 register char *s;
79072805 2655
463ee0b2 2656 if (!sv) {
cdb061a3
NC
2657 if (lp)
2658 *lp = 0;
73d840c0 2659 return (char *)"";
463ee0b2 2660 }
8990e307 2661 if (SvGMAGICAL(sv)) {
8d6d96c1
HS
2662 if (flags & SV_GMAGIC)
2663 mg_get(sv);
463ee0b2 2664 if (SvPOKp(sv)) {
cdb061a3
NC
2665 if (lp)
2666 *lp = SvCUR(sv);
10516c54
NC
2667 if (flags & SV_MUTABLE_RETURN)
2668 return SvPVX_mutable(sv);
4d84ee25
NC
2669 if (flags & SV_CONST_RETURN)
2670 return (char *)SvPVX_const(sv);
463ee0b2
LW
2671 return SvPVX(sv);
2672 }
75dfc8ec
NC
2673 if (SvIOKp(sv) || SvNOKp(sv)) {
2674 char tbuf[64]; /* Must fit sprintf/Gconvert of longest IV/NV */
75dfc8ec
NC
2675 STRLEN len;
2676
2677 if (SvIOKp(sv)) {
e80fed9d 2678 len = SvIsUV(sv)
d9fad198
JH
2679 ? my_snprintf(tbuf, sizeof(tbuf), "%"UVuf, (UV)SvUVX(sv))
2680 : my_snprintf(tbuf, sizeof(tbuf), "%"IVdf, (IV)SvIVX(sv));
75dfc8ec 2681 } else {
e8ada2d0
NC
2682 Gconvert(SvNVX(sv), NV_DIG, 0, tbuf);
2683 len = strlen(tbuf);
75dfc8ec 2684 }
b5b886f0
NC
2685 assert(!SvROK(sv));
2686 {
75dfc8ec
NC
2687 dVAR;
2688
2689#ifdef FIXNEGATIVEZERO
e8ada2d0
NC
2690 if (len == 2 && tbuf[0] == '-' && tbuf[1] == '0') {
2691 tbuf[0] = '0';
2692 tbuf[1] = 0;
75dfc8ec
NC
2693 len = 1;
2694 }
2695#endif
2696 SvUPGRADE(sv, SVt_PV);
2697 if (lp)
2698 *lp = len;
2699 s = SvGROW_mutable(sv, len + 1);
2700 SvCUR_set(sv, len);
2701 SvPOKp_on(sv);
10edeb5d 2702 return (char*)memcpy(s, tbuf, len + 1);
75dfc8ec 2703 }
463ee0b2 2704 }
1c7ff15e
NC
2705 if (SvROK(sv)) {
2706 goto return_rok;
2707 }
2708 assert(SvTYPE(sv) >= SVt_PVMG);
2709 /* This falls through to the report_uninit near the end of the
2710 function. */
2711 } else if (SvTHINKFIRST(sv)) {
ed6116ce 2712 if (SvROK(sv)) {
1c7ff15e 2713 return_rok:
deb46114 2714 if (SvAMAGIC(sv)) {
aee036bb
DM
2715 SV *tmpstr;
2716 if (flags & SV_SKIP_OVERLOAD)
2717 return NULL;
2718 tmpstr = AMG_CALLun(sv,string);
a02ec77a 2719 TAINT_IF(tmpstr && SvTAINTED(tmpstr));
deb46114
NC
2720 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2721 /* Unwrap this: */
2722 /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr);
2723 */
2724
2725 char *pv;
2726 if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) {
2727 if (flags & SV_CONST_RETURN) {
2728 pv = (char *) SvPVX_const(tmpstr);
2729 } else {
2730 pv = (flags & SV_MUTABLE_RETURN)
2731 ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr);
2732 }
2733 if (lp)
2734 *lp = SvCUR(tmpstr);
50adf7d2 2735 } else {
deb46114 2736 pv = sv_2pv_flags(tmpstr, lp, flags);
50adf7d2 2737 }
deb46114
NC
2738 if (SvUTF8(tmpstr))
2739 SvUTF8_on(sv);
2740 else
2741 SvUTF8_off(sv);
2742 return pv;
50adf7d2 2743 }
deb46114
NC
2744 }
2745 {
fafee734
NC
2746 STRLEN len;
2747 char *retval;
2748 char *buffer;
d2c6dc5e 2749 SV *const referent = SvRV(sv);
d8eae41e
NC
2750
2751 if (!referent) {
fafee734
NC
2752 len = 7;
2753 retval = buffer = savepvn("NULLREF", len);
5c35adbb 2754 } else if (SvTYPE(referent) == SVt_REGEXP) {
d2c6dc5e 2755 REGEXP * const re = (REGEXP *)MUTABLE_PTR(referent);
67d2d14d
AB
2756 I32 seen_evals = 0;
2757
2758 assert(re);
2759
2760 /* If the regex is UTF-8 we want the containing scalar to
2761 have an UTF-8 flag too */
2762 if (RX_UTF8(re))
2763 SvUTF8_on(sv);
2764 else
2765 SvUTF8_off(sv);
2766
2767 if ((seen_evals = RX_SEEN_EVALS(re)))
2768 PL_reginterp_cnt += seen_evals;
2769
2770 if (lp)
2771 *lp = RX_WRAPLEN(re);
2772
2773 return RX_WRAPPED(re);
d8eae41e
NC
2774 } else {
2775 const char *const typestr = sv_reftype(referent, 0);
fafee734
NC
2776 const STRLEN typelen = strlen(typestr);
2777 UV addr = PTR2UV(referent);
2778 const char *stashname = NULL;
2779 STRLEN stashnamelen = 0; /* hush, gcc */
2780 const char *buffer_end;
d8eae41e 2781
d8eae41e 2782 if (SvOBJECT(referent)) {
fafee734
NC
2783 const HEK *const name = HvNAME_HEK(SvSTASH(referent));
2784
2785 if (name) {
2786 stashname = HEK_KEY(name);
2787 stashnamelen = HEK_LEN(name);
2788
2789 if (HEK_UTF8(name)) {
2790 SvUTF8_on(sv);
2791 } else {
2792 SvUTF8_off(sv);
2793 }
2794 } else {
2795 stashname = "__ANON__";
2796 stashnamelen = 8;
2797 }
2798 len = stashnamelen + 1 /* = */ + typelen + 3 /* (0x */
2799 + 2 * sizeof(UV) + 2 /* )\0 */;
2800 } else {
2801 len = typelen + 3 /* (0x */
2802 + 2 * sizeof(UV) + 2 /* )\0 */;
d8eae41e 2803 }
fafee734
NC
2804
2805 Newx(buffer, len, char);
2806 buffer_end = retval = buffer + len;
2807
2808 /* Working backwards */
2809 *--retval = '\0';
2810 *--retval = ')';
2811 do {
2812 *--retval = PL_hexdigit[addr & 15];
2813 } while (addr >>= 4);
2814 *--retval = 'x';
2815 *--retval = '0';
2816 *--retval = '(';
2817
2818 retval -= typelen;
2819 memcpy(retval, typestr, typelen);
2820
2821 if (stashname) {
2822 *--retval = '=';
2823 retval -= stashnamelen;
2824 memcpy(retval, stashname, stashnamelen);
2825 }
2826 /* retval may not neccesarily have reached the start of the
2827 buffer here. */
2828 assert (retval >= buffer);
2829
2830 len = buffer_end - retval - 1; /* -1 for that \0 */
c080367d 2831 }
042dae7a 2832 if (lp)
fafee734
NC
2833 *lp = len;
2834 SAVEFREEPV(buffer);
2835 return retval;
463ee0b2 2836 }
79072805 2837 }
0336b60e 2838 if (SvREADONLY(sv) && !SvOK(sv)) {
cdb061a3
NC
2839 if (lp)
2840 *lp = 0;
9f621bb0
NC
2841 if (flags & SV_UNDEF_RETURNS_NULL)
2842 return NULL;
2843 if (ckWARN(WARN_UNINITIALIZED))
2844 report_uninit(sv);
73d840c0 2845 return (char *)"";
79072805 2846 }
79072805 2847 }
28e5dec8
JH
2848 if (SvIOK(sv) || ((SvIOKp(sv) && !SvNOKp(sv)))) {
2849 /* I'm assuming that if both IV and NV are equally valid then
2850 converting the IV is going to be more efficient */
e1ec3a88 2851 const U32 isUIOK = SvIsUV(sv);
28e5dec8
JH
2852 char buf[TYPE_CHARS(UV)];
2853 char *ebuf, *ptr;
97a130b8 2854 STRLEN len;
28e5dec8
JH
2855
2856 if (SvTYPE(sv) < SVt_PVIV)
2857 sv_upgrade(sv, SVt_PVIV);
4ea1d550 2858 ptr = uiv_2buf(buf, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf);
97a130b8 2859 len = ebuf - ptr;
5902b6a9 2860 /* inlined from sv_setpvn */
97a130b8
NC
2861 s = SvGROW_mutable(sv, len + 1);
2862 Move(ptr, s, len, char);
2863 s += len;
28e5dec8 2864 *s = '\0';
28e5dec8
JH
2865 }
2866 else if (SvNOKp(sv)) {
4ee39169 2867 dSAVE_ERRNO;
79072805
LW
2868 if (SvTYPE(sv) < SVt_PVNV)
2869 sv_upgrade(sv, SVt_PVNV);
1c846c1f 2870 /* The +20 is pure guesswork. Configure test needed. --jhi */
5902b6a9 2871 s = SvGROW_mutable(sv, NV_DIG + 20);
c81271c3 2872 /* some Xenix systems wipe out errno here */
79072805 2873#ifdef apollo
463ee0b2 2874 if (SvNVX(sv) == 0.0)
d1307786 2875 my_strlcpy(s, "0", SvLEN(sv));
79072805
LW
2876 else
2877#endif /*apollo*/
bbce6d69 2878 {
2d4389e4 2879 Gconvert(SvNVX(sv), NV_DIG, 0, s);
bbce6d69 2880 }
4ee39169 2881 RESTORE_ERRNO;
a0d0e21e 2882#ifdef FIXNEGATIVEZERO
20773dcd
NC
2883 if (*s == '-' && s[1] == '0' && !s[2]) {
2884 s[0] = '0';
2885 s[1] = 0;
2886 }
a0d0e21e 2887#endif
79072805
LW
2888 while (*s) s++;
2889#ifdef hcx
2890 if (s[-1] == '.')
46fc3d4c 2891 *--s = '\0';
79072805
LW
2892#endif
2893 }
79072805 2894 else {
8d1c3e26
NC
2895 if (isGV_with_GP(sv)) {
2896 GV *const gv = MUTABLE_GV(sv);
2897 const U32 wasfake = SvFLAGS(gv) & SVf_FAKE;
2898 SV *const buffer = sv_newmortal();
2899
2900 /* FAKE globs can get coerced, so need to turn this off temporarily
2901 if it is on. */
2902 SvFAKE_off(gv);
2903 gv_efullname3(buffer, gv, "*");
2904 SvFLAGS(gv) |= wasfake;
2905
1809c940
DM
2906 if (SvPOK(buffer)) {
2907 if (lp) {
2908 *lp = SvCUR(buffer);
2909 }
2910 return SvPVX(buffer);
2911 }
2912 else {
2913 if (lp)
2914 *lp = 0;
2915 return (char *)"";
8d1c3e26 2916 }
8d1c3e26 2917 }
180488f8 2918
cdb061a3 2919 if (lp)
00b6aa41 2920 *lp = 0;
9f621bb0
NC
2921 if (flags & SV_UNDEF_RETURNS_NULL)
2922 return NULL;
2923 if (!PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP) && ckWARN(WARN_UNINITIALIZED))
2924 report_uninit(sv);
25da4f38
IZ
2925 if (SvTYPE(sv) < SVt_PV)
2926 /* Typically the caller expects that sv_any is not NULL now. */
2927 sv_upgrade(sv, SVt_PV);
73d840c0 2928 return (char *)"";
79072805 2929 }
cdb061a3 2930 {
823a54a3 2931 const STRLEN len = s - SvPVX_const(sv);
cdb061a3
NC
2932 if (lp)
2933 *lp = len;
2934 SvCUR_set(sv, len);
2935 }
79072805 2936 SvPOK_on(sv);
1d7c1841 2937 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2pv(%s)\n",
3f7c398e 2938 PTR2UV(sv),SvPVX_const(sv)));
4d84ee25
NC
2939 if (flags & SV_CONST_RETURN)
2940 return (char *)SvPVX_const(sv);
10516c54
NC
2941 if (flags & SV_MUTABLE_RETURN)
2942 return SvPVX_mutable(sv);
463ee0b2
LW
2943 return SvPVX(sv);
2944}
2945
645c22ef 2946/*
6050d10e
JP
2947=for apidoc sv_copypv
2948
2949Copies a stringified representation of the source SV into the
2950destination SV. Automatically performs any necessary mg_get and
54f0641b 2951coercion of numeric values into strings. Guaranteed to preserve
2575c402 2952UTF8 flag even from overloaded objects. Similar in nature to
54f0641b
NIS
2953sv_2pv[_flags] but operates directly on an SV instead of just the
2954string. Mostly uses sv_2pv_flags to do its work, except when that
6050d10e
JP
2955would lose the UTF-8'ness of the PV.
2956
2957=cut
2958*/
2959
2960void
5de3775c 2961Perl_sv_copypv(pTHX_ SV *const dsv, register SV *const ssv)
6050d10e 2962{
446eaa42 2963 STRLEN len;
53c1dcc0 2964 const char * const s = SvPV_const(ssv,len);
7918f24d
NC
2965
2966 PERL_ARGS_ASSERT_SV_COPYPV;
2967
cb50f42d 2968 sv_setpvn(dsv,s,len);
446eaa42 2969 if (SvUTF8(ssv))
cb50f42d 2970 SvUTF8_on(dsv);
446eaa42 2971 else
cb50f42d 2972 SvUTF8_off(dsv);
6050d10e
JP
2973}
2974
2975/*
645c22ef
DM
2976=for apidoc sv_2pvbyte
2977
2978Return a pointer to the byte-encoded representation of the SV, and set *lp
1e54db1a 2979to its length. May cause the SV to be downgraded from UTF-8 as a
645c22ef
DM
2980side-effect.
2981
2982Usually accessed via the C<SvPVbyte> macro.
2983
2984=cut
2985*/
2986
7340a771 2987char *
5de3775c 2988Perl_sv_2pvbyte(pTHX_ register SV *const sv, STRLEN *const lp)
7340a771 2989{
7918f24d
NC
2990 PERL_ARGS_ASSERT_SV_2PVBYTE;
2991
0875d2fe 2992 sv_utf8_downgrade(sv,0);
97972285 2993 return lp ? SvPV(sv,*lp) : SvPV_nolen(sv);
7340a771
GS
2994}
2995
645c22ef 2996/*
035cbb0e
RGS
2997=for apidoc sv_2pvutf8
2998
2999Return a pointer to the UTF-8-encoded representation of the SV, and set *lp
3000to its length. May cause the SV to be upgraded to UTF-8 as a side-effect.
3001
3002Usually accessed via the C<SvPVutf8> macro.
3003
3004=cut
3005*/
645c22ef 3006
7340a771 3007char *
7bc54cea 3008Perl_sv_2pvutf8(pTHX_ register SV *const sv, STRLEN *const lp)
7340a771 3009{
7918f24d
NC
3010 PERL_ARGS_ASSERT_SV_2PVUTF8;
3011
035cbb0e
RGS
3012 sv_utf8_upgrade(sv);
3013 return lp ? SvPV(sv,*lp) : SvPV_nolen(sv);
7340a771 3014}
1c846c1f 3015
7ee2227d 3016
645c22ef
DM
3017/*
3018=for apidoc sv_2bool
3019
3020This function is only called on magical items, and is only used by
8cf8f3d1 3021sv_true() or its macro equivalent.
645c22ef
DM
3022
3023=cut
3024*/
3025
463ee0b2 3026bool
7bc54cea 3027Perl_sv_2bool(pTHX_ register SV *const sv)
463ee0b2 3028{
97aff369 3029 dVAR;
7918f24d
NC
3030
3031 PERL_ARGS_ASSERT_SV_2BOOL;
3032
5b295bef 3033 SvGETMAGIC(sv);
463ee0b2 3034
a0d0e21e
LW
3035 if (!SvOK(sv))
3036 return 0;
3037 if (SvROK(sv)) {
fabdb6c0
AL
3038 if (SvAMAGIC(sv)) {
3039 SV * const tmpsv = AMG_CALLun(sv,bool_);
3040 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv))))
f2338a2e 3041 return cBOOL(SvTRUE(tmpsv));
fabdb6c0
AL
3042 }
3043 return SvRV(sv) != 0;
a0d0e21e 3044 }
463ee0b2 3045 if (SvPOKp(sv)) {
53c1dcc0
AL
3046 register XPV* const Xpvtmp = (XPV*)SvANY(sv);
3047 if (Xpvtmp &&
339049b0 3048 (*sv->sv_u.svu_pv > '0' ||
11343788 3049 Xpvtmp->xpv_cur > 1 ||
339049b0 3050 (Xpvtmp->xpv_cur && *sv->sv_u.svu_pv != '0')))
463ee0b2
LW
3051 return 1;
3052 else
3053 return 0;
3054 }
3055 else {
3056 if (SvIOKp(sv))
3057 return SvIVX(sv) != 0;
3058 else {
3059 if (SvNOKp(sv))
3060 return SvNVX(sv) != 0.0;
180488f8 3061 else {
f7877b28 3062 if (isGV_with_GP(sv))
180488f8
NC
3063 return TRUE;
3064 else
3065 return FALSE;
3066 }
463ee0b2
LW
3067 }
3068 }
79072805
LW
3069}
3070
c461cf8f
JH
3071/*
3072=for apidoc sv_utf8_upgrade
3073
78ea37eb 3074Converts the PV of an SV to its UTF-8-encoded form.
645c22ef 3075Forces the SV to string form if it is not already.
2bbc8d55 3076Will C<mg_get> on C<sv> if appropriate.
4411f3b6 3077Always sets the SvUTF8 flag to avoid future validity checks even
2bbc8d55
SP
3078if the whole string is the same in UTF-8 as not.
3079Returns the number of bytes in the converted string
c461cf8f 3080
13a6c0e0
JH
3081This is not as a general purpose byte encoding to Unicode interface:
3082use the Encode extension for that.
3083
fe749c9a
KW
3084=for apidoc sv_utf8_upgrade_nomg
3085
3086Like sv_utf8_upgrade, but doesn't do magic on C<sv>
3087
8d6d96c1
HS
3088=for apidoc sv_utf8_upgrade_flags
3089
78ea37eb 3090Converts the PV of an SV to its UTF-8-encoded form.
645c22ef 3091Forces the SV to string form if it is not already.
8d6d96c1 3092Always sets the SvUTF8 flag to avoid future validity checks even
2bbc8d55
SP
3093if all the bytes are invariant in UTF-8. If C<flags> has C<SV_GMAGIC> bit set,
3094will C<mg_get> on C<sv> if appropriate, else not.
3095Returns the number of bytes in the converted string
3096C<sv_utf8_upgrade> and
8d6d96c1
HS
3097C<sv_utf8_upgrade_nomg> are implemented in terms of this function.
3098
13a6c0e0
JH
3099This is not as a general purpose byte encoding to Unicode interface:
3100use the Encode extension for that.
3101
8d6d96c1 3102=cut
b3ab6785
KW
3103
3104The grow version is currently not externally documented. It adds a parameter,
3105extra, which is the number of unused bytes the string of 'sv' is guaranteed to
3106have free after it upon return. This allows the caller to reserve extra space
3107that it intends to fill, to avoid extra grows.
3108
3109Also externally undocumented for the moment is the flag SV_FORCE_UTF8_UPGRADE,
3110which can be used to tell this function to not first check to see if there are
3111any characters that are different in UTF-8 (variant characters) which would
3112force it to allocate a new string to sv, but to assume there are. Typically
3113this flag is used by a routine that has already parsed the string to find that
3114there are such characters, and passes this information on so that the work
3115doesn't have to be repeated.
3116
3117(One might think that the calling routine could pass in the position of the
3118first such variant, so it wouldn't have to be found again. But that is not the
3119case, because typically when the caller is likely to use this flag, it won't be
3120calling this routine unless it finds something that won't fit into a byte.
3121Otherwise it tries to not upgrade and just use bytes. But some things that
3122do fit into a byte are variants in utf8, and the caller may not have been
3123keeping track of these.)
3124
3125If the routine itself changes the string, it adds a trailing NUL. Such a NUL
3126isn't guaranteed due to having other routines do the work in some input cases,
3127or if the input is already flagged as being in utf8.
3128
3129The speed of this could perhaps be improved for many cases if someone wanted to
3130write a fast function that counts the number of variant characters in a string,
3131especially if it could return the position of the first one.
3132
8d6d96c1
HS
3133*/
3134
3135STRLEN
b3ab6785 3136Perl_sv_utf8_upgrade_flags_grow(pTHX_ register SV *const sv, const I32 flags, STRLEN extra)
8d6d96c1 3137{
97aff369 3138 dVAR;
7918f24d 3139
b3ab6785 3140 PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS_GROW;
7918f24d 3141
808c356f
RGS
3142 if (sv == &PL_sv_undef)
3143 return 0;
e0e62c2a
NIS
3144 if (!SvPOK(sv)) {
3145 STRLEN len = 0;
d52b7888
NC
3146 if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) {
3147 (void) sv_2pv_flags(sv,&len, flags);
b3ab6785
KW
3148 if (SvUTF8(sv)) {
3149 if (extra) SvGROW(sv, SvCUR(sv) + extra);
d52b7888 3150 return len;
b3ab6785 3151 }
d52b7888
NC
3152 } else {
3153 (void) SvPV_force(sv,len);
3154 }
e0e62c2a 3155 }
4411f3b6 3156
f5cee72b 3157 if (SvUTF8(sv)) {
b3ab6785 3158 if (extra) SvGROW(sv, SvCUR(sv) + extra);
5fec3b1d 3159 return SvCUR(sv);
f5cee72b 3160 }
5fec3b1d 3161
765f542d
NC
3162 if (SvIsCOW(sv)) {
3163 sv_force_normal_flags(sv, 0);
db42d148
NIS
3164 }
3165
b3ab6785 3166 if (PL_encoding && !(flags & SV_UTF8_NO_ENCODING)) {
799ef3cb 3167 sv_recode_to_utf8(sv, PL_encoding);
b3ab6785
KW
3168 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3169 return SvCUR(sv);
3170 }
3171
4e93345f
KW
3172 if (SvCUR(sv) == 0) {
3173 if (extra) SvGROW(sv, extra);
3174 } else { /* Assume Latin-1/EBCDIC */
c4e7c712 3175 /* This function could be much more efficient if we
2bbc8d55 3176 * had a FLAG in SVs to signal if there are any variant
c4e7c712 3177 * chars in the PV. Given that there isn't such a flag
b3ab6785
KW
3178 * make the loop as fast as possible (although there are certainly ways
3179 * to speed this up, eg. through vectorization) */
3180 U8 * s = (U8 *) SvPVX_const(sv);
3181 U8 * e = (U8 *) SvEND(sv);
3182 U8 *t = s;
3183 STRLEN two_byte_count = 0;
c4e7c712 3184
b3ab6785
KW
3185 if (flags & SV_FORCE_UTF8_UPGRADE) goto must_be_utf8;
3186
3187 /* See if really will need to convert to utf8. We mustn't rely on our
3188 * incoming SV being well formed and having a trailing '\0', as certain
3189 * code in pp_formline can send us partially built SVs. */
3190
c4e7c712 3191 while (t < e) {
53c1dcc0 3192 const U8 ch = *t++;
b3ab6785
KW
3193 if (NATIVE_IS_INVARIANT(ch)) continue;
3194
3195 t--; /* t already incremented; re-point to first variant */
3196 two_byte_count = 1;
3197 goto must_be_utf8;
c4e7c712 3198 }
b3ab6785
KW
3199
3200 /* utf8 conversion not needed because all are invariants. Mark as
3201 * UTF-8 even if no variant - saves scanning loop */
c4e7c712 3202 SvUTF8_on(sv);
b3ab6785
KW
3203 return SvCUR(sv);
3204
3205must_be_utf8:
3206
3207 /* Here, the string should be converted to utf8, either because of an
3208 * input flag (two_byte_count = 0), or because a character that
3209 * requires 2 bytes was found (two_byte_count = 1). t points either to
3210 * the beginning of the string (if we didn't examine anything), or to
3211 * the first variant. In either case, everything from s to t - 1 will
3212 * occupy only 1 byte each on output.
3213 *
3214 * There are two main ways to convert. One is to create a new string
3215 * and go through the input starting from the beginning, appending each
3216 * converted value onto the new string as we go along. It's probably
3217 * best to allocate enough space in the string for the worst possible
3218 * case rather than possibly running out of space and having to
3219 * reallocate and then copy what we've done so far. Since everything
3220 * from s to t - 1 is invariant, the destination can be initialized
3221 * with these using a fast memory copy
3222 *
3223 * The other way is to figure out exactly how big the string should be
3224 * by parsing the entire input. Then you don't have to make it big
3225 * enough to handle the worst possible case, and more importantly, if
3226 * the string you already have is large enough, you don't have to
3227 * allocate a new string, you can copy the last character in the input
3228 * string to the final position(s) that will be occupied by the
3229 * converted string and go backwards, stopping at t, since everything
3230 * before that is invariant.
3231 *
3232 * There are advantages and disadvantages to each method.
3233 *
3234 * In the first method, we can allocate a new string, do the memory
3235 * copy from the s to t - 1, and then proceed through the rest of the
3236 * string byte-by-byte.
3237 *
3238 * In the second method, we proceed through the rest of the input
3239 * string just calculating how big the converted string will be. Then
3240 * there are two cases:
3241 * 1) if the string has enough extra space to handle the converted
3242 * value. We go backwards through the string, converting until we
3243 * get to the position we are at now, and then stop. If this
3244 * position is far enough along in the string, this method is
3245 * faster than the other method. If the memory copy were the same
3246 * speed as the byte-by-byte loop, that position would be about
3247 * half-way, as at the half-way mark, parsing to the end and back
3248 * is one complete string's parse, the same amount as starting
3249 * over and going all the way through. Actually, it would be
3250 * somewhat less than half-way, as it's faster to just count bytes
3251 * than to also copy, and we don't have the overhead of allocating
3252 * a new string, changing the scalar to use it, and freeing the
3253 * existing one. But if the memory copy is fast, the break-even
3254 * point is somewhere after half way. The counting loop could be
3255 * sped up by vectorization, etc, to move the break-even point
3256 * further towards the beginning.
3257 * 2) if the string doesn't have enough space to handle the converted
3258 * value. A new string will have to be allocated, and one might
3259 * as well, given that, start from the beginning doing the first
3260 * method. We've spent extra time parsing the string and in
3261 * exchange all we've gotten is that we know precisely how big to
3262 * make the new one. Perl is more optimized for time than space,
3263 * so this case is a loser.
3264 * So what I've decided to do is not use the 2nd method unless it is
3265 * guaranteed that a new string won't have to be allocated, assuming
3266 * the worst case. I also decided not to put any more conditions on it
3267 * than this, for now. It seems likely that, since the worst case is
3268 * twice as big as the unknown portion of the string (plus 1), we won't
3269 * be guaranteed enough space, causing us to go to the first method,
3270 * unless the string is short, or the first variant character is near
3271 * the end of it. In either of these cases, it seems best to use the
3272 * 2nd method. The only circumstance I can think of where this would
3273 * be really slower is if the string had once had much more data in it
3274 * than it does now, but there is still a substantial amount in it */
3275
3276 {
3277 STRLEN invariant_head = t - s;
3278 STRLEN size = invariant_head + (e - t) * 2 + 1 + extra;
3279 if (SvLEN(sv) < size) {
3280
3281 /* Here, have decided to allocate a new string */
3282
3283 U8 *dst;
3284 U8 *d;
3285
3286 Newx(dst, size, U8);
3287
3288 /* If no known invariants at the beginning of the input string,
3289 * set so starts from there. Otherwise, can use memory copy to
3290 * get up to where we are now, and then start from here */
3291
3292 if (invariant_head <= 0) {
3293 d = dst;
3294 } else {
3295 Copy(s, dst, invariant_head, char);
3296 d = dst + invariant_head;
3297 }
3298
3299 while (t < e) {
3300 const UV uv = NATIVE8_TO_UNI(*t++);
3301 if (UNI_IS_INVARIANT(uv))
3302 *d++ = (U8)UNI_TO_NATIVE(uv);
3303 else {
3304 *d++ = (U8)UTF8_EIGHT_BIT_HI(uv);
3305 *d++ = (U8)UTF8_EIGHT_BIT_LO(uv);
3306 }
3307 }
3308 *d = '\0';
3309 SvPV_free(sv); /* No longer using pre-existing string */
3310 SvPV_set(sv, (char*)dst);
3311 SvCUR_set(sv, d - dst);
3312 SvLEN_set(sv, size);
3313 } else {
3314
3315 /* Here, have decided to get the exact size of the string.
3316 * Currently this happens only when we know that there is
3317 * guaranteed enough space to fit the converted string, so
3318 * don't have to worry about growing. If two_byte_count is 0,
3319 * then t points to the first byte of the string which hasn't
3320 * been examined yet. Otherwise two_byte_count is 1, and t
3321 * points to the first byte in the string that will expand to
3322 * two. Depending on this, start examining at t or 1 after t.
3323 * */
3324
3325 U8 *d = t + two_byte_count;
3326
3327
3328 /* Count up the remaining bytes that expand to two */
3329
3330 while (d < e) {
3331 const U8 chr = *d++;
3332 if (! NATIVE_IS_INVARIANT(chr)) two_byte_count++;
3333 }
3334
3335 /* The string will expand by just the number of bytes that
3336 * occupy two positions. But we are one afterwards because of
3337 * the increment just above. This is the place to put the
3338 * trailing NUL, and to set the length before we decrement */
3339
3340 d += two_byte_count;
3341 SvCUR_set(sv, d - s);
3342 *d-- = '\0';
3343
3344
3345 /* Having decremented d, it points to the position to put the
3346 * very last byte of the expanded string. Go backwards through
3347 * the string, copying and expanding as we go, stopping when we
3348 * get to the part that is invariant the rest of the way down */
3349
3350 e--;
3351 while (e >= t) {
3352 const U8 ch = NATIVE8_TO_UNI(*e--);
3353 if (UNI_IS_INVARIANT(ch)) {
3354 *d-- = UNI_TO_NATIVE(ch);
3355 } else {
3356 *d-- = (U8)UTF8_EIGHT_BIT_LO(ch);
3357 *d-- = (U8)UTF8_EIGHT_BIT_HI(ch);
3358 }
3359 }
3360 }
3361 }
560a288e 3362 }
b3ab6785
KW
3363
3364 /* Mark as UTF-8 even if no variant - saves scanning loop */
3365 SvUTF8_on(sv);
4411f3b6 3366 return SvCUR(sv);
560a288e
GS
3367}
3368
c461cf8f
JH
3369/*
3370=for apidoc sv_utf8_downgrade
3371
78ea37eb 3372Attempts to convert the PV of an SV from characters to bytes.
2bbc8d55
SP
3373If the PV contains a character that cannot fit
3374in a byte, this conversion will fail;
78ea37eb 3375in this case, either returns false or, if C<fail_ok> is not
c461cf8f
JH
3376true, croaks.
3377
13a6c0e0
JH
3378This is not as a general purpose Unicode to byte encoding interface:
3379use the Encode extension for that.
3380
c461cf8f
JH
3381=cut
3382*/
3383
560a288e 3384bool
7bc54cea 3385Perl_sv_utf8_downgrade(pTHX_ register SV *const sv, const bool fail_ok)
560a288e 3386{
97aff369 3387 dVAR;
7918f24d
NC
3388
3389 PERL_ARGS_ASSERT_SV_UTF8_DOWNGRADE;
3390
78ea37eb 3391 if (SvPOKp(sv) && SvUTF8(sv)) {
fa301091 3392 if (SvCUR(sv)) {
03cfe0ae 3393 U8 *s;
652088fc 3394 STRLEN len;
fa301091 3395
765f542d
NC
3396 if (SvIsCOW(sv)) {
3397 sv_force_normal_flags(sv, 0);
3398 }
03cfe0ae
NIS
3399 s = (U8 *) SvPV(sv, len);
3400 if (!utf8_to_bytes(s, &len)) {
fa301091
JH
3401 if (fail_ok)
3402 return FALSE;
3403 else {
3404 if (PL_op)
3405 Perl_croak(aTHX_ "Wide character in %s",
53e06cf0 3406 OP_DESC(PL_op));
fa301091
JH
3407 else
3408 Perl_croak(aTHX_ "Wide character");
3409 }
4b3603a4 3410 }
b162af07 3411 SvCUR_set(sv, len);
67e989fb 3412 }
560a288e 3413 }
ffebcc3e 3414 SvUTF8_off(sv);
560a288e
GS
3415 return TRUE;
3416}
3417
c461cf8f
JH
3418/*
3419=for apidoc sv_utf8_encode
3420
78ea37eb
TS
3421Converts the PV of an SV to UTF-8, but then turns the C<SvUTF8>
3422flag off so that it looks like octets again.
c461cf8f
JH
3423
3424=cut
3425*/
3426
560a288e 3427void
7bc54cea 3428Perl_sv_utf8_encode(pTHX_ register SV *const sv)
560a288e 3429{
7918f24d
NC
3430 PERL_ARGS_ASSERT_SV_UTF8_ENCODE;
3431
4c94c214
NC
3432 if (SvIsCOW(sv)) {
3433 sv_force_normal_flags(sv, 0);
3434 }
3435 if (SvREADONLY(sv)) {
6ad8f254 3436 Perl_croak_no_modify(aTHX);
4c94c214 3437 }
a5f5288a 3438 (void) sv_utf8_upgrade(sv);
560a288e
GS
3439 SvUTF8_off(sv);
3440}
3441
4411f3b6
NIS
3442/*
3443=for apidoc sv_utf8_decode
3444
78ea37eb
TS
3445If the PV of the SV is an octet sequence in UTF-8
3446and contains a multiple-byte character, the C<SvUTF8> flag is turned on
3447so that it looks like a character. If the PV contains only single-byte
3448characters, the C<SvUTF8> flag stays being off.
3449Scans PV for validity and returns false if the PV is invalid UTF-8.
4411f3b6
NIS
3450
3451=cut
3452*/
3453
560a288e 3454bool
7bc54cea 3455Perl_sv_utf8_decode(pTHX_ register SV *const sv)
560a288e 3456{
7918f24d
NC
3457 PERL_ARGS_ASSERT_SV_UTF8_DECODE;
3458
78ea37eb 3459 if (SvPOKp(sv)) {
93524f2b
NC
3460 const U8 *c;
3461 const U8 *e;
9cbac4c7 3462
645c22ef
DM
3463 /* The octets may have got themselves encoded - get them back as
3464 * bytes
3465 */
3466 if (!sv_utf8_downgrade(sv, TRUE))
560a288e
GS
3467 return FALSE;
3468
3469 /* it is actually just a matter of turning the utf8 flag on, but
3470 * we want to make sure everything inside is valid utf8 first.
3471 */
93524f2b 3472 c = (const U8 *) SvPVX_const(sv);
63cd0674 3473 if (!is_utf8_string(c, SvCUR(sv)+1))
67e989fb 3474 return FALSE;
93524f2b 3475 e = (const U8 *) SvEND(sv);
511c2ff0 3476 while (c < e) {
b64e5050 3477 const U8 ch = *c++;
c4d5f83a 3478 if (!UTF8_IS_INVARIANT(ch)) {
67e989fb
JH
3479 SvUTF8_on(sv);
3480 break;
3481 }
560a288e 3482 }
560a288e
GS
3483 }
3484 return TRUE;
3485}
3486
954c1994
GS
3487/*
3488=for apidoc sv_setsv
3489
645c22ef
DM
3490Copies the contents of the source SV C<ssv> into the destination SV
3491C<dsv>. The source SV may be destroyed if it is mortal, so don't use this
3492function if the source SV needs to be reused. Does not handle 'set' magic.
3493Loosely speaking, it performs a copy-by-value, obliterating any previous
3494content of the destination.
3495
3496You probably want to use one of the assortment of wrappers, such as
3497C<SvSetSV>, C<SvSetSV_nosteal>, C<SvSetMagicSV> and
3498C<SvSetMagicSV_nosteal>.
3499
8d6d96c1
HS
3500=for apidoc sv_setsv_flags
3501
645c22ef
DM
3502Copies the contents of the source SV C<ssv> into the destination SV
3503C<dsv>. The source SV may be destroyed if it is mortal, so don't use this
3504function if the source SV needs to be reused. Does not handle 'set' magic.
3505Loosely speaking, it performs a copy-by-value, obliterating any previous
3506content of the destination.
3507If the C<flags> parameter has the C<SV_GMAGIC> bit set, will C<mg_get> on
5fcdf167
NC
3508C<ssv> if appropriate, else not. If the C<flags> parameter has the
3509C<NOSTEAL> bit set then the buffers of temps will not be stolen. <sv_setsv>
3510and C<sv_setsv_nomg> are implemented in terms of this function.
645c22ef
DM
3511
3512You probably want to use one of the assortment of wrappers, such as
3513C<SvSetSV>, C<SvSetSV_nosteal>, C<SvSetMagicSV> and
3514C<SvSetMagicSV_nosteal>.
3515
3516This is the primary function for copying scalars, and most other
3517copy-ish functions and macros use this underneath.
8d6d96c1
HS
3518
3519=cut
3520*/
3521
5d0301b7 3522static void
7bc54cea 3523S_glob_assign_glob(pTHX_ SV *const dstr, SV *const sstr, const int dtype)
5d0301b7 3524{
70cd14a1 3525 I32 mro_changes = 0; /* 1 = method, 2 = isa */
dd69841b 3526
7918f24d
NC
3527 PERL_ARGS_ASSERT_GLOB_ASSIGN_GLOB;
3528
5d0301b7
NC
3529 if (dtype != SVt_PVGV) {
3530 const char * const name = GvNAME(sstr);
3531 const STRLEN len = GvNAMELEN(sstr);
0d092c36 3532 {
f7877b28
NC
3533 if (dtype >= SVt_PV) {
3534 SvPV_free(dstr);
3535 SvPV_set(dstr, 0);
3536 SvLEN_set(dstr, 0);
3537 SvCUR_set(dstr, 0);
3538 }
0d092c36 3539 SvUPGRADE(dstr, SVt_PVGV);
dedf8e73 3540 (void)SvOK_off(dstr);
2e5b91de
NC
3541 /* FIXME - why are we doing this, then turning it off and on again
3542 below? */
3543 isGV_with_GP_on(dstr);
f7877b28 3544 }
5d0301b7
NC
3545 GvSTASH(dstr) = GvSTASH(sstr);
3546 if (GvSTASH(dstr))
daba3364 3547 Perl_sv_add_backref(aTHX_ MUTABLE_SV(GvSTASH(dstr)), dstr);
159b6efe 3548 gv_name_set(MUTABLE_GV(dstr), name, len, GV_ADD);
5d0301b7
NC
3549 SvFAKE_on(dstr); /* can coerce to non-glob */
3550 }
3551
159b6efe 3552 if(GvGP(MUTABLE_GV(sstr))) {
dd69841b
BB
3553 /* If source has method cache entry, clear it */
3554 if(GvCVGEN(sstr)) {
3555 SvREFCNT_dec(GvCV(sstr));
3556 GvCV(sstr) = NULL;
3557 GvCVGEN(sstr) = 0;
3558 }
3559 /* If source has a real method, then a method is
3560 going to change */
159b6efe 3561 else if(GvCV((const GV *)sstr)) {
70cd14a1 3562 mro_changes = 1;
dd69841b
BB
3563 }
3564 }
3565
3566 /* If dest already had a real method, that's a change as well */
159b6efe 3567 if(!mro_changes && GvGP(MUTABLE_GV(dstr)) && GvCVu((const GV *)dstr)) {
70cd14a1 3568 mro_changes = 1;
dd69841b
BB
3569 }
3570
159b6efe 3571 if(strEQ(GvNAME((const GV *)dstr),"ISA"))
70cd14a1
CB
3572 mro_changes = 2;
3573
159b6efe 3574 gp_free(MUTABLE_GV(dstr));
2e5b91de 3575 isGV_with_GP_off(dstr);
5d0301b7 3576 (void)SvOK_off(dstr);
2e5b91de 3577 isGV_with_GP_on(dstr);
dedf8e73 3578 GvINTRO_off(dstr); /* one-shot flag */
5d0301b7
NC
3579 GvGP(dstr) = gp_ref(GvGP(sstr));
3580 if (SvTAINTED(sstr))
3581 SvTAINT(dstr);
3582 if (GvIMPORTED(dstr) != GVf_IMPORTED
3583 && CopSTASH_ne(PL_curcop, GvSTASH(dstr)))
3584 {
3585 GvIMPORTED_on(dstr);
3586 }
3587 GvMULTI_on(dstr);
70cd14a1
CB
3588 if(mro_changes == 2) mro_isa_changed_in(GvSTASH(dstr));
3589 else if(mro_changes) mro_method_changed_in(GvSTASH(dstr));
5d0301b7
NC
3590 return;
3591}
3592
b8473700 3593static void
7bc54cea 3594S_glob_assign_ref(pTHX_ SV *const dstr, SV *const sstr)
7918f24d 3595{
b8473700
NC
3596 SV * const sref = SvREFCNT_inc(SvRV(sstr));
3597 SV *dref = NULL;
3598 const int intro = GvINTRO(dstr);
2440974c 3599 SV **location;
3386d083 3600 U8 import_flag = 0;
27242d61
NC
3601 const U32 stype = SvTYPE(sref);
3602
7918f24d 3603 PERL_ARGS_ASSERT_GLOB_ASSIGN_REF;
b8473700 3604
b8473700
NC
3605 if (intro) {
3606 GvINTRO_off(dstr); /* one-shot flag */
3607 GvLINE(dstr) = CopLINE(PL_curcop);
159b6efe 3608 GvEGV(dstr) = MUTABLE_GV(dstr);
b8473700
NC
3609 }
3610 GvMULTI_on(dstr);
27242d61 3611 switch (stype) {
b8473700 3612 case SVt_PVCV:
27242d61
NC
3613 location = (SV **) &GvCV(dstr);
3614 import_flag = GVf_IMPORTED_CV;
3615 goto common;
3616 case SVt_PVHV:
3617 location = (SV **) &GvHV(dstr);
3618 import_flag = GVf_IMPORTED_HV;
3619 goto common;
3620 case SVt_PVAV:
3621 location = (SV **) &GvAV(dstr);
3622 import_flag = GVf_IMPORTED_AV;
3623 goto common;
3624 case SVt_PVIO:
3625 location = (SV **) &GvIOp(dstr);
3626 goto common;
3627 case SVt_PVFM:
3628 location = (SV **) &GvFORM(dstr);
ef595a33 3629 goto common;
27242d61
NC
3630 default:
3631 location = &GvSV(dstr);
3632 import_flag = GVf_IMPORTED_SV;
3633 common:
b8473700 3634 if (intro) {
27242d61 3635 if (stype == SVt_PVCV) {
ea726b52 3636 /*if (GvCVGEN(dstr) && (GvCV(dstr) != (const CV *)sref || GvCVGEN(dstr))) {*/
5f2fca8a 3637 if (GvCVGEN(dstr)) {
27242d61
NC
3638 SvREFCNT_dec(GvCV(dstr));
3639 GvCV(dstr) = NULL;
3640 GvCVGEN(dstr) = 0; /* Switch off cacheness. */
27242d61 3641 }
b8473700 3642 }
27242d61 3643 SAVEGENERICSV(*location);
b8473700
NC
3644 }
3645 else
27242d61 3646 dref = *location;
5f2fca8a 3647 if (stype == SVt_PVCV && (*location != sref || GvCVGEN(dstr))) {
ea726b52 3648 CV* const cv = MUTABLE_CV(*location);
b8473700 3649 if (cv) {
159b6efe 3650 if (!GvCVGEN((const GV *)dstr) &&
b8473700
NC
3651 (CvROOT(cv) || CvXSUB(cv)))
3652 {
3653 /* Redefining a sub - warning is mandatory if
3654 it was a const and its value changed. */
ea726b52 3655 if (CvCONST(cv) &