This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Document a bunch of foo_nocontext functions
[perl5.git] / sv.c
CommitLineData
5356d32e 1/* sv.c
79072805 2 *
1129b882 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
83706693
RGS
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall
5 * and others
79072805
LW
6 *
7 * You may distribute under the terms of either the GNU General Public
8 * License or the Artistic License, as specified in the README file.
9 *
4ac71550
TC
10 */
11
12/*
13 * 'I wonder what the Entish is for "yes" and "no",' he thought.
14 * --Pippin
15 *
16 * [p.480 of _The Lord of the Rings_, III/iv: "Treebeard"]
17 */
18
19/*
645c22ef
DM
20 *
21 *
5e045b90
AMS
22 * This file contains the code that creates, manipulates and destroys
23 * scalar values (SVs). The other types (AV, HV, GV, etc.) reuse the
24 * structure of an SV, so their creation and destruction is handled
25 * here; higher-level functions are in av.c, hv.c, and so on. Opcode
26 * level functions (eg. substr, split, join) for each of the types are
27 * in the pp*.c files.
79072805
LW
28 */
29
30#include "EXTERN.h"
864dbfa3 31#define PERL_IN_SV_C
79072805 32#include "perl.h"
d2f185dc 33#include "regcomp.h"
9d9a81f0
CB
34#ifdef __VMS
35# include <rms.h>
36#endif
79072805 37
2f8ed50e
OS
38#ifdef __Lynx__
39/* Missing proto on LynxOS */
40 char *gconvert(double, int, int, char *);
41#endif
42
a4eca1d4
JH
43#ifdef USE_QUADMATH
44# define SNPRINTF_G(nv, buffer, size, ndig) \
45 quadmath_snprintf(buffer, size, "%.*Qg", (int)ndig, (NV)(nv))
46#else
47# define SNPRINTF_G(nv, buffer, size, ndig) \
48 PERL_UNUSED_RESULT(Gconvert((NV)(nv), (int)ndig, 0, buffer))
49#endif
50
9f53080a 51#ifndef SV_COW_THRESHOLD
e8c6a474 52# define SV_COW_THRESHOLD 0 /* COW iff len > K */
9f53080a
FC
53#endif
54#ifndef SV_COWBUF_THRESHOLD
e8c6a474 55# define SV_COWBUF_THRESHOLD 1250 /* COW iff len > K */
9f53080a
FC
56#endif
57#ifndef SV_COW_MAX_WASTE_THRESHOLD
e8c6a474 58# define SV_COW_MAX_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
9f53080a
FC
59#endif
60#ifndef SV_COWBUF_WASTE_THRESHOLD
e8c6a474 61# define SV_COWBUF_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */
9f53080a
FC
62#endif
63#ifndef SV_COW_MAX_WASTE_FACTOR_THRESHOLD
e8c6a474 64# define SV_COW_MAX_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
9f53080a
FC
65#endif
66#ifndef SV_COWBUF_WASTE_FACTOR_THRESHOLD
e8c6a474 67# define SV_COWBUF_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */
e8c6a474
YO
68#endif
69/* Work around compiler warnings about unsigned >= THRESHOLD when thres-
70 hold is 0. */
71#if SV_COW_THRESHOLD
72# define GE_COW_THRESHOLD(cur) ((cur) >= SV_COW_THRESHOLD)
73#else
74# define GE_COW_THRESHOLD(cur) 1
75#endif
76#if SV_COWBUF_THRESHOLD
77# define GE_COWBUF_THRESHOLD(cur) ((cur) >= SV_COWBUF_THRESHOLD)
78#else
79# define GE_COWBUF_THRESHOLD(cur) 1
80#endif
81#if SV_COW_MAX_WASTE_THRESHOLD
82# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COW_MAX_WASTE_THRESHOLD)
83#else
84# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) 1
85#endif
86#if SV_COWBUF_WASTE_THRESHOLD
87# define GE_COWBUF_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COWBUF_WASTE_THRESHOLD)
88#else
89# define GE_COWBUF_WASTE_THRESHOLD(cur,len) 1
90#endif
91#if SV_COW_MAX_WASTE_FACTOR_THRESHOLD
92# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COW_MAX_WASTE_FACTOR_THRESHOLD * (cur))
93#else
94# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) 1
95#endif
96#if SV_COWBUF_WASTE_FACTOR_THRESHOLD
97# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COWBUF_WASTE_FACTOR_THRESHOLD * (cur))
98#else
99# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) 1
100#endif
101
102#define CHECK_COW_THRESHOLD(cur,len) (\
103 GE_COW_THRESHOLD((cur)) && \
104 GE_COW_MAX_WASTE_THRESHOLD((cur),(len)) && \
105 GE_COW_MAX_WASTE_FACTOR_THRESHOLD((cur),(len)) \
106)
107#define CHECK_COWBUF_THRESHOLD(cur,len) (\
108 GE_COWBUF_THRESHOLD((cur)) && \
109 GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \
110 GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \
111)
cca0492e 112
e23c8137 113#ifdef PERL_UTF8_CACHE_ASSERT
ab455f60 114/* if adding more checks watch out for the following tests:
e23c8137
JH
115 * t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t
116 * lib/utf8.t lib/Unicode/Collate/t/index.t
117 * --jhi
118 */
6f207bd3 119# define ASSERT_UTF8_CACHE(cache) \
ab455f60
NC
120 STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \
121 assert((cache)[2] <= (cache)[3]); \
122 assert((cache)[3] <= (cache)[1]);} \
123 } STMT_END
e23c8137 124#else
6f207bd3 125# define ASSERT_UTF8_CACHE(cache) NOOP
e23c8137
JH
126#endif
127
958cdeac
TC
128static const char S_destroy[] = "DESTROY";
129#define S_destroy_len (sizeof(S_destroy)-1)
130
645c22ef
DM
131/* ============================================================================
132
133=head1 Allocation and deallocation of SVs.
17c7a51d 134
d2a0f284
JC
135An SV (or AV, HV, etc.) is allocated in two parts: the head (struct
136sv, av, hv...) contains type and reference count information, and for
137many types, a pointer to the body (struct xrv, xpv, xpviv...), which
138contains fields specific to each type. Some types store all they need
139in the head, so don't have a body.
140
486ec47a 141In all but the most memory-paranoid configurations (ex: PURIFY), heads
d2a0f284
JC
142and bodies are allocated out of arenas, which by default are
143approximately 4K chunks of memory parcelled up into N heads or bodies.
93e68bfb
JC
144Sv-bodies are allocated by their sv-type, guaranteeing size
145consistency needed to allocate safely from arrays.
146
d2a0f284
JC
147For SV-heads, the first slot in each arena is reserved, and holds a
148link to the next arena, some flags, and a note of the number of slots.
149Snaked through each arena chain is a linked list of free items; when
150this becomes empty, an extra arena is allocated and divided up into N
151items which are threaded into the free list.
152
153SV-bodies are similar, but they use arena-sets by default, which
154separate the link and info from the arena itself, and reclaim the 1st
155slot in the arena. SV-bodies are further described later.
645c22ef
DM
156
157The following global variables are associated with arenas:
158
7fefc6c1
KW
159 PL_sv_arenaroot pointer to list of SV arenas
160 PL_sv_root pointer to list of free SV structures
645c22ef 161
7fefc6c1
KW
162 PL_body_arenas head of linked-list of body arenas
163 PL_body_roots[] array of pointers to list of free bodies of svtype
164 arrays are indexed by the svtype needed
93e68bfb 165
d2a0f284
JC
166A few special SV heads are not allocated from an arena, but are
167instead directly created in the interpreter structure, eg PL_sv_undef.
93e68bfb
JC
168The size of arenas can be changed from the default by setting
169PERL_ARENA_SIZE appropriately at compile time.
645c22ef
DM
170
171The SV arena serves the secondary purpose of allowing still-live SVs
172to be located and destroyed during final cleanup.
173
174At the lowest level, the macros new_SV() and del_SV() grab and free
175an SV head. (If debugging with -DD, del_SV() calls the function S_del_sv()
176to return the SV to the free list with error checking.) new_SV() calls
177more_sv() / sv_add_arena() to add an extra arena if the free list is empty.
178SVs in the free list have their SvTYPE field set to all ones.
179
ff276b08 180At the time of very final cleanup, sv_free_arenas() is called from
645c22ef 181perl_destruct() to physically free all the arenas allocated since the
6a93a7e5 182start of the interpreter.
645c22ef 183
645c22ef
DM
184The function visit() scans the SV arenas list, and calls a specified
185function for each SV it finds which is still live - ie which has an SvTYPE
186other than all 1's, and a non-zero SvREFCNT. visit() is used by the
187following functions (specified as [function that calls visit()] / [function
188called by visit() for each SV]):
189
190 sv_report_used() / do_report_used()
f2524eef 191 dump all remaining SVs (debugging aid)
645c22ef 192
e4487e9b 193 sv_clean_objs() / do_clean_objs(),do_clean_named_objs(),
e76981f9 194 do_clean_named_io_objs(),do_curse()
645c22ef 195 Attempt to free all objects pointed to by RVs,
e76981f9
FC
196 try to do the same for all objects indir-
197 ectly referenced by typeglobs too, and
198 then do a final sweep, cursing any
199 objects that remain. Called once from
645c22ef
DM
200 perl_destruct(), prior to calling sv_clean_all()
201 below.
202
203 sv_clean_all() / do_clean_all()
204 SvREFCNT_dec(sv) each remaining SV, possibly
205 triggering an sv_free(). It also sets the
206 SVf_BREAK flag on the SV to indicate that the
207 refcnt has been artificially lowered, and thus
208 stopping sv_free() from giving spurious warnings
209 about SVs which unexpectedly have a refcnt
210 of zero. called repeatedly from perl_destruct()
211 until there are no SVs left.
212
93e68bfb 213=head2 Arena allocator API Summary
645c22ef
DM
214
215Private API to rest of sv.c
216
217 new_SV(), del_SV(),
218
df0f0429 219 new_XPVNV(), del_XPVGV(),
645c22ef
DM
220 etc
221
222Public API:
223
8cf8f3d1 224 sv_report_used(), sv_clean_objs(), sv_clean_all(), sv_free_arenas()
645c22ef 225
645c22ef
DM
226=cut
227
3e8320cc 228 * ========================================================================= */
645c22ef 229
4561caa4
CS
230/*
231 * "A time to plant, and a time to uproot what was planted..."
232 */
233
d7a2c63c
MHM
234#ifdef PERL_MEM_LOG
235# define MEM_LOG_NEW_SV(sv, file, line, func) \
236 Perl_mem_log_new_sv(sv, file, line, func)
237# define MEM_LOG_DEL_SV(sv, file, line, func) \
238 Perl_mem_log_del_sv(sv, file, line, func)
239#else
240# define MEM_LOG_NEW_SV(sv, file, line, func) NOOP
241# define MEM_LOG_DEL_SV(sv, file, line, func) NOOP
242#endif
243
fd0854ff 244#ifdef DEBUG_LEAKING_SCALARS
484e6108
FC
245# define FREE_SV_DEBUG_FILE(sv) STMT_START { \
246 if ((sv)->sv_debug_file) PerlMemShared_free((sv)->sv_debug_file); \
247 } STMT_END
d7a2c63c 248# define DEBUG_SV_SERIAL(sv) \
147e3846 249 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%" UVxf ": (%05ld) del_SV\n", \
d7a2c63c 250 PTR2UV(sv), (long)(sv)->sv_debug_serial))
fd0854ff
DM
251#else
252# define FREE_SV_DEBUG_FILE(sv)
d7a2c63c 253# define DEBUG_SV_SERIAL(sv) NOOP
fd0854ff
DM
254#endif
255
48614a46
NC
256#ifdef PERL_POISON
257# define SvARENA_CHAIN(sv) ((sv)->sv_u.svu_rv)
daba3364 258# define SvARENA_CHAIN_SET(sv,val) (sv)->sv_u.svu_rv = MUTABLE_SV((val))
48614a46
NC
259/* Whilst I'd love to do this, it seems that things like to check on
260 unreferenced scalars
ce5dbf61 261# define POISON_SV_HEAD(sv) PoisonNew(sv, 1, struct STRUCT_SV)
48614a46 262*/
ce5dbf61 263# define POISON_SV_HEAD(sv) PoisonNew(&SvANY(sv), 1, void *), \
7e337ee0 264 PoisonNew(&SvREFCNT(sv), 1, U32)
48614a46
NC
265#else
266# define SvARENA_CHAIN(sv) SvANY(sv)
3eef1deb 267# define SvARENA_CHAIN_SET(sv,val) SvANY(sv) = (void *)(val)
ce5dbf61 268# define POISON_SV_HEAD(sv)
48614a46
NC
269#endif
270
990198f0
DM
271/* Mark an SV head as unused, and add to free list.
272 *
273 * If SVf_BREAK is set, skip adding it to the free list, as this SV had
274 * its refcount artificially decremented during global destruction, so
275 * there may be dangling pointers to it. The last thing we want in that
276 * case is for it to be reused. */
277
053fc874
GS
278#define plant_SV(p) \
279 STMT_START { \
990198f0 280 const U32 old_flags = SvFLAGS(p); \
d7a2c63c
MHM
281 MEM_LOG_DEL_SV(p, __FILE__, __LINE__, FUNCTION__); \
282 DEBUG_SV_SERIAL(p); \
fd0854ff 283 FREE_SV_DEBUG_FILE(p); \
ce5dbf61 284 POISON_SV_HEAD(p); \
053fc874 285 SvFLAGS(p) = SVTYPEMASK; \
990198f0 286 if (!(old_flags & SVf_BREAK)) { \
3eef1deb 287 SvARENA_CHAIN_SET(p, PL_sv_root); \
990198f0
DM
288 PL_sv_root = (p); \
289 } \
053fc874
GS
290 --PL_sv_count; \
291 } STMT_END
a0d0e21e 292
053fc874
GS
293#define uproot_SV(p) \
294 STMT_START { \
295 (p) = PL_sv_root; \
daba3364 296 PL_sv_root = MUTABLE_SV(SvARENA_CHAIN(p)); \
053fc874
GS
297 ++PL_sv_count; \
298 } STMT_END
299
645c22ef 300
cac9b346
NC
301/* make some more SVs by adding another arena */
302
cac9b346
NC
303STATIC SV*
304S_more_sv(pTHX)
305{
306 SV* sv;
9a87bd09
NC
307 char *chunk; /* must use New here to match call to */
308 Newx(chunk,PERL_ARENA_SIZE,char); /* Safefree() in sv_free_arenas() */
309 sv_add_arena(chunk, PERL_ARENA_SIZE, 0);
cac9b346
NC
310 uproot_SV(sv);
311 return sv;
312}
313
645c22ef
DM
314/* new_SV(): return a new, empty SV head */
315
eba0f806
DM
316#ifdef DEBUG_LEAKING_SCALARS
317/* provide a real function for a debugger to play with */
318STATIC SV*
d7a2c63c 319S_new_SV(pTHX_ const char *file, int line, const char *func)
eba0f806
DM
320{
321 SV* sv;
322
eba0f806
DM
323 if (PL_sv_root)
324 uproot_SV(sv);
325 else
cac9b346 326 sv = S_more_sv(aTHX);
eba0f806
DM
327 SvANY(sv) = 0;
328 SvREFCNT(sv) = 1;
329 SvFLAGS(sv) = 0;
fd0854ff 330 sv->sv_debug_optype = PL_op ? PL_op->op_type : 0;
e385c3bf
DM
331 sv->sv_debug_line = (U16) (PL_parser && PL_parser->copline != NOLINE
332 ? PL_parser->copline
333 : PL_curcop
f24aceb1
DM
334 ? CopLINE(PL_curcop)
335 : 0
e385c3bf 336 );
fd0854ff 337 sv->sv_debug_inpad = 0;
cd676548 338 sv->sv_debug_parent = NULL;
484e6108 339 sv->sv_debug_file = PL_curcop ? savesharedpv(CopFILE(PL_curcop)): NULL;
d7a2c63c
MHM
340
341 sv->sv_debug_serial = PL_sv_serial++;
342
343 MEM_LOG_NEW_SV(sv, file, line, func);
147e3846 344 DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%" UVxf ": (%05ld) new_SV (from %s:%d [%s])\n",
d7a2c63c
MHM
345 PTR2UV(sv), (long)sv->sv_debug_serial, file, line, func));
346
eba0f806
DM
347 return sv;
348}
d7a2c63c 349# define new_SV(p) (p)=S_new_SV(aTHX_ __FILE__, __LINE__, FUNCTION__)
eba0f806
DM
350
351#else
352# define new_SV(p) \
053fc874 353 STMT_START { \
053fc874
GS
354 if (PL_sv_root) \
355 uproot_SV(p); \
356 else \
cac9b346 357 (p) = S_more_sv(aTHX); \
053fc874
GS
358 SvANY(p) = 0; \
359 SvREFCNT(p) = 1; \
360 SvFLAGS(p) = 0; \
d7a2c63c 361 MEM_LOG_NEW_SV(p, __FILE__, __LINE__, FUNCTION__); \
053fc874 362 } STMT_END
eba0f806 363#endif
463ee0b2 364
645c22ef
DM
365
366/* del_SV(): return an empty SV head to the free list */
367
a0d0e21e 368#ifdef DEBUGGING
4561caa4 369
053fc874
GS
370#define del_SV(p) \
371 STMT_START { \
aea4f609 372 if (DEBUG_D_TEST) \
053fc874
GS
373 del_sv(p); \
374 else \
375 plant_SV(p); \
053fc874 376 } STMT_END
a0d0e21e 377
76e3520e 378STATIC void
cea2e8a9 379S_del_sv(pTHX_ SV *p)
463ee0b2 380{
7918f24d
NC
381 PERL_ARGS_ASSERT_DEL_SV;
382
aea4f609 383 if (DEBUG_D_TEST) {
4633a7c4 384 SV* sva;
a3b680e6 385 bool ok = 0;
daba3364 386 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
53c1dcc0
AL
387 const SV * const sv = sva + 1;
388 const SV * const svend = &sva[SvREFCNT(sva)];
c0ff570e 389 if (p >= sv && p < svend) {
a0d0e21e 390 ok = 1;
c0ff570e
NC
391 break;
392 }
a0d0e21e
LW
393 }
394 if (!ok) {
9b387841 395 Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
147e3846 396 "Attempt to free non-arena SV: 0x%" UVxf
9b387841 397 pTHX__FORMAT, PTR2UV(p) pTHX__VALUE);
a0d0e21e
LW
398 return;
399 }
400 }
4561caa4 401 plant_SV(p);
463ee0b2 402}
a0d0e21e 403
4561caa4
CS
404#else /* ! DEBUGGING */
405
406#define del_SV(p) plant_SV(p)
407
408#endif /* DEBUGGING */
463ee0b2 409
645c22ef
DM
410
411/*
ccfc67b7
JH
412=head1 SV Manipulation Functions
413
645c22ef
DM
414=for apidoc sv_add_arena
415
416Given a chunk of memory, link it to the head of the list of arenas,
417and split it into a list of free SVs.
418
419=cut
420*/
421
d2bd4e7f
NC
422static void
423S_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags)
463ee0b2 424{
daba3364 425 SV *const sva = MUTABLE_SV(ptr);
eb578fdb
KW
426 SV* sv;
427 SV* svend;
4633a7c4 428
7918f24d
NC
429 PERL_ARGS_ASSERT_SV_ADD_ARENA;
430
4633a7c4 431 /* The first SV in an arena isn't an SV. */
3280af22 432 SvANY(sva) = (void *) PL_sv_arenaroot; /* ptr to next arena */
4633a7c4
LW
433 SvREFCNT(sva) = size / sizeof(SV); /* number of SV slots */
434 SvFLAGS(sva) = flags; /* FAKE if not to be freed */
435
3280af22
NIS
436 PL_sv_arenaroot = sva;
437 PL_sv_root = sva + 1;
4633a7c4
LW
438
439 svend = &sva[SvREFCNT(sva) - 1];
440 sv = sva + 1;
463ee0b2 441 while (sv < svend) {
3eef1deb 442 SvARENA_CHAIN_SET(sv, (sv + 1));
03e36789 443#ifdef DEBUGGING
978b032e 444 SvREFCNT(sv) = 0;
03e36789 445#endif
4b69cbe3 446 /* Must always set typemask because it's always checked in on cleanup
03e36789 447 when the arenas are walked looking for objects. */
8990e307 448 SvFLAGS(sv) = SVTYPEMASK;
463ee0b2
LW
449 sv++;
450 }
3eef1deb 451 SvARENA_CHAIN_SET(sv, 0);
03e36789
NC
452#ifdef DEBUGGING
453 SvREFCNT(sv) = 0;
454#endif
4633a7c4
LW
455 SvFLAGS(sv) = SVTYPEMASK;
456}
457
055972dc
DM
458/* visit(): call the named function for each non-free SV in the arenas
459 * whose flags field matches the flags/mask args. */
645c22ef 460
5226ed68 461STATIC I32
de37a194 462S_visit(pTHX_ SVFUNC_t f, const U32 flags, const U32 mask)
8990e307 463{
4633a7c4 464 SV* sva;
5226ed68 465 I32 visited = 0;
8990e307 466
7918f24d
NC
467 PERL_ARGS_ASSERT_VISIT;
468
daba3364 469 for (sva = PL_sv_arenaroot; sva; sva = MUTABLE_SV(SvANY(sva))) {
eb578fdb
KW
470 const SV * const svend = &sva[SvREFCNT(sva)];
471 SV* sv;
4561caa4 472 for (sv = sva + 1; sv < svend; ++sv) {
e4787c0c 473 if (SvTYPE(sv) != (svtype)SVTYPEMASK
055972dc
DM
474 && (sv->sv_flags & mask) == flags
475 && SvREFCNT(sv))
476 {
942481a7 477 (*f)(aTHX_ sv);
5226ed68
JH
478 ++visited;
479 }
8990e307
LW
480 }
481 }
5226ed68 482 return visited;
8990e307
LW
483}
484
758a08c3
JH
485#ifdef DEBUGGING
486
645c22ef
DM
487/* called by sv_report_used() for each live SV */
488
489static void
5fa45a31 490do_report_used(pTHX_ SV *const sv)
645c22ef 491{
e4787c0c 492 if (SvTYPE(sv) != (svtype)SVTYPEMASK) {
645c22ef
DM
493 PerlIO_printf(Perl_debug_log, "****\n");
494 sv_dump(sv);
495 }
496}
758a08c3 497#endif
645c22ef
DM
498
499/*
500=for apidoc sv_report_used
501
fde67290 502Dump the contents of all SVs not yet freed (debugging aid).
645c22ef
DM
503
504=cut
505*/
506
8990e307 507void
864dbfa3 508Perl_sv_report_used(pTHX)
4561caa4 509{
ff270d3a 510#ifdef DEBUGGING
055972dc 511 visit(do_report_used, 0, 0);
96a5add6
AL
512#else
513 PERL_UNUSED_CONTEXT;
ff270d3a 514#endif
4561caa4
CS
515}
516
645c22ef
DM
517/* called by sv_clean_objs() for each live SV */
518
519static void
de37a194 520do_clean_objs(pTHX_ SV *const ref)
645c22ef 521{
ea724faa
NC
522 assert (SvROK(ref));
523 {
823a54a3
AL
524 SV * const target = SvRV(ref);
525 if (SvOBJECT(target)) {
526 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning object ref:\n "), sv_dump(ref)));
527 if (SvWEAKREF(ref)) {
528 sv_del_backref(target, ref);
529 SvWEAKREF_off(ref);
530 SvRV_set(ref, NULL);
531 } else {
532 SvROK_off(ref);
533 SvRV_set(ref, NULL);
fc2b2dca 534 SvREFCNT_dec_NN(target);
823a54a3 535 }
645c22ef
DM
536 }
537 }
645c22ef
DM
538}
539
645c22ef 540
e4487e9b
DM
541/* clear any slots in a GV which hold objects - except IO;
542 * called by sv_clean_objs() for each live GV */
543
645c22ef 544static void
f30de749 545do_clean_named_objs(pTHX_ SV *const sv)
645c22ef 546{
57ef47cc 547 SV *obj;
ea724faa 548 assert(SvTYPE(sv) == SVt_PVGV);
d011219a 549 assert(isGV_with_GP(sv));
57ef47cc
DM
550 if (!GvGP(sv))
551 return;
552
553 /* freeing GP entries may indirectly free the current GV;
554 * hold onto it while we mess with the GP slots */
555 SvREFCNT_inc(sv);
556
557 if ( ((obj = GvSV(sv) )) && SvOBJECT(obj)) {
558 DEBUG_D((PerlIO_printf(Perl_debug_log,
559 "Cleaning named glob SV object:\n "), sv_dump(obj)));
560 GvSV(sv) = NULL;
fc2b2dca 561 SvREFCNT_dec_NN(obj);
57ef47cc
DM
562 }
563 if ( ((obj = MUTABLE_SV(GvAV(sv)) )) && SvOBJECT(obj)) {
564 DEBUG_D((PerlIO_printf(Perl_debug_log,
565 "Cleaning named glob AV object:\n "), sv_dump(obj)));
566 GvAV(sv) = NULL;
fc2b2dca 567 SvREFCNT_dec_NN(obj);
57ef47cc
DM
568 }
569 if ( ((obj = MUTABLE_SV(GvHV(sv)) )) && SvOBJECT(obj)) {
570 DEBUG_D((PerlIO_printf(Perl_debug_log,
571 "Cleaning named glob HV object:\n "), sv_dump(obj)));
572 GvHV(sv) = NULL;
fc2b2dca 573 SvREFCNT_dec_NN(obj);
57ef47cc
DM
574 }
575 if ( ((obj = MUTABLE_SV(GvCV(sv)) )) && SvOBJECT(obj)) {
576 DEBUG_D((PerlIO_printf(Perl_debug_log,
577 "Cleaning named glob CV object:\n "), sv_dump(obj)));
c43ae56f 578 GvCV_set(sv, NULL);
fc2b2dca 579 SvREFCNT_dec_NN(obj);
57ef47cc 580 }
fc2b2dca 581 SvREFCNT_dec_NN(sv); /* undo the inc above */
e4487e9b
DM
582}
583
68b590d9 584/* clear any IO slots in a GV which hold objects (except stderr, defout);
e4487e9b
DM
585 * called by sv_clean_objs() for each live GV */
586
587static void
588do_clean_named_io_objs(pTHX_ SV *const sv)
589{
e4487e9b
DM
590 SV *obj;
591 assert(SvTYPE(sv) == SVt_PVGV);
592 assert(isGV_with_GP(sv));
68b590d9 593 if (!GvGP(sv) || sv == (SV*)PL_stderrgv || sv == (SV*)PL_defoutgv)
e4487e9b
DM
594 return;
595
596 SvREFCNT_inc(sv);
57ef47cc
DM
597 if ( ((obj = MUTABLE_SV(GvIO(sv)) )) && SvOBJECT(obj)) {
598 DEBUG_D((PerlIO_printf(Perl_debug_log,
599 "Cleaning named glob IO object:\n "), sv_dump(obj)));
600 GvIOp(sv) = NULL;
fc2b2dca 601 SvREFCNT_dec_NN(obj);
645c22ef 602 }
fc2b2dca 603 SvREFCNT_dec_NN(sv); /* undo the inc above */
645c22ef 604}
645c22ef 605
4155e4fe
FC
606/* Void wrapper to pass to visit() */
607static void
608do_curse(pTHX_ SV * const sv) {
c2910e6c
FC
609 if ((PL_stderrgv && GvGP(PL_stderrgv) && (SV*)GvIO(PL_stderrgv) == sv)
610 || (PL_defoutgv && GvGP(PL_defoutgv) && (SV*)GvIO(PL_defoutgv) == sv))
4155e4fe
FC
611 return;
612 (void)curse(sv, 0);
613}
614
645c22ef
DM
615/*
616=for apidoc sv_clean_objs
617
fde67290 618Attempt to destroy all objects not yet freed.
645c22ef
DM
619
620=cut
621*/
622
4561caa4 623void
864dbfa3 624Perl_sv_clean_objs(pTHX)
4561caa4 625{
68b590d9 626 GV *olddef, *olderr;
3280af22 627 PL_in_clean_objs = TRUE;
055972dc 628 visit(do_clean_objs, SVf_ROK, SVf_ROK);
e4487e9b
DM
629 /* Some barnacles may yet remain, clinging to typeglobs.
630 * Run the non-IO destructors first: they may want to output
631 * error messages, close files etc */
d011219a 632 visit(do_clean_named_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
e4487e9b 633 visit(do_clean_named_io_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP);
4155e4fe
FC
634 /* And if there are some very tenacious barnacles clinging to arrays,
635 closures, or what have you.... */
636 visit(do_curse, SVs_OBJECT, SVs_OBJECT);
68b590d9
DM
637 olddef = PL_defoutgv;
638 PL_defoutgv = NULL; /* disable skip of PL_defoutgv */
639 if (olddef && isGV_with_GP(olddef))
640 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olddef));
641 olderr = PL_stderrgv;
642 PL_stderrgv = NULL; /* disable skip of PL_stderrgv */
643 if (olderr && isGV_with_GP(olderr))
644 do_clean_named_io_objs(aTHX_ MUTABLE_SV(olderr));
645 SvREFCNT_dec(olddef);
3280af22 646 PL_in_clean_objs = FALSE;
4561caa4
CS
647}
648
645c22ef
DM
649/* called by sv_clean_all() for each live SV */
650
651static void
de37a194 652do_clean_all(pTHX_ SV *const sv)
645c22ef 653{
daba3364 654 if (sv == (const SV *) PL_fdpid || sv == (const SV *)PL_strtab) {
cddfcddc 655 /* don't clean pid table and strtab */
d17ea597 656 return;
cddfcddc 657 }
147e3846 658 DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%" UVxf "\n", PTR2UV(sv)) ));
645c22ef 659 SvFLAGS(sv) |= SVf_BREAK;
fc2b2dca 660 SvREFCNT_dec_NN(sv);
645c22ef
DM
661}
662
663/*
664=for apidoc sv_clean_all
665
666Decrement the refcnt of each remaining SV, possibly triggering a
fde67290 667cleanup. This function may have to be called multiple times to free
ff276b08 668SVs which are in complex self-referential hierarchies.
645c22ef
DM
669
670=cut
671*/
672
5226ed68 673I32
864dbfa3 674Perl_sv_clean_all(pTHX)
8990e307 675{
5226ed68 676 I32 cleaned;
3280af22 677 PL_in_clean_all = TRUE;
055972dc 678 cleaned = visit(do_clean_all, 0,0);
5226ed68 679 return cleaned;
8990e307 680}
463ee0b2 681
5e258f8c
JC
682/*
683 ARENASETS: a meta-arena implementation which separates arena-info
684 into struct arena_set, which contains an array of struct
685 arena_descs, each holding info for a single arena. By separating
686 the meta-info from the arena, we recover the 1st slot, formerly
687 borrowed for list management. The arena_set is about the size of an
39244528 688 arena, avoiding the needless malloc overhead of a naive linked-list.
5e258f8c
JC
689
690 The cost is 1 arena-set malloc per ~320 arena-mallocs, + the unused
691 memory in the last arena-set (1/2 on average). In trade, we get
692 back the 1st slot in each arena (ie 1.7% of a CV-arena, less for
d2a0f284 693 smaller types). The recovery of the wasted space allows use of
e15dad31
JC
694 small arenas for large, rare body types, by changing array* fields
695 in body_details_by_type[] below.
5e258f8c 696*/
5e258f8c 697struct arena_desc {
398c677b
NC
698 char *arena; /* the raw storage, allocated aligned */
699 size_t size; /* its size ~4k typ */
e5973ed5 700 svtype utype; /* bodytype stored in arena */
5e258f8c
JC
701};
702
e6148039
NC
703struct arena_set;
704
705/* Get the maximum number of elements in set[] such that struct arena_set
e15dad31 706 will fit within PERL_ARENA_SIZE, which is probably just under 4K, and
e6148039
NC
707 therefore likely to be 1 aligned memory page. */
708
709#define ARENAS_PER_SET ((PERL_ARENA_SIZE - sizeof(struct arena_set*) \
710 - 2 * sizeof(int)) / sizeof (struct arena_desc))
5e258f8c
JC
711
712struct arena_set {
713 struct arena_set* next;
0a848332
NC
714 unsigned int set_size; /* ie ARENAS_PER_SET */
715 unsigned int curr; /* index of next available arena-desc */
5e258f8c
JC
716 struct arena_desc set[ARENAS_PER_SET];
717};
718
645c22ef
DM
719/*
720=for apidoc sv_free_arenas
721
fde67290 722Deallocate the memory used by all arenas. Note that all the individual SV
645c22ef
DM
723heads and bodies within the arenas must already have been freed.
724
725=cut
7fefc6c1 726
645c22ef 727*/
4633a7c4 728void
864dbfa3 729Perl_sv_free_arenas(pTHX)
4633a7c4
LW
730{
731 SV* sva;
732 SV* svanext;
0a848332 733 unsigned int i;
4633a7c4
LW
734
735 /* Free arenas here, but be careful about fake ones. (We assume
736 contiguity of the fake ones with the corresponding real ones.) */
737
3280af22 738 for (sva = PL_sv_arenaroot; sva; sva = svanext) {
daba3364 739 svanext = MUTABLE_SV(SvANY(sva));
4633a7c4 740 while (svanext && SvFAKE(svanext))
daba3364 741 svanext = MUTABLE_SV(SvANY(svanext));
4633a7c4
LW
742
743 if (!SvFAKE(sva))
1df70142 744 Safefree(sva);
4633a7c4 745 }
93e68bfb 746
5e258f8c 747 {
0a848332
NC
748 struct arena_set *aroot = (struct arena_set*) PL_body_arenas;
749
750 while (aroot) {
751 struct arena_set *current = aroot;
752 i = aroot->curr;
753 while (i--) {
5e258f8c
JC
754 assert(aroot->set[i].arena);
755 Safefree(aroot->set[i].arena);
756 }
0a848332
NC
757 aroot = aroot->next;
758 Safefree(current);
5e258f8c
JC
759 }
760 }
dc8220bf 761 PL_body_arenas = 0;
fdda85ca 762
0a848332
NC
763 i = PERL_ARENA_ROOTS_SIZE;
764 while (i--)
93e68bfb 765 PL_body_roots[i] = 0;
93e68bfb 766
3280af22
NIS
767 PL_sv_arenaroot = 0;
768 PL_sv_root = 0;
4633a7c4
LW
769}
770
bd81e77b
NC
771/*
772 Here are mid-level routines that manage the allocation of bodies out
cc463ce5 773 of the various arenas. There are 4 kinds of arenas:
29489e7c 774
bd81e77b
NC
775 1. SV-head arenas, which are discussed and handled above
776 2. regular body arenas
777 3. arenas for reduced-size bodies
778 4. Hash-Entry arenas
29489e7c 779
bd81e77b
NC
780 Arena types 2 & 3 are chained by body-type off an array of
781 arena-root pointers, which is indexed by svtype. Some of the
782 larger/less used body types are malloced singly, since a large
783 unused block of them is wasteful. Also, several svtypes dont have
784 bodies; the data fits into the sv-head itself. The arena-root
785 pointer thus has a few unused root-pointers (which may be hijacked
cc463ce5 786 later for arena type 4)
29489e7c 787
bd81e77b
NC
788 3 differs from 2 as an optimization; some body types have several
789 unused fields in the front of the structure (which are kept in-place
790 for consistency). These bodies can be allocated in smaller chunks,
791 because the leading fields arent accessed. Pointers to such bodies
792 are decremented to point at the unused 'ghost' memory, knowing that
793 the pointers are used with offsets to the real memory.
29489e7c 794
d2a0f284
JC
795Allocation of SV-bodies is similar to SV-heads, differing as follows;
796the allocation mechanism is used for many body types, so is somewhat
797more complicated, it uses arena-sets, and has no need for still-live
798SV detection.
799
800At the outermost level, (new|del)_X*V macros return bodies of the
801appropriate type. These macros call either (new|del)_body_type or
802(new|del)_body_allocated macro pairs, depending on specifics of the
803type. Most body types use the former pair, the latter pair is used to
804allocate body types with "ghost fields".
805
806"ghost fields" are fields that are unused in certain types, and
69ba284b 807consequently don't need to actually exist. They are declared because
d2a0f284
JC
808they're part of a "base type", which allows use of functions as
809methods. The simplest examples are AVs and HVs, 2 aggregate types
810which don't use the fields which support SCALAR semantics.
811
69ba284b 812For these types, the arenas are carved up into appropriately sized
d2a0f284
JC
813chunks, we thus avoid wasted memory for those unaccessed members.
814When bodies are allocated, we adjust the pointer back in memory by the
69ba284b 815size of the part not allocated, so it's as if we allocated the full
d2a0f284
JC
816structure. (But things will all go boom if you write to the part that
817is "not there", because you'll be overwriting the last members of the
818preceding structure in memory.)
819
69ba284b 820We calculate the correction using the STRUCT_OFFSET macro on the first
a05ea1cf 821member present. If the allocated structure is smaller (no initial NV
69ba284b
NC
822actually allocated) then the net effect is to subtract the size of the NV
823from the pointer, to return a new pointer as if an initial NV were actually
a05ea1cf 824allocated. (We were using structures named *_allocated for this, but
69ba284b
NC
825this turned out to be a subtle bug, because a structure without an NV
826could have a lower alignment constraint, but the compiler is allowed to
827optimised accesses based on the alignment constraint of the actual pointer
828to the full structure, for example, using a single 64 bit load instruction
829because it "knows" that two adjacent 32 bit members will be 8-byte aligned.)
d2a0f284 830
a05ea1cf 831This is the same trick as was used for NV and IV bodies. Ironically it
d2a0f284 832doesn't need to be used for NV bodies any more, because NV is now at
5b306eef
DD
833the start of the structure. IV bodies, and also in some builds NV bodies,
834don't need it either, because they are no longer allocated.
d2a0f284
JC
835
836In turn, the new_body_* allocators call S_new_body(), which invokes
837new_body_inline macro, which takes a lock, and takes a body off the
1e30fcd5 838linked list at PL_body_roots[sv_type], calling Perl_more_bodies() if
d2a0f284
JC
839necessary to refresh an empty list. Then the lock is released, and
840the body is returned.
841
99816f8d 842Perl_more_bodies allocates a new arena, and carves it up into an array of N
d2a0f284
JC
843bodies, which it strings into a linked list. It looks up arena-size
844and body-size from the body_details table described below, thus
845supporting the multiple body-types.
846
847If PURIFY is defined, or PERL_ARENA_SIZE=0, arenas are not used, and
848the (new|del)_X*V macros are mapped directly to malloc/free.
849
d2a0f284
JC
850For each sv-type, struct body_details bodies_by_type[] carries
851parameters which control these aspects of SV handling:
852
853Arena_size determines whether arenas are used for this body type, and if
854so, how big they are. PURIFY or PERL_ARENA_SIZE=0 set this field to
855zero, forcing individual mallocs and frees.
856
857Body_size determines how big a body is, and therefore how many fit into
858each arena. Offset carries the body-pointer adjustment needed for
69ba284b 859"ghost fields", and is used in *_allocated macros.
d2a0f284
JC
860
861But its main purpose is to parameterize info needed in
862Perl_sv_upgrade(). The info here dramatically simplifies the function
69ba284b 863vs the implementation in 5.8.8, making it table-driven. All fields
d2a0f284
JC
864are used for this, except for arena_size.
865
866For the sv-types that have no bodies, arenas are not used, so those
867PL_body_roots[sv_type] are unused, and can be overloaded. In
868something of a special case, SVt_NULL is borrowed for HE arenas;
c6f8b1d0 869PL_body_roots[HE_SVSLOT=SVt_NULL] is filled by S_more_he, but the
d2a0f284 870bodies_by_type[SVt_NULL] slot is not used, as the table is not
c6f8b1d0 871available in hv.c.
d2a0f284 872
29489e7c
DM
873*/
874
bd81e77b 875struct body_details {
0fb58b32 876 U8 body_size; /* Size to allocate */
10666ae3 877 U8 copy; /* Size of structure to copy (may be shorter) */
5b306eef
DD
878 U8 offset; /* Size of unalloced ghost fields to first alloced field*/
879 PERL_BITFIELD8 type : 4; /* We have space for a sanity check. */
880 PERL_BITFIELD8 cant_upgrade : 1;/* Cannot upgrade this type */
881 PERL_BITFIELD8 zero_nv : 1; /* zero the NV when upgrading from this */
882 PERL_BITFIELD8 arena : 1; /* Allocated from an arena */
883 U32 arena_size; /* Size of arena to allocate */
bd81e77b 884};
29489e7c 885
b11187bf
TC
886#define ALIGNED_TYPE_NAME(name) name##_aligned
887#define ALIGNED_TYPE(name) \
888 typedef union { \
889 name align_me; \
890 NV nv; \
891 IV iv; \
892 } ALIGNED_TYPE_NAME(name);
893
894ALIGNED_TYPE(regexp);
895ALIGNED_TYPE(XPVGV);
896ALIGNED_TYPE(XPVLV);
897ALIGNED_TYPE(XPVAV);
898ALIGNED_TYPE(XPVHV);
899ALIGNED_TYPE(XPVCV);
900ALIGNED_TYPE(XPVFM);
901ALIGNED_TYPE(XPVIO);
902
bd81e77b
NC
903#define HADNV FALSE
904#define NONV TRUE
29489e7c 905
d2a0f284 906
bd81e77b
NC
907#ifdef PURIFY
908/* With -DPURFIY we allocate everything directly, and don't use arenas.
909 This seems a rather elegant way to simplify some of the code below. */
910#define HASARENA FALSE
911#else
912#define HASARENA TRUE
913#endif
914#define NOARENA FALSE
29489e7c 915
d2a0f284
JC
916/* Size the arenas to exactly fit a given number of bodies. A count
917 of 0 fits the max number bodies into a PERL_ARENA_SIZE.block,
918 simplifying the default. If count > 0, the arena is sized to fit
919 only that many bodies, allowing arenas to be used for large, rare
920 bodies (XPVFM, XPVIO) without undue waste. The arena size is
921 limited by PERL_ARENA_SIZE, so we can safely oversize the
922 declarations.
923 */
95db5f15
MB
924#define FIT_ARENA0(body_size) \
925 ((size_t)(PERL_ARENA_SIZE / body_size) * body_size)
926#define FIT_ARENAn(count,body_size) \
927 ( count * body_size <= PERL_ARENA_SIZE) \
928 ? count * body_size \
929 : FIT_ARENA0 (body_size)
930#define FIT_ARENA(count,body_size) \
cd1dc8e2 931 (U32)(count \
95db5f15 932 ? FIT_ARENAn (count, body_size) \
cd1dc8e2 933 : FIT_ARENA0 (body_size))
d2a0f284 934
bd81e77b
NC
935/* Calculate the length to copy. Specifically work out the length less any
936 final padding the compiler needed to add. See the comment in sv_upgrade
937 for why copying the padding proved to be a bug. */
29489e7c 938
bd81e77b
NC
939#define copy_length(type, last_member) \
940 STRUCT_OFFSET(type, last_member) \
daba3364 941 + sizeof (((type*)SvANY((const SV *)0))->last_member)
29489e7c 942
bd81e77b 943static const struct body_details bodies_by_type[] = {
829cd18a
NC
944 /* HEs use this offset for their arena. */
945 { 0, 0, 0, SVt_NULL, FALSE, NONV, NOARENA, 0 },
d2a0f284 946
db93c0c4
NC
947 /* IVs are in the head, so the allocation size is 0. */
948 { 0,
d2a0f284 949 sizeof(IV), /* This is used to copy out the IV body. */
10666ae3 950 STRUCT_OFFSET(XPVIV, xiv_iv), SVt_IV, FALSE, NONV,
db93c0c4 951 NOARENA /* IVS don't need an arena */, 0
d2a0f284
JC
952 },
953
5b306eef
DD
954#if NVSIZE <= IVSIZE
955 { 0, sizeof(NV),
956 STRUCT_OFFSET(XPVNV, xnv_u),
957 SVt_NV, FALSE, HADNV, NOARENA, 0 },
958#else
6e128786
NC
959 { sizeof(NV), sizeof(NV),
960 STRUCT_OFFSET(XPVNV, xnv_u),
961 SVt_NV, FALSE, HADNV, HASARENA, FIT_ARENA(0, sizeof(NV)) },
5b306eef 962#endif
d2a0f284 963
bc337e5c 964 { sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
965 copy_length(XPV, xpv_len) - STRUCT_OFFSET(XPV, xpv_cur),
966 + STRUCT_OFFSET(XPV, xpv_cur),
69ba284b 967 SVt_PV, FALSE, NONV, HASARENA,
889d28b2 968 FIT_ARENA(0, sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 969
d361b004
KW
970 { sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur),
971 copy_length(XINVLIST, is_offset) - STRUCT_OFFSET(XPV, xpv_cur),
972 + STRUCT_OFFSET(XPV, xpv_cur),
973 SVt_INVLIST, TRUE, NONV, HASARENA,
974 FIT_ARENA(0, sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur)) },
e94d9b54 975
bc337e5c 976 { sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
977 copy_length(XPVIV, xiv_u) - STRUCT_OFFSET(XPV, xpv_cur),
978 + STRUCT_OFFSET(XPV, xpv_cur),
979 SVt_PVIV, FALSE, NONV, HASARENA,
980 FIT_ARENA(0, sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 981
bc337e5c 982 { sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur),
889d28b2
NC
983 copy_length(XPVNV, xnv_u) - STRUCT_OFFSET(XPV, xpv_cur),
984 + STRUCT_OFFSET(XPV, xpv_cur),
985 SVt_PVNV, FALSE, HADNV, HASARENA,
986 FIT_ARENA(0, sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur)) },
d2a0f284 987
6e128786 988 { sizeof(XPVMG), copy_length(XPVMG, xnv_u), 0, SVt_PVMG, FALSE, HADNV,
d2a0f284 989 HASARENA, FIT_ARENA(0, sizeof(XPVMG)) },
4df7f6af 990
b11187bf 991 { sizeof(ALIGNED_TYPE_NAME(regexp)),
601dfd0a
NC
992 sizeof(regexp),
993 0,
ecff11eb 994 SVt_REGEXP, TRUE, NONV, HASARENA,
b11187bf 995 FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(regexp)))
5c35adbb 996 },
4df7f6af 997
b11187bf
TC
998 { sizeof(ALIGNED_TYPE_NAME(XPVGV)), sizeof(XPVGV), 0, SVt_PVGV, TRUE, HADNV,
999 HASARENA, FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVGV))) },
d2a0f284 1000
b11187bf
TC
1001 { sizeof(ALIGNED_TYPE_NAME(XPVLV)), sizeof(XPVLV), 0, SVt_PVLV, TRUE, HADNV,
1002 HASARENA, FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVLV))) },
d2a0f284 1003
b11187bf 1004 { sizeof(ALIGNED_TYPE_NAME(XPVAV)),
4f7003f5 1005 copy_length(XPVAV, xav_alloc),
601dfd0a 1006 0,
69ba284b 1007 SVt_PVAV, TRUE, NONV, HASARENA,
b11187bf 1008 FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVAV))) },
d2a0f284 1009
b11187bf 1010 { sizeof(ALIGNED_TYPE_NAME(XPVHV)),
359164a0 1011 copy_length(XPVHV, xhv_max),
601dfd0a 1012 0,
69ba284b 1013 SVt_PVHV, TRUE, NONV, HASARENA,
b11187bf 1014 FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVHV))) },
d2a0f284 1015
b11187bf 1016 { sizeof(ALIGNED_TYPE_NAME(XPVCV)),
601dfd0a
NC
1017 sizeof(XPVCV),
1018 0,
69ba284b 1019 SVt_PVCV, TRUE, NONV, HASARENA,
b11187bf 1020 FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVCV))) },
69ba284b 1021
b11187bf 1022 { sizeof(ALIGNED_TYPE_NAME(XPVFM)),
601dfd0a
NC
1023 sizeof(XPVFM),
1024 0,
69ba284b 1025 SVt_PVFM, TRUE, NONV, NOARENA,
b11187bf 1026 FIT_ARENA(20, sizeof(ALIGNED_TYPE_NAME(XPVFM))) },
d2a0f284 1027
b11187bf 1028 { sizeof(ALIGNED_TYPE_NAME(XPVIO)),
601dfd0a
NC
1029 sizeof(XPVIO),
1030 0,
b6f60916 1031 SVt_PVIO, TRUE, NONV, HASARENA,
b11187bf 1032 FIT_ARENA(24, sizeof(ALIGNED_TYPE_NAME(XPVIO))) },
bd81e77b 1033};
29489e7c 1034
bd81e77b 1035#define new_body_allocated(sv_type) \
d2a0f284 1036 (void *)((char *)S_new_body(aTHX_ sv_type) \
bd81e77b 1037 - bodies_by_type[sv_type].offset)
29489e7c 1038
26359cfa
NC
1039/* return a thing to the free list */
1040
1041#define del_body(thing, root) \
1042 STMT_START { \
1043 void ** const thing_copy = (void **)thing; \
1044 *thing_copy = *root; \
1045 *root = (void*)thing_copy; \
1046 } STMT_END
29489e7c 1047
bd81e77b 1048#ifdef PURIFY
5b306eef
DD
1049#if !(NVSIZE <= IVSIZE)
1050# define new_XNV() safemalloc(sizeof(XPVNV))
1051#endif
beeec492
NC
1052#define new_XPVNV() safemalloc(sizeof(XPVNV))
1053#define new_XPVMG() safemalloc(sizeof(XPVMG))
29489e7c 1054
beeec492 1055#define del_XPVGV(p) safefree(p)
29489e7c 1056
bd81e77b 1057#else /* !PURIFY */
29489e7c 1058
5b306eef
DD
1059#if !(NVSIZE <= IVSIZE)
1060# define new_XNV() new_body_allocated(SVt_NV)
1061#endif
65ac1738 1062#define new_XPVNV() new_body_allocated(SVt_PVNV)
65ac1738 1063#define new_XPVMG() new_body_allocated(SVt_PVMG)
645c22ef 1064
26359cfa
NC
1065#define del_XPVGV(p) del_body(p + bodies_by_type[SVt_PVGV].offset, \
1066 &PL_body_roots[SVt_PVGV])
1d7c1841 1067
bd81e77b 1068#endif /* PURIFY */
93e68bfb 1069
bd81e77b 1070/* no arena for you! */
93e68bfb 1071
bd81e77b 1072#define new_NOARENA(details) \
beeec492 1073 safemalloc((details)->body_size + (details)->offset)
bd81e77b 1074#define new_NOARENAZ(details) \
beeec492 1075 safecalloc((details)->body_size + (details)->offset, 1)
d2a0f284 1076
1e30fcd5
NC
1077void *
1078Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size,
1079 const size_t arena_size)
d2a0f284 1080{
d2a0f284 1081 void ** const root = &PL_body_roots[sv_type];
99816f8d
NC
1082 struct arena_desc *adesc;
1083 struct arena_set *aroot = (struct arena_set *) PL_body_arenas;
1084 unsigned int curr;
d2a0f284
JC
1085 char *start;
1086 const char *end;
02982131 1087 const size_t good_arena_size = Perl_malloc_good_size(arena_size);
8c3a0f6c 1088#if defined(DEBUGGING)
23e9d66c
NC
1089 static bool done_sanity_check;
1090
10666ae3 1091 if (!done_sanity_check) {
ea471437 1092 unsigned int i = SVt_LAST;
10666ae3
NC
1093
1094 done_sanity_check = TRUE;
1095
1096 while (i--)
1097 assert (bodies_by_type[i].type == i);
1098 }
1099#endif
1100
02982131 1101 assert(arena_size);
23e9d66c 1102
99816f8d
NC
1103 /* may need new arena-set to hold new arena */
1104 if (!aroot || aroot->curr >= aroot->set_size) {
1105 struct arena_set *newroot;
1106 Newxz(newroot, 1, struct arena_set);
1107 newroot->set_size = ARENAS_PER_SET;
1108 newroot->next = aroot;
1109 aroot = newroot;
1110 PL_body_arenas = (void *) newroot;
1111 DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", (void*)aroot));
1112 }
1113
1114 /* ok, now have arena-set with at least 1 empty/available arena-desc */
1115 curr = aroot->curr++;
1116 adesc = &(aroot->set[curr]);
1117 assert(!adesc->arena);
1118
1119 Newx(adesc->arena, good_arena_size, char);
1120 adesc->size = good_arena_size;
1121 adesc->utype = sv_type;
147e3846 1122 DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %" UVuf "\n",
99816f8d
NC
1123 curr, (void*)adesc->arena, (UV)good_arena_size));
1124
1125 start = (char *) adesc->arena;
d2a0f284 1126
29657bb6
NC
1127 /* Get the address of the byte after the end of the last body we can fit.
1128 Remember, this is integer division: */
02982131 1129 end = start + good_arena_size / body_size * body_size;
d2a0f284 1130
486ec47a 1131 /* computed count doesn't reflect the 1st slot reservation */
d8fca402
NC
1132#if defined(MYMALLOC) || defined(HAS_MALLOC_GOOD_SIZE)
1133 DEBUG_m(PerlIO_printf(Perl_debug_log,
1134 "arena %p end %p arena-size %d (from %d) type %d "
1135 "size %d ct %d\n",
02982131
NC
1136 (void*)start, (void*)end, (int)good_arena_size,
1137 (int)arena_size, sv_type, (int)body_size,
1138 (int)good_arena_size / (int)body_size));
d8fca402 1139#else
d2a0f284
JC
1140 DEBUG_m(PerlIO_printf(Perl_debug_log,
1141 "arena %p end %p arena-size %d type %d size %d ct %d\n",
6c9570dc 1142 (void*)start, (void*)end,
02982131
NC
1143 (int)arena_size, sv_type, (int)body_size,
1144 (int)good_arena_size / (int)body_size));
d8fca402 1145#endif
d2a0f284
JC
1146 *root = (void *)start;
1147
29657bb6
NC
1148 while (1) {
1149 /* Where the next body would start: */
d2a0f284 1150 char * const next = start + body_size;
29657bb6
NC
1151
1152 if (next >= end) {
1153 /* This is the last body: */
1154 assert(next == end);
1155
1156 *(void **)start = 0;
1157 return *root;
1158 }
1159
d2a0f284
JC
1160 *(void**) start = (void *)next;
1161 start = next;
1162 }
d2a0f284
JC
1163}
1164
1165/* grab a new thing from the free list, allocating more if necessary.
1166 The inline version is used for speed in hot routines, and the
1167 function using it serves the rest (unless PURIFY).
1168*/
1169#define new_body_inline(xpv, sv_type) \
1170 STMT_START { \
1171 void ** const r3wt = &PL_body_roots[sv_type]; \
11b79775 1172 xpv = (PTR_TBL_ENT_t*) (*((void **)(r3wt)) \
1e30fcd5 1173 ? *((void **)(r3wt)) : Perl_more_bodies(aTHX_ sv_type, \
02982131
NC
1174 bodies_by_type[sv_type].body_size,\
1175 bodies_by_type[sv_type].arena_size)); \
d2a0f284 1176 *(r3wt) = *(void**)(xpv); \
d2a0f284
JC
1177 } STMT_END
1178
1179#ifndef PURIFY
1180
1181STATIC void *
de37a194 1182S_new_body(pTHX_ const svtype sv_type)
d2a0f284 1183{
d2a0f284
JC
1184 void *xpv;
1185 new_body_inline(xpv, sv_type);
1186 return xpv;
1187}
1188
1189#endif
93e68bfb 1190
238b27b3
NC
1191static const struct body_details fake_rv =
1192 { 0, 0, 0, SVt_IV, FALSE, NONV, NOARENA, 0 };
1193
bd81e77b
NC
1194/*
1195=for apidoc sv_upgrade
93e68bfb 1196
bd81e77b
NC
1197Upgrade an SV to a more complex form. Generally adds a new body type to the
1198SV, then copies across as much information as possible from the old body.
9521ca61
FC
1199It croaks if the SV is already in a more complex form than requested. You
1200generally want to use the C<SvUPGRADE> macro wrapper, which checks the type
1201before calling C<sv_upgrade>, and hence does not croak. See also
fbe13c60 1202C<L</svtype>>.
93e68bfb 1203
bd81e77b 1204=cut
93e68bfb 1205*/
93e68bfb 1206
bd81e77b 1207void
5aaab254 1208Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type)
cac9b346 1209{
bd81e77b
NC
1210 void* old_body;
1211 void* new_body;
42d0e0b7 1212 const svtype old_type = SvTYPE(sv);
d2a0f284 1213 const struct body_details *new_type_details;
238b27b3 1214 const struct body_details *old_type_details
bd81e77b 1215 = bodies_by_type + old_type;
ed7df46e 1216 SV *referent = NULL;
cac9b346 1217
7918f24d
NC
1218 PERL_ARGS_ASSERT_SV_UPGRADE;
1219
1776cbe8
NC
1220 if (old_type == new_type)
1221 return;
1222
1223 /* This clause was purposefully added ahead of the early return above to
1224 the shared string hackery for (sort {$a <=> $b} keys %hash), with the
1225 inference by Nick I-S that it would fix other troublesome cases. See
1226 changes 7162, 7163 (f130fd4589cf5fbb24149cd4db4137c8326f49c1 and parent)
1227
1228 Given that shared hash key scalars are no longer PVIV, but PV, there is
1229 no longer need to unshare so as to free up the IVX slot for its proper
1230 purpose. So it's safe to move the early return earlier. */
1231
093085a8 1232 if (new_type > SVt_PVMG && SvIsCOW(sv)) {
bd81e77b
NC
1233 sv_force_normal_flags(sv, 0);
1234 }
cac9b346 1235
bd81e77b 1236 old_body = SvANY(sv);
de042e1d 1237
bd81e77b
NC
1238 /* Copying structures onto other structures that have been neatly zeroed
1239 has a subtle gotcha. Consider XPVMG
cac9b346 1240
bd81e77b
NC
1241 +------+------+------+------+------+-------+-------+
1242 | NV | CUR | LEN | IV | MAGIC | STASH |
1243 +------+------+------+------+------+-------+-------+
1244 0 4 8 12 16 20 24 28
645c22ef 1245
bd81e77b
NC
1246 where NVs are aligned to 8 bytes, so that sizeof that structure is
1247 actually 32 bytes long, with 4 bytes of padding at the end:
08742458 1248
bd81e77b
NC
1249 +------+------+------+------+------+-------+-------+------+
1250 | NV | CUR | LEN | IV | MAGIC | STASH | ??? |
1251 +------+------+------+------+------+-------+-------+------+
1252 0 4 8 12 16 20 24 28 32
08742458 1253
bd81e77b 1254 so what happens if you allocate memory for this structure:
30f9da9e 1255
bd81e77b
NC
1256 +------+------+------+------+------+-------+-------+------+------+...
1257 | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME |
1258 +------+------+------+------+------+-------+-------+------+------+...
1259 0 4 8 12 16 20 24 28 32 36
bfc44f79 1260
bd81e77b
NC
1261 zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you
1262 expect, because you copy the area marked ??? onto GP. Now, ??? may have
1263 started out as zero once, but it's quite possible that it isn't. So now,
1264 rather than a nicely zeroed GP, you have it pointing somewhere random.
1265 Bugs ensue.
bfc44f79 1266
bd81e77b
NC
1267 (In fact, GP ends up pointing at a previous GP structure, because the
1268 principle cause of the padding in XPVMG getting garbage is a copy of
6c9e42f7
NC
1269 sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob. Right now
1270 this happens to be moot because XPVGV has been re-ordered, with GP
1271 no longer after STASH)
30f9da9e 1272
bd81e77b
NC
1273 So we are careful and work out the size of used parts of all the
1274 structures. */
bfc44f79 1275
bd81e77b
NC
1276 switch (old_type) {
1277 case SVt_NULL:
1278 break;
1279 case SVt_IV:
4df7f6af 1280 if (SvROK(sv)) {
ed7df46e 1281 referent = SvRV(sv);
238b27b3
NC
1282 old_type_details = &fake_rv;
1283 if (new_type == SVt_NV)
1284 new_type = SVt_PVNV;
4df7f6af
NC
1285 } else {
1286 if (new_type < SVt_PVIV) {
1287 new_type = (new_type == SVt_NV)
1288 ? SVt_PVNV : SVt_PVIV;
1289 }
bd81e77b
NC
1290 }
1291 break;
1292 case SVt_NV:
1293 if (new_type < SVt_PVNV) {
1294 new_type = SVt_PVNV;
bd81e77b
NC
1295 }
1296 break;
bd81e77b
NC
1297 case SVt_PV:
1298 assert(new_type > SVt_PV);
6d59e610
LM
1299 STATIC_ASSERT_STMT(SVt_IV < SVt_PV);
1300 STATIC_ASSERT_STMT(SVt_NV < SVt_PV);
bd81e77b
NC
1301 break;
1302 case SVt_PVIV:
1303 break;
1304 case SVt_PVNV:
1305 break;
1306 case SVt_PVMG:
1307 /* Because the XPVMG of PL_mess_sv isn't allocated from the arena,
1308 there's no way that it can be safely upgraded, because perl.c
1309 expects to Safefree(SvANY(PL_mess_sv)) */
1310 assert(sv != PL_mess_sv);
bd81e77b
NC
1311 break;
1312 default:
2439e033 1313 if (UNLIKELY(old_type_details->cant_upgrade))
c81225bc
NC
1314 Perl_croak(aTHX_ "Can't upgrade %s (%" UVuf ") to %" UVuf,
1315 sv_reftype(sv, 0), (UV) old_type, (UV) new_type);
bd81e77b 1316 }
3376de98 1317
2439e033 1318 if (UNLIKELY(old_type > new_type))
3376de98
NC
1319 Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d",
1320 (int)old_type, (int)new_type);
1321
2fa1109b 1322 new_type_details = bodies_by_type + new_type;
645c22ef 1323
bd81e77b
NC
1324 SvFLAGS(sv) &= ~SVTYPEMASK;
1325 SvFLAGS(sv) |= new_type;
932e9ff9 1326
ab4416c0
NC
1327 /* This can't happen, as SVt_NULL is <= all values of new_type, so one of
1328 the return statements above will have triggered. */
1329 assert (new_type != SVt_NULL);
bd81e77b 1330 switch (new_type) {
bd81e77b
NC
1331 case SVt_IV:
1332 assert(old_type == SVt_NULL);
dc6369ef 1333 SET_SVANY_FOR_BODYLESS_IV(sv);
bd81e77b
NC
1334 SvIV_set(sv, 0);
1335 return;
1336 case SVt_NV:
1337 assert(old_type == SVt_NULL);
5b306eef 1338#if NVSIZE <= IVSIZE
dc6369ef 1339 SET_SVANY_FOR_BODYLESS_NV(sv);
5b306eef 1340#else
bd81e77b 1341 SvANY(sv) = new_XNV();
5b306eef 1342#endif
bd81e77b
NC
1343 SvNV_set(sv, 0);
1344 return;
bd81e77b 1345 case SVt_PVHV:
bd81e77b 1346 case SVt_PVAV:
d2a0f284 1347 assert(new_type_details->body_size);
c1ae03ae
NC
1348
1349#ifndef PURIFY
1350 assert(new_type_details->arena);
d2a0f284 1351 assert(new_type_details->arena_size);
c1ae03ae 1352 /* This points to the start of the allocated area. */
d2a0f284
JC
1353 new_body_inline(new_body, new_type);
1354 Zero(new_body, new_type_details->body_size, char);
c1ae03ae
NC
1355 new_body = ((char *)new_body) - new_type_details->offset;
1356#else
1357 /* We always allocated the full length item with PURIFY. To do this
1358 we fake things so that arena is false for all 16 types.. */
1359 new_body = new_NOARENAZ(new_type_details);
1360#endif
1361 SvANY(sv) = new_body;
1362 if (new_type == SVt_PVAV) {
1363 AvMAX(sv) = -1;
1364 AvFILLp(sv) = -1;
1365 AvREAL_only(sv);
64484faa 1366 if (old_type_details->body_size) {
ac572bf4
NC
1367 AvALLOC(sv) = 0;
1368 } else {
1369 /* It will have been zeroed when the new body was allocated.
1370 Lets not write to it, in case it confuses a write-back
1371 cache. */
1372 }
78ac7dd9
NC
1373 } else {
1374 assert(!SvOK(sv));
1375 SvOK_off(sv);
1376#ifndef NODEFAULT_SHAREKEYS
1377 HvSHAREKEYS_on(sv); /* key-sharing on by default */
1378#endif
586fc6a3
SM
1379 /* start with PERL_HASH_DEFAULT_HvMAX+1 buckets: */
1380 HvMAX(sv) = PERL_HASH_DEFAULT_HvMAX;
c1ae03ae 1381 }
aeb18a1e 1382
bd81e77b
NC
1383 /* SVt_NULL isn't the only thing upgraded to AV or HV.
1384 The target created by newSVrv also is, and it can have magic.
1385 However, it never has SvPVX set.
1386 */
4df7f6af
NC
1387 if (old_type == SVt_IV) {
1388 assert(!SvROK(sv));
1389 } else if (old_type >= SVt_PV) {
bd81e77b
NC
1390 assert(SvPVX_const(sv) == 0);
1391 }
aeb18a1e 1392
bd81e77b 1393 if (old_type >= SVt_PVMG) {
e736a858 1394 SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_u.xmg_magic);
bd81e77b 1395 SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash);
797c7171
NC
1396 } else {
1397 sv->sv_u.svu_array = NULL; /* or svu_hash */
bd81e77b
NC
1398 }
1399 break;
93e68bfb 1400
bd81e77b
NC
1401 case SVt_PVIV:
1402 /* XXX Is this still needed? Was it ever needed? Surely as there is
1403 no route from NV to PVIV, NOK can never be true */
1404 assert(!SvNOKp(sv));
1405 assert(!SvNOK(sv));
2b5060ae 1406 /* FALLTHROUGH */
bd81e77b
NC
1407 case SVt_PVIO:
1408 case SVt_PVFM:
bd81e77b
NC
1409 case SVt_PVGV:
1410 case SVt_PVCV:
1411 case SVt_PVLV:
d361b004 1412 case SVt_INVLIST:
12c45b25 1413 case SVt_REGEXP:
bd81e77b
NC
1414 case SVt_PVMG:
1415 case SVt_PVNV:
1416 case SVt_PV:
93e68bfb 1417
d2a0f284 1418 assert(new_type_details->body_size);
bd81e77b
NC
1419 /* We always allocated the full length item with PURIFY. To do this
1420 we fake things so that arena is false for all 16 types.. */
1421 if(new_type_details->arena) {
1422 /* This points to the start of the allocated area. */
d2a0f284
JC
1423 new_body_inline(new_body, new_type);
1424 Zero(new_body, new_type_details->body_size, char);
bd81e77b
NC
1425 new_body = ((char *)new_body) - new_type_details->offset;
1426 } else {
1427 new_body = new_NOARENAZ(new_type_details);
1428 }
1429 SvANY(sv) = new_body;
5e2fc214 1430
bd81e77b 1431 if (old_type_details->copy) {
f9ba3d20
NC
1432 /* There is now the potential for an upgrade from something without
1433 an offset (PVNV or PVMG) to something with one (PVCV, PVFM) */
1434 int offset = old_type_details->offset;
1435 int length = old_type_details->copy;
1436
1437 if (new_type_details->offset > old_type_details->offset) {
d4c19fe8 1438 const int difference
f9ba3d20
NC
1439 = new_type_details->offset - old_type_details->offset;
1440 offset += difference;
1441 length -= difference;
1442 }
1443 assert (length >= 0);
1444
1445 Copy((char *)old_body + offset, (char *)new_body + offset, length,
1446 char);
bd81e77b
NC
1447 }
1448
1449#ifndef NV_ZERO_IS_ALLBITS_ZERO
f2524eef 1450 /* If NV 0.0 is stores as all bits 0 then Zero() already creates a
e5ce394c
NC
1451 * correct 0.0 for us. Otherwise, if the old body didn't have an
1452 * NV slot, but the new one does, then we need to initialise the
1453 * freshly created NV slot with whatever the correct bit pattern is
1454 * for 0.0 */
e22a937e
NC
1455 if (old_type_details->zero_nv && !new_type_details->zero_nv
1456 && !isGV_with_GP(sv))
bd81e77b 1457 SvNV_set(sv, 0);
82048762 1458#endif
5e2fc214 1459
2439e033 1460 if (UNLIKELY(new_type == SVt_PVIO)) {
85dca89a 1461 IO * const io = MUTABLE_IO(sv);
d963bf01 1462 GV *iogv = gv_fetchpvs("IO::File::", GV_ADD, SVt_PVHV);
85dca89a
NC
1463
1464 SvOBJECT_on(io);
1465 /* Clear the stashcache because a new IO could overrule a package
1466 name */
103f5a36 1467 DEBUG_o(Perl_deb(aTHX_ "sv_upgrade clearing PL_stashcache\n"));
85dca89a
NC
1468 hv_clear(PL_stashcache);
1469
85dca89a 1470 SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv))));
f2524eef 1471 IoPAGE_LEN(sv) = 60;
85dca89a 1472 }
df6b4bd5 1473 if (old_type < SVt_PV) {
ed7df46e 1474 /* referent will be NULL unless the old type was SVt_IV emulating
4df7f6af 1475 SVt_RV */
ed7df46e 1476 sv->sv_u.svu_rv = referent;
4df7f6af 1477 }
bd81e77b
NC
1478 break;
1479 default:
afd78fd5
JH
1480 Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu",
1481 (unsigned long)new_type);
bd81e77b 1482 }
73171d91 1483
5b306eef
DD
1484 /* if this is zero, this is a body-less SVt_NULL, SVt_IV/SVt_RV,
1485 and sometimes SVt_NV */
1486 if (old_type_details->body_size) {
bd81e77b 1487#ifdef PURIFY
beeec492 1488 safefree(old_body);
bd81e77b 1489#else
bc786448
GG
1490 /* Note that there is an assumption that all bodies of types that
1491 can be upgraded came from arenas. Only the more complex non-
1492 upgradable types are allowed to be directly malloc()ed. */
1493 assert(old_type_details->arena);
bd81e77b
NC
1494 del_body((void*)((char*)old_body + old_type_details->offset),
1495 &PL_body_roots[old_type]);
1496#endif
1497 }
1498}
73171d91 1499
bd81e77b
NC
1500/*
1501=for apidoc sv_backoff
73171d91 1502
fde67290 1503Remove any string offset. You should normally use the C<SvOOK_off> macro
bd81e77b 1504wrapper instead.
73171d91 1505
bd81e77b 1506=cut
73171d91
NC
1507*/
1508
fa7a1e49
DD
1509/* prior to 5.000 stable, this function returned the new OOK-less SvFLAGS
1510 prior to 5.23.4 this function always returned 0
1511*/
1512
1513void
ddeaf645 1514Perl_sv_backoff(SV *const sv)
bd81e77b 1515{
69240efd 1516 STRLEN delta;
7a4bba22 1517 const char * const s = SvPVX_const(sv);
7918f24d
NC
1518
1519 PERL_ARGS_ASSERT_SV_BACKOFF;
7918f24d 1520
bd81e77b
NC
1521 assert(SvOOK(sv));
1522 assert(SvTYPE(sv) != SVt_PVHV);
1523 assert(SvTYPE(sv) != SVt_PVAV);
7a4bba22 1524
69240efd
NC
1525 SvOOK_offset(sv, delta);
1526
7a4bba22
NC
1527 SvLEN_set(sv, SvLEN(sv) + delta);
1528 SvPV_set(sv, SvPVX(sv) - delta);
bd81e77b 1529 SvFLAGS(sv) &= ~SVf_OOK;
fa7a1e49
DD
1530 Move(s, SvPVX(sv), SvCUR(sv)+1, char);
1531 return;
bd81e77b 1532}
73171d91 1533
03885497
DM
1534
1535/* forward declaration */
1536static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags);
1537
1538
bd81e77b
NC
1539/*
1540=for apidoc sv_grow
73171d91 1541
bd81e77b
NC
1542Expands the character buffer in the SV. If necessary, uses C<sv_unref> and
1543upgrades the SV to C<SVt_PV>. Returns a pointer to the character buffer.
1544Use the C<SvGROW> wrapper instead.
93e68bfb 1545
bd81e77b
NC
1546=cut
1547*/
93e68bfb 1548
e0060e30 1549
bd81e77b 1550char *
5aaab254 1551Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
bd81e77b 1552{
eb578fdb 1553 char *s;
93e68bfb 1554
7918f24d
NC
1555 PERL_ARGS_ASSERT_SV_GROW;
1556
bd81e77b
NC
1557 if (SvROK(sv))
1558 sv_unref(sv);
1559 if (SvTYPE(sv) < SVt_PV) {
1560 sv_upgrade(sv, SVt_PV);
1561 s = SvPVX_mutable(sv);
1562 }
1563 else if (SvOOK(sv)) { /* pv is offset? */
1564 sv_backoff(sv);
1565 s = SvPVX_mutable(sv);
1566 if (newlen > SvLEN(sv))
1567 newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */
bd81e77b
NC
1568 }
1569 else
db2c6cb3 1570 {
e0060e30 1571 if (SvIsCOW(sv)) S_sv_uncow(aTHX_ sv, 0);
bd81e77b 1572 s = SvPVX_mutable(sv);
db2c6cb3 1573 }
aeb18a1e 1574
93c10d60 1575#ifdef PERL_COPY_ON_WRITE
cbcb2a16 1576 /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare)
3c239bea 1577 * to store the COW count. So in general, allocate one more byte than
cbcb2a16
DM
1578 * asked for, to make it likely this byte is always spare: and thus
1579 * make more strings COW-able.
fe546b38 1580 *
fa8f4f85
TC
1581 * Only increment if the allocation isn't MEM_SIZE_MAX,
1582 * otherwise it will wrap to 0.
1583 */
fe546b38 1584 if ( newlen != MEM_SIZE_MAX )
cbcb2a16
DM
1585 newlen++;
1586#endif
1587
ce861ea7
YO
1588#if defined(PERL_USE_MALLOC_SIZE) && defined(Perl_safesysmalloc_size)
1589#define PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
1590#endif
1591
bd81e77b 1592 if (newlen > SvLEN(sv)) { /* need more room? */
f1200559 1593 STRLEN minlen = SvCUR(sv);
3c239bea 1594 minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + 10;
f1200559
WH
1595 if (newlen < minlen)
1596 newlen = minlen;
ce861ea7 1597#ifndef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
7c641603
KW
1598
1599 /* Don't round up on the first allocation, as odds are pretty good that
1600 * the initial request is accurate as to what is really needed */
ce861ea7 1601 if (SvLEN(sv)) {
9efda33a
TC
1602 STRLEN rounded = PERL_STRLEN_ROUNDUP(newlen);
1603 if (rounded > newlen)
1604 newlen = rounded;
ce861ea7 1605 }
bd81e77b 1606#endif
98653f18 1607 if (SvLEN(sv) && s) {
10edeb5d 1608 s = (char*)saferealloc(s, newlen);
bd81e77b
NC
1609 }
1610 else {
10edeb5d 1611 s = (char*)safemalloc(newlen);
bd81e77b 1612 if (SvPVX_const(sv) && SvCUR(sv)) {
0a5fcc38 1613 Move(SvPVX_const(sv), s, SvCUR(sv), char);
bd81e77b
NC
1614 }
1615 }
1616 SvPV_set(sv, s);
ce861ea7 1617#ifdef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
98653f18
NC
1618 /* Do this here, do it once, do it right, and then we will never get
1619 called back into sv_grow() unless there really is some growing
1620 needed. */
ca7c1a29 1621 SvLEN_set(sv, Perl_safesysmalloc_size(s));
98653f18 1622#else
bd81e77b 1623 SvLEN_set(sv, newlen);
98653f18 1624#endif
bd81e77b
NC
1625 }
1626 return s;
1627}
aeb18a1e 1628
bd81e77b
NC
1629/*
1630=for apidoc sv_setiv
932e9ff9 1631
bd81e77b 1632Copies an integer into the given SV, upgrading first if necessary.
fbe13c60 1633Does not handle 'set' magic. See also C<L</sv_setiv_mg>>.
463ee0b2 1634
bd81e77b
NC
1635=cut
1636*/
463ee0b2 1637
bd81e77b 1638void
5aaab254 1639Perl_sv_setiv(pTHX_ SV *const sv, const IV i)
bd81e77b 1640{
7918f24d
NC
1641 PERL_ARGS_ASSERT_SV_SETIV;
1642
bd81e77b
NC
1643 SV_CHECK_THINKFIRST_COW_DROP(sv);
1644 switch (SvTYPE(sv)) {
1645 case SVt_NULL:
bd81e77b 1646 case SVt_NV:
3376de98 1647 sv_upgrade(sv, SVt_IV);
bd81e77b 1648 break;
bd81e77b
NC
1649 case SVt_PV:
1650 sv_upgrade(sv, SVt_PVIV);
1651 break;
463ee0b2 1652
bd81e77b 1653 case SVt_PVGV:
6e592b3a
BM
1654 if (!isGV_with_GP(sv))
1655 break;
2165bd23 1656 /* FALLTHROUGH */
bd81e77b
NC
1657 case SVt_PVAV:
1658 case SVt_PVHV:
1659 case SVt_PVCV:
1660 case SVt_PVFM:
1661 case SVt_PVIO:
22e74366 1662 /* diag_listed_as: Can't coerce %s to %s in %s */
bd81e77b
NC
1663 Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0),
1664 OP_DESC(PL_op));
c9a0dcdc 1665 NOT_REACHED; /* NOTREACHED */
0103ca14 1666 break;
42d0e0b7 1667 default: NOOP;
bd81e77b
NC
1668 }
1669 (void)SvIOK_only(sv); /* validate number */
1670 SvIV_set(sv, i);
1671 SvTAINT(sv);
1672}
932e9ff9 1673
bd81e77b
NC
1674/*
1675=for apidoc sv_setiv_mg
d33b2eba 1676
bd81e77b 1677Like C<sv_setiv>, but also handles 'set' magic.
1c846c1f 1678
bd81e77b
NC
1679=cut
1680*/
d33b2eba 1681
bd81e77b 1682void
5aaab254 1683Perl_sv_setiv_mg(pTHX_ SV *const sv, const IV i)
bd81e77b 1684{
7918f24d
NC
1685 PERL_ARGS_ASSERT_SV_SETIV_MG;
1686
bd81e77b
NC
1687 sv_setiv(sv,i);
1688 SvSETMAGIC(sv);
1689}
727879eb 1690
bd81e77b
NC
1691/*
1692=for apidoc sv_setuv
d33b2eba 1693
bd81e77b 1694Copies an unsigned integer into the given SV, upgrading first if necessary.
fbe13c60 1695Does not handle 'set' magic. See also C<L</sv_setuv_mg>>.
9b94d1dd 1696
bd81e77b
NC
1697=cut
1698*/
d33b2eba 1699
bd81e77b 1700void
5aaab254 1701Perl_sv_setuv(pTHX_ SV *const sv, const UV u)
bd81e77b 1702{
7918f24d
NC
1703 PERL_ARGS_ASSERT_SV_SETUV;
1704
013abb9b
NC
1705 /* With the if statement to ensure that integers are stored as IVs whenever
1706 possible:
bd81e77b 1707 u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865
d33b2eba 1708
bd81e77b
NC
1709 without
1710 u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865
1c846c1f 1711
013abb9b
NC
1712 If you wish to remove the following if statement, so that this routine
1713 (and its callers) always return UVs, please benchmark to see what the
1714 effect is. Modern CPUs may be different. Or may not :-)
bd81e77b
NC
1715 */
1716 if (u <= (UV)IV_MAX) {
1717 sv_setiv(sv, (IV)u);
1718 return;
1719 }
1720 sv_setiv(sv, 0);
1721 SvIsUV_on(sv);
1722 SvUV_set(sv, u);
1723}
d33b2eba 1724
bd81e77b
NC
1725/*
1726=for apidoc sv_setuv_mg
727879eb 1727
bd81e77b 1728Like C<sv_setuv>, but also handles 'set' magic.
9b94d1dd 1729
bd81e77b
NC
1730=cut
1731*/
5e2fc214 1732
bd81e77b 1733void
5aaab254 1734Perl_sv_setuv_mg(pTHX_ SV *const sv, const UV u)
bd81e77b 1735{
7918f24d
NC
1736 PERL_ARGS_ASSERT_SV_SETUV_MG;
1737
bd81e77b
NC
1738 sv_setuv(sv,u);
1739 SvSETMAGIC(sv);
1740}
5e2fc214 1741
954c1994 1742/*
bd81e77b 1743=for apidoc sv_setnv
954c1994 1744
bd81e77b 1745Copies a double into the given SV, upgrading first if necessary.
fbe13c60 1746Does not handle 'set' magic. See also C<L</sv_setnv_mg>>.
954c1994
GS
1747
1748=cut
1749*/
1750
63f97190 1751void
5aaab254 1752Perl_sv_setnv(pTHX_ SV *const sv, const NV num)
79072805 1753{
7918f24d
NC
1754 PERL_ARGS_ASSERT_SV_SETNV;
1755
bd81e77b
NC
1756 SV_CHECK_THINKFIRST_COW_DROP(sv);
1757 switch (SvTYPE(sv)) {
79072805 1758 case SVt_NULL:
79072805 1759 case SVt_IV:
bd81e77b 1760 sv_upgrade(sv, SVt_NV);
79072805
LW
1761 break;
1762 case SVt_PV:
79072805 1763 case SVt_PVIV:
bd81e77b 1764 sv_upgrade(sv, SVt_PVNV);
79072805 1765 break;
bd4b1eb5 1766
bd4b1eb5 1767 case SVt_PVGV:
6e592b3a
BM
1768 if (!isGV_with_GP(sv))
1769 break;
2165bd23 1770 /* FALLTHROUGH */
bd81e77b
NC
1771 case SVt_PVAV:
1772 case SVt_PVHV:
79072805 1773 case SVt_PVCV:
bd81e77b
NC
1774 case SVt_PVFM:
1775 case SVt_PVIO:
22e74366 1776 /* diag_listed_as: Can't coerce %s to %s in %s */
bd81e77b 1777 Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0),
94bbb3f4 1778 OP_DESC(PL_op));
c9a0dcdc 1779 NOT_REACHED; /* NOTREACHED */
0103ca14 1780 break;
42d0e0b7 1781 default: NOOP;
2068cd4d 1782 }
bd81e77b
NC
1783 SvNV_set(sv, num);
1784 (void)SvNOK_only(sv); /* validate number */
1785 SvTAINT(sv);
79072805
LW
1786}
1787
645c22ef 1788/*
bd81e77b 1789=for apidoc sv_setnv_mg
645c22ef 1790
bd81e77b 1791Like C<sv_setnv>, but also handles 'set' magic.
645c22ef
DM
1792
1793=cut
1794*/
1795
bd81e77b 1796void
5aaab254 1797Perl_sv_setnv_mg(pTHX_ SV *const sv, const NV num)
79072805 1798{
7918f24d
NC
1799 PERL_ARGS_ASSERT_SV_SETNV_MG;
1800
bd81e77b
NC
1801 sv_setnv(sv,num);
1802 SvSETMAGIC(sv);
79072805
LW
1803}
1804
3f7602fa
TC
1805/* Return a cleaned-up, printable version of sv, for non-numeric, or
1806 * not incrementable warning display.
1807 * Originally part of S_not_a_number().
1808 * The return value may be != tmpbuf.
bd81e77b 1809 */
954c1994 1810
3f7602fa
TC
1811STATIC const char *
1812S_sv_display(pTHX_ SV *const sv, char *tmpbuf, STRLEN tmpbuf_size) {
1813 const char *pv;
94463019 1814
3f7602fa 1815 PERL_ARGS_ASSERT_SV_DISPLAY;
7918f24d 1816
94463019 1817 if (DO_UTF8(sv)) {
3f7602fa 1818 SV *dsv = newSVpvs_flags("", SVs_TEMP);
37b8cdd1 1819 pv = sv_uni_display(dsv, sv, 32, UNI_DISPLAY_ISPRINT);
94463019
JH
1820 } else {
1821 char *d = tmpbuf;
3f7602fa 1822 const char * const limit = tmpbuf + tmpbuf_size - 8;
94463019
JH
1823 /* each *s can expand to 4 chars + "...\0",
1824 i.e. need room for 8 chars */
ecdeb87c 1825
00b6aa41
AL
1826 const char *s = SvPVX_const(sv);
1827 const char * const end = s + SvCUR(sv);
1828 for ( ; s < end && d < limit; s++ ) {
94463019 1829 int ch = *s & 0xFF;
bd27cf70 1830 if (! isASCII(ch) && !isPRINT_LC(ch)) {
94463019
JH
1831 *d++ = 'M';
1832 *d++ = '-';
bd27cf70
KW
1833
1834 /* Map to ASCII "equivalent" of Latin1 */
1835 ch = LATIN1_TO_NATIVE(NATIVE_TO_LATIN1(ch) & 127);
94463019
JH
1836 }
1837 if (ch == '\n') {
1838 *d++ = '\\';
1839 *d++ = 'n';
1840 }
1841 else if (ch == '\r') {
1842 *d++ = '\\';
1843 *d++ = 'r';
1844 }
1845 else if (ch == '\f') {
1846 *d++ = '\\';
1847 *d++ = 'f';
1848 }
1849 else if (ch == '\\') {
1850 *d++ = '\\';
1851 *d++ = '\\';
1852 }
1853 else if (ch == '\0') {
1854 *d++ = '\\';
1855 *d++ = '0';
1856 }
1857 else if (isPRINT_LC(ch))
1858 *d++ = ch;
1859 else {
1860 *d++ = '^';
1861 *d++ = toCTRL(ch);
1862 }
1863 }
1864 if (s < end) {
1865 *d++ = '.';
1866 *d++ = '.';
1867 *d++ = '.';
1868 }
1869 *d = '\0';
1870 pv = tmpbuf;
a0d0e21e 1871 }
a0d0e21e 1872
3f7602fa
TC
1873 return pv;
1874}
1875
1876/* Print an "isn't numeric" warning, using a cleaned-up,
1877 * printable version of the offending string
1878 */
1879
1880STATIC void
1881S_not_a_number(pTHX_ SV *const sv)
1882{
3f7602fa
TC
1883 char tmpbuf[64];
1884 const char *pv;
1885
1886 PERL_ARGS_ASSERT_NOT_A_NUMBER;
1887
1888 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf));
1889
533c011a 1890 if (PL_op)
9014280d 1891 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
734856a2 1892 /* diag_listed_as: Argument "%s" isn't numeric%s */
94463019
JH
1893 "Argument \"%s\" isn't numeric in %s", pv,
1894 OP_DESC(PL_op));
a0d0e21e 1895 else
9014280d 1896 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
734856a2 1897 /* diag_listed_as: Argument "%s" isn't numeric%s */
94463019 1898 "Argument \"%s\" isn't numeric", pv);
a0d0e21e
LW
1899}
1900
3f7602fa
TC
1901STATIC void
1902S_not_incrementable(pTHX_ SV *const sv) {
3f7602fa
TC
1903 char tmpbuf[64];
1904 const char *pv;
1905
1906 PERL_ARGS_ASSERT_NOT_INCREMENTABLE;
1907
1908 pv = sv_display(sv, tmpbuf, sizeof(tmpbuf));
1909
1910 Perl_warner(aTHX_ packWARN(WARN_NUMERIC),
1911 "Argument \"%s\" treated as 0 in increment (++)", pv);
1912}
1913
c2988b20
NC
1914/*
1915=for apidoc looks_like_number
1916
645c22ef
DM
1917Test if the content of an SV looks like a number (or is a number).
1918C<Inf> and C<Infinity> are treated as numbers (so will not issue a
796b6530 1919non-numeric warning), even if your C<atof()> doesn't grok them. Get-magic is
f52e41ad 1920ignored.
c2988b20
NC
1921
1922=cut
1923*/
1924
1925I32
aad570aa 1926Perl_looks_like_number(pTHX_ SV *const sv)
c2988b20 1927{
eb578fdb 1928 const char *sbegin;
c2988b20 1929 STRLEN len;
ea2485eb 1930 int numtype;
c2988b20 1931
7918f24d
NC
1932 PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER;
1933
f52e41ad
FC
1934 if (SvPOK(sv) || SvPOKp(sv)) {
1935 sbegin = SvPV_nomg_const(sv, len);
c2988b20 1936 }
c2988b20 1937 else
e0ab1c0e 1938 return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK);
ea2485eb
JH
1939 numtype = grok_number(sbegin, len, NULL);
1940 return ((numtype & IS_NUMBER_TRAILING)) ? 0 : numtype;
c2988b20 1941}
25da4f38 1942
19f6321d
NC
1943STATIC bool
1944S_glob_2number(pTHX_ GV * const gv)
180488f8 1945{
7918f24d
NC
1946 PERL_ARGS_ASSERT_GLOB_2NUMBER;
1947
675c862f
AL
1948 /* We know that all GVs stringify to something that is not-a-number,
1949 so no need to test that. */
1950 if (ckWARN(WARN_NUMERIC))
8e629ff4
FC
1951 {
1952 SV *const buffer = sv_newmortal();
1953 gv_efullname3(buffer, gv, "*");
675c862f 1954 not_a_number(buffer);
8e629ff4 1955 }
675c862f
AL
1956 /* We just want something true to return, so that S_sv_2iuv_common
1957 can tail call us and return true. */
19f6321d 1958 return TRUE;
675c862f
AL
1959}
1960
25da4f38
IZ
1961/* Actually, ISO C leaves conversion of UV to IV undefined, but
1962 until proven guilty, assume that things are not that bad... */
1963
645c22ef
DM
1964/*
1965 NV_PRESERVES_UV:
1966
1967 As 64 bit platforms often have an NV that doesn't preserve all bits of
28e5dec8
JH
1968 an IV (an assumption perl has been based on to date) it becomes necessary
1969 to remove the assumption that the NV always carries enough precision to
1970 recreate the IV whenever needed, and that the NV is the canonical form.
1971 Instead, IV/UV and NV need to be given equal rights. So as to not lose
645c22ef 1972 precision as a side effect of conversion (which would lead to insanity
28e5dec8 1973 and the dragon(s) in t/op/numconvert.t getting very angry) the intent is
8a4a3196
KW
1974 1) to distinguish between IV/UV/NV slots that have a valid conversion cached
1975 where precision was lost, and IV/UV/NV slots that have a valid conversion
1976 which has lost no precision
645c22ef 1977 2) to ensure that if a numeric conversion to one form is requested that
28e5dec8
JH
1978 would lose precision, the precise conversion (or differently
1979 imprecise conversion) is also performed and cached, to prevent
1980 requests for different numeric formats on the same SV causing
1981 lossy conversion chains. (lossless conversion chains are perfectly
1982 acceptable (still))
1983
1984
1985 flags are used:
1986 SvIOKp is true if the IV slot contains a valid value
1987 SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true)
1988 SvNOKp is true if the NV slot contains a valid value
1989 SvNOK is true only if the NV value is accurate
1990
1991 so
645c22ef 1992 while converting from PV to NV, check to see if converting that NV to an
28e5dec8
JH
1993 IV(or UV) would lose accuracy over a direct conversion from PV to
1994 IV(or UV). If it would, cache both conversions, return NV, but mark
1995 SV as IOK NOKp (ie not NOK).
1996
645c22ef 1997 While converting from PV to IV, check to see if converting that IV to an
28e5dec8
JH
1998 NV would lose accuracy over a direct conversion from PV to NV. If it
1999 would, cache both conversions, flag similarly.
2000
2001 Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite
2002 correctly because if IV & NV were set NV *always* overruled.
645c22ef
DM
2003 Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning
2004 changes - now IV and NV together means that the two are interchangeable:
28e5dec8 2005 SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX;
d460ef45 2006
645c22ef
DM
2007 The benefit of this is that operations such as pp_add know that if
2008 SvIOK is true for both left and right operands, then integer addition
2009 can be used instead of floating point (for cases where the result won't
2010 overflow). Before, floating point was always used, which could lead to
28e5dec8
JH
2011 loss of precision compared with integer addition.
2012
2013 * making IV and NV equal status should make maths accurate on 64 bit
2014 platforms
2015 * may speed up maths somewhat if pp_add and friends start to use
645c22ef 2016 integers when possible instead of fp. (Hopefully the overhead in
28e5dec8
JH
2017 looking for SvIOK and checking for overflow will not outweigh the
2018 fp to integer speedup)
2019 * will slow down integer operations (callers of SvIV) on "inaccurate"
2020 values, as the change from SvIOK to SvIOKp will cause a call into
2021 sv_2iv each time rather than a macro access direct to the IV slot
2022 * should speed up number->string conversion on integers as IV is
645c22ef 2023 favoured when IV and NV are equally accurate
28e5dec8
JH
2024
2025 ####################################################################
645c22ef
DM
2026 You had better be using SvIOK_notUV if you want an IV for arithmetic:
2027 SvIOK is true if (IV or UV), so you might be getting (IV)SvUV.
2028 On the other hand, SvUOK is true iff UV.
28e5dec8
JH
2029 ####################################################################
2030
645c22ef 2031 Your mileage will vary depending your CPU's relative fp to integer
28e5dec8
JH
2032 performance ratio.
2033*/
2034
2035#ifndef NV_PRESERVES_UV
645c22ef
DM
2036# define IS_NUMBER_UNDERFLOW_IV 1
2037# define IS_NUMBER_UNDERFLOW_UV 2
2038# define IS_NUMBER_IV_AND_UV 2
2039# define IS_NUMBER_OVERFLOW_IV 4
2040# define IS_NUMBER_OVERFLOW_UV 5
2041
2042/* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */
28e5dec8
JH
2043
2044/* For sv_2nv these three cases are "SvNOK and don't bother casting" */
2045STATIC int
5aaab254 2046S_sv_2iuv_non_preserve(pTHX_ SV *const sv
47031da6
NC
2047# ifdef DEBUGGING
2048 , I32 numtype
2049# endif
2050 )
28e5dec8 2051{
7918f24d 2052 PERL_ARGS_ASSERT_SV_2IUV_NON_PRESERVE;
23491f1d 2053 PERL_UNUSED_CONTEXT;
7918f24d 2054
147e3846 2055 DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%" UVxf " NV=%" NVgf " inttype=%" UVXf "\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype));
28e5dec8
JH
2056 if (SvNVX(sv) < (NV)IV_MIN) {
2057 (void)SvIOKp_on(sv);
2058 (void)SvNOK_on(sv);
45977657 2059 SvIV_set(sv, IV_MIN);
28e5dec8
JH
2060 return IS_NUMBER_UNDERFLOW_IV;
2061 }
2062 if (SvNVX(sv) > (NV)UV_MAX) {
2063 (void)SvIOKp_on(sv);
2064 (void)SvNOK_on(sv);
2065 SvIsUV_on(sv);
607fa7f2 2066 SvUV_set(sv, UV_MAX);
28e5dec8
JH
2067 return IS_NUMBER_OVERFLOW_UV;
2068 }
c2988b20
NC
2069 (void)SvIOKp_on(sv);
2070 (void)SvNOK_on(sv);
2071 /* Can't use strtol etc to convert this string. (See truth table in
2072 sv_2iv */
2073 if (SvNVX(sv) <= (UV)IV_MAX) {
45977657 2074 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2075 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
c2988b20
NC
2076 SvIOK_on(sv); /* Integer is precise. NOK, IOK */
2077 } else {
2078 /* Integer is imprecise. NOK, IOKp */
2079 }
2080 return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV;
2081 }
2082 SvIsUV_on(sv);
607fa7f2 2083 SvUV_set(sv, U_V(SvNVX(sv)));
659c4b96 2084 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
c2988b20
NC
2085 if (SvUVX(sv) == UV_MAX) {
2086 /* As we know that NVs don't preserve UVs, UV_MAX cannot
2087 possibly be preserved by NV. Hence, it must be overflow.
2088 NOK, IOKp */
2089 return IS_NUMBER_OVERFLOW_UV;
2090 }
2091 SvIOK_on(sv); /* Integer is precise. NOK, UOK */
2092 } else {
2093 /* Integer is imprecise. NOK, IOKp */
28e5dec8 2094 }
c2988b20 2095 return IS_NUMBER_OVERFLOW_IV;
28e5dec8 2096}
645c22ef
DM
2097#endif /* !NV_PRESERVES_UV*/
2098
a13f4dff 2099/* If numtype is infnan, set the NV of the sv accordingly.
5564cd7f 2100 * If numtype is anything else, try setting the NV using Atof(PV). */
a13f4dff 2101static void
3823048b 2102S_sv_setnv(pTHX_ SV* sv, int numtype)
a13f4dff 2103{
07925c5e 2104 bool pok = cBOOL(SvPOK(sv));
5564cd7f 2105 bool nok = FALSE;
a7157111 2106#ifdef NV_INF
a13f4dff
JH
2107 if ((numtype & IS_NUMBER_INFINITY)) {
2108 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -NV_INF : NV_INF);
5564cd7f 2109 nok = TRUE;
a7157111
JH
2110 } else
2111#endif
2112#ifdef NV_NAN
2113 if ((numtype & IS_NUMBER_NAN)) {
3823048b 2114 SvNV_set(sv, NV_NAN);
d48bd569 2115 nok = TRUE;
a7157111
JH
2116 } else
2117#endif
2118 if (pok) {
a13f4dff 2119 SvNV_set(sv, Atof(SvPVX_const(sv)));
d48bd569
JH
2120 /* Purposefully no true nok here, since we don't want to blow
2121 * away the possible IOK/UV of an existing sv. */
2122 }
5564cd7f 2123 if (nok) {
d48bd569 2124 SvNOK_only(sv); /* No IV or UV please, this is pure infnan. */
5564cd7f
JH
2125 if (pok)
2126 SvPOK_on(sv); /* PV is okay, though. */
2127 }
a13f4dff
JH
2128}
2129
af359546 2130STATIC bool
7918f24d
NC
2131S_sv_2iuv_common(pTHX_ SV *const sv)
2132{
7918f24d
NC
2133 PERL_ARGS_ASSERT_SV_2IUV_COMMON;
2134
af359546 2135 if (SvNOKp(sv)) {
28e5dec8
JH
2136 /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv
2137 * without also getting a cached IV/UV from it at the same time
2138 * (ie PV->NV conversion should detect loss of accuracy and cache
af359546
NC
2139 * IV or UV at same time to avoid this. */
2140 /* IV-over-UV optimisation - choose to cache IV if possible */
25da4f38
IZ
2141
2142 if (SvTYPE(sv) == SVt_NV)
2143 sv_upgrade(sv, SVt_PVNV);
2144
28e5dec8
JH
2145 (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */
2146 /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost
2147 certainly cast into the IV range at IV_MAX, whereas the correct
2148 answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary
2149 cases go to UV */
e91de695
JH
2150#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2151 if (Perl_isnan(SvNVX(sv))) {
2152 SvUV_set(sv, 0);
2153 SvIsUV_on(sv);
2154 return FALSE;
2155 }
2156#endif
28e5dec8 2157 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
45977657 2158 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2159 if (SvNVX(sv) == (NV) SvIVX(sv)
28e5dec8 2160#ifndef NV_PRESERVES_UV
53e2bfb7 2161 && SvIVX(sv) != IV_MIN /* avoid negating IV_MIN below */
28e5dec8
JH
2162 && (((UV)1 << NV_PRESERVES_UV_BITS) >
2163 (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv)))
2164 /* Don't flag it as "accurately an integer" if the number
2165 came from a (by definition imprecise) NV operation, and
2166 we're outside the range of NV integer precision */
2167#endif
2168 ) {
a43d94f2
NC
2169 if (SvNOK(sv))
2170 SvIOK_on(sv); /* Can this go wrong with rounding? NWC */
2171 else {
2172 /* scalar has trailing garbage, eg "42a" */
2173 }
28e5dec8 2174 DEBUG_c(PerlIO_printf(Perl_debug_log,
147e3846 2175 "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (precise)\n",
28e5dec8
JH
2176 PTR2UV(sv),
2177 SvNVX(sv),
2178 SvIVX(sv)));
2179
2180 } else {
2181 /* IV not precise. No need to convert from PV, as NV
2182 conversion would already have cached IV if it detected
2183 that PV->IV would be better than PV->NV->IV
2184 flags already correct - don't set public IOK. */
2185 DEBUG_c(PerlIO_printf(Perl_debug_log,
147e3846 2186 "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (imprecise)\n",
28e5dec8
JH
2187 PTR2UV(sv),
2188 SvNVX(sv),
2189 SvIVX(sv)));
2190 }
2191 /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN,
2192 but the cast (NV)IV_MIN rounds to a the value less (more
2193 negative) than IV_MIN which happens to be equal to SvNVX ??
2194 Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and
2195 NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and
2196 (NV)UVX == NVX are both true, but the values differ. :-(
2197 Hopefully for 2s complement IV_MIN is something like
2198 0x8000000000000000 which will be exact. NWC */
d460ef45 2199 }
25da4f38 2200 else {
607fa7f2 2201 SvUV_set(sv, U_V(SvNVX(sv)));
28e5dec8 2202 if (
659c4b96 2203 (SvNVX(sv) == (NV) SvUVX(sv))
28e5dec8
JH
2204#ifndef NV_PRESERVES_UV
2205 /* Make sure it's not 0xFFFFFFFFFFFFFFFF */
2206 /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */
2207 && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv))
2208 /* Don't flag it as "accurately an integer" if the number
2209 came from a (by definition imprecise) NV operation, and
2210 we're outside the range of NV integer precision */
2211#endif
a43d94f2 2212 && SvNOK(sv)
28e5dec8
JH
2213 )
2214 SvIOK_on(sv);
25da4f38 2215 SvIsUV_on(sv);
1c846c1f 2216 DEBUG_c(PerlIO_printf(Perl_debug_log,
147e3846 2217 "0x%" UVxf " 2iv(%" UVuf " => %" IVdf ") (as unsigned)\n",
56431972 2218 PTR2UV(sv),
57def98f
JH
2219 SvUVX(sv),
2220 SvUVX(sv)));
25da4f38 2221 }
748a9306 2222 }
cd84013a 2223 else if (SvPOKp(sv)) {
c2988b20 2224 UV value;
80e5abf2
DM
2225 int numtype;
2226 const char *s = SvPVX_const(sv);
2227 const STRLEN cur = SvCUR(sv);
2228
2229 /* short-cut for a single digit string like "1" */
2230
2231 if (cur == 1) {
2232 char c = *s;
2233 if (isDIGIT(c)) {
2234 if (SvTYPE(sv) < SVt_PVIV)
2235 sv_upgrade(sv, SVt_PVIV);
2236 (void)SvIOK_on(sv);
2237 SvIV_set(sv, (IV)(c - '0'));
2238 return FALSE;
2239 }
2240 }
2241
2242 numtype = grok_number(s, cur, &value);
af359546 2243 /* We want to avoid a possible problem when we cache an IV/ a UV which
25da4f38 2244 may be later translated to an NV, and the resulting NV is not
c2988b20
NC
2245 the same as the direct translation of the initial string
2246 (eg 123.456 can shortcut to the IV 123 with atol(), but we must
2247 be careful to ensure that the value with the .456 is around if the
2248 NV value is requested in the future).
1c846c1f 2249
af359546 2250 This means that if we cache such an IV/a UV, we need to cache the
25da4f38 2251 NV as well. Moreover, we trade speed for space, and do not
28e5dec8 2252 cache the NV if we are sure it's not needed.
25da4f38 2253 */
16b7a9a4 2254
c2988b20
NC
2255 /* SVt_PVNV is one higher than SVt_PVIV, hence this order */
2256 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2257 == IS_NUMBER_IN_UV) {
5e045b90 2258 /* It's definitely an integer, only upgrade to PVIV */
28e5dec8
JH
2259 if (SvTYPE(sv) < SVt_PVIV)
2260 sv_upgrade(sv, SVt_PVIV);
f7bbb42a 2261 (void)SvIOK_on(sv);
c2988b20
NC
2262 } else if (SvTYPE(sv) < SVt_PVNV)
2263 sv_upgrade(sv, SVt_PVNV);
28e5dec8 2264
a13f4dff 2265 if ((numtype & (IS_NUMBER_INFINITY | IS_NUMBER_NAN))) {
75a57a38 2266 if (ckWARN(WARN_NUMERIC) && ((numtype & IS_NUMBER_TRAILING)))
6b322424 2267 not_a_number(sv);
3823048b 2268 S_sv_setnv(aTHX_ sv, numtype);
a13f4dff
JH
2269 return FALSE;
2270 }
2271
f2524eef 2272 /* If NVs preserve UVs then we only use the UV value if we know that
c2988b20
NC
2273 we aren't going to call atof() below. If NVs don't preserve UVs
2274 then the value returned may have more precision than atof() will
2275 return, even though value isn't perfectly accurate. */
2276 if ((numtype & (IS_NUMBER_IN_UV
2277#ifdef NV_PRESERVES_UV
2278 | IS_NUMBER_NOT_INT
2279#endif
2280 )) == IS_NUMBER_IN_UV) {
2281 /* This won't turn off the public IOK flag if it was set above */
2282 (void)SvIOKp_on(sv);
2283
2284 if (!(numtype & IS_NUMBER_NEG)) {
2285 /* positive */;
2286 if (value <= (UV)IV_MAX) {
45977657 2287 SvIV_set(sv, (IV)value);
c2988b20 2288 } else {
af359546 2289 /* it didn't overflow, and it was positive. */
607fa7f2 2290 SvUV_set(sv, value);
c2988b20
NC
2291 SvIsUV_on(sv);
2292 }
2293 } else {
2294 /* 2s complement assumption */
2295 if (value <= (UV)IV_MIN) {
53e2bfb7
DM
2296 SvIV_set(sv, value == (UV)IV_MIN
2297 ? IV_MIN : -(IV)value);
c2988b20
NC
2298 } else {
2299 /* Too negative for an IV. This is a double upgrade, but
d1be9408 2300 I'm assuming it will be rare. */
c2988b20
NC
2301 if (SvTYPE(sv) < SVt_PVNV)
2302 sv_upgrade(sv, SVt_PVNV);
2303 SvNOK_on(sv);
2304 SvIOK_off(sv);
2305 SvIOKp_on(sv);
9d6ce603 2306 SvNV_set(sv, -(NV)value);
45977657 2307 SvIV_set(sv, IV_MIN);
c2988b20
NC
2308 }
2309 }
2310 }
2311 /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we
2312 will be in the previous block to set the IV slot, and the next
2313 block to set the NV slot. So no else here. */
2314
2315 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2316 != IS_NUMBER_IN_UV) {
2317 /* It wasn't an (integer that doesn't overflow the UV). */
3823048b 2318 S_sv_setnv(aTHX_ sv, numtype);
28e5dec8 2319
c2988b20
NC
2320 if (! numtype && ckWARN(WARN_NUMERIC))
2321 not_a_number(sv);
28e5dec8 2322
147e3846 2323 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" NVgf ")\n",
c2988b20 2324 PTR2UV(sv), SvNVX(sv)));
28e5dec8 2325
28e5dec8 2326#ifdef NV_PRESERVES_UV
af359546
NC
2327 (void)SvIOKp_on(sv);
2328 (void)SvNOK_on(sv);
e91de695
JH
2329#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2330 if (Perl_isnan(SvNVX(sv))) {
2331 SvUV_set(sv, 0);
2332 SvIsUV_on(sv);
2333 return FALSE;
2334 }
2335#endif
af359546
NC
2336 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2337 SvIV_set(sv, I_V(SvNVX(sv)));
2338 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
2339 SvIOK_on(sv);
2340 } else {
6f207bd3 2341 NOOP; /* Integer is imprecise. NOK, IOKp */
af359546
NC
2342 }
2343 /* UV will not work better than IV */
2344 } else {
2345 if (SvNVX(sv) > (NV)UV_MAX) {
2346 SvIsUV_on(sv);
2347 /* Integer is inaccurate. NOK, IOKp, is UV */
2348 SvUV_set(sv, UV_MAX);
af359546
NC
2349 } else {
2350 SvUV_set(sv, U_V(SvNVX(sv)));
2351 /* 0xFFFFFFFFFFFFFFFF not an issue in here, NVs
2352 NV preservse UV so can do correct comparison. */
2353 if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
2354 SvIOK_on(sv);
af359546 2355 } else {
6f207bd3 2356 NOOP; /* Integer is imprecise. NOK, IOKp, is UV */
af359546
NC
2357 }
2358 }
4b0c9573 2359 SvIsUV_on(sv);
af359546 2360 }
28e5dec8 2361#else /* NV_PRESERVES_UV */
c2988b20
NC
2362 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2363 == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) {
af359546 2364 /* The IV/UV slot will have been set from value returned by
c2988b20
NC
2365 grok_number above. The NV slot has just been set using
2366 Atof. */
560b0c46 2367 SvNOK_on(sv);
c2988b20
NC
2368 assert (SvIOKp(sv));
2369 } else {
2370 if (((UV)1 << NV_PRESERVES_UV_BITS) >
2371 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
2372 /* Small enough to preserve all bits. */
2373 (void)SvIOKp_on(sv);
2374 SvNOK_on(sv);
45977657 2375 SvIV_set(sv, I_V(SvNVX(sv)));
659c4b96 2376 if ((NV)(SvIVX(sv)) == SvNVX(sv))
c2988b20
NC
2377 SvIOK_on(sv);
2378 /* Assumption: first non-preserved integer is < IV_MAX,
2379 this NV is in the preserved range, therefore: */
2380 if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))
2381 < (UV)IV_MAX)) {
147e3846 2382 Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%" NVgf " U_V is 0x%" UVxf ", IV_MAX is 0x%" UVxf "\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX);
c2988b20
NC
2383 }
2384 } else {
2385 /* IN_UV NOT_INT
2386 0 0 already failed to read UV.
2387 0 1 already failed to read UV.
2388 1 0 you won't get here in this case. IV/UV
2389 slot set, public IOK, Atof() unneeded.
2390 1 1 already read UV.
2391 so there's no point in sv_2iuv_non_preserve() attempting
2392 to use atol, strtol, strtoul etc. */
47031da6 2393# ifdef DEBUGGING
40a17c4c 2394 sv_2iuv_non_preserve (sv, numtype);
47031da6
NC
2395# else
2396 sv_2iuv_non_preserve (sv);
2397# endif
c2988b20
NC
2398 }
2399 }
28e5dec8 2400#endif /* NV_PRESERVES_UV */
a43d94f2
NC
2401 /* It might be more code efficient to go through the entire logic above
2402 and conditionally set with SvIOKp_on() rather than SvIOK(), but it
2403 gets complex and potentially buggy, so more programmer efficient
2404 to do it this way, by turning off the public flags: */
2405 if (!numtype)
2406 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
25da4f38 2407 }
af359546 2408 }
c0443cc0 2409 else {
675c862f 2410 if (isGV_with_GP(sv))
159b6efe 2411 return glob_2number(MUTABLE_GV(sv));
180488f8 2412
4f62cd62 2413 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
af359546 2414 report_uninit(sv);
25da4f38
IZ
2415 if (SvTYPE(sv) < SVt_IV)
2416 /* Typically the caller expects that sv_any is not NULL now. */
2417 sv_upgrade(sv, SVt_IV);
af359546
NC
2418 /* Return 0 from the caller. */
2419 return TRUE;
2420 }
2421 return FALSE;
2422}
2423
2424/*
2425=for apidoc sv_2iv_flags
2426
2427Return the integer value of an SV, doing any necessary string
c5608a1f 2428conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
af359546
NC
2429Normally used via the C<SvIV(sv)> and C<SvIVx(sv)> macros.
2430
2431=cut
2432*/
2433
2434IV
5aaab254 2435Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags)
af359546 2436{
1061065f 2437 PERL_ARGS_ASSERT_SV_2IV_FLAGS;
4bac9ae4 2438
217f6fa3
FC
2439 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2440 && SvTYPE(sv) != SVt_PVFM);
2441
4bac9ae4
CS
2442 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2443 mg_get(sv);
2444
2445 if (SvROK(sv)) {
2446 if (SvAMAGIC(sv)) {
2447 SV * tmpstr;
2448 if (flags & SV_SKIP_OVERLOAD)
2449 return 0;
2450 tmpstr = AMG_CALLunary(sv, numer_amg);
2451 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2452 return SvIV(tmpstr);
2453 }
2454 }
2455 return PTR2IV(SvRV(sv));
2456 }
2457
8d919b0a 2458 if (SvVALID(sv) || isREGEXP(sv)) {
4e8879f3
DM
2459 /* FBMs use the space for SvIVX and SvNVX for other purposes, so
2460 must not let them cache IVs.
2b2b6d6d
NC
2461 In practice they are extremely unlikely to actually get anywhere
2462 accessible by user Perl code - the only way that I'm aware of is when
2463 a constant subroutine which is used as the second argument to index.
cd84013a
FC
2464
2465 Regexps have no SvIVX and SvNVX fields.
2b2b6d6d 2466 */
df6b4bd5 2467 assert(SvPOKp(sv));
e20b6c3b 2468 {
71c558c3 2469 UV value;
8d919b0a
FC
2470 const char * const ptr =
2471 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
e91de695
JH
2472 const int numtype
2473 = grok_number(ptr, SvCUR(sv), &value);
71c558c3
NC
2474
2475 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2476 == IS_NUMBER_IN_UV) {
2477 /* It's definitely an integer */
2478 if (numtype & IS_NUMBER_NEG) {
2479 if (value < (UV)IV_MIN)
2480 return -(IV)value;
2481 } else {
2482 if (value < (UV)IV_MAX)
2483 return (IV)value;
2484 }
2485 }
058b8ae2 2486
e91de695
JH
2487 /* Quite wrong but no good choices. */
2488 if ((numtype & IS_NUMBER_INFINITY)) {
2489 return (numtype & IS_NUMBER_NEG) ? IV_MIN : IV_MAX;
2490 } else if ((numtype & IS_NUMBER_NAN)) {
2491 return 0; /* So wrong. */
2492 }
2493
71c558c3
NC
2494 if (!numtype) {
2495 if (ckWARN(WARN_NUMERIC))
2496 not_a_number(sv);
2497 }
8d919b0a 2498 return I_V(Atof(ptr));
e20b6c3b 2499 }
4bac9ae4
CS
2500 }
2501
2502 if (SvTHINKFIRST(sv)) {
af359546
NC
2503 if (SvREADONLY(sv) && !SvOK(sv)) {
2504 if (ckWARN(WARN_UNINITIALIZED))
2505 report_uninit(sv);
2506 return 0;
2507 }
2508 }
4bac9ae4 2509
af359546
NC
2510 if (!SvIOKp(sv)) {
2511 if (S_sv_2iuv_common(aTHX_ sv))
2512 return 0;
79072805 2513 }
4bac9ae4 2514
147e3846 2515 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" IVdf ")\n",
1d7c1841 2516 PTR2UV(sv),SvIVX(sv)));
25da4f38 2517 return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv);
79072805
LW
2518}
2519
645c22ef 2520/*
891f9566 2521=for apidoc sv_2uv_flags
645c22ef
DM
2522
2523Return the unsigned integer value of an SV, doing any necessary string
c5608a1f 2524conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
891f9566 2525Normally used via the C<SvUV(sv)> and C<SvUVx(sv)> macros.
645c22ef 2526
58ae9391
KW
2527=for apidoc Amnh||SV_GMAGIC
2528
645c22ef
DM
2529=cut
2530*/
2531
ff68c719 2532UV
5aaab254 2533Perl_sv_2uv_flags(pTHX_ SV *const sv, const I32 flags)
ff68c719 2534{
1061065f 2535 PERL_ARGS_ASSERT_SV_2UV_FLAGS;
4bac9ae4
CS
2536
2537 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
2538 mg_get(sv);
2539
2540 if (SvROK(sv)) {
2541 if (SvAMAGIC(sv)) {
2542 SV *tmpstr;
2543 if (flags & SV_SKIP_OVERLOAD)
2544 return 0;
2545 tmpstr = AMG_CALLunary(sv, numer_amg);
2546 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2547 return SvUV(tmpstr);
2548 }
2549 }
2550 return PTR2UV(SvRV(sv));
2551 }
2552
8d919b0a 2553 if (SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d 2554 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
cd84013a
FC
2555 the same flag bit as SVf_IVisUV, so must not let them cache IVs.
2556 Regexps have no SvIVX and SvNVX fields. */
df6b4bd5 2557 assert(SvPOKp(sv));
e20b6c3b 2558 {
71c558c3 2559 UV value;
8d919b0a
FC
2560 const char * const ptr =
2561 isREGEXP(sv) ? RX_WRAPPED((REGEXP*)sv) : SvPVX_const(sv);
e91de695
JH
2562 const int numtype
2563 = grok_number(ptr, SvCUR(sv), &value);
71c558c3
NC
2564
2565 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2566 == IS_NUMBER_IN_UV) {
2567 /* It's definitely an integer */
2568 if (!(numtype & IS_NUMBER_NEG))
2569 return value;
2570 }
058b8ae2 2571
e91de695
JH
2572 /* Quite wrong but no good choices. */
2573 if ((numtype & IS_NUMBER_INFINITY)) {
2574 return UV_MAX; /* So wrong. */
2575 } else if ((numtype & IS_NUMBER_NAN)) {
2576 return 0; /* So wrong. */
2577 }
2578
71c558c3
NC
2579 if (!numtype) {
2580 if (ckWARN(WARN_NUMERIC))
2581 not_a_number(sv);
2582 }
8d919b0a 2583 return U_V(Atof(ptr));
e20b6c3b 2584 }
4bac9ae4
CS
2585 }
2586
2587 if (SvTHINKFIRST(sv)) {
0336b60e 2588 if (SvREADONLY(sv) && !SvOK(sv)) {
0336b60e 2589 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 2590 report_uninit(sv);
ff68c719
PP
2591 return 0;
2592 }
2593 }
4bac9ae4 2594
af359546
NC
2595 if (!SvIOKp(sv)) {
2596 if (S_sv_2iuv_common(aTHX_ sv))
2597 return 0;
ff68c719 2598 }
25da4f38 2599
147e3846 2600 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2uv(%" UVuf ")\n",
1d7c1841 2601 PTR2UV(sv),SvUVX(sv)));
25da4f38 2602 return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv);
ff68c719
PP
2603}
2604
645c22ef 2605/*
196007d1 2606=for apidoc sv_2nv_flags
645c22ef
DM
2607
2608Return the num value of an SV, doing any necessary string or integer
c5608a1f 2609conversion. If C<flags> has the C<SV_GMAGIC> bit set, does an C<mg_get()> first.
39d5de13 2610Normally used via the C<SvNV(sv)> and C<SvNVx(sv)> macros.
645c22ef
DM
2611
2612=cut
2613*/
2614
65202027 2615NV
5aaab254 2616Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags)
79072805 2617{
1061065f
DD
2618 PERL_ARGS_ASSERT_SV_2NV_FLAGS;
2619
217f6fa3
FC
2620 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
2621 && SvTYPE(sv) != SVt_PVFM);
8d919b0a 2622 if (SvGMAGICAL(sv) || SvVALID(sv) || isREGEXP(sv)) {
2b2b6d6d 2623 /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
cd84013a
FC
2624 the same flag bit as SVf_IVisUV, so must not let them cache NVs.
2625 Regexps have no SvIVX and SvNVX fields. */
8d919b0a 2626 const char *ptr;
39d5de13
DM
2627 if (flags & SV_GMAGIC)
2628 mg_get(sv);
463ee0b2
LW
2629 if (SvNOKp(sv))
2630 return SvNVX(sv);
cd84013a 2631 if (SvPOKp(sv) && !SvIOKp(sv)) {
8d919b0a 2632 ptr = SvPVX_const(sv);
041457d9 2633 if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) &&
8d919b0a 2634 !grok_number(ptr, SvCUR(sv), NULL))
a0d0e21e 2635 not_a_number(sv);
8d919b0a 2636 return Atof(ptr);
a0d0e21e 2637 }
25da4f38 2638 if (SvIOKp(sv)) {
1c846c1f 2639 if (SvIsUV(sv))
65202027 2640 return (NV)SvUVX(sv);
25da4f38 2641 else
65202027 2642 return (NV)SvIVX(sv);
47a72cb8
NC
2643 }
2644 if (SvROK(sv)) {
2645 goto return_rok;
2646 }
2647 assert(SvTYPE(sv) >= SVt_PVMG);
2648 /* This falls through to the report_uninit near the end of the
2649 function. */
2650 } else if (SvTHINKFIRST(sv)) {
a0d0e21e 2651 if (SvROK(sv)) {
47a72cb8 2652 return_rok:
deb46114 2653 if (SvAMAGIC(sv)) {
aee036bb
DM
2654 SV *tmpstr;
2655 if (flags & SV_SKIP_OVERLOAD)
2656 return 0;
31d632c3 2657 tmpstr = AMG_CALLunary(sv, numer_amg);
deb46114
NC
2658 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
2659 return SvNV(tmpstr);
2660 }
2661 }
2662 return PTR2NV(SvRV(sv));
a0d0e21e 2663 }
0336b60e 2664 if (SvREADONLY(sv) && !SvOK(sv)) {
599cee73 2665 if (ckWARN(WARN_UNINITIALIZED))
29489e7c 2666 report_uninit(sv);
ed6116ce
LW
2667 return 0.0;
2668 }
79072805
LW
2669 }
2670 if (SvTYPE(sv) < SVt_NV) {
7e25a7e9
NC
2671 /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */
2672 sv_upgrade(sv, SVt_NV);
b2f82b52 2673 CLANG_DIAG_IGNORE_STMT(-Wthread-safety);
097ee67d 2674 DEBUG_c({
688523a0
KW
2675 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
2676 STORE_LC_NUMERIC_SET_STANDARD();
1d7c1841 2677 PerlIO_printf(Perl_debug_log,
147e3846 2678 "0x%" UVxf " num(%" NVgf ")\n",
1d7c1841 2679 PTR2UV(sv), SvNVX(sv));
688523a0 2680 RESTORE_LC_NUMERIC();
097ee67d 2681 });
b2f82b52
KW
2682 CLANG_DIAG_RESTORE_STMT;
2683
79072805
LW
2684 }
2685 else if (SvTYPE(sv) < SVt_PVNV)
2686 sv_upgrade(sv, SVt_PVNV);
59d8ce62
NC
2687 if (SvNOKp(sv)) {
2688 return SvNVX(sv);
61604483 2689 }
59d8ce62 2690 if (SvIOKp(sv)) {
9d6ce603 2691 SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv));
28e5dec8 2692#ifdef NV_PRESERVES_UV
a43d94f2
NC
2693 if (SvIOK(sv))
2694 SvNOK_on(sv);
2695 else
2696 SvNOKp_on(sv);
28e5dec8
JH
2697#else
2698 /* Only set the public NV OK flag if this NV preserves the IV */
2699 /* Check it's not 0xFFFFFFFFFFFFFFFF */
a43d94f2
NC
2700 if (SvIOK(sv) &&
2701 SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv))))
28e5dec8
JH
2702 : (SvIVX(sv) == I_V(SvNVX(sv))))
2703 SvNOK_on(sv);
2704 else
2705 SvNOKp_on(sv);
2706#endif
93a17b20 2707 }
cd84013a 2708 else if (SvPOKp(sv)) {
c2988b20 2709 UV value;
3823048b 2710 const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
041457d9 2711 if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC))
a0d0e21e 2712 not_a_number(sv);
28e5dec8 2713#ifdef NV_PRESERVES_UV
c2988b20
NC
2714 if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT))
2715 == IS_NUMBER_IN_UV) {
5e045b90 2716 /* It's definitely an integer */
9d6ce603 2717 SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value);
66d83377 2718 } else {
3823048b 2719 S_sv_setnv(aTHX_ sv, numtype);
66d83377 2720 }
a43d94f2
NC
2721 if (numtype)
2722 SvNOK_on(sv);
2723 else
2724 SvNOKp_on(sv);
28e5dec8 2725#else
e91de695
JH
2726 SvNV_set(sv, Atof(SvPVX_const(sv)));
2727 /* Only set the public NV OK flag if this NV preserves the value in
2728 the PV at least as well as an IV/UV would.
2729 Not sure how to do this 100% reliably. */
2730 /* if that shift count is out of range then Configure's test is
2731 wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS ==
2732 UV_BITS */
2733 if (((UV)1 << NV_PRESERVES_UV_BITS) >
2734 U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) {
2735 SvNOK_on(sv); /* Definitely small enough to preserve all bits */
2736 } else if (!(numtype & IS_NUMBER_IN_UV)) {
2737 /* Can't use strtol etc to convert this string, so don't try.
2738 sv_2iv and sv_2uv will use the NV to convert, not the PV. */
c2988b20
NC
2739 SvNOK_on(sv);
2740 } else {
e91de695 2741 /* value has been set. It may not be precise. */
53e2bfb7 2742 if ((numtype & IS_NUMBER_NEG) && (value >= (UV)IV_MIN)) {
e91de695
JH
2743 /* 2s complement assumption for (UV)IV_MIN */
2744 SvNOK_on(sv); /* Integer is too negative. */
c2988b20 2745 } else {
e91de695
JH
2746 SvNOKp_on(sv);
2747 SvIOKp_on(sv);
6fa402ec 2748
e91de695 2749 if (numtype & IS_NUMBER_NEG) {
02b08bbc
DM
2750 /* -IV_MIN is undefined, but we should never reach
2751 * this point with both IS_NUMBER_NEG and value ==
2752 * (UV)IV_MIN */
2753 assert(value != (UV)IV_MIN);
e91de695
JH
2754 SvIV_set(sv, -(IV)value);
2755 } else if (value <= (UV)IV_MAX) {
2756 SvIV_set(sv, (IV)value);
2757 } else {
2758 SvUV_set(sv, value);
2759 SvIsUV_on(sv);
2760 }
c2988b20 2761
e91de695
JH
2762 if (numtype & IS_NUMBER_NOT_INT) {
2763 /* I believe that even if the original PV had decimals,
2764 they are lost beyond the limit of the FP precision.
2765 However, neither is canonical, so both only get p
2766 flags. NWC, 2000/11/25 */
2767 /* Both already have p flags, so do nothing */
2768 } else {
2769 const NV nv = SvNVX(sv);
2770 /* XXX should this spot have NAN_COMPARE_BROKEN, too? */
2771 if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
2772 if (SvIVX(sv) == I_V(nv)) {
2773 SvNOK_on(sv);
2774 } else {
2775 /* It had no "." so it must be integer. */
2776 }
2777 SvIOK_on(sv);
0f83c5a4 2778 } else {
e91de695
JH
2779 /* between IV_MAX and NV(UV_MAX).
2780 Could be slightly > UV_MAX */
2781
2782 if (numtype & IS_NUMBER_NOT_INT) {
2783 /* UV and NV both imprecise. */
0f83c5a4 2784 } else {
e91de695
JH
2785 const UV nv_as_uv = U_V(nv);
2786
2787 if (value == nv_as_uv && SvUVX(sv) != UV_MAX) {
2788 SvNOK_on(sv);
c2988b20 2789 }
e91de695 2790 SvIOK_on(sv);
c2988b20
NC
2791 }
2792 }
2793 }
2794 }
0f83c5a4 2795 }
e91de695
JH
2796 /* It might be more code efficient to go through the entire logic above
2797 and conditionally set with SvNOKp_on() rather than SvNOK(), but it
2798 gets complex and potentially buggy, so more programmer efficient
2799 to do it this way, by turning off the public flags: */
2800 if (!numtype)
2801 SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK);
28e5dec8 2802#endif /* NV_PRESERVES_UV */
93a17b20 2803 }
c0443cc0 2804 else {
e91de695
JH
2805 if (isGV_with_GP(sv)) {
2806 glob_2number(MUTABLE_GV(sv));
2807 return 0.0;
2808 }
180488f8 2809
e91de695
JH
2810 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
2811 report_uninit(sv);
2812 assert (SvTYPE(sv) >= SVt_NV);
2813 /* Typically the caller expects that sv_any is not NULL now. */
2814 /* XXX Ilya implies that this is a bug in callers that assume this
2815 and ideally should be fixed. */
2816 return 0.0;
79072805 2817 }
b2f82b52 2818 CLANG_DIAG_IGNORE_STMT(-Wthread-safety);
097ee67d 2819 DEBUG_c({
688523a0
KW
2820 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
2821 STORE_LC_NUMERIC_SET_STANDARD();
147e3846 2822 PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2nv(%" NVgf ")\n",
e91de695 2823 PTR2UV(sv), SvNVX(sv));
688523a0 2824 RESTORE_LC_NUMERIC();
e91de695 2825 });
b2f82b52 2826 CLANG_DIAG_RESTORE_STMT;
463ee0b2 2827 return SvNVX(sv);
79072805
LW
2828}
2829
800401ee
JH
2830/*
2831=for apidoc sv_2num
2832
2833Return an SV with the numeric value of the source SV, doing any necessary
d024d1a7
FC
2834reference or overload conversion. The caller is expected to have handled
2835get-magic already.
800401ee
JH
2836
2837=cut
2838*/
2839
2840SV *
5aaab254 2841Perl_sv_2num(pTHX_ SV *const sv)
800401ee 2842{
7918f24d
NC
2843 PERL_ARGS_ASSERT_SV_2NUM;
2844
b9ee0594
RGS
2845 if (!SvROK(sv))
2846 return sv;
800401ee 2847 if (SvAMAGIC(sv)) {
31d632c3 2848 SV * const tmpsv = AMG_CALLunary(sv, numer_amg);
a02ec77a 2849 TAINT_IF(tmpsv && SvTAINTED(tmpsv));
800401ee
JH
2850 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv))))
2851 return sv_2num(tmpsv);
2852 }
2853 return sv_2mortal(newSVuv(PTR2UV(SvRV(sv))));
2854}
2855
dd0a5f5f
TK
2856/* int2str_table: lookup table containing string representations of all
2857 * two digit numbers. For example, int2str_table.arr[0] is "00" and
2858 * int2str_table.arr[12*2] is "12".
2859 *
2860 * We are going to read two bytes at a time, so we have to ensure that
2861 * the array is aligned to a 2 byte boundary. That's why it was made a
2862 * union with a dummy U16 member. */
2863static const union {
2864 char arr[200];
2865 U16 dummy;
2866} int2str_table = {{
2867 '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6',
2868 '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3',
2869 '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0',
2870 '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7',
2871 '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4',
2872 '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1',
2873 '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8',
2874 '4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5',
2875 '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2',
2876 '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
2877 '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6',
2878 '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3',
2879 '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0',
2880 '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7',
2881 '9', '8', '9', '9'
2882}};
2883
645c22ef
DM
2884/* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or
2885 * UV as a string towards the end of buf, and return pointers to start and
2886 * end of it.
2887 *
2888 * We assume that buf is at least TYPE_CHARS(UV) long.
2889 */
2890
dd0a5f5f 2891PERL_STATIC_INLINE char *
5de3775c 2892S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const peob)
25da4f38 2893{
25da4f38 2894 char *ptr = buf + TYPE_CHARS(UV);
823a54a3 2895 char * const ebuf = ptr;
25da4f38 2896 int sign;
dd0a5f5f 2897 U16 *word_ptr, *word_table;
25da4f38 2898
7918f24d
NC
2899 PERL_ARGS_ASSERT_UIV_2BUF;
2900
dd0a5f5f
TK
2901 /* ptr has to be properly aligned, because we will cast it to U16* */
2902 assert(PTR2nat(ptr) % 2 == 0);
2903 /* we are going to read/write two bytes at a time */
2904 word_ptr = (U16*)ptr;
2905 word_table = (U16*)int2str_table.arr;
2906
2907 if (UNLIKELY(is_uv))
25da4f38
IZ
2908 sign = 0;
2909 else if (iv >= 0) {
2910 uv = iv;
2911 sign = 0;
2912 } else {
7895b980
KW
2913 /* Using 0- here to silence bogus warning from MS VC */
2914 uv = (UV) (0 - (UV) iv);
25da4f38
IZ
2915 sign = 1;
2916 }
dd0a5f5f
TK
2917
2918 while (uv > 99) {
2919 *--word_ptr = word_table[uv % 100];
2920 uv /= 100;
2921 }
2922 ptr = (char*)word_ptr;
2923
2924 if (uv < 10)
2925 *--ptr = (char)uv + '0';
2926 else {
2927 *--word_ptr = word_table[uv];
2928 ptr = (char*)word_ptr;
2929 }
2930
25da4f38 2931 if (sign)
dd0a5f5f
TK
2932 *--ptr = '-';
2933
25da4f38
IZ
2934 *peob = ebuf;
2935 return ptr;
2936}
2937
bfaa02d5
JH
2938/* Helper for sv_2pv_flags and sv_vcatpvfn_flags. If the NV is an
2939 * infinity or a not-a-number, writes the appropriate strings to the
2940 * buffer, including a zero byte. On success returns the written length,
3bde2d43
JH
2941 * excluding the zero byte, on failure (not an infinity, not a nan)
2942 * returns zero, assert-fails on maxlen being too short.
3823048b
JH
2943 *
2944 * XXX for "Inf", "-Inf", and "NaN", we could have three read-only
2945 * shared string constants we point to, instead of generating a new
2946 * string for each instance. */
bfaa02d5 2947STATIC size_t
3823048b 2948S_infnan_2pv(NV nv, char* buffer, size_t maxlen, char plus) {
3bde2d43 2949 char* s = buffer;
bfaa02d5 2950 assert(maxlen >= 4);
3bde2d43
JH
2951 if (Perl_isinf(nv)) {
2952 if (nv < 0) {
2953 if (maxlen < 5) /* "-Inf\0" */
2954 return 0;
2955 *s++ = '-';
2956 } else if (plus) {
2957 *s++ = '+';
6e915616 2958 }
3bde2d43
JH
2959 *s++ = 'I';
2960 *s++ = 'n';
2961 *s++ = 'f';
2962 }
2963 else if (Perl_isnan(nv)) {
2964 *s++ = 'N';
2965 *s++ = 'a';
2966 *s++ = 'N';
2967 /* XXX optionally output the payload mantissa bits as
2968 * "(unsigned)" (to match the nan("...") C99 function,
2969 * or maybe as "(0xhhh...)" would make more sense...
2970 * provide a format string so that the user can decide?
2971 * NOTE: would affect the maxlen and assert() logic.*/
2972 }
2973 else {
2974 return 0;
bfaa02d5 2975 }
3bde2d43 2976 assert((s == buffer + 3) || (s == buffer + 4));
defe49c8
AL
2977 *s = 0;
2978 return s - buffer;
bfaa02d5
JH
2979}
2980
2981/*
2982=for apidoc sv_2pv_flags
2983
796b6530 2984Returns a pointer to the string value of an SV, and sets C<*lp> to its length.
c5608a1f 2985If flags has the C<SV_GMAGIC> bit set, does an C<mg_get()> first. Coerces C<sv> to a
bfaa02d5
JH
2986string if necessary. Normally invoked via the C<SvPV_flags> macro.
2987C<sv_2pv()> and C<sv_2pv_nomg> usually end up here too.
2988
2989=cut
2990*/
2991
2992char *
2993Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags)
2994{
2995 char *s;
2996
2997 PERL_ARGS_ASSERT_SV_2PV_FLAGS;
2998
2999 assert (SvTYPE(sv) != SVt_PVAV && SvTYPE(sv) != SVt_PVHV
3000 && SvTYPE(sv) != SVt_PVFM);
3001 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
3002 mg_get(sv);
3003 if (SvROK(sv)) {
3004 if (SvAMAGIC(sv)) {
3005 SV *tmpstr;
3006 if (flags & SV_SKIP_OVERLOAD)
3007 return NULL;
3008 tmpstr = AMG_CALLunary(sv, string_amg);
3009 TAINT_IF(tmpstr && SvTAINTED(tmpstr));
3010 if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) {
3011 /* Unwrap this: */
3012 /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr);
3013 */
3014
3015 char *pv;
3016 if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) {
3017 if (flags & SV_CONST_RETURN) {
3018 pv = (char *) SvPVX_const(tmpstr);
3019 } else {
3020 pv = (flags & SV_MUTABLE_RETURN)
3021 ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr);
3022 }
3023 if (lp)
3024 *lp = SvCUR(tmpstr);
3025 } else {
3026 pv = sv_2pv_flags(tmpstr, lp, flags);
3027 }
3028 if (SvUTF8(tmpstr))
3029 SvUTF8_on(sv);
3030 else
3031 SvUTF8_off(sv);
3032 return pv;
3033 }
3034 }
3035 {
3036 STRLEN len;
3037 char *retval;
3038 char *buffer;
3039 SV *const referent = SvRV(sv);
3040
3041 if (!referent) {
3042 len = 7;
3043 retval = buffer = savepvn("NULLREF", len);
3044 } else if (SvTYPE(referent) == SVt_REGEXP &&
3045 (!(PL_curcop->cop_hints & HINT_NO_AMAGIC) ||
3046 amagic_is_enabled(string_amg))) {
3047 REGEXP * const re = (REGEXP *)MUTABLE_PTR(referent);
3048
3049 assert(re);
3050
3051 /* If the regex is UTF-8 we want the containing scalar to
3052 have an UTF-8 flag too */
3053 if (RX_UTF8(re))
3054 SvUTF8_on(sv);
3055 else
3056 SvUTF8_off(sv);
3057
3058 if (lp)
3059 *lp = RX_WRAPLEN(re);
3060
3061 return RX_WRAPPED(re);
3062 } else {
a123eb89
R
3063 const char *const typestring = sv_reftype(referent, 0);
3064 const STRLEN typelen = strlen(typestring);
bfaa02d5
JH
3065 UV addr = PTR2UV(referent);
3066 const char *stashname = NULL;
3067 STRLEN stashnamelen = 0; /* hush, gcc */
3068 const char *buffer_end;
3069
3070 if (SvOBJECT(referent)) {
3071 const HEK *const name = HvNAME_HEK(SvSTASH(referent));
3072
3073 if (name) {
3074 stashname = HEK_KEY(name);
3075 stashnamelen = HEK_LEN(name);
3076
3077 if (HEK_UTF8(name)) {
3078 SvUTF8_on(sv);
3079 } else {
3080 SvUTF8_off(sv);
3081 }
3082 } else {
3083 stashname = "__ANON__";
3084 stashnamelen = 8;
3085 }
3086 len = stashnamelen + 1 /* = */ + typelen + 3 /* (0x */
3087 + 2 * sizeof(UV) + 2 /* )\0 */;
3088 } else {
3089 len = typelen + 3 /* (0x */
3090 + 2 * sizeof(UV) + 2 /* )\0 */;
3091 }
fafee734 3092
4bac9ae4
CS
3093 Newx(buffer, len, char);
3094 buffer_end = retval = buffer + len;
3095
3096 /* Working backwards */
3097 *--retval = '\0';
3098 *--retval = ')';
3099 do {
3100 *--retval = PL_hexdigit[addr & 15];
3101 } while (addr >>= 4);
3102 *--retval = 'x';
3103 *--retval = '0';
3104 *--retval = '(';
3105
3106 retval -= typelen;
a123eb89 3107 memcpy(retval, typestring, typelen);
4bac9ae4
CS
3108
3109 if (stashname) {
3110 *--retval = '=';
3111 retval -= stashnamelen;
3112 memcpy(retval, stashname, stashnamelen);
c080367d 3113 }
4bac9ae4
CS
3114 /* retval may not necessarily have reached the start of the
3115 buffer here. */
3116 assert (retval >= buffer);
3117
3118 len = buffer_end - retval - 1; /* -1 for that \0 */
463ee0b2 3119 }
cdb061a3 3120 if (lp)
4bac9ae4
CS
3121 *lp = len;
3122 SAVEFREEPV(buffer);
3123 return retval;
79072805 3124 }
79072805 3125 }
4bac9ae4
CS
3126
3127 if (SvPOKp(sv)) {
3128 if (lp)
3129 *lp = SvCUR(sv);
3130 if (flags & SV_MUTABLE_RETURN)
3131 return SvPVX_mutable(sv);
3132 if (flags & SV_CONST_RETURN)
3133 return (char *)SvPVX_const(sv);
3134 return SvPVX(sv);
3135 }
3136
3137 if (SvIOK(sv)) {
28e5dec8
JH
3138 /* I'm assuming that if both IV and NV are equally valid then
3139 converting the IV is going to be more efficient */
e1ec3a88 3140 const U32 isUIOK = SvIsUV(sv);
dd0a5f5f
TK
3141 /* The purpose of this union is to ensure that arr is aligned on
3142 a 2 byte boundary, because that is what uiv_2buf() requires */
3143 union {
3144 char arr[TYPE_CHARS(UV)];
3145 U16 dummy;
3146 } buf;
28e5dec8 3147 char *ebuf, *ptr;
97a130b8 3148 STRLEN len;
28e5dec8
JH
3149
3150 if (SvTYPE(sv) < SVt_PVIV)
3151 sv_upgrade(sv, SVt_PVIV);
dd0a5f5f 3152 ptr = uiv_2buf(buf.arr, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf);
97a130b8 3153 len = ebuf - ptr;
5902b6a9 3154 /* inlined from sv_setpvn */
97a130b8
NC
3155 s = SvGROW_mutable(sv, len + 1);
3156 Move(ptr, s, len, char);
3157 s += len;
28e5dec8 3158 *s = '\0';
b127e37e 3159 SvPOK_on(sv);
28e5dec8 3160 }
4bac9ae4 3161 else if (SvNOK(sv)) {
79072805
LW
3162 if (SvTYPE(sv) < SVt_PVNV)
3163 sv_upgrade(sv, SVt_PVNV);
128eeacb
DD
3164 if (SvNVX(sv) == 0.0
3165#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
3166 && !Perl_isnan(SvNVX(sv))
3167#endif
3168 ) {
29912d93
NC
3169 s = SvGROW_mutable(sv, 2);
3170 *s++ = '0';
3171 *s = '\0';
3172 } else {
5e85836e 3173 STRLEN len;
fb8cdbc5 3174 STRLEN size = 5; /* "-Inf\0" */
0c7e610f 3175
fb8cdbc5 3176 s = SvGROW_mutable(sv, size);
3823048b 3177 len = S_infnan_2pv(SvNVX(sv), s, size, 0);
fb8cdbc5 3178 if (len > 0) {
0c7e610f 3179 s += len;
fb8cdbc5
JH
3180 SvPOK_on(sv);
3181 }
0c7e610f 3182 else {
0c7e610f 3183 /* some Xenix systems wipe out errno here */
fb8cdbc5
JH
3184 dSAVE_ERRNO;
3185
3840bff0
JH
3186 size =
3187 1 + /* sign */
3188 1 + /* "." */
3189 NV_DIG +
3190 1 + /* "e" */
3191 1 + /* sign */
3192 5 + /* exponent digits */
3193 1 + /* \0 */
3194 2; /* paranoia */
b127e37e 3195
fb8cdbc5 3196 s = SvGROW_mutable(sv, size);
b127e37e 3197#ifndef USE_LOCALE_NUMERIC
a4eca1d4
JH
3198 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG);
3199
0c7e610f
JH
3200 SvPOK_on(sv);
3201#else
28acfe03 3202 {
3840bff0 3203 bool local_radix;
67d796ae
KW
3204 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
3205 STORE_LC_NUMERIC_SET_TO_NEEDED();
3840bff0 3206
dd2dbc5f 3207 local_radix = _NOT_IN_NUMERIC_STANDARD;
4c039fd8
DM
3208 if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) {
3209 size += SvCUR(PL_numeric_radix_sv) - 1;
3840bff0
JH
3210 s = SvGROW_mutable(sv, size);
3211 }
3212
a4eca1d4 3213 SNPRINTF_G(SvNVX(sv), s, SvLEN(sv), NV_DIG);
0c7e610f
JH
3214
3215 /* If the radix character is UTF-8, and actually is in the
3216 * output, turn on the UTF-8 flag for the scalar */
3dbc6af5
KW
3217 if ( local_radix
3218 && SvUTF8(PL_numeric_radix_sv)
3219 && instr(s, SvPVX_const(PL_numeric_radix_sv)))
3220 {
3840bff0
JH
3221 SvUTF8_on(sv);
3222 }
3223
0c7e610f 3224 RESTORE_LC_NUMERIC();
28acfe03 3225 }
68e8f474 3226
0c7e610f
JH
3227 /* We don't call SvPOK_on(), because it may come to
3228 * pass that the locale changes so that the
3229 * stringification we just did is no longer correct. We
3230 * will have to re-stringify every time it is needed */
b127e37e 3231#endif
0c7e610f
JH
3232 RESTORE_ERRNO;
3233 }
3234 while (*s) s++;
bbce6d69 3235 }
79072805 3236 }
4bac9ae4
CS
3237 else if (isGV_with_GP(sv)) {
3238 GV *const gv = MUTABLE_GV(sv);
3239 SV *const buffer = sv_newmortal();
8d1c3e26 3240
4bac9ae4 3241 gv_efullname3(buffer, gv, "*");
180488f8 3242
4bac9ae4
CS
3243 assert(SvPOK(buffer));
3244 if (SvUTF8(buffer))
3245 SvUTF8_on(sv);
1097da16
TC
3246 else
3247 SvUTF8_off(sv);
4bac9ae4
CS
3248 if (lp)
3249 *lp = SvCUR(buffer);
3250 return SvPVX(buffer);
3251 }
3252 else {
cdb061a3 3253 if (lp)
00b6aa41 3254 *lp = 0;
9f621bb0
NC
3255 if (flags & SV_UNDEF_RETURNS_NULL)
3256 return NULL;
4f62cd62 3257 if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
9f621bb0 3258 report_uninit(sv);
4bac9ae4
CS
3259 /* Typically the caller expects that sv_any is not NULL now. */
3260 if (!SvREADONLY(sv) && SvTYPE(sv) < SVt_PV)
25da4f38 3261 sv_upgrade(sv, SVt_PV);
73d840c0 3262 return (char *)"";
79072805 3263 }
4bac9ae4 3264
cdb061a3 3265 {
823a54a3 3266 const STRLEN len = s - SvPVX_const(sv);
cdb061a3
NC
3267 if (lp)
3268 *lp = len;
3269 SvCUR_set(sv, len);
3270 }
147e3846 3271 DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2pv(%s)\n",
3f7c398e 3272 PTR2UV(sv),SvPVX_const(sv)));
4d84ee25
NC
3273 if (flags & SV_CONST_RETURN)
3274 return (char *)SvPVX_const(sv);
10516c54
NC
3275 if (flags & SV_MUTABLE_RETURN)
3276 return SvPVX_mutable(sv);
463ee0b2
LW
3277 return SvPVX(sv);
3278}
3279
645c22ef 3280/*
6050d10e
JP
3281=for apidoc sv_copypv
3282
3283Copies a stringified representation of the source SV into the
796b6530 3284destination SV. Automatically performs any necessary C<mg_get> and
54f0641b 3285coercion of numeric values into strings. Guaranteed to preserve
796b6530
KW
3286C<UTF8> flag even from overloaded objects. Similar in nature to
3287C<sv_2pv[_flags]> but operates directly on an SV instead of just the
3288string. Mostly uses C<sv_2pv_flags> to do its work, except when that
6050d10e
JP
3289would lose the UTF-8'ness of the PV.
3290
4bac9ae4
CS
3291=for apidoc sv_copypv_nomg
3292
796b6530 3293Like C<sv_copypv>, but doesn't invoke get magic first.
4bac9ae4
CS
3294
3295=for apidoc sv_copypv_flags
3296
796b6530 3297Implementation of C<sv_copypv> and C<sv_copypv_nomg>. Calls get magic iff flags
c5608a1f 3298has the C<SV_GMAGIC> bit set.
4bac9ae4 3299
6050d10e
JP
3300=cut
3301*/
3302
3303void
5aaab254 3304Perl_sv_copypv_flags(pTHX_ SV *const dsv, SV *const ssv, const I32 flags)
4bac9ae4 3305{
446eaa42 3306 STRLEN len;
4bac9ae4 3307 const char *s;
7918f24d 3308
4bac9ae4 3309 PERL_ARGS_ASSERT_SV_COPYPV_FLAGS;
7918f24d 3310
c77ed9ca 3311 s = SvPV_flags_const(ssv,len,(flags & SV_GMAGIC));
cb50f42d 3312 sv_setpvn(dsv,s,len);
446eaa42 3313 if (SvUTF8(ssv))
cb50f42d 3314 SvUTF8_on(dsv);
446eaa42 3315 else
cb50f42d 3316 SvUTF8_off(dsv);
6050d10e
JP
3317}
3318
3319/*
645c22ef
DM
3320=for apidoc sv_2pvbyte
3321
796b6530 3322Return a pointer to the byte-encoded representation of the SV, and set C<*lp>
b661dc90
KW
3323to its length. If the SV is marked as being encoded as UTF-8, it will
3324downgrade it to a byte string as a side-effect, if possible. If the SV cannot
3325be downgraded, this croaks.
645c22ef
DM
3326
3327Usually accessed via the C<SvPVbyte> macro.
3328
3329=cut
3330*/
3331
7340a771 3332char *
757fc329 3333Perl_sv_2pvbyte_flags(pTHX_ SV *sv, STRLEN *const lp, const U32 flags)
7340a771 3334{
757fc329 3335 PERL_ARGS_ASSERT_SV_2PVBYTE_FLAGS;
7918f24d 3336
757fc329
P
3337 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
3338 mg_get(sv);
4499db73
FC
3339 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
3340 || isGV_with_GP(sv) || SvROK(sv)) {
a901b181 3341 SV *sv2 = sv_newmortal();
48120f8f 3342 sv_copypv_nomg(sv2,sv);
a901b181
FC
3343 sv = sv2;
3344 }
757fc329 3345 sv_utf8_downgrade_nomg(sv,0);
71eb6d8c 3346 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
7340a771
GS
3347}
3348
645c22ef 3349/*
035cbb0e
RGS
3350=for apidoc sv_2pvutf8
3351
796b6530 3352Return a pointer to the UTF-8-encoded representation of the SV, and set C<*lp>
035cbb0e
RGS
3353to its length. May cause the SV to be upgraded to UTF-8 as a side-effect.
3354
3355Usually accessed via the C<SvPVutf8> macro.
3356
3357=cut
3358*/
645c22ef 3359
7340a771 3360char *
757fc329 3361Perl_sv_2pvutf8_flags(pTHX_ SV *sv, STRLEN *const lp, const U32 flags)
7340a771 3362{
757fc329 3363 PERL_ARGS_ASSERT_SV_2PVUTF8_FLAGS;
7918f24d 3364
757fc329
P
3365 if (SvGMAGICAL(sv) && (flags & SV_GMAGIC))
3366 mg_get(sv);
4499db73 3367 if (((SvREADONLY(sv) || SvFAKE(sv)) && !SvIsCOW(sv))
757fc329
P
3368 || isGV_with_GP(sv) || SvROK(sv)) {
3369 SV *sv2 = sv_newmortal();
3370 sv_copypv_nomg(sv2,sv);
3371 sv = sv2;
3372 }
4bac9ae4 3373 sv_utf8_upgrade_nomg(sv);
c3ec315f 3374 return lp ? SvPV_nomg(sv,*lp) : SvPV_nomg_nolen(sv);
7340a771 3375}
1c846c1f 3376
7ee2227d 3377
645c22ef
DM
3378/*
3379=for apidoc sv_2bool
3380
796b6530
KW
3381This macro is only used by C<sv_true()> or its macro equivalent, and only if
3382the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>.
3383It calls C<sv_2bool_flags> with the C<SV_GMAGIC> flag.
06c841cf
FC
3384
3385=for apidoc sv_2bool_flags
3386
796b6530
KW
3387This function is only used by C<sv_true()> and friends, and only if
3388the latter's argument is neither C<SvPOK>, C<SvIOK> nor C<SvNOK>. If the flags
3389contain C<SV_GMAGIC>, then it does an C<mg_get()> first.
06c841cf 3390
645c22ef
DM
3391
3392=cut
3393*/
3394
463ee0b2 3395bool
9d176cd8 3396Perl_sv_2bool_flags(pTHX_ SV *sv, I32 flags)
463ee0b2 3397{
06c841cf 3398 PERL_ARGS_ASSERT_SV_2BOOL_FLAGS;
7918f24d 3399
9d176cd8 3400 restart:
06c841cf 3401 if(flags & SV_GMAGIC) SvGETMAGIC(sv);
463ee0b2 3402
a0d0e21e
LW
3403 if (!SvOK(sv))
3404 return 0;
3405 if (SvROK(sv)) {
fabdb6c0 3406 if (SvAMAGIC(sv)) {
31d632c3 3407 SV * const tmpsv = AMG_CALLunary(sv, bool__amg);
9d176cd8
DD
3408 if (tmpsv && (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) {
3409 bool svb;
3410 sv = tmpsv;
3411 if(SvGMAGICAL(sv)) {
3412 flags = SV_GMAGIC;
3413 goto restart; /* call sv_2bool */
3414 }
3415 /* expanded SvTRUE_common(sv, (flags = 0, goto restart)) */
3416 else if(!SvOK(sv)) {
3417 svb = 0;
3418 }
3419 else if(SvPOK(sv)) {
3420 svb = SvPVXtrue(sv);
3421 }
3422 else if((SvFLAGS(sv) & (SVf_IOK|SVf_NOK))) {
3423 svb = (SvIOK(sv) && SvIVX(sv) != 0)
659c4b96 3424 || (SvNOK(sv) && SvNVX(sv) != 0.0);
9d176cd8
DD
3425 }
3426 else {
3427 flags = 0;
3428 goto restart; /* call sv_2bool_nomg */
3429 }
3430 return cBOOL(svb);
3431 }
fabdb6c0 3432 }
e8f01ee5
DM
3433 assert(SvRV(sv));
3434 return TRUE;
a0d0e21e 3435 }
85b7d9b3
FC
3436 if (isREGEXP(sv))
3437 return
3438 RX_WRAPLEN(sv) > 1 || (RX_WRAPLEN(sv) && *RX_WRAPPED(sv) != '0');
1a436fbe
DM
3439
3440 if (SvNOK(sv) && !SvPOK(sv))
3441 return SvNVX(sv) != 0.0;
3442
4bac9ae4 3443 return SvTRUE_common(sv, isGV_with_GP(sv) ? 1 : 0);
79072805
LW
3444}
3445
c461cf8f
JH
3446/*
3447=for apidoc sv_utf8_upgrade
3448
78ea37eb 3449Converts the PV of an SV to its UTF-8-encoded form.
645c22ef 3450Forces the SV to string form if it is not already.
2bbc8d55 3451Will C<mg_get> on C<sv> if appropriate.
796b6530 3452Always sets the C<SvUTF8> flag to avoid future validity checks even
2bbc8d55
SP
3453if the whole string is the same in UTF-8 as not.
3454Returns the number of bytes in the converted string
c461cf8f 3455
0efd0472 3456This is not a general purpose byte encoding to Unicode interface:
13a6c0e0
JH
3457use the Encode extension for that.
3458
fe749c9a
KW
3459=for apidoc sv_utf8_upgrade_nomg
3460
796b6530 3461Like C<sv_utf8_upgrade>, but doesn't do magic on C<sv>.
fe749c9a 3462
8d6d96c1
HS
3463=for apidoc sv_utf8_upgrade_flags
3464
78ea37eb 3465Converts the PV of an SV to its UTF-8-encoded form.
645c22ef 3466Forces the SV to string form if it is not already.
8d6d96c1 3467Always sets the SvUTF8 flag to avoid future validity checks even
960b0271
FC
3468if all the bytes are invariant in UTF-8.
3469If C<flags> has C<SV_GMAGIC> bit set,
2bbc8d55 3470will C<mg_get> on C<sv> if appropriate, else not.
2a590426 3471
c58971e9 3472The C<SV_FORCE_UTF8_UPGRADE> flag is now ignored.
2a590426
KW
3473
3474Returns the number of bytes in the converted string.
8d6d96c1 3475
0efd0472 3476This is not a general purpose byte encoding to Unicode interface:
13a6c0e0
JH
3477use the Encode extension for that.
3478
2a590426 3479=for apidoc sv_utf8_upgrade_flags_grow
b3ab6785 3480
796b6530
KW
3481Like C<sv_utf8_upgrade_flags>, but has an additional parameter C<extra>, which is
3482the number of unused bytes the string of C<sv> is guaranteed to have free after
2a590426
KW
3483it upon return. This allows the caller to reserve extra space that it intends
3484to fill, to avoid extra grows.
b3ab6785 3485
2a590426
KW
3486C<sv_utf8_upgrade>, C<sv_utf8_upgrade_nomg>, and C<sv_utf8_upgrade_flags>
3487are implemented in terms of this function.
3488
3489Returns the number of bytes in the converted string (not including the spares).
3490
3491=cut
b3ab6785 3492
6602b933
KW
3493If the routine itself changes the string, it adds a trailing C<NUL>. Such a
3494C<NUL> isn't guaranteed due to having other routines do the work in some input
3495cases, or if the input is already flagged as being in utf8.
b3ab6785 3496
8d6d96c1
HS
3497*/
3498
3499STRLEN
5aaab254 3500Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extra)
8d6d96c1 3501{
b3ab6785 3502 PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS_GROW;
7918f24d 3503
808c356f
RGS
3504 if (sv == &PL_sv_undef)
3505 return 0;
892f9127 3506 if (!SvPOK_nog(sv)) {
e0e62c2a 3507 STRLEN len = 0;
d52b7888
NC
3508 if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) {
3509 (void) sv_2pv_flags(sv,&len, flags);
b3ab6785
KW
3510 if (SvUTF8(sv)) {
3511 if (extra) SvGROW(sv, SvCUR(sv) + extra);
d52b7888 3512 return len;
b3ab6785 3513 }
d52b7888 3514 } else {
33fb6f35 3515 (void) SvPV_force_flags(sv,len,flags & SV_GMAGIC);
d52b7888 3516 }
e0e62c2a 3517 }
4411f3b6 3518
fde84d2e
DM
3519 /* SVt_REGEXP's shouldn't be upgraded to UTF8 - they're already
3520 * compiled and individual nodes will remain non-utf8 even if the
3521 * stringified version of the pattern gets upgraded. Whether the
3522 * PVX of a REGEXP should be grown or we should just croak, I don't
3523 * know - DAPM */
3524 if (SvUTF8(sv) || isREGEXP(sv)) {
b3ab6785 3525 if (extra) SvGROW(sv, SvCUR(sv) + extra);
5fec3b1d 3526 return SvCUR(sv);
f5cee72b 3527 }
5fec3b1d 3528
765f542d 3529 if (SvIsCOW(sv)) {
c56ed9f6 3530 S_sv_uncow(aTHX_ sv, 0);
db42d148
NIS
3531 }
3532
4e93345f 3533 if (SvCUR(sv) == 0) {
e2e3bb6a
KW
3534 if (extra) SvGROW(sv, extra + 1); /* Make sure is room for a trailing
3535 byte */
4e93345f 3536 } else { /* Assume Latin-1/EBCDIC */
c4e7c712 3537 /* This function could be much more efficient if we
2bbc8d55 3538 * had a FLAG in SVs to signal if there are any variant
c4e7c712 3539 * chars in the PV. Given that there isn't such a flag
c58971e9 3540 * make the loop as fast as possible. */
b3ab6785 3541 U8 * s = (U8 *) SvPVX_const(sv);
b3ab6785 3542 U8 *t = s;
c4e7c712 3543
c58971e9 3544 if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) {
b3ab6785 3545
c58971e9
KW
3546 /* utf8 conversion not needed because all are invariants. Mark
3547 * as UTF-8 even if no variant - saves scanning loop */
3548 SvUTF8_on(sv);
3549 if (extra) SvGROW(sv, SvCUR(sv) + extra);
3550 return SvCUR(sv);
dc772057 3551 }
b3ab6785 3552
c58971e9
KW
3553 /* Here, there is at least one variant (t points to the first one), so
3554 * the string should be converted to utf8. Everything from 's' to
3555 * 't - 1' will occupy only 1 byte each on output.
dc772057 3556 *
c58971e9
KW
3557 * Note that the incoming SV may not have a trailing '\0', as certain
3558 * code in pp_formline can send us partially built SVs.
b3ab6785
KW
3559 *
3560 * There are two main ways to convert. One is to create a new string
3561 * and go through the input starting from the beginning, appending each
c58971e9
KW
3562 * converted value onto the new string as we go along. Going this
3563 * route, it's probably best to initially allocate enough space in the
3564 * string rather than possibly running out of space and having to
3565 * reallocate and then copy what we've done so far. Since everything
3566 * from 's' to 't - 1' is invariant, the destination can be initialized
3567 * with these using a fast memory copy. To be sure to allocate enough
3568 * space, one could use the worst case scenario, where every remaining
3569 * byte expands to two under UTF-8, or one could parse it and count
3570 * exactly how many do expand.
b3ab6785 3571 *
c58971e9
KW